使用 Firecrawl 从 Google Sheets 批量抓取网站 URL 到 Google Docs

Name: 使用 Firecrawl 从 Google Sheets 批量抓取网站 URL 到 Google Docs
Rating: 4.5 (10 reviews)
Author: Growth AI

中级

这是一个文档提取、多模态 AI 领域的自动化工作流，包含 10 个节点，主要使用 If、Filter、GoogleDrive、GoogleSheets、SplitInBatches 等节点，用于通过 Firecrawl 将 Google Sheets 中列出的网站 URL 批量抓取并保存到 Google Docs。

前置要求

•Google Drive API 凭证
•Google Sheets API 凭证
•HTTP Webhook 端点（n8n 会自动生成）
•Firecrawl API 凭证

使用的节点 (10)

分类

文档提取

多模态 AI

工作流预览

可视化展示节点连接关系，支持缩放和平移

如果

遍历项目

响应Webhook

当收到聊天消息时

爬取

获取URL

行不为空

创建markdown爬取文件

已爬取：完成

React Flow

导出工作流

复制以下 JSON 配置到 n8n 导入，即可使用此工作流

{
  "meta": {
    "instanceId": "393ca9e36a1f81b0f643c72792946a5fe5e49eb4864181ba4032e5a408278263",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "b17a526e-3245-4255-9308-644d1a8b8a56",
      "name": "如果",
      "type": "n8n-nodes-base.if",
      "position": [
        480,
        -80
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "9bc90f3e-9c5d-4cbc-b899-93fa5e2de9a5",
              "operator": {
                "type": "string",
                "operation": "empty",
                "singleValue": true
              },
              "leftValue": "={{ $json['Scrapé'] }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "d573c093-b0fe-46dd-803a-f4f5407ef071",
      "name": "遍历项目",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        768,
        -96
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "40bd3620-f640-4a09-8fc7-a074b56ca447",
      "name": "响应Webhook",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        1120,
        -240
      ],
      "parameters": {
        "options": {},
        "respondWith": "json",
        "responseBody": "{\n    \"text\": \"Fin du scraping rendez vous dans le dossier [Contenu scrapé](https://drive.google.com/drive/folders/1ry3xvQ9UqM2Rf9C4-AoJdg1lfB9inh_5) pour retrouver vos pages, déplacez les docs vers votre document RAG si vous souhaitez les ajouter à la base de données de votre client\"\n } "
      },
      "typeVersion": 1.2
    },
    {
      "id": "b6c0bf60-c7fe-4cf0-b6df-5cd10adc59d9",
      "name": "当收到聊天消息时",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -160,
        -80
      ],
      "webhookId": "60fcb296-7be1-4d65-a3b0-59a6fe4c43c0",
      "parameters": {
        "mode": "webhook",
        "public": true,
        "options": {
          "responseMode": "responseNode"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "a171ef02-49d0-407a-a690-1c0a33ac9960",
      "name": "爬取",
      "type": "@mendable/n8n-nodes-firecrawl.firecrawl",
      "position": [
        1136,
        -80
      ],
      "parameters": {
        "operation": "scrape",
        "requestOptions": {}
      },
      "credentials": {
        "firecrawlApi": {
          "id": "E34WDB80ik5VHjiI",
          "name": "Firecrawl account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "e5ef4981-bad1-438e-ae51-64c81271fdac",
      "name": "获取URL",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        48,
        -80
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "Page to doc"
        },
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $json.chatInput }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "wBRLUCktxqXE6DVJ",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "840557d7-7d60-4e2b-8a77-d28bd729005e",
      "name": "行不为空",
      "type": "n8n-nodes-base.filter",
      "position": [
        272,
        -80
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "48acd975-5041-455b-8e47-3b7eef32b483",
              "operator": {
                "type": "string",
                "operation": "exists",
                "singleValue": true
              },
              "leftValue": "={{ $json.URL }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "ad30c0c6-a5ba-460e-826a-ab329410c0b1",
      "name": "创建markdown爬取文件",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1440,
        -80
      ],
      "parameters": {
        "name": "={{ $('爬取').item.json.data.metadata.url }}",
        "content": "={{ $('爬取').item.json.data.markdown }}",
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive"
        },
        "options": {},
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "1ry3xvQ9UqM2Rf9C4-AoJdg1lfB9inh_5",
          "cachedResultUrl": "https://drive.google.com/drive/folders/1ry3xvQ9UqM2Rf9C4-AoJdg1lfB9inh_5",
          "cachedResultName": "Contenu scrapé"
        },
        "operation": "createFromText"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "3TalAPza9NdMx3yx",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "f218e0e1-4262-4077-a8b9-284c7d2ec268",
      "name": "已爬取：完成",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        1712,
        -80
      ],
      "parameters": {
        "columns": {
          "value": {
            "URL": "={{ $('遍历项目').item.json.URL }}",
            "Scrapé": "OK"
          },
          "schema": [
            {
              "id": "URL",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Scrapé",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Scrapé",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "row_number",
              "type": "string",
              "display": true,
              "removed": true,
              "readOnly": true,
              "required": false,
              "displayName": "row_number",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "URL"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "update",
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "Page to doc"
        },
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $('当收到聊天消息时').item.json.chatInput }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "wBRLUCktxqXE6DVJ",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.6,
      "alwaysOutputData": true
    },
    {
      "id": "25212a05-db4a-4d6f-a579-778ae04342ef",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1056,
        -1008
      ],
      "parameters": {
        "color": 4,
        "width": 720,
        "height": 3536,
        "content": "# Firecrawl批量爬取到Google文档"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "如果": {
      "main": [
        [
          {
            "node": "遍历项目",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "获取URL": {
      "main": [
        [
          {
            "node": "行不为空",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "爬取": {
      "main": [
        [
          {
            "node": "创建markdown爬取文件",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "已爬取：完成": {
      "main": [
        [
          {
            "node": "遍历项目",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "行不为空": {
      "main": [
        [
          {
            "node": "如果",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "遍历项目": {
      "main": [
        [
          {
            "node": "响应Webhook",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "爬取",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "当收到聊天消息时": {
      "main": [
        [
          {
            "node": "获取URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "创建markdown爬取文件": {
      "main": [
        [
          {
            "node": "已爬取：完成",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

常见问题

如何使用这个工作流？

复制上方的 JSON 配置代码，在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」，粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景？

适合需要把 Google Sheets 中列出的一批网站 URL 通过 Firecrawl 抓取为 Markdown，并自动保存到 Google Drive 文档的场景。难度：中级；分类：文档提取、多模态 AI。

需要付费吗？

本工作流完全免费，您可以直接导入使用。但请注意，工作流中使用的第三方服务（如 Firecrawl API）可能需要您自行付费。

使用 Firecrawl 从 Google Sheets 批量抓取网站 URL 到 Google Docs

使用的节点 (10)

分类

如何使用这个工作流？

这个工作流适合什么场景？

需要付费吗？

相关工作流推荐