8
n8n 中文网amn8n.com

使用 Firecrawl 从 Google Sheets 批量抓取网站 URL 到 Google Docs

中级

这是一个Document Extraction, Multimodal AI领域的自动化工作流,包含 10 个节点。主要使用 If, Filter, GoogleDrive, GoogleSheets, SplitInBatches 等节点。 使用 Firecrawl 从 Google Sheets 批量抓取网站 URL 到 Google Docs

前置要求
  • Google Drive API 凭证
  • Google Sheets API 凭证
  • HTTP Webhook 端点(n8n 会自动生成)
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "393ca9e36a1f81b0f643c72792946a5fe5e49eb4864181ba4032e5a408278263",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "b17a526e-3245-4255-9308-644d1a8b8a56",
      "name": "如果",
      "type": "n8n-nodes-base.if",
      "position": [
        480,
        -80
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "9bc90f3e-9c5d-4cbc-b899-93fa5e2de9a5",
              "operator": {
                "type": "string",
                "operation": "empty",
                "singleValue": true
              },
              "leftValue": "={{ $json.scraped }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "d573c093-b0fe-46dd-803a-f4f5407ef071",
      "name": "遍历项目",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        768,
        -96
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "40bd3620-f640-4a09-8fc7-a074b56ca447",
      "name": "响应Webhook",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        1120,
        -240
      ],
      "parameters": {
        "options": {},
        "respondWith": "json",
        "responseBody": "{\n    \"text\": \"Fin du scraping rendez vous dans le dossier [Contenu scrapé](https://drive.google.com/drive/folders/1ry3xvQ9UqM2Rf9C4-AoJdg1lfB9inh_5) pour retrouver vos pages, déplacez les docs vers votre document RAG si vous souhaitez les ajouter à la base de données de votre client\"\n } "
      },
      "typeVersion": 1.2
    },
    {
      "id": "b6c0bf60-c7fe-4cf0-b6df-5cd10adc59d9",
      "name": "当收到聊天消息时",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        -160,
        -80
      ],
      "webhookId": "60fcb296-7be1-4d65-a3b0-59a6fe4c43c0",
      "parameters": {
        "mode": "webhook",
        "public": true,
        "options": {
          "responseMode": "responseNode"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "a171ef02-49d0-407a-a690-1c0a33ac9960",
      "name": "爬取",
      "type": "@mendable/n8n-nodes-firecrawl.firecrawl",
      "position": [
        1136,
        -80
      ],
      "parameters": {
        "operation": "scrape",
        "requestOptions": {}
      },
      "credentials": {
        "firecrawlApi": {
          "id": "E34WDB80ik5VHjiI",
          "name": "Firecrawl account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "e5ef4981-bad1-438e-ae51-64c81271fdac",
      "name": "获取URL",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        48,
        -80
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "Page to doc"
        },
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $json.chatInput }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "wBRLUCktxqXE6DVJ",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "840557d7-7d60-4e2b-8a77-d28bd729005e",
      "name": "行不为空",
      "type": "n8n-nodes-base.filter",
      "position": [
        272,
        -80
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "48acd975-5041-455b-8e47-3b7eef32b483",
              "operator": {
                "type": "string",
                "operation": "exists",
                "singleValue": true
              },
              "leftValue": "={{ $json.URL }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "ad30c0c6-a5ba-460e-826a-ab329410c0b1",
      "name": "创建markdown爬取文件",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1440,
        -80
      ],
      "parameters": {
        "name": "={{ $('Scraping').item.json.data.metadata.url }}",
        "content": "={{ $('Scraping').item.json.data.markdown }}",
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive"
        },
        "options": {},
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "1ry3xvQ9UqM2Rf9C4-AoJdg1lfB9inh_5",
          "cachedResultUrl": "https://drive.google.com/drive/folders/1ry3xvQ9UqM2Rf9C4-AoJdg1lfB9inh_5",
          "cachedResultName": "Contenu scrapé"
        },
        "operation": "createFromText"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "3TalAPza9NdMx3yx",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "f218e0e1-4262-4077-a8b9-284c7d2ec268",
      "name": "已爬取:完成",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        1712,
        -80
      ],
      "parameters": {
        "columns": {
          "value": {
            "URL": "={{ $('Loop Over Items').item.json.URL }}",
            "Scrapé": "OK"
          },
          "schema": [
            {
              "id": "URL",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Scrapé",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Scrapé",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "row_number",
              "type": "string",
              "display": true,
              "removed": true,
              "readOnly": true,
              "required": false,
              "displayName": "row_number",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "URL"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "update",
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "Page to doc"
        },
        "documentId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $('When chat message received').item.json.chatInput }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "wBRLUCktxqXE6DVJ",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.6,
      "alwaysOutputData": true
    },
    {
      "id": "25212a05-db4a-4d6f-a579-778ae04342ef",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1056,
        -1008
      ],
      "parameters": {
        "color": 4,
        "width": 720,
        "height": 3536,
        "content": "# Firecrawl批量爬取到Google文档"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "If": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get URL": {
      "main": [
        [
          {
            "node": "Row not empty",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scraping": {
      "main": [
        [
          {
            "node": "Create file markdown scraping",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scraped : OK": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Row not empty": {
      "main": [
        [
          {
            "node": "If",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [
          {
            "node": "Respond to Webhook",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Scraping",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When chat message received": {
      "main": [
        [
          {
            "node": "Get URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create file markdown scraping": {
      "main": [
        [
          {
            "node": "Scraped : OK",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - 文档提取, 多模态 AI

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量10
分类2
节点类型9
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

外部链接
在 n8n.io 查看

分享此工作流