8
n8n 中文网amn8n.com

使用Bright Data、Google Gemini和MCP自动化AI代理抓取网页数据

高级

这是一个AI, Marketing, IT Ops领域的自动化工作流,包含 19 个节点。主要使用 Set, Function, McpClient, HttpRequest, McpClientTool 等节点,结合人工智能技术实现智能自动化。 使用Bright Data、Google Gemini和MCP自动化AI代理抓取网页数据

前置要求
  • 可能需要目标 API 的认证凭证
  • Google Gemini API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "U6cY7PPR0vaRl1I0",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "使用 Bright Data、Google Gemini 和 MCP 自动化 AI 代理抓取网页数据",
  "tags": [
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "0c747f5b-ef72-4e00-a028-4a08461dae28",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "notes": "Bright Data Web Scraping Agent",
      "position": [
        -140,
        60
      ],
      "parameters": {
        "text": "=Scrape the web data as per the provided URL:  {{ $json.url }} using the format as {{ $json.format }}",
        "options": {
          "systemMessage": "=You are a helpful assistant."
        },
        "promptType": "define"
      },
      "notesInFlow": true,
      "typeVersion": 1.8
    },
    {
      "id": "16c7cd90-39da-47a4-8020-a0aa8f87275a",
      "name": "当点击\"测试工作流\"时",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -640,
        -300
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "7b544505-6b6b-4500-a2ad-f7cf62f98c13",
      "name": "MCP 客户端列出 Bright Data 所有工具",
      "type": "n8n-nodes-mcp.mcpClient",
      "position": [
        -380,
        -300
      ],
      "parameters": {},
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "9f5bf319-9414-4974-bad2-ea24f09ae351",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -220,
        -400
      ],
      "parameters": {
        "color": 3,
        "width": 440,
        "height": 320,
        "content": "## Bright Data 网页抓取工具"
      },
      "typeVersion": 1
    },
    {
      "id": "222e81cf-878e-42de-a325-6b6659145f98",
      "name": "MCP 客户端列出所有工具",
      "type": "n8n-nodes-mcp.mcpClientTool",
      "position": [
        440,
        420
      ],
      "parameters": {},
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "a10dfd4f-cadf-4952-8449-1865406358d4",
      "name": "MCP 客户端 Bright Data 网页抓取工具",
      "type": "n8n-nodes-mcp.mcpClient",
      "notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language.",
      "position": [
        60,
        -300
      ],
      "parameters": {
        "toolName": "=scrape_as_markdown",
        "operation": "executeTool",
        "toolParameters": "={\n   \"url\": \"{{ $json.url }}\"\n} "
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": true,
      "typeVersion": 1
    },
    {
      "id": "0acbd4ff-ce4a-4ff2-b213-2d80dd91e302",
      "name": "网页抓取器的 Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        280,
        -300
      ],
      "parameters": {
        "url": "=https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.result.content[0].text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "009ac29f-8cad-4b58-9ca4-e75470a52dcc",
      "name": "设置 URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -160,
        -300
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "214e61a0-3587-453f-baf5-eac013990857",
              "name": "url",
              "type": "string",
              "value": "https://about.google/"
            },
            {
              "id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "104706dd-ae58-47fd-8fea-cefa986ae40c",
      "name": "MCP 客户端以 Markdown 格式抓取",
      "type": "n8n-nodes-mcp.mcpClientTool",
      "notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language.",
      "position": [
        -60,
        400
      ],
      "parameters": {
        "toolName": "scrape_as_markdown",
        "operation": "executeTool",
        "toolParameters": "={\n  \"url\": \"{{ $json.url }}\"\n} ",
        "descriptionType": "manual",
        "toolDescription": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language."
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1
    },
    {
      "id": "c03c655e-45c8-4278-a01f-ba48282459c5",
      "name": "MCP 客户端以 HTML 形式抓取",
      "type": "n8n-nodes-mcp.mcpClientTool",
      "notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in HTML.",
      "position": [
        200,
        400
      ],
      "parameters": {
        "toolName": "scrape_as_html",
        "operation": "executeTool",
        "toolParameters": "{\n  \"url\": \"{{ $json.url }}\"\n} ",
        "descriptionType": "manual",
        "toolDescription": "Scrape a single webpage URL with advanced options for content extraction and get back the results in HTML."
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": true,
      "typeVersion": 1
    },
    {
      "id": "587300ff-44e5-4ff2-9dee-a4f8720ca26b",
      "name": "AI 智能体使用的 Google Gemini 聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        -520,
        400
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "38ba13a1-f8f7-48ce-a05d-2c2526de606d",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -140,
        340
      ],
      "parameters": {
        "color": 4,
        "width": 480,
        "height": 260,
        "content": "## Bright Data 网页抓取工具集"
      },
      "typeVersion": 1
    },
    {
      "id": "e7c8d333-e256-4944-a584-575162072ca4",
      "name": "简单记忆",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        -280,
        400
      ],
      "parameters": {
        "sessionKey": "=Perform the web scraping for the below URL\n\n{{ $json.url }}",
        "sessionIdType": "customKey",
        "contextWindowLength": 10
      },
      "typeVersion": 1.3
    },
    {
      "id": "5e89519e-8ee5-4c3b-807d-21cef6e36c32",
      "name": "网页抓取 AI 代理的 Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        260,
        120
      ],
      "parameters": {
        "url": "={{ $('Set the URL with the Webhook URL and data format').item.json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "2de093f4-15e5-4710-83d9-e6d9ed852873",
      "name": "使用 Webhook URL 和数据格式设置 URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -400,
        60
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "214e61a0-3587-453f-baf5-eac013990857",
              "name": "url",
              "type": "string",
              "value": "https://about.google/"
            },
            {
              "id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467"
            },
            {
              "id": "7f6c03f6-9fa3-45f9-bf81-243b7106bdac",
              "name": "format",
              "type": "string",
              "value": "scrape_as_markdown"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "04a5b11f-1990-440e-be23-2fbcb985dd4a",
      "name": "创建二进制数据",
      "type": "n8n-nodes-base.function",
      "position": [
        260,
        -80
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "0765ffcd-4746-45f7-add8-f82d66709321",
      "name": "将抓取内容写入磁盘",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        460,
        -80
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\Scraped-Content.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "16cd9b48-765d-4f0d-8e78-6c41cfc50b03",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -220,
        -540
      ],
      "parameters": {
        "width": 440,
        "height": 120,
        "content": "## 免责声明"
      },
      "typeVersion": 1
    },
    {
      "id": "ff4b9e9c-a0f0-4cdb-ad53-55dc412794fc",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1080,
        -80
      ],
      "parameters": {
        "color": 5,
        "width": 480,
        "height": 380,
        "content": "## 说明"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "ccf9f9af-82d5-4751-b06d-497c043c85fc",
  "connections": {
    "AI Agent": {
      "main": [
        [
          {
            "node": "Webhook for Web Scraper AI Agent",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create a binary data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set the URLs": {
      "main": [
        [
          {
            "node": "MCP Client Bright Data Web Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Simple Memory": {
      "ai_memory": [
        [
          {
            "node": "AI Agent",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Create a binary data": {
      "main": [
        [
          {
            "node": "Write the scraped content to disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client List all tools": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client to Scrape as HTML": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client to Scrape as Markdown": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Webhook for Web Scraper AI Agent": {
      "main": [
        []
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "MCP Client list all tools for Bright Data",
            "type": "main",
            "index": 0
          },
          {
            "node": "Set the URL with the Webhook URL and data format",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client Bright Data Web Scraper": {
      "main": [
        [
          {
            "node": "Webhook for web scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model for AI Agent": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client list all tools for Bright Data": {
      "main": [
        [
          {
            "node": "Set the URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set the URL with the Webhook URL and data format": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 人工智能, 营销, IT 运维

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量19
分类3
节点类型11
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

外部链接
在 n8n.io 查看

分享此工作流