使用Bright Data、Google Gemini和MCP自动化AI代理抓取网页数据

Name: 使用Bright Data、Google Gemini和MCP自动化AI代理抓取网页数据
Rating: 4.5 (10 reviews)
Author: Ranjan Dailata

高级

这是一个AI, Marketing, IT Ops领域的自动化工作流，包含 19 个节点。主要使用 Set, Function, McpClient, HttpRequest, McpClientTool 等节点，结合人工智能技术实现智能自动化。使用Bright Data、Google Gemini和MCP自动化AI代理抓取网页数据

前置要求

•可能需要目标 API 的认证凭证
•Google Gemini API Key

使用的节点 (19)

分类

人工智能

营销

IT 运维

工作流预览

可视化展示节点连接关系，支持缩放和平移

AI Agent

当点击"测试工作流"时

MCP 客户端列出 Bright Data 所有工具

MCP 客户端列出所有工具

MCP 客户端 Bright Data 网页抓取工具

网页抓取器的 Webhook

设置 URL

MCP 客户端以 Markdown 格式抓取

MCP 客户端以 HTML 形式抓取

AI 智能体使用的 Google Gemini 聊天模型

简单记忆

网页抓取 AI 代理的 Webhook

使用 Webhook URL 和数据格式设置 URL

创建二进制数据

将抓取内容写入磁盘

React Flow

导出工作流

复制以下 JSON 配置到 n8n 导入，即可使用此工作流

{
  "id": "U6cY7PPR0vaRl1I0",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "使用 Bright Data、Google Gemini 和 MCP 自动化 AI 代理抓取网页数据",
  "tags": [
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "0c747f5b-ef72-4e00-a028-4a08461dae28",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "notes": "Bright Data Web Scraping Agent",
      "position": [
        -140,
        60
      ],
      "parameters": {
        "text": "=Scrape the web data as per the provided URL:  {{ $json.url }} using the format as {{ $json.format }}",
        "options": {
          "systemMessage": "=You are a helpful assistant."
        },
        "promptType": "define"
      },
      "notesInFlow": true,
      "typeVersion": 1.8
    },
    {
      "id": "16c7cd90-39da-47a4-8020-a0aa8f87275a",
      "name": "当点击\"测试工作流\"时",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -640,
        -300
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "7b544505-6b6b-4500-a2ad-f7cf62f98c13",
      "name": "MCP 客户端列出 Bright Data 所有工具",
      "type": "n8n-nodes-mcp.mcpClient",
      "position": [
        -380,
        -300
      ],
      "parameters": {},
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "9f5bf319-9414-4974-bad2-ea24f09ae351",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -220,
        -400
      ],
      "parameters": {
        "color": 3,
        "width": 440,
        "height": 320,
        "content": "## Bright Data 网页抓取工具"
      },
      "typeVersion": 1
    },
    {
      "id": "222e81cf-878e-42de-a325-6b6659145f98",
      "name": "MCP 客户端列出所有工具",
      "type": "n8n-nodes-mcp.mcpClientTool",
      "position": [
        440,
        420
      ],
      "parameters": {},
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "a10dfd4f-cadf-4952-8449-1865406358d4",
      "name": "MCP 客户端 Bright Data 网页抓取工具",
      "type": "n8n-nodes-mcp.mcpClient",
      "notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language.",
      "position": [
        60,
        -300
      ],
      "parameters": {
        "toolName": "=scrape_as_markdown",
        "operation": "executeTool",
        "toolParameters": "={\n   \"url\": \"{{ $json.url }}\"\n} "
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": true,
      "typeVersion": 1
    },
    {
      "id": "0acbd4ff-ce4a-4ff2-b213-2d80dd91e302",
      "name": "网页抓取器的 Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        280,
        -300
      ],
      "parameters": {
        "url": "=https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.result.content[0].text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "009ac29f-8cad-4b58-9ca4-e75470a52dcc",
      "name": "设置 URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -160,
        -300
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "214e61a0-3587-453f-baf5-eac013990857",
              "name": "url",
              "type": "string",
              "value": "https://about.google/"
            },
            {
              "id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/ce41e056-c097-48c8-a096-9b876d3abbf7"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "104706dd-ae58-47fd-8fea-cefa986ae40c",
      "name": "MCP 客户端以 Markdown 格式抓取",
      "type": "n8n-nodes-mcp.mcpClientTool",
      "notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language.",
      "position": [
        -60,
        400
      ],
      "parameters": {
        "toolName": "scrape_as_markdown",
        "operation": "executeTool",
        "toolParameters": "={\n  \"url\": \"{{ $json.url }}\"\n} ",
        "descriptionType": "manual",
        "toolDescription": "Scrape a single webpage URL with advanced options for content extraction and get back the results in MarkDown language."
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": false,
      "typeVersion": 1
    },
    {
      "id": "c03c655e-45c8-4278-a01f-ba48282459c5",
      "name": "MCP 客户端以 HTML 形式抓取",
      "type": "n8n-nodes-mcp.mcpClientTool",
      "notes": "Scrape a single webpage URL with advanced options for content extraction and get back the results in HTML.",
      "position": [
        200,
        400
      ],
      "parameters": {
        "toolName": "scrape_as_html",
        "operation": "executeTool",
        "toolParameters": "{\n  \"url\": \"{{ $json.url }}\"\n} ",
        "descriptionType": "manual",
        "toolDescription": "Scrape a single webpage URL with advanced options for content extraction and get back the results in HTML."
      },
      "credentials": {
        "mcpClientApi": {
          "id": "JtatFSfA2kkwctYa",
          "name": "MCP Client (STDIO) account"
        }
      },
      "notesInFlow": true,
      "typeVersion": 1
    },
    {
      "id": "587300ff-44e5-4ff2-9dee-a4f8720ca26b",
      "name": "AI 智能体使用的 Google Gemini 聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        -520,
        400
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "38ba13a1-f8f7-48ce-a05d-2c2526de606d",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -140,
        340
      ],
      "parameters": {
        "color": 4,
        "width": 480,
        "height": 260,
        "content": "## Bright Data 网页抓取工具集"
      },
      "typeVersion": 1
    },
    {
      "id": "e7c8d333-e256-4944-a584-575162072ca4",
      "name": "简单记忆",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        -280,
        400
      ],
      "parameters": {
        "sessionKey": "=Perform the web scraping for the below URL\n\n{{ $json.url }}",
        "sessionIdType": "customKey",
        "contextWindowLength": 10
      },
      "typeVersion": 1.3
    },
    {
      "id": "5e89519e-8ee5-4c3b-807d-21cef6e36c32",
      "name": "网页抓取 AI 代理的 Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        260,
        120
      ],
      "parameters": {
        "url": "={{ $('Set the URL with the Webhook URL and data format').item.json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "2de093f4-15e5-4710-83d9-e6d9ed852873",
      "name": "使用 Webhook URL 和数据格式设置 URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -400,
        60
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "214e61a0-3587-453f-baf5-eac013990857",
              "name": "url",
              "type": "string",
              "value": "https://about.google/"
            },
            {
              "id": "45014942-0a2e-4f46-b395-f82f97bfa93e",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467"
            },
            {
              "id": "7f6c03f6-9fa3-45f9-bf81-243b7106bdac",
              "name": "format",
              "type": "string",
              "value": "scrape_as_markdown"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "04a5b11f-1990-440e-be23-2fbcb985dd4a",
      "name": "创建二进制数据",
      "type": "n8n-nodes-base.function",
      "position": [
        260,
        -80
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "0765ffcd-4746-45f7-add8-f82d66709321",
      "name": "将抓取内容写入磁盘",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        460,
        -80
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\Scraped-Content.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "16cd9b48-765d-4f0d-8e78-6c41cfc50b03",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -220,
        -540
      ],
      "parameters": {
        "width": 440,
        "height": 120,
        "content": "## 免责声明"
      },
      "typeVersion": 1
    },
    {
      "id": "ff4b9e9c-a0f0-4cdb-ad53-55dc412794fc",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1080,
        -80
      ],
      "parameters": {
        "color": 5,
        "width": 480,
        "height": 380,
        "content": "## 说明"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "ccf9f9af-82d5-4751-b06d-497c043c85fc",
  "connections": {
    "AI Agent": {
      "main": [
        [
          {
            "node": "Webhook for Web Scraper AI Agent",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create a binary data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set the URLs": {
      "main": [
        [
          {
            "node": "MCP Client Bright Data Web Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Simple Memory": {
      "ai_memory": [
        [
          {
            "node": "AI Agent",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "Create a binary data": {
      "main": [
        [
          {
            "node": "Write the scraped content to disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client List all tools": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client to Scrape as HTML": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client to Scrape as Markdown": {
      "ai_tool": [
        [
          {
            "node": "AI Agent",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Webhook for Web Scraper AI Agent": {
      "main": [
        []
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "MCP Client list all tools for Bright Data",
            "type": "main",
            "index": 0
          },
          {
            "node": "Set the URL with the Webhook URL and data format",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client Bright Data Web Scraper": {
      "main": [
        [
          {
            "node": "Webhook for web scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model for AI Agent": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "MCP Client list all tools for Bright Data": {
      "main": [
        [
          {
            "node": "Set the URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set the URL with the Webhook URL and data format": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

常见问题

如何使用这个工作流？

复制上方的 JSON 配置代码，在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」，粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景？

高级 - 人工智能, 营销, IT 运维

需要付费吗？

本工作流完全免费，您可以直接导入使用。但请注意，工作流中使用的第三方服务（如 OpenAI API）可能需要您自行付费。

使用Bright Data、Google Gemini和MCP自动化AI代理抓取网页数据

使用的节点 (19)

分类

如何使用这个工作流？

这个工作流适合什么场景？

需要付费吗？

相关工作流推荐