8
n8n 中文网amn8n.com

使用Bright Data和Google Gemini进行结构化数据提取和数据挖掘

高级

这是一个Engineering, AI, Marketing领域的自动化工作流,包含 18 个节点。主要使用 Set, Function, HttpRequest, ManualTrigger, ReadWriteFile 等节点,结合人工智能技术实现智能自动化。 使用Bright Data和Google Gemini进行结构化数据提取和数据挖掘

前置要求
  • 可能需要目标 API 的认证凭证
  • Google Gemini API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "1GOrjyc9mtZCMvCr",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "使用Bright Data和Google Gemini进行结构化数据提取和数据挖掘",
  "tags": [
    {
      "id": "Kujft2FOjmOVQAmJ",
      "name": "Engineering",
      "createdAt": "2025-04-09T01:31:00.558Z",
      "updatedAt": "2025-04-09T01:31:00.558Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "1e9038e6-9ebc-4460-bee2-3faea3b38f4c",
      "name": "当点击\"测试工作流\"时",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        200,
        -420
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "fd4ace46-7261-4380-8b65-1e00bb574f27",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        200,
        -780
      ],
      "parameters": {
        "width": 400,
        "height": 300,
        "content": "## 说明"
      },
      "typeVersion": 1
    },
    {
      "id": "1c1dd10f-beb2-4cc7-9118-77efd3172651",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        620,
        -780
      ],
      "parameters": {
        "width": 480,
        "height": 300,
        "content": "## LLM使用情况"
      },
      "typeVersion": 1
    },
    {
      "id": "9795ac80-6ded-465d-bfcf-0c6ce120452f",
      "name": "Markdown到文本数据提取器",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        860,
        -420
      ],
      "parameters": {
        "text": "=You need to analyze the below markdown and convert to textual data. Please do not output with your own thoughts. Make sure to output with textual data only with no links, scripts, css etc.\n\n{{ $json.data }}",
        "messages": {
          "messageValues": [
            {
              "message": "You are a markdown expert"
            }
          ]
        },
        "promptType": "define"
      },
      "typeVersion": 1.6
    },
    {
      "id": "b6a8cc64-c0c7-40dc-b7c1-0571baf3a0a9",
      "name": "设置URL和Bright Data区域",
      "type": "n8n-nodes-base.set",
      "position": [
        420,
        -420
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3aedba66-f447-4d7a-93c0-8158c5e795f9",
              "name": "url",
              "type": "string",
              "value": "https://www.bbc.com/news/world"
            },
            {
              "id": "4e7ee31d-da89-422f-8079-2ff2d357a0ba",
              "name": "zone",
              "type": "string",
              "value": "web_unlocker1"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "8d15dca1-3014-405f-ac35-78d64eda1d07",
      "name": "启动 Webhook 通知用于 Markdown 到文本数据提取",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1314,
        -720
      ],
      "parameters": {
        "url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "content",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "fff9e2d1-f3e2-47c3-8c3a-f9de8dbdee6a",
      "name": "为AI情感分析器启动Webhook通知",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1612,
        80
      ],
      "parameters": {
        "url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "summary",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "40c82a76-1710-4e57-8123-9c9fbc729110",
      "name": "Google Gemini 聊天模型用于数据提取",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        948,
        -200
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "0c1da174-9b9c-4067-9b2c-fa0cc8c33dc8",
      "name": "用于情感分析器的Google Gemini聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1324,
        200
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "7fae589c-854d-429e-9e67-527a002fcabf",
      "name": "执行Bright Data Web请求",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        640,
        -420
      ],
      "parameters": {
        "url": "https://api.brightdata.com/request",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "zone",
              "value": "={{ $json.zone }}"
            },
            {
              "name": "url",
              "value": "={{ $json.url }}?product=unlocker&method=api"
            },
            {
              "name": "format",
              "value": "raw"
            },
            {
              "name": "data_format",
              "value": "markdown"
            }
          ]
        },
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "e15fb9ba-ea8f-41f0-9b99-437d14a98a7d",
      "name": "带有结构化响应的主题提取器",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        1236,
        -20
      ],
      "parameters": {
        "text": "=Perform the topic analysis on the below content and output with the structured information.\n\nHere's the content:\n\n{{ $('Perform Bright Data Web Request').item.json.data }}",
        "options": {
          "systemPromptTemplate": "You are an expert data analyst."
        },
        "schemaType": "manual",
        "inputSchema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"TopicModelingResponseArray\",\n  \"type\": \"array\",\n  \"items\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"topic\": {\n        \"type\": \"string\",\n        \"description\": \"The identified topic or theme derived from the input text.\"\n      },\n      \"score\": {\n        \"type\": \"number\",\n        \"minimum\": 0,\n        \"maximum\": 1,\n        \"description\": \"Confidence score representing how strongly this topic is reflected in the content.\"\n      },\n      \"summary\": {\n        \"type\": \"string\",\n        \"description\": \"Brief explanation of the topic’s context within the text.\"\n      },\n      \"keywords\": {\n        \"type\": \"array\",\n        \"description\": \"List of keywords associated with the topic.\",\n        \"items\": {\n          \"type\": \"string\"\n        }\n      }\n    },\n    \"required\": [\"topic\", \"score\", \"summary\", \"keywords\"],\n    \"additionalProperties\": false\n  }\n}\n"
      },
      "typeVersion": 1
    },
    {
      "id": "e7f2b2c5-89ba-45c4-b7a4-297a159f8b39",
      "name": "带有结构化响应的按位置和类别划分的趋势",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        1236,
        -520
      ],
      "parameters": {
        "text": "=Perform the data analysis on the below content and output with the structured information by clustering the emerging trends by location and category\n\nHere's the content:\n\n{{ $('Perform Bright Data Web Request').item.json.data }}",
        "options": {
          "systemPromptTemplate": "You are an expert data analyst."
        },
        "schemaType": "manual",
        "inputSchema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"EmergingTrendsClusteredByLocationAndCategory\",\n  \"type\": \"array\",\n  \"items\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"location\": {\n        \"type\": \"string\",\n        \"description\": \"Geographical region or city where the trend is observed.\"\n      },\n      \"category\": {\n        \"type\": \"string\",\n        \"description\": \"Domain or industry related to the trend (e.g., Technology, Finance, Healthcare).\"\n      },\n      \"trends\": {\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"trend\": {\n              \"type\": \"string\",\n              \"description\": \"A concise label for the emerging trend.\"\n            },\n            \"score\": {\n              \"type\": \"number\",\n              \"minimum\": 0,\n              \"maximum\": 1,\n              \"description\": \"Confidence or prominence score of the trend.\"\n            },\n            \"summary\": {\n              \"type\": \"string\",\n              \"description\": \"Short explanation describing the context and impact of the trend.\"\n            },\n            \"mentions\": {\n              \"type\": \"array\",\n              \"items\": {\n                \"type\": \"string\"\n              },\n              \"description\": \"Keywords or phrases that commonly co-occur with the trend.\"\n            }\n          },\n          \"required\": [\"trend\", \"score\", \"summary\", \"mentions\"],\n          \"additionalProperties\": false\n        }\n      }\n    },\n    \"required\": [\"location\", \"category\", \"trends\"],\n    \"additionalProperties\": false\n  }\n}\n"
      },
      "typeVersion": 1
    },
    {
      "id": "92203e9f-cf13-435e-bf78-3c39a6e1e6f6",
      "name": "Google Gemini 聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1324,
        -300
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "1a252b74-6768-41a6-99dd-090e35c47065",
      "name": "为按位置和类别划分的趋势启动Webhook通知",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1612,
        -320
      ],
      "parameters": {
        "url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "summary",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "c952ab41-66af-4b41-b04e-407816074c87",
      "name": "为主题创建二进制文件",
      "type": "n8n-nodes-base.function",
      "position": [
        1612,
        -120
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "2cf80339-0927-4f48-a13a-c610eaf4edca",
      "name": "将主题文件写入磁盘",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1820,
        -120
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\topics.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "cf1da0ee-bb78-4ea7-bb2d-f2f82f728b12",
      "name": "将趋势文件写入磁盘",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1832,
        -520
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\trends.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "d38ca005-6ba3-4105-9fcd-058602ba16ce",
      "name": "为趋势创建二进制数据",
      "type": "n8n-nodes-base.function",
      "position": [
        1612,
        -520
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "6a81579d-1f3b-4ea2-821b-fff07b32ee7d",
  "connections": {
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Trends by location and category with the structured response",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Set URL and Bright Data Zone": {
      "main": [
        [
          {
            "node": "Perform Bright Data Web Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Write the trends file to disk": {
      "main": [
        []
      ]
    },
    "Create a binary data for tends": {
      "main": [
        [
          {
            "node": "Write the trends file to disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create a binary file for topics": {
      "main": [
        [
          {
            "node": "Write the topics file to disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Perform Bright Data Web Request": {
      "main": [
        [
          {
            "node": "Markdown to Textual Data Extractor",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Set URL and Bright Data Zone",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Markdown to Textual Data Extractor": {
      "main": [
        [
          {
            "node": "Topic Extractor with the structured response",
            "type": "main",
            "index": 0
          },
          {
            "node": "Initiate a Webhook Notification for Markdown to Textual Data Extraction",
            "type": "main",
            "index": 0
          },
          {
            "node": "Trends by location and category with the structured response",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model for Data Extract": {
      "ai_languageModel": [
        [
          {
            "node": "Markdown to Textual Data Extractor",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Topic Extractor with the structured response": {
      "main": [
        [
          {
            "node": "Initiate a Webhook Notification for AI Sentiment Analyzer",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create a binary file for topics",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model for Sentiment Analyzer": {
      "ai_languageModel": [
        [
          {
            "node": "Topic Extractor with the structured response",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Trends by location and category with the structured response": {
      "main": [
        [
          {
            "node": "Initiate a Webhook Notification for trends by location and category",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create a binary data for tends",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 工程, 人工智能, 营销

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量18
分类3
节点类型9
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

外部链接
在 n8n.io 查看

分享此工作流