8
n8n 中文网amn8n.com

使用 Bright Data、Gemini 和 Pinecone 为 LLM 创建 AI 就绪的向量数据集

高级

这是一个Building Blocks, AI领域的自动化工作流,包含 21 个节点。主要使用 Set, HttpRequest, ManualTrigger, Agent, ChainLlm 等节点,结合人工智能技术实现智能自动化。 使用 Bright Data、Gemini 和 Pinecone 为 LLM 创建 AI 就绪的向量数据集

前置要求
  • 可能需要目标 API 的认证凭证
  • Google Gemini API Key
  • Pinecone API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "3Lih0LVosR8dZbla",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "使用Bright Data、Gemini和Pinecone为LLM创建AI就绪的向量数据集",
  "tags": [
    {
      "id": "Kujft2FOjmOVQAmJ",
      "name": "Engineering",
      "createdAt": "2025-04-09T01:31:00.558Z",
      "updatedAt": "2025-04-09T01:31:00.558Z"
    },
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "0a468953-e348-420e-a6b3-c55fb20d3cbf",
      "name": "当点击\"测试工作流\"时",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        200,
        -710
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "3725e480-246f-4f32-b0a7-b946cacbe830",
      "name": "AI Agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        1236,
        -60
      ],
      "parameters": {
        "text": "=Format the below search result\n\n{{ $json.output.search_result }}",
        "options": {},
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.8
    },
    {
      "id": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
      "name": "Pinecone 向量存储",
      "type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
      "position": [
        1628,
        -10
      ],
      "parameters": {
        "mode": "insert",
        "options": {},
        "pineconeIndex": {
          "__rl": true,
          "mode": "list",
          "value": "hacker-news",
          "cachedResultName": "hacker-news"
        }
      },
      "credentials": {
        "pineconeApi": {
          "id": "wdfRQ6NE8yjCDFhY",
          "name": "PineconeApi account"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5",
      "name": "嵌入 Google Gemini",
      "type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
      "position": [
        1612,
        210
      ],
      "parameters": {
        "modelName": "models/text-embedding-004"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "e6443541-de71-4d26-ad58-d7c72868a190",
      "name": "默认数据加载器",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        1760,
        220
      ],
      "parameters": {
        "options": {},
        "jsonData": "={{ $('Information Extractor with Data Formatter').item.json.output.search_result }}",
        "jsonMode": "expressionData"
      },
      "typeVersion": 1
    },
    {
      "id": "09ffc8cd-096f-47fe-937d-f8ab4fb41266",
      "name": "递归字符文本分割器",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "position": [
        1820,
        410
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "90cc9aa4-0931-4c52-8734-e4e0de820205",
      "name": "Google Gemini聊天模型1",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1240,
        160
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "1090a4af-7e5d-446b-a537-3afe48cd4909",
      "name": "Google Gemini 聊天模型2",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        948,
        -340
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "324c530c-0a03-411e-acb0-d82e9dc635cf",
      "name": "Google Gemini 聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        948,
        160
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "3226a2d6-ade1-4d6a-95c5-0be4d787a947",
      "name": "结构化输出解析器",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        1400,
        160
      ],
      "parameters": {
        "jsonSchemaExample": "[{\n\t\"id\": \"<string>\",\n\t\"title\": \"<string>\",\n    \"summary\": \"<string>\",\n    \"keywords\": [\"\"],\n    \"topics\": [\"\"]\n}]"
      },
      "typeVersion": 1.2
    },
    {
      "id": "a739a314-900a-4ef7-9cc2-1b65374e2e05",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        40,
        -360
      ],
      "parameters": {
        "width": 480,
        "height": 220,
        "content": "## 注意"
      },
      "typeVersion": 1
    },
    {
      "id": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c",
      "name": "设置字段 - URL和Webhook URL",
      "type": "n8n-nodes-base.set",
      "notes": "Set the URL which you are interested to scrap the data",
      "position": [
        420,
        -710
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "1c132dd6-31e4-453b-a8cf-cad9845fe55b",
              "name": "url",
              "type": "string",
              "value": "https://news.ycombinator.com?product=unlocker&method=api"
            },
            {
              "id": "90f3272b-d13d-44e2-8b4c-0943648cfce9",
              "name": "webhook_url",
              "type": "string",
              "value": "https://webhook.site/bc804ce5-4a45-4177-a68a-99c80e5c86e6"
            }
          ]
        }
      },
      "notesInFlow": true,
      "typeVersion": 3.4
    },
    {
      "id": "216a3261-a398-484c-9bf4-ca5966b829b6",
      "name": "发起网络请求",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        640,
        -260
      ],
      "parameters": {
        "url": "https://api.brightdata.com/request",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "zone",
              "value": "web_unlocker1"
            },
            {
              "name": "url",
              "value": "={{ $json.url }}"
            },
            {
              "name": "format",
              "value": "raw"
            }
          ]
        },
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
      "name": "结构化JSON数据格式化器",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        860,
        -560
      ],
      "parameters": {
        "text": "=Format the below response and produce a textual data. Output the response as per the below JSON schema.\n\nHere's the input: {{ $json.data }}\nHere's the JSON schema: \n\n[{\n    \"rank\": { \"type\": \"integer\" },\n    \"title\": { \"type\": \"string\" },\n    \"site\": { \"type\": \"string\" },\n    \"points\": { \"type\": \"integer\" },\n    \"user\": { \"type\": \"string\" },\n    \"age\": { \"type\": \"string\" },\n    \"comments\": { \"type\": \"string\" }\n}]",
        "messages": {
          "messageValues": [
            {
              "message": "You are an expert data formatter"
            }
          ]
        },
        "promptType": "define"
      },
      "typeVersion": 1.6
    },
    {
      "id": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
      "name": "结构化数据Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1314,
        -860
      ],
      "parameters": {
        "url": "={{ $json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
      "name": "结构化AI代理响应Webhook",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1750,
        -660
      ],
      "parameters": {
        "url": "={{ $json.webhook_url }}",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "251b4251-255c-48c6-999b-02227fa2de9b",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        800,
        -620
      ],
      "parameters": {
        "width": 360,
        "height": 420,
        "content": "## AI数据格式化器"
      },
      "typeVersion": 1
    },
    {
      "id": "f62463cd-6be3-4942-a636-de980a3154b4",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1560,
        -160
      ],
      "parameters": {
        "color": 4,
        "width": 520,
        "height": 720,
        "content": "## 向量数据库持久化"
      },
      "typeVersion": 1
    },
    {
      "id": "ad20cc91-766a-4a57-be54-6f0d09a784eb",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1260,
        -920
      ],
      "parameters": {
        "color": 3,
        "width": 680,
        "height": 440,
        "content": "## Webhook通知处理器"
      },
      "typeVersion": 1
    },
    {
      "id": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
      "name": "带数据格式化器的信息提取器",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        860,
        -60
      ],
      "parameters": {
        "text": "={{ $json.data }}",
        "options": {
          "systemPromptTemplate": "You are an expert HTML extractor. Your job is to analyze the search result and extract the content as a collection on items"
        },
        "attributes": {
          "attributes": [
            {
              "name": "search_result",
              "description": "Search Response"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "e04e189a-8ba9-4ef4-9a49-fc13daf00828",
      "name": "便签4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        800,
        -160
      ],
      "parameters": {
        "color": 5,
        "width": 720,
        "height": 720,
        "content": "## 使用AI代理进行数据提取/格式化"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "799fb406-600d-45a5-b926-24b8844f33a5",
  "connections": {
    "AI Agent": {
      "main": [
        [
          {
            "node": "Pinecone Vector Store",
            "type": "main",
            "index": 0
          },
          {
            "node": "Webhook for structured AI agent response",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Make a web request": {
      "main": [
        [
          {
            "node": "Structured JSON Data Formatter",
            "type": "main",
            "index": 0
          },
          {
            "node": "Information Extractor with Data Formatter",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Default Data Loader": {
      "ai_document": [
        [
          {
            "node": "Pinecone Vector Store",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Pinecone Vector Store": {
      "ai_tool": [
        []
      ]
    },
    "Embeddings Google Gemini": {
      "ai_embedding": [
        [
          {
            "node": "Pinecone Vector Store",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Information Extractor with Data Formatter",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "AI Agent",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "AI Agent",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Chat Model2": {
      "ai_languageModel": [
        [
          {
            "node": "Structured JSON Data Formatter",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Structured JSON Data Formatter": {
      "main": [
        [
          {
            "node": "Webhook for structured data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Fields - URL and Webhook URL": {
      "main": [
        [
          {
            "node": "Make a web request",
            "type": "main",
            "index": 0
          },
          {
            "node": "Webhook for structured data",
            "type": "main",
            "index": 0
          },
          {
            "node": "Webhook for structured AI agent response",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Recursive Character Text Splitter": {
      "ai_textSplitter": [
        [
          {
            "node": "Default Data Loader",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Set Fields - URL and Webhook URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Information Extractor with Data Formatter": {
      "main": [
        [
          {
            "node": "AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 构建模块, 人工智能

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量21
分类2
节点类型13
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

外部链接
在 n8n.io 查看

分享此工作流