使用 Bright Data、Gemini 和 Pinecone 为 LLM 创建 AI 就绪的向量数据集
高级
这是一个Building Blocks, AI领域的自动化工作流,包含 21 个节点。主要使用 Set, HttpRequest, ManualTrigger, Agent, ChainLlm 等节点,结合人工智能技术实现智能自动化。 使用 Bright Data、Gemini 和 Pinecone 为 LLM 创建 AI 就绪的向量数据集
前置要求
- 可能需要目标 API 的认证凭证(本工作流通过 HTTP Header Auth 调用 Bright Data API)
- Google Gemini API Key
- Pinecone API Key
使用的节点 (21)
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "3Lih0LVosR8dZbla",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
"templateCredsSetupCompleted": true
},
"name": "使用Bright Data、Gemini和Pinecone为LLM创建AI就绪的向量数据集",
"tags": [
{
"id": "Kujft2FOjmOVQAmJ",
"name": "Engineering",
"createdAt": "2025-04-09T01:31:00.558Z",
"updatedAt": "2025-04-09T01:31:00.558Z"
},
{
"id": "ZOwtAMLepQaGW76t",
"name": "Building Blocks",
"createdAt": "2025-04-13T15:23:40.462Z",
"updatedAt": "2025-04-13T15:23:40.462Z"
},
{
"id": "ddPkw7Hg5dZhQu2w",
"name": "AI",
"createdAt": "2025-04-13T05:38:08.053Z",
"updatedAt": "2025-04-13T05:38:08.053Z"
}
],
"nodes": [
{
"id": "0a468953-e348-420e-a6b3-c55fb20d3cbf",
"name": "当点击\"测试工作流\"时",
"type": "n8n-nodes-base.manualTrigger",
"position": [
200,
-710
],
"parameters": {},
"typeVersion": 1
},
{
"id": "3725e480-246f-4f32-b0a7-b946cacbe830",
"name": "AI Agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
1236,
-60
],
"parameters": {
"text": "=Format the below search result\n\n{{ $json.output.search_result }}",
"options": {},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.8
},
{
"id": "30a12b8e-02f5-4b2e-bf9f-20fd9658405e",
"name": "Pinecone 向量存储",
"type": "@n8n/n8n-nodes-langchain.vectorStorePinecone",
"position": [
1628,
-10
],
"parameters": {
"mode": "insert",
"options": {},
"pineconeIndex": {
"__rl": true,
"mode": "list",
"value": "hacker-news",
"cachedResultName": "hacker-news"
}
},
"credentials": {
"pineconeApi": {
"id": "wdfRQ6NE8yjCDFhY",
"name": "PineconeApi account"
}
},
"typeVersion": 1.1
},
{
"id": "1738dea6-fa4f-4a8d-a6fb-2f01feb1a6d5",
"name": "嵌入 Google Gemini",
"type": "@n8n/n8n-nodes-langchain.embeddingsGoogleGemini",
"position": [
1612,
210
],
"parameters": {
"modelName": "models/text-embedding-004"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "e6443541-de71-4d26-ad58-d7c72868a190",
"name": "默认数据加载器",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
1760,
220
],
"parameters": {
"options": {},
"jsonData": "={{ $('带数据格式化器的信息提取器').item.json.output.search_result }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "09ffc8cd-096f-47fe-937d-f8ab4fb41266",
"name": "递归字符文本分割器",
"type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
"position": [
1820,
410
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "90cc9aa4-0931-4c52-8734-e4e0de820205",
"name": "Google Gemini聊天模型1",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
1240,
160
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "1090a4af-7e5d-446b-a537-3afe48cd4909",
"name": "Google Gemini 聊天模型2",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
-340
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "324c530c-0a03-411e-acb0-d82e9dc635cf",
"name": "Google Gemini 聊天模型",
"type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
"position": [
948,
160
],
"parameters": {
"options": {},
"modelName": "models/gemini-2.0-flash-exp"
},
"credentials": {
"googlePalmApi": {
"id": "YeO7dHZnuGBVQKVZ",
"name": "Google Gemini(PaLM) Api account"
}
},
"typeVersion": 1
},
{
"id": "3226a2d6-ade1-4d6a-95c5-0be4d787a947",
"name": "结构化输出解析器",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
1400,
160
],
"parameters": {
"jsonSchemaExample": "[{\n\t\"id\": \"<string>\",\n\t\"title\": \"<string>\",\n \"summary\": \"<string>\",\n \"keywords\": [\"\"],\n \"topics\": [\"\"]\n}]"
},
"typeVersion": 1.2
},
{
"id": "a739a314-900a-4ef7-9cc2-1b65374e2e05",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
40,
-360
],
"parameters": {
"width": 480,
"height": 220,
"content": "## 注意"
},
"typeVersion": 1
},
{
"id": "3dca6d46-c423-4fb5-a6e4-c2aa2852d51c",
"name": "设置字段 - URL和Webhook URL",
"type": "n8n-nodes-base.set",
"notes": "Set the URL which you are interested to scrap the data",
"position": [
420,
-710
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1c132dd6-31e4-453b-a8cf-cad9845fe55b",
"name": "url",
"type": "string",
"value": "https://news.ycombinator.com?product=unlocker&method=api"
},
{
"id": "90f3272b-d13d-44e2-8b4c-0943648cfce9",
"name": "webhook_url",
"type": "string",
"value": "https://webhook.site/bc804ce5-4a45-4177-a68a-99c80e5c86e6"
}
]
}
},
"notesInFlow": true,
"typeVersion": 3.4
},
{
"id": "216a3261-a398-484c-9bf4-ca5966b829b6",
"name": "发起网络请求",
"type": "n8n-nodes-base.httpRequest",
"position": [
640,
-260
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"authentication": "genericCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "web_unlocker1"
},
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "format",
"value": "raw"
}
]
},
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "0c74e21c-3007-4297-b6ab-8ee17f4c6436",
"name": "结构化JSON数据格式化器",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
860,
-560
],
"parameters": {
"text": "=Format the below response and produce a textual data. Output the response as per the below JSON schema.\n\nHere's the input: {{ $json.data }}\nHere's the JSON schema: \n\n[{\n \"rank\": { \"type\": \"integer\" },\n \"title\": { \"type\": \"string\" },\n \"site\": { \"type\": \"string\" },\n \"points\": { \"type\": \"integer\" },\n \"user\": { \"type\": \"string\" },\n \"age\": { \"type\": \"string\" },\n \"comments\": { \"type\": \"string\" }\n}]",
"messages": {
"messageValues": [
{
"message": "You are an expert data formatter"
}
]
},
"promptType": "define"
},
"typeVersion": 1.6
},
{
"id": "012d4bb0-2b58-47cd-9cea-b4e0dced9082",
"name": "结构化数据Webhook",
"type": "n8n-nodes-base.httpRequest",
"position": [
1314,
-860
],
"parameters": {
"url": "={{ $json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.text }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "93b35e5e-6f52-4aeb-8f1b-39cc495beefe",
"name": "结构化AI代理响应Webhook",
"type": "n8n-nodes-base.httpRequest",
"position": [
1750,
-660
],
"parameters": {
"url": "={{ $json.webhook_url }}",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.output }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "251b4251-255c-48c6-999b-02227fa2de9b",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
800,
-620
],
"parameters": {
"width": 360,
"height": 420,
"content": "## AI数据格式化器"
},
"typeVersion": 1
},
{
"id": "f62463cd-6be3-4942-a636-de980a3154b4",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1560,
-160
],
"parameters": {
"color": 4,
"width": 520,
"height": 720,
"content": "## 向量数据库持久化"
},
"typeVersion": 1
},
{
"id": "ad20cc91-766a-4a57-be54-6f0d09a784eb",
"name": "便签3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1260,
-920
],
"parameters": {
"color": 3,
"width": 680,
"height": 440,
"content": "## Webhook通知处理器"
},
"typeVersion": 1
},
{
"id": "37ab5c0f-d36e-4131-844d-20a22d3f2861",
"name": "带数据格式化器的信息提取器",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
860,
-60
],
"parameters": {
"text": "={{ $json.data }}",
"options": {
"systemPromptTemplate": "You are an expert HTML extractor. Your job is to analyze the search result and extract the content as a collection on items"
},
"attributes": {
"attributes": [
{
"name": "search_result",
"description": "Search Response"
}
]
}
},
"typeVersion": 1
},
{
"id": "e04e189a-8ba9-4ef4-9a49-fc13daf00828",
"name": "便签4",
"type": "n8n-nodes-base.stickyNote",
"position": [
800,
-160
],
"parameters": {
"color": 5,
"width": 720,
"height": 720,
"content": "## 使用AI代理进行数据提取/格式化"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "799fb406-600d-45a5-b926-24b8844f33a5",
"connections": {
"AI Agent": {
"main": [
[
{
"node": "Pinecone Vector Store",
"type": "main",
"index": 0
},
{
"node": "Webhook for structured AI agent response",
"type": "main",
"index": 0
}
]
]
},
"Make a web request": {
"main": [
[
{
"node": "Structured JSON Data Formatter",
"type": "main",
"index": 0
},
{
"node": "Information Extractor with Data Formatter",
"type": "main",
"index": 0
}
]
]
},
"Default Data Loader": {
"ai_document": [
[
{
"node": "Pinecone Vector Store",
"type": "ai_document",
"index": 0
}
]
]
},
"Pinecone Vector Store": {
"ai_tool": [
[]
]
},
"Embeddings Google Gemini": {
"ai_embedding": [
[
{
"node": "Pinecone Vector Store",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Google Gemini Chat Model": {
"ai_languageModel": [
[
{
"node": "Information Extractor with Data Formatter",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Structured Output Parser": {
"ai_outputParser": [
[
{
"node": "AI Agent",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"Google Gemini Chat Model1": {
"ai_languageModel": [
[
{
"node": "AI Agent",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Google Gemini Chat Model2": {
"ai_languageModel": [
[
{
"node": "Structured JSON Data Formatter",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Structured JSON Data Formatter": {
"main": [
[
{
"node": "Webhook for structured data",
"type": "main",
"index": 0
}
]
]
},
"Set Fields - URL and Webhook URL": {
"main": [
[
{
"node": "Make a web request",
"type": "main",
"index": 0
},
{
"node": "Webhook for structured data",
"type": "main",
"index": 0
},
{
"node": "Webhook for structured AI agent response",
"type": "main",
"index": 0
}
]
]
},
"Recursive Character Text Splitter": {
"ai_textSplitter": [
[
{
"node": "Default Data Loader",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "Set Fields - URL and Webhook URL",
"type": "main",
"index": 0
}
]
]
},
"Information Extractor with Data Formatter": {
"main": [
[
{
"node": "AI Agent",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
高级 - 构建模块, 人工智能。适合需要通过 Bright Data 抓取网页内容、用 Google Gemini 提取并结构化数据,再将结果向量化写入 Pinecone,为 LLM 准备向量数据集的场景。
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Bright Data、Google Gemini API、Pinecone)可能需要您自行付费。
相关工作流推荐
使用Gemini AI和Bright Data提取和总结必应Copilot搜索结果
使用Gemini AI和Bright Data提取和总结必应Copilot搜索结果
If
Set
Wait
+9
19 节点Ranjan Dailata
人工智能
通过Bright Data提取、总结和分析亚马逊产品降价情况
使用Bright Data和Google Gemini提取、总结和分析亚马逊降价信息
Set
Wait
Merge
+14
26 节点Ranjan Dailata
人工智能
在可视化参考库中探索n8n节点
在可视化参考库中探索n8n节点
If
Ftp
Set
+93
113 节点I versus AI
其他
AI代理驱动的ProductHunt数据提取和搜索(使用Bright Data和Google Gemini)
使用Bright Data MCP和Google Gemini AI提取和搜索ProductHunt数据
Set
Function
Mcp Client
+10
21 节点Ranjan Dailata
人工智能
使用Bright Data和Google Gemini从LinkedIn生成公司故事
使用Bright Data和Google Gemini从LinkedIn生成公司故事
If
Set
Wait
+8
19 节点Ranjan Dailata
销售
使用Perplexity、Gemini AI和Bright Data搜索和汇总网络数据到Webhooks
使用Perplexity、Gemini AI和Bright Data搜索和汇总网络数据到Webhooks
If
Set
Wait
+8
17 节点Ranjan Dailata
人工智能