使用Gemini、Apify和LangChain通过自定义提示抓取和分析网站
中级
这是一个文档提取、AI RAG 领域的自动化工作流,包含 9 个节点,主要使用 Aggregate、HttpRequest、SplitInBatches、Agent、ExecuteWorkflowTrigger 等节点。功能:使用 Gemini、Apify 和 LangChain 通过自定义提示抓取和分析网站。
前置要求
- 可能需要目标 API 的认证凭证(本工作流使用 Apify API 令牌和 OpenRouter 凭证)
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "9e2be6ba5ff8dd7a908d0b8006c046764e7b3f15c79d121bf914d51a0b406e85",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "812de487-e4d4-4b50-9b1b-dd51ae6bff51",
"name": "When Executed by Another Workflow",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
-440,
-60
],
"parameters": {
"inputSource": "jsonExample",
"jsonExample": "{\n \"enqueue\": true,\n \"maxPages\": 5,\n \"url\": \"https://apify.com\",\n \"method\": \"GET\",\n \"prompt\":\"collect all contact informations available on this website \"\n \n}"
},
"typeVersion": 1.1
},
{
"id": "796399e7-7027-469d-861e-c56811323091",
"name": "HTTP Request",
"type": "n8n-nodes-base.httpRequest",
"position": [
-220,
-60
],
"parameters": {
"url": "https://api.apify.com/v2/acts/mohamedgb00714~firescraper-ai-website-content-markdown-scraper/run-sync-get-dataset-items?token=apify_api_your_apify_api_token",
"method": "POST",
"options": {},
"jsonBody": "={\n \"enqueue\": {{ $json.enqueue }},\n \"getHtml\": false,\n \"getText\": false,\n \"maxPages\": {{ $json.maxPages }},\n \"screenshot\": false,\n \"startUrls\": [\n {\n \"url\": \"{{ $json.url }}\",\n \"method\": \"{{ $json.method }}\"\n }\n ]\n}",
"sendBody": true,
"specifyBody": "json"
},
"typeVersion": 4.2
},
{
"id": "da495119-6f7f-443b-8360-37015a3cb3a1",
"name": "Loop Over Items",
"type": "n8n-nodes-base.splitInBatches",
"position": [
0,
-60
],
"parameters": {
"options": {},
"batchSize": "=1"
},
"typeVersion": 3
},
{
"id": "5e9e67bf-5725-4021-b7dc-f3c2ad2083cb",
"name": "AI Agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
440,
-40
],
"parameters": {
"text": "={{ $('When Executed by Another Workflow').item.json.prompt }}\nthis is only analyse of one page of full website here is the metadata and markdown of page {{ $json.url }}\nmetadata:{{ JSON.stringify( $json.metadata) }}\nmarkdown:{{ $json.markdown }}\n",
"options": {},
"promptType": "define"
},
"typeVersion": 2
},
{
"id": "1c9014f7-fbcb-4dec-a4ff-eb441c8654c3",
"name": "OpenRouter Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
440,
260
],
"parameters": {
"model": "google/gemini-2.5-flash",
"options": {}
},
"credentials": {
"openRouterApi": {
"id": "tC9q2VizRVbSvlww",
"name": "OpenRouter account"
}
},
"typeVersion": 1
},
{
"id": "97d26611-95cd-4d32-9217-d79a93889783",
"name": "AI Agent1",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
920,
-240
],
"parameters": {
"text": "={{ JSON.stringify($json) }}",
"options": {
"systemMessage": "=You are a helpful assistant you should collect alll informations from input to respond to this prompt with json format {{ $('When Executed by Another Workflow').item.json.prompt }}\n"
},
"promptType": "define"
},
"typeVersion": 2
},
{
"id": "6b84a208-802b-470c-9bcd-3c700f635f79",
"name": "Aggregate",
"type": "n8n-nodes-base.aggregate",
"position": [
520,
-300
],
"parameters": {
"options": {},
"fieldsToAggregate": {
"fieldToAggregate": [
{
"fieldToAggregate": "output"
}
]
}
},
"typeVersion": 1
},
{
"id": "e752bfa3-f3b1-4909-a99f-36401379ab69",
"name": "OpenRouter Chat Model1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
940,
-20
],
"parameters": {
"model": "google/gemini-2.5-pro-preview",
"options": {}
},
"credentials": {
"openRouterApi": {
"id": "tC9q2VizRVbSvlww",
"name": "OpenRouter account"
}
},
"typeVersion": 1
},
{
"id": "c5c2ac9f-013f-489f-bb56-f3359a7e5523",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1280,
-440
],
"parameters": {
"width": 760,
"height": 2420,
"content": ""
},
"typeVersion": 1
}
],
"pinData": {
"When Executed by Another Workflow": [
{
"url": "https://apify.com",
"method": "GET",
"prompt": "collect all contact informations available on this website ",
"enqueue": true,
"maxPages": 5
}
]
},
"connections": {
"AI Agent": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Aggregate": {
"main": [
[
{
"node": "AI Agent1",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request": {
"main": [
[
{
"node": "Loop Over Items",
"type": "main",
"index": 0
}
]
]
},
"Loop Over Items": {
"main": [
[
{
"node": "Aggregate",
"type": "main",
"index": 0
}
],
[
{
"node": "AI Agent",
"type": "main",
"index": 0
}
]
]
},
"OpenRouter Chat Model": {
"ai_languageModel": [
[
{
"node": "AI Agent",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"OpenRouter Chat Model1": {
"ai_languageModel": [
[
{
"node": "AI Agent1",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"When Executed by Another Workflow": {
"main": [
[
{
"node": "HTTP Request",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 文档提取, AI RAG 检索增强
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Apify、OpenRouter API)可能需要您自行付费。
相关工作流推荐
AI驱动的谷歌地图商家数据抓取与表格导出
基于AI的谷歌地图商家数据抓取,支持数据丰富化并导出至表格
If
Code
Wait
+13
25 节点Msaid Mohamed el hadi
潜在客户开发
使用Kimi-K2、Gemini嵌入和Qdrant构建文档RAG系统
使用Kimi-K2、Gemini嵌入和Qdrant构建文档RAG系统
Set
Split Out
Qdrant
+14
35 节点Jimleuk
文档提取
视觉RAG与图像嵌入,使用Cohere Command-A和Embed v4
视觉RAG与图像嵌入,使用Cohere Command-A和Embed v4
If
Set
Split Out
+16
38 节点Jimleuk
文档提取
上下文混合RAG AI文案
Google Drive到Supabase上下文向量数据库同步用于RAG应用
If
Set
Code
+25
76 节点Michael Taleb
AI RAG 检索增强
AIAutomationPro终极RAG聊天机器人v1 n8n市场模板
多语言Telegram RAG聊天机器人,集成监督AI和自动化Google Drive流程
If
Set
Wait
+29
128 节点Daniel Ng
客服机器人
与 XML 文件聊天
使用 GPT 和 LangChain 分析与聊天 XML 文件
Set
Xml
Code
+7
10 节点Sarfaraz Muhammad Sajib
文档提取
工作流信息
难度等级
中级
节点数量:9
分类:2
节点类型:7
作者
Msaid Mohamed el hadi
@mohamedgb00714
Hi! I’m Mohamed El Hadi, a passionate developer and automation expert from Algeria. I specialize in building smart, scalable workflows using n8n to streamline business operations, save time, and drive innovation. 🌍 Open to collaborations | 🤖 Automation lover | 💼 Founder of EcoPetDZ & AADL Auto Reload
外部链接
在 n8n.io 查看 →
分享此工作流