💡🌐 基于Jina.ai的多页面网站爬虫
高级
这是一个AI领域的自动化工作流,包含 16 个节点。主要使用 Set, Xml, Code, Wait, Limit 等节点,结合人工智能技术实现智能自动化。 💡🌐 使用Jina.ai的多页面网站爬虫工具
前置要求
- Google Drive API 凭证
- 可能需要目标 API 的认证凭证
使用的节点 (16)
分类
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "xEij0kj2I1DHbL3I",
"meta": {
"instanceId": "31e69f7f4a77bf465b805824e303232f0227212ae922d12133a0f96ffeab4fef",
"templateCredsSetupCompleted": true
},
"name": "💡🌐 基于 Jina.ai 的必备多页面网站爬虫",
"tags": [],
"nodes": [
{
"id": "3a503859-ef0a-492d-81c6-37e4f0c4c25e",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-840,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 320,
"content": "## Jina.ai 网络爬虫"
},
"typeVersion": 1
},
{
"id": "c5217a1a-f074-409b-8340-72afdc5fc8b5",
"name": "当点击\"测试工作流\"时",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1500,
-300
],
"parameters": {},
"typeVersion": 1
},
{
"id": "72af3b00-2632-4877-a0b6-7477e2f468f7",
"name": "遍历项目",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1080,
20
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
"name": "等待",
"type": "n8n-nodes-base.wait",
"position": [
80,
220
],
"webhookId": "081ce124-0cbf-4a21-a1e7-2c465f460448",
"parameters": {},
"typeVersion": 1.1
},
{
"id": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
"name": "限制",
"type": "n8n-nodes-base.limit",
"position": [
80,
-300
],
"parameters": {
"maxItems": 20
},
"typeVersion": 1
},
{
"id": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
"name": "获取网站 URL 列表",
"type": "n8n-nodes-base.httpRequest",
"position": [
-780,
-300
],
"parameters": {
"url": "={{ $json.sitemap_url }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
"name": "转换为 JSON",
"type": "n8n-nodes-base.xml",
"position": [
-560,
-300
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
"name": "创建网站 URL 列表",
"type": "n8n-nodes-base.splitOut",
"position": [
-340,
-300
],
"parameters": {
"options": {},
"fieldToSplitOut": "urlset.url"
},
"typeVersion": 1
},
{
"id": "61555239-8a16-424e-8a60-700f6ebaa270",
"name": "按主题或页面筛选",
"type": "n8n-nodes-base.filter",
"position": [
-120,
-300
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "or",
"conditions": [
{
"id": "d66c304d-879a-4dc4-908f-ab0665093672",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.loc }}",
"rightValue": "=https://ai.pydantic.dev/"
},
{
"id": "3c930950-bee4-442b-82e6-4437fd39a933",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.loc.toLowerCase() }}",
"rightValue": "agent"
},
{
"id": "aaeaf34e-ad5a-4673-b3bd-8bddf3500988",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.loc.toLowerCase() }}",
"rightValue": "tool"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
"name": "设置网站 URL",
"type": "n8n-nodes-base.set",
"position": [
-1080,
-300
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1601dc3e-8024-4e19-b592-93a4e4f77641",
"name": "sitemap_url",
"type": "string",
"value": "https://ai.pydantic.dev/sitemap.xml"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
"name": "Jina.ai 网络爬虫",
"type": "n8n-nodes-base.httpRequest",
"position": [
-720,
120
],
"parameters": {
"url": "=https://r.jina.ai/{{ $json.loc }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "be253ec2-f088-4895-8ef2-61a3720cf68b",
"name": "将网页内容保存到 Google Drive",
"type": "n8n-nodes-base.googleDrive",
"position": [
-120,
120
],
"parameters": {
"name": "={{ $('遍历项目').item.json.loc }} - {{ $json.title }}",
"content": "={{ $json.markdown }}",
"driveId": {
"__rl": true,
"mode": "list",
"value": "My Drive"
},
"options": {},
"folderId": {
"__rl": true,
"mode": "list",
"value": "root",
"cachedResultName": "/ (Root folder)"
},
"operation": "createFromText"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "UhdXGYLTAJbsa0xX",
"name": "Google Drive account"
}
},
"typeVersion": 3
},
{
"id": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
"name": "提取标题和 Markdown 内容",
"type": "n8n-nodes-base.code",
"position": [
-380,
120
],
"parameters": {
"jsCode": "// Get the text output from the previous node\nconst data = $input.first().json.data;\n\n// Regular expression to capture the title line\nconst titleRegex = /^Title:\\s*(.+)$/m;\n// Regular expression to capture everything after \"Markdown Content:\"\nconst markdownRegex = /Markdown Content:\\n([\\s\\S]+)/;\n\n// Extract the title using the first capture group\nconst titleMatch = data.match(titleRegex);\nconst title = titleMatch ? titleMatch[1].trim() : '';\n\n// Extract the markdown content using the first capture group\nconst markdownMatch = data.match(markdownRegex);\nconst markdown = markdownMatch ? markdownMatch[1].trim() : '';\n\n// Return a single object with title and markdown as unique values\nreturn { title, markdown };"
},
"typeVersion": 2
},
{
"id": "2fb86c81-c144-4450-908c-559855deadef",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1240,
-580
],
"parameters": {
"color": 7,
"width": 1540,
"height": 1080,
"content": "# 💡🌐 基于 Jina.ai 的必备多页面网站爬虫"
},
"typeVersion": 1
},
{
"id": "b470b294-95d0-4e51-a9cc-2fe17316a771",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1580,
-400
],
"parameters": {
"color": 4,
"width": 280,
"height": 300,
"content": "## 👍试试我!"
},
"typeVersion": 1
},
{
"id": "fafd0623-a423-4e73-9609-cee8e81f5c13",
"name": "便签3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1180,
-400
],
"parameters": {
"width": 300,
"height": 300,
"content": "## 👇添加网站站点地图 URL"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "2e815787-d83b-4ab7-a959-2f33006a37a5",
"connections": {
"等待": {
"main": [
[
{
"node": "遍历项目",
"type": "main",
"index": 0
}
]
]
},
"限制": {
"main": [
[
{
"node": "遍历项目",
"type": "main",
"index": 0
}
]
]
},
"转换为 JSON": {
"main": [
[
{
"node": "创建网站 URL 列表",
"type": "main",
"index": 0
}
]
]
},
"遍历项目": {
"main": [
[],
[
{
"node": "Jina.ai 网络爬虫",
"type": "main",
"index": 0
}
]
]
},
"设置网站 URL": {
"main": [
[
{
"node": "获取网站 URL 列表",
"type": "main",
"index": 0
}
]
]
},
"Jina.ai 网络爬虫": {
"main": [
[
{
"node": "提取标题和 Markdown 内容",
"type": "main",
"index": 0
}
]
]
},
"获取网站 URL 列表": {
"main": [
[
{
"node": "转换为 JSON",
"type": "main",
"index": 0
}
]
]
},
"按主题或页面筛选": {
"main": [
[
{
"node": "限制",
"type": "main",
"index": 0
}
]
]
},
"创建网站 URL 列表": {
"main": [
[
{
"node": "按主题或页面筛选",
"type": "main",
"index": 0
}
]
]
},
"提取标题和 Markdown 内容": {
"main": [
[
{
"node": "将网页内容保存到 Google Drive",
"type": "main",
"index": 0
}
]
]
},
"当点击\"测试工作流\"时": {
"main": [
[
{
"node": "设置网站 URL",
"type": "main",
"index": 0
}
]
]
},
"将网页内容保存到 Google Drive": {
"main": [
[
{
"node": "等待",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
高级 - 人工智能
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Jina.ai Reader API、Google Drive API)可能需要您自行付费。
相关工作流推荐
(Duc)深度研究市场模板
集成PerplexityAI研究和OpenAI内容的多层级WordPress博客生成器
If
Set
Xml
+28
132 节点Daniel Ng
人工智能
WordPress博客自动化专业版(SEO主题)v2
WordPress自动博客专业版 - SEO主题内容自动化机器
If
Set
Xml
+21
63 节点Daniel Ng
人工智能
WordPress博客自动化专业版(深度研究)v1
WordPress自动博客专业版 - 含深度研究的内容自动化机器
If
Set
Xml
+24
77 节点Daniel Ng
人工智能
API架构提取器
API架构提取器
If
Set
Code
+22
88 节点Polina Medvedieva
工程
[模板] AI宠物店 v8
🐶 AI宠物店助手 - 集成GPT-4o、Google日历和WhatsApp/Instagram/Facebook
If
N8n
Set
+38
244 节点Amanda Benks
销售
AI 代理餐厅 [模板]
🤖 WhatsApp、Instagram 和 Messenger 的 AI 餐厅助手
If
N8n
Set
+37
239 节点Amanda Benks
其他
工作流信息
难度等级
高级
节点数量:16
分类:1
节点类型:12
作者
Joseph LePage
@joe
As an AI Automation consultant based in Canada, I partner with forward-thinking organizations to implement AI solutions that streamline operations and drive growth.
外部链接
在 n8n.io 查看 →
分享此工作流