网站抓取器
高级
这是一个面向营销(Marketing)与 IT 运维(IT Ops)领域的自动化工作流,包含 24 个节点,主要使用 If、Set、Code、Airtop、Google Docs 等节点。它是一个基于 Airtop 的 LLM 网站抓取器。
前置要求
- Google Sheets API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "3SNBO1RAF0ZIyido",
"meta": {
"instanceId": "28a947b92b197fc2524eaba16e57560338657b2b0b5796300b2f1cedc1d0d355",
"templateCredsSetupCompleted": true
},
"name": "网站抓取器",
"tags": [],
"nodes": [
{
"id": "aaa5bf43-bf76-4433-93c7-7e168f2e140c",
"name": "On form submission",
"type": "n8n-nodes-base.formTrigger",
"position": [
-400,
-200
],
"webhookId": "6c508326-84d3-4155-9f2e-fe7ddb50f14a",
"parameters": {
"options": {},
"formTitle": "Website scraper",
"formFields": {
"values": [
{
"fieldLabel": "Seed url",
"requiredField": true
},
{
"fieldLabel": "Links must contain",
"requiredField": true
},
{
"fieldType": "number",
"fieldLabel": "Depth",
"requiredField": true
}
]
}
},
"typeVersion": 2.2
},
{
"id": "5f862a6f-8079-4d24-a2ae-7442c45c8f04",
"name": "Info to upload into spreadsheet",
"type": "n8n-nodes-base.set",
"position": [
260,
-100
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "5970bab1-0bfb-4212-a8f2-df6b2e003800",
"name": "URL",
"type": "string",
"value": "={{ $('Unify params').item.json[\"Seed url\"] }}"
},
{
"id": "5c941ad3-70bf-4fba-8cf3-1f79c1b20d3a",
"name": "Scraped",
"type": "string",
"value": ""
}
]
}
},
"typeVersion": 3.4
},
{
"id": "556e5da4-4454-46cd-b17d-bd8695515670",
"name": "Load info to spreadsheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
480,
-100
],
"parameters": {
"columns": {
"value": {},
"schema": [
{
"id": "URL",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "URL",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "autoMapInputData",
"matchingColumns": [
"URL"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "name",
"value": "={{ $('Create Spreadsheet').item.json.sheets[0].properties.title }}"
},
"documentId": {
"__rl": true,
"mode": "id",
"value": "={{ $('Create Spreadsheet').item.json.spreadsheetId }}"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "DdxyhUJLGcclD2YO",
"name": "Google sheets Andres"
}
},
"typeVersion": 4.5
},
{
"id": "4592ada8-8f21-4117-a0ee-8906922f5685",
"name": "Scrape webpage",
"type": "n8n-nodes-base.airtop",
"position": [
700,
-100
],
"parameters": {
"url": "={{ $json.URL }}",
"resource": "extraction",
"operation": "scrape",
"profileName": "=",
"sessionMode": "new"
},
"credentials": {
"airtopApi": {
"id": "Yi4YPNnovLVUjFn5",
"name": "Airtop Official Org"
}
},
"typeVersion": 1
},
{
"id": "74a1399d-a335-455c-a3dd-167624b4a5f2",
"name": "Create Google Docs",
"type": "n8n-nodes-base.googleDocs",
"position": [
920,
-100
],
"parameters": {
"title": "=Site to File - {{ $('Unify params').item.json[\"Seed url\"] }}",
"folderId": "default"
},
"credentials": {
"googleDocsOAuth2Api": {
"id": "pDMGILToKi9P4taJ",
"name": "Andres Google Docs"
}
},
"typeVersion": 2
},
{
"id": "1e69e40f-0465-430b-85ec-c6a71e1cb4a4",
"name": "Write scraped content",
"type": "n8n-nodes-base.googleDocs",
"position": [
1140,
-100
],
"parameters": {
"actionsUi": {
"actionFields": [
{
"text": "=The entire content from {{ $('Info to upload into spreadsheet').item.json.URL }} up to {{ $('Unify params').item.json.Depth }} levels deep.\n--------------------------------------------- \n{{ $('Scrape webpage').item.json.data.modelResponse.scrapedContent.text }}\n---------------------------------------------",
"action": "insert"
}
]
},
"operation": "update",
"documentURL": "={{ $json.id }}"
},
"credentials": {
"googleDocsOAuth2Api": {
"id": "pDMGILToKi9P4taJ",
"name": "Andres Google Docs"
}
},
"typeVersion": 2
},
{
"id": "49e9b38c-c2d4-49f3-bf3f-531b10257db4",
"name": "Should scrape more?",
"type": "n8n-nodes-base.if",
"position": [
1380,
-100
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "707d2bae-e834-478b-a4f6-9e6bbfa47530",
"operator": {
"type": "number",
"operation": "lt"
},
"leftValue": "={{ $runIndex }}",
"rightValue": "={{ $('Unify params').first().json.Depth - 1 }}"
},
{
"id": "cc763610-f775-4dcb-ae29-a3235b011b75",
"operator": {
"type": "number",
"operation": "gt"
},
"leftValue": "={{ Number($('Unify params').first().json.Depth) }}",
"rightValue": 1
}
]
}
},
"typeVersion": 2.2
},
{
"id": "239b2a02-bd66-4805-8ad3-b4ef6daa5e60",
"name": "Read scraped webpages",
"type": "n8n-nodes-base.googleSheets",
"position": [
1580,
-175
],
"parameters": {
"options": {},
"sheetName": {
"__rl": true,
"mode": "name",
"value": "={{ $('Create Spreadsheet').first().json.sheets[0].properties.title }}"
},
"documentId": {
"__rl": true,
"mode": "id",
"value": "={{ $('Create Spreadsheet').first().json.spreadsheetId }}"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "DdxyhUJLGcclD2YO",
"name": "Google sheets Andres"
}
},
"typeVersion": 4.5
},
{
"id": "029d7de1-9043-424e-9c22-aed436436e6a",
"name": "Retrieve links to scrape",
"type": "n8n-nodes-base.airtop",
"position": [
1800,
-175
],
"parameters": {
"url": "={{ $json.URL }}",
"prompt": "Extract the list of links that lead to other pages in the same domain",
"resource": "extraction",
"operation": "query",
"sessionMode": "new",
"additionalFields": {
"outputSchema": "{\n \"type\": \"object\",\n \"properties\": {\n \"internal_links\": {\n \"type\": \"array\",\n \"items\": {\n \"type\": \"string\",\n \"description\": \"A URL that leads to another page within the same domain as the current page\"\n },\n \"description\": \"A list of URLs that lead to other pages within the same domain as the current page\"\n }\n },\n \"required\": [\n \"internal_links\"\n ],\n \"additionalProperties\": false,\n \"$schema\": \"http://json-schema.org/draft-07/schema#\"\n}"
}
},
"credentials": {
"airtopApi": {
"id": "Yi4YPNnovLVUjFn5",
"name": "Airtop Official Org"
}
},
"typeVersion": 1
},
{
"id": "57f2a3ee-40c9-4e72-99f4-739fff04667a",
"name": "Filter links to insert to Sheets",
"type": "n8n-nodes-base.code",
"position": [
2020,
-180
],
"parameters": {
"jsCode": "const modelResponse = $('Retrieve links to scrape').first().json.data.modelResponse;\nconst containsString = $('Unify params').first().json[\"Links must contain\"];\nconst parsed = JSON.parse(modelResponse);\nconst links = [...new Set(parsed.internal_links)]; \nconst sheetsLinks = $input.all().map(item => item.json.URL)\nlet response; \n\nif(containsString === \"\"){\n response = links\n .map(item =>(\n { json: { link: item.split('?')[0] } }\n ))\n} else {\n response = links\n .map(item =>\n item.includes(containsString)\n ? { json: { link: item.split('?')[0] } }\n : null\n )\n .filter(item => item !== null);\n}\nconst dedupeBetweenSheetsAndModel = response.filter(item => !sheetsLinks.includes(item.json.link));\nconst deduped = [...new Map(dedupeBetweenSheetsAndModel.map(item => [item.json.link, item])).values()]\nreturn deduped\n"
},
"typeVersion": 2
},
{
"id": "6a0f20e8-df45-4799-8162-21d427e19e49",
"name": "Insert new links",
"type": "n8n-nodes-base.googleSheets",
"position": [
2240,
-175
],
"parameters": {
"columns": {
"value": {
"URL": "={{ $json.link }}"
},
"schema": [
{
"id": "URL",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "URL",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Scraped",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Scraped",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "name",
"value": "={{ $('Create Spreadsheet').first().json.sheets[0].properties.title }}"
},
"documentId": {
"__rl": true,
"mode": "id",
"value": "={{ $('Create Spreadsheet').first().json.spreadsheetId }}"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "DdxyhUJLGcclD2YO",
"name": "Google sheets Andres"
}
},
"typeVersion": 4.5
},
{
"id": "9433ab9e-b562-4aa3-a311-ae2a355ce774",
"name": "Scrape webpage1",
"type": "n8n-nodes-base.airtop",
"position": [
2460,
-175
],
"parameters": {
"url": "={{ $json.URL }}",
"resource": "extraction",
"operation": "scrape",
"sessionMode": "new"
},
"credentials": {
"airtopApi": {
"id": "Yi4YPNnovLVUjFn5",
"name": "Airtop Official Org"
}
},
"typeVersion": 1
},
{
"id": "704c25cf-b690-492d-a759-7b24a870edf4",
"name": "Update with new scraped content",
"type": "n8n-nodes-base.googleDocs",
"position": [
2680,
-175
],
"parameters": {
"actionsUi": {
"actionFields": [
{
"text": "=------------------------- \n{{ $json.data.modelResponse.scrapedContent.text }}\n-------------------------",
"action": "insert"
}
]
},
"operation": "update",
"documentURL": "={{ $('Create Google Docs').first().json.id }}"
},
"credentials": {
"googleDocsOAuth2Api": {
"id": "pDMGILToKi9P4taJ",
"name": "Andres Google Docs"
}
},
"typeVersion": 2
},
{
"id": "14f1465d-3d7d-4b7a-87d2-2552b9514e37",
"name": "Flag scraped link",
"type": "n8n-nodes-base.set",
"position": [
2900,
-175
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "72c9cd92-fd01-4339-b9fe-52477b691df3",
"name": "URL",
"type": "string",
"value": "={{ $('Insert new links').item.json.URL }}"
},
{
"id": "a817116e-6ae5-4e0a-b210-4fd27f5a455a",
"name": "Scraped",
"type": "string",
"value": "={{ $runIndex }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "364513e7-39c0-47af-83bd-475ffb0ae2a0",
"name": "Insert flag",
"type": "n8n-nodes-base.googleSheets",
"position": [
3120,
-100
],
"parameters": {
"columns": {
"value": {
"URL": "={{ $('Insert new links').item.json.URL }}"
},
"schema": [
{
"id": "URL",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "URL",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Scraped",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Scraped",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "row_number",
"type": "string",
"display": true,
"removed": false,
"readOnly": true,
"required": false,
"displayName": "row_number",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "autoMapInputData",
"matchingColumns": [
"URL"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "update",
"sheetName": {
"__rl": true,
"mode": "name",
"value": "={{ $('Create Spreadsheet').first().json.sheets[0].properties.title }}"
},
"documentId": {
"__rl": true,
"mode": "id",
"value": "={{ $('Create Spreadsheet').first().json.spreadsheetId }}"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "DdxyhUJLGcclD2YO",
"name": "Google sheets Andres"
}
},
"typeVersion": 4.5
},
{
"id": "bd799398-e6c6-4cd4-a8a9-d189acabb194",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-20,
-220
],
"parameters": {
"width": 660,
"height": 280,
"content": "## 创建电子表格"
},
"typeVersion": 1
},
{
"id": "7a5aeddd-9c0f-4ea2-8452-35dd14e6963a",
"name": "Create Spreadsheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
40,
-100
],
"parameters": {
"title": "=Site map - {{ $json[\"Seed url\"] }} (Depth - {{ $json.Depth }})",
"options": {},
"resource": "spreadsheet"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "DdxyhUJLGcclD2YO",
"name": "Google sheets Andres"
}
},
"typeVersion": 4.5
},
{
"id": "19a6c42b-8be5-4599-bc60-99f0b09e3623",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
660,
-220
],
"parameters": {
"color": 4,
"width": 180,
"height": 280,
"content": "## 抓取网页"
},
"typeVersion": 1
},
{
"id": "5de141f9-9e1e-4159-8611-06d23ee7b476",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
860,
-220
],
"parameters": {
"width": 440,
"height": 280,
"content": "## 创建文档"
},
"typeVersion": 1
},
{
"id": "a9108190-01d9-4c9a-a7b0-9ea582026acb",
"name": "便签3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1320,
-280
],
"parameters": {
"width": 1980,
"height": 380,
"content": "## 递归抓取过程"
},
"typeVersion": 1
},
{
"id": "bec8b412-51e1-45df-95b5-ae4e4aeb1fc2",
"name": "便签4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1160,
-540
],
"parameters": {
"color": 7,
"width": 660,
"height": 940,
"content": "README"
},
"typeVersion": 1
},
{
"id": "ef3cfdbf-bd61-4452-bad0-d0154bbd893b",
"name": "When Executed by Another Workflow",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
-400,
0
],
"parameters": {
"workflowInputs": {
"values": [
{
"name": "Seed url"
},
{
"name": "Links must contain"
},
{
"name": "Depth",
"type": "number"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "16d54958-18b6-497f-af9e-5953a39ae0bb",
"name": "便签5",
"type": "n8n-nodes-base.stickyNote",
"position": [
-440,
-300
],
"parameters": {
"width": 400,
"height": 460,
"content": "## 输入参数"
},
"typeVersion": 1
},
{
"id": "99cc34ec-37eb-423a-9cc5-7c1b7736d352",
"name": "Unify params",
"type": "n8n-nodes-base.set",
"position": [
-180,
-100
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "18e3200c-ab2a-44da-b401-eace47a3ccc0",
"name": "Seed url",
"type": "string",
"value": "={{ $json[\"Seed url\"] }}"
},
{
"id": "cb21aa86-bf75-427f-bf07-70d4f2d83894",
"name": "Links must contain",
"type": "string",
"value": "={{ $json[\"Links must contain\"] }}"
},
{
"id": "80bb934f-816d-43f0-9432-170047fa02a3",
"name": "Depth",
"type": "number",
"value": "={{ $json.Depth }}"
}
]
}
},
"typeVersion": 3.4
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "dffadb4b-7c72-42ec-802c-26d4f96b5ec7",
"connections": {
"Insert flag": {
"main": [
[
{
"node": "Should scrape more?",
"type": "main",
"index": 0
}
]
]
},
"Unify params": {
"main": [
[
{
"node": "Create Spreadsheet",
"type": "main",
"index": 0
}
]
]
},
"Scrape webpage": {
"main": [
[
{
"node": "Create Google Docs",
"type": "main",
"index": 0
}
]
]
},
"Scrape webpage1": {
"main": [
[
{
"node": "Update with new scraped content",
"type": "main",
"index": 0
}
]
]
},
"Insert new links": {
"main": [
[
{
"node": "Scrape webpage1",
"type": "main",
"index": 0
}
]
]
},
"Flag scraped link": {
"main": [
[
{
"node": "Insert flag",
"type": "main",
"index": 0
}
]
]
},
"Create Google Docs": {
"main": [
[
{
"node": "Write scraped content",
"type": "main",
"index": 0
}
]
]
},
"Create Spreadsheet": {
"main": [
[
{
"node": "Info to upload into spreadsheet",
"type": "main",
"index": 0
}
]
]
},
"On form submission": {
"main": [
[
{
"node": "Unify params",
"type": "main",
"index": 0
}
]
]
},
"Should scrape more?": {
"main": [
[
{
"node": "Read scraped webpages",
"type": "main",
"index": 0
}
],
[]
]
},
"Read scraped webpages": {
"main": [
[
{
"node": "Retrieve links to scrape",
"type": "main",
"index": 0
}
]
]
},
"Write scraped content": {
"main": [
[
{
"node": "Should scrape more?",
"type": "main",
"index": 0
}
]
]
},
"Load info to spreadsheet": {
"main": [
[
{
"node": "Scrape webpage",
"type": "main",
"index": 0
}
]
]
},
"Retrieve links to scrape": {
"main": [
[
{
"node": "Filter links to insert to Sheets",
"type": "main",
"index": 0
}
]
]
},
"Info to upload into spreadsheet": {
"main": [
[
{
"node": "Load info to spreadsheet",
"type": "main",
"index": 0
}
]
]
},
"Update with new scraped content": {
"main": [
[
{
"node": "Flag scraped link",
"type": "main",
"index": 0
}
]
]
},
"Filter links to insert to Sheets": {
"main": [
[
{
"node": "Insert new links",
"type": "main",
"index": 0
}
]
]
},
"When Executed by Another Workflow": {
"main": [
[
{
"node": "Unify params",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
高级 - 营销, IT 运维
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Airtop API、Google Sheets / Google Docs API)可能需要您自行付费。
相关工作流推荐
LinkedIn帖子互动数据提取器 - 模板
使用Airtop提取LinkedIn帖子互动数据
Set
Code
Airtop
+3
7 节点Airtop
营销
(Duc)深度研究市场模板
集成PerplexityAI研究和OpenAI内容的多层级WordPress博客生成器
If
Set
Xml
+28
132 节点Daniel Ng
人工智能
X - 线程中的响应
使用Airtop浏览器自动化的X线程自动回复
Set
Wait
Airtop
+3
11 节点Airtop
人工智能
CSV到HubSpot上传器(动态字段映射与Google Sheets集成)
具有动态字段映射和Google Sheets集成的CSV到HubSpot上传器
If
Set
Code
+10
36 节点PollupAI
人工智能
提取LinkedIn个人资料信息
使用Airtop和AI解析提取结构化LinkedIn个人资料数据
Set
Airtop
Form Trigger
+2
6 节点Airtop
销售
提取X帖子评论
使用Airtop浏览器自动化提取并结构化X帖子评论
Set
Airtop
Form Trigger
+2
7 节点Airtop
营销
工作流信息
难度等级
高级
节点数量:24
分类:2
节点类型:9
作者
Airtop
@cesar-at-airtop
Airtop provides an intelligent browser automation API for AI agents, enabling seamless web interaction, including login, navigation, and data extraction from any site, even those with complex authentication - all with natural language instructions. In simple terms, we allow you to automate anything humans can do online, on any site, with just words.
外部链接
在 n8n.io 查看 →
分享此工作流