网页阅读器
中级
这是一个文档提取(Document Extraction)领域的自动化工作流,包含 15 个节点,主要使用 If、Set、HttpRequest、StopAndError、ExecuteWorkflowTrigger 等节点。该工作流为 AI 代理和工作流提取干净的网页内容,并带有反爬虫备用方案。
前置要求
- 可能需要目标 API 的认证凭证
分类
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "9UyGvrk6EDY6Hm3W",
"meta": {
"instanceId": "7e84375f1a5a2398bff60c3e83bb370423dae55c261ed7c48ca02f15548655a7",
"templateCredsSetupCompleted": true
},
"name": "网页阅读器",
"tags": [],
"nodes": [
{
"id": "f449a425-4ae9-462d-91bb-ff0b85a73202",
"name": "Content Extractor",
"type": "n8n-nodes-webpage-content-extractor.webpageContentExtractor",
"position": [
940,
100
],
"parameters": {
"html": "={{ $json.data }}"
},
"typeVersion": 1
},
{
"id": "e52eddc5-72a7-4bd8-8679-ecedccad447c",
"name": "Try Antibot Evasion",
"type": "n8n-nodes-base.if",
"position": [
280,
180
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "or",
"conditions": [
{
"id": "1351d4e8-1c27-43c2-8335-aee7c097422a",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ECONNABORTED"
},
{
"id": "28a4c2eb-0a9b-44ac-87d5-6571be2fb447",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ETIMEDOUT"
},
{
"id": "1287e08b-a342-4651-8e56-1d1ff4677222",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ERR_CANCELED"
},
{
"id": "45256daa-063f-4ed3-8ef0-5ec91cdc0974",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.error.code }}",
"rightValue": "ERR_BAD_REQUEST"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "a90654b8-b83b-41ed-a665-9a0303a84de3",
"name": "Scrape.do",
"type": "n8n-nodes-base.httpRequest",
"position": [
500,
180
],
"parameters": {
"url": "=https://api.scrape.do",
"options": {
"timeout": 120000
},
"sendQuery": true,
"authentication": "genericCredentialType",
"genericAuthType": "httpQueryAuth",
"queryParameters": {
"parameters": [
{
"name": "url",
"value": "={{ $json.url }}"
}
]
}
},
"credentials": {
"httpQueryAuth": {
"id": "SMKkxhdbOewTAnqe",
"name": "Scrape.do account"
}
},
"retryOnFail": true,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "62c1fb07-35e0-4942-b38d-b888b559e109",
"name": "Server Error",
"type": "n8n-nodes-base.stopAndError",
"position": [
500,
380
],
"parameters": {
"errorMessage": "=Error requesting website ({{ $json.error.code }})"
},
"typeVersion": 1
},
{
"id": "7e793496-3ba7-4a30-bb6c-d483c00671c6",
"name": "Not 404",
"type": "n8n-nodes-base.if",
"position": [
60,
180
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "81558598-6188-4712-962c-3f80fcba1297",
"operator": {
"type": "number",
"operation": "notEquals"
},
"leftValue": "={{ $json.error.status }}",
"rightValue": 404
}
]
}
},
"typeVersion": 2.2
},
{
"id": "9ae25973-ffa0-4b14-943b-d8a9fa0ee3b0",
"name": "Not Found",
"type": "n8n-nodes-base.stopAndError",
"position": [
280,
380
],
"parameters": {
"errorMessage": "=Error requesting website (404)"
},
"typeVersion": 1
},
{
"id": "8af2bbee-ebd0-49e2-aa4a-bc58e1ccaf31",
"name": "Simple Scraper",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-160,
120
],
"parameters": {
"url": "={{ $json.url }}",
"options": {
"timeout": 10000,
"redirect": {
"redirect": {}
},
"allowUnauthorizedCerts": true
},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
}
]
}
},
"retryOnFail": true,
"typeVersion": 4.2,
"waitBetweenTries": 5000
},
{
"id": "f0f8106a-9a8c-492e-8082-fc82a3852765",
"name": "Full Text",
"type": "n8n-nodes-base.if",
"position": [
1160,
100
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "b32569d1-ba84-401f-9dc9-99b2c804cba2",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $('Workflow Call').item.json.fulltext }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "325b73df-6fe0-4c22-985e-0916a09a8865",
"name": "Fulltext Output",
"type": "n8n-nodes-base.set",
"position": [
1380,
0
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
"name": "title",
"type": "string",
"value": "={{ ( $json.title || '' ).replace(/\\p{Extended_Pictographic}/gu, '') }}"
},
{
"id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
"name": "text",
"type": "string",
"value": "={{\n( $json.textContent || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "32e88e32-0068-47de-8f72-aee167f15ca2",
"name": "Summary Output",
"type": "n8n-nodes-base.set",
"position": [
1380,
200
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
"name": "title",
"type": "string",
"value": "={{ ( $json.title || '' ).replace(/\\p{Extended_Pictographic}/gu, '') }}"
},
{
"id": "28476e01-485e-4373-a6c3-b3703d4ba1e4",
"name": "url",
"type": "string",
"value": "={{ $('Workflow Call').item.json.url }}"
},
{
"id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
"name": "content",
"type": "string",
"value": "={{\n( $json.excerpt || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "96438bb1-1918-4ae6-9a40-0624968ca7b3",
"name": "Is Binary",
"type": "n8n-nodes-base.if",
"position": [
720,
0
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "8255ef66-f18d-4f38-a283-592cbd617109",
"operator": {
"type": "object",
"operation": "exists",
"singleValue": true
},
"leftValue": "={{ $binary.data }}",
"rightValue": ".pdf"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "5bb977be-3e50-4240-a05a-4df23e8f7470",
"name": "ContentType Error",
"type": "n8n-nodes-base.stopAndError",
"position": [
940,
-80
],
"parameters": {
"errorMessage": "=Unsupported content-type"
},
"typeVersion": 1
},
{
"id": "7b68057e-9189-4291-a40f-e9941443a65a",
"name": "Workflow Call",
"type": "n8n-nodes-base.executeWorkflowTrigger",
"position": [
-380,
120
],
"parameters": {
"workflowInputs": {
"values": [
{
"name": "url"
},
{
"name": "fulltext",
"type": "boolean"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "bbe6563d-1131-4f7e-9a19-0dff16d1adb5",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1200,
-100
],
"parameters": {
"width": 760,
"height": 640,
"content": "# 面向 AI 代理和工作流的网页阅读器"
},
"typeVersion": 1
},
{
"id": "cf3a4e8f-d63f-482d-81eb-746ed7f66c85",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
1600,
-100
],
"parameters": {
"width": 680,
"height": 660,
"content": "# 设置"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"callerPolicy": "workflowsFromSameOwner",
"errorWorkflow": "4HcJPFvOCSd7pZeG",
"executionOrder": "v1",
"saveDataSuccessExecution": "none"
},
"versionId": "8cc14be0-b5b6-41c3-8838-e92591538965",
"connections": {
"Not 404": {
"main": [
[
{
"node": "Try Antibot Evasion",
"type": "main",
"index": 0
}
],
[
{
"node": "Not Found",
"type": "main",
"index": 0
}
]
]
},
"Full Text": {
"main": [
[
{
"node": "Fulltext Output",
"type": "main",
"index": 0
}
],
[
{
"node": "Summary Output",
"type": "main",
"index": 0
}
]
]
},
"Is Binary": {
"main": [
[
{
"node": "ContentType Error",
"type": "main",
"index": 0
}
],
[
{
"node": "Content Extractor",
"type": "main",
"index": 0
}
]
]
},
"Scrape.do": {
"main": [
[
{
"node": "Is Binary",
"type": "main",
"index": 0
}
]
]
},
"Workflow Call": {
"main": [
[
{
"node": "Simple Scraper",
"type": "main",
"index": 0
}
]
]
},
"Simple Scraper": {
"main": [
[
{
"node": "Is Binary",
"type": "main",
"index": 0
}
],
[
{
"node": "Not 404",
"type": "main",
"index": 0
}
]
]
},
"Content Extractor": {
"main": [
[
{
"node": "Full Text",
"type": "main",
"index": 0
}
]
]
},
"Try Antibot Evasion": {
"main": [
[
{
"node": "Scrape.do",
"type": "main",
"index": 0
}
],
[
{
"node": "Server Error",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 文档提取
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Scrape.do API)可能需要您自行付费。
相关工作流推荐
PDF 转订单
使用AI将PDF采购订单自动化转换为Adobe Commerce销售订单
If
Set
Code
+19
96 节点 · JKingma
文档提取
文档代理模板
使用Gemini和Google Drive从模板创建自定义PDF文档
If
Set
Code
+13
36 节点 · Ozgur Karateke
文档提取
AI邮件分诊与GPT-4警报系统及Telegram通知
AI邮件分诊与GPT-4警报系统及Telegram通知
If
Set
Gmail
+22
104 节点 · Peter Joslyn
客户支持
使用OpenAI脚本、Leonardo图像和HeyGen虚拟形象创建AI视频
使用OpenAI脚本、Leonardo图像和HeyGen虚拟形象创建AI视频
If
Set
Code
+15
68 节点 · Adam Crafts
设计
使用脚本、图像和HeyGen虚拟形象创建AI视频(🔥限时优惠)
使用脚本、图像和HeyGen虚拟形象创建AI视频(🔥限时优惠)
If
Set
Code
+15
68 节点 · Adam Crafts
设计
1. 播放列表详情设置机器人副本
使用 Suno、GPT-4、Runway 和 Creatomate 创建 AI 生成的 YouTube 音乐播放列表
If
Set
Code
+22
203 节点 · Joseph
内容创作