8
n8n 中文网amn8n.com

网页阅读器

中级

这是一个 Document Extraction(文档提取)领域的自动化工作流,包含 15 个节点,主要使用 If, Set, HttpRequest, StopAndError, ExecuteWorkflowTrigger 等节点。它为 AI 代理和工作流提取干净的网页内容,并在常规请求失败时提供反爬虫备用方案。

前置要求
  • 需要 Scrape.do 的认证凭证(HTTP Query Auth),并需安装非内置节点包 n8n-nodes-webpage-content-extractor
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "9UyGvrk6EDY6Hm3W",
  "meta": {
    "instanceId": "7e84375f1a5a2398bff60c3e83bb370423dae55c261ed7c48ca02f15548655a7",
    "templateCredsSetupCompleted": true
  },
  "name": "网页阅读器",
  "tags": [],
  "nodes": [
    {
      "id": "f449a425-4ae9-462d-91bb-ff0b85a73202",
      "name": "Content Extractor",
      "type": "n8n-nodes-webpage-content-extractor.webpageContentExtractor",
      "position": [
        940,
        100
      ],
      "parameters": {
        "html": "={{ $json.data }}"
      },
      "typeVersion": 1
    },
    {
      "id": "e52eddc5-72a7-4bd8-8679-ecedccad447c",
      "name": "Try Antibot Evasion",
      "type": "n8n-nodes-base.if",
      "position": [
        280,
        180
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "1351d4e8-1c27-43c2-8335-aee7c097422a",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ECONNABORTED"
            },
            {
              "id": "28a4c2eb-0a9b-44ac-87d5-6571be2fb447",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ETIMEDOUT"
            },
            {
              "id": "1287e08b-a342-4651-8e56-1d1ff4677222",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ERR_CANCELED"
            },
            {
              "id": "45256daa-063f-4ed3-8ef0-5ec91cdc0974",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.error.code }}",
              "rightValue": "ERR_BAD_REQUEST"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "a90654b8-b83b-41ed-a665-9a0303a84de3",
      "name": "Scrape.do",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        500,
        180
      ],
      "parameters": {
        "url": "=http://api.scrape.do",
        "options": {
          "timeout": 120000
        },
        "sendQuery": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpQueryAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "url",
              "value": "={{ $json.url }}"
            }
          ]
        }
      },
      "credentials": {
        "httpQueryAuth": {
          "id": "SMKkxhdbOewTAnqe",
          "name": "Scrape.do account"
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.2,
      "waitBetweenTries": 5000
    },
    {
      "id": "62c1fb07-35e0-4942-b38d-b888b559e109",
      "name": "Server Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        500,
        380
      ],
      "parameters": {
        "errorMessage": "=Error requesting website ({{ $json.error.code }})"
      },
      "typeVersion": 1
    },
    {
      "id": "7e793496-3ba7-4a30-bb6c-d483c00671c6",
      "name": "Not 404",
      "type": "n8n-nodes-base.if",
      "position": [
        60,
        180
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "81558598-6188-4712-962c-3f80fcba1297",
              "operator": {
                "type": "number",
                "operation": "notEquals"
              },
              "leftValue": "={{ $json.error.status }}",
              "rightValue": 404
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "9ae25973-ffa0-4b14-943b-d8a9fa0ee3b0",
      "name": "Not Found",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        280,
        380
      ],
      "parameters": {
        "errorMessage": "=Error requesting website (404)"
      },
      "typeVersion": 1
    },
    {
      "id": "8af2bbee-ebd0-49e2-aa4a-bc58e1ccaf31",
      "name": "Simple Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueErrorOutput",
      "position": [
        -160,
        120
      ],
      "parameters": {
        "url": "={{ $json.url }}",
        "options": {
          "timeout": 10000,
          "redirect": {
            "redirect": {}
          },
          "allowUnauthorizedCerts": true
        },
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "User-Agent",
              "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36"
            }
          ]
        }
      },
      "retryOnFail": true,
      "typeVersion": 4.2,
      "waitBetweenTries": 5000
    },
    {
      "id": "f0f8106a-9a8c-492e-8082-fc82a3852765",
      "name": "Full Text",
      "type": "n8n-nodes-base.if",
      "position": [
        1160,
        100
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "b32569d1-ba84-401f-9dc9-99b2c804cba2",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $('Workflow Call').item.json.fulltext }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "325b73df-6fe0-4c22-985e-0916a09a8865",
      "name": "Fulltext Output",
      "type": "n8n-nodes-base.set",
      "position": [
        1380,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
              "name": "title",
              "type": "string",
              "value": "={{ $json.title.replace(/\\p{Extended_Pictographic}/gu, '') }}"
            },
            {
              "id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
              "name": "text",
              "type": "string",
              "value": "={{\n( $json.textContent || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "32e88e32-0068-47de-8f72-aee167f15ca2",
      "name": "Summary Output",
      "type": "n8n-nodes-base.set",
      "position": [
        1380,
        200
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "91a15268-86a9-4390-9e19-9fba4d21aeed",
              "name": "title",
              "type": "string",
              "value": "={{ $json.title.replace(/\\p{Extended_Pictographic}/gu, '') }}"
            },
            {
              "id": "28476e01-485e-4373-a6c3-b3703d4ba1e4",
              "name": "url",
              "type": "string",
              "value": "={{ $('Workflow Call').item.json.url }}"
            },
            {
              "id": "90d16e3d-49ca-4a65-a4ae-cd689de990db",
              "name": "content",
              "type": "string",
              "value": "={{\n( $json.excerpt || '' )\n.replace(/\\p{Extended_Pictographic}/gu, '')\n.replace(/[\\r\\n]+/g, ' ')\n.replace(/\\s+/g, ' ')\n.trim()\n}}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "96438bb1-1918-4ae6-9a40-0624968ca7b3",
      "name": "Is Binary",
      "type": "n8n-nodes-base.if",
      "position": [
        720,
        0
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "8255ef66-f18d-4f38-a283-592cbd617109",
              "operator": {
                "type": "object",
                "operation": "exists",
                "singleValue": true
              },
              "leftValue": "={{ $binary.data }}",
              "rightValue": ".pdf"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "5bb977be-3e50-4240-a05a-4df23e8f7470",
      "name": "ContentType Error",
      "type": "n8n-nodes-base.stopAndError",
      "position": [
        940,
        -80
      ],
      "parameters": {
        "errorMessage": "=Unsupported content-type"
      },
      "typeVersion": 1
    },
    {
      "id": "7b68057e-9189-4291-a40f-e9941443a65a",
      "name": "Workflow Call",
      "type": "n8n-nodes-base.executeWorkflowTrigger",
      "position": [
        -380,
        120
      ],
      "parameters": {
        "workflowInputs": {
          "values": [
            {
              "name": "url"
            },
            {
              "name": "fulltext",
              "type": "boolean"
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "bbe6563d-1131-4f7e-9a19-0dff16d1adb5",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1200,
        -100
      ],
      "parameters": {
        "width": 760,
        "height": 640,
        "content": "# 面向 AI 代理和工作流的网页阅读器"
      },
      "typeVersion": 1
    },
    {
      "id": "cf3a4e8f-d63f-482d-81eb-746ed7f66c85",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1600,
        -100
      ],
      "parameters": {
        "width": 680,
        "height": 660,
        "content": "# 设置"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "callerPolicy": "workflowsFromSameOwner",
    "errorWorkflow": "4HcJPFvOCSd7pZeG",
    "executionOrder": "v1",
    "saveDataSuccessExecution": "none"
  },
  "versionId": "8cc14be0-b5b6-41c3-8838-e92591538965",
  "connections": {
    "Not 404": {
      "main": [
        [
          {
            "node": "Try Antibot Evasion",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Not Found",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Full Text": {
      "main": [
        [
          {
            "node": "Fulltext Output",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Summary Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Is Binary": {
      "main": [
        [
          {
            "node": "ContentType Error",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Content Extractor",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrape.do": {
      "main": [
        [
          {
            "node": "Is Binary",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Workflow Call": {
      "main": [
        [
          {
            "node": "Simple Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Simple Scraper": {
      "main": [
        [
          {
            "node": "Is Binary",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Not 404",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Content Extractor": {
      "main": [
        [
          {
            "node": "Full Text",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Try Antibot Evasion": {
      "main": [
        [
          {
            "node": "Scrape.do",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Server Error",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

适合需要为 AI 代理或其他工作流提取干净网页正文的场景(难度: 中级,分类: 文档提取)。

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Scrape.do)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量: 15
分类: 1
节点类型: 7
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
Arthur Braghetto

Arthur Braghetto

@arthurmb

Engineer, Coder, Maker, Curious...

外部链接
在 n8n.io 查看

分享此工作流