8
n8n 中文网 amn8n.com

网站抓取

高级

这是一个 Market Research、AI Summarization 领域的自动化工作流,包含 16 个节点,主要使用 Code、Wait、Merge、Airtable、SplitOut 等节点。网站内容抓取与 SEO 关键词提取(GPT-4o-mini 和 Airtable)

前置要求
  • Airtable API Key
  • 可能需要目标 API 的认证凭证
  • OpenAI API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "z9QAdDgZ9JXvCxLb",
  "meta": {
    "instanceId": "b9ec70f4cccbc2d0ccd7d27d44e6c3431584a8262568e237f9e554fc0cc44167",
    "templateCredsSetupCompleted": true
  },
  "name": "网站抓取",
  "tags": [],
  "nodes": [
    {
      "id": "59597272-acf0-426f-881f-2a82f0b60151",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -740,
        300
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini",
          "cachedResultName": "gpt-4o-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "E8szCEHOxKgKzE4E",
          "name": "OpenAi account 2"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "95607ab2-d841-4394-b31b-d9b28dfb5d41",
      "name": "Website Name",
      "type": "n8n-nodes-base.formTrigger",
      "position": [
        -1780,
        60
      ],
      "webhookId": "3e762442-715e-47e1-a65e-ae92085857ae",
      "parameters": {
        "options": {
          "buttonLabel": "Submit"
        },
        "formTitle": "Website Name",
        "formFields": {
          "values": [
            {
              "fieldLabel": "Website Name SEO",
              "requiredField": true
            }
          ]
        },
        "responseMode": "lastNode",
        "formDescription": "=Website Scraper"
      },
      "typeVersion": 2.2
    },
    {
      "id": "59294fd4-98ad-40b5-951c-7df706064d2f",
      "name": "Wait1",
      "type": "n8n-nodes-base.wait",
      "position": [
        420,
        60
      ],
      "webhookId": "783f92c4-5078-40d2-ae9f-b31664b08086",
      "parameters": {
        "amount": 20
      },
      "typeVersion": 1.1
    },
    {
      "id": "4305bade-cdeb-4ea7-b5c0-27a2878538d4",
      "name": "Split Out1",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -1000,
        60
      ],
      "parameters": {
        "include": "allOtherFields",
        "options": {},
        "fieldToSplitOut": "cleanedData"
      },
      "typeVersion": 1
    },
    {
      "id": "69c1eaa7-586a-4b5d-94b8-7a1b39a9eb3a",
      "name": "Split Out2",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        260,
        60
      ],
      "parameters": {
        "include": "allOtherFields",
        "options": {},
        "fieldToSplitOut": "cleaned"
      },
      "typeVersion": 1
    },
    {
      "id": "c2049eb9-3c1c-4339-82c8-8a3f026280ee",
      "name": "Airtable",
      "type": "n8n-nodes-base.airtable",
      "position": [
        1180,
        120
      ],
      "parameters": {
        "base": {
          "__rl": true,
          "mode": "list",
          "value": "appxR9kySQVhhjSZ9",
          "cachedResultUrl": "https://airtable.com/appxR9kySQVhhjSZ9",
          "cachedResultName": "website"
        },
        "table": {
          "__rl": true,
          "mode": "list",
          "value": "tblirvzTvL2ShdbR1",
          "cachedResultUrl": "https://airtable.com/appxR9kySQVhhjSZ9/tblirvzTvL2ShdbR1",
          "cachedResultName": "Table 1"
        },
        "columns": {
          "value": {
            "Data": "={{ $json.cleaned }}",
            "Status": "Done",
            "Keyword": "={{ $json.output }}",
            "Website Name": "={{ $('Website Name').item.json['Website Name SEO'] }}"
          },
          "schema": [
            {
              "id": "id",
              "type": "string",
              "display": true,
              "removed": true,
              "readOnly": true,
              "required": false,
              "displayName": "id",
              "defaultMatch": true
            },
            {
              "id": "Website Name",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "Website Name",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Data",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "Data",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Keyword",
              "type": "string",
              "display": true,
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "Keyword",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Status",
              "type": "options",
              "display": true,
              "options": [
                {
                  "name": "Todo",
                  "value": "Todo"
                },
                {
                  "name": "In progress",
                  "value": "In progress"
                },
                {
                  "name": "Done",
                  "value": "Done"
                }
              ],
              "removed": false,
              "readOnly": false,
              "required": false,
              "displayName": "Status",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "Data"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "upsert",
        "authentication": "airtableOAuth2Api"
      },
      "credentials": {
        "airtableOAuth2Api": {
          "id": "UGDOircVkhTGi44j",
          "name": "Airtable Personal Access Token account"
        }
      },
      "typeVersion": 2.1
    },
    {
      "id": "1025ab64-2301-4956-8d29-4a8baf28fd5a",
      "name": "OpenAI Chat Model1",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        400,
        500
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "E8szCEHOxKgKzE4E",
          "name": "OpenAi account 2"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "4385b878-8b56-4064-b903-e435e309fb94",
      "name": "Merge",
      "type": "n8n-nodes-base.merge",
      "position": [
        860,
        120
      ],
      "parameters": {
        "mode": "combineBySql"
      },
      "typeVersion": 3.1
    },
    {
      "id": "b091d9c9-53f7-412f-bdb1-739f3cb6e6f8",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1580,
        -140
      ],
      "parameters": {
        "content": "## 读取网站"
      },
      "typeVersion": 1
    },
    {
      "id": "6c6541b6-78a4-49ab-9ae6-de9dc49d602f",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1280,
        -140
      ],
      "parameters": {
        "width": 150,
        "content": "## 清理后的HTML代码"
      },
      "typeVersion": 1
    },
    {
      "id": "be1c00ee-f688-486d-bba8-deba80c6c6cf",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -760,
        -140
      ],
      "parameters": {
        "content": "## 按主题分类的信息。"
      },
      "typeVersion": 1
    },
    {
      "id": "e9e5af64-e6ec-4881-aa82-ee206e702adf",
      "name": "HTTP",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -1520,
        60
      ],
      "parameters": {
        "url": "={{ $json['Website Name SEO'] }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "ea6cd1c6-0a59-459f-b648-0cc91f5551c4",
      "name": "HTML",
      "type": "n8n-nodes-base.code",
      "position": [
        -1240,
        60
      ],
      "parameters": {
        "jsCode": "const data = $(\"HTTP\").all()[0]?.json?.data;\n\nfunction extractTextFromHTML(html) {\n  const cleanedHTML = html\n    .replace(/<style[\\s\\S]*?>[\\s\\S]*?<\\/style>/gi, \"\")\n    .replace(/<[^>]+>/g, \"\")\n    .replace(/\\s+/g, \" \")\n    .trim();\n\n  return cleanedHTML;\n}\n\nconst cleanedData = extractTextFromHTML(data);\n\nreturn { cleanedData };\n"
      },
      "typeVersion": 2
    },
    {
      "id": "686f3bd7-fa12-4927-8c6e-60a53fb82aff",
      "name": "Topic Wise information.",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        -760,
        60
      ],
      "parameters": {
        "text": "={{ $('Website Name').item.json['Website Name SEO'] }}",
        "options": {
          "systemMessage": "={{ $json.cleanedData }}\n\nfind it topic wise information.\n"
        },
        "promptType": "define"
      },
      "typeVersion": 1.8
    },
    {
      "id": "2b2ee97d-9771-4842-94db-0538254bc5ec",
      "name": "list",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        400,
        320
      ],
      "parameters": {
        "text": "={{ $json.cleaned }}",
        "options": {
          "systemMessage": "=only for list number of 90 keyword data \"\"\"Important Keyword List for SEO\"\"\"\n"
        },
        "promptType": "define"
      },
      "typeVersion": 2
    },
    {
      "id": "51d9ec4a-84b0-4cb2-bab4-d468eaf4b5a3",
      "name": "Cleaned ##",
      "type": "n8n-nodes-base.code",
      "position": [
        -300,
        60
      ],
      "parameters": {
        "jsCode": "const input = $json[\"output\"]; // Replace \"text\" with your actual field name\nconst cleaned = input\n  .replace(/\\*\\*/g, '')        // Remove all double asterisks **\n  .replace(/^###\\s?/gm, '')  // Remove all ### at the start of lines\n  .replace(/^##\\s?/gm, '');   // Remove all ## at the start of lines\n\n\nreturn {\n  json: {\n    cleaned\n  }\n};\n"
      },
      "typeVersion": 2
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "433fcbb8-f8db-418d-830a-431a60d97abf",
  "connections": {
    "HTML": {
      "main": [
        [
          {
            "node": "Split Out1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "HTTP": {
      "main": [
        [
          {
            "node": "HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "list": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 1
          }
        ]
      ]
    },
    "Merge": {
      "main": [
        [
          {
            "node": "Airtable",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Wait1": {
      "main": [
        [
          {
            "node": "Merge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Cleaned ##": {
      "main": [
        [
          {
            "node": "Split Out2",
            "type": "main",
            "index": 0
          },
          {
            "node": "list",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out1": {
      "main": [
        [
          {
            "node": "Topic Wise information.",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split Out2": {
      "main": [
        [
          {
            "node": "Wait1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Website Name": {
      "main": [
        [
          {
            "node": "HTTP",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Topic Wise information.",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "list",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Topic Wise information.": {
      "main": [
        [
          {
            "node": "Cleaned ##",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 市场调研, AI 摘要总结

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量:16
分类:2
节点类型:10
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

作者
Abhishek Patoliya

Abhishek Patoliya

@abhishekpatoliya

Experienced n8n developer specializing in business process automation and system integrations. I've helped dozens of companies automate their workflows, from CRM synchronization to marketing automation pipelines. Whether you need a simple trigger-based workflow or a complex multi-branch automation, I can bring your ideas to life.

外部链接
在 n8n.io 查看

分享此工作流