8
n8n 中文网amn8n.com

从 Screaming Frog 网站爬取生成 AI 就绪的 llms.txt 文件

高级

这是一个 AI 领域的自动化工作流,包含 23 个节点,主要使用 Set, Filter, Summarize, FormTrigger, ConvertToFile 等节点,结合人工智能技术实现智能自动化。它根据 Screaming Frog 网站爬取的导出文件生成 AI 就绪的 llms.txt 文件。

前置要求
  • OpenAI API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "",
  "meta": {
    "instanceId": "",
    "templateCredsSetupCompleted": true
  },
  "name": "从 Screaming Frog 网站爬取生成 AI 就绪的 llms.txt 文件",
  "tags": [],
  "nodes": [
    {
      "id": "ca701618-b2d5-48ee-a503-d3513d018a65",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        360,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 表单 - Screaming Frog internal_html.csv 上传"
      },
      "typeVersion": 1
    },
    {
      "id": "bc040ca0-f38d-4458-a60c-17f71dbfd1ea",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        780,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 从 Screaming Frog 文件提取数据"
      },
      "typeVersion": 1
    },
    {
      "id": "f71a7d10-847d-48e7-8820-ec0c1e7ea055",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1200,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 设置有用字段"
      },
      "typeVersion": 1
    },
    {
      "id": "6f6546b8-adeb-4998-ae19-d93525337eb7",
      "name": "设置有用字段",
      "type": "n8n-nodes-base.set",
      "position": [
        1340,
        60
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "0e7d4a06-83fc-4834-93fe-2e758cbe2307",
              "name": "url",
              "type": "string",
              "value": "={{ $json.Address || $json.Adresse || $json.Dirección || $json.Indirizzo }}"
            },
            {
              "id": "c82f4d4c-9d0b-4c7d-9647-5d0240b58643",
              "name": "title",
              "type": "string",
              "value": "={{ $json['Title 1'] || $json['Titolo 1'] || $json['Título 1'] || $json['Titel 1'] }}"
            },
            {
              "id": "abea81db-ce3b-4ac1-bd21-09ccfffb567a",
              "name": "description",
              "type": "string",
              "value": "={{ $json['Meta Description 1'] || $json['Meta description 1'] }}"
            },
            {
              "id": "2ca75d74-70f8-400b-b862-9da186135915",
              "name": "statut",
              "type": "string",
              "value": "={{ $json['Status Code'] || $json['Code HTTP'] || $json['Status-Code'] || $json['Código de respuesta'] || $json['Codice di stato']}}"
            },
            {
              "id": "754d3202-38b0-4d79-ba24-8078b3244307",
              "name": "indexability",
              "type": "string",
              "value": "={{ $json.Indexability || $json.Indexabilité || $json.Indicizzabilità || $json.Indexabilidad || $json.Indexierbarkeit}}"
            },
            {
              "id": "8bc6583d-bb34-4d22-b310-fe79bb8ac85d",
              "name": "content_type",
              "type": "string",
              "value": "={{ $json['Content Type'] || $json['Type de contenu'] || $json['Tipo di contenuto'] || $json['Tipo de contenido'] || $json['Inhaltstyp']}}"
            },
            {
              "id": "c874ba1a-769e-43d3-9555-8c9914ca9b76",
              "name": "word_count",
              "type": "string",
              "value": "={{ $json['Word Count'] || $json['Nombre de mots'] || $json['Conteggio delle parole'] || $json['Recuento de palabras'] || $json['Wortanzahl'] }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "1a9af7a0-d2d5-44cb-9770-2d5a1e5706f4",
      "name": "文本分类器",
      "type": "@n8n/n8n-nodes-langchain.textClassifier",
      "disabled": true,
      "position": [
        2260,
        60
      ],
      "parameters": {
        "options": {},
        "inputText": "=url : {{ $json.url }}\ntitle : {{ $json.title }}\ndescription : {{ $json.description }}\nwords count : {{ $json.word_count }}",
        "categories": {
          "categories": [
            {
              "category": "useful_content",
              "description": "Pages that are likely to contain high-quality content, making them suitable for inclusion in a file that aids content discovery for an LLM. "
            },
            {
              "category": "other_content",
              "description": "Pages that should not be included (e.g., pagination, or low-value content)."
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "74a4e378-4228-4142-92ca-e541efde2b15",
      "name": "OpenAI 聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        2180,
        240
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini"
        },
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "",
          "name": "OpenAi Connection"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "63dc6cfe-bc73-43b5-8c7d-4f5fd6501d3b",
      "name": "无操作,不执行任何动作",
      "type": "n8n-nodes-base.noOp",
      "position": [
        2580,
        200
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "cb555b99-9e63-4b6b-a1fc-512b5467d666",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1620,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 过滤 URL"
      },
      "typeVersion": 1
    },
    {
      "id": "e34e56e2-5cc8-4e50-bfb0-3aa2e5e04abf",
      "name": "过滤 URL",
      "type": "n8n-nodes-base.filter",
      "position": [
        1740,
        60
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "cef4feaa-1c46-45b1-92b7-f5c2051b1dc5",
              "operator": {
                "type": "number",
                "operation": "equals"
              },
              "leftValue": "={{ Number($json.statut) }}",
              "rightValue": 200
            },
            {
              "id": "bb821656-9740-4da4-8aa9-f65ad098c470",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ [\"Indexable\", \"Indicizzabile\", \"Indexierbar\"].includes($json.indexability) }}",
              "rightValue": "={{ \"Indexable\" || \"Indicizzabile\" }}"
            },
            {
              "id": "5c93ddb8-8091-406a-bc04-fa14e8b73fb9",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.content_type }}",
              "rightValue": "text/html"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "b98f19a8-afd3-4d26-8063-dee3ee75055f",
      "name": "便签4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2040,
        -800
      ],
      "parameters": {
        "color": 2,
        "width": 740,
        "height": 1160,
        "content": "## 文本分类器"
      },
      "typeVersion": 1
    },
    {
      "id": "63e3ea7a-cec3-442c-9812-771def0a9949",
      "name": "便签5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2840,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 设置字段 - llms.txt 行"
      },
      "typeVersion": 1
    },
    {
      "id": "78f58220-feb5-4044-b994-39a0e4f1e9e4",
      "name": "便签6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3260,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 汇总 - 连接"
      },
      "typeVersion": 1
    },
    {
      "id": "7a119633-7cd3-4de5-a1cd-7f708e1abf4a",
      "name": "便签7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3680,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 设置字段 - llms.txt 内容"
      },
      "typeVersion": 1
    },
    {
      "id": "554f6858-68e8-4b35-a6c4-21bed6832323",
      "name": "便签8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        4100,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 生成 llms.txt 文件"
      },
      "typeVersion": 1
    },
    {
      "id": "24bdefba-e2f2-41f0-93e7-9f8d2fc11f43",
      "name": "便签9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        4520,
        -500
      ],
      "parameters": {
        "color": 7,
        "width": 360,
        "height": 860,
        "content": "## 上传文件到任意位置"
      },
      "typeVersion": 1
    },
    {
      "id": "a3be51e3-810c-40a7-a996-98a3d383c2b9",
      "name": "汇总 - 连接",
      "type": "n8n-nodes-base.summarize",
      "position": [
        3380,
        40
      ],
      "parameters": {
        "options": {},
        "fieldsToSummarize": {
          "values": [
            {
              "field": "llmTxtRow",
              "separateBy": "\n",
              "aggregation": "concatenate"
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "8d3a892a-3d11-4d8a-8ec6-84f8f3af1183",
      "name": "设置字段 - llms.txt 内容",
      "type": "n8n-nodes-base.set",
      "position": [
        3820,
        40
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "97062a99-e944-4e1e-89b1-62cf9e3462dd",
              "name": "llmsTxtFile",
              "type": "string",
              "value": "=# {{ $('表单 - Screaming Frog internal_html.csv 上传').item.json['What is the name of your website?'] }}\n> {{ $('表单 - Screaming Frog internal_html.csv 上传').item.json['Can you provide a short description of your website? (in the language of the website)'] }}\n\n{{ $json.concatenated_llmTxtRow }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "bc2a692a-47ea-4bf1-a102-e607fd544158",
      "name": "上传文件到任意位置",
      "type": "n8n-nodes-base.noOp",
      "position": [
        4640,
        40
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "404510a2-35b2-44cf-9d02-eb0abcf4e9b3",
      "name": "设置字段 - llms.txt 行",
      "type": "n8n-nodes-base.set",
      "position": [
        2960,
        40
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "95e75caa-8110-476b-9cb1-73c15361fa56",
              "name": "llmTxtRow",
              "type": "string",
              "value": "=- [{{ $json.title }}]({{ $json.url }}){{ $json.description ? ': ' + $json.description : '' }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "f54d51f2-17bc-4c58-b177-0e77e16f7b72",
      "name": "便签 10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -420,
        -1020
      ],
      "parameters": {
        "color": 5,
        "width": 700,
        "height": 1380,
        "content": "# 从 Screaming Frog 网站爬取生成 AI 就绪的 llms.txt 文件"
      },
      "typeVersion": 1
    },
    {
      "id": "e33104af-802a-43f2-b26d-f368f7de2fd7",
      "name": "表单 - Screaming Frog internal_html.csv 上传",
      "type": "n8n-nodes-base.formTrigger",
      "position": [
        460,
        60
      ],
      "webhookId": "8791f39a-3d81-405c-b177-0a733ebf74cb",
      "parameters": {
        "options": {
          "buttonLabel": "Get the llms.txt file"
        },
        "formTitle": "llms.txt Generator - From Screaming Frog export",
        "formFields": {
          "values": [
            {
              "fieldLabel": "What is the name of your website?",
              "placeholder": "Example : The best website ever",
              "requiredField": true
            },
            {
              "fieldLabel": "Can you provide a short description of your website? (in the language of the website)",
              "placeholder": "Example : This is the best website ever because all the content is engaging and valuable.",
              "requiredField": true
            },
            {
              "fieldType": "file",
              "fieldLabel": "screaming_frog_export",
              "multipleFiles": false,
              "requiredField": true,
              "acceptFileTypes": ".csv"
            }
          ]
        },
        "responseMode": "lastNode",
        "formDescription": "Generate a simple llms.txt file from a Screaming Frog Export\nIt is recommended to use the internal_html.csv export, although internal_all.csv will also work.\n\nJust fill in the fields in this form 😄"
      },
      "typeVersion": 2.2
    },
    {
      "id": "f6b17fdd-a098-411e-8d53-3f6e638cc3ba",
      "name": "从 Screaming Frog 文件提取数据",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        900,
        60
      ],
      "parameters": {
        "options": {},
        "operation": "xls",
        "binaryPropertyName": "screaming_frog_export"
      },
      "typeVersion": 1
    },
    {
      "id": "6bbd8d1f-3322-4c6d-af08-c842386239ce",
      "name": "生成 llms.txt 文件",
      "type": "n8n-nodes-base.convertToFile",
      "position": [
        4220,
        40
      ],
      "parameters": {
        "options": {
          "encoding": "utf8",
          "fileName": "llms.txt"
        },
        "operation": "toText",
        "sourceProperty": "llmsTxtFile"
      },
      "typeVersion": 1.1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "",
  "connections": {
    "Filter URLs": {
      "main": [
        [
          {
            "node": "Text Classifier",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Text Classifier": {
      "main": [
        [
          {
            "node": "Set Field - llms.txt Row",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "No Operation, do nothing",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Text Classifier",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Set useful fields": {
      "main": [
        [
          {
            "node": "Filter URLs",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate llms.txt file": {
      "main": [
        []
      ]
    },
    "Summarize - Concatenate": {
      "main": [
        [
          {
            "node": "Set Fields - llms.txt Content",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Field - llms.txt Row": {
      "main": [
        [
          {
            "node": "Summarize - Concatenate",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set Fields - llms.txt Content": {
      "main": [
        [
          {
            "node": "Generate llms.txt file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract data from Screaming Frog file": {
      "main": [
        [
          {
            "node": "Set useful fields",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Form - Screaming frog internal_html.csv upload": {
      "main": [
        [
          {
            "node": "Extract data from Screaming Frog file",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

本工作流属于「人工智能」分类,难度为高级,适合需要根据 Screaming Frog 爬取导出文件自动生成 llms.txt 文件的场景。

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量:23
分类:1
节点类型:10
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

作者
Dataki

Dataki

@dataki

I am passionate about transforming complex processes into seamless automations with n8n. My expertise spans across creating ETL pipelines, sales automations, and data & AI-driven workflows. As an avid problem solver, I thrive on optimizing workflows to drive efficiency and innovation.

外部链接
在 n8n.io 查看

分享此工作流