使用Firecrawl和Google Sheets生成网站地图和可视化树状图

Name: 使用Firecrawl和Google Sheets生成网站地图和可视化树状图
Rating: 4.5 (10 reviews)
Author: Growth AI
中级
这是一个Market Research, Multimodal AI领域的自动化工作流，包含 8 个节点。主要使用 If, Code, GoogleDrive, GoogleSheets, RespondToWebhook 等节点。使用Firecrawl和Google Sheets生成网站地图和可视化树状图
前置要求
•Google Drive API 凭证
•Google Sheets API 凭证
•HTTP Webhook 端点（n8n 会自动生成）
使用的节点 (8)

分类

市场调研
多模态 AI
工作流预览
可视化展示节点连接关系，支持缩放和平移
当收到聊天消息时
Firecrawl 正常
复制模板
数据映射
将 URL 排序到表格中
最终答案
无效 URL
映射网站并获取 URL
React Flow
导出工作流
复制以下 JSON 配置到 n8n 导入，即可使用此工作流
{
  "meta": {
    "instanceId": "393ca9e36a1f81b0f643c72792946a5fe5e49eb4864181ba4032e5a408278263",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "f6f18549-b9e2-4ea5-b0ad-9a4a4df3bff1",
      "name": "当收到聊天消息时",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        0,
        0
      ],
      "webhookId": "9a4aeebc-9dd5-4248-8349-ebaf7e9bd7ce",
      "parameters": {
        "mode": "webhook",
        "public": true,
        "options": {
          "responseMode": "responseNode"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "3ab02f4d-4593-4d32-8007-f657e7706f84",
      "name": "Firecrawl 正常",
      "type": "n8n-nodes-base.if",
      "position": [
        480,
        0
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "d1e1025f-704e-4392-bf2b-5be624a9c3a2",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $json.success }}",
              "rightValue": "true"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "1f44edba-d802-4e48-a193-9aa073971724",
      "name": "复制模板",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        768,
        0
      ],
      "parameters": {
        "name": "={{ $('When chat message received').item.json.chatInput }} - n8n - Arborescence",
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "12lV4HwgudgzPPGXKNesIEExbFg09Tuu9gyC_jSS1HjI"
        },
        "options": {},
        "operation": "copy"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "3TalAPza9NdMx3yx",
          "name": "Google Drive account"
        }
      },
      "executeOnce": true,
      "typeVersion": 3
    },
    {
      "id": "9e188a8a-4faa-488d-ba3e-10e25fb94c05",
      "name": "数据映射",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        1408,
        0
      ],
      "parameters": {
        "columns": {
          "value": {},
          "schema": [
            {
              "id": "Niv 0",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Niv 0",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Niv 1",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Niv 1",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Niv 2",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Niv 2",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Niv 3",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Niv 3",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Niv 4",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Niv 4",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Niv 5",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Niv 5",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "error",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "error",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "message",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "message",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "name",
          "value": "FR"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Copy template').item.json.id }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "wBRLUCktxqXE6DVJ",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "90c7df17-d3ad-434b-b6ec-23c7e64888de",
      "name": "将 URL 排序到表格中",
      "type": "n8n-nodes-base.code",
      "position": [
        1120,
        0
      ],
      "parameters": {
        "jsCode": "/**\n * Fonction pour traiter les URLs collectées par Firecrawl et générer une arborescence de site\n * en traitant séparément les différents domaines et sous-domaines\n * \n * @param {Object} inputData - Les données brutes de l'appel Firecrawl\n * @returns {Array} - Tableau d'objets avec les colonnes pour Google Sheets\n */\nfunction createSiteHierarchy(inputData) {\n  // Vérifier que les données d'entrée sont valides\n  if (!inputData || !inputData.success || !Array.isArray(inputData.links) || inputData.links.length === 0) {\n    throw new Error(\"Données d'entrée invalides ou vides\");\n  }\n\n  // Normaliser toutes les URLs (convertir http en https)\n  const urls = inputData.links.map(url => {\n    if (url.startsWith('http://')) {\n      return 'https://' + url.substring(7);\n    }\n    return url;\n  });\n\n  // Extraire les différents domaines/sous-domaines présents dans les URLs\n  const domainPattern = /^https?:\\/\\/([^\\/]+)/;\n  const domains = {};\n  \n  // Regrouper les URLs par domaine/sous-domaine\n  for (const url of urls) {\n    const match = url.match(domainPattern);\n    if (!match) continue;\n    \n    const fullDomain = match[1]; // ex: www.zest.fr, wiki.zest.fr\n    \n    // Extraire le sous-domaine et le domaine de base\n    const domainParts = fullDomain.split('.');\n    const isSubdomain = domainParts.length > 2;\n    \n    // Déterminer le domaine principal\n    let mainDomain;\n    if (isSubdomain) {\n      // Pour les sous-domaines comme wiki.zest.fr\n      mainDomain = domainParts.slice(domainParts.length - 2).join('.');\n    } else {\n      // Pour les domaines principaux comme zest.fr\n      mainDomain = fullDomain;\n    }\n    \n    // Enregistrer cette URL dans son groupe de domaine\n    if (!domains[fullDomain]) {\n      domains[fullDomain] = {\n        mainDomain: mainDomain,\n        fullDomain: fullDomain,\n        baseUrl: `https://${fullDomain}`,\n        urls: []\n      };\n    }\n    \n    domains[fullDomain].urls.push(url);\n  }\n  \n  // Traiter chaque domaine/sous-domaine séparément\n  const results = [];\n  \n  // Fonction pour formater le texte d'affichage d'une URL\n  function formatDisplayText(segment) {\n    if (!segment) return \"HOME PAGE\";\n    // Décodage des caractères URL (comme %20, %C3%A9, etc.)\n    try {\n      const decoded = decodeURIComponent(segment);\n      return decoded.toUpperCase().replace(/-/g, ' ');\n    } catch (e) {\n      // En cas d'erreur de décodage, utiliser le segment tel quel\n      return segment.toUpperCase().replace(/-/g, ' ');\n    }\n  }\n  \n  // Fonction pour extraire le chemin relatif d'une URL\n  function getPathFromUrl(url, baseUrl) {\n    // Supprimer le domaine\n    let path = url.replace(baseUrl, '');\n    \n    // Supprimer les slashes au début et à la fin\n    if (path.startsWith('/')) path = path.substring(1);\n    if (path.endsWith('/')) path = path.substring(0, path.length - 1);\n    \n    return path;\n  }\n  \n  // Fonction pour créer l'arborescence d'un domaine spécifique\n  function processUrlsForDomain(domainInfo) {\n    // Créer une structure arborescente pour ce domaine\n    const tree = {};\n    \n    // Ajouter la page d'accueil (niveau 0)\n    tree[domainInfo.baseUrl] = {\n      url: domainInfo.baseUrl,\n      level: 0,\n      segments: [],\n      displayText: domainInfo.fullDomain.toUpperCase(),\n      children: {}\n    };\n    \n    // Trier les URLs par longueur de chemin (du plus court au plus long)\n    domainInfo.urls.sort((a, b) => {\n      const pathA = getPathFromUrl(a, domainInfo.baseUrl);\n      const pathB = getPathFromUrl(b, domainInfo.baseUrl);\n      \n      const segmentsA = pathA ? pathA.split('/') : [];\n      const segmentsB = pathB ? pathB.split('/') : [];\n      \n      // D'abord comparer le nombre de segments\n      if (segmentsA.length !== segmentsB.length) {\n        return segmentsA.length - segmentsB.length;\n      }\n      \n      // Si même nombre de segments, comparer alphabétiquement\n      return pathA.localeCompare(pathB);\n    });\n    \n    // Construire l'arborescence\n    for (const url of domainInfo.urls) {\n      // Ignorer l'URL racine déjà ajoutée\n      if (url === domainInfo.baseUrl || url === domainInfo.baseUrl + '/') continue;\n      \n      const path = getPathFromUrl(url, domainInfo.baseUrl);\n      const segments = path ? path.split('/') : [];\n      \n      // Déterminer le niveau (limité à 5)\n      const level = Math.min(segments.length, 5);\n      \n      if (level === 0) continue; // Ignorer les duplications de l'URL racine\n      \n      // Construire le chemin complet segment par segment\n      let currentNode = tree[domainInfo.baseUrl];\n      let parentPath = domainInfo.baseUrl;\n      \n      for (let i = 0; i < level; i++) {\n        const segment = segments[i];\n        const currentPath = parentPath + '/' + segment;\n        \n        // Créer le nœud s'il n'existe pas\n        if (!currentNode.children[segment]) {\n          currentNode.children[segment] = {\n            url: currentPath,\n            level: i + 1,\n            segments: segments.slice(0, i + 1),\n            displayText: formatDisplayText(segment),\n            children: {}\n          };\n        }\n        \n        // Avancer au nœud enfant\n        currentNode = currentNode.children[segment];\n        parentPath = currentPath;\n      }\n    }\n    \n    // Convertir l'arborescence en lignes\n    const domainRows = [];\n    \n    // Fonction récursive pour parcourir l'arborescence\n    function traverseTree(node) {\n      // Créer une nouvelle ligne\n      const row = {\n        \"Niv 0\": \"\",\n        \"Niv 1\": \"\",\n        \"Niv 2\": \"\",\n        \"Niv 3\": \"\",\n        \"Niv 4\": \"\",\n        \"Niv 5\": \"\",\n        \"URL\": node.url // Ajout de la colonne URL avec l'URL en texte brut\n      };\n      \n      // Définir la valeur au niveau approprié\n      if (node.level <= 5) {\n        row[`Niv ${node.level}`] = `=HYPERLINK(\"${node.url}\";\"${node.displayText}\")`;\n      }\n      \n      // Ajouter la ligne au résultat\n      domainRows.push(row);\n      \n      // Traiter les enfants dans l'ordre alphabétique\n      const children = Object.values(node.children);\n      children.sort((a, b) => a.displayText.localeCompare(b.displayText));\n      \n      for (const child of children) {\n        traverseTree(child);\n      }\n    }\n    \n    // Commencer le parcours avec le nœud racine\n    traverseTree(tree[domainInfo.baseUrl]);\n    \n    return domainRows;\n  }\n  \n  // Trier les domaines: d'abord le domaine principal (sans sous-domaine), puis les sous-domaines\n  const sortedDomains = Object.values(domains).sort((a, b) => {\n    // Si un domaine est exactement le domaine principal, il vient en premier\n    const aParts = a.fullDomain.split('.');\n    const bParts = b.fullDomain.split('.');\n    \n    // Cas spécial pour www: le traiter comme domaine principal\n    const aIsWWW = aParts.length > 2 && aParts[0] === 'www';\n    const bIsWWW = bParts.length > 2 && bParts[0] === 'www';\n    \n    if (aIsWWW && !bIsWWW) return -1;\n    if (!aIsWWW && bIsWWW) return 1;\n    \n    // Ensuite comparer par nombre de parties\n    if (aParts.length !== bParts.length) {\n      return aParts.length - bParts.length;\n    }\n    \n    // Enfin, comparer alphabétiquement\n    return a.fullDomain.localeCompare(b.fullDomain);\n  });\n  \n  // Traiter chaque domaine et ajouter les résultats\n  for (const domainInfo of sortedDomains) {\n    const domainRows = processUrlsForDomain(domainInfo);\n    results.push(...domainRows);\n  }\n  \n  return results;\n}\n\n/**\n * Fonction principale pour traiter l'entrée de n8n\n */\nfunction processInput() {\n  try {\n    // Récupérer les données de la node \"Map a website and get urls\" en utilisant la méthode $()\n    // Cette méthode a été confirmée fonctionnelle par nos tests\n    const firecrawlData = $('Map a website and get urls').item.json;\n    \n    // Vérifier la structure des données\n    if (!firecrawlData || !firecrawlData.success || !Array.isArray(firecrawlData.links)) {\n      throw new Error(\"Données d'entrée non valides ou structure incorrecte\");\n    }\n    \n    // Traiter les URLs pour créer l'arborescence\n    const siteHierarchy = createSiteHierarchy(firecrawlData);\n    \n    // Créer un nouvel item pour chaque ligne de l'arborescence\n    // C'est le format attendu par Google Sheets dans n8n\n    return siteHierarchy.map(row => {\n      return {\n        json: row\n      };\n    });\n    \n  } catch (error) {\n    console.error(\"Erreur lors du traitement:\", error.message);\n    // Retourner un message d'erreur formaté pour n8n\n    return [{\n      json: {\n        error: true,\n        message: error.message,\n        details: error.stack\n      }\n    }];\n  }\n}\n\n// Exécuter le traitement\nreturn processInput();"
      },
      "typeVersion": 2
    },
    {
      "id": "85d435ab-89fb-4f30-a5ff-66239dd02bfc",
      "name": "最终答案",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        1648,
        0
      ],
      "parameters": {
        "options": {},
        "respondWith": "json",
        "responseBody": "={\n  \"text\": \"Cliquez [ici](https://docs.google.com/spreadsheets/d/{{ $('Copy template').item.json.id }}) afin d'accéder à votre arborescence\"\n}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "9d187a39-57fd-43f3-9426-6ba0f13b4a6b",
      "name": "无效 URL",
      "type": "n8n-nodes-base.respondToWebhook",
      "position": [
        672,
        208
      ],
      "parameters": {
        "options": {},
        "respondWith": "json",
        "responseBody": "={\n  \"text\": \"L'url {{ $('Chat input').item.json.chatInput }} n'est pas une url correcte ou elle n'est pas prise en compte par ce service\"\n}"
      },
      "typeVersion": 1.1
    },
    {
      "id": "b58250bb-3f3e-4a29-a8c1-215f23503a79",
      "name": "映射网站并获取 URL",
      "type": "@mendable/n8n-nodes-firecrawl.firecrawl",
      "position": [
        272,
        0
      ],
      "parameters": {
        "url": "={{ $json.chatInput }}",
        "operation": "map",
        "sitemapOnly": true,
        "ignoreSitemap": false,
        "requestOptions": {}
      },
      "credentials": {
        "firecrawlApi": {
          "id": "E34WDB80ik5VHjiI",
          "name": "Firecrawl account"
        }
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "Data mapping": {
      "main": [
        [
          {
            "node": "Final answer",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Firecrawl OK": {
      "main": [
        [
          {
            "node": "Copy template",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Bad URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Copy template": {
      "main": [
        [
          {
            "node": "Sorting URL into table",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Sorting URL into table": {
      "main": [
        [
          {
            "node": "Data mapping",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Map a website and get urls": {
      "main": [
        [
          {
            "node": "Firecrawl OK",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When chat message received": {
      "main": [
        [
          {
            "node": "Map a website and get urls",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题
如何使用这个工作流？

复制上方的 JSON 配置代码，在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」，粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景？

中级 - 市场调研, 多模态 AI
需要付费吗？

本工作流完全免费，您可以直接导入使用。但请注意，工作流中使用的第三方服务（如 OpenAI API）可能需要您自行付费。
使用Firecrawl和Google Sheets生成网站地图和可视化树状图

使用的节点 (8)

分类

如何使用这个工作流？

这个工作流适合什么场景？

需要付费吗？

相关工作流推荐