GPT-5 nanoとGoogle Sheetsを使ってウェブサイトをスクレイピングして質問に回答
上級
これはMarket Research, Multimodal AI分野の自動化ワークフローで、44個のノードを含みます。主にIf, Set, Xml, Code, Markdownなどのノードを使用します。GPT-5 nanoとGoogle Sheetsでウェブサイトをスクレイピングし、質問に回答します。
前提条件
- ターゲットAPIの認証情報が必要な場合あり
- Google Sheets API認証情報
- OpenAI API Key
使用ノード (44)
ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
"meta": {
"instanceId": "3dd9efe937707b07af3ede5b46321ec0e2a9e49d7ef201e274c4c4aa1a4615a6",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"name": "AIエージェント",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
-2432,
272
],
"parameters": {
"text": "={{ $node[\"ウェブチャット\"].json[\"chatInput\"] }}",
"options": {
"systemMessage": "Actúas como una página web, mediante la tool sheet tienes acceso a toda la página web y todo lo que te pida el usuario puedes consultarlo allí, responde al usuario en base a la info de allí"
},
"promptType": "define"
},
"typeVersion": 2.2
},
{
"id": "6dc738b3-4ebe-4f5b-b8b3-decf9ce15e70",
"name": "OpenAI チャットモデル",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-2496,
480
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-5-nano",
"cachedResultName": "gpt-5-nano"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "0461df33-2d2f-42e2-a0d3-288bd78275f1",
"name": "シンプルメモリ",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
-2336,
480
],
"parameters": {
"contextWindowLength": 50
},
"typeVersion": 1.3
},
{
"id": "fdcce6e6-f00f-4f84-ac6e-2e181452d3ac",
"name": "Google Sheetsで行を取得",
"type": "n8n-nodes-base.googleSheetsTool",
"position": [
-1968,
464
],
"parameters": {
"options": {},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "621aa928-83c5-48a4-8488-67c58fa1aec8",
"name": "If",
"type": "n8n-nodes-base.if",
"position": [
-3376,
560
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "0adf46cd-5ca1-418e-a8b8-0571240e0efb",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json['Data schema'] }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "8df9234a-85ad-45b4-bc17-ad64edaab08a",
"name": "サイトマップマッピング",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-1536,
736
],
"parameters": {
"url": "={{ $json.sitemapUrl }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json.userAgent }}"
},
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "0d18ebca-52f3-46ed-934c-44c9bad53dab",
"name": "XML1",
"type": "n8n-nodes-base.xml",
"position": [
-1088,
960
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "39127cf7-f627-4fca-b1b7-c51b3656947d",
"name": "UAローテーター1",
"type": "n8n-nodes-base.code",
"position": [
-2160,
736
],
"parameters": {
"jsCode": "const userAgents = [\n // Escritorio - Windows\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36\",\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/115.0\",\n \"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36\",\n\n // Escritorio - Mac\n \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.1 Safari/605.1.15\",\n \"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36\",\n\n // Móvil - Android\n \"Mozilla/5.0 (Linux; Android 10; SM-G973F) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Mobile Safari/537.36\",\n \"Mozilla/5.0 (Linux; Android 9; Mi 9T Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Mobile Safari/537.36\",\n\n // Móvil - iPhone\n \"Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1\",\n \"Mozilla/5.0 (iPhone; CPU iPhone OS 15_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.2 Mobile/15E148 Safari/604.1\"\n];\n\n// Escoge uno aleatorio\nconst randomUA = userAgents[Math.floor(Math.random() * userAgents.length)];\n\nreturn [\n {\n json: {\n userAgent: randomUA\n }\n }\n];\n"
},
"typeVersion": 2
},
{
"id": "4260d45a-8705-483a-b17f-58211512ba59",
"name": "リクエストエラー",
"type": "n8n-nodes-base.stopAndError",
"position": [
-1712,
592
],
"parameters": {
"errorMessage": "URL mal introducida, debes introducir con el siguiente formato: ejemplo.com"
},
"typeVersion": 1
},
{
"id": "59d0fe0a-9e27-4755-ac23-f46fa6d2aa95",
"name": "サイトマップエラー",
"type": "n8n-nodes-base.stopAndError",
"position": [
-1088,
528
],
"parameters": {
"errorMessage": "Sitemap no encontrado o acceso bloqueado"
},
"executeOnce": false,
"typeVersion": 1
},
{
"id": "0526a778-8d63-4dcc-9815-a002ffd70a7f",
"name": "robots.txtをリクエスト",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueErrorOutput",
"position": [
-1920,
736
],
"parameters": {
"url": "={{ $node[\"AIエージェント1\"].json[\"output\"][\"URL\"] }}/robots.txt",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json.userAgent }}"
},
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "5948d577-4aea-4394-9b20-687f44efe5c8",
"name": "サイトマップURLを抽出",
"type": "n8n-nodes-base.code",
"position": [
-1712,
736
],
"parameters": {
"jsCode": "// Simulación: contenido del robots.txt como string, en n8n será $input o $json dependiendo de tu configuración\nconst robotsTxtContent = $input.first().json.data || \"\"; // Cambia esto por la variable correcta en n8n\n\n// Función para extraer URL del sitemap\nfunction extractSitemapUrl(robotsTxt) {\n // Buscamos línea que empiece con \"Sitemap:\" (ignorando mayúsculas y espacios)\n const lines = robotsTxt.split(/\\r?\\n/);\n for (const line of lines) {\n const match = line.match(/^\\s*Sitemap:\\s*(.+)$/i);\n if (match) {\n return match[1].trim();\n }\n }\n return null; // No encontrado\n}\n\nconst sitemapUrl = extractSitemapUrl(robotsTxtContent);\n\n// Devolver JSON con la URL del sitemap (o null si no hay)\nreturn [{ json: { sitemapUrl } }];"
},
"typeVersion": 2
},
{
"id": "c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9",
"name": "OPTIONS",
"type": "n8n-nodes-base.set",
"position": [
-2400,
736
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "71b9ad22-d418-4fff-92bb-dafd0818575d",
"name": "scan_pages",
"type": "boolean",
"value": true
},
{
"id": "42483a05-34f0-4cef-b404-dae43a7bee22",
"name": "scan_posts",
"type": "boolean",
"value": false
},
{
"id": "00a5ed31-dd44-4f9f-97f1-7aa4fe636afd",
"name": "category",
"type": "boolean",
"value": false
},
{
"id": "a2b0930f-8a9b-4f78-8d20-466366853b55",
"name": "tags",
"type": "boolean",
"value": false
}
]
}
},
"typeVersion": 3.4
},
{
"id": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"name": "AIエージェント1",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
-3024,
736
],
"parameters": {
"text": "={{ $node[\"ウェブチャット\"].json[\"chatInput\"] }}",
"options": {
"systemMessage": "Responde en formato JSON, el url si lo es, si no lo es pon cualquier valor y con una boolean que se indica respondiendo si es url o no (true or false)"
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 2.2
},
{
"id": "afdbed61-346e-44a6-aa69-23a2b7ecf553",
"name": "OpenAI チャットモデル1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-3024,
944
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-5-nano",
"cachedResultName": "gpt-5-nano"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "175a77cd-bd0a-4849-8c9b-d36b4ddcecd9",
"name": "ウェブチャット",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
-3776,
560
],
"webhookId": "01764d18-dae5-4dff-8e99-cb90682e9187",
"parameters": {
"public": true,
"options": {
"responseMode": "responseNodes"
},
"authentication": "basicAuth"
},
"credentials": {
"httpBasicAuth": {
"id": "PkymFgJgUnBzIwMu",
"name": "Unnamed credential"
}
},
"typeVersion": 1.3
},
{
"id": "16ef0fa6-4259-43bf-b74f-3dc70d4b54e3",
"name": "構造化出力パーサー",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
-2880,
944
],
"parameters": {
"jsonSchemaExample": "{\n \"URL\": \"example.com\",\n \"URL_bool\":true\n}"
},
"typeVersion": 1.3
},
{
"id": "15992fbe-4ee5-4630-a377-f1b8d21ebc1b",
"name": "If1",
"type": "n8n-nodes-base.if",
"position": [
-2640,
752
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "3851cb51-a282-4388-b4f6-1c1f68e8c7c5",
"operator": {
"type": "boolean",
"operation": "true",
"singleValue": true
},
"leftValue": "={{ $json.output.URL_bool }}",
"rightValue": ""
}
]
}
},
"typeVersion": 2.2
},
{
"id": "ab8d3076-4420-48ef-b8fa-e25adbbd11e2",
"name": "チャットに返信",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
-2400,
928
],
"parameters": {
"message": "Debes introducir una URL válida ejemplo: https://google.es",
"options": {}
},
"typeVersion": 1
},
{
"id": "c5a8dd49-3a82-45c7-a139-b30b4cc21e05",
"name": "チャットに返信1",
"type": "@n8n/n8n-nodes-langchain.chat",
"position": [
-2080,
272
],
"parameters": {
"message": "={{ $json.output }}",
"options": {}
},
"typeVersion": 1
},
{
"id": "d665823a-b40a-45a5-ac12-0a789c1b8ecd",
"name": "モデルにメッセージ",
"type": "@n8n/n8n-nodes-langchain.openAi",
"position": [
-3024,
1280
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "gpt-4o",
"cachedResultName": "GPT-4O"
},
"options": {},
"messages": {
"values": [
{
"role": "system",
"content": "=De aquí saca y devuelve en formato JSON, los siguientes urls de los sitemap que sean true: \n\nPages: {{ $('OPTIONS').item.json.scan_pages }}\n\nPosts: {{ $('OPTIONS').item.json.scan_posts }}\n\nCategorias: {{ $('OPTIONS').item.json.category }}\n\nTags: {{ $('OPTIONS').item.json.tags }}\n\nSalida:\n\n{\n\"sitemap_page\":\"https://...\",\n\"sitemap_posts\":\"https://\"\n}"
},
{
"content": "=Sitemap: \n{{ $json.sitemapindex.sitemap[0].loc }}\n\n{{ $json.sitemapindex.sitemap[1].loc }}\n\n{{ $json.sitemapindex.sitemap[2].loc }}"
}
]
},
"jsonOutput": true
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.8
},
{
"id": "d71fbfb6-3e9b-427b-afe3-6fd77ff77ede",
"name": "XML",
"type": "n8n-nodes-base.xml",
"position": [
-2480,
1280
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
"name": "アイテムをループ処理",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1856,
1280
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
"name": "モデルにメッセージ1",
"type": "@n8n/n8n-nodes-langchain.openAi",
"position": [
-1200,
1408
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "gpt-5-nano",
"cachedResultName": "GPT-5-NANO"
},
"options": {},
"messages": {
"values": [
{
"role": "system",
"content": "El usuario te mandará el contenido de la página web, tu misión es sacar un resumen de la página web, idioma de la página, h1, enlaces internos (no imágenes ni css ni js) y enlaces externos y añadirlos mediante la tool sheet a la db"
},
{
"content": "=URL: {{ $('URLを分割').item.json.urls }}\n\n{{ $json.data }}"
}
]
}
},
"credentials": {
"openAiApi": {
"id": "dfSo8Emt7Jfhxkoj",
"name": "OpenAi account"
}
},
"typeVersion": 1.8
},
{
"id": "6fb7c3fa-7851-49cd-8d0b-01df74a80f35",
"name": "Google Sheetsに行を追加",
"type": "n8n-nodes-base.googleSheetsTool",
"position": [
-1056,
1648
],
"parameters": {
"columns": {
"value": {
"Lang": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Lang', ``, 'string') }}",
"Page URL": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Page_URL', ``, 'string') }}",
"External URLs": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('External_URLs', ``, 'string') }}",
"Internal URLs": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Internal_URLs', ``, 'string') }}",
"Summary Content": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('Summary_Content', ``, 'string') }}",
"H1 and hierarchy": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('H1_and_hierarchy', ``, 'string') }}"
},
"schema": [
{
"id": "Page URL",
"type": "string",
"display": true,
"required": false,
"displayName": "Page URL",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Content text",
"type": "string",
"display": true,
"required": false,
"displayName": "Content text",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Lang",
"type": "string",
"display": true,
"required": false,
"displayName": "Lang",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "H1 and hierarchy",
"type": "string",
"display": true,
"required": false,
"displayName": "H1 and hierarchy",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "External URLs",
"type": "string",
"display": true,
"required": false,
"displayName": "External URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Internal URLs",
"type": "string",
"display": true,
"required": false,
"displayName": "Internal URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Summary Content",
"type": "string",
"display": true,
"required": false,
"displayName": "Summary Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Data schema",
"type": "string",
"display": true,
"required": false,
"displayName": "Data schema",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "9a5d11a2-0fc8-48a1-8fa0-c2f53fb49b54",
"name": "完了",
"type": "n8n-nodes-base.googleSheets",
"position": [
-1616,
1168
],
"parameters": {
"columns": {
"value": {
"Data schema": "={{true}}"
},
"schema": [
{
"id": "Page URL",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Page URL",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Content text",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Content text",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Lang",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Lang",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "H1 and hierarchy",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "H1 and hierarchy",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "External URLs",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "External URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Internal URLs",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Internal URLs",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Summary Content",
"type": "string",
"display": true,
"removed": true,
"required": false,
"displayName": "Summary Content",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Data schema",
"type": "string",
"display": true,
"removed": false,
"required": false,
"displayName": "Data schema",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Data schema"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "appendOrUpdate",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "c371c8db-e752-48fa-999d-4813aeb13f38",
"name": "HTTP リクエスト2",
"type": "n8n-nodes-base.httpRequestTool",
"position": [
-2176,
480
],
"parameters": {
"url": "={{ /*n8n-auto-generated-fromAI-override*/ $fromAI('URL', ``, 'string') }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "40169b8e-5948-4422-98d9-4bca87ccab73",
"name": "マージ",
"type": "n8n-nodes-base.code",
"position": [
-2272,
1280
],
"parameters": {
"jsCode": "// Obtenemos el array de URLs del JSON\nconst urlsArray = $input.first().json.urlset.url;\n\n// Creamos un objeto donde cada clave es \"url 1\", \"url 2\", etc.\nconst result = {};\nurlsArray.forEach((item, index) => {\n if (item.loc) {\n result[`url ${index + 1}`] = item.loc;\n }\n});\n\n// Devolvemos el objeto\nreturn [\n {\n json: {\n urls: result\n }\n }\n];\n"
},
"typeVersion": 2
},
{
"id": "5d98fe9e-890c-4c9f-81c8-309cc23dc8af",
"name": "URLを分割",
"type": "n8n-nodes-base.splitOut",
"position": [
-2064,
1280
],
"parameters": {
"options": {},
"fieldToSplitOut": "urls"
},
"typeVersion": 1
},
{
"id": "98abaa2b-ddbc-4c04-830e-d7112a6a57e2",
"name": "URLをリクエスト",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
-1616,
1408
],
"parameters": {
"url": "={{ $('URLを分割').item.json.urls }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json.userAgent }}"
},
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "71d974a6-4f60-4573-be09-7cbb09502fa3",
"name": "HTMLをMarkdownに変換",
"type": "n8n-nodes-base.markdown",
"position": [
-1408,
1408
],
"parameters": {
"html": "={{ $json.data }}",
"options": {}
},
"typeVersion": 1
},
{
"id": "f076a729-8f40-4a3b-ad32-83837964c42c",
"name": "サイトマップマッピング1",
"type": "n8n-nodes-base.httpRequest",
"position": [
-2672,
1280
],
"parameters": {
"url": "={{ $json.message.content.sitemap_page }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "Accept-Language",
"value": "es-ES,es;q=0.9,en;q=0.8"
},
{
"name": "Accept-Encoding",
"value": "gzip, deflate, br"
},
{
"name": "Referer",
"value": "https://www.google.com/"
},
{
"name": "Connection",
"value": "keep-alive"
},
{
"name": "Upgrade-Insecure-Requests",
"value": "1"
},
{
"name": "Sec-Fetch-Dest",
"value": "document"
},
{
"name": "Sec-Fetch-Mode",
"value": "navigate"
},
{
"name": "DNT",
"value": "1"
},
{
"name": "Accept",
"value": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "bca3322d-bc2e-4932-a5f2-a2e9548a8aef",
"name": "データスキーマを取得",
"type": "n8n-nodes-base.googleSheets",
"maxTries": 5,
"position": [
-3568,
560
],
"parameters": {
"options": {},
"filtersUI": {
"values": [
{
"lookupValue": "={{ true }}",
"lookupColumn": "Data schema"
}
]
},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit#gid=0",
"cachedResultName": "Web"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=drivesdk",
"cachedResultName": "Web chat Workflow"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "wVh07BIjSJqZc11s",
"name": "Google Sheets account"
}
},
"retryOnFail": true,
"typeVersion": 4.6,
"alwaysOutputData": true,
"waitBetweenTries": 3000
},
{
"id": "d051d2f3-cc65-4cb8-8a67-90d7df0dda08",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3568,
736
],
"parameters": {
"width": 256,
"height": 176,
"content": "## Document example URL \nhttps://docs.google.com/spreadsheets/d/112qqkm4omdSzDT2jI17IQAxYvGjKuGlYxj6XytDA5L8/edit?usp=sharing"
},
"typeVersion": 1
},
{
"id": "2ffd406b-1b1b-44d6-be7e-5bbdf73ad5d0",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3776,
240
],
"parameters": {
"color": 5,
"width": 496,
"height": 288,
"content": "## Overview\nThis is a web consultation chat workflow that, on the first run with a given URL, discovers the sitemap, crawls the site, extracts useful information (language, H1 hierarchy, internal/external links, summary) and stores it in Google Sheets.\n\nFrom then on, if the “schema” flag is set in the sheet (Data schema = true), the chat switches to an Agent mode that responds to the user “as if it were the website,” consulting the database (Google Sheets) and making controlled HTTP requests when needed.\n\n"
},
"typeVersion": 1
},
{
"id": "303ec015-8a82-4092-9dd9-46bb7658a1d3",
"name": "付箋2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-864,
32
],
"parameters": {
"width": 992,
"height": 1104,
"content": "## 1) Chat trigger & schema check\n\n* **Chat web (trigger)** – Public webhook with Basic Auth. Captures `chatInput`.\n* **Get data schema (Google Sheets)** – Filters rows where **Data schema = true**.\n* **If**:\n\n * If schema exists → **Branch A (Agent mode with existing data)**.\n * If not → **Branch B (URL validation & initial crawling)**.\n\n---\n\n## 2) Branch A — Agent mode (consults an already indexed site)\n\n* **AI Agent** (LangChain Agent):\n\n * *System*: “You act as a website… use the tool sheet to access all site info.”\n * **Connected tools**:\n\n * **Get row(s) in sheet in Google Sheets** – lets the agent read the database.\n * **HTTP Request2** – allows the agent to fetch a URL it generates via `$fromAI('URL')`.\n * **OpenAI Chat Model (gpt-5-nano)** – LLM powering the agent.\n * **Simple Memory** – short-term context window (50 messages).\n* **Respond to Chat1** – Sends the agent’s `output` back to the user.\n\n**Purpose**: The user can ask questions (“What’s on page X?”, “What links are there?”), and the agent answers using the sheet’s stored data and, if necessary, live HTTP fetches.\n\n---\n\n## 3) Branch B — URL validation & crawl preparation\n\n* **AI Agent1** (URL classifier):\n\n * *System*: “Return JSON with `URL` and `URL_bool` (true if it’s a valid URL).”\n * **OpenAI Chat Model1 (gpt-5-nano)** + **Structured Output Parser** enforce JSON.\n* **If1**:\n\n * If `URL_bool = true` → continue.\n * If `false` → **Respond to Chat** (“You must enter a valid URL…”).\n* **OPTIONS (Set)**: Flags to choose which sitemaps to process:\n\n * `scan_pages: true`, `scan_posts/category/tags: false` (pages only).\n* **UA Rotativo1 (Code)**: Selects a **random User-Agent** (desktop/mobile, Win/Mac/iOS/Android) to reduce blocking.\n* **Req robots (HTTP Request)**: Downloads `robots.txt` from `{{ AI Agent1.output.URL }}/robots.txt` with realistic headers (language, compression, referer, etc.).\nIf it fails, goes to **Req Error** (“URL not valid…”).\n* **extract sitemap url (Code)**: Parses `robots.txt` and **extracts the `Sitemap:` line** → `sitemapUrl`.\n* **Maping Sitemap (HTTP Request)**: Downloads the **sitemap index** (`sitemapindex`) with error handling (failure → **Sitemap Error**).\n* **XML1 (XML→JSON)**: Converts the sitemap XML to JSON.\n\n"
},
"typeVersion": 1
},
{
"id": "f37eac9a-5e47-45a3-a1ba-e65ebb312571",
"name": "付箋3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3968,
560
],
"parameters": {
"width": 150,
"height": 96,
"content": "# P1"
},
"typeVersion": 1
},
{
"id": "8a2b47b5-dd9c-4f20-b76f-437446d0d0c6",
"name": "付箋4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2720,
288
],
"parameters": {
"width": 166,
"height": 272,
"content": "\n\n\n\n\n\n\n\n# P2"
},
"typeVersion": 1
},
{
"id": "d63fd3cc-2966-4460-8de0-8b871d6f2e78",
"name": "付箋5",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3248,
736
],
"parameters": {
"width": 150,
"height": 320,
"content": "\n\n\n\n\n\n\n\n\n\n\n# P3"
},
"typeVersion": 1
},
{
"id": "059d0c29-58d7-4b75-9ec4-89d8b1b8e54b",
"name": "付箋6",
"type": "n8n-nodes-base.stickyNote",
"position": [
-848,
1200
],
"parameters": {
"width": 992,
"height": 1248,
"content": "## 4) Selecting the specific sitemap (pages)\n\n* **Message a model (GPT-4o)**:\n\n * *System*: Ask the model to **select and return in JSON** the relevant sitemaps based on the flags (`scan_pages`, `scan_posts`, etc.).\n * *User*: Passes the first 3 `loc` entries from `sitemapindex`.\n * **Expected output**: e.g. `{ \"sitemap_page\": \"https://…\" }`.\n* **Maping Sitemaps (HTTP Request)**: Downloads the **pages sitemap** provided by the model.\n* **XML (XML→JSON)**: Converts it to JSON (`urlset.url`).\n\n---\n\n## 5) Expanding URLs & page-by-page processing loop\n\n* **Merge (Code)**: Turns `urlset.url` into an object `{ urls: { \"url 1\": \"...\", \"url 2\": \"...\" } }`.\n* **Split URLs**: Breaks that object into **one item per URL**.\n* **Loop Over Items (SplitInBatches)**: Iterates through each URL (supports batching). From here, two parallel flows run:\n\n 1. **Req URL (HTTP Request)** → **HTML to Markdown** → **Message a model1 (gpt-5-nano)**:\n\n * *System*: “You will receive page content; extract: summary, language, H1/hierarchy, internal links (no CSS/JS/images), external links; then add them to the DB via the sheet tool.”\n * *User*: `{{ $json.data }}` (HTML converted to Markdown).\n * **Append row in sheet in Google Sheets (Tool)** is connected as an **AI tool** and uses `$fromAI(...)` mappings to fill columns:\n\n * **Page URL**, **Lang**, **H1 and hierarchy**, **External URLs**, **Internal URLs**, **Summary Content**.\n * **Result**: Adds one row per page with extracted fields.\n 2. **Complete (Google Sheets, appendOrUpdate)**:\n\n * Marks/ensures a row with **`Data schema = true`** exists (acts as a “ready” flag so future runs go into **Agent mode**).\n\n---\n\n## 6) What gets stored in Google Sheets\n\nSheet: **“Web”** (gid=0).\nColumns in the schema:\n\n* **Page URL** – URL of the page the row describes.\n* **Lang** – detected language.\n* **H1 and hierarchy** – H1 and heading hierarchy.\n* **External URLs** – outbound links.\n* **Internal URLs** – valid internal links.\n* **Summary Content** – page summary.\n* **Data schema** – boolean flag controlling the flow mode.\n\n> Note: In the current mapping of “Append row…”, “Page URL”, “Lang”, “H1 and hierarchy”, “External URLs”, “Internal URLs” and “Summary Content” are populated via `$fromAI`. “Content text” exists in the schema but is not currently mapped (you could add it by requesting it from the LLM and using `$fromAI`).\n\n\n\n## 7) Models & memory\n\n* **gpt-5-nano** – for URL classification, per-page structured extraction, and main Agent responses when data exists.\n* **GPT-4o** – for sitemap selection from the index.\n* **Memory** – 50-message context window for short-term chat continuity in Agent mode.\n"
},
"typeVersion": 1
},
{
"id": "33d198cc-058e-4935-9e49-adc77baf654b",
"name": "付箋7",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2144,
912
],
"parameters": {
"color": 3,
"width": 608,
"height": 192,
"content": "## User experience per case\n\n* **No valid URL** – Returns a message requesting a sample URL.\n* **First time with valid URL** – No data yet; runs **discovery → crawling → extraction → save to Sheets**.\n* **Subsequent interactions** (with `Data schema = true`) – **Agent** answers as if it were the website, using the **DB in Sheets** and live HTTP when needed.\n"
},
"typeVersion": 1
},
{
"id": "670ecb31-9a8d-4d13-aa94-066463f91e6a",
"name": "付箋8",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3248,
1136
],
"parameters": {
"width": 150,
"height": 320,
"content": "\n\n\n\n\n\n\n\n\n\n\n# P4"
},
"typeVersion": 1
},
{
"id": "c0bffbee-cf0d-4abf-99a0-dbbc5347c08d",
"name": "付箋9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2096,
1440
],
"parameters": {
"width": 150,
"height": 320,
"content": "\n\n\n\n\n\n\n\n\n\n\n## P5 & P6"
},
"typeVersion": 1
},
{
"id": "de0a9316-6130-4272-bce1-db37039e9c3d",
"name": "付箋10",
"type": "n8n-nodes-base.stickyNote",
"position": [
-3968,
720
],
"parameters": {
"color": 5,
"width": 272,
"height": 144,
"content": "## Node By OXSR\nMore info and nodes\nhttps://n8n.io/creators/oxsr11/\n\nGit: https://github.com/oxsr"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"621aa928-83c5-48a4-8488-67c58fa1aec8": {
"main": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "main",
"index": 0
}
],
[
{
"node": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"type": "main",
"index": 0
}
]
]
},
"15992fbe-4ee5-4630-a377-f1b8d21ebc1b": {
"main": [
[
{
"node": "c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9",
"type": "main",
"index": 0
}
],
[
{
"node": "ab8d3076-4420-48ef-b8fa-e25adbbd11e2",
"type": "main",
"index": 0
}
]
]
},
"d71fbfb6-3e9b-427b-afe3-6fd77ff77ede": {
"main": [
[
{
"node": "40169b8e-5948-4422-98d9-4bca87ccab73",
"type": "main",
"index": 0
}
]
]
},
"0d18ebca-52f3-46ed-934c-44c9bad53dab": {
"main": [
[
{
"node": "d665823a-b40a-45a5-ac12-0a789c1b8ecd",
"type": "main",
"index": 0
}
]
]
},
"40169b8e-5948-4422-98d9-4bca87ccab73": {
"main": [
[
{
"node": "5d98fe9e-890c-4c9f-81c8-309cc23dc8af",
"type": "main",
"index": 0
}
]
]
},
"c12d6ae7-23ee-4f7a-9a33-7e43d1e475b9": {
"main": [
[
{
"node": "39127cf7-f627-4fca-b1b7-c51b3656947d",
"type": "main",
"index": 0
}
]
]
},
"98abaa2b-ddbc-4c04-830e-d7112a6a57e2": {
"main": [
[
{
"node": "71d974a6-4f60-4573-be09-7cbb09502fa3",
"type": "main",
"index": 0
}
]
]
},
"05174bb2-efd5-4de8-9e17-26c2a85eff06": {
"main": [
[
{
"node": "c5a8dd49-3a82-45c7-a139-b30b4cc21e05",
"type": "main",
"index": 0
}
]
]
},
"175a77cd-bd0a-4849-8c9b-d36b4ddcecd9": {
"main": [
[
{
"node": "bca3322d-bc2e-4932-a5f2-a2e9548a8aef",
"type": "main",
"index": 0
}
]
]
},
"2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae": {
"main": [
[
{
"node": "15992fbe-4ee5-4630-a377-f1b8d21ebc1b",
"type": "main",
"index": 0
}
]
]
},
"0526a778-8d63-4dcc-9815-a002ffd70a7f": {
"main": [
[
{
"node": "5948d577-4aea-4394-9b20-687f44efe5c8",
"type": "main",
"index": 0
}
],
[
{
"node": "4260d45a-8705-483a-b17f-58211512ba59",
"type": "main",
"index": 0
}
]
]
},
"5d98fe9e-890c-4c9f-81c8-309cc23dc8af": {
"main": [
[
{
"node": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
"type": "main",
"index": 0
}
]
]
},
"39127cf7-f627-4fca-b1b7-c51b3656947d": {
"main": [
[
{
"node": "0526a778-8d63-4dcc-9815-a002ffd70a7f",
"type": "main",
"index": 0
}
]
]
},
"c371c8db-e752-48fa-999d-4813aeb13f38": {
"ai_tool": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_tool",
"index": 0
}
]
]
},
"0461df33-2d2f-42e2-a0d3-288bd78275f1": {
"ai_memory": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_memory",
"index": 0
}
]
]
},
"f076a729-8f40-4a3b-ad32-83837964c42c": {
"main": [
[
{
"node": "d71fbfb6-3e9b-427b-afe3-6fd77ff77ede",
"type": "main",
"index": 0
}
]
]
},
"bca3322d-bc2e-4932-a5f2-a2e9548a8aef": {
"main": [
[
{
"node": "621aa928-83c5-48a4-8488-67c58fa1aec8",
"type": "main",
"index": 0
}
]
]
},
"6578bcc5-b412-46bf-88d5-8b285372e9b9": {
"main": [
[
{
"node": "9a5d11a2-0fc8-48a1-8fa0-c2f53fb49b54",
"type": "main",
"index": 0
}
],
[
{
"node": "98abaa2b-ddbc-4c04-830e-d7112a6a57e2",
"type": "main",
"index": 0
}
]
]
},
"d665823a-b40a-45a5-ac12-0a789c1b8ecd": {
"main": [
[
{
"node": "f076a729-8f40-4a3b-ad32-83837964c42c",
"type": "main",
"index": 0
}
]
]
},
"71d974a6-4f60-4573-be09-7cbb09502fa3": {
"main": [
[
{
"node": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
"type": "main",
"index": 0
}
]
]
},
"25c7cbaf-7eb9-4e71-a488-b6d16242d324": {
"main": [
[
{
"node": "6578bcc5-b412-46bf-88d5-8b285372e9b9",
"type": "main",
"index": 0
}
]
]
},
"6dc738b3-4ebe-4f5b-b8b3-decf9ce15e70": {
"ai_languageModel": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"afdbed61-346e-44a6-aa69-23a2b7ecf553": {
"ai_languageModel": [
[
{
"node": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"5948d577-4aea-4394-9b20-687f44efe5c8": {
"main": [
[
{
"node": "f076a729-8f40-4a3b-ad32-83837964c42c",
"type": "main",
"index": 0
}
]
]
},
"16ef0fa6-4259-43bf-b74f-3dc70d4b54e3": {
"ai_outputParser": [
[
{
"node": "2ab278f9-2904-4b6c-a2a3-6a703c0bb3ae",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"6fb7c3fa-7851-49cd-8d0b-01df74a80f35": {
"ai_tool": [
[
{
"node": "25c7cbaf-7eb9-4e71-a488-b6d16242d324",
"type": "ai_tool",
"index": 0
}
]
]
},
"fdcce6e6-f00f-4f84-ac6e-2e181452d3ac": {
"ai_tool": [
[
{
"node": "05174bb2-efd5-4de8-9e17-26c2a85eff06",
"type": "ai_tool",
"index": 0
}
]
]
}
}
}
よくある質問
このワークフローの使い方は?
上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。
このワークフローはどんな場面に適していますか?
上級 - 市場調査, マルチモーダルAI
有料ですか?
このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(OpenAI APIなど)は別途料金が発生する場合があります。
関連ワークフロー
YouTube 動画に基づく自律ブログ公開
ChatGPT、Sheets、Apify、Pexels、WordPress を使用して、YouTube 動画からブログ記事を自動公開
If
Set
Code
+
If
Set
Code
80 ノードOriol Seguí
コンテンツ作成
n8nノードの探索(可視化リファレンスライブラリ内)
n8nノードを可視化リファレンスライブラリで探索
If
Ftp
Set
+
If
Ftp
Set
113 ノードI versus AI
その他
再生リスト詳細設定ボットコピー
Suno、GPT-4、Runway、Creatomate を使って AI 生成の YouTube ミュージックプレイリストを作成
If
Set
Code
+
If
Set
Code
203 ノードJoseph
コンテンツ作成
WordPressブログの自動化プロフェッショナル版(先端研究)v2.1マーケットプラグイン
GPT-4o、Perplexity AI、そして多言語対応を使ったSEO最適化ブログ作成の自動化
If
Set
Xml
+
If
Set
Xml
125 ノードDaniel Ng
コンテンツ作成
GPT-5 nanoとYoast SEOでWordPress SEO最適化を自動化
GPT-5 nanoとYoast SEOを使って、WordPressのSEO最適化を自動化
Set
Code
Gmail
+
Set
Code
Gmail
35 ノードOriol Seguí
その他
ディープラーニングエージェント
ディープリサーチエージェント - 研究の自動化とNotionレポートジェネレーター
Set
Code
Filter
+
Set
Code
Filter
43 ノードAziz B
市場調査