💡🌐 Crawler de sitio web multipágina basado en Jina.ai
Este es unAIflujo de automatización del dominio deautomatización que contiene 16 nodos.Utiliza principalmente nodos como Set, Xml, Code, Wait, Limit, combinando tecnología de inteligencia artificial para lograr automatización inteligente. 💡🌐 Usa las herramientas de rastreo de sitios web multipágina de Jina.ai
- •Credenciales de API de Google Drive
- •Pueden requerirse credenciales de autenticación para la API de destino
Nodos utilizados (16)
Categoría
{
"id": "xEij0kj2I1DHbL3I",
"meta": {
"instanceId": "31e69f7f4a77bf465b805824e303232f0227212ae922d12133a0f96ffeab4fef",
"templateCredsSetupCompleted": true
},
"name": "💡🌐 Essential Multipage Website Scraper with Jina.ai",
"tags": [],
"nodes": [
{
"id": "3a503859-ef0a-492d-81c6-37e4f0c4c25e",
"name": "Nota adhesiva",
"type": "n8n-nodes-base.stickyNote",
"position": [
-840,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 320,
"content": "## Jina.ai Web Scraper\n### No API Key Required\n"
},
"typeVersion": 1
},
{
"id": "c5217a1a-f074-409b-8340-72afdc5fc8b5",
"name": "Al hacer clic en 'Probar flujo de trabajo'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1500,
-300
],
"parameters": {},
"typeVersion": 1
},
{
"id": "72af3b00-2632-4877-a0b6-7477e2f468f7",
"name": "Iterar sobre elementos",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1080,
20
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
"name": "Esperar",
"type": "n8n-nodes-base.wait",
"position": [
80,
220
],
"webhookId": "081ce124-0cbf-4a21-a1e7-2c465f460448",
"parameters": {},
"typeVersion": 1.1
},
{
"id": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
"name": "Límite",
"type": "n8n-nodes-base.limit",
"position": [
80,
-300
],
"parameters": {
"maxItems": 20
},
"typeVersion": 1
},
{
"id": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
"name": "Obtener lista de URLs del sitio web",
"type": "n8n-nodes-base.httpRequest",
"position": [
-780,
-300
],
"parameters": {
"url": "={{ $json.sitemap_url }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
"name": "Convertir a JSON",
"type": "n8n-nodes-base.xml",
"position": [
-560,
-300
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
"name": "Crear lista de URLs del sitio web",
"type": "n8n-nodes-base.splitOut",
"position": [
-340,
-300
],
"parameters": {
"options": {},
"fieldToSplitOut": "urlset.url"
},
"typeVersion": 1
},
{
"id": "61555239-8a16-424e-8a60-700f6ebaa270",
"name": "Filtrar por temas o páginas",
"type": "n8n-nodes-base.filter",
"position": [
-120,
-300
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "or",
"conditions": [
{
"id": "d66c304d-879a-4dc4-908f-ab0665093672",
"operator": {
"name": "filter.operator.equals",
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.loc }}",
"rightValue": "=https://ai.pydantic.dev/"
},
{
"id": "3c930950-bee4-442b-82e6-4437fd39a933",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.loc.toLowerCase() }}",
"rightValue": "agent"
},
{
"id": "aaeaf34e-ad5a-4673-b3bd-8bddf3500988",
"operator": {
"type": "string",
"operation": "contains"
},
"leftValue": "={{ $json.loc.toLowerCase() }}",
"rightValue": "tool"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
"name": "Establecer URL del sitio web",
"type": "n8n-nodes-base.set",
"position": [
-1080,
-300
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "1601dc3e-8024-4e19-b592-93a4e4f77641",
"name": "sitemap_url",
"type": "string",
"value": "https://ai.pydantic.dev/sitemap.xml"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
"name": "Jina.ai Web Scraper",
"type": "n8n-nodes-base.httpRequest",
"position": [
-720,
120
],
"parameters": {
"url": "=https://r.jina.ai/{{ $json.loc }}",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "be253ec2-f088-4895-8ef2-61a3720cf68b",
"name": "Guardar contenidos web en Google Drive",
"type": "n8n-nodes-base.googleDrive",
"position": [
-120,
120
],
"parameters": {
"name": "={{ $('Loop Over Items').item.json.loc }} - {{ $json.title }}",
"content": "={{ $json.markdown }}",
"driveId": {
"__rl": true,
"mode": "list",
"value": "My Drive"
},
"options": {},
"folderId": {
"__rl": true,
"mode": "list",
"value": "root",
"cachedResultName": "/ (Root folder)"
},
"operation": "createFromText"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "UhdXGYLTAJbsa0xX",
"name": "Google Drive account"
}
},
"typeVersion": 3
},
{
"id": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
"name": "Extraer título y contenido en Markdown",
"type": "n8n-nodes-base.code",
"position": [
-380,
120
],
"parameters": {
"jsCode": "// Get the text output from the previous node\nconst data = $input.first().json.data;\n\n// Regular expression to capture the title line\nconst titleRegex = /^Title:\\s*(.+)$/m;\n// Regular expression to capture everything after \"Markdown Content:\"\nconst markdownRegex = /Markdown Content:\\n([\\s\\S]+)/;\n\n// Extract the title using the first capture group\nconst titleMatch = data.match(titleRegex);\nconst title = titleMatch ? titleMatch[1].trim() : '';\n\n// Extract the markdown content using the first capture group\nconst markdownMatch = data.match(markdownRegex);\nconst markdown = markdownMatch ? markdownMatch[1].trim() : '';\n\n// Return a single object with title and markdown as unique values\nreturn { title, markdown };"
},
"typeVersion": 2
},
{
"id": "2fb86c81-c144-4450-908c-559855deadef",
"name": "Nota adhesiva1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1240,
-580
],
"parameters": {
"color": 7,
"width": 1540,
"height": 1080,
"content": "# 💡🌐 Essential Multipage Website Scraper with Jina.ai\n## Scrape entire websites with this workflow\n**Use responsibly and follow local rules and regulations**"
},
"typeVersion": 1
},
{
"id": "b470b294-95d0-4e51-a9cc-2fe17316a771",
"name": "Nota adhesiva2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1580,
-400
],
"parameters": {
"color": 4,
"width": 280,
"height": 300,
"content": "## 👍Try Me!"
},
"typeVersion": 1
},
{
"id": "fafd0623-a423-4e73-9609-cee8e81f5c13",
"name": "Nota adhesiva3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1180,
-400
],
"parameters": {
"width": 300,
"height": 300,
"content": "## 👇Add Website Sitemap URL"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "2e815787-d83b-4ab7-a959-2f33006a37a5",
"connections": {
"11f0fa02-51f8-41cc-b789-5c452b6899aa": {
"main": [
[
{
"node": "72af3b00-2632-4877-a0b6-7477e2f468f7",
"type": "main",
"index": 0
}
]
]
},
"cf3b5887-8ff2-46e0-ab33-384ab0987cbb": {
"main": [
[
{
"node": "72af3b00-2632-4877-a0b6-7477e2f468f7",
"type": "main",
"index": 0
}
]
]
},
"7f507c38-1e9e-4c46-8dea-bd6daf65dc55": {
"main": [
[
{
"node": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
"type": "main",
"index": 0
}
]
]
},
"72af3b00-2632-4877-a0b6-7477e2f468f7": {
"main": [
[],
[
{
"node": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
"type": "main",
"index": 0
}
]
]
},
"dd25fb57-64a3-4c47-be04-6eb66d16520a": {
"main": [
[
{
"node": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
"type": "main",
"index": 0
}
]
]
},
"14ac1c87-29fe-44c8-9c1e-f247a292dde5": {
"main": [
[
{
"node": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
"type": "main",
"index": 0
}
]
]
},
"c4f04d82-aa33-46cf-a8e2-0b4e717e754a": {
"main": [
[
{
"node": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
"type": "main",
"index": 0
}
]
]
},
"61555239-8a16-424e-8a60-700f6ebaa270": {
"main": [
[
{
"node": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
"type": "main",
"index": 0
}
]
]
},
"e21b55c2-8b0d-4c7c-ba91-a2d563a4c966": {
"main": [
[
{
"node": "61555239-8a16-424e-8a60-700f6ebaa270",
"type": "main",
"index": 0
}
]
]
},
"95d808c7-a3ca-4f59-a385-cc77bdff322e": {
"main": [
[
{
"node": "be253ec2-f088-4895-8ef2-61a3720cf68b",
"type": "main",
"index": 0
}
]
]
},
"c5217a1a-f074-409b-8340-72afdc5fc8b5": {
"main": [
[
{
"node": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
"type": "main",
"index": 0
}
]
]
},
"be253ec2-f088-4895-8ef2-61a3720cf68b": {
"main": [
[
{
"node": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
"type": "main",
"index": 0
}
]
]
}
}
}¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n y seleccione "Importar desde JSON", pegue la configuración y luego modifique la configuración de credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Avanzado - Inteligencia Artificial
¿Es de pago?
Este flujo de trabajo es completamente gratuito, puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como la API de OpenAI) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
Joseph LePage
@joeAs an AI Automation consultant based in Canada, I partner with forward-thinking organizations to implement AI solutions that streamline operations and drive growth.
Compartir este flujo de trabajo