RAG de documentos y agente de chat: Google Drive a Qdrant con Mistral OCR
Este es un flujo de automatización del dominio Internal Wiki, AI RAG que contiene 40 nodos. Utiliza principalmente nodos como If, Set, Code, GoogleDrive, HttpRequest. RAG de documentos y agente de chat: Google Drive a Qdrant con Mistral OCR
- Credenciales de API de Google Drive
- Pueden requerirse credenciales de autenticación para la API de destino
- Clave de API de OpenAI
- Información de conexión del servidor Qdrant
Nodos utilizados (40)
Categoría
{
"meta": {
"instanceId": "e4680277d6b9c8b80748f71c2c1d0f9a640576175738ea2675967f762eeaf9df",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "7935a827-516b-4e64-b370-f17d23c70857",
"name": "Embeddings OpenAI1",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
1488,
432
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1.2
},
{
"id": "32429618-1bed-4134-953f-fa5f0eed6981",
"name": "OpenAI Chat Model1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
528,
432
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini",
"cachedResultName": "gpt-4.1-mini"
},
"options": {
"temperature": 0.5
}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1.2
},
{
"id": "c770feb0-c01a-4e94-832a-3dafc88fb28f",
"name": "Búsqueda Web",
"type": "@n8n/n8n-nodes-langchain.toolHttpRequest",
"position": [
1200,
480
],
"parameters": {
"url": "https://api.tavily.com/search",
"method": "POST",
"jsonBody": "={\n \"query\": \"{query}\",\n \"topic\": \"general\",\n \"search_depth\": \"advanced\",\n \"max_results\": 20,\n \"include_answer\": true,\n \"include_raw_content\": false,\n \"include_images\": false,\n \"include_image_descriptions\": false,\n \"include_domains\": [],\n \"exclude_domains\": [\"\"]\n}",
"sendBody": true,
"sendHeaders": true,
"specifyBody": "json",
"toolDescription": "Web Search tool ",
"optimizeResponse": true,
"parametersHeaders": {
"values": [
{
"name": "Authorization",
"value": "add tavily token",
"valueProvider": "fieldValue"
},
{
"name": "Content-Type",
"value": "application/json",
"valueProvider": "fieldValue"
}
]
},
"placeholderDefinitions": {
"values": [
{
"name": "query",
"type": "string",
"description": "Search Querry"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "5b31d3c8-3fe5-4cfd-bb09-81193e6d973f",
"name": "Memoria Simple1",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
688,
432
],
"parameters": {},
"typeVersion": 1.3
},
{
"id": "42ff3014-5b83-474f-a35c-5384828b49fd",
"name": "Almacén Vectorial Qdrant",
"type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
"position": [
1488,
304
],
"parameters": {
"mode": "retrieve-as-tool",
"topK": 3,
"options": {},
"toolName": "add_name",
"toolDescription": "Use RAG to look up information in the knowledgebase.",
"qdrantCollection": {
"__rl": true,
"mode": "list",
"value": "docaiauto",
"cachedResultName": "docaiauto"
}
},
"credentials": {
"qdrantApi": {
"id": "4NinNhNX7VxfgZxs",
"name": "QdrantApi account 2"
}
},
"typeVersion": 1.1
},
{
"id": "c405636b-4957-4de1-9cd4-8733647af3a9",
"name": "Cuando se recibe mensaje de chat",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
496,
112
],
"webhookId": "9c27865f-526c-490f-80a2-645bc919e9de",
"parameters": {
"public": true,
"options": {}
},
"typeVersion": 1.1
},
{
"id": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
"name": "Cargador de Datos Predeterminado",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
3344,
1328
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "source",
"value": "={{ $('establecer todos los metadatos').item.json[\"Document name\"] }}"
},
{
"name": "blobType",
"value": "application/jsonb"
},
{
"name": "loc",
"value": "={{ $('OCR de Documento Mistral').item.json.usage_info }}"
},
{
"name": "source_metadata_id",
"value": "={{ $('Google Drive1').item.json.file_url }}"
},
{
"name": "department",
"value": "ai_automation"
},
{
"name": "PROJECT",
"value": "={{ $('establecer todos los metadatos').item.json.PROJECT }}"
},
{
"name": "DOCUMENT_TYPE",
"value": "={{ $('establecer todos los metadatos').item.json.DOCUMENT_TYPE }}"
},
{
"name": "ASSIGNED_TO",
"value": "={{ $('establecer todos los metadatos').item.json.ASSIGNEDTO }}"
}
]
}
},
"jsonData": "={{ $json.content }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "b008bae1-bc63-47da-8afa-b8a1b17ea412",
"name": "Separador de Texto por Caracteres",
"type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
"position": [
3408,
1440
],
"parameters": {},
"typeVersion": 1
},
{
"id": "fb42d3ab-5c20-4efc-920a-87dc96890cab",
"name": "Google Drive",
"type": "n8n-nodes-base.googleDrive",
"position": [
368,
1136
],
"parameters": {
"filter": {
"folderId": {
"__rl": true,
"mode": "list",
"value": "1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
"cachedResultUrl": "https://drive.google.com/drive/folders/1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
"cachedResultName": "knowledgebaseforaibot"
}
},
"options": {
"fields": [
"id",
"name",
"webViewLink",
"mimeType",
"*"
]
},
"resource": "fileFolder",
"searchMethod": "query"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "xS6kSuLaEkasxHtm",
"name": "Google Drive account 6 rn dbt"
}
},
"typeVersion": 3
},
{
"id": "393465a4-4b14-42ef-b2ca-608a161c6914",
"name": "Google Drive1",
"type": "n8n-nodes-base.googleDrive",
"position": [
1088,
1152
],
"parameters": {
"fileId": {
"__rl": true,
"mode": "url",
"value": "={{ $('añadir metadatos').item.json.file_url }}"
},
"options": {},
"operation": "download"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "xS6kSuLaEkasxHtm",
"name": "Google Drive account 6 rn dbt"
}
},
"typeVersion": 3
},
{
"id": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
"name": "Subida Mistral",
"type": "n8n-nodes-base.httpRequest",
"position": [
1328,
1152
],
"parameters": {
"url": "https://api.mistral.ai/v1/files",
"method": "POST",
"options": {},
"sendBody": true,
"contentType": "multipart-form-data",
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "purpose",
"value": "ocr"
},
{
"name": "file",
"parameterType": "formBinaryData",
"inputDataFieldName": "data"
}
]
},
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "255683ca-712a-4386-ae19-b3fbb5f37e30",
"name": "URL Firmada Mistral",
"type": "n8n-nodes-base.httpRequest",
"position": [
1552,
1152
],
"parameters": {
"url": "=https://api.mistral.ai/v1/files/{{ $json.id }}/url",
"options": {},
"sendQuery": true,
"sendHeaders": true,
"authentication": "predefinedCredentialType",
"queryParameters": {
"parameters": [
{
"name": "expiry",
"value": "24"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "application/json"
}
]
},
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
"name": "OCR de Documento Mistral",
"type": "n8n-nodes-base.httpRequest",
"position": [
1776,
1152
],
"parameters": {
"url": "https://api.mistral.ai/v1/ocr",
"method": "POST",
"options": {},
"jsonBody": "={\n \"model\": \"mistral-ocr-latest\",\n \"document\": {\n \"type\": \"document_url\",\n \"document_url\": \"{{ $json.url }}\"\n },\n \"include_image_base64\": true\n}",
"sendBody": true,
"specifyBody": "json",
"authentication": "predefinedCredentialType",
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "a1c139ec-5ec2-4142-aacc-5df387b677fd",
"name": "Al hacer clic en 'Probar flujo'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
96,
1136
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6ceb4cbc-b4b0-4ac6-befe-a92649279bf4",
"name": "Embeddings OpenAI",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
3168,
1456
],
"parameters": {
"model": "text-embedding-3-small",
"options": {}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1
},
{
"id": "ee2ce006-d94c-49a7-849f-8155c95ee118",
"name": "Almacén Vectorial Qdrant1",
"type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
"position": [
3184,
1152
],
"parameters": {
"mode": "insert",
"options": {},
"qdrantCollection": {
"__rl": true,
"mode": "list",
"value": "docaiauto",
"cachedResultName": "docaiauto"
},
"embeddingBatchSize": "=200"
},
"credentials": {
"qdrantApi": {
"id": "hBirQvCk1VaV8cfQ",
"name": "QdrantApi account"
}
},
"typeVersion": 1.1
},
{
"id": "b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259",
"name": "Modelo de Chat en la Nube Mistral",
"type": "@n8n/n8n-nodes-langchain.lmChatMistralCloud",
"position": [
2208,
1456
],
"parameters": {
"model": "mistral-small-latest",
"options": {}
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 1
},
{
"id": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"name": "agente de chat de IA",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
992,
112
],
"parameters": {
"options": {
"systemMessage": "=You are a helpful, intelligent AI Knowledge Bot and proactive chat agent. \nLead conversations by asking open-ended questions and offering suggestions. \nAnticipate needs, follow up thoughtfully, and make the user feel supported throughout.\n\n---\n\n## **Knowledge Access Workflow**\n\n1. **Primary Source: qdrant vector store (`add_table_name` table)** \n - For **every** user query, first search **only** in qdrant. \n - If relevant information is found:\n - Respond in a **clear, structured, and informative** manner. \n - **Always include multiple images** if available — aim for **maximum number of relevant images**, not just one. \n - Place each image **inline**, immediately after the sentence/section it illustrates. \n - Never group all images at the end.\n - Always **cite the internal document name** and **provide a clickable URL** to the source document.\n - Use `list doc` tool with `source_metadata_id` to retrieve the document URL.\n \n **Example Citation:** \n > This information is based on the internal AI documentation related to automation projects (Automation Timesheet Blogs). \n > [View Full Document](document_url_here)\n\n2. **If No Match Found:** \n - Reply: \n `\"The information you requested is not available in our internal documentation. Would you like me to do a Web search?\"`\n\n3. **Web Search Rules:** \n - Only search if user explicitly agrees (e.g., \"Yes\", \"Okay\"). \n - use 'web search 1'tool\n - Clearly label as **External Source** and provide clickable URLs. \n Example: \n > This information comes from an external source ([website_name](source_url)).\n\n4. **If No Data in Qdrant or Web:** \n - Reply: \n `\"At the moment, there is no information available to fully address this request based on current internal and external sources.\"`\n\n---\n\n## **Formatting & Style**\n- **Language:** All qdrant queries in English; all responses in English. \n- **Tone:** Professional, clear, structured, and helpful. 
\n- **Image Usage:** \n - Always include **all available relevant images** MARKDOWN — even if it means showing several per section. \n - Place images **exactly after the point they illustrate**. \n - Use different images for different subtopics, comparisons, or examples. \n - Do **not** skip images if they exist in the source.\n- **For Guide/Instruction Responses:** \n 1. **Step-by-Step Guide** (detailed instructions) \n 2. **Things to Consider** (common pitfalls, important details) \n 3. **Further Understanding** \n - Suggest 2 related topics the user might explore next, relevant to their query.\n- Never skip key details. Always check if more images can be added to improve clarity.\n\n---\n\n## **Core Principles**\n- Be proactive — never wait for the user to guide the entire conversation. \n- Ask open-ended follow-ups. \n- Anticipate related needs based on the query. \n- Maintain accuracy, cite sources, and **use as many relevant images as possible**. \n- ALWAYS SHOW IMAGE AND Keep images inline and contextually placed.\n",
"returnIntermediateSteps": false
}
},
"typeVersion": 1.7
},
{
"id": "3e71d64a-3a04-4f5e-b009-8eddbef94e8c",
"name": "Nota Adhesiva",
"type": "n8n-nodes-base.stickyNote",
"position": [
1008,
416
],
"parameters": {
"color": 5,
"width": 400,
"height": 208,
"content": "## WEB SEARCH using tavily (http node)\n [Tavily setup Guide](https://docs.tavily.com/welcome)"
},
"typeVersion": 1
},
{
"id": "3c335f41-3d05-4436-ad0f-ce6957967a0c",
"name": "Nota Adhesiva1",
"type": "n8n-nodes-base.stickyNote",
"position": [
320,
928
],
"parameters": {
"color": 5,
"width": 224,
"height": 400,
"content": "## GET ALL FILE DATA FROM SELECTED GOOGLE DRIVE FOLDER"
},
"typeVersion": 1
},
{
"id": "a404df40-0f40-46b5-a65b-edc182d06d9b",
"name": "Nota Adhesiva2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1936,
1040
],
"parameters": {
"color": 5,
"width": 192,
"height": 256,
"content": "## Remove empty data fields "
},
"typeVersion": 1
},
{
"id": "4776e7ea-7243-474f-9ec0-47afc46d8479",
"name": "Nota Adhesiva3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1024,
976
],
"parameters": {
"color": 5,
"width": 208,
"height": 320,
"content": "## GET individual files from selected gdrive"
},
"typeVersion": 1
},
{
"id": "3a593a87-2d91-48dc-85da-98a5bc562ff6",
"name": "Nota Adhesiva4",
"type": "n8n-nodes-base.stickyNote",
"position": [
1280,
1008
],
"parameters": {
"color": 5,
"width": 608,
"height": 272,
"content": "## MISTRAL OCR\n [OCR Guide](https://mistral.ai/news/mistral-ocr)\n1. UPLOAD FILE\n2. GET SIGNED URL\n3. GET EXTRACT DATA AFTER USING MISTRAL OCR"
},
"typeVersion": 1
},
{
"id": "65106e47-a303-4f46-be3b-207d6e6c04fa",
"name": "Nota Adhesiva5",
"type": "n8n-nodes-base.stickyNote",
"position": [
352,
16
],
"parameters": {
"color": 5,
"width": 368,
"height": 288,
"content": "## Hosted Chat interface \n"
},
"typeVersion": 1
},
{
"id": "78103cfe-e101-4d70-8dea-08b4ca2bbdb6",
"name": "Nota Adhesiva6",
"type": "n8n-nodes-base.stickyNote",
"position": [
1472,
96
],
"parameters": {
"color": 5,
"width": 272,
"height": 496,
"content": "## QDRANT VECTOR STORE AND OPENAI EMBEDDING \n [QDRANT Guide](https://qdrant.tech/documentation/)"
},
"typeVersion": 1
},
{
"id": "716859d0-f3eb-431c-a6fc-37301508c968",
"name": "Nota Adhesiva7",
"type": "n8n-nodes-base.stickyNote",
"position": [
928,
0
],
"parameters": {
"color": 5,
"width": 432,
"height": 304,
"content": "## AI chat agent\n interact with user and process user input and provide appropriate response using different tools. "
},
"typeVersion": 1
},
{
"id": "aa7c86ee-3229-415e-96eb-92fbd530aa44",
"name": "Nota Adhesiva10",
"type": "n8n-nodes-base.stickyNote",
"position": [
2672,
1008
],
"parameters": {
"color": 5,
"width": 320,
"height": 288,
"content": "## clean all extracted data and convert them to smaller chunks"
},
"typeVersion": 1
},
{
"id": "ccd381e0-1cd2-406d-9aec-7b03c18ee435",
"name": "Nota Adhesiva11",
"type": "n8n-nodes-base.stickyNote",
"position": [
2176,
928
],
"parameters": {
"color": 5,
"width": 288,
"height": 368,
"content": "## assignment agent\n\nFor any given file, this node assigns which type of document it is, which project it is related to, and who is working on it."
},
"typeVersion": 1
},
{
"id": "87f7a493-d65c-447b-a465-3175c1bcbea7",
"name": "Nota Adhesiva12",
"type": "n8n-nodes-base.stickyNote",
"position": [
3088,
976
],
"parameters": {
"color": 5,
"width": 512,
"height": 608,
"content": "## load all chunks into qdrant vector database"
},
"typeVersion": 1
},
{
"id": "66f3dfcf-9fbe-4d63-888e-dda7c422c282",
"name": "Nota Adhesiva9",
"type": "n8n-nodes-base.stickyNote",
"position": [
576,
992
],
"parameters": {
"color": 5,
"width": 256,
"height": 320,
"content": "## loop over google drive folder items"
},
"typeVersion": 1
},
{
"id": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"name": "Iterar sobre cada archivo en carpeta de gdrive",
"type": "n8n-nodes-base.splitInBatches",
"position": [
608,
1136
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "4300ad47-a92a-408b-aa8b-b56249d341e9",
"name": "añadir metadatos",
"type": "n8n-nodes-base.set",
"position": [
896,
1152
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "10646eae-ae46-4327-a4dc-9987c2d76173",
"name": "file_id",
"type": "string",
"value": "={{ $json.id }}"
},
{
"id": "f4536df5-d0b1-4392-bf17-b8137fb31a44",
"name": "file_type",
"type": "string",
"value": "={{ $json.mimeType }}"
},
{
"id": "77d782de-169d-4a46-8a8e-a3831c04d90f",
"name": "file_title",
"type": "string",
"value": "={{ $json.name }}"
},
{
"id": "9bde4d7f-e4f3-4ebd-9338-dce1350f9eab",
"name": "file_url",
"type": "string",
"value": "={{ $json.webViewLink }}"
},
{
"id": "fae402c8-c486-4b57-8d28-bf669db6b442",
"name": "last_modified_date",
"type": "string",
"value": "={{ $json.modifiedTime }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "efc3f354-0a6d-46dc-8ecf-793873e19466",
"name": "Nodo Si",
"type": "n8n-nodes-base.if",
"position": [
1984,
1152
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "loose"
},
"combinator": "and",
"conditions": [
{
"id": "d15e917b-43d2-40b8-8b49-af467ff63961",
"operator": {
"type": "string",
"operation": "notExists",
"singleValue": true
},
"leftValue": "={{ $json.data[0].parseJson().skipped }}",
"rightValue": ""
}
]
},
"looseTypeValidation": true
},
"typeVersion": 2.2
},
{
"id": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"name": "asigna metadatos diferentes según nombre de archivo",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
2208,
1152
],
"parameters": {
"text": "=\n{{ $json.pages[0].markdown }}",
"options": {
"systemPromptTemplate": "You are an expert extraction algorithm.\nOnly extract relevant information from the text.\ngive data is "
},
"attributes": {
"attributes": [
{
"name": "document_type",
"description": "type of document this can be blog, user documentation, technical documentation, manual/guide, educational"
},
{
"name": "project",
"description": " from give project this can any one ,social/digimarketing , chatbot, knowledge bot if nothing that its default"
},
{
"name": "assigned_to",
"description": "who are handling give projects mention all name given for a give project\n\nif project LINKED/DIGIMARRKETING/SOCIAL = [employee name]\nif project KB BOT = [employee name]\nif project CHAT BOT = [employee name]\nif project TIMESHEET = [employee name]"
}
]
}
},
"typeVersion": 1
},
{
"id": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
"name": "establecer todos los metadatos",
"type": "n8n-nodes-base.set",
"position": [
2512,
1152
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "5132d92c-41da-4a55-ad79-0c329ca7e626",
"name": "Document name",
"type": "string",
"value": "={{ $('Google Drive1').item.json.file_title }}"
},
{
"id": "c8160701-2be7-43c6-bcfa-295fbebe0e23",
"name": "Document data",
"type": "string",
"value": "={{ $('Nodo Si').item.json.pages[0].markdown }}"
},
{
"id": "1087ab34-5643-4755-b545-cf34d0ae2cd2",
"name": " source",
"type": "string",
"value": "={{ $('Google Drive1').item.json.file_id }}"
},
{
"id": "4317aa31-d9fd-4adc-ab26-b48c208041b3",
"name": "ASSIGNEDTO",
"type": "string",
"value": "={{ $json.output.assigned_to.split(\",\") }}"
},
{
"id": "7a61d775-06b1-42e2-a82a-d8e756fa5586",
"name": "PROJECT",
"type": "string",
"value": "={{ $json.output.project }}"
},
{
"id": "300f49d6-8851-458d-a296-8ceb68ebdd5f",
"name": "DOCUMENT_TYPE",
"type": "string",
"value": "={{ $json.output.document_type }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
"name": "limpiar salida",
"type": "n8n-nodes-base.code",
"position": [
2704,
1152
],
"parameters": {
"jsCode": "// Get incoming data (first item)\nconst data = items[0].json;\n\n// If wrapped in an array, unwrap it\nconst input = Array.isArray(data) ? data[0] : data;\n\nlet output = [\n {\n content: input[\"Document data\"], // main text for embedding\n metadata: {\n document_name: input[\"Document name\"],\n source_id: input[\" source\"], // note: there is a space before \"source\" in your key\n }\n }\n];\n\nreturn output.map(o => ({ json: o }));\n"
},
"typeVersion": 2
},
{
"id": "f9172d1d-060c-46f7-8e48-b235a2610185",
"name": "convertir datos en fragmentos más pequeños",
"type": "n8n-nodes-base.code",
"position": [
2864,
1152
],
"parameters": {
"jsCode": "// Split each incoming item's text into overlapping character chunks for embedding.\nconst chunkSize = 1000; // characters per chunk\nconst chunkOverlap = 100; // characters shared between consecutive chunks\nconst step = chunkSize - chunkOverlap;\n\nconst newItems = [];\n\nfor (const item of items) {\n  const text = item.json.content;\n  // Skip items that carry no usable text.\n  if (!text || typeof text !== \"string\") continue;\n\n  // Every chunk keeps the metadata of the item it came from.\n  for (let i = 0; i < text.length; i += step) {\n    newItems.push({\n      json: {\n        content: text.slice(i, i + chunkSize),\n        metadata: item.json.metadata\n      }\n    });\n  }\n}\n\nreturn newItems;\n"
},
"typeVersion": 2
},
{
"id": "2a9539e7-e8dc-4c2a-a016-daab8c7d5339",
"name": "Nota Adhesiva8",
"type": "n8n-nodes-base.stickyNote",
"position": [
3696,
736
],
"parameters": {
"color": 5,
"width": 800,
"height": 1248,
"content": "### **Node List & Descriptions**\n\n1. **Manual Trigger** \n - **What:** Kicks off the workflow when the user clicks **“Test workflow.”** \n - **Why:** Allows for on-demand execution and testing of the data pipeline. \n\n2. **Google Drive (List Files)** \n - **What:** Lists all files within a predefined Google Drive folder (`knowledgebaseforaibot`). \n - **Why:** To retrieve the initial list of all documents that need to be processed. \n\n3. **Loop Over Files** \n - **What:** Iterates through each file retrieved from Google Drive, processing them one by one. \n - **Why:** Ensures each document is handled individually in the following steps. \n\n4. **Google Drive (Download File)** \n - **What:** Downloads the binary content of the current file being processed in the loop. \n - **Why:** Provides the actual file data needed for OCR processing. \n\n5. **Mistral OCR (via HTTP Requests)** \n - **What:** \n - Uploads the file to the Mistral API. \n - Retrieves a temporary signed URL for access. \n - Calls the Mistral OCR endpoint to extract all text content from the document. \n - **Why:** Converts scanned or image-based documents (like PDFs, JPGs) into machine-readable text. \n\n6. **If Node (Check for Success)** \n - **What:** Verifies that the Mistral OCR operation completed successfully without errors. \n - **Why:** Ensures only valid, readable documents continue in the workflow, avoiding process failures. \n\n7. **Information Extractor (LangChain)** \n - **What:** Uses the Mistral AI chat model to analyze the extracted text and assign metadata such as: \n - `document_type` \n - `project` \n - `assigned_to` \n - **Why:** Automatically categorizes and tags documents based on their content for better organization and searchability. \n\n8. **Code Node (Clean & Chunk Data)** \n - **What:** \n - Formats the extracted text and metadata into a clean JSON structure. \n - Splits the document text into smaller, overlapping chunks (≈1000 characters each). 
\n - **Why:** Prepares data for embedding and ensures compatibility with language model context limits. \n\n9. **OpenAI Embeddings (LangChain)** \n - **What:** Converts each text chunk into a numerical vector using OpenAI’s `text-embedding-3-small` model. \n - **Why:** Encodes the semantic meaning of the text for similarity-based search and retrieval. \n\n10. **Qdrant Vector Store (LangChain)** \n - **What:** Inserts the text chunks and their corresponding embeddings into a Qdrant vector database collection named `docaiauto`. \n - **Why:** Creates a searchable, semantic knowledge base for AI-driven document retrieval. \n\n---\n\n### **flow of data**\n\n**Trigger → List Files (Google Drive) → Loop → Download File → OCR (Mistral) → Validate → Categorize (AI) → Clean & Chunk → Embed (OpenAI) → Store (Qdrant)**\n"
},
"typeVersion": 1
},
{
"id": "1dc0d13d-c75c-44dd-a94c-0b177cf6c094",
"name": "Nota Adhesiva13",
"type": "n8n-nodes-base.stickyNote",
"position": [
1808,
-320
],
"parameters": {
"color": 5,
"width": 960,
"height": 928,
"content": "### **Node List & Descriptions**\n\n1. **Chat Trigger** \n - **What:** Kicks off the workflow whenever a new message is received in the public chat interface. \n - **Why:** To make the workflow interactive and responsive to user input in real-time. \n\n2. **AI Chat Agent** \n - **What:** The central brain of the workflow. It orchestrates the entire process based on a detailed system prompt, managing memory and deciding which tools to use (internal knowledge base or web search). \n - **Why:** To act as an intelligent agent that can handle complex user queries by following a specific set of rules for information retrieval. \n\n3. **OpenAI Chat Model** \n - **What:** The underlying Large Language Model (`gpt-4.1-mini`) that powers the agent's reasoning, understanding, and response generation. \n - **Why:** Provides the core intelligence for understanding user intent and formulating human-like, context-aware answers. \n\n4. **Simple Memory** \n - **What:** Stores a history of the current conversation to provide context for follow-up questions. \n - **Why:** Allows the agent to remember what was previously discussed, leading to a more natural and coherent conversation. \n\n5. **Qdrant Vector Store (RAG Tool)** \n - **What:** \n - Acts as the primary tool for the agent. \n - Searches the internal knowledge base (`docaiauto` collection) for information relevant to the user's query. \n - **Why:** Ensures that the agent first attempts to answer questions using verified, internal documentation before seeking external information. \n\n6. **OpenAI Embeddings** \n - **What:** Works in conjunction with the Qdrant Vector Store to convert the user's text query into a numerical vector. \n - **Why:** Enables powerful semantic search, allowing the agent to find information based on meaning and context, not just keywords. \n\n7. **Web Search (Tavily Tool)** \n - **What:** Acts as a secondary tool for the agent, available only upon user confirmation. 
It performs an external web search using the Tavily API. \n - **Why:** Provides a fallback to access up-to-date, public information when the internal knowledge base does not contain the required answer. \n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Chat Message → AI Agent → Use Tools (Qdrant or Web Search) → Generate Response (OpenAI) → Send Reply**\n"
},
"typeVersion": 1
},
{
"id": "83726285-3bfd-4d22-ad35-39a5ff3ef414",
"name": "Nota Adhesiva14",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1232,
-176
],
"parameters": {
"color": 5,
"width": 864,
"height": 1856,
"content": "# Konowledge RAG and AI chat agent \n## by DIGITAL BIZ TECH\n\n## **Overview**\n\nThis workflow automates the process of transforming all files stored in Google Drive into a **searchable, intelligent knowledge base** — fully integrated with a **chat-enabled AI assistant**. \nIt also supports **web search augmentation**, allowing the agent to explain, compare, or enrich document insights using the latest online data.\n\n---\n\n## **Key Objectives**\n\n1. **Ingest and Process All Files**\n - Retrieve all files from a specific Google Drive folder.\n - Automatically extract their text content using OCR and AI-based parsing.\n - Generate structured metadata for each document (e.g., project, assignment, employee, and category).\n - Convert the text into **vector embeddings** and store them in a **Qdrant vector database** for semantic retrieval.\n\n2. **Enrich Metadata Intelligence**\n - Enhance each document with contextual metadata fields:\n - `project` — identifies which project the document belongs to.\n - `assignment` — links the file to a specific task or deliverable.\n - `employee` — indicates the team member responsible or related to the document.\n - Improves the **contextual accuracy** of RAG (Retrieval-Augmented Generation) queries by enabling more targeted retrieval.\n\n3. **Integrate with Chat-Based AI Agent**\n - The agent uses a **ai agent with different tools** to:\n - Query internal knowledge (Qdrant collection) for relevant chunks.\n - Optionally trigger a **web search (Tavily API)** when internal data is insufficient.\n - Combine results intelligently to deliver accurate and explainable answers.\n - Users can interact naturally through a chat interface to explore, summarize, or cross-reference different documents.\n\n4. 
**Maintain a Robust RAG Pipeline for Unstructured Data**\n - Designed to handle **unclean, inconsistent, or multi-format data sources** (PDFs, DOCX, images, etc.).\n - The cleaning and chunking logic ensures uniform embeddings, even from noisy input.\n - The RAG system automatically improves over time as new documents are added or updated in Google Drive.\n\n---\n\n## **Functional Flow**\n\n**Google Drive → File Extraction → Metadata Enrichment → Text Chunking → Embedding (OpenAI) → Storage (Qdrant) → AI Chat Agent → Web Search (Optional)**\n\n---\n\n## **Use Cases**\n\n- 📁 **Automated Knowledge Management:** Build a live, self-updating internal document knowledge base. \n- 🧠 **Smart AI Assistance:** Enable employees to query project or task documents conversationally. \n- 🌐 **Hybrid Knowledge Retrieval:** Combine private document insights with external web data. \n- 🧩 **Flexible Integration:** Can be connected to any folder or department for scalable RAG deployment.\n\n---\n\n## **Benefits**\n\n- Fully automated ingestion from Google Drive. \n- Rich, context-aware metadata for intelligent document relationships. \n- Chat interface for easy access to organizational knowledge. \n- Optional web search for real-time, external context expansion. \n- Maintains a clean, high-quality **RAG (Retrieval-Augmented Generation)** pipeline even from unclean data sources.\n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Google Drive → OCR/Text Extraction → Metadata Enrichment → Embeddings → Qdrant (RAG Store) → Chat AI Agent → Optional Web Search**"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"efc3f354-0a6d-46dc-8ecf-793873e19466": {
"main": [
[
{
"node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"type": "main",
"index": 0
}
],
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"c770feb0-c01a-4e94-832a-3dafc88fb28f": {
"ai_tool": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_tool",
"index": 0
}
]
]
},
"fb42d3ab-5c20-4efc-920a-87dc96890cab": {
"main": [
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"4300ad47-a92a-408b-aa8b-b56249d341e9": {
"main": [
[
{
"node": "393465a4-4b14-42ef-b2ca-608a161c6914",
"type": "main",
"index": 0
}
]
]
},
"2a4026cf-3034-49a8-8dc8-ad67ae571213": {
"main": [
[
{
"node": "f9172d1d-060c-46f7-8e48-b235a2610185",
"type": "main",
"index": 0
}
]
]
},
"393465a4-4b14-42ef-b2ca-608a161c6914": {
"main": [
[
{
"node": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
"type": "main",
"index": 0
}
]
]
},
"3e38082e-4619-47cf-98e0-f7f66a8541c1": {
"main": [
[
{
"node": "255683ca-712a-4386-ae19-b3fbb5f37e30",
"type": "main",
"index": 0
}
]
]
},
"5b31d3c8-3fe5-4cfd-bb09-81193e6d973f": {
"ai_memory": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_memory",
"index": 0
}
]
]
},
"2e846e38-ae17-4717-a39b-b3d0e0c567f9": {
"main": [
[
{
"node": "efc3f354-0a6d-46dc-8ecf-793873e19466",
"type": "main",
"index": 0
}
]
]
},
"c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a": {
"main": [
[
{
"node": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
"type": "main",
"index": 0
}
]
]
},
"6ceb4cbc-b4b0-4ac6-befe-a92649279bf4": {
"ai_embedding": [
[
{
"node": "ee2ce006-d94c-49a7-849f-8155c95ee118",
"type": "ai_embedding",
"index": 0
}
]
]
},
"7935a827-516b-4e64-b370-f17d23c70857": {
"ai_embedding": [
[
{
"node": "42ff3014-5b83-474f-a35c-5384828b49fd",
"type": "ai_embedding",
"index": 0
}
]
]
},
"255683ca-712a-4386-ae19-b3fbb5f37e30": {
"main": [
[
{
"node": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
"type": "main",
"index": 0
}
]
]
},
"32429618-1bed-4134-953f-fa5f0eed6981": {
"ai_languageModel": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"8f2950f4-0c96-4d70-9aa0-2729dd0b560e": {
"ai_document": [
[
{
"node": "ee2ce006-d94c-49a7-849f-8155c95ee118",
"type": "ai_document",
"index": 0
}
]
]
},
"42ff3014-5b83-474f-a35c-5384828b49fd": {
"ai_tool": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_tool",
"index": 0
}
]
]
},
"ee2ce006-d94c-49a7-849f-8155c95ee118": {
"main": [
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"b008bae1-bc63-47da-8afa-b8a1b17ea412": {
"ai_textSplitter": [
[
{
"node": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259": {
"ai_languageModel": [
[
{
"node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"c405636b-4957-4de1-9cd4-8733647af3a9": {
"main": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "main",
"index": 0
}
]
]
},
"f9172d1d-060c-46f7-8e48-b235a2610185": {
"main": [
[
{
"node": "ee2ce006-d94c-49a7-849f-8155c95ee118",
"type": "main",
"index": 0
}
]
]
},
"a1c139ec-5ec2-4142-aacc-5df387b677fd": {
"main": [
[
{
"node": "fb42d3ab-5c20-4efc-920a-87dc96890cab",
"type": "main",
"index": 0
}
]
]
},
"d21fe18b-7f9b-46b5-a8b5-2725c849db7f": {
"main": [
[],
[
{
"node": "4300ad47-a92a-408b-aa8b-b56249d341e9",
"type": "main",
"index": 0
}
]
]
},
"33e574c3-47a2-454b-85e9-e3e9983e3d63": {
"main": [
[
{
"node": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
"type": "main",
"index": 0
}
]
]
}
}
}
¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n y seleccione "Importar desde JSON", pegue la configuración y luego modifique la configuración de credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Avanzado - Wiki interno, RAG de IA
¿Es de pago?
Este flujo de trabajo es completamente gratuito, puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como la API de OpenAI) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
DIGITAL BIZ TECH
@dbt
Compartir este flujo de trabajo