Document RAG and Chat Agent: Google Drive to Qdrant with Mistral OCR
Advanced
This is an automation workflow in the Internal Wiki / AI RAG category containing 40 nodes. It mainly uses If, Set, Code, Google Drive, and HTTP Request nodes.
Prerequisites
- Google Drive API credentials
- OpenAI API key
- Mistral API key (for OCR)
- Tavily API key (for the web search tool)
- Qdrant server connection details (see the collection setup sketch below)
- Credentials for any other target APIs, where required
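Both Qdrant nodes in this workflow point at a collection named docaiauto. If that collection does not exist yet on your Qdrant server, you can create it up front. The snippet below is a minimal sketch rather than part of the exported workflow: it assumes Node.js 18+ (global fetch), a Qdrant instance at localhost:6333, and 1536-dimensional vectors to match OpenAI's text-embedding-3-small with cosine distance; adjust the URL and API key to your own setup.

// Minimal sketch: create the "docaiauto" collection this workflow reads from and writes to.
// Assumptions: Node.js 18+, Qdrant reachable at localhost:6333, vectors sized for
// text-embedding-3-small (1536 dimensions) with cosine distance.
const QDRANT_URL = "http://localhost:6333"; // adjust to your Qdrant server
const COLLECTION = "docaiauto";             // collection name used by the Qdrant nodes

async function createCollection() {
  const res = await fetch(`${QDRANT_URL}/collections/${COLLECTION}`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
      // "api-key": "<your-qdrant-api-key>", // uncomment if your server requires it
    },
    body: JSON.stringify({ vectors: { size: 1536, distance: "Cosine" } }),
  });
  console.log(await res.json()); // expect { "result": true, ... } on success
}

createCollection();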
Export workflow
Import the JSON configuration below into n8n to use this workflow.
{
"meta": {
"instanceId": "e4680277d6b9c8b80748f71c2c1d0f9a640576175738ea2675967f762eeaf9df",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "7935a827-516b-4e64-b370-f17d23c70857",
"name": "OpenAI 埋め込み1",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
1488,
432
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1.2
},
{
"id": "32429618-1bed-4134-953f-fa5f0eed6981",
"name": "OpenAI チャットモデル1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
528,
432
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini",
"cachedResultName": "gpt-4.1-mini"
},
"options": {
"temperature": 0.5
}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1.2
},
{
"id": "c770feb0-c01a-4e94-832a-3dafc88fb28f",
"name": "Web Search",
"type": "@n8n/n8n-nodes-langchain.toolHttpRequest",
"position": [
1200,
480
],
"parameters": {
"url": "https://api.tavily.com/search",
"method": "POST",
"jsonBody": "={\n \"query\": \"{query}\",\n \"topic\": \"general\",\n \"search_depth\": \"advanced\",\n \"max_results\": 20,\n \"include_answer\": true,\n \"include_raw_content\": false,\n \"include_images\": false,\n \"include_image_descriptions\": false,\n \"include_domains\": [],\n \"exclude_domains\": [\"\"]\n}",
"sendBody": true,
"sendHeaders": true,
"specifyBody": "json",
"toolDescription": "Web Search tool ",
"optimizeResponse": true,
"parametersHeaders": {
"values": [
{
"name": "Authorization",
"value": "add tavily token",
"valueProvider": "fieldValue"
},
{
"name": "Content-Type",
"value": "application/json",
"valueProvider": "fieldValue"
}
]
},
"placeholderDefinitions": {
"values": [
{
"name": "query",
"type": "string",
"description": "Search Querry"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "5b31d3c8-3fe5-4cfd-bb09-81193e6d973f",
"name": "シンプルメモリ1",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
688,
432
],
"parameters": {},
"typeVersion": 1.3
},
{
"id": "42ff3014-5b83-474f-a35c-5384828b49fd",
"name": "Qdrant ベクトルストア",
"type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
"position": [
1488,
304
],
"parameters": {
"mode": "retrieve-as-tool",
"topK": 3,
"options": {},
"toolName": "add_name",
"toolDescription": "Use RAG to look up information in the knowledgebase.",
"qdrantCollection": {
"__rl": true,
"mode": "list",
"value": "docaiauto",
"cachedResultName": "docaiauto"
}
},
"credentials": {
"qdrantApi": {
"id": "4NinNhNX7VxfgZxs",
"name": "QdrantApi account 2"
}
},
"typeVersion": 1.1
},
{
"id": "c405636b-4957-4de1-9cd4-8733647af3a9",
"name": "チャットメッセージ受信時",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
496,
112
],
"webhookId": "9c27865f-526c-490f-80a2-645bc919e9de",
"parameters": {
"public": true,
"options": {}
},
"typeVersion": 1.1
},
{
"id": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
"name": "Default Data Loader",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
3344,
1328
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "source",
"value": "={{ $('set all metadata').item.json[\"Document name\"] }}"
},
{
"name": "blobType",
"value": "application/jsonb"
},
{
"name": "loc",
"value": "={{ $('Mistral DOC OCR').item.json.usage_info }}"
},
{
"name": "source_metadata_id",
"value": "={{ $('Google Drive1').item.json.file_url }}"
},
{
"name": "department",
"value": "ai_automation"
},
{
"name": "PROJECT",
"value": "={{ $('set all metadata').item.json.PROJECT }}"
},
{
"name": "=DOCUMNENT_TYPE",
"value": "={{ $('set all metadata').item.json.DOCUMENT_TYPE }}"
},
{
"name": "ASSIGNED_TO",
"value": "={{ $('set all metadata').item.json.ASSIGNEDTO }}"
}
]
}
},
"jsonData": "={{ $json.content }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "b008bae1-bc63-47da-8afa-b8a1b17ea412",
"name": "Character Text Splitter",
"type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
"position": [
3408,
1440
],
"parameters": {},
"typeVersion": 1
},
{
"id": "fb42d3ab-5c20-4efc-920a-87dc96890cab",
"name": "Google ドライブ",
"type": "n8n-nodes-base.googleDrive",
"position": [
368,
1136
],
"parameters": {
"filter": {
"folderId": {
"__rl": true,
"mode": "list",
"value": "1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
"cachedResultUrl": "https://drive.google.com/drive/folders/1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
"cachedResultName": "knowledgebaseforaibot"
}
},
"options": {
"fields": [
"id",
"name",
"webViewLink",
"mimeType",
"*"
]
},
"resource": "fileFolder",
"searchMethod": "query"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "xS6kSuLaEkasxHtm",
"name": "Google Drive account 6 rn dbt"
}
},
"typeVersion": 3
},
{
"id": "393465a4-4b14-42ef-b2ca-608a161c6914",
"name": "Google ドライブ1",
"type": "n8n-nodes-base.googleDrive",
"position": [
1088,
1152
],
"parameters": {
"fileId": {
"__rl": true,
"mode": "url",
"value": "={{ $('add metadata').item.json.file_url }}"
},
"options": {},
"operation": "download"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "xS6kSuLaEkasxHtm",
"name": "Google Drive account 6 rn dbt"
}
},
"typeVersion": 3
},
{
"id": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
"name": "Mistral Upload",
"type": "n8n-nodes-base.httpRequest",
"position": [
1328,
1152
],
"parameters": {
"url": "https://api.mistral.ai/v1/files",
"method": "POST",
"options": {},
"sendBody": true,
"contentType": "multipart-form-data",
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "purpose",
"value": "ocr"
},
{
"name": "file",
"parameterType": "formBinaryData",
"inputDataFieldName": "data"
}
]
},
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "255683ca-712a-4386-ae19-b3fbb5f37e30",
"name": "Mistral Signed URL",
"type": "n8n-nodes-base.httpRequest",
"position": [
1552,
1152
],
"parameters": {
"url": "=https://api.mistral.ai/v1/files/{{ $json.id }}/url",
"options": {},
"sendQuery": true,
"sendHeaders": true,
"authentication": "predefinedCredentialType",
"queryParameters": {
"parameters": [
{
"name": "expiry",
"value": "24"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "application/json"
}
]
},
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
"name": "Mistral DOC OCR",
"type": "n8n-nodes-base.httpRequest",
"position": [
1776,
1152
],
"parameters": {
"url": "https://api.mistral.ai/v1/ocr",
"method": "POST",
"options": {},
"jsonBody": "={\n \"model\": \"mistral-ocr-latest\",\n \"document\": {\n \"type\": \"document_url\",\n \"document_url\": \"{{ $json.url }}\"\n },\n \"include_image_base64\": true\n}",
"sendBody": true,
"specifyBody": "json",
"authentication": "predefinedCredentialType",
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "a1c139ec-5ec2-4142-aacc-5df387b677fd",
"name": "クリック時 ‘Test workflow’",
"type": "n8n-nodes-base.manualTrigger",
"position": [
96,
1136
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6ceb4cbc-b4b0-4ac6-befe-a92649279bf4",
"name": "OpenAI 埋め込み",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
3168,
1456
],
"parameters": {
"model": "text-embedding-3-small",
"options": {}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1
},
{
"id": "ee2ce006-d94c-49a7-849f-8155c95ee118",
"name": "Qdrant ベクトルストア1",
"type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
"position": [
3184,
1152
],
"parameters": {
"mode": "insert",
"options": {},
"qdrantCollection": {
"__rl": true,
"mode": "list",
"value": "docaiauto",
"cachedResultName": "docaiauto"
},
"embeddingBatchSize": "=200"
},
"credentials": {
"qdrantApi": {
"id": "hBirQvCk1VaV8cfQ",
"name": "QdrantApi account"
}
},
"typeVersion": 1.1
},
{
"id": "b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259",
"name": "Mistral Cloud Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatMistralCloud",
"position": [
2208,
1456
],
"parameters": {
"model": "mistral-small-latest",
"options": {}
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 1
},
{
"id": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"name": "ai chat agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
992,
112
],
"parameters": {
"options": {
"systemMessage": "=You are a helpful, intelligent AI Knowledge Bot and proactive chat agent. \nLead conversations by asking open-ended questions and offering suggestions. \nAnticipate needs, follow up thoughtfully, and make the user feel supported throughout.\n\n---\n\n## **Knowledge Access Workflow**\n\n1. **Primary Source: qdrant vector store (`add_table_name` table)** \n - For **every** user query, first search **only** in qdrant. \n - If relevant information is found:\n - Respond in a **clear, structured, and informative** manner. \n - **Always include multiple images** if available — aim for **maximum number of relevant images**, not just one. \n - Place each image **inline**, immediately after the sentence/section it illustrates. \n - Never group all images at the end.\n - Always **cite the internal document name** and **provide a clickable URL** to the source document.\n - Use `list doc` tool with `source_metadata_id` to retrieve the document URL.\n \n **Example Citation:** \n > This information is based on the internal AI documentation related to automation projects (Automation Timesheet Blogs). \n > [View Full Document](document_url_here)\n\n2. **If No Match Found:** \n - Reply: \n `\"The information you requested is not available in our internal documentation. Would you like me to do a Web search?\"`\n\n3. **Web Search Rules:** \n - Only search if user explicitly agrees (e.g., \"Yes\", \"Okay\"). \n - use 'web search 1'tool\n - Clearly label as **External Source** and provide clickable URLs. \n Example: \n > This information comes from an external source ([website_name](source_url)).\n\n4. **If No Data in Qdrant or Web:** \n - Reply: \n `\"At the moment, there is no information available to fully address this request based on current internal and external sources.\"`\n\n---\n\n## **Formatting & Style**\n- **Language:** All qdrant queries in English; all responses in English. \n- **Tone:** Professional, clear, structured, and helpful. \n- **Image Usage:** \n - Always include **all available relevant images** MARKDOWN — even if it means showing several per section. \n - Place images **exactly after the point they illustrate**. \n - Use different images for different subtopics, comparisons, or examples. \n - Do **not** skip images if they exist in the source.\n- **For Guide/Instruction Responses:** \n 1. **Step-by-Step Guide** (detailed instructions) \n 2. **Things to Consider** (common pitfalls, important details) \n 3. **Further Understanding** \n - Suggest 2 related topics the user might explore next, relevant to their query.\n- Never skip key details. Always check if more images can be added to improve clarity.\n\n---\n\n## **Core Principles**\n- Be proactive — never wait for the user to guide the entire conversation. \n- Ask open-ended follow-ups. \n- Anticipate related needs based on the query. \n- Maintain accuracy, cite sources, and **use as many relevant images as possible**. \n- ALWAYS SHOW IMAGE AND Keep images inline and contextually placed.\n",
"returnIntermediateSteps": false
}
},
"typeVersion": 1.7
},
{
"id": "3e71d64a-3a04-4f5e-b009-8eddbef94e8c",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
1008,
416
],
"parameters": {
"color": 5,
"width": 400,
"height": 208,
"content": "## WEB SEARCH using tavily (http node)\n [Tavily setup Guide](https://docs.tavily.com/welcome)"
},
"typeVersion": 1
},
{
"id": "3c335f41-3d05-4436-ad0f-ce6957967a0c",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
320,
928
],
"parameters": {
"color": 5,
"width": 224,
"height": 400,
"content": "## GET ALL FILE DATA FROM SELECTED GOOGLE DRIVE FOLDER"
},
"typeVersion": 1
},
{
"id": "a404df40-0f40-46b5-a65b-edc182d06d9b",
"name": "付箋2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1936,
1040
],
"parameters": {
"color": 5,
"width": 192,
"height": 256,
"content": "## Remove empty data fields "
},
"typeVersion": 1
},
{
"id": "4776e7ea-7243-474f-9ec0-47afc46d8479",
"name": "付箋3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1024,
976
],
"parameters": {
"color": 5,
"width": 208,
"height": 320,
"content": "## GET individual files from selected gdrive"
},
"typeVersion": 1
},
{
"id": "3a593a87-2d91-48dc-85da-98a5bc562ff6",
"name": "付箋4",
"type": "n8n-nodes-base.stickyNote",
"position": [
1280,
1008
],
"parameters": {
"color": 5,
"width": 608,
"height": 272,
"content": "## MISTRAL OCR\n [OCR Guide](https://mistral.ai/news/mistral-ocr)\n1. UPLOAD FILE\n2. GET SIGNED URL\n3. GET EXTRACT DATA AFTER USING MISTRAL OCR"
},
"typeVersion": 1
},
{
"id": "65106e47-a303-4f46-be3b-207d6e6c04fa",
"name": "付箋5",
"type": "n8n-nodes-base.stickyNote",
"position": [
352,
16
],
"parameters": {
"color": 5,
"width": 368,
"height": 288,
"content": "## Hosted Chat interface \n"
},
"typeVersion": 1
},
{
"id": "78103cfe-e101-4d70-8dea-08b4ca2bbdb6",
"name": "付箋6",
"type": "n8n-nodes-base.stickyNote",
"position": [
1472,
96
],
"parameters": {
"color": 5,
"width": 272,
"height": 496,
"content": "## QDRANT VCETOR AND OPEN API EMBEDDING \n [QDRANT Guide](https://qdrant.tech/documentation/)"
},
"typeVersion": 1
},
{
"id": "716859d0-f3eb-431c-a6fc-37301508c968",
"name": "付箋7",
"type": "n8n-nodes-base.stickyNote",
"position": [
928,
0
],
"parameters": {
"color": 5,
"width": 432,
"height": 304,
"content": "## AI chat agent\n interact with user and process user input and provide appropriate response using different tools. "
},
"typeVersion": 1
},
{
"id": "aa7c86ee-3229-415e-96eb-92fbd530aa44",
"name": "付箋10",
"type": "n8n-nodes-base.stickyNote",
"position": [
2672,
1008
],
"parameters": {
"color": 5,
"width": 320,
"height": 288,
"content": "## clean all extracted data and convert them to smaller chunks"
},
"typeVersion": 1
},
{
"id": "ccd381e0-1cd2-406d-9aec-7b03c18ee435",
"name": "付箋11",
"type": "n8n-nodes-base.stickyNote",
"position": [
2176,
928
],
"parameters": {
"color": 5,
"width": 288,
"height": 368,
"content": "## assignment agent\n\nfor any given file this node assign which type documents it is ,which project its related too and who are working on it"
},
"typeVersion": 1
},
{
"id": "87f7a493-d65c-447b-a465-3175c1bcbea7",
"name": "付箋12",
"type": "n8n-nodes-base.stickyNote",
"position": [
3088,
976
],
"parameters": {
"color": 5,
"width": 512,
"height": 608,
"content": "## load all chunks into qdrant vector database"
},
"typeVersion": 1
},
{
"id": "66f3dfcf-9fbe-4d63-888e-dda7c422c282",
"name": "付箋9",
"type": "n8n-nodes-base.stickyNote",
"position": [
576,
992
],
"parameters": {
"color": 5,
"width": 256,
"height": 320,
"content": "## loop over google drive folder items"
},
"typeVersion": 1
},
{
"id": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"name": "Loop Over each file in gdrive folder",
"type": "n8n-nodes-base.splitInBatches",
"position": [
608,
1136
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "4300ad47-a92a-408b-aa8b-b56249d341e9",
"name": "add metadata",
"type": "n8n-nodes-base.set",
"position": [
896,
1152
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "10646eae-ae46-4327-a4dc-9987c2d76173",
"name": "file_id",
"type": "string",
"value": "={{ $json.id }}"
},
{
"id": "f4536df5-d0b1-4392-bf17-b8137fb31a44",
"name": "file_type",
"type": "string",
"value": "={{ $json.mimeType }}"
},
{
"id": "77d782de-169d-4a46-8a8e-a3831c04d90f",
"name": "file_title",
"type": "string",
"value": "={{ $json.name }}"
},
{
"id": "9bde4d7f-e4f3-4ebd-9338-dce1350f9eab",
"name": "file_url",
"type": "string",
"value": "={{ $json.webViewLink }}"
},
{
"id": "fae402c8-c486-4b57-8d28-bf669db6b442",
"name": "last_modified_date",
"type": "string",
"value": "={{ $json.modifiedTime }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "efc3f354-0a6d-46dc-8ecf-793873e19466",
"name": "If NODE",
"type": "n8n-nodes-base.if",
"position": [
1984,
1152
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "loose"
},
"combinator": "and",
"conditions": [
{
"id": "d15e917b-43d2-40b8-8b49-af467ff63961",
"operator": {
"type": "string",
"operation": "notExists",
"singleValue": true
},
"leftValue": "={{ $json.data[0].parseJson().skipped }}",
"rightValue": ""
}
]
},
"looseTypeValidation": true
},
"typeVersion": 2.2
},
{
"id": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"name": "based file name it assign differ metadata",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
2208,
1152
],
"parameters": {
"text": "=\n{{ $json.pages[0].markdown }}",
"options": {
"systemPromptTemplate": "You are an expert extraction algorithm.\nOnly extract relevant information from the text.\ngive data is "
},
"attributes": {
"attributes": [
{
"name": "document_type",
"description": "type of document this can be blog, user documentation, technical documentation, manual/guide, educational"
},
{
"name": "project",
"description": " from give project this can any one ,social/digimarketing , chatbot, knowledge bot if nothing that its default"
},
{
"name": "assigned_to",
"description": "who are handling give projects mention all name given for a give project\n\nif project LINKED/DIGIMARRKETING/SOCIAL = [employee name]\nif project KB BOT = [employee name]\nif project CHAT BOT = [employee name]\nif project TIMESHEET = [employee name]"
}
]
}
},
"typeVersion": 1
},
{
"id": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
"name": "set all metadata",
"type": "n8n-nodes-base.set",
"position": [
2512,
1152
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "5132d92c-41da-4a55-ad79-0c329ca7e626",
"name": "Document name",
"type": "string",
"value": "={{ $('Google Drive1').item.json.file_title }}"
},
{
"id": "c8160701-2be7-43c6-bcfa-295fbebe0e23",
"name": "Document data",
"type": "string",
"value": "={{ $('If NODE').item.json.pages[0].markdown }}"
},
{
"id": "1087ab34-5643-4755-b545-cf34d0ae2cd2",
"name": " source",
"type": "string",
"value": "={{ $('Google Drive1').item.json.file_id }}"
},
{
"id": "4317aa31-d9fd-4adc-ab26-b48c208041b3",
"name": "ASSIGNEDTO",
"type": "string",
"value": "={{ $json.output.assigned_to.split(\",\") }}"
},
{
"id": "7a61d775-06b1-42e2-a82a-d8e756fa5586",
"name": "PROJECT",
"type": "string",
"value": "={{ $json.output.project }}"
},
{
"id": "300f49d6-8851-458d-a296-8ceb68ebdd5f",
"name": "DOCUMENT_TYPE",
"type": "string",
"value": "={{ $json.output.document_type }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
"name": "clean output",
"type": "n8n-nodes-base.code",
"position": [
2704,
1152
],
"parameters": {
"jsCode": "// Get incoming data (first item)\nconst data = items[0].json;\n\n// If wrapped in an array, unwrap it\nconst input = Array.isArray(data) ? data[0] : data;\n\nlet output = [\n {\n content: input[\"Document data\"], // main text for embedding\n metadata: {\n document_name: input[\"Document name\"],\n source_id: input[\" source\"], // note: there is a space before \"source\" in your key\n }\n }\n];\n\nreturn output.map(o => ({ json: o }));\n"
},
"typeVersion": 2
},
{
"id": "f9172d1d-060c-46f7-8e48-b235a2610185",
"name": "convert data into smaller chunks",
"type": "n8n-nodes-base.code",
"position": [
2864,
1152
],
"parameters": {
"jsCode": "const chunkSize = 1000; // characters\nconst chunkOverlap = 100; // characters\n\n\nlet newItems = [];\n\nfor (const item of items) {\n let text = item.json.content; // <-- use item.json\n if (!text || typeof text !== \"string\") continue;\n\n const sourceId = item.json.metadata.source_id;\n\n \n // Chunking\n for (let i = 0; i < updatedText.length; i += chunkSize - chunkOverlap) {\n let chunk = updatedText.slice(i, i + chunkSize);\n newItems.push({\n json: {\n content: chunk,\n metadata: item.json.metadata\n }\n });\n }\n}\n\nreturn newItems;\n"
},
"typeVersion": 2
},
{
"id": "2a9539e7-e8dc-4c2a-a016-daab8c7d5339",
"name": "付箋8",
"type": "n8n-nodes-base.stickyNote",
"position": [
3696,
736
],
"parameters": {
"color": 5,
"width": 800,
"height": 1248,
"content": "### **Node List & Descriptions**\n\n1. **Manual Trigger** \n - **What:** Kicks off the workflow when the user clicks **“Test workflow.”** \n - **Why:** Allows for on-demand execution and testing of the data pipeline. \n\n2. **Google Drive (List Files)** \n - **What:** Lists all files within a predefined Google Drive folder (`knowledgebaseforaibot`). \n - **Why:** To retrieve the initial list of all documents that need to be processed. \n\n3. **Loop Over Files** \n - **What:** Iterates through each file retrieved from Google Drive, processing them one by one. \n - **Why:** Ensures each document is handled individually in the following steps. \n\n4. **Google Drive (Download File)** \n - **What:** Downloads the binary content of the current file being processed in the loop. \n - **Why:** Provides the actual file data needed for OCR processing. \n\n5. **Mistral OCR (via HTTP Requests)** \n - **What:** \n - Uploads the file to the Mistral API. \n - Retrieves a temporary signed URL for access. \n - Calls the Mistral OCR endpoint to extract all text content from the document. \n - **Why:** Converts scanned or image-based documents (like PDFs, JPGs) into machine-readable text. \n\n6. **If Node (Check for Success)** \n - **What:** Verifies that the Mistral OCR operation completed successfully without errors. \n - **Why:** Ensures only valid, readable documents continue in the workflow, avoiding process failures. \n\n7. **Information Extractor (LangChain)** \n - **What:** Uses the Mistral AI chat model to analyze the extracted text and assign metadata such as: \n - `document_type` \n - `project` \n - `assigned_to` \n - **Why:** Automatically categorizes and tags documents based on their content for better organization and searchability. \n\n8. **Code Node (Clean & Chunk Data)** \n - **What:** \n - Formats the extracted text and metadata into a clean JSON structure. \n - Splits the document text into smaller, overlapping chunks (≈1000 characters each). \n - **Why:** Prepares data for embedding and ensures compatibility with language model context limits. \n\n9. **OpenAI Embeddings (LangChain)** \n - **What:** Converts each text chunk into a numerical vector using OpenAI’s `text-embedding-3-small` model. \n - **Why:** Encodes the semantic meaning of the text for similarity-based search and retrieval. \n\n10. **Qdrant Vector Store (LangChain)** \n - **What:** Inserts the text chunks and their corresponding embeddings into a Qdrant vector database collection named `docaiauto`. \n - **Why:** Creates a searchable, semantic knowledge base for AI-driven document retrieval. \n\n---\n\n### **flow of data**\n\n**Trigger → List Files (Google Drive) → Loop → Download File → OCR (Mistral) → Validate → Categorize (AI) → Clean & Chunk → Embed (OpenAI) → Store (Qdrant)**\n"
},
"typeVersion": 1
},
{
"id": "1dc0d13d-c75c-44dd-a94c-0b177cf6c094",
"name": "付箋13",
"type": "n8n-nodes-base.stickyNote",
"position": [
1808,
-320
],
"parameters": {
"color": 5,
"width": 960,
"height": 928,
"content": "### **Node List & Descriptions**\n\n1. **Chat Trigger** \n - **What:** Kicks off the workflow whenever a new message is received in the public chat interface. \n - **Why:** To make the workflow interactive and responsive to user input in real-time. \n\n2. **AI Chat Agent** \n - **What:** The central brain of the workflow. It orchestrates the entire process based on a detailed system prompt, managing memory and deciding which tools to use (internal knowledge base or web search). \n - **Why:** To act as an intelligent agent that can handle complex user queries by following a specific set of rules for information retrieval. \n\n3. **OpenAI Chat Model** \n - **What:** The underlying Large Language Model (`gpt-4.1-mini`) that powers the agent's reasoning, understanding, and response generation. \n - **Why:** Provides the core intelligence for understanding user intent and formulating human-like, context-aware answers. \n\n4. **Simple Memory** \n - **What:** Stores a history of the current conversation to provide context for follow-up questions. \n - **Why:** Allows the agent to remember what was previously discussed, leading to a more natural and coherent conversation. \n\n5. **Qdrant Vector Store (RAG Tool)** \n - **What:** \n - Acts as the primary tool for the agent. \n - Searches the internal knowledge base (`docaiauto` collection) for information relevant to the user's query. \n - **Why:** Ensures that the agent first attempts to answer questions using verified, internal documentation before seeking external information. \n\n6. **OpenAI Embeddings** \n - **What:** Works in conjunction with the Qdrant Vector Store to convert the user's text query into a numerical vector. \n - **Why:** Enables powerful semantic search, allowing the agent to find information based on meaning and context, not just keywords. \n\n7. **Web Search (Tavily Tool)** \n - **What:** Acts as a secondary tool for the agent, available only upon user confirmation. It performs an external web search using the Tavily API. \n - **Why:** Provides a fallback to access up-to-date, public information when the internal knowledge base does not contain the required answer. \n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Chat Message → AI Agent → Use Tools (Qdrant or Web Search) → Generate Response (OpenAI) → Send Reply**\n"
},
"typeVersion": 1
},
{
"id": "83726285-3bfd-4d22-ad35-39a5ff3ef414",
"name": "付箋14",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1232,
-176
],
"parameters": {
"color": 5,
"width": 864,
"height": 1856,
"content": "# Konowledge RAG and AI chat agent \n## by DIGITAL BIZ TECH\n\n## **Overview**\n\nThis workflow automates the process of transforming all files stored in Google Drive into a **searchable, intelligent knowledge base** — fully integrated with a **chat-enabled AI assistant**. \nIt also supports **web search augmentation**, allowing the agent to explain, compare, or enrich document insights using the latest online data.\n\n---\n\n## **Key Objectives**\n\n1. **Ingest and Process All Files**\n - Retrieve all files from a specific Google Drive folder.\n - Automatically extract their text content using OCR and AI-based parsing.\n - Generate structured metadata for each document (e.g., project, assignment, employee, and category).\n - Convert the text into **vector embeddings** and store them in a **Qdrant vector database** for semantic retrieval.\n\n2. **Enrich Metadata Intelligence**\n - Enhance each document with contextual metadata fields:\n - `project` — identifies which project the document belongs to.\n - `assignment` — links the file to a specific task or deliverable.\n - `employee` — indicates the team member responsible or related to the document.\n - Improves the **contextual accuracy** of RAG (Retrieval-Augmented Generation) queries by enabling more targeted retrieval.\n\n3. **Integrate with Chat-Based AI Agent**\n - The agent uses a **ai agent with different tools** to:\n - Query internal knowledge (Qdrant collection) for relevant chunks.\n - Optionally trigger a **web search (Tavily API)** when internal data is insufficient.\n - Combine results intelligently to deliver accurate and explainable answers.\n - Users can interact naturally through a chat interface to explore, summarize, or cross-reference different documents.\n\n4. **Maintain a Robust RAG Pipeline for Unstructured Data**\n - Designed to handle **unclean, inconsistent, or multi-format data sources** (PDFs, DOCX, images, etc.).\n - The cleaning and chunking logic ensures uniform embeddings, even from noisy input.\n - The RAG system automatically improves over time as new documents are added or updated in Google Drive.\n\n---\n\n## **Functional Flow**\n\n**Google Drive → File Extraction → Metadata Enrichment → Text Chunking → Embedding (OpenAI) → Storage (Qdrant) → AI Chat Agent → Web Search (Optional)**\n\n---\n\n## **Use Cases**\n\n- 📁 **Automated Knowledge Management:** Build a live, self-updating internal document knowledge base. \n- 🧠 **Smart AI Assistance:** Enable employees to query project or task documents conversationally. \n- 🌐 **Hybrid Knowledge Retrieval:** Combine private document insights with external web data. \n- 🧩 **Flexible Integration:** Can be connected to any folder or department for scalable RAG deployment.\n\n---\n\n## **Benefits**\n\n- Fully automated ingestion from Google Drive. \n- Rich, context-aware metadata for intelligent document relationships. \n- Chat interface for easy access to organizational knowledge. \n- Optional web search for real-time, external context expansion. \n- Maintains a clean, high-quality **RAG (Retrieval-Augmented Generation)** pipeline even from unclean data sources.\n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Google Drive → OCR/Text Extraction → Metadata Enrichment → Embeddings → Qdrant (RAG Store) → Chat AI Agent → Optional Web Search**"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"efc3f354-0a6d-46dc-8ecf-793873e19466": {
"main": [
[
{
"node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"type": "main",
"index": 0
}
],
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"c770feb0-c01a-4e94-832a-3dafc88fb28f": {
"ai_tool": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_tool",
"index": 0
}
]
]
},
"Google Drive": {
"main": [
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"4300ad47-a92a-408b-aa8b-b56249d341e9": {
"main": [
[
{
"node": "Google Drive1",
"type": "main",
"index": 0
}
]
]
},
"2a4026cf-3034-49a8-8dc8-ad67ae571213": {
"main": [
[
{
"node": "f9172d1d-060c-46f7-8e48-b235a2610185",
"type": "main",
"index": 0
}
]
]
},
"Google Drive1": {
"main": [
[
{
"node": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
"type": "main",
"index": 0
}
]
]
},
"3e38082e-4619-47cf-98e0-f7f66a8541c1": {
"main": [
[
{
"node": "255683ca-712a-4386-ae19-b3fbb5f37e30",
"type": "main",
"index": 0
}
]
]
},
"Simple Memory1": {
"ai_memory": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_memory",
"index": 0
}
]
]
},
"2e846e38-ae17-4717-a39b-b3d0e0c567f9": {
"main": [
[
{
"node": "efc3f354-0a6d-46dc-8ecf-793873e19466",
"type": "main",
"index": 0
}
]
]
},
"c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a": {
"main": [
[
{
"node": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
"type": "main",
"index": 0
}
]
]
},
"Embeddings OpenAI": {
"ai_embedding": [
[
{
"node": "Qdrant Vector Store1",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Embeddings OpenAI1": {
"ai_embedding": [
[
{
"node": "Qdrant Vector Store",
"type": "ai_embedding",
"index": 0
}
]
]
},
"255683ca-712a-4386-ae19-b3fbb5f37e30": {
"main": [
[
{
"node": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
"type": "main",
"index": 0
}
]
]
},
"OpenAI Chat Model1": {
"ai_languageModel": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"8f2950f4-0c96-4d70-9aa0-2729dd0b560e": {
"ai_document": [
[
{
"node": "Qdrant Vector Store1",
"type": "ai_document",
"index": 0
}
]
]
},
"Qdrant Vector Store": {
"ai_tool": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_tool",
"index": 0
}
]
]
},
"Qdrant Vector Store1": {
"main": [
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"b008bae1-bc63-47da-8afa-b8a1b17ea412": {
"ai_textSplitter": [
[
{
"node": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259": {
"ai_languageModel": [
[
{
"node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"When chat message received": {
"main": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "main",
"index": 0
}
]
]
},
"f9172d1d-060c-46f7-8e48-b235a2610185": {
"main": [
[
{
"node": "Qdrant Vector Store1",
"type": "main",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "Google Drive",
"type": "main",
"index": 0
}
]
]
},
"d21fe18b-7f9b-46b5-a8b5-2725c849db7f": {
"main": [
[],
[
{
"node": "4300ad47-a92a-408b-aa8b-b56249d341e9",
"type": "main",
"index": 0
}
]
]
},
"33e574c3-47a2-454b-85e9-e3e9983e3d63": {
"main": [
[
{
"node": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
"type": "main",
"index": 0
}
]
]
}
}
}
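The ingestion branch above performs OCR in three HTTP calls (upload the file, request a signed URL, run OCR on it), as summarized in the MISTRAL OCR sticky note. For readers who want to test that sequence outside n8n, here is a minimal standalone sketch; it is not part of the exported workflow and assumes Node.js 18+ run as an ES module, a MISTRAL_API_KEY environment variable, and a local example.pdf. The endpoints, the expiry=24 query parameter, and the mistral-ocr-latest payload mirror the Mistral Upload, Mistral Signed URL, and Mistral DOC OCR nodes above.

// Minimal sketch of the workflow's Mistral OCR sequence (upload -> signed URL -> OCR).
// Assumptions: Node.js 18+ run as an ES module (.mjs) and MISTRAL_API_KEY set in the environment.
import { readFile } from "node:fs/promises";

const API = "https://api.mistral.ai/v1";
const auth = { Authorization: `Bearer ${process.env.MISTRAL_API_KEY}` };

async function ocrDocument(path) {
  // 1. Upload the file with purpose "ocr" (mirrors the "Mistral Upload" node)
  const form = new FormData();
  form.append("purpose", "ocr");
  form.append("file", new Blob([await readFile(path)]), path);
  const uploaded = await (await fetch(`${API}/files`, { method: "POST", headers: auth, body: form })).json();

  // 2. Request a signed URL valid for 24 hours (mirrors the "Mistral Signed URL" node)
  const signed = await (await fetch(`${API}/files/${uploaded.id}/url?expiry=24`, { headers: auth })).json();

  // 3. Run OCR against the signed URL (mirrors the "Mistral DOC OCR" node)
  const ocr = await fetch(`${API}/ocr`, {
    method: "POST",
    headers: { ...auth, "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "mistral-ocr-latest",
      document: { type: "document_url", document_url: signed.url },
      include_image_base64: true,
    }),
  });
  return ocr.json(); // pages[n].markdown holds the extracted text
}

ocrDocument("./example.pdf").then((result) => console.log(result.pages?.[0]?.markdown));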
Frequently Asked Questions
How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, choose "Import from JSON", paste the configuration, and update the credentials as needed.
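If you have shell access to your n8n instance, the n8n CLI also provides an import command (for example, n8n import:workflow --input=workflow.json, assuming the configuration is saved locally as workflow.json).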
What is this workflow suited for?
Advanced - Internal Wiki, AI RAG (retrieval-augmented generation)
Is it paid?
This workflow is completely free. However, third-party services it uses (such as the OpenAI API) may incur their own charges.
Related workflows
- Email History RAG: Create an email knowledge base with Gmail, OpenAI, and the Qdrant vector database (Code, Gmail, Gmail Trigger; 23 nodes, Zain Ali) - Internal Wiki
- Exploring n8n Nodes in a Visual Reference Library (If, Ftp, Set; 113 nodes, I versus AI) - Other
- Contextual Hybrid RAG AI Copy: Sync Google Drive to a Supabase contextual vector database for RAG applications (If, Set, Code; 76 nodes, Michael Taleb) - AI RAG (Retrieval-Augmented Generation)
- My Smart Agent Arena Community Competition: Build a RAG-based Q&A system with Qdrant, Mistral OCR, and GPT-4 (Set, Code, Wait; 41 nodes, Davide) - Content Creation
- 🤖 Build a Documentation Expert Chatbot Using a Gemini RAG Pipeline: A chatbot specialized in n8n documentation built with an OpenAI RAG pipeline (Set, Html, Filter; 46 nodes, Ayham) - Internal Wiki
- 🤖 AI-Powered RAG Chatbot for Documents + Google Drive + Gemini + Qdrant (If, Set, Wait; 50 nodes, Joseph LePage) - Artificial Intelligence