Document RAG and Chat Agent: Google Drive to Qdrant with Mistral OCR
Advanced
This is an automation workflow in the Internal Wiki / AI RAG category containing 40 nodes. It mainly uses If, Set, Code, Google Drive, and HTTP Request nodes.
Prerequisites
- Google Drive API credentials
- OpenAI API key
- Mistral API key (for OCR)
- Tavily API key (for the web search tool)
- Qdrant server connection details (see the collection setup sketch below)
- Credentials for any other target APIs, where required
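Both Qdrant nodes in this workflow point at a collection named docaiauto. If that collection does not exist yet on your Qdrant server, you can create it up front. The snippet below is a minimal sketch rather than part of the exported workflow: it assumes Node.js 18+ (global fetch), a Qdrant instance at localhost:6333, and 1536-dimensional vectors to match OpenAI's text-embedding-3-small with cosine distance; adjust the URL and API key to your own setup.

// Minimal sketch: create the "docaiauto" collection this workflow reads from and writes to.
// Assumptions: Node.js 18+, Qdrant reachable at localhost:6333, vectors sized for
// text-embedding-3-small (1536 dimensions) with cosine distance.
const QDRANT_URL = "http://localhost:6333"; // adjust to your Qdrant server
const COLLECTION = "docaiauto";             // collection name used by the Qdrant nodes

async function createCollection() {
  const res = await fetch(`${QDRANT_URL}/collections/${COLLECTION}`, {
    method: "PUT",
    headers: {
      "Content-Type": "application/json",
      // "api-key": "<your-qdrant-api-key>", // uncomment if your server requires it
    },
    body: JSON.stringify({ vectors: { size: 1536, distance: "Cosine" } }),
  });
  console.log(await res.json()); // expect { "result": true, ... } on success
}

createCollection();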
Export workflow
Import the JSON configuration below into n8n to use this workflow.
{
"meta": {
"instanceId": "e4680277d6b9c8b80748f71c2c1d0f9a640576175738ea2675967f762eeaf9df",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "7935a827-516b-4e64-b370-f17d23c70857",
"name": "OpenAI 埋め込み1",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
1488,
432
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1.2
},
{
"id": "32429618-1bed-4134-953f-fa5f0eed6981",
"name": "OpenAI チャットモデル1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
528,
432
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini",
"cachedResultName": "gpt-4.1-mini"
},
"options": {
"temperature": 0.5
}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1.2
},
{
"id": "c770feb0-c01a-4e94-832a-3dafc88fb28f",
"name": "Web Search",
"type": "@n8n/n8n-nodes-langchain.toolHttpRequest",
"position": [
1200,
480
],
"parameters": {
"url": "https://api.tavily.com/search",
"method": "POST",
"jsonBody": "={\n \"query\": \"{query}\",\n \"topic\": \"general\",\n \"search_depth\": \"advanced\",\n \"max_results\": 20,\n \"include_answer\": true,\n \"include_raw_content\": false,\n \"include_images\": false,\n \"include_image_descriptions\": false,\n \"include_domains\": [],\n \"exclude_domains\": [\"\"]\n}",
"sendBody": true,
"sendHeaders": true,
"specifyBody": "json",
"toolDescription": "Web Search tool ",
"optimizeResponse": true,
"parametersHeaders": {
"values": [
{
"name": "Authorization",
"value": "add tavily token",
"valueProvider": "fieldValue"
},
{
"name": "Content-Type",
"value": "application/json",
"valueProvider": "fieldValue"
}
]
},
"placeholderDefinitions": {
"values": [
{
"name": "query",
"type": "string",
"description": "Search Querry"
}
]
}
},
"typeVersion": 1.1
},
{
"id": "5b31d3c8-3fe5-4cfd-bb09-81193e6d973f",
"name": "シンプルメモリ1",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
688,
432
],
"parameters": {},
"typeVersion": 1.3
},
{
"id": "42ff3014-5b83-474f-a35c-5384828b49fd",
"name": "Qdrant ベクトルストア",
"type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
"position": [
1488,
304
],
"parameters": {
"mode": "retrieve-as-tool",
"topK": 3,
"options": {},
"toolName": "add_name",
"toolDescription": "Use RAG to look up information in the knowledgebase.",
"qdrantCollection": {
"__rl": true,
"mode": "list",
"value": "docaiauto",
"cachedResultName": "docaiauto"
}
},
"credentials": {
"qdrantApi": {
"id": "4NinNhNX7VxfgZxs",
"name": "QdrantApi account 2"
}
},
"typeVersion": 1.1
},
{
"id": "c405636b-4957-4de1-9cd4-8733647af3a9",
"name": "チャットメッセージ受信時",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
496,
112
],
"webhookId": "9c27865f-526c-490f-80a2-645bc919e9de",
"parameters": {
"public": true,
"options": {}
},
"typeVersion": 1.1
},
{
"id": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
"name": "Default Data Loader",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
3344,
1328
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "source",
"value": "={{ $('set all metadata').item.json[\"Document name\"] }}"
},
{
"name": "blobType",
"value": "application/jsonb"
},
{
"name": "loc",
"value": "={{ $('Mistral DOC OCR').item.json.usage_info }}"
},
{
"name": "source_metadata_id",
"value": "={{ $('Google Drive1').item.json.file_url }}"
},
{
"name": "department",
"value": "ai_automation"
},
{
"name": "PROJECT",
"value": "={{ $('set all metadata').item.json.PROJECT }}"
},
{
"name": "=DOCUMNENT_TYPE",
"value": "={{ $('set all metadata').item.json.DOCUMENT_TYPE }}"
},
{
"name": "ASSIGNED_TO",
"value": "={{ $('set all metadata').item.json.ASSIGNEDTO }}"
}
]
}
},
"jsonData": "={{ $json.content }}",
"jsonMode": "expressionData"
},
"typeVersion": 1
},
{
"id": "b008bae1-bc63-47da-8afa-b8a1b17ea412",
"name": "Character Text Splitter",
"type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
"position": [
3408,
1440
],
"parameters": {},
"typeVersion": 1
},
{
"id": "fb42d3ab-5c20-4efc-920a-87dc96890cab",
"name": "Google ドライブ",
"type": "n8n-nodes-base.googleDrive",
"position": [
368,
1136
],
"parameters": {
"filter": {
"folderId": {
"__rl": true,
"mode": "list",
"value": "1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
"cachedResultUrl": "https://drive.google.com/drive/folders/1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
"cachedResultName": "knowledgebaseforaibot"
}
},
"options": {
"fields": [
"id",
"name",
"webViewLink",
"mimeType",
"*"
]
},
"resource": "fileFolder",
"searchMethod": "query"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "xS6kSuLaEkasxHtm",
"name": "Google Drive account 6 rn dbt"
}
},
"typeVersion": 3
},
{
"id": "393465a4-4b14-42ef-b2ca-608a161c6914",
"name": "Google ドライブ1",
"type": "n8n-nodes-base.googleDrive",
"position": [
1088,
1152
],
"parameters": {
"fileId": {
"__rl": true,
"mode": "url",
"value": "={{ $('add metadata').item.json.file_url }}"
},
"options": {},
"operation": "download"
},
"credentials": {
"googleDriveOAuth2Api": {
"id": "xS6kSuLaEkasxHtm",
"name": "Google Drive account 6 rn dbt"
}
},
"typeVersion": 3
},
{
"id": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
"name": "Mistral Upload",
"type": "n8n-nodes-base.httpRequest",
"position": [
1328,
1152
],
"parameters": {
"url": "https://api.mistral.ai/v1/files",
"method": "POST",
"options": {},
"sendBody": true,
"contentType": "multipart-form-data",
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "purpose",
"value": "ocr"
},
{
"name": "file",
"parameterType": "formBinaryData",
"inputDataFieldName": "data"
}
]
},
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "255683ca-712a-4386-ae19-b3fbb5f37e30",
"name": "Mistral Signed URL",
"type": "n8n-nodes-base.httpRequest",
"position": [
1552,
1152
],
"parameters": {
"url": "=https://api.mistral.ai/v1/files/{{ $json.id }}/url",
"options": {},
"sendQuery": true,
"sendHeaders": true,
"authentication": "predefinedCredentialType",
"queryParameters": {
"parameters": [
{
"name": "expiry",
"value": "24"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Accept",
"value": "application/json"
}
]
},
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
"name": "Mistral DOC OCR",
"type": "n8n-nodes-base.httpRequest",
"position": [
1776,
1152
],
"parameters": {
"url": "https://api.mistral.ai/v1/ocr",
"method": "POST",
"options": {},
"jsonBody": "={\n \"model\": \"mistral-ocr-latest\",\n \"document\": {\n \"type\": \"document_url\",\n \"document_url\": \"{{ $json.url }}\"\n },\n \"include_image_base64\": true\n}",
"sendBody": true,
"specifyBody": "json",
"authentication": "predefinedCredentialType",
"nodeCredentialType": "mistralCloudApi"
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 4.2
},
{
"id": "a1c139ec-5ec2-4142-aacc-5df387b677fd",
"name": "クリック時 ‘Test workflow’",
"type": "n8n-nodes-base.manualTrigger",
"position": [
96,
1136
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6ceb4cbc-b4b0-4ac6-befe-a92649279bf4",
"name": "OpenAI 埋め込み",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
3168,
1456
],
"parameters": {
"model": "text-embedding-3-small",
"options": {}
},
"credentials": {
"openAiApi": {
"id": "BEMsaCWtnyqTUtIt",
"name": "OpenAi account 8 dbt digi"
}
},
"typeVersion": 1
},
{
"id": "ee2ce006-d94c-49a7-849f-8155c95ee118",
"name": "Qdrant ベクトルストア1",
"type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
"position": [
3184,
1152
],
"parameters": {
"mode": "insert",
"options": {},
"qdrantCollection": {
"__rl": true,
"mode": "list",
"value": "docaiauto",
"cachedResultName": "docaiauto"
},
"embeddingBatchSize": "=200"
},
"credentials": {
"qdrantApi": {
"id": "hBirQvCk1VaV8cfQ",
"name": "QdrantApi account"
}
},
"typeVersion": 1.1
},
{
"id": "b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259",
"name": "Mistral Cloud Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatMistralCloud",
"position": [
2208,
1456
],
"parameters": {
"model": "mistral-small-latest",
"options": {}
},
"credentials": {
"mistralCloudApi": {
"id": "k9FknrnRcIKYNX7T",
"name": "Mistral Cloud account 2 dbt rn"
}
},
"typeVersion": 1
},
{
"id": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"name": "ai chat agent",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
992,
112
],
"parameters": {
"options": {
"systemMessage": "=You are a helpful, intelligent AI Knowledge Bot and proactive chat agent. \nLead conversations by asking open-ended questions and offering suggestions. \nAnticipate needs, follow up thoughtfully, and make the user feel supported throughout.\n\n---\n\n## **Knowledge Access Workflow**\n\n1. **Primary Source: qdrant vector store (`add_table_name` table)** \n - For **every** user query, first search **only** in qdrant. \n - If relevant information is found:\n - Respond in a **clear, structured, and informative** manner. \n - **Always include multiple images** if available — aim for **maximum number of relevant images**, not just one. \n - Place each image **inline**, immediately after the sentence/section it illustrates. \n - Never group all images at the end.\n - Always **cite the internal document name** and **provide a clickable URL** to the source document.\n - Use `list doc` tool with `source_metadata_id` to retrieve the document URL.\n \n **Example Citation:** \n > This information is based on the internal AI documentation related to automation projects (Automation Timesheet Blogs). \n > [View Full Document](document_url_here)\n\n2. **If No Match Found:** \n - Reply: \n `\"The information you requested is not available in our internal documentation. Would you like me to do a Web search?\"`\n\n3. **Web Search Rules:** \n - Only search if user explicitly agrees (e.g., \"Yes\", \"Okay\"). \n - use 'web search 1'tool\n - Clearly label as **External Source** and provide clickable URLs. \n Example: \n > This information comes from an external source ([website_name](source_url)).\n\n4. **If No Data in Qdrant or Web:** \n - Reply: \n `\"At the moment, there is no information available to fully address this request based on current internal and external sources.\"`\n\n---\n\n## **Formatting & Style**\n- **Language:** All qdrant queries in English; all responses in English. \n- **Tone:** Professional, clear, structured, and helpful. \n- **Image Usage:** \n - Always include **all available relevant images** MARKDOWN — even if it means showing several per section. \n - Place images **exactly after the point they illustrate**. \n - Use different images for different subtopics, comparisons, or examples. \n - Do **not** skip images if they exist in the source.\n- **For Guide/Instruction Responses:** \n 1. **Step-by-Step Guide** (detailed instructions) \n 2. **Things to Consider** (common pitfalls, important details) \n 3. **Further Understanding** \n - Suggest 2 related topics the user might explore next, relevant to their query.\n- Never skip key details. Always check if more images can be added to improve clarity.\n\n---\n\n## **Core Principles**\n- Be proactive — never wait for the user to guide the entire conversation. \n- Ask open-ended follow-ups. \n- Anticipate related needs based on the query. \n- Maintain accuracy, cite sources, and **use as many relevant images as possible**. \n- ALWAYS SHOW IMAGE AND Keep images inline and contextually placed.\n",
"returnIntermediateSteps": false
}
},
"typeVersion": 1.7
},
{
"id": "3e71d64a-3a04-4f5e-b009-8eddbef94e8c",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
1008,
416
],
"parameters": {
"color": 5,
"width": 400,
"height": 208,
"content": "## WEB SEARCH using tavily (http node)\n [Tavily setup Guide](https://docs.tavily.com/welcome)"
},
"typeVersion": 1
},
{
"id": "3c335f41-3d05-4436-ad0f-ce6957967a0c",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
320,
928
],
"parameters": {
"color": 5,
"width": 224,
"height": 400,
"content": "## GET ALL FILE DATA FROM SELECTED GOOGLE DRIVE FOLDER"
},
"typeVersion": 1
},
{
"id": "a404df40-0f40-46b5-a65b-edc182d06d9b",
"name": "付箋2",
"type": "n8n-nodes-base.stickyNote",
"position": [
1936,
1040
],
"parameters": {
"color": 5,
"width": 192,
"height": 256,
"content": "## Remove empty data fields "
},
"typeVersion": 1
},
{
"id": "4776e7ea-7243-474f-9ec0-47afc46d8479",
"name": "付箋3",
"type": "n8n-nodes-base.stickyNote",
"position": [
1024,
976
],
"parameters": {
"color": 5,
"width": 208,
"height": 320,
"content": "## GET individual files from selected gdrive"
},
"typeVersion": 1
},
{
"id": "3a593a87-2d91-48dc-85da-98a5bc562ff6",
"name": "付箋4",
"type": "n8n-nodes-base.stickyNote",
"position": [
1280,
1008
],
"parameters": {
"color": 5,
"width": 608,
"height": 272,
"content": "## MISTRAL OCR\n [OCR Guide](https://mistral.ai/news/mistral-ocr)\n1. UPLOAD FILE\n2. GET SIGNED URL\n3. GET EXTRACT DATA AFTER USING MISTRAL OCR"
},
"typeVersion": 1
},
{
"id": "65106e47-a303-4f46-be3b-207d6e6c04fa",
"name": "付箋5",
"type": "n8n-nodes-base.stickyNote",
"position": [
352,
16
],
"parameters": {
"color": 5,
"width": 368,
"height": 288,
"content": "## Hosted Chat interface \n"
},
"typeVersion": 1
},
{
"id": "78103cfe-e101-4d70-8dea-08b4ca2bbdb6",
"name": "付箋6",
"type": "n8n-nodes-base.stickyNote",
"position": [
1472,
96
],
"parameters": {
"color": 5,
"width": 272,
"height": 496,
"content": "## QDRANT VCETOR AND OPEN API EMBEDDING \n [QDRANT Guide](https://qdrant.tech/documentation/)"
},
"typeVersion": 1
},
{
"id": "716859d0-f3eb-431c-a6fc-37301508c968",
"name": "付箋7",
"type": "n8n-nodes-base.stickyNote",
"position": [
928,
0
],
"parameters": {
"color": 5,
"width": 432,
"height": 304,
"content": "## AI chat agent\n interact with user and process user input and provide appropriate response using different tools. "
},
"typeVersion": 1
},
{
"id": "aa7c86ee-3229-415e-96eb-92fbd530aa44",
"name": "付箋10",
"type": "n8n-nodes-base.stickyNote",
"position": [
2672,
1008
],
"parameters": {
"color": 5,
"width": 320,
"height": 288,
"content": "## clean all extracted data and convert them to smaller chunks"
},
"typeVersion": 1
},
{
"id": "ccd381e0-1cd2-406d-9aec-7b03c18ee435",
"name": "付箋11",
"type": "n8n-nodes-base.stickyNote",
"position": [
2176,
928
],
"parameters": {
"color": 5,
"width": 288,
"height": 368,
"content": "## assignment agent\n\nfor any given file this node assign which type documents it is ,which project its related too and who are working on it"
},
"typeVersion": 1
},
{
"id": "87f7a493-d65c-447b-a465-3175c1bcbea7",
"name": "付箋12",
"type": "n8n-nodes-base.stickyNote",
"position": [
3088,
976
],
"parameters": {
"color": 5,
"width": 512,
"height": 608,
"content": "## load all chunks into qdrant vector database"
},
"typeVersion": 1
},
{
"id": "66f3dfcf-9fbe-4d63-888e-dda7c422c282",
"name": "付箋9",
"type": "n8n-nodes-base.stickyNote",
"position": [
576,
992
],
"parameters": {
"color": 5,
"width": 256,
"height": 320,
"content": "## loop over google drive folder items"
},
"typeVersion": 1
},
{
"id": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"name": "Loop Over each file in gdrive folder",
"type": "n8n-nodes-base.splitInBatches",
"position": [
608,
1136
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "4300ad47-a92a-408b-aa8b-b56249d341e9",
"name": "add metadata",
"type": "n8n-nodes-base.set",
"position": [
896,
1152
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "10646eae-ae46-4327-a4dc-9987c2d76173",
"name": "file_id",
"type": "string",
"value": "={{ $json.id }}"
},
{
"id": "f4536df5-d0b1-4392-bf17-b8137fb31a44",
"name": "file_type",
"type": "string",
"value": "={{ $json.mimeType }}"
},
{
"id": "77d782de-169d-4a46-8a8e-a3831c04d90f",
"name": "file_title",
"type": "string",
"value": "={{ $json.name }}"
},
{
"id": "9bde4d7f-e4f3-4ebd-9338-dce1350f9eab",
"name": "file_url",
"type": "string",
"value": "={{ $json.webViewLink }}"
},
{
"id": "fae402c8-c486-4b57-8d28-bf669db6b442",
"name": "last_modified_date",
"type": "string",
"value": "={{ $json.modifiedTime }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "efc3f354-0a6d-46dc-8ecf-793873e19466",
"name": "If NODE",
"type": "n8n-nodes-base.if",
"position": [
1984,
1152
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "loose"
},
"combinator": "and",
"conditions": [
{
"id": "d15e917b-43d2-40b8-8b49-af467ff63961",
"operator": {
"type": "string",
"operation": "notExists",
"singleValue": true
},
"leftValue": "={{ $json.data[0].parseJson().skipped }}",
"rightValue": ""
}
]
},
"looseTypeValidation": true
},
"typeVersion": 2.2
},
{
"id": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"name": "based file name it assign differ metadata",
"type": "@n8n/n8n-nodes-langchain.informationExtractor",
"position": [
2208,
1152
],
"parameters": {
"text": "=\n{{ $json.pages[0].markdown }}",
"options": {
"systemPromptTemplate": "You are an expert extraction algorithm.\nOnly extract relevant information from the text.\ngive data is "
},
"attributes": {
"attributes": [
{
"name": "document_type",
"description": "type of document this can be blog, user documentation, technical documentation, manual/guide, educational"
},
{
"name": "project",
"description": " from give project this can any one ,social/digimarketing , chatbot, knowledge bot if nothing that its default"
},
{
"name": "assigned_to",
"description": "who are handling give projects mention all name given for a give project\n\nif project LINKED/DIGIMARRKETING/SOCIAL = [employee name]\nif project KB BOT = [employee name]\nif project CHAT BOT = [employee name]\nif project TIMESHEET = [employee name]"
}
]
}
},
"typeVersion": 1
},
{
"id": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
"name": "set all metadata",
"type": "n8n-nodes-base.set",
"position": [
2512,
1152
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "5132d92c-41da-4a55-ad79-0c329ca7e626",
"name": "Document name",
"type": "string",
"value": "={{ $('Google Drive1').item.json.file_title }}"
},
{
"id": "c8160701-2be7-43c6-bcfa-295fbebe0e23",
"name": "Document data",
"type": "string",
"value": "={{ $('If NODE').item.json.pages[0].markdown }}"
},
{
"id": "1087ab34-5643-4755-b545-cf34d0ae2cd2",
"name": " source",
"type": "string",
"value": "={{ $('Google Drive1').item.json.file_id }}"
},
{
"id": "4317aa31-d9fd-4adc-ab26-b48c208041b3",
"name": "ASSIGNEDTO",
"type": "string",
"value": "={{ $json.output.assigned_to.split(\",\") }}"
},
{
"id": "7a61d775-06b1-42e2-a82a-d8e756fa5586",
"name": "PROJECT",
"type": "string",
"value": "={{ $json.output.project }}"
},
{
"id": "300f49d6-8851-458d-a296-8ceb68ebdd5f",
"name": "DOCUMENT_TYPE",
"type": "string",
"value": "={{ $json.output.document_type }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
"name": "clean output",
"type": "n8n-nodes-base.code",
"position": [
2704,
1152
],
"parameters": {
"jsCode": "// Get incoming data (first item)\nconst data = items[0].json;\n\n// If wrapped in an array, unwrap it\nconst input = Array.isArray(data) ? data[0] : data;\n\nlet output = [\n {\n content: input[\"Document data\"], // main text for embedding\n metadata: {\n document_name: input[\"Document name\"],\n source_id: input[\" source\"], // note: there is a space before \"source\" in your key\n }\n }\n];\n\nreturn output.map(o => ({ json: o }));\n"
},
"typeVersion": 2
},
{
"id": "f9172d1d-060c-46f7-8e48-b235a2610185",
"name": "convert data into smaller chunks",
"type": "n8n-nodes-base.code",
"position": [
2864,
1152
],
"parameters": {
"jsCode": "const chunkSize = 1000; // characters\nconst chunkOverlap = 100; // characters\n\n\nlet newItems = [];\n\nfor (const item of items) {\n let text = item.json.content; // <-- use item.json\n if (!text || typeof text !== \"string\") continue;\n\n const sourceId = item.json.metadata.source_id;\n\n \n // Chunking\n for (let i = 0; i < updatedText.length; i += chunkSize - chunkOverlap) {\n let chunk = updatedText.slice(i, i + chunkSize);\n newItems.push({\n json: {\n content: chunk,\n metadata: item.json.metadata\n }\n });\n }\n}\n\nreturn newItems;\n"
},
"typeVersion": 2
},
{
"id": "2a9539e7-e8dc-4c2a-a016-daab8c7d5339",
"name": "付箋8",
"type": "n8n-nodes-base.stickyNote",
"position": [
3696,
736
],
"parameters": {
"color": 5,
"width": 800,
"height": 1248,
"content": "### **Node List & Descriptions**\n\n1. **Manual Trigger** \n - **What:** Kicks off the workflow when the user clicks **“Test workflow.”** \n - **Why:** Allows for on-demand execution and testing of the data pipeline. \n\n2. **Google Drive (List Files)** \n - **What:** Lists all files within a predefined Google Drive folder (`knowledgebaseforaibot`). \n - **Why:** To retrieve the initial list of all documents that need to be processed. \n\n3. **Loop Over Files** \n - **What:** Iterates through each file retrieved from Google Drive, processing them one by one. \n - **Why:** Ensures each document is handled individually in the following steps. \n\n4. **Google Drive (Download File)** \n - **What:** Downloads the binary content of the current file being processed in the loop. \n - **Why:** Provides the actual file data needed for OCR processing. \n\n5. **Mistral OCR (via HTTP Requests)** \n - **What:** \n - Uploads the file to the Mistral API. \n - Retrieves a temporary signed URL for access. \n - Calls the Mistral OCR endpoint to extract all text content from the document. \n - **Why:** Converts scanned or image-based documents (like PDFs, JPGs) into machine-readable text. \n\n6. **If Node (Check for Success)** \n - **What:** Verifies that the Mistral OCR operation completed successfully without errors. \n - **Why:** Ensures only valid, readable documents continue in the workflow, avoiding process failures. \n\n7. **Information Extractor (LangChain)** \n - **What:** Uses the Mistral AI chat model to analyze the extracted text and assign metadata such as: \n - `document_type` \n - `project` \n - `assigned_to` \n - **Why:** Automatically categorizes and tags documents based on their content for better organization and searchability. \n\n8. **Code Node (Clean & Chunk Data)** \n - **What:** \n - Formats the extracted text and metadata into a clean JSON structure. \n - Splits the document text into smaller, overlapping chunks (≈1000 characters each). \n - **Why:** Prepares data for embedding and ensures compatibility with language model context limits. \n\n9. **OpenAI Embeddings (LangChain)** \n - **What:** Converts each text chunk into a numerical vector using OpenAI’s `text-embedding-3-small` model. \n - **Why:** Encodes the semantic meaning of the text for similarity-based search and retrieval. \n\n10. **Qdrant Vector Store (LangChain)** \n - **What:** Inserts the text chunks and their corresponding embeddings into a Qdrant vector database collection named `docaiauto`. \n - **Why:** Creates a searchable, semantic knowledge base for AI-driven document retrieval. \n\n---\n\n### **flow of data**\n\n**Trigger → List Files (Google Drive) → Loop → Download File → OCR (Mistral) → Validate → Categorize (AI) → Clean & Chunk → Embed (OpenAI) → Store (Qdrant)**\n"
},
"typeVersion": 1
},
{
"id": "1dc0d13d-c75c-44dd-a94c-0b177cf6c094",
"name": "付箋13",
"type": "n8n-nodes-base.stickyNote",
"position": [
1808,
-320
],
"parameters": {
"color": 5,
"width": 960,
"height": 928,
"content": "### **Node List & Descriptions**\n\n1. **Chat Trigger** \n - **What:** Kicks off the workflow whenever a new message is received in the public chat interface. \n - **Why:** To make the workflow interactive and responsive to user input in real-time. \n\n2. **AI Chat Agent** \n - **What:** The central brain of the workflow. It orchestrates the entire process based on a detailed system prompt, managing memory and deciding which tools to use (internal knowledge base or web search). \n - **Why:** To act as an intelligent agent that can handle complex user queries by following a specific set of rules for information retrieval. \n\n3. **OpenAI Chat Model** \n - **What:** The underlying Large Language Model (`gpt-4.1-mini`) that powers the agent's reasoning, understanding, and response generation. \n - **Why:** Provides the core intelligence for understanding user intent and formulating human-like, context-aware answers. \n\n4. **Simple Memory** \n - **What:** Stores a history of the current conversation to provide context for follow-up questions. \n - **Why:** Allows the agent to remember what was previously discussed, leading to a more natural and coherent conversation. \n\n5. **Qdrant Vector Store (RAG Tool)** \n - **What:** \n - Acts as the primary tool for the agent. \n - Searches the internal knowledge base (`docaiauto` collection) for information relevant to the user's query. \n - **Why:** Ensures that the agent first attempts to answer questions using verified, internal documentation before seeking external information. \n\n6. **OpenAI Embeddings** \n - **What:** Works in conjunction with the Qdrant Vector Store to convert the user's text query into a numerical vector. \n - **Why:** Enables powerful semantic search, allowing the agent to find information based on meaning and context, not just keywords. \n\n7. **Web Search (Tavily Tool)** \n - **What:** Acts as a secondary tool for the agent, available only upon user confirmation. It performs an external web search using the Tavily API. \n - **Why:** Provides a fallback to access up-to-date, public information when the internal knowledge base does not contain the required answer. \n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Chat Message → AI Agent → Use Tools (Qdrant or Web Search) → Generate Response (OpenAI) → Send Reply**\n"
},
"typeVersion": 1
},
{
"id": "83726285-3bfd-4d22-ad35-39a5ff3ef414",
"name": "付箋14",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1232,
-176
],
"parameters": {
"color": 5,
"width": 864,
"height": 1856,
"content": "# Konowledge RAG and AI chat agent \n## by DIGITAL BIZ TECH\n\n## **Overview**\n\nThis workflow automates the process of transforming all files stored in Google Drive into a **searchable, intelligent knowledge base** — fully integrated with a **chat-enabled AI assistant**. \nIt also supports **web search augmentation**, allowing the agent to explain, compare, or enrich document insights using the latest online data.\n\n---\n\n## **Key Objectives**\n\n1. **Ingest and Process All Files**\n - Retrieve all files from a specific Google Drive folder.\n - Automatically extract their text content using OCR and AI-based parsing.\n - Generate structured metadata for each document (e.g., project, assignment, employee, and category).\n - Convert the text into **vector embeddings** and store them in a **Qdrant vector database** for semantic retrieval.\n\n2. **Enrich Metadata Intelligence**\n - Enhance each document with contextual metadata fields:\n - `project` — identifies which project the document belongs to.\n - `assignment` — links the file to a specific task or deliverable.\n - `employee` — indicates the team member responsible or related to the document.\n - Improves the **contextual accuracy** of RAG (Retrieval-Augmented Generation) queries by enabling more targeted retrieval.\n\n3. **Integrate with Chat-Based AI Agent**\n - The agent uses a **ai agent with different tools** to:\n - Query internal knowledge (Qdrant collection) for relevant chunks.\n - Optionally trigger a **web search (Tavily API)** when internal data is insufficient.\n - Combine results intelligently to deliver accurate and explainable answers.\n - Users can interact naturally through a chat interface to explore, summarize, or cross-reference different documents.\n\n4. **Maintain a Robust RAG Pipeline for Unstructured Data**\n - Designed to handle **unclean, inconsistent, or multi-format data sources** (PDFs, DOCX, images, etc.).\n - The cleaning and chunking logic ensures uniform embeddings, even from noisy input.\n - The RAG system automatically improves over time as new documents are added or updated in Google Drive.\n\n---\n\n## **Functional Flow**\n\n**Google Drive → File Extraction → Metadata Enrichment → Text Chunking → Embedding (OpenAI) → Storage (Qdrant) → AI Chat Agent → Web Search (Optional)**\n\n---\n\n## **Use Cases**\n\n- 📁 **Automated Knowledge Management:** Build a live, self-updating internal document knowledge base. \n- 🧠 **Smart AI Assistance:** Enable employees to query project or task documents conversationally. \n- 🌐 **Hybrid Knowledge Retrieval:** Combine private document insights with external web data. \n- 🧩 **Flexible Integration:** Can be connected to any folder or department for scalable RAG deployment.\n\n---\n\n## **Benefits**\n\n- Fully automated ingestion from Google Drive. \n- Rich, context-aware metadata for intelligent document relationships. \n- Chat interface for easy access to organizational knowledge. \n- Optional web search for real-time, external context expansion. \n- Maintains a clean, high-quality **RAG (Retrieval-Augmented Generation)** pipeline even from unclean data sources.\n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Google Drive → OCR/Text Extraction → Metadata Enrichment → Embeddings → Qdrant (RAG Store) → Chat AI Agent → Optional Web Search**"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"efc3f354-0a6d-46dc-8ecf-793873e19466": {
"main": [
[
{
"node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"type": "main",
"index": 0
}
],
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"c770feb0-c01a-4e94-832a-3dafc88fb28f": {
"ai_tool": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_tool",
"index": 0
}
]
]
},
"Google Drive": {
"main": [
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"4300ad47-a92a-408b-aa8b-b56249d341e9": {
"main": [
[
{
"node": "Google Drive1",
"type": "main",
"index": 0
}
]
]
},
"2a4026cf-3034-49a8-8dc8-ad67ae571213": {
"main": [
[
{
"node": "f9172d1d-060c-46f7-8e48-b235a2610185",
"type": "main",
"index": 0
}
]
]
},
"Google Drive1": {
"main": [
[
{
"node": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
"type": "main",
"index": 0
}
]
]
},
"3e38082e-4619-47cf-98e0-f7f66a8541c1": {
"main": [
[
{
"node": "255683ca-712a-4386-ae19-b3fbb5f37e30",
"type": "main",
"index": 0
}
]
]
},
"Simple Memory1": {
"ai_memory": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_memory",
"index": 0
}
]
]
},
"2e846e38-ae17-4717-a39b-b3d0e0c567f9": {
"main": [
[
{
"node": "efc3f354-0a6d-46dc-8ecf-793873e19466",
"type": "main",
"index": 0
}
]
]
},
"c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a": {
"main": [
[
{
"node": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
"type": "main",
"index": 0
}
]
]
},
"Embeddings OpenAI": {
"ai_embedding": [
[
{
"node": "Qdrant Vector Store1",
"type": "ai_embedding",
"index": 0
}
]
]
},
"Embeddings OpenAI1": {
"ai_embedding": [
[
{
"node": "Qdrant Vector Store",
"type": "ai_embedding",
"index": 0
}
]
]
},
"255683ca-712a-4386-ae19-b3fbb5f37e30": {
"main": [
[
{
"node": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
"type": "main",
"index": 0
}
]
]
},
"OpenAI Chat Model1": {
"ai_languageModel": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"8f2950f4-0c96-4d70-9aa0-2729dd0b560e": {
"ai_document": [
[
{
"node": "Qdrant Vector Store1",
"type": "ai_document",
"index": 0
}
]
]
},
"Qdrant Vector Store": {
"ai_tool": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "ai_tool",
"index": 0
}
]
]
},
"Qdrant Vector Store1": {
"main": [
[
{
"node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
"type": "main",
"index": 0
}
]
]
},
"b008bae1-bc63-47da-8afa-b8a1b17ea412": {
"ai_textSplitter": [
[
{
"node": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259": {
"ai_languageModel": [
[
{
"node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"When chat message received": {
"main": [
[
{
"node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
"type": "main",
"index": 0
}
]
]
},
"f9172d1d-060c-46f7-8e48-b235a2610185": {
"main": [
[
{
"node": "Qdrant Vector Store1",
"type": "main",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "Google Drive",
"type": "main",
"index": 0
}
]
]
},
"d21fe18b-7f9b-46b5-a8b5-2725c849db7f": {
"main": [
[],
[
{
"node": "4300ad47-a92a-408b-aa8b-b56249d341e9",
"type": "main",
"index": 0
}
]
]
},
"33e574c3-47a2-454b-85e9-e3e9983e3d63": {
"main": [
[
{
"node": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
"type": "main",
"index": 0
}
]
]
}
}
}
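The ingestion branch above performs OCR in three HTTP calls (upload the file, request a signed URL, run OCR on it), as summarized in the MISTRAL OCR sticky note. For readers who want to test that sequence outside n8n, here is a minimal standalone sketch; it is not part of the exported workflow and assumes Node.js 18+ run as an ES module, a MISTRAL_API_KEY environment variable, and a local example.pdf. The endpoints, the expiry=24 query parameter, and the mistral-ocr-latest payload mirror the Mistral Upload, Mistral Signed URL, and Mistral DOC OCR nodes above.

// Minimal sketch of the workflow's Mistral OCR sequence (upload -> signed URL -> OCR).
// Assumptions: Node.js 18+ run as an ES module (.mjs) and MISTRAL_API_KEY set in the environment.
import { readFile } from "node:fs/promises";

const API = "https://api.mistral.ai/v1";
const auth = { Authorization: `Bearer ${process.env.MISTRAL_API_KEY}` };

async function ocrDocument(path) {
  // 1. Upload the file with purpose "ocr" (mirrors the "Mistral Upload" node)
  const form = new FormData();
  form.append("purpose", "ocr");
  form.append("file", new Blob([await readFile(path)]), path);
  const uploaded = await (await fetch(`${API}/files`, { method: "POST", headers: auth, body: form })).json();

  // 2. Request a signed URL valid for 24 hours (mirrors the "Mistral Signed URL" node)
  const signed = await (await fetch(`${API}/files/${uploaded.id}/url?expiry=24`, { headers: auth })).json();

  // 3. Run OCR against the signed URL (mirrors the "Mistral DOC OCR" node)
  const ocr = await fetch(`${API}/ocr`, {
    method: "POST",
    headers: { ...auth, "Content-Type": "application/json" },
    body: JSON.stringify({
      model: "mistral-ocr-latest",
      document: { type: "document_url", document_url: signed.url },
      include_image_base64: true,
    }),
  });
  return ocr.json(); // pages[n].markdown holds the extracted text
}

ocrDocument("./example.pdf").then((result) => console.log(result.pages?.[0]?.markdown));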
Frequently Asked Questions
How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, choose "Import from JSON", paste the configuration, and update the credentials as needed.
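If you have shell access to your n8n instance, the n8n CLI also provides an import command (for example, n8n import:workflow --input=workflow.json, assuming the configuration is saved locally as workflow.json).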
What is this workflow suited for?
Advanced - Internal Wiki, AI RAG (retrieval-augmented generation)
Is it paid?
This workflow is completely free. However, third-party services it uses (such as the OpenAI API) may incur their own charges.
Related workflows
- Email History RAG: Create an email knowledge base with Gmail, OpenAI, and the Qdrant vector database (Code, Gmail, Gmail Trigger; 23 nodes, Zain Ali) - Internal Wiki
- Exploring n8n Nodes in a Visual Reference Library (If, Ftp, Set; 113 nodes, I versus AI) - Other
- Contextual Hybrid RAG AI Copy: Sync Google Drive to a Supabase contextual vector database for RAG applications (If, Set, Code; 76 nodes, Michael Taleb) - AI RAG (Retrieval-Augmented Generation)
- My Smart Agent Arena Community Competition: Build a RAG-based Q&A system with Qdrant, Mistral OCR, and GPT-4 (Set, Code, Wait; 41 nodes, Davide) - Content Creation
- 🤖 Build a Documentation Expert Chatbot Using a Gemini RAG Pipeline: A chatbot specialized in n8n documentation built with an OpenAI RAG pipeline (Set, Html, Filter; 46 nodes, Ayham) - Internal Wiki
- 🤖 AI-Powered RAG Chatbot for Documents + Google Drive + Gemini + Qdrant (If, Set, Wait; 50 nodes, Joseph LePage) - Artificial Intelligence