ドキュメントRAGとチャットエージェント:Google DriveからQdrantへ、Mistral OCR

上級

これはInternal Wiki, AI RAG分野の自動化ワークフローで、40個のノードを含みます。主にIf, Set, Code, GoogleDrive, HttpRequestなどのノードを使用。 ドキュメントRAGチャットエージェント:Google Drive→QdrantとMistral OCR

前提条件
  • Google Drive API認証情報
  • ターゲットAPIの認証情報が必要な場合あり
  • OpenAI API Key
  • Qdrantサーバー接続情報
ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
  "meta": {
    "instanceId": "e4680277d6b9c8b80748f71c2c1d0f9a640576175738ea2675967f762eeaf9df",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "7935a827-516b-4e64-b370-f17d23c70857",
      "name": "OpenAI 埋め込み1",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        1488,
        432
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "BEMsaCWtnyqTUtIt",
          "name": "OpenAi account 8 dbt digi"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "32429618-1bed-4134-953f-fa5f0eed6981",
      "name": "OpenAI チャットモデル1",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        528,
        432
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4.1-mini",
          "cachedResultName": "gpt-4.1-mini"
        },
        "options": {
          "temperature": 0.5
        }
      },
      "credentials": {
        "openAiApi": {
          "id": "BEMsaCWtnyqTUtIt",
          "name": "OpenAi account 8 dbt digi"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "c770feb0-c01a-4e94-832a-3dafc88fb28f",
      "name": "Web Search",
      "type": "@n8n/n8n-nodes-langchain.toolHttpRequest",
      "position": [
        1200,
        480
      ],
      "parameters": {
        "url": "https://api.tavily.com/search",
        "method": "POST",
        "jsonBody": "={\n  \"query\": \"{query}\",\n  \"topic\": \"general\",\n  \"search_depth\": \"advanced\",\n  \"max_results\": 20,\n  \"include_answer\": true,\n  \"include_raw_content\": false,\n  \"include_images\": false,\n  \"include_image_descriptions\": false,\n  \"include_domains\": [],\n  \"exclude_domains\": [\"\"]\n}",
        "sendBody": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "toolDescription": "Web Search tool ",
        "optimizeResponse": true,
        "parametersHeaders": {
          "values": [
            {
              "name": "Authorization",
              "value": "add tavily token",
              "valueProvider": "fieldValue"
            },
            {
              "name": "Content-Type",
              "value": "application/json",
              "valueProvider": "fieldValue"
            }
          ]
        },
        "placeholderDefinitions": {
          "values": [
            {
              "name": "query",
              "type": "string",
              "description": "Search Query"
            }
          ]
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "5b31d3c8-3fe5-4cfd-bb09-81193e6d973f",
      "name": "シンプルメモリ1",
      "type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
      "position": [
        688,
        432
      ],
      "parameters": {},
      "typeVersion": 1.3
    },
    {
      "id": "42ff3014-5b83-474f-a35c-5384828b49fd",
      "name": "Qdrant ベクトルストア",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
      "position": [
        1488,
        304
      ],
      "parameters": {
        "mode": "retrieve-as-tool",
        "topK": 3,
        "options": {},
        "toolName": "add_name",
        "toolDescription": "Use RAG to look up information in the knowledgebase.",
        "qdrantCollection": {
          "__rl": true,
          "mode": "list",
          "value": "docaiauto",
          "cachedResultName": "docaiauto"
        }
      },
      "credentials": {
        "qdrantApi": {
          "id": "4NinNhNX7VxfgZxs",
          "name": "QdrantApi account 2"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "c405636b-4957-4de1-9cd4-8733647af3a9",
      "name": "チャットメッセージ受信時",
      "type": "@n8n/n8n-nodes-langchain.chatTrigger",
      "position": [
        496,
        112
      ],
      "webhookId": "9c27865f-526c-490f-80a2-645bc919e9de",
      "parameters": {
        "public": true,
        "options": {}
      },
      "typeVersion": 1.1
    },
    {
      "id": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
      "name": "Default Data Loader",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        3344,
        1328
      ],
      "parameters": {
        "options": {
          "metadata": {
            "metadataValues": [
              {
                "name": "source",
                "value": "={{ $('set all metadata').item.json[\"Document name\"] }}"
              },
              {
                "name": "blobType",
                "value": "application/jsonb"
              },
              {
                "name": "loc",
                "value": "={{ $('Mistral DOC OCR').item.json.usage_info }}"
              },
              {
                "name": "source_metadata_id",
                "value": "={{ $('Google ドライブ1').item.json.file_url }}"
              },
              {
                "name": "department",
                "value": "ai_automation"
              },
              {
                "name": "PROJECT",
                "value": "={{ $('set all metadata').item.json.PROJECT }}"
              },
              {
                "name": "DOCUMENT_TYPE",
                "value": "={{ $('set all metadata').item.json.DOCUMENT_TYPE }}"
              },
              {
                "name": "ASSIGNED_TO",
                "value": "={{ $('set all metadata').item.json.ASSIGNEDTO }}"
              }
            ]
          }
        },
        "jsonData": "={{ $json.content }}",
        "jsonMode": "expressionData"
      },
      "typeVersion": 1
    },
    {
      "id": "b008bae1-bc63-47da-8afa-b8a1b17ea412",
      "name": "Character Text Splitter",
      "type": "@n8n/n8n-nodes-langchain.textSplitterCharacterTextSplitter",
      "position": [
        3408,
        1440
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "fb42d3ab-5c20-4efc-920a-87dc96890cab",
      "name": "Google ドライブ",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        368,
        1136
      ],
      "parameters": {
        "filter": {
          "folderId": {
            "__rl": true,
            "mode": "list",
            "value": "1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
            "cachedResultUrl": "https://drive.google.com/drive/folders/1C1zD1XefBltEAocX6kfHFbzQtzzAxo_E",
            "cachedResultName": "knowledgebaseforaibot"
          }
        },
        "options": {
          "fields": [
            "id",
            "name",
            "webViewLink",
            "mimeType",
            "*"
          ]
        },
        "resource": "fileFolder",
        "searchMethod": "query"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "xS6kSuLaEkasxHtm",
          "name": "Google Drive account 6 rn dbt"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "393465a4-4b14-42ef-b2ca-608a161c6914",
      "name": "Google ドライブ1",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        1088,
        1152
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "url",
          "value": "={{ $('add metadata').item.json.file_url }}"
        },
        "options": {},
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "xS6kSuLaEkasxHtm",
          "name": "Google Drive account 6 rn dbt"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
      "name": "Mistral Upload",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1328,
        1152
      ],
      "parameters": {
        "url": "https://api.mistral.ai/v1/files",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "contentType": "multipart-form-data",
        "authentication": "predefinedCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "purpose",
              "value": "ocr"
            },
            {
              "name": "file",
              "parameterType": "formBinaryData",
              "inputDataFieldName": "data"
            }
          ]
        },
        "nodeCredentialType": "mistralCloudApi"
      },
      "credentials": {
        "mistralCloudApi": {
          "id": "k9FknrnRcIKYNX7T",
          "name": "Mistral Cloud account 2 dbt rn"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "255683ca-712a-4386-ae19-b3fbb5f37e30",
      "name": "Mistral Signed URL",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1552,
        1152
      ],
      "parameters": {
        "url": "=https://api.mistral.ai/v1/files/{{ $json.id }}/url",
        "options": {},
        "sendQuery": true,
        "sendHeaders": true,
        "authentication": "predefinedCredentialType",
        "queryParameters": {
          "parameters": [
            {
              "name": "expiry",
              "value": "24"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "Accept",
              "value": "application/json"
            }
          ]
        },
        "nodeCredentialType": "mistralCloudApi"
      },
      "credentials": {
        "mistralCloudApi": {
          "id": "k9FknrnRcIKYNX7T",
          "name": "Mistral Cloud account 2 dbt rn"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
      "name": "Mistral DOC OCR",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1776,
        1152
      ],
      "parameters": {
        "url": "https://api.mistral.ai/v1/ocr",
        "method": "POST",
        "options": {},
        "jsonBody": "={\n  \"model\": \"mistral-ocr-latest\",\n  \"document\": {\n    \"type\": \"document_url\",\n    \"document_url\": \"{{ $json.url }}\"\n  },\n  \"include_image_base64\": true\n}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "mistralCloudApi"
      },
      "credentials": {
        "mistralCloudApi": {
          "id": "k9FknrnRcIKYNX7T",
          "name": "Mistral Cloud account 2 dbt rn"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "a1c139ec-5ec2-4142-aacc-5df387b677fd",
      "name": "クリック時 ‘Test workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        96,
        1136
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "6ceb4cbc-b4b0-4ac6-befe-a92649279bf4",
      "name": "OpenAI 埋め込み",
      "type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
      "position": [
        3168,
        1456
      ],
      "parameters": {
        "model": "text-embedding-3-small",
        "options": {}
      },
      "credentials": {
        "openAiApi": {
          "id": "BEMsaCWtnyqTUtIt",
          "name": "OpenAi account 8 dbt digi"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "ee2ce006-d94c-49a7-849f-8155c95ee118",
      "name": "Qdrant ベクトルストア1",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreQdrant",
      "position": [
        3184,
        1152
      ],
      "parameters": {
        "mode": "insert",
        "options": {},
        "qdrantCollection": {
          "__rl": true,
          "mode": "list",
          "value": "docaiauto",
          "cachedResultName": "docaiauto"
        },
        "embeddingBatchSize": "=200"
      },
      "credentials": {
        "qdrantApi": {
          "id": "hBirQvCk1VaV8cfQ",
          "name": "QdrantApi account"
        }
      },
      "typeVersion": 1.1
    },
    {
      "id": "b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259",
      "name": "Mistral Cloud Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatMistralCloud",
      "position": [
        2208,
        1456
      ],
      "parameters": {
        "model": "mistral-small-latest",
        "options": {}
      },
      "credentials": {
        "mistralCloudApi": {
          "id": "k9FknrnRcIKYNX7T",
          "name": "Mistral Cloud account 2 dbt rn"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
      "name": "ai chat agent",
      "type": "@n8n/n8n-nodes-langchain.agent",
      "position": [
        992,
        112
      ],
      "parameters": {
        "options": {
          "systemMessage": "=You are a helpful, intelligent AI Knowledge Bot and proactive chat agent.  \nLead conversations by asking open-ended questions and offering suggestions.  \nAnticipate needs, follow up thoughtfully, and make the user feel supported throughout.\n\n---\n\n## **Knowledge Access Workflow**\n\n1. **Primary Source: qdrant vector store (`add_table_name` table)**  \n   - For **every** user query, first search **only** in qdrant.  \n   - If relevant information is found:\n     - Respond in a **clear, structured, and informative** manner.  \n     - **Always include multiple images** if available — aim for **maximum number of relevant images**, not just one.  \n       - Place each image **inline**, immediately after the sentence/section it illustrates.  \n       - Never group all images at the end.\n     - Always **cite the internal document name** and **provide a clickable URL** to the source document.\n       - Use `list doc` tool with `source_metadata_id` to retrieve the document URL.\n     \n     **Example Citation:**  \n     > This information is based on the internal AI documentation related to automation projects (Automation Timesheet Blogs).  \n     > [View Full Document](document_url_here)\n\n2. **If No  Match Found:**  \n   - Reply:  \n     `\"The information you requested is not available in our internal documentation. Would you like me to do a Web search?\"`\n\n3. **Web Search Rules:**  \n   - Only search if user explicitly agrees (e.g., \"Yes\", \"Okay\").  \n  - use 'web search 1'tool\n   - Clearly label as **External Source** and provide clickable URLs.  \n     Example:  \n     > This information comes from an external source ([website_name](source_url)).\n\n4. 
**If No Data in Qdrant or Web:**  \n   - Reply:  \n     `\"At the moment, there is no information available to fully address this request based on current internal and external sources.\"`\n\n---\n\n## **Formatting & Style**\n- **Language:** All qdrant queries in English; all responses in English.  \n- **Tone:** Professional, clear, structured, and helpful.  \n- **Image Usage:**  \n  - Always include **all available relevant images** MARKDOWN  — even if it means showing several per section.  \n  - Place images **exactly after the point they illustrate**.  \n  - Use different images for different subtopics, comparisons, or examples.  \n  - Do **not** skip images if they exist in the source.\n- **For Guide/Instruction Responses:**  \n  1. **Step-by-Step Guide** (detailed instructions)  \n  2. **Things to Consider** (common pitfalls, important details)  \n  3. **Further Understanding**  \n     - Suggest 2 related topics the user might explore next, relevant to their query.\n- Never skip key details. Always check if more images can be added to improve clarity.\n\n---\n\n## **Core Principles**\n- Be proactive — never wait for the user to guide the entire conversation.  \n- Ask open-ended follow-ups.  \n- Anticipate related needs based on the query.  \n- Maintain accuracy, cite sources, and **use as many relevant images as possible**.  \n-  ALWAYS SHOW  IMAGE AND Keep images inline and contextually placed.\n",
          "returnIntermediateSteps": false
        }
      },
      "typeVersion": 1.7
    },
    {
      "id": "3e71d64a-3a04-4f5e-b009-8eddbef94e8c",
      "name": "付箋",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1008,
        416
      ],
      "parameters": {
        "color": 5,
        "width": 400,
        "height": 208,
        "content": "## WEB SEARCH using tavily (http node)\n [Tavily setup Guide](https://docs.tavily.com/welcome)"
      },
      "typeVersion": 1
    },
    {
      "id": "3c335f41-3d05-4436-ad0f-ce6957967a0c",
      "name": "付箋1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        320,
        928
      ],
      "parameters": {
        "color": 5,
        "width": 224,
        "height": 400,
        "content": "## GET ALL FILE DATA FROM SELECTED GOOGLE DRIVE FOLDER"
      },
      "typeVersion": 1
    },
    {
      "id": "a404df40-0f40-46b5-a65b-edc182d06d9b",
      "name": "付箋2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1936,
        1040
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 256,
        "content": "## Remove  empty data fields  "
      },
      "typeVersion": 1
    },
    {
      "id": "4776e7ea-7243-474f-9ec0-47afc46d8479",
      "name": "付箋3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1024,
        976
      ],
      "parameters": {
        "color": 5,
        "width": 208,
        "height": 320,
        "content": "## GET  individual files from selected gdrive"
      },
      "typeVersion": 1
    },
    {
      "id": "3a593a87-2d91-48dc-85da-98a5bc562ff6",
      "name": "付箋4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1280,
        1008
      ],
      "parameters": {
        "color": 5,
        "width": 608,
        "height": 272,
        "content": "## MISTRAL OCR\n [OCR Guide](https://mistral.ai/news/mistral-ocr)\n1. UPLOAD FILE\n2. GET SIGNED URL\n3. GET EXTRACT DATA AFTER USING MISTRAL OCR"
      },
      "typeVersion": 1
    },
    {
      "id": "65106e47-a303-4f46-be3b-207d6e6c04fa",
      "name": "付箋5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        352,
        16
      ],
      "parameters": {
        "color": 5,
        "width": 368,
        "height": 288,
        "content": "## Hosted Chat interface  \n"
      },
      "typeVersion": 1
    },
    {
      "id": "78103cfe-e101-4d70-8dea-08b4ca2bbdb6",
      "name": "付箋6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1472,
        96
      ],
      "parameters": {
        "color": 5,
        "width": 272,
        "height": 496,
        "content": "## QDRANT VECTOR AND OPENAI EMBEDDING \n [QDRANT Guide](https://qdrant.tech/documentation/)"
      },
      "typeVersion": 1
    },
    {
      "id": "716859d0-f3eb-431c-a6fc-37301508c968",
      "name": "付箋7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        928,
        0
      ],
      "parameters": {
        "color": 5,
        "width": 432,
        "height": 304,
        "content": "## AI chat agent\n interact with user and process user input and provide appropriate response using different tools. "
      },
      "typeVersion": 1
    },
    {
      "id": "aa7c86ee-3229-415e-96eb-92fbd530aa44",
      "name": "付箋10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2672,
        1008
      ],
      "parameters": {
        "color": 5,
        "width": 320,
        "height": 288,
        "content": "## clean all extracted data and convert them to smaller chunks"
      },
      "typeVersion": 1
    },
    {
      "id": "ccd381e0-1cd2-406d-9aec-7b03c18ee435",
      "name": "付箋11",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2176,
        928
      ],
      "parameters": {
        "color": 5,
        "width": 288,
        "height": 368,
        "content": "## assignment agent\n\nFor any given file, this node assigns which type of document it is, which project it is related to, and who is working on it."
      },
      "typeVersion": 1
    },
    {
      "id": "87f7a493-d65c-447b-a465-3175c1bcbea7",
      "name": "付箋12",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3088,
        976
      ],
      "parameters": {
        "color": 5,
        "width": 512,
        "height": 608,
        "content": "## load all chunks into qdrant vector database"
      },
      "typeVersion": 1
    },
    {
      "id": "66f3dfcf-9fbe-4d63-888e-dda7c422c282",
      "name": "付箋9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        576,
        992
      ],
      "parameters": {
        "color": 5,
        "width": 256,
        "height": 320,
        "content": "## loop over google drive folder items"
      },
      "typeVersion": 1
    },
    {
      "id": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
      "name": "Loop Over each file in gdrive folder",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        608,
        1136
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "4300ad47-a92a-408b-aa8b-b56249d341e9",
      "name": "add metadata",
      "type": "n8n-nodes-base.set",
      "position": [
        896,
        1152
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "10646eae-ae46-4327-a4dc-9987c2d76173",
              "name": "file_id",
              "type": "string",
              "value": "={{ $json.id }}"
            },
            {
              "id": "f4536df5-d0b1-4392-bf17-b8137fb31a44",
              "name": "file_type",
              "type": "string",
              "value": "={{ $json.mimeType }}"
            },
            {
              "id": "77d782de-169d-4a46-8a8e-a3831c04d90f",
              "name": "file_title",
              "type": "string",
              "value": "={{ $json.name }}"
            },
            {
              "id": "9bde4d7f-e4f3-4ebd-9338-dce1350f9eab",
              "name": "file_url",
              "type": "string",
              "value": "={{ $json.webViewLink }}"
            },
            {
              "id": "fae402c8-c486-4b57-8d28-bf669db6b442",
              "name": "last_modified_date",
              "type": "string",
              "value": "={{ $json.modifiedTime }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "efc3f354-0a6d-46dc-8ecf-793873e19466",
      "name": "If NODE",
      "type": "n8n-nodes-base.if",
      "position": [
        1984,
        1152
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "d15e917b-43d2-40b8-8b49-af467ff63961",
              "operator": {
                "type": "string",
                "operation": "notExists",
                "singleValue": true
              },
              "leftValue": "={{ $json.data[0].parseJson().skipped }}",
              "rightValue": ""
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
      "name": "based file name it assign differ metadata",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        2208,
        1152
      ],
      "parameters": {
        "text": "=\n{{ $json.pages[0].markdown }}",
        "options": {
          "systemPromptTemplate": "You are an expert extraction algorithm.\nOnly extract relevant information from the text.\ngive data is "
        },
        "attributes": {
          "attributes": [
            {
              "name": "document_type",
              "description": "type of document this can be blog, user documentation, technical documentation, manual/guide, educational"
            },
            {
              "name": "project",
              "description": " from give project this can any one ,social/digimarketing ,  chatbot, knowledge bot if nothing that its default"
            },
            {
              "name": "assigned_to",
              "description": "who are handling give projects mention all name given for a give project\n\nif project LINKED/DIGIMARRKETING/SOCIAL = [employee name]\nif project KB BOT = [employee name]\nif project CHAT BOT = [employee name]\nif project TIMESHEET = [employee name]"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
      "name": "set all metadata",
      "type": "n8n-nodes-base.set",
      "position": [
        2512,
        1152
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "5132d92c-41da-4a55-ad79-0c329ca7e626",
              "name": "Document name",
              "type": "string",
              "value": "={{ $('Google ドライブ1').item.json.file_title }}"
            },
            {
              "id": "c8160701-2be7-43c6-bcfa-295fbebe0e23",
              "name": "Document data",
              "type": "string",
              "value": "={{ $('If NODE').item.json.pages.map(page => page.markdown).join(\"\\n\\n\") }}"
            },
            {
              "id": "1087ab34-5643-4755-b545-cf34d0ae2cd2",
              "name": " source",
              "type": "string",
              "value": "={{ $('Google ドライブ1').item.json.file_id }}"
            },
            {
              "id": "4317aa31-d9fd-4adc-ab26-b48c208041b3",
              "name": "ASSIGNEDTO",
              "type": "string",
              "value": "={{ $json.output.assigned_to.split(\",\") }}"
            },
            {
              "id": "7a61d775-06b1-42e2-a82a-d8e756fa5586",
              "name": "PROJECT",
              "type": "string",
              "value": "={{ $json.output.project }}"
            },
            {
              "id": "300f49d6-8851-458d-a296-8ceb68ebdd5f",
              "name": "DOCUMENT_TYPE",
              "type": "string",
              "value": "={{ $json.output.document_type }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
      "name": "clean output",
      "type": "n8n-nodes-base.code",
      "position": [
        2704,
        1152
      ],
      "parameters": {
        "jsCode": "// Get incoming data (first item)\nconst data = items[0].json;\n\n// If wrapped in an array, unwrap it\nconst input = Array.isArray(data) ? data[0] : data;\n\nlet output = [\n  {\n    content: input[\"Document data\"], // main text for embedding\n    metadata: {\n      document_name: input[\"Document name\"],\n      source_id: input[\" source\"], // note: there is a space before \"source\" in your key\n    }\n  }\n];\n\nreturn output.map(o => ({ json: o }));\n"
      },
      "typeVersion": 2
    },
    {
      "id": "f9172d1d-060c-46f7-8e48-b235a2610185",
      "name": "convert data into smaller chunks",
      "type": "n8n-nodes-base.code",
      "position": [
        2864,
        1152
      ],
      "parameters": {
        "jsCode": "const chunkSize = 1000; // characters per chunk\nconst chunkOverlap = 100; // characters shared by consecutive chunks\nconst step = chunkSize - chunkOverlap; // distance between chunk starts\n\nconst newItems = [];\n\nfor (const item of items) {\n    const text = item.json.content;\n    // Skip items that carry no usable text\n    if (!text || typeof text !== \"string\") continue;\n\n    // Slide a chunkSize-wide window across the text, advancing by step\n    for (let i = 0; i < text.length; i += step) {\n        newItems.push({\n            json: {\n                content: text.slice(i, i + chunkSize),\n                metadata: item.json.metadata\n            }\n        });\n    }\n}\n\nreturn newItems;\n"
      },
      "typeVersion": 2
    },
    {
      "id": "2a9539e7-e8dc-4c2a-a016-daab8c7d5339",
      "name": "付箋8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        3696,
        736
      ],
      "parameters": {
        "color": 5,
        "width": 800,
        "height": 1248,
        "content": "### **Node List & Descriptions**\n\n1. **Manual Trigger**  \n   - **What:** Kicks off the workflow when the user clicks **“Test workflow.”**  \n   - **Why:** Allows for on-demand execution and testing of the data pipeline.  \n\n2. **Google Drive (List Files)**  \n   - **What:** Lists all files within a predefined Google Drive folder (`knowledgebaseforaibot`).  \n   - **Why:** To retrieve the initial list of all documents that need to be processed.  \n\n3. **Loop Over Files**  \n   - **What:** Iterates through each file retrieved from Google Drive, processing them one by one.  \n   - **Why:** Ensures each document is handled individually in the following steps.  \n\n4. **Google Drive (Download File)**  \n   - **What:** Downloads the binary content of the current file being processed in the loop.  \n   - **Why:** Provides the actual file data needed for OCR processing.  \n\n5. **Mistral OCR (via HTTP Requests)**  \n   - **What:**  \n     - Uploads the file to the Mistral API.  \n     - Retrieves a temporary signed URL for access.  \n     - Calls the Mistral OCR endpoint to extract all text content from the document.  \n   - **Why:** Converts scanned or image-based documents (like PDFs, JPGs) into machine-readable text.  \n\n6. **If Node (Check for Success)**  \n   - **What:** Verifies that the Mistral OCR operation completed successfully without errors.  \n   - **Why:** Ensures only valid, readable documents continue in the workflow, avoiding process failures.  \n\n7. **Information Extractor (LangChain)**  \n   - **What:** Uses the Mistral AI chat model to analyze the extracted text and assign metadata such as:  \n     - `document_type`  \n     - `project`  \n     - `assigned_to`  \n   - **Why:** Automatically categorizes and tags documents based on their content for better organization and searchability.  \n\n8. **Code Node (Clean & Chunk Data)**  \n   - **What:**  \n     - Formats the extracted text and metadata into a clean JSON structure.  
\n     - Splits the document text into smaller, overlapping chunks (≈1000 characters each).  \n   - **Why:** Prepares data for embedding and ensures compatibility with language model context limits.  \n\n9. **OpenAI Embeddings (LangChain)**  \n   - **What:** Converts each text chunk into a numerical vector using OpenAI’s `text-embedding-3-small` model.  \n   - **Why:** Encodes the semantic meaning of the text for similarity-based search and retrieval.  \n\n10. **Qdrant Vector Store (LangChain)**  \n    - **What:** Inserts the text chunks and their corresponding embeddings into a Qdrant vector database collection named `docaiauto`.  \n    - **Why:** Creates a searchable, semantic knowledge base for AI-driven document retrieval.  \n\n---\n\n### **flow of data**\n\n**Trigger → List Files (Google Drive) → Loop → Download File → OCR (Mistral) → Validate → Categorize (AI) → Clean & Chunk → Embed (OpenAI) → Store (Qdrant)**\n"
      },
      "typeVersion": 1
    },
    {
      "id": "1dc0d13d-c75c-44dd-a94c-0b177cf6c094",
      "name": "付箋13",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1808,
        -320
      ],
      "parameters": {
        "color": 5,
        "width": 960,
        "height": 928,
        "content": "### **Node List & Descriptions**\n\n1. **Chat Trigger**  \n   - **What:** Kicks off the workflow whenever a new message is received in the public chat interface.  \n   - **Why:** To make the workflow interactive and responsive to user input in real-time.  \n\n2. **AI Chat Agent**  \n   - **What:** The central brain of the workflow. It orchestrates the entire process based on a detailed system prompt, managing memory and deciding which tools to use (internal knowledge base or web search).  \n   - **Why:** To act as an intelligent agent that can handle complex user queries by following a specific set of rules for information retrieval.  \n\n3. **OpenAI Chat Model**  \n   - **What:** The underlying Large Language Model (`gpt-4.1-mini`) that powers the agent's reasoning, understanding, and response generation.  \n   - **Why:** Provides the core intelligence for understanding user intent and formulating human-like, context-aware answers.  \n\n4. **Simple Memory**  \n   - **What:** Stores a history of the current conversation to provide context for follow-up questions.  \n   - **Why:** Allows the agent to remember what was previously discussed, leading to a more natural and coherent conversation.  \n\n5. **Qdrant Vector Store (RAG Tool)**  \n   - **What:**  \n     - Acts as the primary tool for the agent.  \n     - Searches the internal knowledge base (`docaiauto` collection) for information relevant to the user's query.  \n   - **Why:** Ensures that the agent first attempts to answer questions using verified, internal documentation before seeking external information.  \n\n6. **OpenAI Embeddings**  \n   - **What:** Works in conjunction with the Qdrant Vector Store to convert the user's text query into a numerical vector.  \n   - **Why:** Enables powerful semantic search, allowing the agent to find information based on meaning and context, not just keywords.  \n\n7. 
**Web Search (Tavily Tool)**  \n   - **What:** Acts as a secondary tool for the agent, available only upon user confirmation. It performs an external web search using the Tavily API.  \n   - **Why:** Provides a fallback to access up-to-date, public information when the internal knowledge base does not contain the required answer.  \n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Chat Message → AI Agent → Use Tools (Qdrant or Web Search) → Generate Response (OpenAI) → Send Reply**\n"
      },
      "typeVersion": 1
    },
    {
      "id": "83726285-3bfd-4d22-ad35-39a5ff3ef414",
      "name": "付箋14",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1232,
        -176
      ],
      "parameters": {
        "color": 5,
        "width": 864,
        "height": 1856,
        "content": "# Konowledge RAG and AI chat agent  \n## by DIGITAL BIZ TECH\n\n## **Overview**\n\nThis workflow automates the process of transforming all files stored in Google Drive into a **searchable, intelligent knowledge base** — fully integrated with a **chat-enabled AI assistant**.  \nIt also supports **web search augmentation**, allowing the agent to explain, compare, or enrich document insights using the latest online data.\n\n---\n\n## **Key Objectives**\n\n1. **Ingest and Process All Files**\n   - Retrieve all files from a specific Google Drive folder.\n   - Automatically extract their text content using OCR and AI-based parsing.\n   - Generate structured metadata for each document (e.g., project, assignment, employee, and category).\n   - Convert the text into **vector embeddings** and store them in a **Qdrant vector database** for semantic retrieval.\n\n2. **Enrich Metadata Intelligence**\n   - Enhance each document with contextual metadata fields:\n     - `project` — identifies which project the document belongs to.\n     - `assignment` — links the file to a specific task or deliverable.\n     - `employee` — indicates the team member responsible or related to the document.\n   - Improves the **contextual accuracy** of RAG (Retrieval-Augmented Generation) queries by enabling more targeted retrieval.\n\n3. **Integrate with Chat-Based AI Agent**\n   - The agent uses a **ai agent with different tools** to:\n     - Query internal knowledge (Qdrant collection) for relevant chunks.\n     - Optionally trigger a **web search (Tavily API)** when internal data is insufficient.\n     - Combine results intelligently to deliver accurate and explainable answers.\n   - Users can interact naturally through a chat interface to explore, summarize, or cross-reference different documents.\n\n4. 
**Maintain a Robust RAG Pipeline for Unstructured Data**\n   - Designed to handle **unclean, inconsistent, or multi-format data sources** (PDFs, DOCX, images, etc.).\n   - The cleaning and chunking logic ensures uniform embeddings, even from noisy input.\n   - The RAG system automatically improves over time as new documents are added or updated in Google Drive.\n\n---\n\n## **Functional Flow**\n\n**Google Drive → File Extraction → Metadata Enrichment → Text Chunking → Embedding (OpenAI) → Storage (Qdrant) → AI Chat Agent → Web Search (Optional)**\n\n---\n\n## **Use Cases**\n\n- 📁 **Automated Knowledge Management:** Build a live, self-updating internal document knowledge base.  \n- 🧠 **Smart AI Assistance:** Enable employees to query project or task documents conversationally.  \n- 🌐 **Hybrid Knowledge Retrieval:** Combine private document insights with external web data.  \n- 🧩 **Flexible Integration:** Can be connected to any folder or department for scalable RAG deployment.\n\n---\n\n## **Benefits**\n\n- Fully automated ingestion from Google Drive.  \n- Rich, context-aware metadata for intelligent document relationships.  \n- Chat interface for easy access to organizational knowledge.  \n- Optional web search for real-time, external context expansion.  \n- Maintains a clean, high-quality **RAG (Retrieval-Augmented Generation)** pipeline even from unclean data sources.\n\n---\n\n### **Quick Reference: Flow Logic**\n\n**Google Drive → OCR/Text Extraction → Metadata Enrichment → Embeddings → Qdrant (RAG Store) → Chat AI Agent → Optional Web Search**"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "efc3f354-0a6d-46dc-8ecf-793873e19466": {
      "main": [
        [
          {
            "node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c770feb0-c01a-4e94-832a-3dafc88fb28f": {
      "ai_tool": [
        [
          {
            "node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Google Drive": {
      "main": [
        [
          {
            "node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4300ad47-a92a-408b-aa8b-b56249d341e9": {
      "main": [
        [
          {
            "node": "Google Drive1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "2a4026cf-3034-49a8-8dc8-ad67ae571213": {
      "main": [
        [
          {
            "node": "f9172d1d-060c-46f7-8e48-b235a2610185",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Drive1": {
      "main": [
        [
          {
            "node": "3e38082e-4619-47cf-98e0-f7f66a8541c1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "3e38082e-4619-47cf-98e0-f7f66a8541c1": {
      "main": [
        [
          {
            "node": "255683ca-712a-4386-ae19-b3fbb5f37e30",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Simple Memory1": {
      "ai_memory": [
        [
          {
            "node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
            "type": "ai_memory",
            "index": 0
          }
        ]
      ]
    },
    "2e846e38-ae17-4717-a39b-b3d0e0c567f9": {
      "main": [
        [
          {
            "node": "efc3f354-0a6d-46dc-8ecf-793873e19466",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a": {
      "main": [
        [
          {
            "node": "2a4026cf-3034-49a8-8dc8-ad67ae571213",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings OpenAI": {
      "ai_embedding": [
        [
          {
            "node": "Qdrant Vector Store1",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "Embeddings OpenAI1": {
      "ai_embedding": [
        [
          {
            "node": "Qdrant Vector Store",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "255683ca-712a-4386-ae19-b3fbb5f37e30": {
      "main": [
        [
          {
            "node": "2e846e38-ae17-4717-a39b-b3d0e0c567f9",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenAI Chat Model1": {
      "ai_languageModel": [
        [
          {
            "node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "8f2950f4-0c96-4d70-9aa0-2729dd0b560e": {
      "ai_document": [
        [
          {
            "node": "Qdrant Vector Store1",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Qdrant Vector Store": {
      "ai_tool": [
        [
          {
            "node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    },
    "Qdrant Vector Store1": {
      "main": [
        [
          {
            "node": "d21fe18b-7f9b-46b5-a8b5-2725c849db7f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "b008bae1-bc63-47da-8afa-b8a1b17ea412": {
      "ai_textSplitter": [
        [
          {
            "node": "8f2950f4-0c96-4d70-9aa0-2729dd0b560e",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "b4a1a6a6-6f2e-4ac6-8fba-f5752ac66259": {
      "ai_languageModel": [
        [
          {
            "node": "33e574c3-47a2-454b-85e9-e3e9983e3d63",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "When chat message received": {
      "main": [
        [
          {
            "node": "85a90fcf-b75c-4ecc-90e2-611c8db61073",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "f9172d1d-060c-46f7-8e48-b235a2610185": {
      "main": [
        [
          {
            "node": "Qdrant Vector Store1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Google Drive",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "d21fe18b-7f9b-46b5-a8b5-2725c849db7f": {
      "main": [
        [],
        [
          {
            "node": "4300ad47-a92a-408b-aa8b-b56249d341e9",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "33e574c3-47a2-454b-85e9-e3e9983e3d63": {
      "main": [
        [
          {
            "node": "c4b2a5cb-b849-4d7e-9f24-7ef67ee4031a",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
よくある質問

このワークフローの使い方は?

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択します。設定を貼り付けたら、必要に応じて認証情報を変更してください。

このワークフローはどんな場面に適していますか?

上級者向けのワークフローです。内部Wikiの構築や、AI RAG(検索拡張生成)を活用したドキュメント検索の場面に適しています。

有料ですか?

このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(OpenAI APIなど)は別途料金が発生する場合があります。

ワークフロー情報
難易度
上級
ノード数: 40
カテゴリー数: 2
ノードタイプ数: 19
難易度説明

上級者向け、16ノード以上の複雑なワークフロー

外部リンク
n8n.ioで表示

このワークフローを共有

カテゴリー

カテゴリー: 34