8
n8n 中文网amn8n.com

PDF 转向量 RAG 系统:Mistral OCR、Weaviate 数据库和 MCP 服务器

中级

这是一个文档提取(Document Extraction)与多模态 AI(Multimodal AI)领域的自动化工作流,包含 13 个节点,主要使用 Set、MistralAi、FormTrigger、McpTrigger、RerankerCohere 等节点。该工作流使用 Mistral OCR 和 Weaviate 数据库构建 PDF 搜索系统。

前置要求
  • 导入后需自行配置凭证:Mistral AI(OCR)、Cohere(嵌入与重排序)、Weaviate(向量数据库),以及 MCP 触发器使用的 Header Auth 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "rV1w47cZn1rsk7MP",
  "meta": {
    "instanceId": "402614e91d3374d08756ba129f83d1678d5ada16d0573bfa1398e795ef5f7589",
    "templateCredsSetupCompleted": true
  },
  "name": "PDF 转向量 RAG 系统:Mistral OCR、Weaviate 数据库和 MCP 服务器",
  "tags": [],
  "nodes": [
    {
      "id": "d9e90589-d6b6-4601-bac8-5009b765fa78",
      "name": "Cohere 嵌入",
      "type": "@n8n/n8n-nodes-langchain.embeddingsCohere",
      "position": [
        160,
        336
      ],
      "parameters": {
        "modelName": "embed-multilingual-v3.0"
      },
      "typeVersion": 1
    },
    {
      "id": "5e7c6668-64a4-4cc2-b519-ab75f07ecab5",
      "name": "文档加载器",
      "type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
      "position": [
        -144,
        336
      ],
      "parameters": {
        "options": {},
        "textSplittingMode": "custom"
      },
      "typeVersion": 1.1
    },
    {
      "id": "b808993d-a6b9-497f-88b5-271c16abc185",
      "name": "Cohere 重排序器",
      "type": "@n8n/n8n-nodes-langchain.rerankerCohere",
      "position": [
        304,
        336
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "117948ee-4be7-4869-b35b-d0c58a66fcd5",
      "name": "MCP 知识服务器",
      "type": "@n8n/n8n-nodes-langchain.mcpTrigger",
      "position": [
        192,
        -80
      ],
      "webhookId": "4f3bebe7-f088-4131-b3ad-c4e300f74517",
      "parameters": {
        "path": "c74c97f5-0197-45e3-b4dd-f3efbd4bab22",
        "authentication": "headerAuth"
      },
      "typeVersion": 2
    },
    {
      "id": "700afe32-2bcc-4f31-a680-cdce710861e2",
      "name": "搜索知识库",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreWeaviate",
      "position": [
        256,
        128
      ],
      "parameters": {
        "mode": "retrieve-as-tool",
        "options": {},
        "useReranker": true,
        "toolDescription": "Use this tool to search and retrieve information from the knowledge base containing various documents and resources",
        "weaviateCollection": {
          "__rl": true,
          "mode": "list",
          "value": "KnowledgeDocuments",
          "cachedResultName": "KnowledgeDocuments"
        },
        "includeDocumentMetadata": false
      },
      "typeVersion": 1.3
    },
    {
      "id": "4075cf1b-e9f6-44cc-b827-29fa5eb1ee97",
      "name": "上传说明",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -880,
        -16
      ],
      "parameters": {
        "color": 5,
        "width": 688,
        "height": 304,
        "content": "## 手动文档(PDF)上传部分"
      },
      "typeVersion": 1
    },
    {
      "id": "bf763fa9-03f4-4010-a6a9-63b6bb104fa1",
      "name": "文本分割器",
      "type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
      "position": [
        -144,
        480
      ],
      "parameters": {
        "options": {
          "splitCode": "markdown"
        },
        "chunkSize": 600,
        "chunkOverlap": 200
      },
      "typeVersion": 1
    },
    {
      "id": "d1fe1c74-d9bc-4040-8446-47e90365c5f7",
      "name": "上传 PDF",
      "type": "n8n-nodes-base.formTrigger",
      "position": [
        -832,
        112
      ],
      "webhookId": "ff827d70-a596-4f81-a99e-63bf42189fd0",
      "parameters": {
        "options": {
          "ignoreBots": true,
          "buttonLabel": "Upload Document",
          "appendAttribution": true
        },
        "formTitle": "Upload Documents to Knowledge Base",
        "formFields": {
          "values": [
            {
              "fieldType": "file",
              "fieldLabel": "PDF File",
              "multipleFiles": false,
              "requiredField": true,
              "acceptFileTypes": ".pdf"
            }
          ]
        },
        "responseMode": "lastNode",
        "formDescription": "Upload PDF files to the knowledge base for AI-powered search and retrieval"
      },
      "typeVersion": 2.2
    },
    {
      "id": "1f04ecce-0277-4a88-9514-fec6b45ba1cf",
      "name": "从 PDF 提取文本",
      "type": "n8n-nodes-base.mistralAi",
      "position": [
        -608,
        112
      ],
      "parameters": {
        "options": {},
        "binaryProperty": "file"
      },
      "retryOnFail": true,
      "typeVersion": 1
    },
    {
      "id": "53c82051-ce9b-432e-b90e-d5eb83483e49",
      "name": "准备文档数据",
      "type": "n8n-nodes-base.set",
      "position": [
        -384,
        112
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "518ae17b-b486-4438-8151-f49afb3b68eb",
              "name": "filename",
              "type": "string",
              "value": "={{ $('上传 PDF').item.json.file.filename }}"
            },
            {
              "id": "a574ee4d-6341-4fd5-ac8e-9452eff70aa1",
              "name": "content",
              "type": "string",
              "value": "={{ $json.extractedText }}"
            },
            {
              "id": "metadata-source",
              "name": "source",
              "type": "string",
              "value": "uploaded_pdf"
            },
            {
              "id": "metadata-timestamp",
              "name": "upload_timestamp",
              "type": "string",
              "value": "={{ new Date().toISOString() }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "d465c50a-87e9-4824-83ca-d5662630590c",
      "name": "存储到向量数据库",
      "type": "@n8n/n8n-nodes-langchain.vectorStoreWeaviate",
      "position": [
        -112,
        112
      ],
      "parameters": {
        "mode": "insert",
        "options": {},
        "weaviateCollection": {
          "__rl": true,
          "mode": "list",
          "value": "KnowledgeDocuments",
          "cachedResultName": "KnowledgeDocuments"
        }
      },
      "typeVersion": 1.3
    },
    {
      "id": "5a5e4028-3764-4bb2-8900-df77c0c47bde",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        112,
        304
      ],
      "parameters": {
        "color": 4,
        "width": 320,
        "height": 288,
        "content": "## 嵌入与重排序"
      },
      "typeVersion": 1
    },
    {
      "id": "f0ecfbb5-e9ce-49fd-a921-672b12b7ef13",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        112,
        -240
      ],
      "parameters": {
        "width": 336,
        "height": 288,
        "content": "## MCP 服务器触发器"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "8b3d2e49-d84f-4fca-bdda-298f3da3c15b",
  "connections": {
    "上传 PDF": {
      "main": [
        [
          {
            "node": "从 PDF 提取文本",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "文本分割器": {
      "ai_textSplitter": [
        [
          {
            "node": "文档加载器",
            "type": "ai_textSplitter",
            "index": 0
          }
        ]
      ]
    },
    "Cohere 重排序器": {
      "ai_reranker": [
        [
          {
            "node": "搜索知识库",
            "type": "ai_reranker",
            "index": 0
          }
        ]
      ]
    },
    "文档加载器": {
      "ai_document": [
        [
          {
            "node": "存储到向量数据库",
            "type": "ai_document",
            "index": 0
          }
        ]
      ]
    },
    "Cohere 嵌入": {
      "ai_embedding": [
        [
          {
            "node": "搜索知识库",
            "type": "ai_embedding",
            "index": 0
          },
          {
            "node": "存储到向量数据库",
            "type": "ai_embedding",
            "index": 0
          }
        ]
      ]
    },
    "从 PDF 提取文本": {
      "main": [
        [
          {
            "node": "准备文档数据",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "准备文档数据": {
      "main": [
        [
          {
            "node": "存储到向量数据库",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "搜索知识库": {
      "ai_tool": [
        [
          {
            "node": "MCP 知识服务器",
            "type": "ai_tool",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

适用于文档提取与多模态 AI 场景:例如将 PDF 通过 Mistral OCR 提取文本后写入 Weaviate 向量数据库,并通过 MCP 服务器对外提供知识库检索。难度等级为中级。

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Mistral AI、Cohere、Weaviate)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量13
分类2
节点类型10
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
Dietmar

Dietmar

@docd

Medical specialist in internal medicine, gastroenterology, and infectious diseases. Building innovative healthcare automation workflows with n8n, integrating AI, speech-to-text, and medical data standards for efficient clinical documentation and analysis.

外部链接
在 n8n.io 查看

分享此工作流