8
n8n 中文网 amn8n.com

使用 PDF Vector、GPT-4 和 Neo4j 构建学术知识图谱

中级

这是一个 AI RAG、Multimodal AI(多模态 AI)领域的自动化工作流,包含 10 个节点,主要使用 Code、Neo4j、OpenAI、Postgres、PDF Vector 等节点,利用 PDF Vector、GPT-4 和 Neo4j 从研究论文构建学术知识图谱。

前置要求
  • OpenAI API Key
  • PostgreSQL 数据库连接信息
  • Neo4j 数据库连接信息
  • PDF Vector 节点(n8n-nodes-pdfvector)及其访问凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "placeholder"
  },
  "nodes": [
    {
      "id": "kb-info",
      "name": "知识库信息",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        250,
        150
      ],
      "parameters": {
        "content": "## 知识库构建器"
      },
      "typeVersion": 1
    },
    {
      "id": "daily-update",
      "name": "Daily KB Update",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        450,
        300
      ],
      "parameters": {
        "rule": {
          "interval": [
            {
              "field": "days",
              "daysInterval": 1
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "fetch-papers",
      "name": "PDF Vector - Fetch Papers",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "position": [
        650,
        300
      ],
      "parameters": {
        "limit": 20,
        "query": "={{ $json.domain || 'artificial intelligence' }}",
        "fields": [
          "title",
          "authors",
          "abstract",
          "year",
          "doi",
          "pdfUrl",
          "totalCitations"
        ],
        "resource": "academic",
        "yearFrom": "={{ new Date().getFullYear() }}",
        "operation": "search",
        "providers": [
          "semantic_scholar",
          "arxiv"
        ]
      },
      "typeVersion": 1
    },
    {
      "id": "parse-papers",
      "name": "PDF Vector - Parse Papers",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "position": [
        850,
        300
      ],
      "parameters": {
        "useLlm": "always",
        "resource": "document",
        "operation": "parse",
        "documentUrl": "={{ $json.pdfUrl }}"
      },
      "typeVersion": 1
    },
    {
      "id": "extract-entities",
      "name": "Extract Entities",
      "type": "n8n-nodes-base.openAi",
      "position": [
        1050,
        300
      ],
      "parameters": {
        "model": "gpt-4",
        "options": {
          "responseFormat": {
            "type": "json_object"
          }
        },
        "messages": {
          "values": [
            {
              "content": "Extract knowledge graph entities from this paper:\n\nTitle: {{ $json.title }}\nContent: {{ $json.content }}\n\nExtract:\n1. Key concepts (5-10 main ideas)\n2. Methods used\n3. Datasets mentioned\n4. Research questions\n5. Key findings\n6. Future directions\n\nAlso identify relationships between these entities.\n\nReturn as structured JSON with entities and relationships arrays."
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "build-graph",
      "name": "Build Graph Structure",
      "type": "n8n-nodes-base.code",
      "position": [
        1250,
        300
      ],
      "parameters": {
        "functionCode": "const extraction = JSON.parse($json.content);\nconst paper = $node['PDF Vector - Fetch Papers'].json;\n\n// Create nodes for Neo4j\nconst nodes = [];\n\n// Paper node\nnodes.push({\n  label: 'Paper',\n  properties: {\n    id: paper.doi || paper.title.replace(/[^a-zA-Z0-9]/g, ''),\n    title: paper.title,\n    year: paper.year,\n    authors: paper.authors.join('; '),\n    citations: paper.totalCitations\n  }\n});\n\n// Author nodes\npaper.authors.forEach(author => {\n  nodes.push({\n    label: 'Author',\n    properties: {\n      name: author\n    }\n  });\n});\n\n// Concept nodes\nextraction.concepts?.forEach(concept => {\n  nodes.push({\n    label: 'Concept',\n    properties: {\n      name: concept\n    }\n  });\n});\n\n// Method nodes\nextraction.methods?.forEach(method => {\n  nodes.push({\n    label: 'Method',\n    properties: {\n      name: method\n    }\n  });\n});\n\n// Create relationships\nconst relationships = [];\n\n// Paper-Author relationships\npaper.authors.forEach(author => {\n  relationships.push({\n    from: paper.doi || paper.title,\n    to: author,\n    type: 'AUTHORED_BY'\n  });\n});\n\n// Paper-Concept relationships\nextraction.concepts?.forEach(concept => {\n  relationships.push({\n    from: paper.doi || paper.title,\n    to: concept,\n    type: 'DISCUSSES'\n  });\n});\n\n// Paper-Method relationships\nextraction.methods?.forEach(method => {\n  relationships.push({\n    from: paper.doi || paper.title,\n    to: method,\n    type: 'USES'\n  });\n});\n\nreturn { nodes, relationships };"
      },
      "typeVersion": 1
    },
    {
      "id": "create-nodes",
      "name": "Create Graph Nodes",
      "type": "n8n-nodes-base.neo4j",
      "position": [
        1450,
        250
      ],
      "parameters": {
        "query": "=UNWIND $nodes AS node\nMERGE (n:Node {id: node.properties.id})\nSET n += node.properties\nSET n:${node.label}",
        "operation": "create",
        "parameters": "={{ { nodes: $json.nodes } }}"
      },
      "typeVersion": 1
    },
    {
      "id": "create-relationships",
      "name": "Create Relationships",
      "type": "n8n-nodes-base.neo4j",
      "position": [
        1450,
        350
      ],
      "parameters": {
        "query": "=UNWIND $relationships AS rel\nMATCH (a {id: rel.from})\nMATCH (b {id: rel.to})\nMERGE (a)-[r:${rel.type}]->(b)",
        "operation": "create",
        "parameters": "={{ { relationships: $json.relationships } }}"
      },
      "typeVersion": 1
    },
    {
      "id": "kb-stats",
      "name": "KB Statistics",
      "type": "n8n-nodes-base.code",
      "position": [
        1650,
        300
      ],
      "parameters": {
        "functionCode": "// Generate knowledge base statistics\nconst stats = {\n  papersProcessed: $items().length,\n  conceptsExtracted: $json.nodes.filter(n => n.label === 'Concept').length,\n  authorsAdded: $json.nodes.filter(n => n.label === 'Author').length,\n  methodsIdentified: $json.nodes.filter(n => n.label === 'Method').length,\n  timestamp: new Date().toISOString()\n};\n\nreturn stats;"
      },
      "typeVersion": 1
    },
    {
      "id": "log-update",
      "name": "Log KB Update",
      "type": "n8n-nodes-base.postgres",
      "position": [
        1850,
        300
      ],
      "parameters": {
        "table": "kb_updates",
        "columns": "papers_processed,concepts,authors,methods,updated_at",
        "operation": "insert"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "KB Statistics": {
      "main": [
        [
          {
            "node": "Log KB Update",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Daily KB Update": {
      "main": [
        [
          {
            "node": "PDF Vector - Fetch Papers",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Entities": {
      "main": [
        [
          {
            "node": "Build Graph Structure",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Graph Nodes": {
      "main": [
        [
          {
            "node": "KB Statistics",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Relationships": {
      "main": [
        [
          {
            "node": "KB Statistics",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Build Graph Structure": {
      "main": [
        [
          {
            "node": "Create Graph Nodes",
            "type": "main",
            "index": 0
          },
          {
            "node": "Create Relationships",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF Vector - Fetch Papers": {
      "main": [
        [
          {
            "node": "PDF Vector - Parse Papers",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF Vector - Parse Papers": {
      "main": [
        [
          {
            "node": "Extract Entities",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - AI RAG 检索增强, 多模态 AI

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量:10
分类:2
节点类型:7
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
PDF Vector

PDF Vector

@pdfvector

Fully featured PDF APIs for developers - parse any PDF or Word document, extract structured data, and access millions of academic papers, all through simple APIs.

外部链接
在 n8n.io 查看

分享此工作流