使用PDF向量、GPT-4和Neo4j构建学术知识图谱
中级
这是一个 AI RAG、Multimodal AI 领域的自动化工作流,包含 10 个节点,主要使用 Code、Neo4j、OpenAi、Postgres、PdfVector 等节点。该工作流使用 PDF Vector、GPT-4 和 Neo4j 从研究论文构建学术知识图谱。
前置要求
- OpenAI API Key
- PostgreSQL 数据库连接信息
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "placeholder"
},
"nodes": [
{
"id": "kb-info",
"name": "知识库信息",
"type": "n8n-nodes-base.stickyNote",
"position": [
250,
150
],
"parameters": {
"content": "## 知识库构建器"
},
"typeVersion": 1
},
{
"id": "daily-update",
"name": "Daily KB Update",
"type": "n8n-nodes-base.scheduleTrigger",
"position": [
450,
300
],
"parameters": {
"rule": {
"interval": [
{
"field": "days",
"daysInterval": 1
}
]
}
},
"typeVersion": 1
},
{
"id": "fetch-papers",
"name": "PDF Vector - Fetch Papers",
"type": "n8n-nodes-pdfvector.pdfVector",
"position": [
650,
300
],
"parameters": {
"limit": 20,
"query": "={{ $json.domain || 'artificial intelligence' }}",
"fields": [
"title",
"authors",
"abstract",
"year",
"doi",
"pdfUrl",
"totalCitations"
],
"resource": "academic",
"yearFrom": "={{ new Date().getFullYear() }}",
"operation": "search",
"providers": [
"semantic_scholar",
"arxiv"
]
},
"typeVersion": 1
},
{
"id": "parse-papers",
"name": "PDF Vector - Parse Papers",
"type": "n8n-nodes-pdfvector.pdfVector",
"position": [
850,
300
],
"parameters": {
"useLlm": "always",
"resource": "document",
"operation": "parse",
"documentUrl": "={{ $json.pdfUrl }}"
},
"typeVersion": 1
},
{
"id": "extract-entities",
"name": "Extract Entities",
"type": "n8n-nodes-base.openAi",
"position": [
1050,
300
],
"parameters": {
"model": "gpt-4",
"options": {
"responseFormat": {
"type": "json_object"
}
},
"messages": {
"values": [
{
"content": "Extract knowledge graph entities from this paper:\n\nTitle: {{ $json.title }}\nContent: {{ $json.content }}\n\nExtract:\n1. Key concepts (5-10 main ideas)\n2. Methods used\n3. Datasets mentioned\n4. Research questions\n5. Key findings\n6. Future directions\n\nAlso identify relationships between these entities.\n\nReturn as structured JSON with entities and relationships arrays."
}
]
}
},
"typeVersion": 1
},
{
"id": "build-graph",
"name": "Build Graph Structure",
"type": "n8n-nodes-base.code",
"position": [
1250,
300
],
"parameters": {
"functionCode": "const extraction = JSON.parse($json.content);\nconst paper = $node['PDF Vector - Fetch Papers'].json;\n\n// Create nodes for Neo4j\nconst nodes = [];\n\n// Paper node\nnodes.push({\n label: 'Paper',\n properties: {\n id: paper.doi || paper.title.replace(/[^a-zA-Z0-9]/g, ''),\n title: paper.title,\n year: paper.year,\n authors: paper.authors.join('; '),\n citations: paper.totalCitations\n }\n});\n\n// Author nodes\npaper.authors.forEach(author => {\n nodes.push({\n label: 'Author',\n properties: {\n name: author\n }\n });\n});\n\n// Concept nodes\nextraction.concepts?.forEach(concept => {\n nodes.push({\n label: 'Concept',\n properties: {\n name: concept\n }\n });\n});\n\n// Method nodes\nextraction.methods?.forEach(method => {\n nodes.push({\n label: 'Method',\n properties: {\n name: method\n }\n });\n});\n\n// Create relationships\nconst relationships = [];\n\n// Paper-Author relationships\npaper.authors.forEach(author => {\n relationships.push({\n from: paper.doi || paper.title,\n to: author,\n type: 'AUTHORED_BY'\n });\n});\n\n// Paper-Concept relationships\nextraction.concepts?.forEach(concept => {\n relationships.push({\n from: paper.doi || paper.title,\n to: concept,\n type: 'DISCUSSES'\n });\n});\n\n// Paper-Method relationships\nextraction.methods?.forEach(method => {\n relationships.push({\n from: paper.doi || paper.title,\n to: method,\n type: 'USES'\n });\n});\n\nreturn { nodes, relationships };"
},
"typeVersion": 1
},
{
"id": "create-nodes",
"name": "Create Graph Nodes",
"type": "n8n-nodes-base.neo4j",
"position": [
1450,
250
],
"parameters": {
"query": "=UNWIND $nodes AS node\nMERGE (n:Node {id: node.properties.id})\nSET n += node.properties\nSET n:${node.label}",
"operation": "create",
"parameters": "={{ { nodes: $json.nodes } }}"
},
"typeVersion": 1
},
{
"id": "create-relationships",
"name": "Create Relationships",
"type": "n8n-nodes-base.neo4j",
"position": [
1450,
350
],
"parameters": {
"query": "=UNWIND $relationships AS rel\nMATCH (a {id: rel.from})\nMATCH (b {id: rel.to})\nMERGE (a)-[r:${rel.type}]->(b)",
"operation": "create",
"parameters": "={{ { relationships: $json.relationships } }}"
},
"typeVersion": 1
},
{
"id": "kb-stats",
"name": "KB Statistics",
"type": "n8n-nodes-base.code",
"position": [
1650,
300
],
"parameters": {
"functionCode": "// Generate knowledge base statistics\nconst stats = {\n papersProcessed: $items().length,\n conceptsExtracted: $json.nodes.filter(n => n.label === 'Concept').length,\n authorsAdded: $json.nodes.filter(n => n.label === 'Author').length,\n methodsIdentified: $json.nodes.filter(n => n.label === 'Method').length,\n timestamp: new Date().toISOString()\n};\n\nreturn stats;"
},
"typeVersion": 1
},
{
"id": "log-update",
"name": "Log KB Update",
"type": "n8n-nodes-base.postgres",
"position": [
1850,
300
],
"parameters": {
"table": "kb_updates",
"columns": "papers_processed,concepts,authors,methods,updated_at",
"operation": "insert"
},
"typeVersion": 1
}
],
"connections": {
"KB Statistics": {
"main": [
[
{
"node": "Log KB Update",
"type": "main",
"index": 0
}
]
]
},
"Daily KB Update": {
"main": [
[
{
"node": "PDF Vector - Fetch Papers",
"type": "main",
"index": 0
}
]
]
},
"Extract Entities": {
"main": [
[
{
"node": "Build Graph Structure",
"type": "main",
"index": 0
}
]
]
},
"Create Graph Nodes": {
"main": [
[
{
"node": "KB Statistics",
"type": "main",
"index": 0
}
]
]
},
"Create Relationships": {
"main": [
[
{
"node": "KB Statistics",
"type": "main",
"index": 0
}
]
]
},
"Build Graph Structure": {
"main": [
[
{
"node": "Create Graph Nodes",
"type": "main",
"index": 0
},
{
"node": "Create Relationships",
"type": "main",
"index": 0
}
]
]
},
"PDF Vector - Fetch Papers": {
"main": [
[
{
"node": "PDF Vector - Parse Papers",
"type": "main",
"index": 0
}
]
]
},
"PDF Vector - Parse Papers": {
"main": [
[
{
"node": "Extract Entities",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
本工作流难度为中级,适用于 AI RAG(检索增强生成)和多模态 AI 场景。
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
使用 PDF 向量、OCR、GPT-4 和 Google Drive 的研究论文分析系统
使用 PDF 向量、OCR、GPT-4 和 Google Drive 的研究论文分析系统
Code
Open Ai
Postgres
+4
11 节点PDF Vector
文档提取
PDF报告监控器 - 含GPT-3.5洞察与Slack/邮件告警
PDF报告监控器 - 含GPT-3.5洞察与Slack/邮件告警
If
Ftp
Code
+7
10 节点PDF Vector
AI 摘要总结
使用GPT-4和多数据库搜索自动化学术文献综述
使用GPT-4和多数据库搜索自动化学术文献综述
If
Set
Code
+4
13 节点PDF Vector
文档提取
自动化学术论文监控,含PDF向量、GPT-3.5和Slack提醒
自动化学术论文监控,含PDF向量、GPT-3.5和Slack提醒
Set
Code
Slack
+5
10 节点PDF Vector
个人效率
使用PDF向量、Google Drive和数据库提取和存储发票数据
使用PDF向量、Google Drive和数据库提取和存储发票数据
If
Code
Slack
+7
26 节点PDF Vector
发票处理
使用GPT-4、PDFVector和PostgreSQL导出从文档提取数据
使用GPT-4、PDFVector和PostgreSQL导出从文档提取数据
Code
Open Ai
Switch
+5
9 节点PDF Vector
文档提取
工作流信息
难度等级
中级
节点数量:10
分类:2
节点类型:7
作者
PDF Vector
@pdfvector
A fully featured PDF API for developers - Parse any PDF or Word document, extract structured data, and access millions of academic papers - all through simple APIs.
外部链接
在 n8n.io 查看 →
分享此工作流