使用 PDF Vector、OCR、GPT-4 和 Google Drive 的研究论文分析系统
中级
这是一个文档提取、AI RAG 检索增强、多模态 AI 领域的自动化工作流,包含 11 个节点。主要使用 Code、OpenAi、Postgres、GoogleDrive、ManualTrigger 等节点。使用 PDF Vector、OCR、GPT-4 和 Google Drive 的研究论文分析系统。
前置要求
- OpenAI API Key
- PostgreSQL 数据库连接信息
- Google Drive API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "placeholder"
},
"nodes": [
{
"id": "overview-note",
"name": "研究概览",
"type": "n8n-nodes-base.stickyNote",
"position": [
50,
50
],
"parameters": {
"color": 5,
"width": 350,
"height": 180,
"content": "## 📚 研究论文分析器"
},
"typeVersion": 1
},
{
"id": "search-note",
"name": "学术搜索",
"type": "n8n-nodes-base.stickyNote",
"position": [
450,
450
],
"parameters": {
"width": 260,
"height": 150,
"content": "## 🔍 论文搜索"
},
"typeVersion": 1
},
{
"id": "extract-note",
"name": "论文提取",
"type": "n8n-nodes-base.stickyNote",
"position": [
750,
450
],
"parameters": {
"width": 260,
"height": 180,
"content": "## 📄 提取"
},
"typeVersion": 1
},
{
"id": "summary-note",
"name": "AI 分析",
"type": "n8n-nodes-base.stickyNote",
"position": [
1050,
450
],
"parameters": {
"color": 6,
"width": 260,
"height": 180,
"content": "## 🤖 AI 摘要"
},
"typeVersion": 1
},
{
"id": "manual-trigger",
"name": "手动触发器",
"type": "n8n-nodes-base.manualTrigger",
"notes": "Start paper analysis",
"position": [
250,
300
],
"parameters": {},
"typeVersion": 1
},
{
"id": "google-drive",
"name": "Google Drive - 获取论文",
"type": "n8n-nodes-base.googleDrive",
"notes": "Retrieve paper from Drive",
"position": [
450,
300
],
"parameters": {
"fileId": "={{ $json.fileId }}",
"operation": "download"
},
"typeVersion": 3
},
{
"id": "pdfvector-parse",
"name": "PDF Vector - 解析论文",
"type": "n8n-nodes-pdfvector.pdfVector",
"notes": "Parse research paper",
"position": [
650,
300
],
"parameters": {
"useLLM": "always",
"resource": "document",
"inputType": "file",
"operation": "parse",
"binaryPropertyName": "data"
},
"typeVersion": 1
},
{
"id": "pdfvector-extract",
"name": "PDF Vector - 提取数据",
"type": "n8n-nodes-pdfvector.pdfVector",
"notes": "Extract structured data",
"position": [
850,
300
],
"parameters": {
"prompt": "Extract key information from this research document or image including title, authors with affiliations, abstract, keywords, research questions, methodology, key findings, conclusions, limitations, and future work suggestions. Use OCR if this is a scanned document or image.",
"schema": "{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"authors\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"affiliation\":{\"type\":\"string\"},\"email\":{\"type\":\"string\"}}}},\"abstract\":{\"type\":\"string\"},\"keywords\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"researchQuestions\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"methodology\":{\"type\":\"object\",\"properties\":{\"approach\":{\"type\":\"string\"},\"dataCollection\":{\"type\":\"string\"},\"analysis\":{\"type\":\"string\"},\"tools\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}},\"findings\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"conclusions\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"limitations\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"futureWork\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"references\":{\"type\":\"number\"}},\"required\":[\"title\",\"authors\"],\"additionalProperties\":false}",
"resource": "document",
"inputType": "file",
"operation": "extract",
"binaryPropertyName": "data"
},
"typeVersion": 1
},
{
"id": "openai-analyze",
"name": "生成 AI 摘要",
"type": "n8n-nodes-base.openAi",
"notes": "Create AI summary",
"position": [
1050,
300
],
"parameters": {
"model": "gpt-4",
"messages": {
"values": [
{
"content": "Based on this research paper, provide:\n\n1. A concise summary (150 words) suitable for a research database\n2. The main contribution to the field (2-3 sentences)\n3. Potential applications or impact\n4. Classification tags (e.g., empirical study, theoretical framework, review, etc.)\n\nPaper content:\n{{ $node['PDF Vector - Parse Paper'].json.content }}"
}
]
}
},
"typeVersion": 1
},
{
"id": "prepare-data",
"name": "准备数据库条目",
"type": "n8n-nodes-base.code",
"notes": "Combine all data",
"position": [
1250,
300
],
"parameters": {
"jsCode": "// Combine all analysis data\nconst parsedContent = $node['PDF Vector - Parse Paper'].json;\nconst extractedData = $node['PDF Vector - Extract Data'].json.data;\nconst aiSummary = $node['Generate AI Summary'].json.choices[0].message.content;\n\n// Calculate reading time (assuming 250 words per minute)\nconst wordCount = parsedContent.content.split(' ').length;\nconst readingTimeMinutes = Math.ceil(wordCount / 250);\n\n// Prepare database entry\nconst paperAnalysis = {\n // Basic information\n title: extractedData.title,\n authors: extractedData.authors,\n url: $node['Google Drive - Get Paper'].json.webViewLink,\n \n // Content\n abstract: extractedData.abstract,\n keywords: extractedData.keywords,\n fullText: parsedContent.content,\n \n // Analysis\n aiSummary: aiSummary,\n methodology: extractedData.methodology,\n findings: extractedData.findings,\n conclusions: extractedData.conclusions,\n limitations: extractedData.limitations,\n futureWork: extractedData.futureWork,\n \n // Metadata\n wordCount: wordCount,\n readingTimeMinutes: readingTimeMinutes,\n referenceCount: extractedData.references || 0,\n processedAt: new Date().toISOString(),\n \n // Searchable fields\n searchText: `${extractedData.title} ${extractedData.abstract} ${extractedData.keywords.join(' ')}`.toLowerCase()\n};\n\nreturn [{ json: paperAnalysis }];"
},
"typeVersion": 2
},
{
"id": "database-store",
"name": "存储到数据库",
"type": "n8n-nodes-base.postgres",
"notes": "Save to research database",
"position": [
1450,
300
],
"parameters": {
"table": "research_papers",
"columns": "title,authors,url,abstract,keywords,ai_summary,methodology,findings,processed_at,search_text",
"operation": "insert"
},
"typeVersion": 1
}
],
"connections": {
"Manual Trigger": {
"main": [
[
{
"node": "Google Drive - Get Paper",
"type": "main",
"index": 0
}
]
]
},
"Generate AI Summary": {
"main": [
[
{
"node": "Prepare Database Entry",
"type": "main",
"index": 0
}
]
]
},
"Prepare Database Entry": {
"main": [
[
{
"node": "Store in Database",
"type": "main",
"index": 0
}
]
]
},
"Google Drive - Get Paper": {
"main": [
[
{
"node": "PDF Vector - Parse Paper",
"type": "main",
"index": 0
}
]
]
},
"PDF Vector - Parse Paper": {
"main": [
[
{
"node": "PDF Vector - Extract Data",
"type": "main",
"index": 0
}
]
]
},
"PDF Vector - Extract Data": {
"main": [
[
{
"node": "Generate AI Summary",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 文档提取, AI RAG 检索增强, 多模态 AI
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
使用PDF Vector和HIPAA合规从医疗文档提取临床数据
使用PDF Vector和HIPAA合规从医疗文档提取临床数据
If
Code
Postgres
+4
9 节点PDF Vector
文档提取
使用GPT-4和多数据库搜索自动化学术文献综述
使用GPT-4和多数据库搜索自动化学术文献综述
If
Set
Code
+4
13 节点PDF Vector
文档提取
使用PDF向量、GPT-4和Neo4j构建学术知识图谱
使用PDF向量、GPT-4和Neo4j从研究论文构建学术知识图谱
Code
Neo4j
Open Ai
+4
10 节点PDF Vector
AI RAG 检索增强
使用GPT-4、PDFVector和PostgreSQL导出从文档提取数据
使用GPT-4、PDFVector和PostgreSQL导出从文档提取数据
Code
Open Ai
Switch
+5
9 节点PDF Vector
文档提取
使用PDF Vector进行OCR、分析和Google Drive的文档处理
使用PDF Vector进行OCR、分析和Google Drive的文档处理
Set
Code
Split Out
+6
13 节点PDF Vector
文档提取
使用GPT-4和多数据库搜索构建全面文献综述
使用GPT-4和多数据库搜索构建全面文献综述
Code
Open Ai
Pdf Vector
+2
8 节点PDF Vector
文档提取
工作流信息
难度等级
中级
节点数量:11
分类:3
节点类型:7
作者
PDF Vector
@pdfvector
A fully featured PDF API for developers - Parse any PDF or Word document, extract structured data, and access millions of academic papers - all through simple APIs.
外部链接
在 n8n.io 查看 →
分享此工作流