跨五个数据库的学术研究搜索,含PDF向量和多重导出
中级
这是一个AI RAG, Multimodal AI领域的自动化工作流,包含 9 个节点。主要使用 Set, Code, PdfVector, WriteBinaryFile 等节点。 跨五个数据库的学术研究搜索,含PDF向量和多重导出
前置要求
- 无特殊前置要求,导入即可使用
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "placeholder"
},
"nodes": [
{
"id": "search-info",
"name": "搜索配置",
"type": "n8n-nodes-base.stickyNote",
"position": [
250,
150
],
"parameters": {
"content": "## 多数据库搜索"
},
"typeVersion": 1
},
{
"id": "search-params",
"name": "设置搜索参数",
"type": "n8n-nodes-base.set",
"position": [
450,
300
],
"parameters": {
"values": {
"number": [
{
"name": "yearFrom",
"value": 2020
},
{
"name": "resultsPerSource",
"value": 25
}
],
"string": [
{
"name": "searchQuery",
"value": "machine learning healthcare applications"
}
]
}
},
"typeVersion": 1
},
{
"id": "pdfvector-search",
"name": "PDF 向量 - 多数据库搜索",
"type": "n8n-nodes-pdfvector.pdfVector",
"position": [
650,
300
],
"parameters": {
"limit": "={{ $json.resultsPerSource }}",
"query": "={{ $json.searchQuery }}",
"fields": [
"title",
"authors",
"year",
"doi",
"abstract",
"totalCitations",
"pdfUrl",
"provider"
],
"resource": "academic",
"yearFrom": "={{ $json.yearFrom }}",
"operation": "search",
"providers": [
"pubmed",
"semantic_scholar",
"arxiv",
"google_scholar",
"eric"
]
},
"typeVersion": 1
},
{
"id": "deduplicate",
"name": "去重结果",
"type": "n8n-nodes-base.code",
"position": [
850,
300
],
"parameters": {
"functionCode": "// Deduplicate papers based on DOI and title similarity\nconst papers = $json;\nconst unique = new Map();\n\npapers.forEach(paper => {\n // First check DOI\n if (paper.doi && !unique.has(paper.doi)) {\n unique.set(paper.doi, paper);\n } else if (!paper.doi) {\n // For papers without DOI, check title similarity\n const normalizedTitle = paper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n let isDuplicate = false;\n \n for (const [key, existingPaper] of unique) {\n const existingTitle = existingPaper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n if (normalizedTitle === existingTitle) {\n isDuplicate = true;\n // Merge provider info\n if (!existingPaper.providers) existingPaper.providers = [existingPaper.provider];\n existingPaper.providers.push(paper.provider);\n break;\n }\n }\n \n if (!isDuplicate) {\n unique.set(normalizedTitle, paper);\n }\n }\n});\n\nreturn Array.from(unique.values());"
},
"typeVersion": 1
},
{
"id": "rank-results",
"name": "按相关性排序",
"type": "n8n-nodes-base.code",
"position": [
1050,
300
],
"parameters": {
"functionCode": "// Calculate relevance score\nconst papers = $json;\nconst query = $node['Set Search Parameters'].json.searchQuery.toLowerCase();\n\nconst scored = papers.map(paper => {\n let score = 0;\n \n // Title relevance\n const titleWords = paper.title.toLowerCase().split(' ');\n const queryWords = query.split(' ');\n queryWords.forEach(word => {\n if (titleWords.includes(word)) score += 10;\n });\n \n // Citation impact\n score += Math.log(paper.totalCitations + 1) * 5;\n \n // Recency bonus\n const yearDiff = new Date().getFullYear() - paper.year;\n score += Math.max(0, 10 - yearDiff);\n \n // Full text availability\n if (paper.pdfUrl) score += 15;\n \n return { ...paper, relevanceScore: score };\n});\n\n// Sort by relevance\nreturn scored.sort((a, b) => b.relevanceScore - a.relevanceScore);"
},
"typeVersion": 1
},
{
"id": "generate-bibtex",
"name": "生成 BibTeX",
"type": "n8n-nodes-base.code",
"position": [
1250,
250
],
"parameters": {
"functionCode": "// Generate BibTeX entries\nconst papers = $json;\n\nconst bibtex = papers.map((paper, index) => {\n const key = paper.doi ? paper.doi.replace(/[^a-zA-Z0-9]/g, '') : `paper${index}`;\n const authors = paper.authors.join(' and ');\n \n return `@article{${key},\n title={${paper.title}},\n author={${authors}},\n year={${paper.year}},\n doi={${paper.doi || ''}},\n abstract={${paper.abstract || ''}}\n}`;\n}).join('\\n\\n');\n\nreturn { bibtex, papers };"
},
"typeVersion": 1
},
{
"id": "export-bibtex",
"name": "导出 BibTeX 文件",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
250
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.bib",
"fileContent": "={{ $json.bibtex }}"
},
"typeVersion": 1
},
{
"id": "export-json",
"name": "导出 JSON",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
350
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.json",
"fileContent": "={{ JSON.stringify($json.papers, null, 2) }}"
},
"typeVersion": 1
},
{
"id": "export-csv",
"name": "导出 CSV",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1450,
450
],
"parameters": {
"fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.csv",
"fileContent": "={{ $json.papers.map(p => [p.title, p.authors.join(';'), p.year, p.doi, p.totalCitations, p.pdfUrl].join(',\t')).join('\\n') }}"
},
"typeVersion": 1
}
],
"connections": {
"Generate BibTeX": {
"main": [
[
{
"node": "Export BibTeX File",
"type": "main",
"index": 0
},
{
"node": "Export JSON",
"type": "main",
"index": 0
},
{
"node": "Export CSV",
"type": "main",
"index": 0
}
]
]
},
"Rank by Relevance": {
"main": [
[
{
"node": "Generate BibTeX",
"type": "main",
"index": 0
}
]
]
},
"Deduplicate Results": {
"main": [
[
{
"node": "Rank by Relevance",
"type": "main",
"index": 0
}
]
]
},
"Set Search Parameters": {
"main": [
[
{
"node": "PDF Vector - Multi-DB Search",
"type": "main",
"index": 0
}
]
]
},
"PDF Vector - Multi-DB Search": {
"main": [
[
{
"node": "Deduplicate Results",
"type": "main",
"index": 0
}
]
]
}
}
}常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - AI RAG 检索增强, 多模态 AI
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
使用GPT-4和多数据库搜索自动化学术文献综述
使用GPT-4和多数据库搜索自动化学术文献综述
If
Set
Code
+4
13 节点PDF Vector
文档提取
学术引用网络构建器
使用PDF向量API构建学术引用网络,用于Gephi可视化
Set
Code
Pdf Vector
+2
9 节点PDF Vector
文档提取
使用PDF向量、GPT-4和Neo4j构建学术知识图谱
使用PDF向量、GPT-4和Neo4j从研究论文构建学术知识图谱
Code
Neo4j
Open Ai
+4
10 节点PDF Vector
AI RAG 检索增强
使用PDF向量和Webhooks构建文档问答API
使用PDF向量和Webhooks构建文档问答API
If
Code
Webhook
+3
11 节点PDF Vector
内部知识库
使用 PDF 向量、OCR、GPT-4 和 Google Drive 的研究论文分析系统
使用 PDF 向量、OCR、GPT-4 和 Google Drive 的研究论文分析系统
Code
Open Ai
Postgres
+4
11 节点PDF Vector
文档提取
自动化学术论文监控,含PDF向量、GPT-3.5和Slack提醒
自动化学术论文监控,含PDF向量、GPT-3.5和Slack提醒
Set
Code
Slack
+5
10 节点PDF Vector
个人效率
工作流信息
难度等级
中级
节点数量9
分类2
节点类型5
作者
PDF Vector
@pdfvectorA fully featured PDF APIs for developers - Parse any PDF or Word document, extract structured data, and access millions of academic papers - all through simple APIs.
外部链接
在 n8n.io 查看 →
分享此工作流