8
n8n 中文网 amn8n.com

使用 PDF Vector 跨五个数据库搜索学术文献,并支持多种格式导出

中级

这是一个属于 AI RAG、多模态 AI 领域的自动化工作流,包含 9 个节点,主要使用 Set、Code、PdfVector、WriteBinaryFile 等节点。它通过 PDF Vector 跨五个数据库(PubMed、Semantic Scholar、arXiv、Google Scholar、ERIC)搜索学术文献,对结果去重并按相关性排序,最后导出为 BibTeX、JSON 和 CSV 三种格式。

前置要求
  • 需要在 n8n 中安装 PDF Vector 节点(n8n-nodes-pdfvector);使用前可能还需配置 PDF Vector 的 API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "placeholder"
  },
  "nodes": [
    {
      "id": "search-info",
      "name": "搜索配置",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        250,
        150
      ],
      "parameters": {
        "content": "## 多数据库搜索"
      },
      "typeVersion": 1
    },
    {
      "id": "search-params",
      "name": "设置搜索参数",
      "type": "n8n-nodes-base.set",
      "position": [
        450,
        300
      ],
      "parameters": {
        "values": {
          "number": [
            {
              "name": "yearFrom",
              "value": 2020
            },
            {
              "name": "resultsPerSource",
              "value": 25
            }
          ],
          "string": [
            {
              "name": "searchQuery",
              "value": "machine learning healthcare applications"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "pdfvector-search",
      "name": "PDF 向量 - 多数据库搜索",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "position": [
        650,
        300
      ],
      "parameters": {
        "limit": "={{ $json.resultsPerSource }}",
        "query": "={{ $json.searchQuery }}",
        "fields": [
          "title",
          "authors",
          "year",
          "doi",
          "abstract",
          "totalCitations",
          "pdfUrl",
          "provider"
        ],
        "resource": "academic",
        "yearFrom": "={{ $json.yearFrom }}",
        "operation": "search",
        "providers": [
          "pubmed",
          "semantic_scholar",
          "arxiv",
          "google_scholar",
          "eric"
        ]
      },
      "typeVersion": 1
    },
    {
      "id": "deduplicate",
      "name": "去重结果",
      "type": "n8n-nodes-base.code",
      "position": [
        850,
        300
      ],
      "parameters": {
        "functionCode": "// Deduplicate papers based on DOI and title similarity\nconst papers = $json;\nconst unique = new Map();\n\npapers.forEach(paper => {\n  // First check DOI\n  if (paper.doi && !unique.has(paper.doi)) {\n    unique.set(paper.doi, paper);\n  } else if (!paper.doi) {\n    // For papers without DOI, check title similarity\n    const normalizedTitle = paper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n    let isDuplicate = false;\n    \n    for (const [key, existingPaper] of unique) {\n      const existingTitle = existingPaper.title.toLowerCase().replace(/[^a-z0-9]/g, '');\n      if (normalizedTitle === existingTitle) {\n        isDuplicate = true;\n        // Merge provider info\n        if (!existingPaper.providers) existingPaper.providers = [existingPaper.provider];\n        existingPaper.providers.push(paper.provider);\n        break;\n      }\n    }\n    \n    if (!isDuplicate) {\n      unique.set(normalizedTitle, paper);\n    }\n  }\n});\n\nreturn Array.from(unique.values());"
      },
      "typeVersion": 1
    },
    {
      "id": "rank-results",
      "name": "按相关性排序",
      "type": "n8n-nodes-base.code",
      "position": [
        1050,
        300
      ],
      "parameters": {
        "functionCode": "// Calculate relevance score\nconst papers = $json;\nconst query = $node['设置搜索参数'].json.searchQuery.toLowerCase();\n\nconst scored = papers.map(paper => {\n  let score = 0;\n  \n  // Title relevance\n  const titleWords = paper.title.toLowerCase().split(' ');\n  const queryWords = query.split(' ');\n  queryWords.forEach(word => {\n    if (titleWords.includes(word)) score += 10;\n  });\n  \n  // Citation impact\n  score += Math.log(paper.totalCitations + 1) * 5;\n  \n  // Recency bonus\n  const yearDiff = new Date().getFullYear() - paper.year;\n  score += Math.max(0, 10 - yearDiff);\n  \n  // Full text availability\n  if (paper.pdfUrl) score += 15;\n  \n  return { ...paper, relevanceScore: score };\n});\n\n// Sort by relevance\nreturn scored.sort((a, b) => b.relevanceScore - a.relevanceScore);"
      },
      "typeVersion": 1
    },
    {
      "id": "generate-bibtex",
      "name": "生成 BibTeX",
      "type": "n8n-nodes-base.code",
      "position": [
        1250,
        250
      ],
      "parameters": {
        "functionCode": "// Generate BibTeX entries\nconst papers = $json;\n\nconst bibtex = papers.map((paper, index) => {\n  const key = paper.doi ? paper.doi.replace(/[^a-zA-Z0-9]/g, '') : `paper${index}`;\n  const authors = paper.authors.join(' and ');\n  \n  return `@article{${key},\n  title={${paper.title}},\n  author={${authors}},\n  year={${paper.year}},\n  doi={${paper.doi || ''}},\n  abstract={${paper.abstract || ''}}\n}`;\n}).join('\\n\\n');\n\nreturn { bibtex, papers };"
      },
      "typeVersion": 1
    },
    {
      "id": "export-bibtex",
      "name": "导出 BibTeX 文件",
      "type": "n8n-nodes-base.writeBinaryFile",
      "position": [
        1450,
        250
      ],
      "parameters": {
        "fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.bib",
        "fileContent": "={{ $json.bibtex }}"
      },
      "typeVersion": 1
    },
    {
      "id": "export-json",
      "name": "导出 JSON",
      "type": "n8n-nodes-base.writeBinaryFile",
      "position": [
        1450,
        350
      ],
      "parameters": {
        "fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.json",
        "fileContent": "={{ JSON.stringify($json.papers, null, 2) }}"
      },
      "typeVersion": 1
    },
    {
      "id": "export-csv",
      "name": "导出 CSV",
      "type": "n8n-nodes-base.writeBinaryFile",
      "position": [
        1450,
        450
      ],
      "parameters": {
        "fileName": "search_results_{{ $now.format('yyyy-MM-dd') }}.csv",
        "fileContent": "={{ $json.papers.map(p => [p.title, p.authors.join(';'), p.year, p.doi, p.totalCitations, p.pdfUrl].join(',\t')).join('\\n') }}"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "生成 BibTeX": {
      "main": [
        [
          {
            "node": "导出 BibTeX 文件",
            "type": "main",
            "index": 0
          },
          {
            "node": "导出 JSON",
            "type": "main",
            "index": 0
          },
          {
            "node": "导出 CSV",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "按相关性排序": {
      "main": [
        [
          {
            "node": "生成 BibTeX",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "去重结果": {
      "main": [
        [
          {
            "node": "按相关性排序",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "设置搜索参数": {
      "main": [
        [
          {
            "node": "PDF 向量 - 多数据库搜索",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF 向量 - 多数据库搜索": {
      "main": [
        [
          {
            "node": "去重结果",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

适合需要同时检索多个学术数据库、对结果去重并按相关性排序,再导出为 BibTeX、JSON 或 CSV 的文献调研场景。难度:中级;分类:AI RAG 检索增强、多模态 AI。

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 PDF Vector API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量:9
分类:2
节点类型:5
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
PDF Vector

PDF Vector

@pdfvector

Fully featured PDF APIs for developers: parse any PDF or Word document, extract structured data, and access millions of academic papers, all through simple APIs.

外部链接
在 n8n.io 查看

分享此工作流