8
n8n 中文网amn8n.com

使用GPT-4和多数据库搜索自动化学术文献综述

中级

这是一个 Document Extraction, AI RAG, Multimodal AI 领域的自动化工作流,包含 13 个节点,主要使用 If, Set, Code, OpenAi, SplitInBatches 等节点,用于使用 GPT-4 和多数据库搜索自动化生成学术文献综述。

前置要求
  • OpenAI API Key
  • PDF Vector 社区节点 (n8n-nodes-pdfvector) 及其 API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "placeholder"
  },
  "nodes": [
    {
      "id": "overview-note",
      "name": "审查概览",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        50,
        50
      ],
      "parameters": {
        "color": 5,
        "width": 350,
        "height": 180,
        "content": "## 📖 文献综述生成器"
      },
      "typeVersion": 1
    },
    {
      "id": "search-note",
      "name": "数据库搜索",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        450,
        450
      ],
      "parameters": {
        "width": 260,
        "height": 160,
        "content": "## 🔍 搜索策略"
      },
      "typeVersion": 1
    },
    {
      "id": "quality-note",
      "name": "研究质量",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        850,
        450
      ],
      "parameters": {
        "width": 260,
        "height": 150,
        "content": "## 📊 质量评估"
      },
      "typeVersion": 1
    },
    {
      "id": "output-note",
      "name": "最终综述",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1150,
        450
      ],
      "parameters": {
        "color": 6,
        "width": 260,
        "height": 180,
        "content": "## 📝 综述输出"
      },
      "typeVersion": 1
    },
    {
      "id": "set-parameters",
      "name": "设置搜索参数",
      "type": "n8n-nodes-base.set",
      "notes": "Configure literature review parameters",
      "position": [
        250,
        300
      ],
      "parameters": {
        "values": {
          "number": [
            {
              "name": "maxPapers",
              "value": 20
            }
          ],
          "string": [
            {
              "name": "topic",
              "value": "machine learning in healthcare"
            },
            {
              "name": "yearFrom",
              "value": "2020"
            },
            {
              "name": "yearTo",
              "value": "2024"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "pdfvector-search",
      "name": "PDF 向量 - 搜索论文",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "notes": "Search academic databases",
      "position": [
        450,
        300
      ],
      "parameters": {
        "limit": 50,
        "query": "={{ $json.topic }}",
        "yearTo": "={{ $json.yearTo }}",
        "resource": "academic",
        "yearFrom": "={{ $json.yearFrom }}",
        "operation": "search",
        "providers": [
          "pubmed",
          "semantic-scholar",
          "arxiv"
        ],
        "additionalFields": {
          "fields": [
            "title",
            "abstract",
            "authors",
            "year",
            "doi",
            "pdfURL",
            "totalCitations"
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "rank-papers",
      "name": "排名和选择论文",
      "type": "n8n-nodes-base.code",
      "notes": "Rank papers by relevance",
      "position": [
        650,
        300
      ],
      "parameters": {
        "jsCode": "// Rank papers by relevance and citations\nconst params = $node['设置搜索参数'].json;\nconst papers = $input.all().map(item => item.json);\n\n// Topic words are matched as lowercase substrings; empty tokens are dropped so they cannot match every text\nconst topicWords = String(params.topic || '').toLowerCase().split(' ').filter(Boolean);\nconst countMatches = text => {\n  // Missing titles/abstracts count as zero matches instead of throwing\n  const haystack = String(text || '').toLowerCase();\n  return topicWords.filter(word => haystack.includes(word)).length;\n};\n\n// Computed once for the whole result set instead of once per paper\nconst maxCitations = Math.max(0, ...papers.map(p => p.totalCitations || 0));\nconst currentYear = new Date().getFullYear();\n\n// Calculate relevance scores\nconst scoredPapers = papers.map(paper => {\n  // Citation score: 0-40, normalized against the most-cited paper\n  const citationScore = (paper.totalCitations || 0) / (maxCitations || 1) * 40;\n\n  // Recency score: 20 minus 2 per year of age, floored at 0; 0 when the year is missing or unparsable\n  const paperYear = parseInt(paper.year, 10);\n  const recencyScore = Number.isNaN(paperYear) ? 0 : Math.max(0, 20 - (currentYear - paperYear) * 2);\n\n  // 10 points per topic word found in the title, 5 per topic word found in the abstract\n  const titleMatches = countMatches(paper.title);\n  const abstractMatches = countMatches(paper.abstract);\n\n  const score = citationScore + recencyScore + titleMatches * 10 + abstractMatches * 5;\n\n  return {\n    ...paper,\n    relevanceScore: Math.round(score),\n    rankingDetails: {\n      citationScore: Math.round(citationScore),\n      recencyScore,\n      titleRelevance: titleMatches,\n      abstractRelevance: abstractMatches\n    }\n  };\n});\n\n// Sort by score and limit to top N\nconst maxPapers = params.maxPapers;\nconst topPapers = scoredPapers\n  .sort((a, b) => b.relevanceScore - a.relevanceScore)\n  .slice(0, maxPapers);\n\nreturn topPapers.map(paper => ({ json: paper }));"
      },
      "typeVersion": 2
    },
    {
      "id": "split-batch",
      "name": "逐个处理",
      "type": "n8n-nodes-base.splitInBatches",
      "notes": "Process papers individually",
      "position": [
        850,
        300
      ],
      "parameters": {
        "options": {},
        "batchSize": 1
      },
      "typeVersion": 1
    },
    {
      "id": "has-pdf",
      "name": "有 PDF 吗?",
      "type": "n8n-nodes-base.if",
      "position": [
        1050,
        300
      ],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{ $json.pdfURL }}",
              "operation": "isNotEmpty"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "pdfvector-parse",
      "name": "PDF 向量 - 解析论文",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "notes": "Parse paper content from PDF or image",
      "position": [
        1250,
        250
      ],
      "parameters": {
        "url": "={{ $json.pdfURL }}",
        "useLLM": "auto",
        "resource": "document",
        "inputType": "url",
        "operation": "parse"
      },
      "typeVersion": 1
    },
    {
      "id": "analyze-paper",
      "name": "分析论文内容",
      "type": "n8n-nodes-base.openAi",
      "notes": "Generate review entry",
      "position": [
        1450,
        300
      ],
      "parameters": {
        "model": "gpt-4",
        "messages": {
          "values": [
            {
              "content": "=Create a literature review entry for this paper in the context of '{{ $node['设置搜索参数'].json.topic }}':\n\nTitle: {{ $json.title }}\nAuthors: {{ $json.authors }}\nYear: {{ $json.year }}\nCitations: {{ $json.totalCitations }}\n\nContent: {{ $json.content || $json.abstract }}\n\nProvide:\n1. A 3-4 sentence summary of the paper's contribution\n2. Key methodology used\n3. Main findings (2-3 bullet points)\n4. How it relates to the topic\n5. Limitations mentioned\n6. Suggested citation in APA format"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "store-entry",
      "name": "存储综述条目",
      "type": "n8n-nodes-base.set",
      "notes": "Save processed entry",
      "position": [
        1650,
        300
      ],
      "parameters": {
        "values": {
          "string": [
            {
              "name": "reviewEntry",
              "value": "={{ $json.choices[0].message.content }}"
            },
            {
              "name": "paperTitle",
              "value": "={{ $node['有 PDF 吗?'].json.title }}"
            },
            {
              "name": "paperDoi",
              "value": "={{ $node['有 PDF 吗?'].json.doi }}"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "compile-review",
      "name": "编译文献综述",
      "type": "n8n-nodes-base.code",
      "notes": "Generate final document",
      "position": [
        1850,
        300
      ],
      "parameters": {
        "jsCode": "// Collect every review entry passed into this node\nconst allEntries = $input.all().map(item => item.json);\nconst params = $node['设置搜索参数'].json;\n\n// Group papers by themes/methodologies\nconst themes = {\n  'Machine Learning Models': [],\n  'Clinical Applications': [],\n  'Data Processing': [],\n  'Evaluation Studies': [],\n  'Review Papers': [],\n  'Other': []\n};\n\n// Categorize papers by the first matching keyword group (simplified - in production use NLP)\nallEntries.forEach(entry => {\n  // Entries without review text fall through to 'Other' instead of throwing\n  const review = String(entry.reviewEntry || '').toLowerCase();\n  if (review.includes('neural network') || review.includes('deep learning')) {\n    themes['Machine Learning Models'].push(entry);\n  } else if (review.includes('clinical') || review.includes('patient')) {\n    themes['Clinical Applications'].push(entry);\n  } else if (review.includes('preprocessing') || review.includes('data processing')) {\n    themes['Data Processing'].push(entry);\n  } else if (review.includes('evaluation') || review.includes('comparison')) {\n    themes['Evaluation Studies'].push(entry);\n  } else if (review.includes('review') || review.includes('survey')) {\n    themes['Review Papers'].push(entry);\n  } else {\n    themes['Other'].push(entry);\n  }\n});\n\n// Generate literature review document\nlet reviewDocument = `# Literature Review: ${params.topic}\\n\\n`;\nreviewDocument += `Generated on: ${new Date().toLocaleDateString()}\\n\\n`;\nreviewDocument += `## Summary\\n\\n`;\nreviewDocument += `This review analyzes ${allEntries.length} papers published between ${params.yearFrom} and ${params.yearTo} on the topic of ${params.topic}.\\n\\n`;\n\n// Add themed sections (empty themes are omitted)\nObject.entries(themes).forEach(([theme, papers]) => {\n  if (papers.length > 0) {\n    reviewDocument += `## ${theme} (${papers.length} papers)\\n\\n`;\n    papers.forEach(paper => {\n      reviewDocument += `### ${paper.paperTitle}\\n\\n`;\n      reviewDocument += (paper.reviewEntry || '') + '\\n\\n';\n    });\n  }\n});\n\n// Add bibliography: use the text after the 'Suggested citation:' marker when the entry contains it\nreviewDocument += `## Bibliography\\n\\n`;\nallEntries.forEach((entry, index) => {\n  const citation = String(entry.reviewEntry || '').split('Suggested citation:')[1] || 'Citation not available';\n  reviewDocument += `${index + 1}. ${citation.trim()}\\n\\n`;\n});\n\nreturn [{\n  json: {\n    reviewDocument,\n    totalPapers: allEntries.length,\n    themes: Object.entries(themes).map(([theme, papers]) => ({\n      theme,\n      count: papers.length\n    })),\n    generatedAt: new Date().toISOString()\n  }\n}];"
      },
      "typeVersion": 2
    }
  ],
  "connections": {
    "有 PDF 吗?": {
      "main": [
        [
          {
            "node": "PDF 向量 - 解析论文",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "分析论文内容",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "逐个处理": {
      "main": [
        [
          {
            "node": "有 PDF 吗?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "存储综述条目": {
      "main": [
        [
          {
            "node": "逐个处理",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "排名和选择论文": {
      "main": [
        [
          {
            "node": "逐个处理",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "分析论文内容": {
      "main": [
        [
          {
            "node": "存储综述条目",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "设置搜索参数": {
      "main": [
        [
          {
            "node": "PDF 向量 - 搜索论文",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF 向量 - 解析论文": {
      "main": [
        [
          {
            "node": "分析论文内容",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF 向量 - 搜索论文": {
      "main": [
        [
          {
            "node": "排名和选择论文",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - 文档提取, AI RAG 检索增强, 多模态 AI

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量: 13
分类: 3
节点类型: 7
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
PDF Vector

PDF Vector

@pdfvector

A fully featured PDF API for developers - parse any PDF or Word document, extract structured data, and access millions of academic papers - all through simple APIs.

外部链接
在 n8n.io 查看

分享此工作流