8
n8n 中文网 amn8n.com

使用 PDF 向量、OCR、GPT-4 和 Google Drive 的研究论文分析系统

中级

这是一个 Document Extraction, AI RAG, Multimodal AI 领域的自动化工作流,包含 11 个节点,主要使用 Code, OpenAi, Postgres, GoogleDrive, ManualTrigger 等节点,用于搭建使用 PDF 向量、OCR、GPT-4 和 Google Drive 的研究论文分析系统。

前置要求
  • OpenAI API Key
  • PostgreSQL 数据库连接信息
  • Google Drive API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "placeholder"
  },
  "nodes": [
    {
      "id": "overview-note",
      "name": "研究概览",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        50,
        50
      ],
      "parameters": {
        "color": 5,
        "width": 350,
        "height": 180,
        "content": "## 📚 研究论文分析器"
      },
      "typeVersion": 1
    },
    {
      "id": "search-note",
      "name": "学术搜索",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        450,
        450
      ],
      "parameters": {
        "width": 260,
        "height": 150,
        "content": "## 🔍 论文搜索"
      },
      "typeVersion": 1
    },
    {
      "id": "extract-note",
      "name": "论文提取",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        750,
        450
      ],
      "parameters": {
        "width": 260,
        "height": 180,
        "content": "## 📄 提取"
      },
      "typeVersion": 1
    },
    {
      "id": "summary-note",
      "name": "AI 分析",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1050,
        450
      ],
      "parameters": {
        "color": 6,
        "width": 260,
        "height": 180,
        "content": "## 🤖 AI 摘要"
      },
      "typeVersion": 1
    },
    {
      "id": "manual-trigger",
      "name": "Manual Trigger",
      "type": "n8n-nodes-base.manualTrigger",
      "notes": "Start paper analysis",
      "position": [
        250,
        300
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "google-drive",
      "name": "Google Drive - Get Paper",
      "type": "n8n-nodes-base.googleDrive",
      "notes": "Retrieve paper from Drive",
      "position": [
        450,
        300
      ],
      "parameters": {
        "fileId": "={{ $json.fileId }}",
        "operation": "download"
      },
      "typeVersion": 3
    },
    {
      "id": "pdfvector-parse",
      "name": "PDF Vector - Parse Paper",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "notes": "Parse research paper",
      "position": [
        650,
        300
      ],
      "parameters": {
        "useLLM": "always",
        "resource": "document",
        "inputType": "file",
        "operation": "parse",
        "binaryPropertyName": "data"
      },
      "typeVersion": 1
    },
    {
      "id": "pdfvector-extract",
      "name": "PDF Vector - Extract Data",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "notes": "Extract structured data",
      "position": [
        850,
        300
      ],
      "parameters": {
        "prompt": "Extract key information from this research document or image including title, authors with affiliations, abstract, keywords, research questions, methodology, key findings, conclusions, limitations, and future work suggestions. Use OCR if this is a scanned document or image.",
        "schema": "{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"authors\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"name\":{\"type\":\"string\"},\"affiliation\":{\"type\":\"string\"},\"email\":{\"type\":\"string\"}}}},\"abstract\":{\"type\":\"string\"},\"keywords\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"researchQuestions\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"methodology\":{\"type\":\"object\",\"properties\":{\"approach\":{\"type\":\"string\"},\"dataCollection\":{\"type\":\"string\"},\"analysis\":{\"type\":\"string\"},\"tools\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}}}},\"findings\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"conclusions\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"limitations\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"futureWork\":{\"type\":\"array\",\"items\":{\"type\":\"string\"}},\"references\":{\"type\":\"number\"}},\"required\":[\"title\",\"authors\"],\"additionalProperties\":false}",
        "resource": "document",
        "inputType": "file",
        "operation": "extract",
        "binaryPropertyName": "data"
      },
      "typeVersion": 1
    },
    {
      "id": "openai-analyze",
      "name": "Generate AI Summary",
      "type": "n8n-nodes-base.openAi",
      "notes": "Create AI summary",
      "position": [
        1050,
        300
      ],
      "parameters": {
        "model": "gpt-4",
        "messages": {
          "values": [
            {
              "content": "Based on this research paper, provide:\n\n1. A concise summary (150 words) suitable for a research database\n2. The main contribution to the field (2-3 sentences)\n3. Potential applications or impact\n4. Classification tags (e.g., empirical study, theoretical framework, review, etc.)\n\nPaper content:\n{{ $node['PDF Vector - Parse Paper'].json.content }}"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "prepare-data",
      "name": "Prepare Database Entry",
      "type": "n8n-nodes-base.code",
      "notes": "Combine all data",
      "position": [
        1250,
        300
      ],
      "parameters": {
        "jsCode": "// Combine all analysis data\nconst parsedContent = $node['PDF Vector - Parse Paper'].json;\nconst extractedData = $node['PDF Vector - Extract Data'].json.data;\nconst aiSummary = $node['Generate AI Summary'].json.choices[0].message.content;\n\n// Calculate reading time (assuming 250 words per minute)\nconst wordCount = parsedContent.content.split(' ').length;\nconst readingTimeMinutes = Math.ceil(wordCount / 250);\n\n// Prepare database entry\nconst paperAnalysis = {\n  // Basic information\n  title: extractedData.title,\n  authors: extractedData.authors,\n  url: $node['Google Drive - Get Paper'].json.webViewLink,\n  \n  // Content\n  abstract: extractedData.abstract,\n  keywords: extractedData.keywords,\n  fullText: parsedContent.content,\n  \n  // Analysis\n  aiSummary: aiSummary,\n  methodology: extractedData.methodology,\n  findings: extractedData.findings,\n  conclusions: extractedData.conclusions,\n  limitations: extractedData.limitations,\n  futureWork: extractedData.futureWork,\n  \n  // Metadata\n  wordCount: wordCount,\n  readingTimeMinutes: readingTimeMinutes,\n  referenceCount: extractedData.references || 0,\n  processedAt: new Date().toISOString(),\n  \n  // Searchable fields\n  searchText: `${extractedData.title} ${extractedData.abstract} ${extractedData.keywords.join(' ')}`.toLowerCase()\n};\n\nreturn [{ json: paperAnalysis }];"
      },
      "typeVersion": 2
    },
    {
      "id": "database-store",
      "name": "Store in Database",
      "type": "n8n-nodes-base.postgres",
      "notes": "Save to research database",
      "position": [
        1450,
        300
      ],
      "parameters": {
        "table": "research_papers",
        "columns": "title,authors,url,abstract,keywords,ai_summary,methodology,findings,processed_at,search_text",
        "operation": "insert"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "Manual Trigger": {
      "main": [
        [
          {
            "node": "Google Drive - Get Paper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate AI Summary": {
      "main": [
        [
          {
            "node": "Prepare Database Entry",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Prepare Database Entry": {
      "main": [
        [
          {
            "node": "Store in Database",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Drive - Get Paper": {
      "main": [
        [
          {
            "node": "PDF Vector - Parse Paper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF Vector - Parse Paper": {
      "main": [
        [
          {
            "node": "PDF Vector - Extract Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF Vector - Extract Data": {
      "main": [
        [
          {
            "node": "Generate AI Summary",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - 文档提取, AI RAG 检索增强, 多模态 AI

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量: 11
分类: 3
节点类型: 7
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
PDF Vector

PDF Vector

@pdfvector

A fully featured set of PDF APIs for developers - parse any PDF or Word document, extract structured data, and access millions of academic papers - all through simple APIs.

外部链接
在 n8n.io 查看

分享此工作流