8
n8n 中文网amn8n.com

使用PDF Vector AI从文档中提取和验证法律引用

中级

这是一个文档提取(Document Extraction)与 AI 摘要(AI Summarization)领域的自动化工作流,包含 8 个节点,主要使用 If、Code、GoogleDrive、ManualTrigger、PdfVector 等节点,用于通过 PDF Vector AI 从文档中提取和验证法律引用。

前置要求
  • Google Drive API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "placeholder"
  },
  "nodes": [
    {
      "id": "manual-trigger",
      "name": "手动触发器",
      "type": "n8n-nodes-base.manualTrigger",
      "notes": "Start citation extraction",
      "position": [
        250,
        300
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "google-drive",
      "name": "Google Drive - 获取法律文档",
      "type": "n8n-nodes-base.googleDrive",
      "notes": "Retrieve document from Drive",
      "position": [
        450,
        300
      ],
      "parameters": {
        "fileId": "={{ $json.fileId }}",
        "operation": "download"
      },
      "typeVersion": 3
    },
    {
      "id": "pdfvector-extract",
      "name": "PDF向量 - 提取引用",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "notes": "Extract all citations",
      "position": [
        650,
        300
      ],
      "parameters": {
        "prompt": "Extract all legal citations from this document or image. Include case citations (with reporter and year), statute citations (with title and section), regulatory citations, and academic citations (with author, title, journal, and year). For each citation, include the surrounding context (1-2 sentences) and page number where it appears. Use OCR if this is a scanned legal document or image.",
        "schema": "{\"type\":\"object\",\"properties\":{\"documentInfo\":{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"documentType\":{\"type\":\"string\"},\"court\":{\"type\":\"string\"},\"date\":{\"type\":\"string\"},\"docketNumber\":{\"type\":\"string\"}}},\"caseCitations\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"caseName\":{\"type\":\"string\"},\"reporter\":{\"type\":\"string\"},\"volume\":{\"type\":\"string\"},\"page\":{\"type\":\"string\"},\"year\":{\"type\":\"string\"},\"court\":{\"type\":\"string\"},\"context\":{\"type\":\"string\"},\"pageNumber\":{\"type\":\"number\"},\"pinCite\":{\"type\":\"string\"}}}},\"statuteCitations\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"code\":{\"type\":\"string\"},\"section\":{\"type\":\"string\"},\"subsection\":{\"type\":\"string\"},\"year\":{\"type\":\"string\"},\"context\":{\"type\":\"string\"},\"pageNumber\":{\"type\":\"number\"}}}},\"regulatoryCitations\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"title\":{\"type\":\"string\"},\"agency\":{\"type\":\"string\"},\"section\":{\"type\":\"string\"},\"context\":{\"type\":\"string\"},\"pageNumber\":{\"type\":\"number\"}}}},\"academicCitations\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"authors\":{\"type\":\"string\"},\"title\":{\"type\":\"string\"},\"journal\":{\"type\":\"string\"},\"volume\":{\"type\":\"string\"},\"page\":{\"type\":\"string\"},\"year\":{\"type\":\"string\"},\"doi\":{\"type\":\"string\"},\"context\":{\"type\":\"string\"},\"pageNumber\":{\"type\":\"number\"}}}},\"otherCitations\":{\"type\":\"array\",\"items\":{\"type\":\"object\",\"properties\":{\"text\":{\"type\":\"string\"},\"type\":{\"type\":\"string\"},\"context\":{\"type\":\"string\"},\"pageNumber\":{\"type\":\"number\"}}}}},\"required\":[\"documentInfo\"],\"additionalProperties\":false}",
        "resource": "document",
        "inputType": "file",
        "operation": "extract",
        "binaryPropertyName": "data"
      },
      "typeVersion": 1
    },
    {
      "id": "analyze-citations",
      "name": "分析与验证引用",
      "type": "n8n-nodes-base.code",
      "notes": "Process citation data",
      "position": [
        850,
        300
      ],
      "parameters": {
        "jsCode": "// Process and validate citations\nconst citations = $input.first().json.data;\nconst citationAnalysis = {\n  summary: {\n    totalCitations: 0,\n    caseLaw: citations.caseCitations?.length || 0,\n    statutes: citations.statuteCitations?.length || 0,\n    regulations: citations.regulatoryCitations?.length || 0,\n    academic: citations.academicCitations?.length || 0,\n    other: citations.otherCitations?.length || 0\n  },\n  validation: {\n    invalidCitations: [],\n    warnings: []\n  },\n  academicDOIs: [],\n  citationNetwork: {}\n};\n\n// Calculate total\ncitationAnalysis.summary.totalCitations = \n  citationAnalysis.summary.caseLaw + \n  citationAnalysis.summary.statutes + \n  citationAnalysis.summary.regulations + \n  citationAnalysis.summary.academic + \n  citationAnalysis.summary.other;\n\n// Validate case citations\nif (citations.caseCitations) {\n  citations.caseCitations.forEach((cite, index) => {\n    // Check for required fields\n    if (!cite.reporter || !cite.volume || !cite.page) {\n      citationAnalysis.validation.invalidCitations.push({\n        type: 'case',\n        index,\n        citation: cite.caseName,\n        issue: 'Missing reporter, volume, or page'\n      });\n    }\n    \n    // Build citation network (track which cases cite which)\n    if (!citationAnalysis.citationNetwork[cite.caseName]) {\n      citationAnalysis.citationNetwork[cite.caseName] = {\n        // documentInfo may be absent from the extraction result; do not throw on it\n        citedIn: [citations.documentInfo?.title],\n        pageNumbers: [cite.pageNumber]\n      };\n    } else {\n      // Same case cited again: record the additional page instead of dropping it\n      citationAnalysis.citationNetwork[cite.caseName].pageNumbers.push(cite.pageNumber);\n    }\n  });\n}\n\n// Validate statute citations\nif (citations.statuteCitations) {\n  citations.statuteCitations.forEach((cite, index) => {\n    if (!cite.title || !cite.section) {\n      citationAnalysis.validation.invalidCitations.push({\n        type: 'statute',\n        index,\n        citation: `${cite.title} ${cite.code}`,\n        issue: 'Missing title or section'\n      });\n    }\n  });\n}\n\n// Extract DOIs for academic fetching\nif (citations.academicCitations) {\n  citations.academicCitations.forEach(cite => {\n    if (cite.doi) {\n      citationAnalysis.academicDOIs.push(cite.doi);\n    } else {\n      // Try to construct search query for papers without DOI\n      citationAnalysis.validation.warnings.push({\n        type: 'academic',\n        citation: cite.title,\n        warning: 'No DOI found - manual search may be needed'\n      });\n    }\n  });\n}\n\n// Analyze citation patterns\nconst citationPatterns = {\n  mostCitedCases: [],\n  primaryAuthorities: [],\n  secondaryAuthorities: []\n};\n\n// Identify primary authorities (statutes and binding cases)\ncitationPatterns.primaryAuthorities = [\n  ...citations.statuteCitations?.map(c => `${c.title} ${c.code} § ${c.section}`) || [],\n  ...citations.caseCitations?.filter(c => c.court?.includes('Supreme'))?.map(c => c.caseName) || []\n];\n\n// Identify secondary authorities\ncitationPatterns.secondaryAuthorities = \n  citations.academicCitations?.map(c => `${c.authors}, ${c.title}`) || [];\n\nreturn [{\n  json: {\n    originalData: citations,\n    analysis: citationAnalysis,\n    patterns: citationPatterns,\n    doisToFetch: citationAnalysis.academicDOIs.join(','),\n    processedAt: new Date().toISOString()\n  }\n}];"
      },
      "typeVersion": 1
    },
    {
      "id": "has-dois",
      "name": "是否有学术DOI?",
      "type": "n8n-nodes-base.if",
      "position": [
        1050,
        300
      ],
      "parameters": {
        "conditions": {
          "string": [
            {
              "value1": "={{ $json.doisToFetch }}",
              "operation": "isNotEmpty"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "pdfvector-fetch",
      "name": "PDF向量 - 获取论文",
      "type": "n8n-nodes-pdfvector.pdfVector",
      "notes": "Retrieve academic papers",
      "position": [
        1250,
        250
      ],
      "parameters": {
        "ids": "={{ $json.doisToFetch }}",
        "fields": [
          "title",
          "abstract",
          "authors",
          "year",
          "doi",
          "pdfURL",
          "totalCitations"
        ],
        "resource": "academic",
        "operation": "fetch"
      },
      "typeVersion": 1
    },
    {
      "id": "generate-report",
      "name": "生成引用报告",
      "type": "n8n-nodes-base.code",
      "notes": "Create final report",
      "position": [
        1450,
        300
      ],
      "parameters": {
        "jsCode": "// Generate comprehensive citation report\n// The node name below must match the IF node's \"name\" in this workflow exactly\nconst citationData = $node['是否有学术DOI?'].json;\nconst academicPapers = $json.publications || [];\n\n// Create citation report\nlet report = `# Legal Citation Analysis Report\\n\\n`;\nreport += `**Document:** ${citationData.originalData.documentInfo.title}\\n`;\nreport += `**Type:** ${citationData.originalData.documentInfo.documentType}\\n`;\nreport += `**Date:** ${citationData.originalData.documentInfo.date}\\n\\n`;\n\nreport += `## Citation Summary\\n\\n`;\nreport += `- **Total Citations:** ${citationData.analysis.summary.totalCitations}\\n`;\nreport += `- **Case Law:** ${citationData.analysis.summary.caseLaw}\\n`;\nreport += `- **Statutes:** ${citationData.analysis.summary.statutes}\\n`;\nreport += `- **Regulations:** ${citationData.analysis.summary.regulations}\\n`;\nreport += `- **Academic:** ${citationData.analysis.summary.academic}\\n`;\nreport += `- **Other:** ${citationData.analysis.summary.other}\\n\\n`;\n\n// Add validation issues\nif (citationData.analysis.validation.invalidCitations.length > 0) {\n  report += `## Citation Issues\\n\\n`;\n  citationData.analysis.validation.invalidCitations.forEach(issue => {\n    report += `- **${issue.type}:** ${issue.citation} - ${issue.issue}\\n`;\n  });\n  report += `\\n`;\n}\n\n// Add case law section\nif (citationData.originalData.caseCitations?.length > 0) {\n  report += `## Case Law Citations\\n\\n`;\n  citationData.originalData.caseCitations.forEach(cite => {\n    report += `### ${cite.caseName}\\n`;\n    report += `- **Citation:** ${cite.volume} ${cite.reporter} ${cite.page} (${cite.year})\\n`;\n    report += `- **Court:** ${cite.court || 'Not specified'}\\n`;\n    report += `- **Context:** ${cite.context}\\n`;\n    report += `- **Page:** ${cite.pageNumber}\\n\\n`;\n  });\n}\n\n// Add statute section\nif (citationData.originalData.statuteCitations?.length > 0) {\n  report += `## Statutory Citations\\n\\n`;\n  citationData.originalData.statuteCitations.forEach(cite => {\n    report += `- **${cite.title} ${cite.code} § ${cite.section}**${cite.subsection ? ` (${cite.subsection})` : ''}\\n`;\n    report += `  - Context: ${cite.context}\\n`;\n    report += `  - Page: ${cite.pageNumber}\\n\\n`;\n  });\n}\n\n// Add academic section with fetched data\nif (citationData.originalData.academicCitations?.length > 0) {\n  report += `## Academic Citations\\n\\n`;\n  citationData.originalData.academicCitations.forEach(cite => {\n    report += `### ${cite.title}\\n`;\n    report += `- **Authors:** ${cite.authors}\\n`;\n    report += `- **Journal:** ${cite.journal}, Vol. ${cite.volume}, p. ${cite.page} (${cite.year})\\n`;\n    \n    // Add fetched paper data if available\n    const fetchedPaper = academicPapers.find(p => p.doi === cite.doi);\n    if (fetchedPaper) {\n      report += `- **Citations:** ${fetchedPaper.totalCitations || 0}\\n`;\n      report += `- **Abstract Available:** Yes\\n`;\n      if (fetchedPaper.pdfURL) {\n        report += `- **Full Text:** [Available](${fetchedPaper.pdfURL})\\n`;\n      }\n    }\n    \n    report += `- **Context:** ${cite.context}\\n`;\n    report += `- **Page:** ${cite.pageNumber}\\n\\n`;\n  });\n}\n\n// Add citation patterns\nreport += `## Citation Analysis\\n\\n`;\nreport += `### Primary Authorities\\n`;\ncitationData.patterns.primaryAuthorities.forEach(auth => {\n  report += `- ${auth}\\n`;\n});\nreport += `\\n### Secondary Authorities\\n`;\ncitationData.patterns.secondaryAuthorities.forEach(auth => {\n  report += `- ${auth}\\n`;\n});\n\nreturn [{\n  json: {\n    report,\n    citationData,\n    academicPapers,\n    exportFormat: 'markdown',\n    generatedAt: new Date().toISOString()\n  }\n}];"
      },
      "typeVersion": 1
    },
    {
      "id": "save-report",
      "name": "保存引用报告",
      "type": "n8n-nodes-base.writeBinaryFile",
      "notes": "Export report",
      "position": [
        1650,
        300
      ],
      "parameters": {
        "fileName": "=citation_report_{{ $now.format('yyyy-MM-dd_HH-mm') }}.md",
        "fileContent": "={{ $json.report }}"
      },
      "typeVersion": 1
    }
  ],
  "connections": {
    "手动触发器": {
      "main": [
        [
          {
            "node": "Google Drive - 获取法律文档",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "是否有学术DOI?": {
      "main": [
        [
          {
            "node": "PDF向量 - 获取论文",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "生成引用报告",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "生成引用报告": {
      "main": [
        [
          {
            "node": "保存引用报告",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF向量 - 获取论文": {
      "main": [
        [
          {
            "node": "生成引用报告",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "分析与验证引用": {
      "main": [
        [
          {
            "node": "是否有学术DOI?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "PDF向量 - 提取引用": {
      "main": [
        [
          {
            "node": "分析与验证引用",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Drive - 获取法律文档": {
      "main": [
        [
          {
            "node": "PDF向量 - 提取引用",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - 文档提取, AI 摘要总结

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 PDF Vector API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量8
分类2
节点类型6
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
PDF Vector

PDF Vector

@pdfvector

Fully featured PDF APIs for developers - parse any PDF or Word document, extract structured data, and access millions of academic papers, all through simple APIs.

外部链接
在 n8n.io 查看

分享此工作流