Extrahiere Daten aus Dokumenten mit GPT-4, PDFVector und PostgreSQL-Export
Dies ist ein Automatisierungsworkflow mit 9 Nodes aus dem Bereich Document Extraction und Multimodal AI. Hauptsächlich werden die Nodes Code, OpenAi, Switch, Postgres und PdfVector sowie weitere Nodes verwendet. Extrahieren Sie Daten aus Dokumenten mit GPT-4, PDFVector und PostgreSQL-Export.
- OpenAI API Key
- PostgreSQL-Datenbankverbindungsdaten
Verwendete Nodes (9)
Kategorie
{
"meta": {
"instanceId": "placeholder"
},
"nodes": [
{
"id": "workflow-info",
"name": "Pipeline-Info",
"type": "n8n-nodes-base.stickyNote",
"position": [
250,
150
],
"parameters": {
"content": "## Document Extraction Pipeline\n\nExtracts structured data from:\n- Invoices\n- Contracts\n- Reports\n- Forms\n\nCustomize extraction rules in the AI node"
},
"typeVersion": 1
},
{
"id": "file-trigger",
"name": "Watch Folder",
"type": "n8n-nodes-base.localFileTrigger",
"notes": "Triggers when new documents arrive",
"position": [
450,
300
],
"parameters": {
"path": "/documents/incoming",
"events": [
"file:created"
]
},
"typeVersion": 1
},
{
"id": "pdfvector-parse",
"name": "PDF Vector - Parse Document",
"type": "n8n-nodes-pdfvector.pdfVector",
"notes": "Parse with LLM for better extraction",
"position": [
650,
300
],
"parameters": {
"useLlm": "always",
"resource": "document",
"operation": "parse",
"documentUrl": "={{ $json.filePath }}"
},
"typeVersion": 1
},
{
"id": "extract-data",
"name": "Strukturierte Daten extrahieren",
"type": "n8n-nodes-base.openAi",
"position": [
850,
300
],
"parameters": {
"model": "gpt-4",
"options": {
"responseFormat": {
"type": "json_object"
}
},
"messages": {
"values": [
{
"content": "=Extract the following information from this document:\n\n1. Document Type (invoice, contract, report, etc.)\n2. Date/Dates mentioned\n3. Parties involved (names, companies)\n4. Key amounts/values\n5. Important terms or conditions\n6. Reference numbers\n7. Addresses\n8. Contact information\n\nDocument content:\n{{ $json.content }}\n\nReturn as structured JSON."
}
]
}
},
"typeVersion": 1
},
{
"id": "validate-data",
"name": "Daten validieren & bereinigen",
"type": "n8n-nodes-base.code",
"position": [
1050,
300
],
"parameters": {
"functionCode": "// Validate and clean extracted data\nconst extracted = JSON.parse($json.content);\nconst validated = {};\n\n// Validate document type\nvalidated.documentType = extracted.documentType || 'unknown';\n\n// Parse and validate dates\nif (extracted.date) {\n const date = new Date(extracted.date);\n validated.date = isNaN(date) ? null : date.toISOString();\n}\n\n// Clean monetary values\nif (extracted.amounts) {\n validated.amounts = extracted.amounts.map(amt => {\n const cleaned = amt.replace(/[^0-9.-]/g, '');\n return parseFloat(cleaned) || 0;\n });\n}\n\n// Validate email addresses\nif (extracted.emails) {\n validated.emails = extracted.emails.filter(email => \n /^[^\\s@]+@[^\\s@]+\\.[^\\s@]+$/.test(email)\n );\n}\n\nvalidated.raw = extracted;\nvalidated.fileName = $node['Watch Folder'].json.fileName;\nvalidated.processedAt = new Date().toISOString();\n\nreturn validated;"
},
"typeVersion": 1
},
{
"id": "route-by-type",
"name": "Nach Dokumenttyp weiterleiten",
"type": "n8n-nodes-base.switch",
"position": [
1250,
300
],
"parameters": {
"conditions": {
"string": [
{
"value1": "={{ $json.documentType }}",
"value2": "invoice",
"operation": "equals"
}
]
}
},
"typeVersion": 1
},
{
"id": "store-invoice",
"name": "Rechnungsdaten speichern",
"type": "n8n-nodes-base.postgres",
"position": [
1450,
250
],
"parameters": {
"table": "invoices",
"columns": "invoice_number,vendor,amount,date,raw_data",
"operation": "insert"
},
"typeVersion": 1
},
{
"id": "store-other",
"name": "Andere Dokumente speichern",
"type": "n8n-nodes-base.postgres",
"position": [
1450,
350
],
"parameters": {
"table": "documents",
"columns": "type,content,metadata,processed_at",
"operation": "insert"
},
"typeVersion": 1
},
{
"id": "export-csv",
"name": "Export zu CSV",
"type": "n8n-nodes-base.writeBinaryFile",
"position": [
1650,
300
],
"parameters": {
"fileName": "=extracted_data_{{ $now.format('yyyy-MM-dd') }}.csv",
"fileContent": "={{ $items().map(item => item.json).toCsv() }}"
},
"typeVersion": 1
}
],
"connections": {
"file-trigger": {
"main": [
[
{
"node": "pdfvector-parse",
"type": "main",
"index": 0
}
]
]
},
"store-invoice": {
"main": [
[
{
"node": "export-csv",
"type": "main",
"index": 0
}
]
]
},
"store-other": {
"main": [
[
{
"node": "export-csv",
"type": "main",
"index": 0
}
]
]
},
"validate-data": {
"main": [
[
{
"node": "route-by-type",
"type": "main",
"index": 0
}
]
]
},
"route-by-type": {
"main": [
[
{
"node": "store-invoice",
"type": "main",
"index": 0
}
],
[
{
"node": "store-other",
"type": "main",
"index": 0
}
]
]
},
"extract-data": {
"main": [
[
{
"node": "validate-data",
"type": "main",
"index": 0
}
]
]
},
"pdfvector-parse": {
"main": [
[
{
"node": "extract-data",
"type": "main",
"index": 0
}
]
]
}
}
}
Wie verwende ich diesen Workflow?
Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.
Für welche Szenarien ist dieser Workflow geeignet?
Fortgeschritten - Dokumentenextraktion, Multimodale KI
Ist es kostenpflichtig?
Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie OpenAI API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.
Verwandte Workflows
PDF Vector
@pdfvector
Fully featured PDF APIs for developers - Parse any PDF or Word document, extract structured data, and access millions of academic papers - all through simple APIs.
Diesen Workflow teilen