PDF发票数据提取到JSON
这是一个属于 Miscellaneous、AI Summarization、Multimodal AI 领域的自动化工作流,包含 10 个节点,主要使用 Set、Xml、FormTrigger、ExtractFromFile、GoogleGemini 等节点。它使用 Gemini AI 和 XML 转换,将 PDF 中的发票数据提取为 JSON。
- 无特殊前置要求,导入即可使用
{
"meta": {
"instanceId": "d1451097bf16b4787e3f6ede2b364ece110261879ec2f0efaeba689056c0a1ab"
},
"nodes": [
{
"id": "3a0d9a6f-6e6e-44a3-9eb0-1755b01fed0c",
"name": "表单提交时",
"type": "n8n-nodes-base.formTrigger",
"position": [
672,
-480
],
"webhookId": "0387941a-9e42-44ab-96ac-dde230418ac3",
"parameters": {
"options": {},
"formTitle": "Test",
"formFields": {
"values": [
{
"fieldType": "file",
"fieldLabel": "data"
}
]
}
},
"typeVersion": 2.3
},
{
"id": "d510fda8-ceaa-4d57-8946-39a97b23f3e1",
"name": "从文件提取",
"type": "n8n-nodes-base.extractFromFile",
"position": [
832,
-480
],
"parameters": {
"options": {},
"operation": "pdf"
},
"typeVersion": 1
},
{
"id": "e070def8-b13a-49fa-ae4a-e366d1f474da",
"name": "消息模型",
"type": "@n8n/n8n-nodes-langchain.googleGemini",
"position": [
704,
-240
],
"parameters": {
"modelId": {
"__rl": true,
"mode": "list",
"value": "models/gemma-3n-e4b-it",
"cachedResultName": "models/gemma-3n-e4b-it"
},
"options": {},
"messages": {
"values": [
{
"content": "=Considera la transcripcion del invoice adjunta, reescribela como un XML siguiendo este esquema:\n\n{{ $json.estructuraXML }}\n\nInvoice:\n\n{{ $json.text_limpio }}"
}
]
}
},
"credentials": {
"googlePalmApi": {
"id": "d4exk6UjdeHXH93h",
"name": "Google Gemini(PaLM) Api account 2"
}
},
"typeVersion": 1
},
{
"id": "4e435b5b-95da-4b6a-a888-c2f74cd96cd1",
"name": "清理数据",
"type": "n8n-nodes-base.set",
"position": [
1104,
-480
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "ad0e7b3d-4011-4bfb-851e-c049883dc00a",
"name": "text_limpio",
"type": "string",
"value": "={{ $json.text.replace(/\\n/g, ' ') }}"
},
{
"id": "e0b6ea3e-17d6-4c18-a5f5-1b2cf98b4ddb",
"name": "estructuraXML",
"type": "string",
"value": "<invoice>\n <invoice_number>[invoice_number]</invoice_number>\n <date_of_issue>[date_of_issue]</date_of_issue>\n <due_date>[due_date]</due_date>\n\n <billed_to>\n <company_name>[billed_to.company_name]</company_name>\n <contact_name>[billed_to.contact_name]</contact_name>\n <address>[billed_to.address]</address>\n <postal_code>[billed_to.postal_code]</postal_code>\n <city>[billed_to.city]</city>\n <state>[billed_to.state]</state>\n <country>[billed_to.country]</country>\n <rfc>[billed_to.rfc]</rfc>\n </billed_to>\n\n <from>\n <company_name>[from.company_name]</company_name>\n <address>[from.address]</address>\n <postal_code>[from.postal_code]</postal_code>\n <city>[from.city]</city>\n <state>[from.state]</state>\n <country>[from.country]</country>\n <rfc>[from.rfc]</rfc>\n </from>\n\n <purchase_order>[purchase_order]</purchase_order>\n\n <items>\n <item>\n <description>[item.description]</description>\n <unit_cost>[item.unit_cost]</unit_cost>\n <quantity>[item.quantity]</quantity>\n <amount>[item.amount]</amount>\n </item>\n </items>\n\n <bank_account_details>\n <account_holder_name>[bank_account_details.account_holder_name]</account_holder_name>\n <account_number>[bank_account_details.account_number]</account_number>\n <routing_number>[bank_account_details.routing_number]</routing_number>\n <swift_code>[bank_account_details.swift_code]</swift_code>\n <bank_name>[bank_account_details.bank_name]</bank_name>\n <currency>[bank_account_details.currency]</currency>\n </bank_account_details>\n\n <financials>\n <subtotal>[subtotal]</subtotal>\n <tax_rate>[tax_rate]</tax_rate>\n <tax_amount>[tax_amount]</tax_amount>\n <shipping_cost>[shipping_cost]</shipping_cost>\n <invoice_total>[invoice_total]</invoice_total>\n </financials>\n</invoice>"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "93fd56a6-33f9-4ac2-88b2-72157beb871f",
"name": "清理XML",
"type": "n8n-nodes-base.set",
"position": [
1040,
-240
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "ddaad091-c54e-44d9-bf05-604e3bf43caa",
"name": "factura_limpia",
"type": "string",
"value": "={{ $json.content.parts[0].text.replace('```xml', '').replace('```', '').replace(/(\\n|\\s{2,})/g, '').replace(/(\\s<)/g, '<').replace(/(>\\s)/g, '>') }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "9d96dd97-9048-4a6f-b11c-52c30a6d3fa3",
"name": "XML转JSON",
"type": "n8n-nodes-base.xml",
"position": [
1200,
-240
],
"parameters": {
"options": {
"trim": false,
"normalize": false,
"normalizeTags": false
},
"dataPropertyName": "factura_limpia"
},
"typeVersion": 1
},
{
"id": "ee4365f4-08b5-42de-afb7-6a187272fabb",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
624,
-544
],
"parameters": {
"color": 4,
"width": 352,
"height": 240,
"content": "## PDF转文本"
},
"typeVersion": 1
},
{
"id": "e6bdaed7-1cee-4412-86c8-c7409ac1231e",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
976,
-544
],
"parameters": {
"color": 2,
"width": 368,
"height": 240,
"content": "## 清理数据和XML结构定义"
},
"typeVersion": 1
},
{
"id": "26faacbb-3464-46fe-8e1f-cd105942d179",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
624,
-304
],
"parameters": {
"color": 3,
"width": 352,
"height": 256,
"content": "## 生成XML字符串"
},
"typeVersion": 1
},
{
"id": "33493f4d-a615-4a80-8727-7ebba208f215",
"name": "便签3",
"type": "n8n-nodes-base.stickyNote",
"position": [
976,
-304
],
"parameters": {
"color": 5,
"width": 368,
"height": 256,
"content": "## 字符串转XML转Json"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"清理XML": {
"main": [
[
{
"node": "XML转JSON",
"type": "main",
"index": 0
}
]
]
},
"清理数据": {
"main": [
[
{
"node": "消息模型",
"type": "main",
"index": 0
}
]
]
},
"消息模型": {
"main": [
[
{
"node": "清理XML",
"type": "main",
"index": 0
}
]
]
},
"从文件提取": {
"main": [
[
{
"node": "清理数据",
"type": "main",
"index": 0
}
]
]
},
"表单提交时": {
"main": [
[
{
"node": "从文件提取",
"type": "main",
"index": 0
}
]
]
}
}
}
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 杂项, AI 摘要总结, 多模态 AI
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Google Gemini API)可能需要您自行付费。
相关工作流推荐
Mauricio Perera
@rckflr
Automation consultant with over 10 years of experience specializing in AI, no-code, and workflow optimization. I’ve delivered tailored AI and NLP solutions across real estate, healthcare, and more, enhancing efficiency and customer experiences. Proficient in tools like Make, Airtable, and Zapier, I also integrate GPT models to create scalable, innovative automations. Contact me to discuss custom n8n workflows or advanced automations to streamline your processes.
分享此工作流