8
n8n 中文网amn8n.com

TyphoonV2.2 多文件与多页面及带AI

中级

这是一个属于 AI Summarization(AI 摘要总结)与 Multimodal AI(多模态 AI)领域的自动化工作流,包含 15 个节点,主要使用 Code、Aggregate、GoogleSheets、ManualTrigger、ReadWriteFile 等节点。它使用 Typhoon OCR 和 AI 处理泰语文档(支持多页 PDF),并将提取出的结构化结果写入 Google 表格。

前置要求
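  • Google Sheets API 凭证
  • OpenRouter API 凭证(供 LLM 节点调用模型)
  • Typhoon OCR API 密钥,以及已安装 typhoon_ocr 包的 Python 环境
  • poppler-utils(提供 pdfinfo 和 pdfseparate,用于拆分 PDF 页面)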
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "usFtj9NVTTqcIUFw",
  "meta": {
    "instanceId": "ec740af1a2cd76453ffc1c197c5b3c66b8ee9fcd44f7dc2e779efb35ee81469a",
    "templateCredsSetupCompleted": true
  },
  "name": "TyphoonV2.2 多文件与多页面及带 AI",
  "tags": [],
  "nodes": [
    {
      "id": "bcda99fe-5a10-4c43-af9d-3510ff00fb5e",
      "name": "当点击\"测试工作流\"时",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        912,
        208
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "06b15c69-b854-4956-a6fc-3355f1a72427",
      "name": "使用 Typhoon OCR 提取文本",
      "type": "n8n-nodes-base.executeCommand",
      "position": [
        752,
        640
      ],
      "parameters": {
        "command": "=python -c \"import sys, os; os.environ['TYPHOON_OCR_API_KEY'] = '<Please update your Typhoon OCR>'; from typhoon_ocr import ocr_document; sys.stdout.reconfigure(encoding='utf-8'); input_path = sys.argv[1]; text = ocr_document(input_path); print(text)\" \"/doc/tmp/pages/{{$json[\"fileName\"]}}\"",
        "executeOnce": false
      },
      "typeVersion": 1
    },
    {
      "id": "1c8ac652-4ac9-424e-847b-5b845753c621",
      "name": "从文档文件夹加载 PDF",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1184,
        208
      ],
      "parameters": {
        "options": {},
        "fileSelector": "/doc/multipage/*"
      },
      "typeVersion": 1,
      "alwaysOutputData": true
    },
    {
      "id": "291f466a-3928-47bb-8cfb-48de2ad0256f",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        192,
        176
      ],
      "parameters": {
        "width": 1352,
        "height": 968,
        "content": "## 泰语 OCR 到表格 (V2.2 – 多文件与多页面带 AI)"
      },
      "typeVersion": 1
    },
    {
      "id": "d585efcd-9b42-4bea-b128-1e0042e6b67f",
      "name": "从磁盘读取/写入文件",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        848,
        416
      ],
      "parameters": {
        "options": {},
        "fileName": "/doc/tmp/in.pdf",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "8fff15da-ea36-4b56-83da-011ae227964b",
      "name": "设置输入路径",
      "type": "n8n-nodes-base.code",
      "position": [
        1008,
        416
      ],
      "parameters": {
        "jsCode": "// รับ items จากโหนดก่อนหน้า แล้วเติมฟิลด์ inputPath ให้เรียบร้อย\n// กติกา: ใช้ filePath ถ้ามี; ถ้าไม่มี ให้ประกอบจาก directory + fileName; ถ้าไม่มี directory ก็ใช้ fileName\nreturn items.map(({ json, binary }) => {\n  const filePath =\n    json.filePath\n      ? json.filePath\n      : (json.directory\n          ? `${json.directory.replace(/\\/$/, '')}/${json.fileName}`\n          : json.fileName);\n\n  return {\n    json: {\n      ...json,\n      inputPath: filePath,\n    },\n    binary, // เผื่ออยากพก binary ต่อไปด้วย\n  };\n});\n"
      },
      "typeVersion": 2
    },
    {
      "id": "e4d8e648-e26b-4707-adf5-525e88a09ec8",
      "name": "拆分 PDF 页面",
      "type": "n8n-nodes-base.executeCommand",
      "position": [
        1184,
        416
      ],
      "parameters": {
        "command": "=sh -lc '\nset -e\nIN=\"{{ $json.inputPath }}\"\nOUT=\"/doc/tmp/pages\"\nrm -rf \"$OUT\" && mkdir -p \"$OUT\"\n\n# นับจำนวนหน้า (ต้องมี poppler-utils)\nPAGES=$(pdfinfo \"$IN\" 2>/dev/null | awk -F\": *\" \"/^Pages/{print \\$2}\")\n[ -n \"$PAGES\" ] || { echo \"Cannot detect page count for: $IN\" >&2; exit 1; }\n\npdfseparate -f 1 -l \"$PAGES\" \"$IN\" \"$OUT/page_%d.pdf\"\nls -1 \"$OUT\"/page_*.pdf\n'\n",
        "executeOnce": false
      },
      "typeVersion": 1
    },
    {
      "id": "57bb55c3-07e2-4ace-9678-8fafd5b420c6",
      "name": "读取拆分后的 PDF 页面",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1360,
        416
      ],
      "parameters": {
        "options": {},
        "fileSelector": "/doc/tmp/pages/*.pdf"
      },
      "typeVersion": 1
    },
    {
      "id": "33589105-6b44-4d83-bd68-50b381d681e1",
      "name": "聚合",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        944,
        640
      ],
      "parameters": {
        "options": {},
        "fieldsToAggregate": {
          "fieldToAggregate": [
            {
              "fieldToAggregate": "stdout"
            }
          ]
        }
      },
      "typeVersion": 1
    },
    {
      "id": "a65d7907-3744-4629-a8fa-37c36b31e3f3",
      "name": "遍历项目",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        672,
        400
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "eadb2b7e-4ad3-4a46-8ecd-da1702470f1a",
      "name": "解析 JSON 为表格格式",
      "type": "n8n-nodes-base.code",
      "position": [
        752,
        864
      ],
      "parameters": {
        "mode": "runOnceForEachItem",
        "jsCode": "const raw = $json[\"text\"];\n\n// 1. ลบ ```json และ ``` ที่ LLM อาจใส่มา\nconst cleaned = raw.replace(/```json\\n?|```/g, \"\").trim();\n\nlet parsed;\ntry {\n  // 2. แปลงเป็น object\n  parsed = JSON.parse(cleaned);\n} catch (err) {\n  throw new Error(\"JSON parsing failed: \" + err.message + \"\\n\\nRaw text:\\n\" + cleaned);\n}\n\n// 3. หาก contact เป็น object แยก field ออกมา\nconst contact = parsed.contact || {};\n\nreturn {\n  book_id: parsed.book_id || \"\",\n  date: parsed.date || \"\",\n  subject: parsed.subject || \"\",\n  to: parsed.to || \"\",\n  attach: parsed.attach || \"\",\n  detail: parsed.detail || \"\",\n  signed_by: parsed.signed_by || \"\",\n  signed_by2: parsed.signed_by2 || \"\",\n  contact_phone: contact.phone || \"\",\n  contact_email: contact.email || \"\",\n  contact_fax: contact.fax || \"\",\n  download_url: parsed.download_url || \"\"\n};\n"
      },
      "typeVersion": 2
    },
    {
      "id": "8f2fb4c3-bbb3-49aa-8796-f85219241f47",
      "name": "使用 LLM 将文本结构化为 JSON",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        1136,
        640
      ],
      "parameters": {
        "text": "=ข้อความด้านล่างนี้เป็นเนื้อหา OCR จากหนังสือราชการ กรุณาแยกหัวข้อสำคัญออกมาในรูปแบบ JSON:\n\n1. book_id: เลขที่หนังสือ\n2. date: วันที่ในเอกสาร\n3. subject: หัวเรื่อง\n4. to: เรียน\n5. attach: สิ่งที่ส่งมาด้วย\n6. detail: เนื้อความในหนังสือ\n7. signed_by: ผู้ลงนาม\n8. signed_by2: ตำแหน่งผู้ลงนาม\n9. contact: ช่องทางติดต่อ (เช่น เบอร์โทร อีเมล)\n10. download_url: ลิงก์สำหรับดาวน์โหลด (ถ้ามี)\n\nOCR_TEXT:\n\"\"\"\n{{ $json[\"stdout\"] }}\n\"\"\"",
        "promptType": "define"
      },
      "typeVersion": 1.6
    },
    {
      "id": "205beaeb-8532-4768-a2db-e4ab773a2fc8",
      "name": "OpenRouter 聊天模型",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
      "position": [
        1136,
        784
      ],
      "parameters": {
        "model": "openai/gpt-4o-mini",
        "options": {}
      },
      "credentials": {
        "openRouterApi": {
          "id": "JVYq1Px3c4AZWpkk",
          "name": "OpenRouter account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "aa0f180d-9ae8-4a09-ad09-d409d92b4c6b",
      "name": "清理临时文件",
      "type": "n8n-nodes-base.executeCommand",
      "position": [
        1248,
        864
      ],
      "parameters": {
        "command": "=sh -lc '\nset -e\n\n# ลบโฟลเดอร์ temp ที่ใช้เก็บไฟล์ที่ split\nrm -rf /doc/tmp/pages\n\n# ย้ายไฟล์ต้นฉบับ (input PDF) ไปเก็บไว้ใน Completed\nmkdir -p /doc/multipage/Completed\n\n# ใช้ $json.fileName ของ node ต้นทาง (Load PDFs from doc Folder)\n# ถ้า fileName เป็น path อยู่แล้ว ให้ basename ออกมาก่อน\nsrc=\"/doc/multipage/{{ $('Load PDFs from doc Folder').item.json.fileName }}\"\ndst=\"/doc/multipage/Completed/{{ $('Load PDFs from doc Folder').item.json.fileName }}\"\n\nmv \"$src\" \"$dst\"\necho \"Moved $src → $dst\"\n'\n"
      },
      "typeVersion": 1
    },
    {
      "id": "32acc4b4-3bb3-49d7-8a50-984e02f31077",
      "name": "保存到 Google Sheet",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        992,
        864
      ],
      "parameters": {
        "columns": {
          "value": {
            "to": "={{ $json.to }}",
            "date": "={{ $json.date }}",
            "attach": "={{ $json.attach }}",
            "detail": "={{ $json.detail }}",
            "book_id": "={{ $json.book_id }}",
            "subject": "={{ $json.subject }}",
            "signed_by": "={{ $json.signed_by }}",
            "signed_by2": "={{ $json.signed_by2 }}",
            "contact_fax": "={{ $json.contact_fax }}",
            "download_url": "={{ $json.download_url }}",
            "contact_email": "={{ $json.contact_email }}",
            "contact_phone": "={{ $json.contact_phone }}"
          },
          "schema": [
            {
              "id": "book_id",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "book_id",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "date",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "date",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "subject",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "subject",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "to",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "to",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "attach",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "attach",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "detail",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "detail",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "signed_by",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "signed_by",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "signed_by2",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "signed_by2",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "contact_phone",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "contact_phone",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "contact_email",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "contact_email",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "contact_fax",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "contact_fax",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "download_url",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "download_url",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "book_id"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1h70cJyLj5i2j0Ag5kqp93ccZjjhJnqpLmz-ee5r4brU/edit#gid=0",
          "cachedResultName": "Sheet1"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "1h70cJyLj5i2j0Ag5kqp93ccZjjhJnqpLmz-ee5r4brU",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1h70cJyLj5i2j0Ag5kqp93ccZjjhJnqpLmz-ee5r4brU/edit?usp=drivesdk",
          "cachedResultName": "TyphoonOCR_Extracted_Data"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "ot4XqoGzFPMsgA5P",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.5
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "7166e717-0700-4f73-872d-a771d37b1e65",
  "connections": {
    "Aggregate": {
      "main": [
        [
          {
            "node": "Structure Text to JSON with LLM",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Set_Input_Path": {
      "main": [
        [
          {
            "node": "Split PDF page",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split PDF page": {
      "main": [
        [
          {
            "node": "Read Splite PDF Page",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Clear tmp files": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Read/Write Files from Disk",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Read Splite PDF Page": {
      "main": [
        [
          {
            "node": "Extract Text with Typhoon OCR",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Save to Google Sheet": {
      "main": [
        [
          {
            "node": "Clear tmp files",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenRouter Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "Structure Text to JSON with LLM",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Load PDFs from doc Folder": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse JSON to Sheet Format": {
      "main": [
        [
          {
            "node": "Save to Google Sheet",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Read/Write Files from Disk": {
      "main": [
        [
          {
            "node": "Set_Input_Path",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Text with Typhoon OCR": {
      "main": [
        [
          {
            "node": "Aggregate",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structure Text to JSON with LLM": {
      "main": [
        [
          {
            "node": "Parse JSON to Sheet Format",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Load PDFs from doc Folder",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

适合需要将泰语 PDF 文档(含多页 PDF)批量进行 OCR 识别、由 AI 提取结构化字段并写入 Google 表格的场景。难度:中级;分类:AI 摘要总结、多模态 AI。

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenRouter、Typhoon OCR 的 API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量:15
分类:2
节点类型:10
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者
Jaruphat J.

Jaruphat J.

@jaruphatj

Project Manager who is passionate about Automation & AI and continuously explores innovative ways to improve business processes through intelligent workflow automation. Let’s connect and automate the future!

外部链接
在 n8n.io 查看

分享此工作流