8
n8n 中文网amn8n.com

从Google Drive提取并格式化PDF数据

中级

这是一个内容创作, 多模态 AI 领域的自动化工作流,包含 15 个节点,主要使用 Set, Code, GoogleDrive, ManualTrigger, ExtractFromFile 等节点,用于从 Google Drive 提取并格式化 PDF 数据。

前置要求
  • Google Drive API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "cd9bb7894b11bab249a60976239056d06e4831b51d7348f6790a85241c21fc56",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "4e195179-a7df-4daa-a734-4ddb75242d02",
      "name": "完成!",
      "type": "n8n-nodes-base.noOp",
      "position": [
        688,
        -32
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "2c1bacd1-864c-4da9-a3c8-fc6646a1935a",
      "name": "开始",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -480,
        0
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "d3a06fc0-6f82-4d6a-8cda-6694432830d8",
      "name": "仅获取PDF数据",
      "type": "n8n-nodes-base.set",
      "position": [
        288,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "ccd95b23-ca0d-4e0a-a2af-c0e4fc9aae4e",
              "name": "text",
              "type": "string",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "2e7a429c-13ae-4ea9-80c5-5b482489e78b",
      "name": "获取PDF文件/文件",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -304,
        0
      ],
      "parameters": {
        "filter": {
          "folderId": {
            "__rl": true,
            "mode": "list",
            "value": ""
          },
          "whatToSearch": "files"
        },
        "options": {
          "fields": [
            "id",
            "name"
          ]
        },
        "resource": "fileFolder",
        "returnAll": true,
        "queryString": "*.pdf"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "TB3MDL9X1SLIEPS5",
          "name": "Template"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "0ce127fc-8604-492b-96b5-8fff0ed1f6f6",
      "name": "下载检索文件/文件",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -112,
        0
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json.id }}"
        },
        "options": {
          "googleFileConversion": {
            "conversion": {
              "docsToFormat": "text/plain"
            }
          }
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "TB3MDL9X1SLIEPS5",
          "name": "Template"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "0e761f9a-2d40-4787-8751-73e280beb452",
      "name": "提取文件/文件的数据",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        80,
        0
      ],
      "parameters": {
        "options": {},
        "operation": "pdf"
      },
      "typeVersion": 1
    },
    {
      "id": "398f6a89-2792-4e50-9da4-9444455cc2ae",
      "name": "数据解析器与清理器",
      "type": "n8n-nodes-base.code",
      "position": [
        480,
        0
      ],
      "parameters": {
        "jsCode": "/**\n * This function removes all newline characters (\"\\n\") from a given string.\n * In the context of your n8n workflow, you can use this in a \"Code\" node\n * to clean up the PDF text content before passing it to the AI Agent.\n *\n * @param {string} text The input string that may contain newline characters.\n * @returns {string} The processed string with all newline characters removed.\n */\nfunction removeNewlines(text) {\n  if (typeof text !== 'string') {\n    // Return an empty string or handle the error as appropriate for your workflow\n    console.error(\"Input must be a string.\");\n    return \"\";\n  }\n  // The .replace() method with a regular expression /g ensures all occurrences are replaced.\n  return text.replace(/\\n/g, ' ');\n}\n\n// Example usage based on the text you provided:\n// In your n8n \"Code\" node, you would get the input from the previous node.\n// For example: const a_variable_from_another_node = \"your text here\";\nconst inputText = $input.first().json.text;\nconst cleanedText = removeNewlines(inputText);\nconsole.log(\"Original Text:\");\nconsole.log(inputText);\nconsole.log(\"\\\\n------------------\\\\n\");\nconsole.log(\"Cleaned Text:\");\nconsole.log(cleanedText);\n\n// To use this in n8n, you'd typically return the result like this:\nreturn { cleanedText: cleanedText };\n"
      },
      "typeVersion": 2
    },
    {
      "id": "91f0e401-6ac0-496d-b99f-9c5056105f74",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        656,
        128
      ],
      "parameters": {
        "width": 560,
        "height": 256,
        "content": "## 🙏 **衷心感谢您试用此工作流**"
      },
      "typeVersion": 1
    },
    {
      "id": "5464e441-7f31-4a24-9fa1-afc18dd664a6",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        656,
        416
      ],
      "parameters": {
        "width": 560,
        "height": 448,
        "content": "## 🔍 **故障排除**"
      },
      "typeVersion": 1
    },
    {
      "id": "85ea9ac7-1668-4990-9f06-8a11f39013a2",
      "name": "便签4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -512,
        176
      ],
      "parameters": {
        "width": 1136,
        "height": 688,
        "content": "## 🛠️ **分步设置指南**"
      },
      "typeVersion": 1
    },
    {
      "id": "0ccb06b5-ca65-49cf-945d-309fccb6d4a1",
      "name": "便签5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        64,
        288
      ],
      "parameters": {
        "width": 544,
        "height": 560,
        "content": ""
      },
      "typeVersion": 1
    },
    {
      "id": "56d9b5fe-5ae6-4d9b-b298-64e4884a5939",
      "name": "### 需要帮助?",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -496,
        288
      ],
      "parameters": {
        "width": 544,
        "height": 560,
        "content": "### **1. 准备您的Google Drive** 📂"
      },
      "typeVersion": 1
    },
    {
      "id": "d85b90d7-2f7d-41f4-8c94-35b5a4c72487",
      "name": "## 试试看!",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1104,
        48
      ],
      "parameters": {
        "width": 560,
        "height": 192,
        "content": "## 🔧 **自定义选项**"
      },
      "typeVersion": 1
    },
    {
      "id": "ab5b72ca-c58d-499f-b770-90fe19086dfc",
      "name": "GET 模型",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1104,
        272
      ],
      "parameters": {
        "width": 560,
        "height": 592,
        "content": "## 📋 **工作流流程说明**"
      },
      "typeVersion": 1
    },
    {
      "id": "6d0970ab-067d-4975-842c-398fda000f40",
      "name": "## 1. 创建新的自定义 OpenAI 凭据",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -448,
        -400
      ],
      "parameters": {
        "width": 960,
        "height": 352,
        "content": "## 📁 **从Google Drive提取和清理PDF数据**"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "开始": {
      "main": [
        [
          {
            "node": "获取PDF文件/文件",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "仅获取PDF数据": {
      "main": [
        [
          {
            "node": "数据解析器与清理器",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "获取PDF文件/文件": {
      "main": [
        [
          {
            "node": "下载检索文件/文件",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "数据解析器与清理器": {
      "main": [
        [
          {
            "node": "完成!",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "提取文件/文件的数据": {
      "main": [
        [
          {
            "node": "仅获取PDF数据",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "下载检索文件/文件": {
      "main": [
        [
          {
            "node": "提取文件/文件的数据",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - 内容创作, 多模态 AI

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Google Drive API)可能需要您自行配置凭证,并可能产生费用。

工作流信息
难度等级
中级
节点数量: 15
分类: 2
节点类型: 7
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

外部链接
在 n8n.io 查看

分享此工作流