Google Drive에서 PDF 데이터를 추출하고 포맷

Name: Google Drive에서 PDF 데이터를 추출하고 포맷
Rating: 4.5 (10 reviews)
Author: EoCi - Mr.Eo

중급

이것은 Content Creation, Multimodal AI 분야의 자동화 워크플로우로, 15개의 노드를 포함합니다. 주로 Set, Code, GoogleDrive, ManualTrigger, ExtractFromFile 등의 노드를 사용하며, Google Drive에서 PDF 데이터를 추출하고 포맷합니다.

사전 요구사항

•Google Drive API 인증 정보

사용된 노드 (15)

카테고리

콘텐츠 제작

멀티모달 AI

워크플로우 미리보기

노드 연결 관계를 시각적으로 표시하며, 확대/축소 및 이동을 지원합니다.

완료 !

시작

PDF 데이터만 가져오기

PDF 파일/파일 가져오기

검색 파일/파일 다운로드

파일/파일 데이터 추출

데이터 파서 및 클리너

React Flow

워크플로우 내보내기

다음 JSON 구성을 복사하여 n8n에 가져오면 이 워크플로우를 사용할 수 있습니다.

{
  "meta": {
    "instanceId": "cd9bb7894b11bab249a60976239056d06e4831b51d7348f6790a85241c21fc56",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "4e195179-a7df-4daa-a734-4ddb75242d02",
      "name": "완료 !",
      "type": "n8n-nodes-base.noOp",
      "position": [
        688,
        -32
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "2c1bacd1-864c-4da9-a3c8-fc6646a1935a",
      "name": "시작",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -480,
        0
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "d3a06fc0-6f82-4d6a-8cda-6694432830d8",
      "name": "PDF 데이터만 가져오기",
      "type": "n8n-nodes-base.set",
      "position": [
        288,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "ccd95b23-ca0d-4e0a-a2af-c0e4fc9aae4e",
              "name": "text",
              "type": "string",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "2e7a429c-13ae-4ea9-80c5-5b482489e78b",
      "name": "PDF 파일/파일 가져오기",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -304,
        0
      ],
      "parameters": {
        "filter": {
          "folderId": {
            "__rl": true,
            "mode": "list",
            "value": ""
          },
          "whatToSearch": "files"
        },
        "options": {
          "fields": [
            "id",
            "name"
          ]
        },
        "resource": "fileFolder",
        "returnAll": true,
        "queryString": "*.pdf"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "TB3MDL9X1SLIEPS5",
          "name": "Template"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "0ce127fc-8604-492b-96b5-8fff0ed1f6f6",
      "name": "검색 파일/파일 다운로드",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -112,
        0
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json.id }}"
        },
        "options": {
          "googleFileConversion": {
            "conversion": {
              "docsToFormat": "text/plain"
            }
          }
        },
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "TB3MDL9X1SLIEPS5",
          "name": "Template"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "0e761f9a-2d40-4787-8751-73e280beb452",
      "name": "파일/파일 데이터 추출",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        80,
        0
      ],
      "parameters": {
        "options": {},
        "operation": "pdf"
      },
      "typeVersion": 1
    },
    {
      "id": "398f6a89-2792-4e50-9da4-9444455cc2ae",
      "name": "데이터 파서 및 클리너",
      "type": "n8n-nodes-base.code",
      "position": [
        480,
        0
      ],
      "parameters": {
        "jsCode": "/**\n * This function removes all newline characters (\"\\n\") from a given string.\n * In the context of your n8n workflow, you can use this in a \"Code\" node\n * to clean up the PDF text content before passing it to the AI Agent.\n *\n * @param {string} text The input string that may contain newline characters.\n * @returns {string} The processed string with all newline characters removed.\n */\nfunction removeNewlines(text) {\n  if (typeof text !== 'string') {\n    // Return an empty string or handle the error as appropriate for your workflow\n    console.error(\"Input must be a string.\");\n    return \"\";\n  }\n  // The .replace() method with a regular expression /g ensures all occurrences are replaced.\n  return text.replace(/\\n/g, ' ');\n}\n\n// Example usage based on the text you provided:\n// In your n8n \"Code\" node, you would get the input from the previous node.\n// For example: const a_variable_from_another_node = \"your text here\";\nconst inputText = $input.first().json.text;\nconst cleanedText = removeNewlines(inputText);\nconsole.log(\"Original Text:\");\nconsole.log(inputText);\nconsole.log(\"\\\\n------------------\\\\n\");\nconsole.log(\"Cleaned Text:\");\nconsole.log(cleanedText);\n\n// To use this in n8n, you'd typically return the result like this:\nreturn { cleanedText: cleanedText };\n"
      },
      "typeVersion": 2
    },
    {
      "id": "91f0e401-6ac0-496d-b99f-9c5056105f74",
      "name": "스티키 노트2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        656,
        128
      ],
      "parameters": {
        "width": 560,
        "height": 256,
        "content": "## 🙏 **A Big Thank You For Trying This Workflow**\nYour time and trust mean a lot. I truly appreciate you giving this workflow a try.\n\nFeedback is the key to making this project better and more effective. If you have a moment, I'd love to hear your:\n- Suggestions for improvement.\n- Ideas for new features.\n- Requests for other automation workflows.\n\n### Thank you for being part of this journey!"
      },
      "typeVersion": 1
    },
    {
      "id": "5464e441-7f31-4a24-9fa1-afc18dd664a6",
      "name": "스티키 노트3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        656,
        416
      ],
      "parameters": {
        "width": 560,
        "height": 448,
        "content": "## 🔍 **TROUBLESHOOTING**\nRunning into issues? Here are some common fixes.\n- **Common Issues:**\n  - **\"Workflow finds no files\":**\n    1. Double-check that the Folder in the Google Drive node is correct.\n    2. Ensure your n8n Google credential has permission to view files in that folder.\n    3. Verify the files actually have a .pdf extension.\n\n- **\"Code node throws an error\":**\n  - Open the Code node and check the browser's developer console for JavaScript syntax errors. Make sure the input path to your text (items[0].json.text), matches what the Extract From File node is providing.\n\n- **Debug Checklist:**\n[ ] Are your Google Drive credentials valid? Try reconnecting them.\n[ ] Did you select the correct folder in the first Google Drive node?\n[ ] Does the output of the Extract From File node show the text you expect?\n[ ] Is the Code node correctly referencing the input data from the previous node?"
      },
      "typeVersion": 1
    },
    {
      "id": "85ea9ac7-1668-4990-9f06-8a11f39013a2",
      "name": "스티키 노트4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -512,
        176
      ],
      "parameters": {
        "width": 1136,
        "height": 688,
        "content": "## 🛠️ **STEP-BY-STEP SETUP GUIDE**\nFollow these steps to get your workflow running in under 5 minutes.\n---\n---\n---\n---\n---\n---"
      },
      "typeVersion": 1
    },
    {
      "id": "0ccb06b5-ca65-49cf-945d-309fccb6d4a1",
      "name": "스티키 노트5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        64,
        288
      ],
      "parameters": {
        "width": 544,
        "height": 560,
        "content": "\n### **4. Configure the Download Node (Download Retrieval File)** 📥\nThis node takes the file IDs found in the previous step and downloads the files.\n- **Operation:** Ensure this is set to Download.\n- **File ID:** This field should already be set using an expression {{ $json.id }}. This dynamically pulls the ID of each file found in the search step. You can leave this as is.\n---\n---\n---\n\n### **5. Configure the Code Node (Data Parser & Cleaner)** ✨\nThis is where you define your custom cleaning rules.\n- Open the Code node to view the JavaScript editor.\n- The raw text from the PDF will be available as input (e.g., items[0].json.text).\n- Modify the JavaScript code to perform your desired cleaning. This could be as simple as trimming whitespace or as complex as using regular expressions to find specific data.\n---\n---\n---\n\n### **6. Test Your Workflow!** ✅\nNow let's see it in action.\n1. Click Execute workflow at the top of the canvas.\n2. The workflow will run, and each node should get a green checkmark.\n3. Click on the final node (Done !) and check its Output to see the clean, extracted text."
      },
      "typeVersion": 1
    },
    {
      "id": "56d9b5fe-5ae6-4d9b-b298-64e4884a5939",
      "name": "스티키 노트6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -496,
        288
      ],
      "parameters": {
        "width": 544,
        "height": 560,
        "content": "### **1. Prepare Your Google Drive** 📂\nBefore you begin, make sure you have a dedicated folder in your Google Drive where you will place the PDFs you want to process.\n- In Google Drive, create a new folder (e.g., \"PDFs for n8n\").\n- Upload one or more PDF files into this folder to use for testing.\n---\n---\n---\n\n### **2. Connect Your Google Drive Credential** 🔗\nYou only need to connect your Google account once.\n- In the n8n canvas, click on the first Google Drive node (Get PDF Files/File).\n- In the \"Credential\" field, click \"Create New\", then fill in \"Client ID\" and \"Client Secret\".\n- After that, click on \"Sign In\" button a window will pop up asking you to sign in with your Google account and grant n8n permission.\n- Once completed, select this same credential for the second Google Drive node (Download Retrieval Files/File).\n---\n---\n---\n\n### **3. Configure the Search Node (Get PDF Files/File)** 🔎\n- This node tells the workflow where to look for your files.\n- **Operation:** Ensure this is set to Search.\n- **Search Query:** Type *.pdf to find all files with a PDF extension.\n- Click **\"Add Filter\"** and select Folder.\n- In the new filter, set the operation to In folder and use the list to select the Google Drive folder you created in Step 1.\n"
      },
      "typeVersion": 1
    },
    {
      "id": "d85b90d7-2f7d-41f4-8c94-35b5a4c72487",
      "name": "스티키 노트7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1104,
        48
      ],
      "parameters": {
        "width": 560,
        "height": 192,
        "content": "## 🔧 **CUSTOMIZATION OPTIONS**\nMake this workflow your own! Here are a few ideas to get you started:\n- 💾 **Data Fields:** Modify the \"Get PDF Data Only\" node to get more data fields such as \"Number of Pages\", \"metadata\", \"info\".\n\n- ✨ **Parser & Cleaner Rules:** Modify the code of \"Data Parser & Cleaner\" node to get your desired output (formatted result)."
      },
      "typeVersion": 1
    },
    {
      "id": "ab5b72ca-c58d-499f-b770-90fe19086dfc",
      "name": "스티키 노트8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1104,
        272
      ],
      "parameters": {
        "width": 560,
        "height": 592,
        "content": "## 📋 **WORKFLOW FLOW EXPLAINED**\nThis workflow follows a simple, powerful, four-stage process to turn files into data.\n\n### **1. INPUT STAGE (File Discovery)**\n- The **\"Google Drive\"** node acts as the entry point, searching files/file in a specific folder you defined.\n- It's configured to find all files ending in `.pdf` to ensure only the correct documents are processed.\n\n### 2. **RETRIEVAL STAGE (File Download)**\n- The workflow loops over every file found in the previous stage.\n- A second Google Drive node downloads files/file, preparing for data extraction.\n\n### **3.PROCESSING STAGE (Data Extraction)**\n- The **\"Extract From File\"** node takes the binary data of the downloaded PDF.\n- It reads the document and pulls out all the raw, unstructured text from its pages.\n\n### **4. FORMATTING STAGE (Data Parsing & Cleaning)**\n- The raw text is passed to the Code node.\n- This is where the magic happens! A custom JavaScript script cleans the text by removing unwanted lines, fixing spacing, or even restructuring it into a clean JSON format. The output is ready for use."
      },
      "typeVersion": 1
    },
    {
      "id": "6d0970ab-067d-4975-842c-398fda000f40",
      "name": "스티키 노트9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -448,
        -400
      ],
      "parameters": {
        "width": 960,
        "height": 352,
        "content": "## 📁 **Extract and Clean PDF Data from Google Drive**\n### ⚡️**Quick Demo**\n- **Input:** \"A Google Drive folder containing multiple PDF files, like invoices or reports.\"\n- **Output:*** \"Clean, extracted text from each PDF, formatted by a custom script into a structured object ready for the next step.\"\n\n### ✅**What You Get**\n- **Automated File Discovery:** Automatically finds and loops through all .pdf files in a specific folder.\n- **Custom Cleaning Engine:** A dedicated Code node gives you full control to clean, parse, and structure the extracted text using JavaScript.\n- **On-Demand Execution:** A manual trigger lets you run the entire process with a single click whenever you need it.\n\n### 🎯**Perfect For**\n- Archiving the contents of articles, documents, reports, etc.\n- Anyone who often works with pdf files."
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "2c1bacd1-864c-4da9-a3c8-fc6646a1935a": {
      "main": [
        [
          {
            "node": "2e7a429c-13ae-4ea9-80c5-5b482489e78b",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "d3a06fc0-6f82-4d6a-8cda-6694432830d8": {
      "main": [
        [
          {
            "node": "398f6a89-2792-4e50-9da4-9444455cc2ae",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "2e7a429c-13ae-4ea9-80c5-5b482489e78b": {
      "main": [
        [
          {
            "node": "0ce127fc-8604-492b-96b5-8fff0ed1f6f6",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "398f6a89-2792-4e50-9da4-9444455cc2ae": {
      "main": [
        [
          {
            "node": "4e195179-a7df-4daa-a734-4ddb75242d02",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "0e761f9a-2d40-4787-8751-73e280beb452": {
      "main": [
        [
          {
            "node": "d3a06fc0-6f82-4d6a-8cda-6694432830d8",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "0ce127fc-8604-492b-96b5-8fff0ed1f6f6": {
      "main": [
        [
          {
            "node": "0e761f9a-2d40-4787-8751-73e280beb452",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}

자주 묻는 질문

이 워크플로우를 어떻게 사용하나요?

위의 JSON 구성 코드를 복사하여 n8n 인스턴스에서 새 워크플로우를 생성하고 "JSON에서 가져오기"를 선택한 후, 구성을 붙여넣고 필요에 따라 인증 설정을 수정하세요.

이 워크플로우는 어떤 시나리오에 적합한가요?

중급 - 콘텐츠 제작, 멀티모달 AI

유료인가요?

이 워크플로우는 완전히 무료이며 직접 가져와 사용할 수 있습니다. 다만, 워크플로우에서 사용하는 타사 서비스(예: OpenAI API)는 사용자가 직접 비용을 지불해야 할 수 있습니다.

Google Drive에서 PDF 데이터를 추출하고 포맷

사용된 노드 (15)

카테고리

이 워크플로우를 어떻게 사용하나요?

이 워크플로우는 어떤 시나리오에 적합한가요?

유료인가요?

관련 워크플로우 추천

카테고리