Amazon製品データをScrape.do、GPT-4、Google Sheetsで抽出

中級

これはMarket Research、AI Summarization分野の自動化ワークフローで、11個のノードを含みます。主にHtml、SplitOut、HttpRequest、GoogleSheets、ManualTriggerなどのノードを使用し、Scrape.do、GPT-4、Google Sheetsを使ってAmazonの製品データを抽出します。

前提条件
  • Scrape.do APIトークン(n8nの変数 SCRAPEDO_TOKEN に設定)
  • Google Sheets API認証情報
  • OpenAI API Key

カテゴリー

ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートすると、このワークフローを使用できます。
{
  "meta": {
    "instanceId": "cb5caf45c9475b848c7e83772505bb02340e165acdd8de77e25011192306257c",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "c499851d-09d6-4a25-812e-c1d3efa3f0a8",
      "name": "ワークフローテストクリック時",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1648,
        272
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "80562cea-7422-44ec-9886-1928bb8f81f1",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -624,
        336
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini"
        },
        "options": {
          "maxTokens": 500,
          "temperature": 0,
          "responseFormat": "json_object"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "da77ba7c-a40c-4d79-91f1-fd485d101f76",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        -288,
        304
      ],
      "parameters": {
        "schemaType": "manual",
        "inputSchema": "{\n  \"type\": \"object\",\n  \"properties\": {\n    \"name\": { \n      \"type\": \"string\", \n      \"description\": \"Product name/title\" \n    },\n    \"description\": { \n      \"type\": \"string\", \n      \"description\": \"Product description or key features\" \n    },\n    \"rating\": { \n      \"type\": [\"number\", \"null\"], \n      \"description\": \"Average rating (e.g., 4.5)\" \n    },\n    \"reviews\": { \n      \"type\": [\"integer\", \"null\"], \n      \"description\": \"Number of reviews\" \n    },\n    \"price\": { \n      \"type\": [\"string\", \"null\"], \n      \"description\": \"Product price with currency\" \n    }\n  },\n  \"required\": [\"name\"]\n}"
      },
      "typeVersion": 1.3
    },
    {
      "id": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
      "name": "1. Google Sheetsから商品URLを取得",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        -1392,
        272
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=0",
          "cachedResultName": "Sheet1"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
          "cachedResultName": "Amazon Product List"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "df8r9D022KIAOHTC",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
      "name": "2. 各URLをループ処理",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1168,
        272
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "c588ede7-1689-492d-a863-949ade5ffe33",
      "name": "3. 商品ページHTMLをスクレイピング",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -960,
        128
      ],
      "parameters": {
        "url": "=https://api.scrape.do/?token={{$vars.SCRAPEDO_TOKEN}}&url={{ encodeURIComponent($json.url) }}&geoCode=us&render=false",
        "options": {
          "timeout": 60000,
          "response": {
            "response": {}
          }
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
      "name": "4. 生データ要素を抽出",
      "type": "n8n-nodes-base.html",
      "position": [
        -752,
        128
      ],
      "parameters": {
        "options": {},
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "productTitle",
              "cssSelector": "#productTitle, h1[data-automation-id=\"product-title\"], .product-title"
            },
            {
              "key": "price",
              "cssSelector": ".a-price .a-offscreen, .a-price-whole, .a-price-fraction, .priceToPay .a-price .a-offscreen"
            },
            {
              "key": "rating",
              "cssSelector": ".a-icon-alt, [data-hook=\"average-star-rating\"], .a-star-medium .a-icon-alt"
            },
            {
              "key": "reviewCount",
              "cssSelector": "[data-hook=\"total-review-count\"], .a-link-normal[href*=\"customerReviews\"], #acrCustomerReviewText"
            },
            {
              "key": "featureBullets",
              "cssSelector": "#feature-bullets ul, .a-unordered-list.a-nostyle.a-vertical.feature"
            },
            {
              "key": "productDescription",
              "cssSelector": "#productDescription, [data-feature-name=\"productDescription\"], .product-description"
            }
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "2c491fda-9510-46f9-973a-754587601b7c",
      "name": "5. AIでデータをクリーニング・構造化",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        -512,
        128
      ],
      "parameters": {
        "text": "={{ JSON.stringify($json, null, 2) }}",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "Extract Amazon product data and return ONLY valid JSON.\n\nInput: {{ $json }}\n\nExtract:\n- name: product title from productTitle\n- description: create from featureBullets OR productDescription (max 150 chars, if empty use \"No description\")\n- rating: extract number from rating (e.g. \"4.5 out of 5\" → 4.5, if no rating use null)\n- reviews: extract number from reviewCount (e.g. \"1,234 ratings\" → 1234, if none use null)\n- price: format price from price field (add $ if missing, if no price use null)\n\nReturn exact JSON:\n{\n  \"name\": \"product title here\",\n  \"description\": \"description here or No description\",\n  \"rating\": 4.5,\n  \"reviews\": 1234,\n  \"price\": \"$29.99\"\n}"
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
      "name": "6. 最終JSON出力をフォーマット",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -128,
        128
      ],
      "parameters": {
        "include": "selectedOtherFields",
        "options": {},
        "fieldToSplitOut": "output",
        "fieldsToInclude": "output.name, output.description, output.rating, output.reviews, output.price"
      },
      "typeVersion": 1
    },
    {
      "id": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
      "name": "7. 商品データをGoogle Sheetsに保存",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        80,
        272
      ],
      "parameters": {
        "columns": {
          "value": {},
          "schema": [],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {
          "useAppend": true
        },
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 838351250,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=838351250",
          "cachedResultName": "Sheet2"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
          "cachedResultName": "Amazon Product List"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "df8r9D022KIAOHTC",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "1d3b653a-e5d8-4e88-a210-15224c6282c1",
      "name": "付箋メモ1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2272,
        -144
      ],
      "parameters": {
        "width": 528,
        "height": 896,
        "content": "## Amazon Scraper with Scrape.do API\n\n### Setup Instructions:\n\n1. **Get Scrape.do API Token:**\n   - Sign up at https://scrape.do\n   - Get your API token from the dashboard\n\n2. **Set up Workflow Variables:**\n   - SCRAPEDO_TOKEN: Your Scrape.do API token\n   - WEB_SHEET_ID: Google Sheet document ID\n   - TRACK_SHEET_GID: Sheet name/ID with URLs to scrape\n   - RESULTS_SHEET_GID: Sheet name/ID for results\n   - Note: in this version only SCRAPEDO_TOKEN is read from variables; the document and tabs are selected directly in the two Google Sheets nodes\n\n3. **Google Sheets Setup:**\n   - Create a Google Sheet with two tabs\n   - First tab: Add Amazon product URLs in a column named 'url'\n   - Second tab: Will store results (name, description, rating, reviews, price)\n   - Make sure the Google account used for the OAuth2 credential can edit the sheet\n\n4. **Credentials:**\n   - Add Google Sheets OAuth2 credentials\n   - Add OpenAI API credentials (used by the OpenAI Chat Model node, gpt-4o-mini)\n\n### Features:\n- Uses Scrape.do to bypass Amazon's anti-bot protection\n- Extracts product data using CSS selectors and AI\n- Loops over the product URLs with Split In Batches\n- Saves structured data to Google Sheets\n\n### Scrape.do Advantages:\n- No need for complex proxy rotation\n- Automatic CAPTCHA handling\n- Better success rate than BrightData\n- Simple API integration"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "80562cea-7422-44ec-9886-1928bb8f81f1": {
      "ai_languageModel": [
        [
          {
            "node": "2c491fda-9510-46f9-973a-754587601b7c",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "41e494b5-f3e9-48dd-8c7b-0096790df02b": {
      "main": [
        [],
        [
          {
            "node": "c588ede7-1689-492d-a863-949ade5ffe33",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "da77ba7c-a40c-4d79-91f1-fd485d101f76": {
      "ai_outputParser": [
        [
          {
            "node": "2c491fda-9510-46f9-973a-754587601b7c",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "c588ede7-1689-492d-a863-949ade5ffe33": {
      "main": [
        [
          {
            "node": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7796a70c-99a4-4e6e-b18a-5c63adc90871": {
      "main": [
        [
          {
            "node": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c499851d-09d6-4a25-812e-c1d3efa3f0a8": {
      "main": [
        [
          {
            "node": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "818b6ea9-b259-4d67-bfb9-f02366da89c1": {
      "main": [
        [
          {
            "node": "2c491fda-9510-46f9-973a-754587601b7c",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "2c491fda-9510-46f9-973a-754587601b7c": {
      "main": [
        [
          {
            "node": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9": {
      "main": [
        [
          {
            "node": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "daf15a88-7d2f-4542-b3f0-c3658960cb22": {
      "main": [
        [
          {
            "node": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
よくある質問

このワークフローの使い方は?

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択します。設定を貼り付けたうえで、必要に応じて認証情報を変更してください。

このワークフローはどんな場面に適していますか?

中級 - 市場調査, AI要約

有料ですか?

このワークフロー自体は完全無料です。ただし、ワークフローで使用するサードパーティサービス(OpenAI API、Scrape.doなど)は別途料金が発生する場合があります。

ワークフロー情報
難易度
中級
ノード数: 11
カテゴリー: 2
ノードタイプ: 10
難易度説明

経験者向け、6-15ノードの中程度の複雑さのワークフロー

作成者

Hello, I'm Onur. I've been working as a freelance software developer for about four years, and I also develop my own projects. For some time, I have been improving my skills and providing various services related to AI and AI workflows, using both low-code tools and traditional coding. If you have any questions, don't hesitate to contact me.

外部リンク
n8n.ioで表示

このワークフローを共有

カテゴリー

カテゴリー: 34