8
n8n 中文网amn8n.com

使用Scrape.do、GPT-4和Google Sheets提取亚马逊产品数据

中级

这是一个 Market Research(市场调研)、AI Summarization(AI 摘要)领域的自动化工作流,包含 11 个节点,主要使用 Html、SplitOut、HttpRequest、GoogleSheets、ManualTrigger 等节点。该工作流通过 Scrape.do、GPT-4 和 Google Sheets 提取亚马逊产品数据。

前置要求
  • Scrape.do API Token(工作流通过变量 SCRAPEDO_TOKEN 读取,用于调用抓取接口)
  • Google Sheets API 凭证
  • OpenAI API Key
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "cb5caf45c9475b848c7e83772505bb02340e165acdd8de77e25011192306257c",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "c499851d-09d6-4a25-812e-c1d3efa3f0a8",
      "name": "When clicking Test workflow",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1648,
        272
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "80562cea-7422-44ec-9886-1928bb8f81f1",
      "name": "OpenAI Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
      "position": [
        -624,
        336
      ],
      "parameters": {
        "model": {
          "__rl": true,
          "mode": "list",
          "value": "gpt-4o-mini"
        },
        "options": {
          "maxTokens": 500,
          "temperature": 0,
          "responseFormat": "json_object"
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "da77ba7c-a40c-4d79-91f1-fd485d101f76",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        -288,
        304
      ],
      "parameters": {
        "schemaType": "manual",
        "inputSchema": "{\n  \"type\": \"object\",\n  \"properties\": {\n    \"name\": { \n      \"type\": \"string\", \n      \"description\": \"Product name/title\" \n    },\n    \"description\": { \n      \"type\": \"string\", \n      \"description\": \"Product description or key features\" \n    },\n    \"rating\": { \n      \"type\": [\"number\", \"null\"], \n      \"description\": \"Average rating (e.g., 4.5)\" \n    },\n    \"reviews\": { \n      \"type\": [\"integer\", \"null\"], \n      \"description\": \"Number of reviews\" \n    },\n    \"price\": { \n      \"type\": [\"string\", \"null\"], \n      \"description\": \"Product price with currency\" \n    }\n  },\n  \"required\": [\"name\"]\n}"
      },
      "typeVersion": 1.3
    },
    {
      "id": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
      "name": "1. Get Product URLs from Google Sheets",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        -1392,
        272
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=0",
          "cachedResultName": "Sheet1"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
          "cachedResultName": "Amazon Product List"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "df8r9D022KIAOHTC",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
      "name": "2. Loop Through Each URL",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1168,
        272
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "c588ede7-1689-492d-a863-949ade5ffe33",
      "name": "3. Scrape Product Page HTML",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -960,
        128
      ],
      "parameters": {
        "url": "=https://api.scrape.do/?token={{$vars.SCRAPEDO_TOKEN}}&url={{ encodeURIComponent($json.url) }}&geoCode=us&render=false",
        "options": {
          "timeout": 60000,
          "response": {
            "response": {}
          }
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
      "name": "4. Extract Raw Data Elements",
      "type": "n8n-nodes-base.html",
      "position": [
        -752,
        128
      ],
      "parameters": {
        "options": {},
        "operation": "extractHtmlContent",
        "extractionValues": {
          "values": [
            {
              "key": "productTitle",
              "cssSelector": "#productTitle, h1[data-automation-id=\"product-title\"], .product-title"
            },
            {
              "key": "price",
              "cssSelector": ".a-price .a-offscreen, .a-price-whole, .a-price-fraction, .priceToPay .a-price .a-offscreen"
            },
            {
              "key": "rating",
              "cssSelector": ".a-icon-alt, [data-hook=\"average-star-rating\"], .a-star-medium .a-icon-alt"
            },
            {
              "key": "reviewCount",
              "cssSelector": "[data-hook=\"total-review-count\"], .a-link-normal[href*=\"customerReviews\"], #acrCustomerReviewText"
            },
            {
              "key": "featureBullets",
              "cssSelector": "#feature-bullets ul, .a-unordered-list.a-nostyle.a-vertical.feature"
            },
            {
              "key": "productDescription",
              "cssSelector": "#productDescription, [data-feature-name=\"productDescription\"], .product-description"
            }
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "2c491fda-9510-46f9-973a-754587601b7c",
      "name": "5. Clean & Structure Data with AI",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        -512,
        128
      ],
      "parameters": {
        "text": "={{ JSON.stringify($json, null, 2) }}",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "Extract Amazon product data and return ONLY valid JSON.\n\nInput: {{ $json }}\n\nExtract:\n- name: product title from productTitle\n- description: create from featureBullets OR productDescription (max 150 chars, if empty use \"No description\")\n- rating: extract number from rating (e.g. \"4.5 out of 5\" → 4.5, if no rating use null)\n- reviews: extract number from reviewCount (e.g. \"1,234 ratings\" → 1234, if none use null)\n- price: format price from price field (add $ if missing, if no price use null)\n\nReturn exact JSON:\n{\n  \"name\": \"product title here\",\n  \"description\": \"description here or No description\",\n  \"rating\": 4.5,\n  \"reviews\": 1234,\n  \"price\": \"$29.99\"\n}"
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
      "name": "6. Format Final JSON Output",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -128,
        128
      ],
      "parameters": {
        "include": "selectedOtherFields",
        "options": {},
        "fieldToSplitOut": "output",
        "fieldsToInclude": "output.name, output.description, output.rating, output.reviews, output.price"
      },
      "typeVersion": 1
    },
    {
      "id": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
      "name": "7. Save Product Data to Google Sheets",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        80,
        272
      ],
      "parameters": {
        "columns": {
          "value": {},
          "schema": [],
          "mappingMode": "autoMapInputData",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {
          "useAppend": true
        },
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 838351250,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=838351250",
          "cachedResultName": "Sheet2"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
          "cachedResultName": "Amazon Product List"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "df8r9D022KIAOHTC",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "1d3b653a-e5d8-4e88-a210-15224c6282c1",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -2272,
        -144
      ],
      "parameters": {
        "width": 528,
        "height": 896,
        "content": "## Amazon Scraper with Scrape.do API\n\n### Setup Instructions:\n\n1. **Get Scrape.do API Token:**\n   - Sign up at https://scrape.do\n   - Get your API token from the dashboard\n\n2. **Set up Workflow Variables:**\n   - SCRAPEDO_TOKEN: Your Scrape.do API token\n   - WEB_SHEET_ID: Google Sheet document ID\n   - TRACK_SHEET_GID: Sheet name/ID with URLs to scrape\n   - RESULTS_SHEET_GID: Sheet name/ID for results\n   - Note: only SCRAPEDO_TOKEN is read by the nodes in this template; both Google Sheets nodes have the document and tab selected directly, so re-select your own document and tabs in each of them\n\n3. **Google Sheets Setup:**\n   - Create a Google Sheet with two tabs\n   - First tab: Add Amazon product URLs in a column named 'url'\n   - Second tab: Will store results (name, description, rating, reviews, price)\n   - Share the sheet with your service account email\n\n4. **Credentials:**\n   - Add Google Sheets OAuth2 credentials\n   - Add OpenAI API credentials (for the gpt-4o-mini chat model)\n\n### Features:\n- Uses Scrape.do to bypass Amazon's anti-bot protection\n- Extracts product data using pattern matching and AI\n- Loops over the product URLs with Split In Batches\n- Saves structured data to Google Sheets\n\n### Scrape.do Advantages:\n- No need for complex proxy rotation\n- Automatic CAPTCHA handling\n- Better success rate than BrightData\n- Simple API integration"
      },
      "typeVersion": 1
    }
  ],
  "pinData": {},
  "connections": {
    "OpenAI Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "5. Clean & Structure Data with AI",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "2. Loop Through Each URL": {
      "main": [
        [],
        [
          {
            "node": "3. Scrape Product Page HTML",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "5. Clean & Structure Data with AI",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "3. Scrape Product Page HTML": {
      "main": [
        [
          {
            "node": "4. Extract Raw Data Elements",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "6. Format Final JSON Output": {
      "main": [
        [
          {
            "node": "7. Save Product Data to Google Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking Test workflow": {
      "main": [
        [
          {
            "node": "1. Get Product URLs from Google Sheets",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "4. Extract Raw Data Elements": {
      "main": [
        [
          {
            "node": "5. Clean & Structure Data with AI",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "5. Clean & Structure Data with AI": {
      "main": [
        [
          {
            "node": "6. Format Final JSON Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7. Save Product Data to Google Sheets": {
      "main": [
        [
          {
            "node": "2. Loop Through Each URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1. Get Product URLs from Google Sheets": {
      "main": [
        [
          {
            "node": "2. Loop Through Each URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

中级 - 市场调研, AI 摘要总结

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。

工作流信息
难度等级
中级
节点数量: 11
分类: 2
节点类型: 10
难度说明

适合有一定经验的用户,包含 6-15 个节点的中等复杂度工作流

作者

Hello, I'm Onur. I've been working as a freelance software developer for about four years, and I also develop my own projects. For some time, I have been improving my skills and providing various services related to AI and AI workflows, using both low-code tools and hand-written code. If you have any questions, don't hesitate to contact me.

外部链接
在 n8n.io 查看

分享此工作流