使用Scrape.do、GPT-4和Google Sheets提取亚马逊产品数据
中级
这是一个市场调研(Market Research)与 AI 摘要总结(AI Summarization)领域的自动化工作流,包含 11 个节点,主要使用 Html、SplitOut、HttpRequest、GoogleSheets、ManualTrigger 等节点,用于通过 Scrape.do、GPT-4 和 Google Sheets 提取亚马逊产品数据。
前置要求
- 可能需要目标 API 的认证凭证
- Google Sheets API 凭证
- OpenAI API Key
使用的节点 (11)
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "cb5caf45c9475b848c7e83772505bb02340e165acdd8de77e25011192306257c",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "c499851d-09d6-4a25-812e-c1d3efa3f0a8",
"name": "When clicking Test workflow",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1648,
272
],
"parameters": {},
"typeVersion": 1
},
{
"id": "80562cea-7422-44ec-9886-1928bb8f81f1",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-624,
336
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4o-mini"
},
"options": {
"maxTokens": 500,
"temperature": 0,
"responseFormat": "json_object"
}
},
"typeVersion": 1.2
},
{
"id": "da77ba7c-a40c-4d79-91f1-fd485d101f76",
"name": "Structured Output Parser",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
-288,
304
],
"parameters": {
"schemaType": "manual",
"inputSchema": "{\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \n \"type\": \"string\", \n \"description\": \"Product name/title\" \n },\n \"description\": { \n \"type\": \"string\", \n \"description\": \"Product description or key features\" \n },\n \"rating\": { \n \"type\": [\"number\", \"null\"], \n \"description\": \"Average rating (e.g., 4.5)\" \n },\n \"reviews\": { \n \"type\": [\"integer\", \"null\"], \n \"description\": \"Number of reviews\" \n },\n \"price\": { \n \"type\": [\"string\", \"null\"], \n \"description\": \"Product price with currency\" \n }\n },\n \"required\": [\"name\"]\n}"
},
"typeVersion": 1.3
},
{
"id": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
"name": "1. Get Product URLs from Google Sheets",
"type": "n8n-nodes-base.googleSheets",
"position": [
-1392,
272
],
"parameters": {
"options": {},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
"cachedResultName": "Amazon Product List"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "df8r9D022KIAOHTC",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"name": "2. Loop Through Each URL",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1168,
272
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "c588ede7-1689-492d-a863-949ade5ffe33",
"name": "3. Scrape Product Page HTML",
"type": "n8n-nodes-base.httpRequest",
"position": [
-960,
128
],
"parameters": {
"url": "=https://api.scrape.do/?token={{$vars.SCRAPEDO_TOKEN}}&url={{ encodeURIComponent($json.url) }}&geoCode=us&render=false",
"options": {
"timeout": 60000,
"response": {
"response": {}
}
}
},
"typeVersion": 4.2
},
{
"id": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
"name": "4. Extract Raw Data Elements",
"type": "n8n-nodes-base.html",
"position": [
-752,
128
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "productTitle",
"cssSelector": "#productTitle, h1[data-automation-id=\"product-title\"], .product-title"
},
{
"key": "price",
"cssSelector": ".a-price .a-offscreen, .a-price-whole, .a-price-fraction, .priceToPay .a-price .a-offscreen"
},
{
"key": "rating",
"cssSelector": ".a-icon-alt, [data-hook=\"average-star-rating\"], .a-star-medium .a-icon-alt"
},
{
"key": "reviewCount",
"cssSelector": "[data-hook=\"total-review-count\"], .a-link-normal[href*=\"customerReviews\"], #acrCustomerReviewText"
},
{
"key": "featureBullets",
"cssSelector": "#feature-bullets ul, .a-unordered-list.a-nostyle.a-vertical.feature"
},
{
"key": "productDescription",
"cssSelector": "#productDescription, [data-feature-name=\"productDescription\"], .product-description"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "2c491fda-9510-46f9-973a-754587601b7c",
"name": "5. Clean & Structure Data with AI",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
-512,
128
],
"parameters": {
"text": "={{ JSON.stringify($json, null, 2) }}",
"batching": {},
"messages": {
"messageValues": [
{
"message": "Extract Amazon product data and return ONLY valid JSON.\n\nInput: {{ $json }}\n\nExtract:\n- name: product title from productTitle\n- description: create from featureBullets OR productDescription (max 150 chars, if empty use \"No description\")\n- rating: extract number from rating (e.g. \"4.5 out of 5\" → 4.5, if no rating use null)\n- reviews: extract number from reviewCount (e.g. \"1,234 ratings\" → 1234, if none use null)\n- price: format price from price field (add $ if missing, if no price use null)\n\nReturn exact JSON:\n{\n \"name\": \"product title here\",\n \"description\": \"description here or No description\",\n \"rating\": 4.5,\n \"reviews\": 1234,\n \"price\": \"$29.99\"\n}"
}
]
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.7
},
{
"id": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
"name": "6. Format Final JSON Output",
"type": "n8n-nodes-base.splitOut",
"position": [
-128,
128
],
"parameters": {
"include": "selectedOtherFields",
"options": {},
"fieldToSplitOut": "output",
"fieldsToInclude": "output.name, output.description, output.rating, output.reviews, output.price"
},
"typeVersion": 1
},
{
"id": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
"name": "7. Save Product Data to Google Sheets",
"type": "n8n-nodes-base.googleSheets",
"position": [
80,
272
],
"parameters": {
"columns": {
"value": {},
"schema": [],
"mappingMode": "autoMapInputData",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {
"useAppend": true
},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": 838351250,
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=838351250",
"cachedResultName": "Sheet2"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
"cachedResultName": "Amazon Product List"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "df8r9D022KIAOHTC",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "1d3b653a-e5d8-4e88-a210-15224c6282c1",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2272,
-144
],
"parameters": {
"width": 528,
"height": 896,
"content": "## Amazon Scraper with Scrape.do API\n\n### Setup Instructions:\n\n1. **Get Scrape.do API Token:**\n - Sign up at https://scrape.do\n - Get your API token from the dashboard\n\n2. **Set up Workflow Variables:**\n - SCRAPEDO_TOKEN: Your Scrape.do API token\n - WEB_SHEET_ID: Google Sheet document ID\n - TRACK_SHEET_GID: Sheet name/ID with URLs to scrape\n - RESULTS_SHEET_GID: Sheet name/ID for results\n - Note: as exported, only SCRAPEDO_TOKEN is referenced by a node; the Google Sheets nodes point at a document and tabs selected from the list, so re-select your own document and tabs in nodes 1 and 7\n\n3. **Google Sheets Setup:**\n - Create a Google Sheet with two tabs\n - First tab: Add Amazon product URLs in a column named 'url'\n - Second tab: Will store results (name, description, rating, reviews, price)\n - If you use a service account instead of OAuth2, share the sheet with your service account email\n\n4. **Credentials:**\n - Add Google Sheets OAuth2 credentials\n - Add OpenAI API credentials (the chat model node uses gpt-4o-mini)\n\n### Features:\n- Uses Scrape.do to bypass Amazon's anti-bot protection\n- Extracts product data using pattern matching and AI\n- Loops over the URL list with Split In Batches\n- Saves structured data to Google Sheets\n\n### Scrape.do Advantages:\n- No need for complex proxy rotation\n- Automatic CAPTCHA handling\n- Better success rate than BrightData\n- Simple API integration"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"OpenAI Chat Model": {
"ai_languageModel": [
[
{
"node": "5. Clean & Structure Data with AI",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"2. Loop Through Each URL": {
"main": [
[],
[
{
"node": "3. Scrape Product Page HTML",
"type": "main",
"index": 0
}
]
]
},
"Structured Output Parser": {
"ai_outputParser": [
[
{
"node": "5. Clean & Structure Data with AI",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"3. Scrape Product Page HTML": {
"main": [
[
{
"node": "4. Extract Raw Data Elements",
"type": "main",
"index": 0
}
]
]
},
"6. Format Final JSON Output": {
"main": [
[
{
"node": "7. Save Product Data to Google Sheets",
"type": "main",
"index": 0
}
]
]
},
"When clicking Test workflow": {
"main": [
[
{
"node": "1. Get Product URLs from Google Sheets",
"type": "main",
"index": 0
}
]
]
},
"4. Extract Raw Data Elements": {
"main": [
[
{
"node": "5. Clean & Structure Data with AI",
"type": "main",
"index": 0
}
]
]
},
"5. Clean & Structure Data with AI": {
"main": [
[
{
"node": "6. Format Final JSON Output",
"type": "main",
"index": 0
}
]
]
},
"7. Save Product Data to Google Sheets": {
"main": [
[
{
"node": "2. Loop Through Each URL",
"type": "main",
"index": 0
}
]
]
},
"1. Get Product URLs from Google Sheets": {
"main": [
[
{
"node": "2. Loop Through Each URL",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 市场调研, AI 摘要总结
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
自动化B2B潜在客户生成:Google Places、Scrape.do与AI数据增强
自动化B2B潜在客户生成:Google Places、Scrape.do与AI数据增强
If
Set
Html
+9
19 节点Onur
杂项
自动化 Meta 广告分析
使用 Gemini AI、ScrapingFlash 和 Google Sheets 自动分析 Meta 广告
Limit
Split Out
Http Request
+8
17 节点Paul-François GORIAUX
市场调研
品牌可见性检查 - AI实验室演示项目
跨AI搜索工具的品牌可见性和情感分析 (OpenAI、Perplexity、ChatGPT)
If
Set
Limit
+11
48 节点AOE Agent Lab
市场调研
使用 Bright Data API 和 AI 抓取分析 Google 广告并发送邮件报告
使用 Bright Data API 和 AI 抓取分析 Google 广告并发送邮件报告
Set
Code
Gmail
+15
45 节点Zacharia Kimotho
市场调研
新抓取器_TechCrunch新闻-AI1
TechCrunch AI文章抓取与分类器,使用GPT-4.1-nano到Sheets和Telegram
Set
Code
Html
+12
18 节点Mujahid Kabae
市场调研
01 使用AI媒体买家分析Facebook广告表现并将洞察发送到Google Sheets
使用Gemini AI分析Facebook广告并将洞察发送到Google Sheets
If
Set
Code
+13
34 节点JJ Tham
市场调研
工作流信息
难度等级
中级
节点数量:11
分类:2
节点类型:10
作者
Onur
@onurpolat05
Hello, I'm Onur. I've been working as a freelance software developer for about four years. In addition, I develop my own projects. For some time, I have been improving my skills and providing various services related to AI and AI workflows, both by writing low-code and code. If you have any questions, don't hesitate to contact me.
外部链接
在 n8n.io 查看 →
分享此工作流