Amazon-Produktdaten mit Scrape.do, GPT-4 und Google Sheets extrahieren
Dies ist ein Automatisierungsworkflow aus dem Bereich Market Research und AI Summarization mit 11 Nodes. Hauptsächlich werden die Nodes Html, SplitOut, HttpRequest, GoogleSheets und ManualTrigger sowie weitere Nodes verwendet. Der Workflow extrahiert Amazon-Produktdaten mit Scrape.do, GPT-4 und Google Sheets.
- Möglicherweise sind Ziel-API-Anmeldedaten erforderlich
- Google Sheets API-Anmeldedaten
- OpenAI API Key
Verwendete Nodes (11)
Kategorie
{
"meta": {
"instanceId": "cb5caf45c9475b848c7e83772505bb02340e165acdd8de77e25011192306257c",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "c499851d-09d6-4a25-812e-c1d3efa3f0a8",
"name": "Bei Klick auf Workflow testen",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1648,
272
],
"parameters": {},
"typeVersion": 1
},
{
"id": "80562cea-7422-44ec-9886-1928bb8f81f1",
"name": "OpenAI Chat Model",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-624,
336
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4o-mini"
},
"options": {
"maxTokens": 500,
"temperature": 0,
"responseFormat": "json_object"
}
},
"typeVersion": 1.2
},
{
"id": "da77ba7c-a40c-4d79-91f1-fd485d101f76",
"name": "Structured Output Parser",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
-288,
304
],
"parameters": {
"schemaType": "manual",
"inputSchema": "{\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \n \"type\": \"string\", \n \"description\": \"Product name/title\" \n },\n \"description\": { \n \"type\": \"string\", \n \"description\": \"Product description or key features\" \n },\n \"rating\": { \n \"type\": [\"number\", \"null\"], \n \"description\": \"Average rating (e.g., 4.5)\" \n },\n \"reviews\": { \n \"type\": [\"integer\", \"null\"], \n \"description\": \"Number of reviews\" \n },\n \"price\": { \n \"type\": [\"string\", \"null\"], \n \"description\": \"Product price with currency\" \n }\n },\n \"required\": [\"name\"]\n}"
},
"typeVersion": 1.3
},
{
"id": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
"name": "1. Produkt-URLs aus Google Sheets abrufen",
"type": "n8n-nodes-base.googleSheets",
"position": [
-1392,
272
],
"parameters": {
"options": {},
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
"cachedResultName": "Amazon Product List"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "df8r9D022KIAOHTC",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"name": "2. Jede URL durchlaufen",
"type": "n8n-nodes-base.splitInBatches",
"position": [
-1168,
272
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "c588ede7-1689-492d-a863-949ade5ffe33",
"name": "3. Produktseiten-HTML scrapen",
"type": "n8n-nodes-base.httpRequest",
"position": [
-960,
128
],
"parameters": {
"url": "=https://api.scrape.do/?token={{$vars.SCRAPEDO_TOKEN}}&url={{ encodeURIComponent($json.url) }}&geoCode=us&render=false",
"options": {
"timeout": 60000,
"response": {
"response": {}
}
}
},
"typeVersion": 4.2
},
{
"id": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
"name": "4. Rohe Datenelemente extrahieren",
"type": "n8n-nodes-base.html",
"position": [
-752,
128
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "productTitle",
"cssSelector": "#productTitle, h1[data-automation-id=\"product-title\"], .product-title"
},
{
"key": "price",
"cssSelector": ".a-price .a-offscreen, .a-price-whole, .a-price-fraction, .priceToPay .a-price .a-offscreen"
},
{
"key": "rating",
"cssSelector": ".a-icon-alt, [data-hook=\"average-star-rating\"], .a-star-medium .a-icon-alt"
},
{
"key": "reviewCount",
"cssSelector": "[data-hook=\"total-review-count\"], .a-link-normal[href*=\"customerReviews\"], #acrCustomerReviewText"
},
{
"key": "featureBullets",
"cssSelector": "#feature-bullets ul, .a-unordered-list.a-nostyle.a-vertical.feature"
},
{
"key": "productDescription",
"cssSelector": "#productDescription, [data-feature-name=\"productDescription\"], .product-description"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "2c491fda-9510-46f9-973a-754587601b7c",
"name": "5. Daten mit KI bereinigen und strukturieren",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
-512,
128
],
"parameters": {
"text": "={{ JSON.stringify($json, null, 2) }}",
"batching": {},
"messages": {
"messageValues": [
{
"message": "Extract Amazon product data and return ONLY valid JSON.\n\nInput: {{ $json }}\n\nExtract:\n- name: product title from productTitle\n- description: create from featureBullets OR productDescription (max 150 chars, if empty use \"No description\")\n- rating: extract number from rating (e.g. \"4.5 out of 5\" → 4.5, if no rating use null)\n- reviews: extract number from reviewCount (e.g. \"1,234 ratings\" → 1234, if none use null)\n- price: format price from price field (add $ if missing, if no price use null)\n\nReturn exact JSON:\n{\n \"name\": \"product title here\",\n \"description\": \"description here or No description\",\n \"rating\": 4.5,\n \"reviews\": 1234,\n \"price\": \"$29.99\"\n}"
}
]
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.7
},
{
"id": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
"name": "6. Endgültige JSON-Ausgabe formatieren",
"type": "n8n-nodes-base.splitOut",
"position": [
-128,
128
],
"parameters": {
"include": "selectedOtherFields",
"options": {},
"fieldToSplitOut": "output",
"fieldsToInclude": "output.name, output.description, output.rating, output.reviews, output.price"
},
"typeVersion": 1
},
{
"id": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
"name": "7. Produktdaten in Google Sheets speichern",
"type": "n8n-nodes-base.googleSheets",
"position": [
80,
272
],
"parameters": {
"columns": {
"value": {},
"schema": [],
"mappingMode": "autoMapInputData",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {
"useAppend": true
},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": 838351250,
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit#gid=838351250",
"cachedResultName": "Sheet2"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/19Allmozbygw-QogPeq2TH9m9D57FCn4MTu3zmJukg1A/edit?usp=drivesdk",
"cachedResultName": "Amazon Product List"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "df8r9D022KIAOHTC",
"name": "Google Sheets account"
}
},
"typeVersion": 4.7
},
{
"id": "1d3b653a-e5d8-4e88-a210-15224c6282c1",
"name": "Notizzettel1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2272,
-144
],
"parameters": {
"width": 528,
"height": 896,
"content": "## Amazon Scraper with Scrape.do API\n\n### Setup Instructions:\n\n1. **Get Scrape.do API Token:**\n - Sign up at https://scrape.do\n - Get your API token from the dashboard\n\n2. **Set up Workflow Variables:**\n - SCRAPEDO_TOKEN: Your Scrape.do API token\n - WEB_SHEET_ID: Google Sheet document ID\n - TRACK_SHEET_GID: Sheet name/ID with URLs to scrape\n - RESULTS_SHEET_GID: Sheet name/ID for results\n\n3. **Google Sheets Setup:**\n - Create a Google Sheet with two tabs\n - First tab: Add Amazon product URLs in a column named 'url'\n - Second tab: Will store results (name, description, rating, reviews, price)\n - Make sure the Google account behind your OAuth2 credential can edit the sheet\n\n4. **Credentials:**\n - Add Google Sheets OAuth2 credentials\n - Add OpenAI API credentials (for the gpt-4o-mini chat model)\n\n### Features:\n- Uses Scrape.do to bypass Amazon's anti-bot protection\n- Extracts product data using pattern matching and AI\n- Loops over the product URLs with Split In Batches\n- Saves structured data to Google Sheets\n\n### Scrape.do Advantages:\n- No need for complex proxy rotation\n- Automatic CAPTCHA handling\n- Better success rate than BrightData\n- Simple API integration"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"80562cea-7422-44ec-9886-1928bb8f81f1": {
"ai_languageModel": [
[
{
"node": "2c491fda-9510-46f9-973a-754587601b7c",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"41e494b5-f3e9-48dd-8c7b-0096790df02b": {
"main": [
[],
[
{
"node": "c588ede7-1689-492d-a863-949ade5ffe33",
"type": "main",
"index": 0
}
]
]
},
"da77ba7c-a40c-4d79-91f1-fd485d101f76": {
"ai_outputParser": [
[
{
"node": "2c491fda-9510-46f9-973a-754587601b7c",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"c588ede7-1689-492d-a863-949ade5ffe33": {
"main": [
[
{
"node": "818b6ea9-b259-4d67-bfb9-f02366da89c1",
"type": "main",
"index": 0
}
]
]
},
"7796a70c-99a4-4e6e-b18a-5c63adc90871": {
"main": [
[
{
"node": "7c3d7a0e-4d59-41e0-bdc8-87005237d8a9",
"type": "main",
"index": 0
}
]
]
},
"c499851d-09d6-4a25-812e-c1d3efa3f0a8": {
"main": [
[
{
"node": "daf15a88-7d2f-4542-b3f0-c3658960cb22",
"type": "main",
"index": 0
}
]
]
},
"818b6ea9-b259-4d67-bfb9-f02366da89c1": {
"main": [
[
{
"node": "2c491fda-9510-46f9-973a-754587601b7c",
"type": "main",
"index": 0
}
]
]
},
"2c491fda-9510-46f9-973a-754587601b7c": {
"main": [
[
{
"node": "7796a70c-99a4-4e6e-b18a-5c63adc90871",
"type": "main",
"index": 0
}
]
]
},
"7c3d7a0e-4d59-41e0-bdc8-87005237d8a9": {
"main": [
[
{
"node": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"type": "main",
"index": 0
}
]
]
},
"daf15a88-7d2f-4542-b3f0-c3658960cb22": {
"main": [
[
{
"node": "41e494b5-f3e9-48dd-8c7b-0096790df02b",
"type": "main",
"index": 0
}
]
]
}
}
}
Wie verwende ich diesen Workflow?
Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.
Für welche Szenarien ist dieser Workflow geeignet?
Fortgeschritten - Marktforschung, KI-Zusammenfassung
Ist es kostenpflichtig?
Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie OpenAI API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.
Verwandte Workflows
Onur
@onurpolat05 Hello, I'm Onur. I've been working as a freelance software developer for about four years. In addition, I develop my own projects. For some time, I have been improving myself and providing various services related to AI and AI workflows, both by writing low code and code. If you have any questions, don't hesitate to contact me.
Diesen Workflow teilen