亚马逊产品搜索爬虫(BrightData、GPT-4和Google Sheets)
中级
这是一个AI, Marketing领域的自动化工作流,包含 11 个节点。主要使用 Code, SplitOut, HttpRequest, GoogleSheets, ManualTrigger 等节点,结合人工智能技术实现智能自动化。 使用BrightData、GPT-4和Google Sheets的亚马逊产品搜索爬虫
前置要求
- 可能需要目标 API 的认证凭证
- Google Sheets API 凭证
使用的节点 (11)
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"meta": {
"instanceId": "4a11afdb3c52fd098e3eae9fad4b39fdf1bbcde142f596adda46c795e366b326"
},
"nodes": [
{
"id": "f1b36f4b-6558-4e83-a999-e6f2d24e196c",
"name": "OpenRouter 聊天模型",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
"position": [
620,
240
],
"parameters": {
"model": "openai/gpt-4.1",
"options": {}
},
"typeVersion": 1
},
{
"id": "89ca0a07-286f-4e68-9e85-0327a4859cc0",
"name": "结构化输出解析器",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
900,
240
],
"parameters": {
"schemaType": "manual",
"inputSchema": "{\n \"type\": \"array\",\n \"items\": {\n \"type\": \"object\",\n \"properties\": {\n \"name\": { \"type\": \"string\" },\n \"description\": { \"type\": \"string\" },\n \"rating\": { \"type\": \"number\" },\n \"reviews\": { \"type\": \"integer\" },\n \"price\": { \"type\": \"string\" }\n },\n \"required\": [\"name\", \"description\", \"rating\", \"reviews\", \"price\"]\n }\n}"
},
"typeVersion": 1.2
},
{
"id": "e4800c1d-c0d8-4093-81ec-fc19ad0034cd",
"name": "scrap url",
"type": "n8n-nodes-base.httpRequest",
"position": [
240,
60
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "web_unlocker1"
},
{
"name": "url",
"value": "={{ $json.url }}"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "{{BRIGHTDATA_TOKEN}}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "1a1f768f-615d-4035-81b0-63b860f8e6ac",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
160,
-140
],
"parameters": {
"content": "## 网页抓取 API"
},
"typeVersion": 1
},
{
"id": "2f260d96-4fff-4a4f-af29-1e43f465d54c",
"name": "当点击\"测试工作流\"时",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-440,
200
],
"parameters": {},
"typeVersion": 1
},
{
"id": "4be9033f-0b9f-466d-916e-88fbb2a80417",
"name": "url",
"type": "n8n-nodes-base.splitInBatches",
"position": [
20,
200
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "21b6d21c-b977-4175-9068-e0e2e19fa472",
"name": "get urls to scrape",
"type": "n8n-nodes-base.googleSheets",
"position": [
-200,
200
],
"parameters": {
"options": {},
"sheetName": "{{TRACK_SHEET_GID}}",
"documentId": "{{WEB_SHEET_ID}}"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "KsXWRZTrfCUFrrHD",
"name": "Google Sheets"
}
},
"typeVersion": 4.5
},
{
"id": "25ef76ec-cf0d-422e-b060-68c49192a008",
"name": "clean html",
"type": "n8n-nodes-base.code",
"position": [
460,
60
],
"parameters": {
"jsCode": "// CleanHtmlFunction.js\n// Purpose: n8n Function node to clean HTML: remove doctype, scripts, styles, head, comments, classes, extra blank lines, and non-whitelisted tags\n\nreturn items.map(item => {\n const rawHtml = item.json.data;\n\n // 1) remove doctype, scripts, styles, comments and head section, and strip class attributes\n let cleaned = rawHtml\n .replace(/<!doctype html>/gi, '')\n .replace(/<script[\\s\\S]*?<\\/script>/gi, '')\n .replace(/<style[\\s\\S]*?<\\/style>/gi, '')\n .replace(/<!--[\\s\\S]*?-->/g, '')\n .replace(/<head[\\s\\S]*?<\\/head>/gi, '')\n .replace(/\\sclass=\"[^\"]*\"/gi, '');\n\n // 2) define whitelist of tags to keep\n const allowedTags = [\n 'h1','h2','h3','h4','h5','h6',\n 'p','ul','ol','li',\n 'strong','em','a','blockquote',\n 'code','pre'\n ];\n\n // 3) strip out all tags not in the whitelist, reconstruct allowed tags cleanly\n cleaned = cleaned.replace(\n /<\\/?([a-z][a-z0-9]*)\\b[^>]*>/gi,\n (match, tagName) => {\n const name = tagName.toLowerCase();\n if (allowedTags.includes(name)) {\n return match.startsWith('</') ? `</${name}>` : `<${name}>`;\n }\n return '';\n }\n );\n\n // 4) collapse multiple blank or whitespace-only lines into a single newline\n cleaned = cleaned.replace(/(\\s*\\r?\\n\\s*){2,}/g, '\\n');\n\n // 5) trim leading/trailing whitespace\n cleaned = cleaned.trim();\n\n return {\n json: { cleanedHtml: cleaned }\n };\n});"
},
"typeVersion": 2
},
{
"id": "f72660d5-8427-4655-acbe-10365273c27b",
"name": "extract data",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
680,
60
],
"parameters": {
"text": "={{ $json.cleanedHtml }}",
"messages": {
"messageValues": [
{
"message": "=You are an expert in web page scraping. Provide a structured response in JSON format. Only the response, without commentary.\n\nExtract the product information for {{ $(‘url’).item.json.url.split(’/s?k=’)[1].split(’&’)[0] }} present on the page.\n\nname\ndescription\nrating\nreviews\nprice"
}
]
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.6
},
{
"id": "8b4af1bb-d7f8-456e-b630-ecd9b6e4bcdc",
"name": "add results",
"type": "n8n-nodes-base.googleSheets",
"position": [
1280,
200
],
"parameters": {
"columns": {
"value": {
"name": "={{ $json.output.name }}",
"price": "={{ $json.output.price }}",
"rating": "={{ $json.output.rating }}",
"reviews": "={{ $json.output.reviews }}",
"description": "={{ $json.output.description }}"
},
"schema": [
{
"id": "name",
"type": "string"
},
{
"id": "description",
"type": "string"
},
{
"id": "rating",
"type": "string"
},
{
"id": "reviews",
"type": "string"
},
{
"id": "price",
"type": "string"
}
],
"mappingMode": "defineBelow"
},
"options": {},
"operation": "append",
"sheetName": "{{RESULTS_SHEET_GID}}",
"documentId": "{{WEB_SHEET_ID}}"
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "KsXWRZTrfCUFrrHD",
"name": "Google Sheets"
}
},
"typeVersion": 4.5
},
{
"id": "7a5ba438-2ede-4d6c-b8fa-9a958ba1ef3e",
"name": "Split items",
"type": "n8n-nodes-base.splitOut",
"position": [
1060,
60
],
"parameters": {
"include": "allOtherFields",
"options": {},
"fieldToSplitOut": "output"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"url": {
"main": [
[],
[
{
"node": "scrap url",
"type": "main",
"index": 0
}
]
]
},
"scrap url": {
"main": [
[
{
"node": "clean html",
"type": "main",
"index": 0
}
]
]
},
"clean html": {
"main": [
[
{
"node": "extract data",
"type": "main",
"index": 0
}
]
]
},
"Split items": {
"main": [
[
{
"node": "add results",
"type": "main",
"index": 0
}
]
]
},
"add results": {
"main": [
[
{
"node": "url",
"type": "main",
"index": 0
}
]
]
},
"extract data": {
"main": [
[
{
"node": "Split items",
"type": "main",
"index": 0
}
]
]
},
"get urls to scrape": {
"main": [
[
{
"node": "url",
"type": "main",
"index": 0
}
]
]
},
"OpenRouter Chat Model": {
"ai_languageModel": [
[
{
"node": "extract data",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"Structured Output Parser": {
"ai_outputParser": [
[
{
"node": "extract data",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "get urls to scrape",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 人工智能, 营销
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenAI API)可能需要您自行付费。
相关工作流推荐
WordPress的Rank Math批量标题和描述优化器
WordPress的Rank Math批量标题和描述优化器
If
Set
Limit
+9
16 节点phil
人工智能
(Duc)深度研究市场模板
集成PerplexityAI研究和OpenAI内容的多层级WordPress博客生成器
If
Set
Xml
+28
132 节点Daniel Ng
人工智能
WordPress博客自动化专业版(SEO主题)v2
WordPress自动博客专业版 - SEO主题内容自动化机器
If
Set
Xml
+21
63 节点Daniel Ng
人工智能
自动化新闻到简报AI代理v13
AI新闻研究团队:24/7简报自动化,含Perplexity引用
Set
Code
Gmail
+15
37 节点Derek Cheung
产品
AI个性化多产品邮件营销
基于SMTP轮换的AI个性化多产品邮件营销(GPT-4o/o3-mini)
If
Code
Wait
+16
41 节点Badr
销售
使用 Browserflow 和 Google Sheets 自动化 LinkedIn 请求与破冰消息
使用 Browserflow 和 Google Sheets 自动化 LinkedIn 请求与破冰消息
If
Set
Sort
+15
44 节点PollupAI
销售
工作流信息
难度等级
中级
节点数量11
分类2
节点类型10
作者
phil
@philAccélérateur de Chiffre d'Affaires : Automatisez votre entreprise pour la rendre plus visible sur Google, pour trouver de nouveaux Clients, pour gagner du temps
外部链接
在 n8n.io 查看 →
分享此工作流