使用Bright Data网络爬虫进行结构化批量数据提取
高级
这是一个面向工程(Engineering)与产品(Product)领域的自动化工作流,共包含 16 个节点,主要使用 If、Set、Wait、Function、Aggregate 等节点。它通过 Bright Data 异步触发批量网页抓取,轮询快照状态直至就绪,下载结果后通过 Webhook 发送通知并将数据写入磁盘。
前置要求
- 需要 Bright Data API 的认证凭证(工作流中以 HTTP Header Auth 方式配置)
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "OjwmaLrXhW4pO5ph",
"meta": {
"instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40"
},
"name": "使用 Bright Data Web Scraper 进行结构化批量数据提取",
"tags": [
{
"id": "Kujft2FOjmOVQAmJ",
"name": "Engineering",
"createdAt": "2025-04-09T01:31:00.558Z",
"updatedAt": "2025-04-09T01:31:00.558Z"
},
{
"id": "ZOwtAMLepQaGW76t",
"name": "Building Blocks",
"createdAt": "2025-04-13T15:23:40.462Z",
"updatedAt": "2025-04-13T15:23:40.462Z"
}
],
"nodes": [
{
"id": "1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec",
"name": "当点击\"测试工作流\"时",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-900,
-395
],
"parameters": {},
"typeVersion": 1
},
{
"id": "533968cd-1329-4a86-8875-478600ed82b7",
"name": "如果",
"type": "n8n-nodes-base.if",
"position": [
200,
-470
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "6a7e5360-4cb5-4806-892e-5c85037fa71c",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.status }}",
"rightValue": "ready"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
"name": "设置快照ID",
"type": "n8n-nodes-base.set",
"position": [
-240,
-395
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "2c3369c6-9206-45d7-9349-f577baeaf189",
"name": "snapshot_id",
"type": "string",
"value": "={{ $json.snapshot_id }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
"name": "下载快照",
"type": "n8n-nodes-base.httpRequest",
"position": [
640,
-520
],
"parameters": {
"url": "=https://api.brightdata.com/datasets/v3/snapshot/{{ $json.snapshot_id }}",
"options": {
"timeout": 10000
},
"sendQuery": true,
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"queryParameters": {
"parameters": [
{
"name": "format",
"value": "json"
}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
"name": "等待",
"type": "n8n-nodes-base.wait",
"position": [
420,
-295
],
"webhookId": "631cd5de-36b3-4264-88ae-45b30e2c2ccc",
"parameters": {
"amount": 30
},
"typeVersion": 1.1
},
{
"id": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
"name": "检查错误",
"type": "n8n-nodes-base.if",
"position": [
420,
-520
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "b267071c-7102-407b-a98d-f613bcb1a106",
"operator": {
"type": "string",
"operation": "equals"
},
"leftValue": "={{ $json.errors.toString() }}",
"rightValue": "0"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "b648614e-c33e-4818-8348-e95df56928c7",
"name": "检查快照状态",
"type": "n8n-nodes-base.httpRequest",
"position": [
-20,
-395
],
"parameters": {
"url": "=https://api.brightdata.com/datasets/v3/progress/{{ $json.snapshot_id }}",
"options": {},
"sendHeaders": true,
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
},
{
"id": "408a1584-666f-471e-bfcd-c4d857319688",
"name": "发起 Webhook 通知",
"type": "n8n-nodes-base.httpRequest",
"position": [
1080,
-520
],
"parameters": {
"url": "https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467",
"options": {},
"sendBody": true,
"bodyParameters": {
"parameters": [
{
"name": "response",
"value": "={{ $json.data[0] }}"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "6548a794-a4fd-4050-b07d-bc7ca4517882",
"name": "聚合 JSON 响应",
"type": "n8n-nodes-base.aggregate",
"position": [
860,
-520
],
"parameters": {
"options": {},
"aggregate": "aggregateAllItemData"
},
"typeVersion": 1
},
{
"id": "c84e195c-edd2-4f59-8986-516d116b7352",
"name": "设置数据集ID、请求URL",
"type": "n8n-nodes-base.set",
"position": [
-680,
-400
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "c16061c8-c829-4bd3-b335-e79c605665f2",
"name": "dataset_id",
"type": "string",
"value": "gd_l7q7dkf244hwjntr0"
},
{
"id": "a4594c55-e39e-4a9e-80d6-d39370001e20",
"name": "request",
"type": "string",
"value": "[{ \"url\": \"https://www.amazon.com/Quencher-FlowState-Stainless-Insulated-Smoothie/dp/B0CRMZHDG8\" }]"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "ceae108e-ed78-40c5-8e58-7013591ccaad",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-900,
-700
],
"parameters": {
"width": 520,
"height": 280,
"content": "## 注意"
},
"typeVersion": 1
},
{
"id": "1f55cffa-abd9-437b-bc9d-3fe0d8b02454",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-120,
-600
],
"parameters": {
"color": 5,
"width": 720,
"height": 500,
"content": "## 等待快照准备就绪"
},
"typeVersion": 1
},
{
"id": "d8ba0f62-80a9-4e66-b70c-086ee5992df6",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
-900,
-220
],
"parameters": {
"color": 4,
"width": 660,
"content": "## 谁将受益?"
},
"typeVersion": 1
},
{
"id": "7fdffafd-f256-4760-b001-a42b5198dbad",
"name": "创建二进制数据",
"type": "n8n-nodes-base.function",
"position": [
1100,
-720
],
"parameters": {
"functionCode": "// Serialize the first item's JSON (pretty-printed) and attach it as a\n// base64-encoded binary property named `data` on that same item.\n// Buffer.from() replaces the deprecated `new Buffer()` constructor.\nitems[0].binary = {\n  data: {\n    data: Buffer.from(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
},
"typeVersion": 1
},
{
"id": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
"name": "将文件写入磁盘",
"type": "n8n-nodes-base.readWriteFile",
"position": [
1320,
-720
],
"parameters": {
"options": {},
"fileName": "d:\\bulk_data.json",
"operation": "write"
},
"typeVersion": 1
},
{
"id": "1130523a-b598-425e-acf1-417ae8699f66",
"name": "向指定URL发起HTTP请求",
"type": "n8n-nodes-base.httpRequest",
"position": [
-460,
-395
],
"parameters": {
"url": "https://api.brightdata.com/datasets/v3/trigger",
"method": "POST",
"options": {},
"jsonBody": "={{ $json.request }}",
"sendBody": true,
"sendQuery": true,
"sendHeaders": true,
"specifyBody": "json",
"authentication": "genericCredentialType",
"genericAuthType": "httpHeaderAuth",
"queryParameters": {
"parameters": [
{
"name": "dataset_id",
"value": "={{ $json.dataset_id }}"
},
{
"name": "format",
"value": "json"
},
{
"name": "uncompressed_webhook",
"value": "true"
}
]
},
"headerParameters": {
"parameters": [
{}
]
}
},
"credentials": {
"httpHeaderAuth": {
"id": "kdbqXuxIR8qIxF7y",
"name": "Header Auth account"
}
},
"typeVersion": 4.2
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "8fb2eb85-ffd6-4632-9668-00f29bc91c34",
"connections": {
"If": {
"main": [
[
{
"node": "Check on the errors",
"type": "main",
"index": 0
}
],
[
{
"node": "Wait",
"type": "main",
"index": 0
}
]
]
},
"Wait": {
"main": [
[
{
"node": "Check Snapshot Status",
"type": "main",
"index": 0
}
]
]
},
"Set Snapshot Id": {
"main": [
[
{
"node": "Check Snapshot Status",
"type": "main",
"index": 0
}
]
]
},
"Download Snapshot": {
"main": [
[
{
"node": "Aggregate JSON Response",
"type": "main",
"index": 0
}
]
]
},
"Check on the errors": {
"main": [
[
{
"node": "Download Snapshot",
"type": "main",
"index": 0
}
]
]
},
"Create a binary data": {
"main": [
[
{
"node": "Write the file to disk",
"type": "main",
"index": 0
}
]
]
},
"Check Snapshot Status": {
"main": [
[
{
"node": "If",
"type": "main",
"index": 0
}
]
]
},
"Aggregate JSON Response": {
"main": [
[
{
"node": "Initiate a Webhook Notification",
"type": "main",
"index": 0
},
{
"node": "Create a binary data",
"type": "main",
"index": 0
}
]
]
},
"Set Dataset Id, Request URL": {
"main": [
[
{
"node": "HTTP Request to the specified URL",
"type": "main",
"index": 0
}
]
]
},
"HTTP Request to the specified URL": {
"main": [
[
{
"node": "Set Snapshot Id",
"type": "main",
"index": 0
}
]
]
},
"When clicking ‘Test workflow’": {
"main": [
[
{
"node": "Set Dataset Id, Request URL",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
难度为「高级」,适合工程与产品团队:需要借助 Bright Data 批量抓取网页并提取结构化数据,并在抓取完成后通过 Webhook 获得通知的场景。
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Bright Data API)可能需要您自行付费。
相关工作流推荐
使用Bright Data和OpenAI的TrustPilot SaaS产品评论追踪器
使用Bright Data和OpenAI的TrustPilot SaaS产品评论追踪器
Set
Code
Merge
+11
22 节点Ranjan Dailata
产品
API架构提取器
API架构提取器
If
Set
Code
+22
88 节点Polina Medvedieva
工程
使用 Bright Data 抓取和 Google Gemini 自动化 Etsy 数据挖掘
使用 Bright Data 抓取和 Google Gemini 实现 Etsy 数据挖掘自动化
Set
Function
Split Out
+8
19 节点Ranjan Dailata
产品
使用Bright Data和OpenAI 4o mini进行DNB公司搜索与提取
基于Bright Data和OpenAI 4o mini的DNB公司搜索与提取
Set
Function
Mcp Client
+7
18 节点Ranjan Dailata
产品
Google趋势数据提取,使用Bright Data和Google Gemini进行摘要生成
使用Bright Data和Google Gemini的Google趋势数据提取与摘要生成
Set
Gmail
Function
+8
16 节点Ranjan Dailata
工程
使用Bright Data和Google Gemini进行结构化数据提取和数据挖掘
使用Bright Data和Google Gemini进行结构化数据提取和数据挖掘
Set
Function
Http Request
+6
18 节点Ranjan Dailata
工程