轻量网页抓取器
这是一个潜在客户开发(Lead Generation)领域的自动化工作流,包含 26 个节点,主要使用 Set、Code、Wait、Limit、Merge 等节点。它通过 Apify 抓取 Google Maps 上的商家信息,再从商家网站中提取邮箱地址,最后将线索写入 Airtable。
- Airtable 个人访问令牌(Personal Access Token)
- Apify API 凭证(用于运行 Google Maps Scraper 执行器并读取数据集)
使用的节点 (26)
分类
{
"id": "Rp64ykElPQZ1J9L8",
"meta": {
"instanceId": "7aff71fd69feb5070397c156c8849f33a22c92a58c89ad8935111d92acdc904d",
"templateCredsSetupCompleted": true
},
"name": "轻量网页抓取器",
"tags": [],
"nodes": [
{
"id": "1ed880cf-9d9d-4e2d-bc1d-76ab49a44164",
"name": "限制",
"type": "n8n-nodes-base.limit",
"position": [
520,
180
],
"parameters": {
"maxItems": 30
},
"typeVersion": 1
},
{
"id": "b106fc00-65fa-4300-adda-99822ac96d44",
"name": "表单提交时",
"type": "n8n-nodes-base.formTrigger",
"position": [
-360,
180
],
"webhookId": "2800ca7a-990c-4ce5-a7cd-b40d50f69158",
"parameters": {
"options": {},
"formTitle": "Web Scraper",
"formFields": {
"values": [
{
"fieldLabel": "Keyword",
"requiredField": true
},
{
"fieldLabel": "Location",
"requiredField": true
},
{
"fieldLabel": "No. Of Leads",
"placeholder": "10"
}
]
},
"formDescription": "This scrapes website urls from Google Maps to get company information."
},
"typeVersion": 2.2
},
{
"id": "1a61eada-9883-437b-9b85-7553ad972639",
"name": "运行执行器",
"type": "@apify/n8n-nodes-apify.apify",
"position": [
-140,
180
],
"parameters": {
"actorId": {
"__rl": true,
"mode": "list",
"value": "nwua9Gu5YrADL7ZDj",
"cachedResultUrl": "https://console.apify.com/actors/nwua9Gu5YrADL7ZDj/input",
"cachedResultName": "Google Maps Scraper"
},
"timeout": 120,
"operation": "Run actor",
"customBody": "={\n  \"language\": \"en\",\n  \"locationQuery\": {{ JSON.stringify($json.Location || '') }},\n  \"maxCrawledPlacesPerSearch\": {{ Math.max(1, Math.floor(Number($json['No. Of Leads'])) || 10) }},\n  \"searchStringsArray\": [\n    {{ JSON.stringify($json.Keyword || '') }}\n  ],\n  \"skipClosedPlaces\": false,\n  \"website\": \"withWebsite\"\n}",
"waitForFinish": 20
},
"credentials": {
"apifyApi": {
"id": "ykVYTlJ99a6TZf2C",
"name": "Apify account"
}
},
"typeVersion": 1
},
{
"id": "1eab0a81-e729-49f0-add9-7a3d9de9844f",
"name": "获取数据集项",
"type": "@apify/n8n-nodes-apify.apify",
"position": [
80,
180
],
"parameters": {
"offset": {},
"resource": "Datasets",
"datasetId": "={{ $json.defaultDatasetId }}",
"operation": "Get items"
},
"credentials": {
"apifyApi": {
"id": "ykVYTlJ99a6TZf2C",
"name": "Apify account"
}
},
"typeVersion": 1
},
{
"id": "043184f1-353d-460c-9104-bd68158e8530",
"name": "提取所需字段",
"type": "n8n-nodes-base.set",
"position": [
300,
180
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "f19ca41a-9da1-4a8d-ae29-1c3dc133f588",
"name": "Company Name",
"type": "string",
"value": "={{ $json.title }}"
},
{
"id": "adbbe03b-f320-4566-bee2-ca69d7a6b2fe",
"name": "Company Category",
"type": "string",
"value": "={{ $json.categoryName }}"
},
{
"id": "624abbcd-9882-4594-ae6c-7cb8b5b00578",
"name": "Address",
"type": "string",
"value": "={{ $json.address }}"
},
{
"id": "1b05c9b0-b473-4b49-95fd-bfb52f3195ee",
"name": "Website",
"type": "string",
"value": "={{ $json.website }}"
},
{
"id": "3c4d7142-6545-42eb-99e6-ef1a32ce5157",
"name": "Phone Number",
"type": "string",
"value": "={{ $json.phoneUnformatted }}"
},
{
"id": "c4a5b4de-6591-4b36-a791-f180e744649c",
"name": "Rating",
"type": "string",
"value": "={{ $json.totalScore }}"
},
{
"id": "be70e33b-13aa-4aa2-ad34-cbd58f515dea",
"name": "Other Categories",
"type": "array",
"value": "={{ $json.categories }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "26e3f96c-79e5-4135-96ec-a89441618ed0",
"name": "遍历项目",
"type": "n8n-nodes-base.splitInBatches",
"position": [
740,
180
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "20d15211-f36d-4ce1-ab7f-f3606e40f198",
"name": "Markdown",
"type": "n8n-nodes-base.markdown",
"onError": "continueRegularOutput",
"position": [
1840,
105
],
"parameters": {
"html": "={{ $json.data }}",
"options": {}
},
"typeVersion": 1,
"alwaysOutputData": true
},
{
"id": "58b07df7-6d08-40db-b489-dd1e8f7544a6",
"name": "Loop Over Items1",
"type": "n8n-nodes-base.splitInBatches",
"position": [
960,
-245
],
"parameters": {
"options": {}
},
"typeVersion": 3
},
{
"id": "1476a2bb-7943-457c-bf74-4ae07ea4005a",
"name": "等待1",
"type": "n8n-nodes-base.wait",
"position": [
1400,
-320
],
"webhookId": "f0d05b18-eeb4-471d-8abc-55b1d39f33bb",
"parameters": {
"amount": 1
},
"typeVersion": 1.1
},
{
"id": "2f112d83-278b-42d9-ac8b-ba88588abdf4",
"name": "合并",
"type": "n8n-nodes-base.merge",
"position": [
2280,
255
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineByPosition"
},
"typeVersion": 3.2
},
{
"id": "5489d865-ac36-4be8-a822-a673d5f85ab9",
"name": "提取所需字段1",
"type": "n8n-nodes-base.set",
"position": [
1400,
-520
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "f19ca41a-9da1-4a8d-ae29-1c3dc133f588",
"name": "Company Name",
"type": "string",
"value": "={{ $json['Company Name'] }}"
},
{
"id": "adbbe03b-f320-4566-bee2-ca69d7a6b2fe",
"name": "Company Category",
"type": "string",
"value": "={{ $json['Company Category'] }}"
},
{
"id": "624abbcd-9882-4594-ae6c-7cb8b5b00578",
"name": "Address",
"type": "string",
"value": "={{ $json.Address }}"
},
{
"id": "1b05c9b0-b473-4b49-95fd-bfb52f3195ee",
"name": "Website",
"type": "string",
"value": "={{ $json.Website }}"
},
{
"id": "3c4d7142-6545-42eb-99e6-ef1a32ce5157",
"name": "Phone Number",
"type": "string",
"value": "={{ $json['Phone Number'] }}"
},
{
"id": "c4a5b4de-6591-4b36-a791-f180e744649c",
"name": "Rating",
"type": "string",
"value": "={{ $json.Rating }}"
},
{
"id": "cbc0a9fd-95ce-47da-882c-92894333452f",
"name": "Email",
"type": "string",
"value": "={{ $json.email }}"
},
{
"id": "be70e33b-13aa-4aa2-ad34-cbd58f515dea",
"name": "Other Categories",
"type": "array",
"value": "={{ $json['Other Categories'] }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "8e194e31-0a42-41b9-ba8e-1ab513fe619d",
"name": "合并1",
"type": "n8n-nodes-base.merge",
"position": [
1620,
-170
],
"parameters": {
"mode": "combine",
"options": {},
"combineBy": "combineByPosition"
},
"typeVersion": 3.2
},
{
"id": "87be44e2-d4e4-4e58-9aee-36aca8025e6a",
"name": "解析URL/网站",
"type": "n8n-nodes-base.set",
"position": [
960,
105
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "cd0bee55-966e-4ea8-8ba4-fcccd19fccd5",
"name": "website",
"type": "string",
"value": "={{ $json.Website }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "532b375f-a0b2-42c3-8344-89a9f401a97d",
"name": "移除查询参数和片段",
"type": "n8n-nodes-base.code",
"position": [
1180,
105
],
"parameters": {
"jsCode": "return items.map(item => {\n  // `let`, not `const`: the value is reassigned below when the scheme is missing.\n  let rawUrl = item.json.website?.trim() || '';\n\n  // If no scheme (http/https), prepend https://\n  if (!/^https?:\\/\\//i.test(rawUrl)) {\n    rawUrl = 'https://' + rawUrl;\n  }\n\n  try {\n    const parsed = new URL(rawUrl);\n\n    // Remove query parameters and fragments\n    parsed.search = '';\n    parsed.hash = '';\n\n    item.json.cleanedWebsite = parsed.toString();\n    item.json.websiteType = 'cleaned'; // optional tag\n  } catch (err) {\n    // Unparseable or empty URL: flag the item instead of failing the whole run.\n    item.json.cleanedWebsite = null;\n    item.json.websiteType = 'invalid';\n    item.json.error = 'Invalid or missing URL';\n  }\n\n  return item;\n});"
},
"typeVersion": 2
},
{
"id": "8c1b4f28-cb4e-4e8d-9783-b68175ee26fc",
"name": "用户代理",
"type": "n8n-nodes-base.set",
"position": [
1400,
105
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "38d260fd-92b5-4d92-803a-6ff41a9b3e42",
"name": "website",
"type": "string",
"value": "={{ $json.cleanedWebsite || $json.website }}"
},
{
"id": "ca022cbe-43bf-4f8a-bb9a-701e6ceb1933",
"name": "User-Agent",
"type": "string",
"value": "={{ [\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36\",\n \"Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Safari/605.1.15\",\n \"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36\",\n \"Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1\",\n \"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0\"\n][Math.floor(Math.random() * 5)] }}"
},
{
"id": "cc63a3da-904a-4d54-adb4-40064ed2378a",
"name": "Accept",
"type": "string",
"value": "=text/html,*/*;q=0.8"
},
{
"id": "5cfc4087-5aff-4f73-8aad-1394268fd358",
"name": "Accept-Language",
"type": "string",
"value": "en-US,en;q=0.9"
},
{
"id": "1882eb17-c18f-4de2-b3bd-40b87dce55a6",
"name": "Referer",
"type": "string",
"value": "https://google.com/"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "79882f8b-ad9d-481d-ab8a-38191f78868b",
"name": "网站抓取",
"type": "n8n-nodes-base.httpRequest",
"onError": "continueRegularOutput",
"position": [
1620,
105
],
"parameters": {
"url": "={{ $json.website }}",
"options": {},
"sendHeaders": true,
"headerParameters": {
"parameters": [
{
"name": "User-Agent",
"value": "={{ $json['User-Agent'] }}"
},
{
"name": "Accept-Language",
"value": "={{ $json['Accept-Language'] }}"
},
{
"name": "Referer",
"value": "={{ $json.Referer }}"
},
{
"name": "Accept",
"value": "={{ $json.Accept }}"
}
]
}
},
"typeVersion": 4.2,
"alwaysOutputData": true
},
{
"id": "00890b55-73a1-4995-a0aa-d55c76b4dcc3",
"name": "随机等待",
"type": "n8n-nodes-base.wait",
"position": [
2060,
105
],
"webhookId": "cb8b9ecd-52e5-4e9a-b21f-34a0a859376f",
"parameters": {
"amount": "={{ Math.floor(Math.random() * 6 + 2) }}"
},
"typeVersion": 1.1
},
{
"id": "a3f3f96e-1187-4e30-8b6f-b8d008244a0c",
"name": "提取邮箱地址",
"type": "n8n-nodes-base.code",
"onError": "continueRegularOutput",
"position": [
1180,
-320
],
"parameters": {
"jsCode": "// Regex to match emails (basic form)\nconst emailRegex = /[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-z]{2,}/gi;\n\n// Asset file names such as \"logo@2x.png\" also match the regex above; reject them by extension.\nconst assetExtension = /\\.(png|jpe?g|gif|webp|svg|css|js)$/i;\n\n// Step 1: Collect all matches from all markdowns\nconst allEmails = [];\n\nfor (const item of items) {\n  // `data` may be missing or not a string; treat anything else as empty text\n  // so that .match() cannot throw.\n  const markdown = typeof item.json.data === 'string' ? item.json.data : '';\n  const matches = markdown.match(emailRegex);\n\n  if (matches && matches.length) {\n    allEmails.push(...matches);\n  }\n}\n\n// Step 2: Filter out fake emails that end with file extensions\nconst filteredEmails = allEmails.filter(email => !assetExtension.test(email));\n\n// Step 3: Remove duplicates\nconst uniqueEmails = [...new Set(filteredEmails)];\n\n// Step 4: Return first email or \"N/A\"\nreturn [\n  {\n    json: {\n      email: uniqueEmails[0] || \"N/A\",\n      allEmails: uniqueEmails.length ? uniqueEmails : [\"N/A\"]\n    }\n  }\n];\n"
},
"typeVersion": 2,
"alwaysOutputData": true
},
{
"id": "4e16b580-50c8-4a99-8027-21f43913817d",
"name": "仅筛选含邮箱的潜在客户",
"type": "n8n-nodes-base.filter",
"position": [
1180,
-520
],
"parameters": {
"options": {},
"conditions": {
"options": {
"version": 2,
"leftValue": "",
"caseSensitive": true,
"typeValidation": "strict"
},
"combinator": "and",
"conditions": [
{
"id": "df37d096-dcae-4a0b-9cce-a6c48f482eac",
"operator": {
"type": "string",
"operation": "notEquals"
},
"leftValue": "={{ $json.email }}",
"rightValue": "N/A"
}
]
}
},
"typeVersion": 2.2
},
{
"id": "7f10d476-4787-4632-90cb-3b006e8675c3",
"name": "数据库",
"type": "n8n-nodes-base.airtable",
"position": [
1620,
-520
],
"parameters": {
"base": {
"__rl": true,
"mode": "list",
"value": "appBnug5XIRAWl5sK",
"cachedResultUrl": "https://airtable.com/appBnug5XIRAWl5sK",
"cachedResultName": "N8n"
},
"table": {
"__rl": true,
"mode": "list",
"value": "tblP5rgq3s76pYsCf",
"cachedResultUrl": "https://airtable.com/appBnug5XIRAWl5sK/tblP5rgq3s76pYsCf",
"cachedResultName": "Web Scraper"
},
"columns": {
"value": {
"Email": "={{ $json.Email }}",
"Rating": "={{ $json.Rating }}",
"Website": "={{ $json.Website }}",
"Category": "={{ $json['Company Category'] }}",
"Location": "={{ $json.Address }}",
"Phone Number": "={{ $json['Phone Number'] }}",
"Business Name": "={{ $json['Company Name'] }}",
"Other Categories": "={{ $json['Other Categories'].join() }}"
},
"schema": [
{
"id": "id",
"type": "string",
"display": true,
"removed": false,
"readOnly": true,
"required": false,
"displayName": "id",
"defaultMatch": true
},
{
"id": "Website",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Website",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Business Name",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Business Name",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Category",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Category",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Location",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Location",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Email",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Email",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Phone Number",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Phone Number",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Rating",
"type": "number",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Rating",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "Other Categories",
"type": "string",
"display": true,
"removed": false,
"readOnly": false,
"required": false,
"displayName": "Other Categories",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [
"Email"
],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "upsert"
},
"credentials": {
"airtableTokenApi": {
"id": "62mW0dwxFBT7jfcZ",
"name": "Airtable Personal Access Token account"
}
},
"typeVersion": 2.1
},
{
"id": "ed41e59c-c9c0-4673-af40-fc2d3f08a2ef",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-460,
80
],
"parameters": {
"color": 5,
"width": 260,
"height": 260,
"content": "## 表单提交"
},
"typeVersion": 1
},
{
"id": "4737b43b-d22e-4b7d-ae58-4e279a6167ca",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-160,
80
],
"parameters": {
"color": 3,
"width": 800,
"height": 260,
"content": "## 抓取企业信息(通过Apify)并提取所需字段"
},
"typeVersion": 1
},
{
"id": "fde95017-afe6-494e-8752-6498c56fa73f",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
680,
40
],
"parameters": {
"color": 3,
"width": 1780,
"height": 400,
"content": ""
},
"typeVersion": 1
},
{
"id": "e36373c3-0e42-49c6-9981-afe723576463",
"name": "便签3",
"type": "n8n-nodes-base.stickyNote",
"position": [
680,
-340
],
"parameters": {
"color": 4,
"width": 1140,
"height": 360,
"content": ""
},
"typeVersion": 1
},
{
"id": "57abe783-4152-4342-983c-f97ea53cef2a",
"name": "便签4",
"type": "n8n-nodes-base.stickyNote",
"position": [
1080,
-620
],
"parameters": {
"color": 4,
"width": 660,
"height": 260,
"content": "## 筛选、清理和存储"
},
"typeVersion": 1
},
{
"id": "4408295c-b2ac-4207-9e86-079f50e6f051",
"name": "便签5",
"type": "n8n-nodes-base.stickyNote",
"position": [
-500,
-520
],
"parameters": {
"width": 580,
"height": 420,
"content": "## 🛠 设置方法"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "6eac21e5-60c7-401b-a761-88a8778b3c59",
"connections": {
  "限制": {
    "main": [
      [
        { "node": "遍历项目", "type": "main", "index": 0 }
      ]
    ]
  },
  "合并": {
    "main": [
      [
        { "node": "遍历项目", "type": "main", "index": 0 }
      ]
    ]
  },
  "等待1": {
    "main": [
      [
        { "node": "合并1", "type": "main", "index": 0 }
      ]
    ]
  },
  "合并1": {
    "main": [
      [
        { "node": "Loop Over Items1", "type": "main", "index": 0 }
      ]
    ]
  },
  "数据库": {
    "main": [
      []
    ]
  },
  "Markdown": {
    "main": [
      [
        { "node": "随机等待", "type": "main", "index": 0 }
      ]
    ]
  },
  "随机等待": {
    "main": [
      [
        { "node": "合并", "type": "main", "index": 0 }
      ]
    ]
  },
  "用户代理": {
    "main": [
      [
        { "node": "网站抓取", "type": "main", "index": 0 }
      ]
    ]
  },
  "运行执行器": {
    "main": [
      [
        { "node": "获取数据集项", "type": "main", "index": 0 }
      ]
    ]
  },
  "遍历项目": {
    "main": [
      [
        { "node": "Loop Over Items1", "type": "main", "index": 0 }
      ],
      [
        { "node": "解析URL/网站", "type": "main", "index": 0 },
        { "node": "合并", "type": "main", "index": 1 }
      ]
    ]
  },
  "Loop Over Items1": {
    "main": [
      [
        { "node": "仅筛选含邮箱的潜在客户", "type": "main", "index": 0 }
      ],
      [
        { "node": "提取邮箱地址", "type": "main", "index": 0 },
        { "node": "合并1", "type": "main", "index": 1 }
      ]
    ]
  },
  "网站抓取": {
    "main": [
      [
        { "node": "Markdown", "type": "main", "index": 0 }
      ]
    ]
  },
  "获取数据集项": {
    "main": [
      [
        { "node": "提取所需字段", "type": "main", "index": 0 }
      ]
    ]
  },
  "解析URL/网站": {
    "main": [
      [
        { "node": "移除查询参数和片段", "type": "main", "index": 0 }
      ]
    ]
  },
  "表单提交时": {
    "main": [
      [
        { "node": "运行执行器", "type": "main", "index": 0 }
      ]
    ]
  },
  "提取所需字段": {
    "main": [
      [
        { "node": "限制", "type": "main", "index": 0 }
      ]
    ]
  },
  "提取所需字段1": {
    "main": [
      [
        { "node": "数据库", "type": "main", "index": 0 }
      ]
    ]
  },
  "提取邮箱地址": {
    "main": [
      [
        { "node": "等待1", "type": "main", "index": 0 }
      ]
    ]
  },
  "仅筛选含邮箱的潜在客户": {
    "main": [
      [
        { "node": "提取所需字段1", "type": "main", "index": 0 }
      ]
    ]
  },
  "移除查询参数和片段": {
    "main": [
      [
        { "node": "用户代理", "type": "main", "index": 0 }
      ]
    ]
  }
}
}
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
高级 - 潜在客户开发
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Apify、Airtable)可能需要您自行付费。
相关工作流推荐
Ezema Kingsley Chibuzo
@kingsley
Data Analyst, Automation Developer, and AI Workflow Specialist with experience designing end-to-end systems using n8n and its integrations. I help businesses unlock insights, automate operations, and scale with custom AI-powered workflows, dynamic dashboards, and no-code tools. My work combines strong analytical thinking with smart automation — from data collection and enrichment to lead generation, reporting, and AI agent orchestration. Here’s my contact below if you’d like to connect with me:
分享此工作流