研究论文爬虫到Google Sheets
中级
这是一个AI领域的自动化工作流,包含 12 个节点。主要使用 Set, Code, Html, HttpRequest, GoogleSheets 等节点,结合人工智能技术实现智能自动化。 使用Bright Data和n8n自动化研究论文收集
前置要求
- 可能需要目标 API 的认证凭证
- Google Sheets API 凭证
分类
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
"id": "giq3zqaP4QbY6LgC",
"meta": {
"instanceId": "60046904b104f0f72b2629a9d88fe9f676be4035769f1f08dad1dd38a76b9480"
},
"name": "研究论文爬虫到Google Sheets",
"tags": [],
"nodes": [
{
"id": "7d81edf3-6f00-4634-b79f-dbda3f9958e5",
"name": "开始爬取(手动触发器)",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1080,
580
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6e172db5-7483-4079-bf8a-785602526bdc",
"name": "设置研究主题",
"type": "n8n-nodes-base.set",
"position": [
-860,
580
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "b530a847-0bb2-4039-9ad0-cbc9cc4d909e",
"name": "Topic",
"type": "string",
"value": "machine+learning"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"name": "发送请求到Bright Data API",
"type": "n8n-nodes-base.httpRequest",
"position": [
-600,
580
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "n8n_unblocker"
},
{
"name": "url",
"value": "=https://scholar.google.com/scholar?q={{ $json.Topic }}"
},
{
"name": "country",
"value": "us"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "Bearer YOUR_BRIGHT_DATA_API_TOKEN"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"name": "从HTML提取数据(标题、作者等)",
"type": "n8n-nodes-base.html",
"position": [
-400,
580
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "Title",
"cssSelector": "h3.gs_rt, a.gs_rt",
"returnArray": true
},
{
"key": "Author",
"cssSelector": "div.gs_a",
"returnArray": true
},
{
"key": "Abstract",
"cssSelector": "div.gs_rs",
"returnArray": true
},
{
"key": "PDF Link\t",
"attribute": "href",
"cssSelector": "a[href*='pdf']",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"name": "清理和结构化提取的数据",
"type": "n8n-nodes-base.code",
"position": [
-200,
580
],
"parameters": {
"jsCode": "// Parallel arrays taken from the first input item: one entry per matched element.\nconst source = (items[0] && items[0].json) || {};\nconst titles = source.Title || [];\nconst authors = source.Author || [];\nconst abstracts = source.Abstract || [];\n// The extraction key ends with a tab character, so it is looked up verbatim.\nconst pdfLinks = source[\"PDF Link\\t\"] || [];\n\n// Normalise one extracted link entry to a URL string ('' when none is found).\n// Accepts a plain string, an object with an href, or an array of either.\nfunction toUrl(entry) {\n  if (typeof entry === 'string') {\n    return entry.trim();\n  }\n  if (Array.isArray(entry)) {\n    for (const candidate of entry) {\n      const url = toUrl(candidate);\n      if (url) {\n        return url;\n      }\n    }\n    return '';\n  }\n  if (entry && typeof entry.href === 'string') {\n    return entry.href;\n  }\n  return '';\n}\n\nconst output = [];\n\nfor (let i = 0; i < titles.length; i++) {\n  // Clean title (remove tags like [PDF][B])\n  const title = String(titles[i] || '').replace(/\\[.*?\\]/g, '').trim();\n\n  // Keep only the author list. Requiring whitespace around the dash keeps\n  // hyphenated names such as J-P Smith intact.\n  const author = authors[i] ? String(authors[i]).replace(/\\s+-\\s+.*/, '').trim() : '';\n\n  // Abstract fallback\n  const abstract = abstracts[i] || '';\n\n  // PDF links are paired with titles by position only\n  const pdfLink = toUrl(pdfLinks[i]);\n\n  output.push({ json: { title, author, abstract, pdfLink } });\n}\n\nreturn output;\n"
},
"typeVersion": 2
},
{
"id": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"name": "保存结果到Google Sheet",
"type": "n8n-nodes-base.googleSheets",
"position": [
120,
580
],
"parameters": {
"columns": {
"value": {
"Topic": "={{ $('设置研究主题').item.json.Topic }}",
"title": "={{ $json.title }}",
"author": "={{ $json.author }}",
"abstract": "={{ $json.abstract }}",
"pdf link": "={{ $json.pdfLink }}"
},
"schema": [
{
"id": "Topic",
"type": "string",
"display": true,
"required": false,
"displayName": "Topic",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "title",
"type": "string",
"display": true,
"required": false,
"displayName": "title",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "author",
"type": "string",
"display": true,
"required": false,
"displayName": "author",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "abstract",
"type": "string",
"display": true,
"required": false,
"displayName": "abstract",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "pdf link",
"type": "string",
"display": true,
"required": false,
"displayName": "pdf link",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit?usp=drivesdk",
"cachedResultName": "Research papers from Google Scholar"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "r2mDaisH6e9VkwHl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "1b4a1504-4a4a-4a0d-892b-d0c3e205ed85",
"name": "便签",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1140,
60
],
"parameters": {
"color": 5,
"width": 420,
"height": 720,
"content": "## 🔹 **第一部分:用户输入与触发器**"
},
"typeVersion": 1
},
{
"id": "bc56f528-6d18-4e05-942f-c06bb6e10b27",
"name": "便签1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-660,
80
],
"parameters": {
"color": 6,
"width": 600,
"height": 700,
"content": "## 🔸 **第二部分:爬取与解析网站**"
},
"typeVersion": 1
},
{
"id": "2c54e5e6-011a-4562-98ac-9cc9834bc284",
"name": "便签2",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 780,
"content": "## 🟢 **第三部分:保存到Google Sheets**"
},
"typeVersion": 1
},
{
"id": "4ce90703-961e-4070-9356-c9dffc23a6c5",
"name": "便签 9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
80
],
"parameters": {
"color": 4,
"width": 1300,
"height": 320,
"content": "======================================="
},
"typeVersion": 1
},
{
"id": "069ddb89-f7a1-4c4b-b65d-212be3252750",
"name": "便签说明4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
420
],
"parameters": {
"color": 4,
"width": 1289,
"height": 1878,
"content": "## 🌟 研究论文爬虫到Google Sheets"
},
"typeVersion": 1
},
{
"id": "a1a5e609-756a-4757-a026-1349cf388e61",
"name": "便签说明5",
"type": "n8n-nodes-base.stickyNote",
"position": [
400,
0
],
"parameters": {
"color": 7,
"width": 380,
"height": 240,
"content": "## 如果您通过此链接加入 Bright Data,我将获得少量佣金 — 感谢您支持更多免费内容!"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "f931202a-3c22-495d-b775-71665bdf6c27",
"connections": {
"设置研究主题": {
"main": [
[
{
"node": "发送请求到Bright Data API",
"type": "main",
"index": 0
}
]
]
},
"发送请求到Bright Data API": {
"main": [
[
{
"node": "从HTML提取数据(标题、作者等)",
"type": "main",
"index": 0
}
]
]
},
"开始爬取(手动触发器)": {
"main": [
[
{
"node": "设置研究主题",
"type": "main",
"index": 0
}
]
]
},
"清理和结构化提取的数据": {
"main": [
[
{
"node": "保存结果到Google Sheet",
"type": "main",
"index": 0
}
]
]
},
"从HTML提取数据(标题、作者等)": {
"main": [
[
{
"node": "清理和结构化提取的数据",
"type": "main",
"index": 0
}
]
]
}
}
}
常见问题
如何使用这个工作流?
复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。
这个工作流适合什么场景?
中级 - 人工智能
需要付费吗?
本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Bright Data API)可能需要您自行付费。
相关工作流推荐
自动化论坛监控_via_Bright_data
使用Bright Data和n8n的论坛监控自动化
Set
Code
Html
+6
17 节点Yaron Been
人工智能
比特币价格下跌警报系统_via_Bright_Data
Bright Data和n8n的实时比特币价格警报
If
Code
Html
+6
15 节点Yaron Been
人工智能
Builtwith 技术栈追踪器
使用 BuiltWith 追踪技术栈并查找决策者,同步至 Google Sheets
Set
Code
Http Request
+3
10 节点Yaron Been
人工智能
使用 Bright Data 抓取即将发生的事件
使用 Bright Data 和 n8n 的自动化事件发现
Code
Html
Http Request
+3
11 节点Yaron Been
人工智能
AI YouTube分析助手:评论分析与洞察报告
AI YouTube分析助手:评论分析器与洞察报告生成器
If
Set
Code
+9
19 节点Yaron Been
人工智能
使用Bright Data和LLMs自动化大规模超个性化外联
通过Bright Data和大语言模型实现大规模超个性化外联自动化
If
Set
Wait
+8
21 节点Yaron Been
销售
工作流信息
难度等级
中级
节点数量:12
分类:1
节点类型:7
作者
Yaron Been
@yaron-nofluff — Building AI Agents and Automations | Growth Marketer | Entrepreneur | Book Author & Podcast Host. If you need any help with automations, feel free to reach out via LinkedIn: https://www.linkedin.com/in/yaronbeen/ and check out my YouTube channel: https://www.youtube.com/@YaronBeen/videos
外部链接
在 n8n.io 查看 →
分享此工作流