연구 논문 크롤러에서 Google Sheets로
중급
이것은 AI 분야의 자동화 워크플로우로, 12개의 노드를 포함합니다. 주로 Set, Code, Html, HttpRequest, GoogleSheets 등의 노드를 사용하며 인공지능 기술을 결합하여 스마트 자동화를 구현합니다. Bright Data 및 n8n을 사용한 연구 논문 수집 자동화입니다.
사전 요구사항
- •대상 API의 인증 정보가 필요할 수 있음
- •Google Sheets API 인증 정보
카테고리
워크플로우 미리보기
노드 연결 관계를 시각적으로 표시하며, 확대/축소 및 이동을 지원합니다
워크플로우 내보내기
다음 JSON 구성을 복사하여 n8n에 가져오면 이 워크플로우를 사용할 수 있습니다
{
"id": "giq3zqaP4QbY6LgC",
"meta": {
"instanceId": "60046904b104f0f72b2629a9d88fe9f676be4035769f1f08dad1dd38a76b9480"
},
"name": "Research_Paper_Scraper_to_Google_Sheets",
"tags": [],
"nodes": [
{
"id": "7d81edf3-6f00-4634-b79f-dbda3f9958e5",
"name": "크롤링 시작 (수동 트리거)",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1080,
580
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6e172db5-7483-4079-bf8a-785602526bdc",
"name": "연구 주제 설정",
"type": "n8n-nodes-base.set",
"position": [
-860,
580
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "b530a847-0bb2-4039-9ad0-cbc9cc4d909e",
"name": "Topic",
"type": "string",
"value": "machine+learning"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"name": "Bright Data API 요청 전송",
"type": "n8n-nodes-base.httpRequest",
"position": [
-600,
580
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "n8n_unblocker"
},
{
"name": "url",
"value": "=https://scholar.google.com/scholar?q={{ $json.Topic }}"
},
{
"name": "country",
"value": "us"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "Bearer YOUR_BRIGHT_DATA_API_KEY"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"name": "HTML에서 데이터 추출 (제목, 저자 등)",
"type": "n8n-nodes-base.html",
"position": [
-400,
580
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "Title",
"cssSelector": "h3.gs_rt, a.gs_rt",
"returnArray": true
},
{
"key": "Author",
"cssSelector": "div.gs_a",
"returnArray": true
},
{
"key": "Abstract",
"cssSelector": "div.gs_rs",
"returnArray": true
},
{
"key": "PDF Link\t",
"cssSelector": "a[href*='pdf']",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"name": "추출 데이터 정리 및 구조화",
"type": "n8n-nodes-base.code",
"position": [
-200,
580
],
"parameters": {
"jsCode": "const titles = items[0].json.Title || [];\nconst authors = items[0].json.Author || [];\nconst abstracts = items[0].json.Abstract || [];\nconst pdfLinks = items[0].json[\"PDF Link\\t\"] || [];\n\nconst output = [];\n\nfor (let i = 0; i < titles.length; i++) {\n // Clean title (remove tags like [PDF][B])\n let title = titles[i].replace(/\\[.*?\\]/g, '').trim();\n\n // Clean author (remove any trailing dashes or HTML leftovers)\n let author = authors[i] ? authors[i].replace(/\\s*-\\s*.*/, '').trim() : '';\n\n // Abstract fallback\n let abstract = abstracts[i] || '';\n\n // Get PDF link — from either a single object or array of duplicates\n let linkObj = pdfLinks[i];\n let pdfLink = '';\n\n if (Array.isArray(linkObj)) {\n // If multiple objects per item\n pdfLink = linkObj.find(obj => obj.href)?.href || '';\n } else if (linkObj?.href) {\n pdfLink = linkObj.href;\n }\n\n // Push cleaned object\n output.push({\n json: {\n title,\n author,\n abstract,\n pdfLink\n }\n });\n}\n\nreturn output;\n"
},
"typeVersion": 2
},
{
"id": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"name": "결과를 Google Sheet에 저장",
"type": "n8n-nodes-base.googleSheets",
"position": [
120,
580
],
"parameters": {
"columns": {
"value": {
"Topic": "={{ $('연구 주제 설정').item.json.Topic }}",
"title": "={{ $json.title }}",
"author": "={{ $json.author }}",
"abstract": "={{ $json.abstract }}",
"pdf link": "={{ $json.pdfLink }}"
},
"schema": [
{
"id": "Topic",
"type": "string",
"display": true,
"required": false,
"displayName": "Topic",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "title",
"type": "string",
"display": true,
"required": false,
"displayName": "title",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "author",
"type": "string",
"display": true,
"required": false,
"displayName": "author",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "abstract",
"type": "string",
"display": true,
"required": false,
"displayName": "abstract",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "pdf link",
"type": "string",
"display": true,
"required": false,
"displayName": "pdf link",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit?usp=drivesdk",
"cachedResultName": "Research papers from Google Scholar"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "r2mDaisH6e9VkwHl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "1b4a1504-4a4a-4a0d-892b-d0c3e205ed85",
"name": "스티키 노트",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1140,
60
],
"parameters": {
"color": 5,
"width": 420,
"height": 720,
"content": "## 🔹 **Section 1: User Input & Trigger**\n\n**🧩 Nodes: Start Scraping | Set Topic**\n📍 **Purpose:** Let users easily input the topic they want to scrape — no need to deal with complex URLs.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| ⚡ Trigger | **Start Scraping (Manual)** | This node starts the workflow when you click “Execute Workflow.” It's the entry point. |\n| ✏️ Set | **Set Topic (Manual Input)** | Instead of requiring a URL, the user will enter a topic (like \"machine learning\" or \"digital marketing\"). This topic will be used to automatically generate the URL behind the scenes. |\n\n### 🧠 How it helps:\n\n* Great for beginners: Just type the topic, hit run.\n* Keeps the interface clean and user-friendly.\n* Avoids confusion around URLs and formats.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "bc56f528-6d18-4e05-942f-c06bb6e10b27",
"name": "스티키 노트1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-660,
80
],
"parameters": {
"color": 6,
"width": 600,
"height": 700,
"content": "## 🔸 **Section 2: Scrape & Parse Website**\n\n**🧩 Nodes: Send Request | Extract HTML | Clean Data**\n📍 **Purpose:** Uses the Bright Data proxy to access the webpage, extract raw HTML content, and clean it up into a readable format (title, author, abstract, etc.).\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| 🌐 HTTP Request | **Send Topic Request to Bright Data** | This sends a request to the Bright Data API using the topic you set earlier. It uses Bright Data’s network to safely load the actual website and return HTML content. |\n| 🧱 HTML Extract | **Extract Data from Webpage** | Parses the returned HTML to find relevant data like titles, authors, abstracts, and links. |\n| 🔣 Code | **Clean and Format Scraped Data** | A custom code block that organizes the messy data into neat records. For example: title → column A, abstract → column B, etc. |\n\n### 🧠 How it helps:\n\n* Makes web scraping safe and reliable by using proxies.\n* Converts unreadable HTML into structured information.\n* Beginner-friendly: No need to write a parser yourself.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "2c54e5e6-011a-4562-98ac-9cc9834bc284",
"name": "스티키 노트2",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 780,
"content": "## 🟢 **Section 3: Save to Google Sheets**\n\n**🧩 Node: Append to Google Sheets**\n📍 **Purpose:** Automatically sends the clean data into a Google Sheet for easy access, filtering, or sharing.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| ---------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |\n| 📄 Google Sheets | **Store Scraped Data in Spreadsheet** | Takes the structured output and appends it to the connected Google Sheet. Each result gets a row with title, author, abstract, etc. |\n\n### 🧠 How it helps:\n\n* No manual copy-pasting ever again!\n* Shareable and searchable format.\n* Updates automatically as you scrape more topics.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "4ce90703-961e-4070-9356-c9dffc23a6c5",
"name": "스티키 노트9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
80
],
"parameters": {
"color": 4,
"width": 1300,
"height": 320,
"content": "=======================================\n WORKFLOW ASSISTANCE\n=======================================\nFor any questions or support, please contact:\n Yaron@nofluff.online\n\nExplore more tips and tutorials here:\n - YouTube: https://www.youtube.com/@YaronBeen/videos\n - LinkedIn: https://www.linkedin.com/in/yaronbeen/\n=======================================\n"
},
"typeVersion": 1
},
{
"id": "069ddb89-f7a1-4c4b-b65d-212be3252750",
"name": "스티키 노트4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
420
],
"parameters": {
"color": 4,
"width": 1289,
"height": 1878,
"content": "## 🌟 Research Paper Scraper to Google Sheets\n\n**Automate extraction of data from any website based on a topic — no coding needed!**\n\n---\n\n## 🔹 **Section 1: User Input & Trigger**\n\n**🧩 Nodes: Start Scraping | Set Topic**\n📍 **Purpose:** Let users easily input the topic they want to scrape — no need to deal with complex URLs.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| ⚡ Trigger | **Start Scraping (Manual)** | This node starts the workflow when you click “Execute Workflow.” It's the entry point. |\n| ✏️ Set | **Set Topic (Manual Input)** | Instead of requiring a URL, the user will enter a topic (like \"machine learning\" or \"digital marketing\"). This topic will be used to automatically generate the URL behind the scenes. |\n\n### 🧠 How it helps:\n\n* Great for beginners: Just type the topic, hit run.\n* Keeps the interface clean and user-friendly.\n* Avoids confusion around URLs and formats.\n\n---\n\n## 🔸 **Section 2: Scrape & Parse Website**\n\n**🧩 Nodes: Send Request | Extract HTML | Clean Data**\n📍 **Purpose:** Uses the Bright Data proxy to access the webpage, extract raw HTML content, and clean it up into a readable format (title, author, abstract, etc.).\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| 🌐 HTTP Request | **Send Topic Request to Bright Data** | This sends a request to the Bright Data API using the topic you set earlier. It uses Bright Data’s network to safely load the actual website and return HTML content. 
|\n| 🧱 HTML Extract | **Extract Data from Webpage** | Parses the returned HTML to find relevant data like titles, authors, abstracts, and links. |\n| 🔣 Code | **Clean and Format Scraped Data** | A custom code block that organizes the messy data into neat records. For example: title → column A, abstract → column B, etc. |\n\n### 🧠 How it helps:\n\n* Makes web scraping safe and reliable by using proxies.\n* Converts unreadable HTML into structured information.\n* Beginner-friendly: No need to write a parser yourself.\n\n---\n\n## 🟢 **Section 3: Save to Google Sheets**\n\n**🧩 Node: Append to Google Sheets**\n📍 **Purpose:** Automatically sends the clean data into a Google Sheet for easy access, filtering, or sharing.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| ---------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |\n| 📄 Google Sheets | **Store Scraped Data in Spreadsheet** | Takes the structured output and appends it to the connected Google Sheet. Each result gets a row with title, author, abstract, etc. |\n\n### 🧠 How it helps:\n\n* No manual copy-pasting ever again!\n* Shareable and searchable format.\n* Updates automatically as you scrape more topics.\n\n---\n\n## ✅ What a Beginner Gains from This Workflow\n\n| 💡 Feature | 🚀 Benefit |\n| --------------------------- | --------------------------------------------------------------------------------- |\n| Topic-based input | You don’t need to find or understand complex URLs. Just type “AI” or “marketing.” |\n| Fully automated scraping | You don’t need to open browsers or inspect elements. |\n| Ready-to-use Google Sheet | The final data is clean and saved into a sheet you can use anywhere. |\n| Beautiful, modular workflow | Each step is visual, editable, and reusable without coding skills. 
|\n\n---\n\n## 🎯 Final Result:\n\nYou type a **topic** → Bright Data scrapes the web → It extracts content → Cleans it → Saves it into **Google Sheets**.\nEverything happens automatically. **No code. No hassle. Just data.**\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "a1a5e609-756a-4757-a026-1349cf388e61",
"name": "스티키 노트5",
"type": "n8n-nodes-base.stickyNote",
"position": [
400,
0
],
"parameters": {
"color": 7,
"width": 380,
"height": 240,
"content": "## I’ll receive a tiny commission if you join Bright Data through this link—thanks for fueling more free content!\n\n### https://get.brightdata.com/1tndi4600b25"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "f931202a-3c22-495d-b775-71665bdf6c27",
"connections": {
"6e172db5-7483-4079-bf8a-785602526bdc": {
"main": [
[
{
"node": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"type": "main",
"index": 0
}
]
]
},
"e65d092a-6854-478c-b33e-2fc309f71ae8": {
"main": [
[
{
"node": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"type": "main",
"index": 0
}
]
]
},
"7d81edf3-6f00-4634-b79f-dbda3f9958e5": {
"main": [
[
{
"node": "6e172db5-7483-4079-bf8a-785602526bdc",
"type": "main",
"index": 0
}
]
]
},
"9ab7ba20-8614-46c5-b57a-3749d6ae04c4": {
"main": [
[
{
"node": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"type": "main",
"index": 0
}
]
]
},
"211bae33-32c5-44e8-b306-a5e0d520a4a0": {
"main": [
[
{
"node": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"type": "main",
"index": 0
}
]
]
}
}
}자주 묻는 질문
이 워크플로우를 어떻게 사용하나요?
위의 JSON 구성 코드를 복사하여 n8n 인스턴스에서 새 워크플로우를 생성하고 "JSON에서 가져오기"를 선택한 후, 구성을 붙여넣고 필요에 따라 인증 설정을 수정하세요.
이 워크플로우는 어떤 시나리오에 적합한가요?
중급 - 인공지능
유료인가요?
이 워크플로우는 완전히 무료이며 직접 가져와 사용할 수 있습니다. 다만, 워크플로우에서 사용하는 타사 서비스(예: OpenAI API)는 사용자가 직접 비용을 지불해야 할 수 있습니다.
관련 워크플로우 추천
자동화된 포럼 모니터링_via_Bright_data
Bright Data와 n8n을 사용하여 포럼 모니터링 자동화
Set
Code
Html
+
Set
Code
Html
17 노드Yaron Been
인공지능
Bright Data를 사용하여 예정된 이벤트 스크래핑
Bright Data 및 n8n을 사용한 이벤트 발견 자동화
Code
Html
Http Request
+
Code
Html
Http Request
11 노드Yaron Been
인공지능
AI YouTube 분석 어시스턴트: 댓글 분석 및 인사이트 리포트
AI YouTube 분석 어시스턴트: 댓글 분석기 및 인사이트 리포트 생성기
If
Set
Code
+
If
Set
Code
19 노드Yaron Been
인공지능
Bright Data와 LLMs로 대규모 초개인화 아웃리치 자동화
Bright Data 및 대규모 언어 모델을 통한 대규모 초개인화 아웃리치 자동화
If
Set
Wait
+
If
Set
Wait
21 노드Yaron Been
영업
Bright_data를 통한 자동화된 소셜 미디어 헤드라인 발행기
Bright Data 및 n8n을 사용한 소셜 미디어 제목 자동화
Set
Html
Twitter
+
Set
Html
Twitter
16 노드Yaron Been
인공지능
Bright Data를 통한 경쟁사 가격 모니터링
Bright Data와 n8n을 사용한 자동 경쟁사 가격 모니터링
If
Code
Html
+
If
Code
Html
15 노드Yaron Been
인공지능
워크플로우 정보
난이도
중급
노드 수12
카테고리1
노드 유형7
저자
Yaron Been
@yaron-nofluffBuilding AI Agents and Automations | Growth Marketer | Entrepreneur | Book Author & Podcast Host If you need any help with Automations, feel free to reach out via linkedin: https://www.linkedin.com/in/yaronbeen/ And check out my Youtube channel: https://www.youtube.com/@YaronBeen/videos
외부 링크
n8n.io에서 보기 →
이 워크플로우 공유