Research Paper Scraper to Google Sheets
Intermediate
This is an automation workflow in the AI category containing 12 nodes. It mainly uses the Set, Code, HTML, HTTP Request, and Google Sheets nodes to automate research paper collection with Bright Data and n8n.
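At the heart of the workflow is a single POST to Bright Data's /request endpoint, which fetches the Google Scholar results page for the chosen topic through Bright Data's unblocker network and returns raw HTML. As a rough standalone sketch of that same call (Node.js 18+; the zone name and the BRIGHT_DATA_TOKEN environment variable are placeholders to substitute with your own values):

// Sketch of the request made by the workflow's HTTP Request node.
// The zone name and token are placeholders; adjust them to your Bright Data account.
const BRIGHT_DATA_TOKEN = process.env.BRIGHT_DATA_TOKEN;

async function fetchScholarPage(topic) {
  const response = await fetch('https://api.brightdata.com/request', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${BRIGHT_DATA_TOKEN}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      zone: 'n8n_unblocker', // your Bright Data unblocker zone name
      url: `https://scholar.google.com/scholar?q=${encodeURIComponent(topic)}`,
      country: 'us',
      format: 'raw', // ask for the raw HTML body
    }),
  });
  if (!response.ok) {
    throw new Error(`Bright Data request failed: ${response.status}`);
  }
  return response.text(); // raw HTML of the Scholar results page
}

fetchScholarPage('machine learning').then((html) => console.log(html.length));

The HTTP Request node in the JSON below sends the same four body parameters (zone, url, country, format) and the same Authorization header.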
Prerequisites
- Target API credentials may be required
- Google Sheets API credentials
Export Workflow
Import the JSON configuration below into n8n to use this workflow:
{
"id": "giq3zqaP4QbY6LgC",
"meta": {
"instanceId": "60046904b104f0f72b2629a9d88fe9f676be4035769f1f08dad1dd38a76b9480"
},
"name": "Research_Paper_Scraper_to_Google_Sheets",
"tags": [],
"nodes": [
{
"id": "7d81edf3-6f00-4634-b79f-dbda3f9958e5",
"name": "スクレイピング開始(手動トリガー)",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-1080,
580
],
"parameters": {},
"typeVersion": 1
},
{
"id": "6e172db5-7483-4079-bf8a-785602526bdc",
"name": "研究トピックの設定",
"type": "n8n-nodes-base.set",
"position": [
-860,
580
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "b530a847-0bb2-4039-9ad0-cbc9cc4d909e",
"name": "Topic",
"type": "string",
"value": "machine+learning"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"name": "Bright Data APIへのリクエスト送信",
"type": "n8n-nodes-base.httpRequest",
"position": [
-600,
580
],
"parameters": {
"url": "https://api.brightdata.com/request",
"method": "POST",
"options": {},
"sendBody": true,
"sendHeaders": true,
"bodyParameters": {
"parameters": [
{
"name": "zone",
"value": "n8n_unblocker"
},
{
"name": "url",
"value": "=https://scholar.google.com/scholar?q={{ $json.Topic }}"
},
{
"name": "country",
"value": "us"
},
{
"name": "format",
"value": "raw"
}
]
},
"headerParameters": {
"parameters": [
{
"name": "Authorization",
"value": "Bearer 40127ac3c2b4861572c8ad4c6d2273a0ce0472cb3ea7d3ac85a74a34629067aa"
}
]
}
},
"typeVersion": 4.2
},
{
"id": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"name": "HTMLからのデータ抽出(タイトル、著者など)",
"type": "n8n-nodes-base.html",
"position": [
-400,
580
],
"parameters": {
"options": {},
"operation": "extractHtmlContent",
"extractionValues": {
"values": [
{
"key": "Title",
"cssSelector": "h3.gs_rt, a.gs_rt",
"returnArray": true
},
{
"key": "Author",
"cssSelector": "div.gs_a",
"returnArray": true
},
{
"key": "Abstract",
"cssSelector": "div.gs_rs",
"returnArray": true
},
{
"key": "PDF Link\t",
"cssSelector": "a[href*='pdf']",
"returnArray": true,
"returnValue": "attribute"
}
]
}
},
"typeVersion": 1.2
},
{
"id": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"name": "抽出データのクリーンアップと構造化",
"type": "n8n-nodes-base.code",
"position": [
-200,
580
],
"parameters": {
"jsCode": "const titles = items[0].json.Title || [];\nconst authors = items[0].json.Author || [];\nconst abstracts = items[0].json.Abstract || [];\nconst pdfLinks = items[0].json[\"PDF Link\\t\"] || [];\n\nconst output = [];\n\nfor (let i = 0; i < titles.length; i++) {\n // Clean title (remove tags like [PDF][B])\n let title = titles[i].replace(/\\[.*?\\]/g, '').trim();\n\n // Clean author (remove any trailing dashes or HTML leftovers)\n let author = authors[i] ? authors[i].replace(/\\s*-\\s*.*/, '').trim() : '';\n\n // Abstract fallback\n let abstract = abstracts[i] || '';\n\n // Get PDF link — from either a single object or array of duplicates\n let linkObj = pdfLinks[i];\n let pdfLink = '';\n\n if (Array.isArray(linkObj)) {\n // If multiple objects per item\n pdfLink = linkObj.find(obj => obj.href)?.href || '';\n } else if (linkObj?.href) {\n pdfLink = linkObj.href;\n }\n\n // Push cleaned object\n output.push({\n json: {\n title,\n author,\n abstract,\n pdfLink\n }\n });\n}\n\nreturn output;\n"
},
"typeVersion": 2
},
{
"id": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"name": "結果をGoogle Sheetに保存",
"type": "n8n-nodes-base.googleSheets",
"position": [
120,
580
],
"parameters": {
"columns": {
"value": {
"Topic": "={{ $('Set Research topic').item.json.Topic }}",
"title": "={{ $json.title }}",
"author": "={{ $json.author }}",
"abstract": "={{ $json.abstract }}",
"pdf link": "={{ $json.pdfLink }}"
},
"schema": [
{
"id": "Topic",
"type": "string",
"display": true,
"required": false,
"displayName": "Topic",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "title",
"type": "string",
"display": true,
"required": false,
"displayName": "title",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "author",
"type": "string",
"display": true,
"required": false,
"displayName": "author",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "abstract",
"type": "string",
"display": true,
"required": false,
"displayName": "abstract",
"defaultMatch": false,
"canBeUsedToMatch": true
},
{
"id": "pdf link",
"type": "string",
"display": true,
"required": false,
"displayName": "pdf link",
"defaultMatch": false,
"canBeUsedToMatch": true
}
],
"mappingMode": "defineBelow",
"matchingColumns": [],
"attemptToConvertTypes": false,
"convertFieldsToString": false
},
"options": {},
"operation": "append",
"sheetName": {
"__rl": true,
"mode": "list",
"value": "gid=0",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit#gid=0",
"cachedResultName": "Sheet1"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1sOfCFsvHS9-BeE_PQ6J_jtQofCRcOv02XS7hrmFmpxQ/edit?usp=drivesdk",
"cachedResultName": "Research papers from Google Scholar"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "r2mDaisH6e9VkwHl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "1b4a1504-4a4a-4a0d-892b-d0c3e205ed85",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1140,
60
],
"parameters": {
"color": 5,
"width": 420,
"height": 720,
"content": "## 🔹 **Section 1: User Input & Trigger**\n\n**🧩 Nodes: Start Scraping | Set Topic**\n📍 **Purpose:** Let users easily input the topic they want to scrape — no need to deal with complex URLs.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| ⚡ Trigger | **Start Scraping (Manual)** | This node starts the workflow when you click “Execute Workflow.” It's the entry point. |\n| ✏️ Set | **Set Topic (Manual Input)** | Instead of requiring a URL, the user will enter a topic (like \"machine learning\" or \"digital marketing\"). This topic will be used to automatically generate the URL behind the scenes. |\n\n### 🧠 How it helps:\n\n* Great for beginners: Just type the topic, hit run.\n* Keeps the interface clean and user-friendly.\n* Avoids confusion around URLs and formats.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "bc56f528-6d18-4e05-942f-c06bb6e10b27",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-660,
80
],
"parameters": {
"color": 6,
"width": 600,
"height": 700,
"content": "## 🔸 **Section 2: Scrape & Parse Website**\n\n**🧩 Nodes: Send Request | Extract HTML | Clean Data**\n📍 **Purpose:** Uses the Bright Data proxy to access the webpage, extract raw HTML content, and clean it up into a readable format (title, author, abstract, etc.).\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| 🌐 HTTP Request | **Send Topic Request to Bright Data** | This sends a request to the Bright Data API using the topic you set earlier. It uses Bright Data’s network to safely load the actual website and return HTML content. |\n| 🧱 HTML Extract | **Extract Data from Webpage** | Parses the returned HTML to find relevant data like titles, authors, abstracts, and links. |\n| 🔣 Code | **Clean and Format Scraped Data** | A custom code block that organizes the messy data into neat records. For example: title → column A, abstract → column B, etc. |\n\n### 🧠 How it helps:\n\n* Makes web scraping safe and reliable by using proxies.\n* Converts unreadable HTML into structured information.\n* Beginner-friendly: No need to write a parser yourself.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "2c54e5e6-011a-4562-98ac-9cc9834bc284",
"name": "付箋2",
"type": "n8n-nodes-base.stickyNote",
"position": [
0,
0
],
"parameters": {
"color": 3,
"width": 340,
"height": 780,
"content": "## 🟢 **Section 3: Save to Google Sheets**\n\n**🧩 Node: Append to Google Sheets**\n📍 **Purpose:** Automatically sends the clean data into a Google Sheet for easy access, filtering, or sharing.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| ---------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |\n| 📄 Google Sheets | **Store Scraped Data in Spreadsheet** | Takes the structured output and appends it to the connected Google Sheet. Each result gets a row with title, author, abstract, etc. |\n\n### 🧠 How it helps:\n\n* No manual copy-pasting ever again!\n* Shareable and searchable format.\n* Updates automatically as you scrape more topics.\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "4ce90703-961e-4070-9356-c9dffc23a6c5",
"name": "付箋9",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
80
],
"parameters": {
"color": 4,
"width": 1300,
"height": 320,
"content": "=======================================\n WORKFLOW ASSISTANCE\n=======================================\nFor any questions or support, please contact:\n Yaron@nofluff.online\n\nExplore more tips and tutorials here:\n - YouTube: https://www.youtube.com/@YaronBeen/videos\n - LinkedIn: https://www.linkedin.com/in/yaronbeen/\n=======================================\n"
},
"typeVersion": 1
},
{
"id": "069ddb89-f7a1-4c4b-b65d-212be3252750",
"name": "付箋4",
"type": "n8n-nodes-base.stickyNote",
"position": [
-2980,
420
],
"parameters": {
"color": 4,
"width": 1289,
"height": 1878,
"content": "## 🌟 Research Paper Scraper to Google Sheets\n\n**Automate extraction of data from any website based on a topic — no coding needed!**\n\n---\n\n## 🔹 **Section 1: User Input & Trigger**\n\n**🧩 Nodes: Start Scraping | Set Topic**\n📍 **Purpose:** Let users easily input the topic they want to scrape — no need to deal with complex URLs.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------- | ---------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| ⚡ Trigger | **Start Scraping (Manual)** | This node starts the workflow when you click “Execute Workflow.” It's the entry point. |\n| ✏️ Set | **Set Topic (Manual Input)** | Instead of requiring a URL, the user will enter a topic (like \"machine learning\" or \"digital marketing\"). This topic will be used to automatically generate the URL behind the scenes. |\n\n### 🧠 How it helps:\n\n* Great for beginners: Just type the topic, hit run.\n* Keeps the interface clean and user-friendly.\n* Avoids confusion around URLs and formats.\n\n---\n\n## 🔸 **Section 2: Scrape & Parse Website**\n\n**🧩 Nodes: Send Request | Extract HTML | Clean Data**\n📍 **Purpose:** Uses the Bright Data proxy to access the webpage, extract raw HTML content, and clean it up into a readable format (title, author, abstract, etc.).\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| --------------- | ------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| 🌐 HTTP Request | **Send Topic Request to Bright Data** | This sends a request to the Bright Data API using the topic you set earlier. It uses Bright Data’s network to safely load the actual website and return HTML content. |\n| 🧱 HTML Extract | **Extract Data from Webpage** | Parses the returned HTML to find relevant data like titles, authors, abstracts, and links. |\n| 🔣 Code | **Clean and Format Scraped Data** | A custom code block that organizes the messy data into neat records. For example: title → column A, abstract → column B, etc. |\n\n### 🧠 How it helps:\n\n* Makes web scraping safe and reliable by using proxies.\n* Converts unreadable HTML into structured information.\n* Beginner-friendly: No need to write a parser yourself.\n\n---\n\n## 🟢 **Section 3: Save to Google Sheets**\n\n**🧩 Node: Append to Google Sheets**\n📍 **Purpose:** Automatically sends the clean data into a Google Sheet for easy access, filtering, or sharing.\n\n| 🧱 Node | ✅ New Name | 💡 Description |\n| ---------------- | ------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------- |\n| 📄 Google Sheets | **Store Scraped Data in Spreadsheet** | Takes the structured output and appends it to the connected Google Sheet. Each result gets a row with title, author, abstract, etc. |\n\n### 🧠 How it helps:\n\n* No manual copy-pasting ever again!\n* Shareable and searchable format.\n* Updates automatically as you scrape more topics.\n\n---\n\n## ✅ What a Beginner Gains from This Workflow\n\n| 💡 Feature | 🚀 Benefit |\n| --------------------------- | --------------------------------------------------------------------------------- |\n| Topic-based input | You don’t need to find or understand complex URLs. 
Just type “AI” or “marketing.” |\n| Fully automated scraping | You don’t need to open browsers or inspect elements. |\n| Ready-to-use Google Sheet | The final data is clean and saved into a sheet you can use anywhere. |\n| Beautiful, modular workflow | Each step is visual, editable, and reusable without coding skills. |\n\n---\n\n## 🎯 Final Result:\n\nYou type a **topic** → Bright Data scrapes the web → It extracts content → Cleans it → Saves it into **Google Sheets**.\nEverything happens automatically. **No code. No hassle. Just data.**\n\n---\n\n"
},
"typeVersion": 1
},
{
"id": "a1a5e609-756a-4757-a026-1349cf388e61",
"name": "付箋5",
"type": "n8n-nodes-base.stickyNote",
"position": [
400,
0
],
"parameters": {
"color": 7,
"width": 380,
"height": 240,
"content": "## I’ll receive a tiny commission if you join Bright Data through this link—thanks for fueling more free content!\n\n### https://get.brightdata.com/1tndi4600b25"
},
"typeVersion": 1
}
],
"active": false,
"pinData": {},
"settings": {
"executionOrder": "v1"
},
"versionId": "f931202a-3c22-495d-b775-71665bdf6c27",
"connections": {
"6e172db5-7483-4079-bf8a-785602526bdc": {
"main": [
[
{
"node": "e65d092a-6854-478c-b33e-2fc309f71ae8",
"type": "main",
"index": 0
}
]
]
},
"e65d092a-6854-478c-b33e-2fc309f71ae8": {
"main": [
[
{
"node": "211bae33-32c5-44e8-b306-a5e0d520a4a0",
"type": "main",
"index": 0
}
]
]
},
"7d81edf3-6f00-4634-b79f-dbda3f9958e5": {
"main": [
[
{
"node": "6e172db5-7483-4079-bf8a-785602526bdc",
"type": "main",
"index": 0
}
]
]
},
"9ab7ba20-8614-46c5-b57a-3749d6ae04c4": {
"main": [
[
{
"node": "a246f20c-2bb9-4319-8812-e296c87a7df0",
"type": "main",
"index": 0
}
]
]
},
"211bae33-32c5-44e8-b306-a5e0d520a4a0": {
"main": [
[
{
"node": "9ab7ba20-8614-46c5-b57a-3749d6ae04c4",
"type": "main",
"index": 0
}
]
]
}
}
}

Frequently Asked Questions
How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, select "Import from JSON", paste the configuration, and update the credentials as needed.
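If you prefer to script the import rather than paste it through the editor, n8n instances with the Public API enabled also accept workflow definitions over REST. A minimal sketch, assuming the export is saved locally and N8N_URL / N8N_API_KEY are set in the environment:

// Sketch: create the workflow via n8n's Public API (must be enabled on the instance).
// N8N_URL, N8N_API_KEY, and the local file name are placeholders.
import { readFile } from 'node:fs/promises';

const N8N_URL = process.env.N8N_URL; // e.g. https://n8n.example.com
const N8N_API_KEY = process.env.N8N_API_KEY;

const exported = JSON.parse(
  await readFile('Research_Paper_Scraper_to_Google_Sheets.json', 'utf8'),
);

// Send only the fields the create endpoint needs: name, nodes, connections, settings.
const response = await fetch(`${N8N_URL}/api/v1/workflows`, {
  method: 'POST',
  headers: {
    'X-N8N-API-KEY': N8N_API_KEY,
    'Content-Type': 'application/json',
  },
  body: JSON.stringify({
    name: exported.name,
    nodes: exported.nodes,
    connections: exported.connections,
    settings: exported.settings,
  }),
});

if (!response.ok) {
  throw new Error(`Import failed: ${response.status} ${await response.text()}`);
}
console.log('Imported workflow id:', (await response.json()).id);

Either way, reconnect your own Google Sheets credentials and Bright Data token after importing, since credential IDs do not transfer between instances.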
What scenarios is this workflow suited for?
Intermediate - Artificial Intelligence
Is it paid?
The workflow itself is completely free. However, third-party services it relies on (such as the Bright Data API) may charge separately.
Related Workflows

Automated Forum Monitoring with Bright Data
Automating forum monitoring with Bright Data and n8n
Set / Code / Html / +
17 nodes · Yaron Been
Artificial Intelligence

Scraping Upcoming Events with Bright Data
Automated event discovery with Bright Data and n8n
Code / Html / Http Request / +
11 nodes · Yaron Been
Artificial Intelligence

AI YouTube Analytics Assistant: Comment Analysis and Insight Reports
AI YouTube analytics assistant: comment analyzer and insight report generator
If / Set / Code / +
19 nodes · Yaron Been
Artificial Intelligence

Automating Hyper-Personalized Outreach at Scale with Bright Data and LLMs
Automating hyper-personalized outreach at scale with Bright Data and large language models
If / Set / Wait / +
21 nodes · Yaron Been
Sales

Automated Social Media Title Poster Powered by Bright Data
Automating social media titles with Bright Data and n8n
Set / Html / Twitter / +
16 nodes · Yaron Been
Artificial Intelligence

Competitor Price Monitoring Powered by Bright Data
Automated competitor price monitoring with Bright Data and n8n
If / Code / Html / +
15 nodes · Yaron Been
Artificial Intelligence
Workflow Information
Difficulty
Intermediate
Nodes: 12
Categories: 1
Node types: 7
Creator
Yaron Been
@yaron-nofluff
Building AI Agents and Automations | Growth Marketer | Entrepreneur | Book Author & Podcast Host. If you need any help with automations, feel free to reach out via LinkedIn: https://www.linkedin.com/in/yaronbeen/ And check out my YouTube channel: https://www.youtube.com/@YaronBeen/videos
External Links
View on n8n.io →