8
n8n 中文网amn8n.com

使用Decodo、Gemini AI和Google Sheets抓取、结构化并存储新闻数据

高级

这是一个Market Research, AI Summarization领域的自动化工作流,包含 18 个节点。主要使用 Set, Wait, Crypto, SplitOut, GoogleSheets 等节点。 使用Decodo、Gemini AI和Google Sheets抓取、结构化并存储新闻数据

前置要求
  • Google Sheets API 凭证
  • Google Gemini API Key
  • Decodo API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "689fa22e68cd4198e4ae37f3cc44f498087edd235a867e22515be823bab694c7",
    "templateCredsSetupCompleted": true
  },
  "nodes": [
    {
      "id": "dad6ee4f-190c-40d6-b164-49dd6f4b820e",
      "name": "Schedule Trigger",
      "type": "n8n-nodes-base.scheduleTrigger",
      "position": [
        960,
        -368
      ],
      "parameters": {
        "rule": {
          "interval": [
            {}
          ]
        }
      },
      "typeVersion": 1.2
    },
    {
      "id": "10ba30a4-f1e5-4567-9e64-fbe7b58a6219",
      "name": "Split Forums",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        1408,
        -368
      ],
      "parameters": {
        "include": "selectedOtherFields",
        "options": {
          "destinationFieldName": "url"
        },
        "fieldToSplitOut": "forums",
        "fieldsToInclude": "geo"
      },
      "typeVersion": 1
    },
    {
      "id": "59a50a1d-ae14-4947-b27e-cfb29d44d7f3",
      "name": "Iterate Forums",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        1632,
        -368
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "f2cac076-1743-47be-a0d9-a0b70f477404",
      "name": "Google Gemini Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        2096,
        -272
      ],
      "parameters": {
        "options": {}
      },
      "credentials": {
        "googlePalmApi": {
          "id": "C4mfRdRQfQ524QK8",
          "name": "Johan Gemini"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "6ec03e78-3b6d-40f3-8351-2210dee705fb",
      "name": "Extract Structured News Data",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        2080,
        -496
      ],
      "parameters": {
        "text": "=",
        "batching": {},
        "messages": {
          "messageValues": [
            {
              "message": "=You are an intelligent data extraction model specialized in reading unstructured forum or news data and converting it into structured JSON format."
            },
            {
              "type": "HumanMessagePromptTemplate",
              "message": "=Your task is to extract a list of news posts from the provided scraped text. Each post should include:\n- title\n- url\n- source (domain name)\n- points (integer, can be null)\n- comments (integer, can be null)\n- author (string, can be null)\n- posted_at (string, e.g. \"2025-10-18\")\n\nReturn the result strictly as a JSON array following this schema:\n[\n  {\n    \"title\": \"\",\n    \"url\": \"\",\n    \"source\": \"\",\n    \"points\": 0,\n    \"comments\": 0,\n    \"author\": \"\",\n    \"posted_at\": \"\"\n  }\n]\n\nRules:\n- Output JSON only — no explanations, comments, or markdown formatting.\n- Never include text outside the JSON array.\n- Ensure all keys exist even if null.\n- Determine the posted dates using {{ $now.format('yyyy-MM-dd HH:mm:ss') }} as reference."
            },
            {
              "type": "HumanMessagePromptTemplate",
              "message": "=Example scraped text:\n\"1. Claude Haiku 4.5 (anthropic.com/news/claude-haiku-4-5) - 617 points by adocomplete 2 hours ago | 228 comments\n2. Claude Haiku 4.5 System Card (anthropic.com/system-card.pdf) - 51 points by vinhnx 1 day ago | 1 comment\""
            },
            {
              "type": "AIMessagePromptTemplate",
              "message": "=[\n  {\n    \"title\": \"Claude Haiku 4.5\",\n    \"url\": \"https://www.anthropic.com/news/claude-haiku-4-5\",\n    \"source\": \"anthropic.com\",\n    \"points\": 617,\n    \"comments\": 228,\n    \"author\": \"adocomplete\",\n    \"posted_at\": \"2025-10-18\"\n  },\n  {\n    \"title\": \"Claude Haiku 4.5 System Card\",\n    \"url\": \"https://www.anthropic.com/system-card.pdf\",\n    \"source\": \"anthropic.com\",\n    \"points\": 51,\n    \"comments\": 1,\n    \"author\": \"vinhnx\",\n    \"posted_at\": \"2025-10-17\"\n  }\n]"
            },
            {
              "type": "HumanMessagePromptTemplate",
              "message": "={{ $json.data.results.first().content }}"
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.7
    },
    {
      "id": "3325da16-bc04-4c47-b2d7-dd35ab706a34",
      "name": "Parse JSON Output",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        2224,
        -272
      ],
      "parameters": {
        "jsonSchemaExample": "[\n  {\n    \"title\": \"Claude Haiku 4.5\",\n    \"url\": \"https://www.anthropic.com/news/claude-haiku-4-5\",\n    \"source\": \"anthropic.com\",\n    \"points\": 617,\n    \"comments\": 228,\n    \"author\": \"adocomplete\",\n    \"posted_at\": \"2025-10-18\"\n  },\n  {\n    \"title\": \"Claude Haiku 4.5 System Card\",\n    \"url\": \"https://www.anthropic.com/system-card.pdf\",\n    \"source\": \"anthropic.com\",\n    \"points\": 51,\n    \"comments\": 1,\n    \"author\": \"vinhnx\",\n    \"posted_at\": \"2025-10-17\"\n  }\n]"
      },
      "typeVersion": 1.3
    },
    {
      "id": "e54df6e4-4498-496a-9038-baf64da0bf8a",
      "name": "Split News Items",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        2432,
        -640
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "output"
      },
      "typeVersion": 1
    },
    {
      "id": "ef9511a9-b1c6-4613-9131-35add8ba953c",
      "name": "Generate Unique Key",
      "type": "n8n-nodes-base.crypto",
      "position": [
        2656,
        -640
      ],
      "parameters": {
        "value": "={{ `${$json.url}+${$json.author}` }}",
        "dataPropertyName": "key"
      },
      "executeOnce": false,
      "typeVersion": 1
    },
    {
      "id": "a170da82-089c-461d-84db-4dcf445c5927",
      "name": "Wait Between Scrapes",
      "type": "n8n-nodes-base.wait",
      "position": [
        2656,
        -272
      ],
      "webhookId": "4c658f26-ded6-4162-bb08-0fa0db99a667",
      "parameters": {
        "unit": "minutes",
        "amount": 1
      },
      "executeOnce": false,
      "typeVersion": 1.1
    },
    {
      "id": "40ae6657-16a4-4c7e-b471-fdbab870e69e",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        0,
        -752
      ],
      "parameters": {
        "width": 864,
        "height": 848,
        "content": "![Waha Johan](https://drive.google.com/thumbnail?id=1SHtHQ7h1pflq_L_obGfBK29wGUY16-Vg&sz=w2000)"
      },
      "typeVersion": 1
    },
    {
      "id": "9e816c1f-15dc-4bbd-baed-f19f2c8e2470",
      "name": "Update Google Sheet (News)",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2880,
        -640
      ],
      "parameters": {
        "columns": {
          "value": {
            "key": "={{ $json.key }}",
            "url": "={{ $json.url }}",
            "title": "={{ $json.title }}",
            "author": "={{ $json.author }}",
            "points": "={{ $json.points }}",
            "source": "={{ $json.source }}",
            "comments": "={{ $json.comments }}",
            "posted_at": "={{ $json.posted_at }}",
            "last_updated": "={{ $now }}"
          },
          "schema": [
            {
              "id": "key",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "key",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "title",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "title",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "url",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "url",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "source",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "source",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "points",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "points",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "comments",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "comments",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "author",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "author",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "posted_at",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "posted_at",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "last_updated",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "last_updated",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [
            "key"
          ],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "appendOrUpdate",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 800242193,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ZT7TGHRidqMlQpx-mCcqzUMdg_dzidPCZxNrabKJrhM/edit#gid=800242193",
          "cachedResultName": "News"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Workflow Config').item.json.sheet_id }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "GBiqmzOXHpo7XcLf",
          "name": "Johan Sheets"
        }
      },
      "typeVersion": 4.7
    },
    {
      "id": "534a4190-017a-44c1-a909-e1e246f2696b",
      "name": "Workflow Config",
      "type": "n8n-nodes-base.set",
      "position": [
        1184,
        -368
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3896038b-41c9-496f-af43-dee58ca1ee4f",
              "name": "forums",
              "type": "array",
              "value": "={{[\n  \"https://news.ycombinator.com/from?site=openai.com\",\n  \"https://news.ycombinator.com/from?site=anthropic.com\",\n]}}"
            },
            {
              "id": "371bf5a1-4377-4f15-9521-f240efd7db47",
              "name": "geo",
              "type": "string",
              "value": "United States"
            },
            {
              "id": "232246ee-aebd-4b4c-abc3-15818c2830d7",
              "name": "sheet_id",
              "type": "string",
              "value": "YOUR_SHEET_ID"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "0b9f0b9b-c682-4c78-80a4-2e2873cf21de",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1136,
        -464
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 240,
        "content": "### 指定论坛 URL、地理位置和 Sheet ID"
      },
      "typeVersion": 1
    },
    {
      "id": "a0abdc20-c173-4561-9ad4-9c83e6b50e5b",
      "name": "便签7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2832,
        -736
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 272,
        "content": "### 确保您的表格选项卡匹配列架构"
      },
      "typeVersion": 1
    },
    {
      "id": "630ddec3-b370-4c23-b306-21e8c63cf989",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        912,
        -464
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 240,
        "content": "### 调整计划(例如,每天午夜)"
      },
      "typeVersion": 1
    },
    {
      "id": "490d8405-a01d-48a4-ae8e-f213dcba5128",
      "name": "Scrape Forum Data",
      "type": "@decodo/n8n-nodes-decodo.decodo",
      "position": [
        1856,
        -496
      ],
      "parameters": {
        "geo": "={{ $json.geo }}",
        "url": "={{ $json.url }}"
      },
      "credentials": {
        "decodoApi": {
          "id": "kfy3Vs5AgxfDGzpC",
          "name": "Johan Decodo"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "dc69f29f-5553-4edd-a730-4786fb8a9302",
      "name": "便签8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2384,
        -496
      ],
      "parameters": {
        "color": 5,
        "width": 192,
        "height": 272,
        "content": "### 确保您的表格选项卡匹配列架构"
      },
      "typeVersion": 1
    },
    {
      "id": "4a9901a4-1487-4eb2-8ce5-351f62749f12",
      "name": "Log Scrape Results",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        2432,
        -400
      ],
      "parameters": {
        "columns": {
          "value": {
            "geo": "={{ $('Iterate Forums').item.json.geo }}",
            "forum": "={{ $('Iterate Forums').item.json.url }}",
            "news_count": "={{ $json.output.length }}",
            "scraped_at": "={{ $now }}"
          },
          "schema": [
            {
              "id": "forum",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "forum",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "geo",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "geo",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "news_count",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "news_count",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "scraped_at",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "scraped_at",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/1ZT7TGHRidqMlQpx-mCcqzUMdg_dzidPCZxNrabKJrhM/edit#gid=0",
          "cachedResultName": "Logs"
        },
        "documentId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $('Workflow Config').item.json.sheet_id }}"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "GBiqmzOXHpo7XcLf",
          "name": "Johan Sheets"
        }
      },
      "executeOnce": false,
      "typeVersion": 4.7
    }
  ],
  "pinData": {},
  "connections": {
    "Split Forums": {
      "main": [
        [
          {
            "node": "Iterate Forums",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Iterate Forums": {
      "main": [
        [],
        [
          {
            "node": "Scrape Forum Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Workflow Config": {
      "main": [
        [
          {
            "node": "Split Forums",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Schedule Trigger": {
      "main": [
        [
          {
            "node": "Workflow Config",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split News Items": {
      "main": [
        [
          {
            "node": "Generate Unique Key",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Parse JSON Output": {
      "ai_outputParser": [
        [
          {
            "node": "Extract Structured News Data",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "Scrape Forum Data": {
      "main": [
        [
          {
            "node": "Extract Structured News Data",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Log Scrape Results": {
      "main": [
        [
          {
            "node": "Wait Between Scrapes",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Generate Unique Key": {
      "main": [
        [
          {
            "node": "Update Google Sheet (News)",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Gemini Model": {
      "ai_languageModel": [
        [
          {
            "node": "Extract Structured News Data",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Wait Between Scrapes": {
      "main": [
        [
          {
            "node": "Iterate Forums",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract Structured News Data": {
      "main": [
        [
          {
            "node": "Split News Items",
            "type": "main",
            "index": 0
          },
          {
            "node": "Log Scrape Results",
            "type": "main",
            "index": 0
          }
        ],
        []
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 市场调研, AI 摘要总结

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Google Gemini API、Decodo)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量18
分类2
节点类型12
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

作者
Fahmi Fahreza

Fahmi Fahreza

@fahmiiireza

AI Automation Developer

外部链接
在 n8n.io 查看

分享此工作流