Bright DataとGoogle Geminiで構造化スクレイピング

上級

これはContent Creation, Multimodal AI分野の自動化ワークフローで、18個のノードを含みます。主にSet, Function, HttpRequest, ManualTrigger, ReadWriteFileなどのノードを使用。 Bright DataとGoogle Geminiを使ってウェブデータを抽出・分析

前提条件
  • ターゲットAPIの認証情報が必要な場合あり
  • Google Gemini API Key
ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
  "id": "1GOrjyc9mtZCMvCr",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40",
    "templateCredsSetupCompleted": true
  },
  "name": "Structured Data Extract, Data Mining with Bright Data & Google Gemini",
  "tags": [
    {
      "id": "Kujft2FOjmOVQAmJ",
      "name": "Engineering",
      "createdAt": "2025-04-09T01:31:00.558Z",
      "updatedAt": "2025-04-09T01:31:00.558Z"
    },
    {
      "id": "ddPkw7Hg5dZhQu2w",
      "name": "AI",
      "createdAt": "2025-04-13T05:38:08.053Z",
      "updatedAt": "2025-04-13T05:38:08.053Z"
    }
  ],
  "nodes": [
    {
      "id": "1e9038e6-9ebc-4460-bee2-3faea3b38f4c",
      "name": "ワークフローをテストクリック時",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        200,
        -420
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "fd4ace46-7261-4380-8b65-1e00bb574f27",
      "name": "付箋ノート",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        200,
        -780
      ],
      "parameters": {
        "width": 400,
        "height": 300,
        "content": "## Note\n\nThis workflow deals with the structured data extraction by utilizing Bright Data Web Unlocker Product.\n\nThe Basic LLM Chain, Information Extraction, are being used to demonstrate the usage of the N8N AI capabilities.\n\n**Please make sure to set the web URL of your interest within the \"Set URL and Bright Data Zone\" node and update the Webhook Notification URL**"
      },
      "typeVersion": 1
    },
    {
      "id": "1c1dd10f-beb2-4cc7-9118-77efd3172651",
      "name": "付箋ノート1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        620,
        -780
      ],
      "parameters": {
        "width": 480,
        "height": 300,
        "content": "## LLM Usages\n\nGoogle Gemini Flash Exp model is being used.\n\nBasic LLM Chain Data Extractor.\n\nInformation Extraction is being used for the handling the custom sentiment analysis with the structured response."
      },
      "typeVersion": 1
    },
    {
      "id": "9795ac80-6ded-465d-bfcf-0c6ce120452f",
      "name": "Markdownからテキストデータ抽出",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        860,
        -420
      ],
      "parameters": {
        "text": "=You need to analyze the below markdown and convert to textual data. Please do not output with your own thoughts. Make sure to output with textual data only with no links, scripts, css etc.\n\n{{ $json.data }}",
        "messages": {
          "messageValues": [
            {
              "message": "You are a markdown expert"
            }
          ]
        },
        "promptType": "define"
      },
      "typeVersion": 1.6
    },
    {
      "id": "b6a8cc64-c0c7-40dc-b7c1-0571baf3a0a9",
      "name": "URLとBright Dataゾーンの設定",
      "type": "n8n-nodes-base.set",
      "position": [
        420,
        -420
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "3aedba66-f447-4d7a-93c0-8158c5e795f9",
              "name": "url",
              "type": "string",
              "value": "https://www.bbc.com/news/world"
            },
            {
              "id": "4e7ee31d-da89-422f-8079-2ff2d357a0ba",
              "name": "zone",
              "type": "string",
              "value": "web_unlocker1"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "8d15dca1-3014-405f-ac35-78d64eda1d07",
      "name": "Markdownからテキストデータ抽出のための Webhook 通知を開始",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1314,
        -720
      ],
      "parameters": {
        "url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "content",
              "value": "={{ $json.text }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "fff9e2d1-f3e2-47c3-8c3a-f9de8dbdee6a",
      "name": "AI感情分析のための Webhook 通知を開始",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1612,
        80
      ],
      "parameters": {
        "url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "summary",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "40c82a76-1710-4e57-8123-9c9fbc729110",
      "name": "データ抽出用 Google Gemini チャットモデル",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        948,
        -200
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "0c1da174-9b9c-4067-9b2c-fa0cc8c33dc8",
      "name": "感情分析用 Google Gemini チャットモデル",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1324,
        200
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "7fae589c-854d-429e-9e67-527a002fcabf",
      "name": "Bright Dataウェブリクエストの実行",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        640,
        -420
      ],
      "parameters": {
        "url": "https://api.brightdata.com/request",
        "method": "POST",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "bodyParameters": {
          "parameters": [
            {
              "name": "zone",
              "value": "={{ $json.zone }}"
            },
            {
              "name": "url",
              "value": "={{ $json.url }}?product=unlocker&method=api"
            },
            {
              "name": "format",
              "value": "raw"
            },
            {
              "name": "data_format",
              "value": "markdown"
            }
          ]
        },
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "e15fb9ba-ea8f-41f0-9b99-437d14a98a7d",
      "name": "構造化レスポンスを用いたトピック抽出",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        1236,
        -20
      ],
      "parameters": {
        "text": "=Perform the topic analysis on the below content and output with the structured information.\n\nHere's the content:\n\n{{ $('Perform Bright Data Web Request').item.json.data }}",
        "options": {
          "systemPromptTemplate": "You are an expert data analyst."
        },
        "schemaType": "manual",
        "inputSchema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"TopicModelingResponseArray\",\n  \"type\": \"array\",\n  \"items\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"topic\": {\n        \"type\": \"string\",\n        \"description\": \"The identified topic or theme derived from the input text.\"\n      },\n      \"score\": {\n        \"type\": \"number\",\n        \"minimum\": 0,\n        \"maximum\": 1,\n        \"description\": \"Confidence score representing how strongly this topic is reflected in the content.\"\n      },\n      \"summary\": {\n        \"type\": \"string\",\n        \"description\": \"Brief explanation of the topic’s context within the text.\"\n      },\n      \"keywords\": {\n        \"type\": \"array\",\n        \"description\": \"List of keywords associated with the topic.\",\n        \"items\": {\n          \"type\": \"string\"\n        }\n      }\n    },\n    \"required\": [\"topic\", \"score\", \"summary\", \"keywords\"],\n    \"additionalProperties\": false\n  }\n}\n"
      },
      "typeVersion": 1
    },
    {
      "id": "e7f2b2c5-89ba-45c4-b7a4-297a159f8b39",
      "name": "構造化レスポンスを用いた地域・カテゴリ別トレンド",
      "type": "@n8n/n8n-nodes-langchain.informationExtractor",
      "position": [
        1236,
        -520
      ],
      "parameters": {
        "text": "=Perform the data analysis on the below content and output with the structured information by clustering the emerging trends by location and category\n\nHere's the content:\n\n{{ $('Perform Bright Data Web Request').item.json.data }}",
        "options": {
          "systemPromptTemplate": "You are an expert data analyst."
        },
        "schemaType": "manual",
        "inputSchema": "{\n  \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n  \"title\": \"EmergingTrendsClusteredByLocationAndCategory\",\n  \"type\": \"array\",\n  \"items\": {\n    \"type\": \"object\",\n    \"properties\": {\n      \"location\": {\n        \"type\": \"string\",\n        \"description\": \"Geographical region or city where the trend is observed.\"\n      },\n      \"category\": {\n        \"type\": \"string\",\n        \"description\": \"Domain or industry related to the trend (e.g., Technology, Finance, Healthcare).\"\n      },\n      \"trends\": {\n        \"type\": \"array\",\n        \"items\": {\n          \"type\": \"object\",\n          \"properties\": {\n            \"trend\": {\n              \"type\": \"string\",\n              \"description\": \"A concise label for the emerging trend.\"\n            },\n            \"score\": {\n              \"type\": \"number\",\n              \"minimum\": 0,\n              \"maximum\": 1,\n              \"description\": \"Confidence or prominence score of the trend.\"\n            },\n            \"summary\": {\n              \"type\": \"string\",\n              \"description\": \"Short explanation describing the context and impact of the trend.\"\n            },\n            \"mentions\": {\n              \"type\": \"array\",\n              \"items\": {\n                \"type\": \"string\"\n              },\n              \"description\": \"Keywords or phrases that commonly co-occur with the trend.\"\n            }\n          },\n          \"required\": [\"trend\", \"score\", \"summary\", \"mentions\"],\n          \"additionalProperties\": false\n        }\n      }\n    },\n    \"required\": [\"location\", \"category\", \"trends\"],\n    \"additionalProperties\": false\n  }\n}\n"
      },
      "typeVersion": 1
    },
    {
      "id": "92203e9f-cf13-435e-bf78-3c39a6e1e6f6",
      "name": "Google Gemini チャットモデル",
      "type": "@n8n/n8n-nodes-langchain.lmChatGoogleGemini",
      "position": [
        1324,
        -300
      ],
      "parameters": {
        "options": {},
        "modelName": "models/gemini-2.0-flash-exp"
      },
      "credentials": {
        "googlePalmApi": {
          "id": "YeO7dHZnuGBVQKVZ",
          "name": "Google Gemini(PaLM) Api account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "1a252b74-6768-41a6-99dd-090e35c47065",
      "name": "地域・カテゴリ別トレンドのための Webhook 通知を開始",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1612,
        -320
      ],
      "parameters": {
        "url": "https://webhook.site/3c36d7d1-de1b-4171-9fd3-643ea2e4dd76",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "summary",
              "value": "={{ $json.output }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "c952ab41-66af-4b41-b04e-407816074c87",
      "name": "トピック用バイナリファイルの作成",
      "type": "n8n-nodes-base.function",
      "position": [
        1612,
        -120
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "2cf80339-0927-4f48-a13a-c610eaf4edca",
      "name": "トピックファイルをディスクに書き込み",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1820,
        -120
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\topics.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "cf1da0ee-bb78-4ea7-bb2d-f2f82f728b12",
      "name": "トレンドファイルをディスクに書き込み",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1832,
        -520
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\trends.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "d38ca005-6ba3-4105-9fcd-058602ba16ce",
      "name": "トレンド用バイナリデータの作成",
      "type": "n8n-nodes-base.function",
      "position": [
        1612,
        -520
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "6a81579d-1f3b-4ea2-821b-fff07b32ee7d",
  "connections": {
    "92203e9f-cf13-435e-bf78-3c39a6e1e6f6": {
      "ai_languageModel": [
        [
          {
            "node": "e7f2b2c5-89ba-45c4-b7a4-297a159f8b39",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "b6a8cc64-c0c7-40dc-b7c1-0571baf3a0a9": {
      "main": [
        [
          {
            "node": "7fae589c-854d-429e-9e67-527a002fcabf",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "cf1da0ee-bb78-4ea7-bb2d-f2f82f728b12": {
      "main": [
        []
      ]
    },
    "d38ca005-6ba3-4105-9fcd-058602ba16ce": {
      "main": [
        [
          {
            "node": "cf1da0ee-bb78-4ea7-bb2d-f2f82f728b12",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c952ab41-66af-4b41-b04e-407816074c87": {
      "main": [
        [
          {
            "node": "2cf80339-0927-4f48-a13a-c610eaf4edca",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7fae589c-854d-429e-9e67-527a002fcabf": {
      "main": [
        [
          {
            "node": "9795ac80-6ded-465d-bfcf-0c6ce120452f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1e9038e6-9ebc-4460-bee2-3faea3b38f4c": {
      "main": [
        [
          {
            "node": "b6a8cc64-c0c7-40dc-b7c1-0571baf3a0a9",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "9795ac80-6ded-465d-bfcf-0c6ce120452f": {
      "main": [
        [
          {
            "node": "e15fb9ba-ea8f-41f0-9b99-437d14a98a7d",
            "type": "main",
            "index": 0
          },
          {
            "node": "8d15dca1-3014-405f-ac35-78d64eda1d07",
            "type": "main",
            "index": 0
          },
          {
            "node": "e7f2b2c5-89ba-45c4-b7a4-297a159f8b39",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "40c82a76-1710-4e57-8123-9c9fbc729110": {
      "ai_languageModel": [
        [
          {
            "node": "9795ac80-6ded-465d-bfcf-0c6ce120452f",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "e15fb9ba-ea8f-41f0-9b99-437d14a98a7d": {
      "main": [
        [
          {
            "node": "fff9e2d1-f3e2-47c3-8c3a-f9de8dbdee6a",
            "type": "main",
            "index": 0
          },
          {
            "node": "c952ab41-66af-4b41-b04e-407816074c87",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "0c1da174-9b9c-4067-9b2c-fa0cc8c33dc8": {
      "ai_languageModel": [
        [
          {
            "node": "e15fb9ba-ea8f-41f0-9b99-437d14a98a7d",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "e7f2b2c5-89ba-45c4-b7a4-297a159f8b39": {
      "main": [
        [
          {
            "node": "1a252b74-6768-41a6-99dd-090e35c47065",
            "type": "main",
            "index": 0
          },
          {
            "node": "d38ca005-6ba3-4105-9fcd-058602ba16ce",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
よくある質問

このワークフローの使い方は?

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。

このワークフローはどんな場面に適していますか?

上級 - コンテンツ作成, マルチモーダルAI

有料ですか?

このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(OpenAI APIなど)は別途料金が発生する場合があります。

ワークフロー情報
難易度
上級
ノード数18
カテゴリー2
ノードタイプ9
難易度説明

上級者向け、16ノード以上の複雑なワークフロー

作成者
Amit Mehta

Amit Mehta

@amitswba

I'm a workflow automation expert with 15+ years in IT industry. I build smart, scalable n8n workflows for AI automation, marketing, CRM, and SaaS integrations. My focus is on simplifying business processes with tools like OpenAI, WhatsApp, Gmail, and Airtable. I help teams and solopreneurs automate smarter, reduce manual tasks, and grow faster—one workflow at a time.

外部リンク
n8n.ioで表示

このワークフローを共有

カテゴリー

カテゴリー: 34