Bright Dataによる構造化された大量データ抽出のウェブスパイダー

上級

これはEngineering, Product分野の自動化ワークフローで、16個のノードを含みます。主にIf, Set, Wait, Function, Aggregateなどのノードを使用。 Bright DataとWebhookを使った非同期バッチWebスクレイピング

前提条件
  • ターゲットAPIの認証情報が必要な場合あり
ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
  "id": "OjwmaLrXhW4pO5ph",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40"
  },
  "name": "Structured Bulk Data Extract with Bright Data Web Scraper",
  "tags": [
    {
      "id": "Kujft2FOjmOVQAmJ",
      "name": "Engineering",
      "createdAt": "2025-04-09T01:31:00.558Z",
      "updatedAt": "2025-04-09T01:31:00.558Z"
    },
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    }
  ],
  "nodes": [
    {
      "id": "1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec",
      "name": "「テスト実行」クリック時",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -900,
        -395
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "533968cd-1329-4a86-8875-478600ed82b7",
      "name": "If",
      "type": "n8n-nodes-base.if",
      "position": [
        200,
        -470
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "6a7e5360-4cb5-4806-892e-5c85037fa71c",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.status }}",
              "rightValue": "ready"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
      "name": "Set Snapshot Id",
      "type": "n8n-nodes-base.set",
      "position": [
        -240,
        -395
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "2c3369c6-9206-45d7-9349-f577baeaf189",
              "name": "snapshot_id",
              "type": "string",
              "value": "={{ $json.snapshot_id }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
      "name": "Download Snapshot",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        640,
        -520
      ],
      "parameters": {
        "url": "=https://api.brightdata.com/datasets/v3/snapshot/{{ $json.snapshot_id }}",
        "options": {
          "timeout": 10000
        },
        "sendQuery": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "format",
              "value": "json"
            }
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [
        420,
        -295
      ],
      "webhookId": "631cd5de-36b3-4264-88ae-45b30e2c2ccc",
      "parameters": {
        "amount": 30
      },
      "typeVersion": 1.1
    },
    {
      "id": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
      "name": "エラー確認",
      "type": "n8n-nodes-base.if",
      "position": [
        420,
        -520
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "b267071c-7102-407b-a98d-f613bcb1a106",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.errors.toString() }}",
              "rightValue": "0"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "b648614e-c33e-4818-8348-e95df56928c7",
      "name": "スナップショットステータス確認",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -20,
        -395
      ],
      "parameters": {
        "url": "=https://api.brightdata.com/datasets/v3/progress/{{ $json.snapshot_id }}",
        "options": {},
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "408a1584-666f-471e-bfcd-c4d857319688",
      "name": "Webhook通知の開始",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1080,
        -520
      ],
      "parameters": {
        "url": "https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.data[0] }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "6548a794-a4fd-4050-b07d-bc7ca4517882",
      "name": "JSONレスポンスの集計",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        860,
        -520
      ],
      "parameters": {
        "options": {},
        "aggregate": "aggregateAllItemData"
      },
      "typeVersion": 1
    },
    {
      "id": "c84e195c-edd2-4f59-8986-516d116b7352",
      "name": "Set Dataset Id, Request URL",
      "type": "n8n-nodes-base.set",
      "position": [
        -680,
        -400
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "c16061c8-c829-4bd3-b335-e79c605665f2",
              "name": "dataset_id",
              "type": "string",
              "value": "gd_l7q7dkf244hwjntr0"
            },
            {
              "id": "a4594c55-e39e-4a9e-80d6-d39370001e20",
              "name": "request",
              "type": "string",
              "value": "[{     \"url\": \"https://www.amazon.com/Quencher-FlowState-Stainless-Insulated-Smoothie/dp/B0CRMZHDG8\"   }]"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "ceae108e-ed78-40c5-8e58-7013591ccaad",
      "name": "付箋",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -900,
        -700
      ],
      "parameters": {
        "width": 520,
        "height": 280,
        "content": "## Note\n\nDeals with the Amazon web scraping by utilizing Bright Data Web Scraper Product.\n\n\n**Please make sure to set the Bright Data \n -> Dataset Id, Request URL and update the Webhook Notification URL**\n\nRefer \n- https://brightdata.com/products/web-scraper/ai\n- https://brightdata.com/products/web-scraper"
      },
      "typeVersion": 1
    },
    {
      "id": "1f55cffa-abd9-437b-bc9d-3fe0d8b02454",
      "name": "付箋1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -120,
        -600
      ],
      "parameters": {
        "color": 5,
        "width": 720,
        "height": 500,
        "content": "## Wait until the Snapshot is ready"
      },
      "typeVersion": 1
    },
    {
      "id": "d8ba0f62-80a9-4e66-b70c-086ee5992df6",
      "name": "付箋2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -900,
        -220
      ],
      "parameters": {
        "color": 4,
        "width": 660,
        "content": "## Who can benefit?\nData analysts, scientists, engineers, and developers seeking efficient methods to collect and analyze web data for AI, ML, big data applications, and more will find Scraper APIs particularly beneficial."
      },
      "typeVersion": 1
    },
    {
      "id": "7fdffafd-f256-4760-b001-a42b5198dbad",
      "name": "Create a binary data",
      "type": "n8n-nodes-base.function",
      "position": [
        1100,
        -720
      ],
      "parameters": {
        "functionCode": "items[0].binary = {\n  data: {\n    data: new Buffer(JSON.stringify(items[0].json, null, 2)).toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
      "name": "Write the file to disk",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1320,
        -720
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\bulk_data.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "1130523a-b598-425e-acf1-417ae8699f66",
      "name": "指定URLへのHTTPリクエスト",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -460,
        -395
      ],
      "parameters": {
        "url": "https://api.brightdata.com/datasets/v3/trigger",
        "method": "POST",
        "options": {},
        "jsonBody": "={{ $json.request }}",
        "sendBody": true,
        "sendQuery": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "dataset_id",
              "value": "={{ $json.dataset_id }}"
            },
            {
              "name": "format",
              "value": "json"
            },
            {
              "name": "uncompressed_webhook",
              "value": "true"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "8fb2eb85-ffd6-4632-9668-00f29bc91c34",
  "connections": {
    "533968cd-1329-4a86-8875-478600ed82b7": {
      "main": [
        [
          {
            "node": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "9d6cd882-c287-46ca-bc1e-df6b995fc422": {
      "main": [
        [
          {
            "node": "b648614e-c33e-4818-8348-e95df56928c7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "83991fdf-0402-4de3-bbb5-7050e3e9fb62": {
      "main": [
        [
          {
            "node": "b648614e-c33e-4818-8348-e95df56928c7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "408a36af-decb-49b3-a95e-a2df0b6eea5f": {
      "main": [
        [
          {
            "node": "6548a794-a4fd-4050-b07d-bc7ca4517882",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c9cf847a-6399-4c93-a901-30f1c0e7408a": {
      "main": [
        [
          {
            "node": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7fdffafd-f256-4760-b001-a42b5198dbad": {
      "main": [
        [
          {
            "node": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "b648614e-c33e-4818-8348-e95df56928c7": {
      "main": [
        [
          {
            "node": "533968cd-1329-4a86-8875-478600ed82b7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "6548a794-a4fd-4050-b07d-bc7ca4517882": {
      "main": [
        [
          {
            "node": "408a1584-666f-471e-bfcd-c4d857319688",
            "type": "main",
            "index": 0
          },
          {
            "node": "7fdffafd-f256-4760-b001-a42b5198dbad",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c84e195c-edd2-4f59-8986-516d116b7352": {
      "main": [
        [
          {
            "node": "1130523a-b598-425e-acf1-417ae8699f66",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1130523a-b598-425e-acf1-417ae8699f66": {
      "main": [
        [
          {
            "node": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec": {
      "main": [
        [
          {
            "node": "c84e195c-edd2-4f59-8986-516d116b7352",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
よくある質問

このワークフローの使い方は?

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。

このワークフローはどんな場面に適していますか?

上級 - エンジニアリング, プロダクト

有料ですか?

このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(OpenAI APIなど)は別途料金が発生する場合があります。

ワークフロー情報
難易度
上級
ノード数16
カテゴリー2
ノードタイプ9
難易度説明

上級者向け、16ノード以上の複雑なワークフロー

外部リンク
n8n.ioで表示

このワークフローを共有

カテゴリー

カテゴリー: 34