Strukturierte Massendatenextraktion mit Bright Data Web-Scraping

Experte

Dies ist ein Automatisierungsworkflow aus den Bereichen Engineering und Product mit 16 Nodes. Er verwendet hauptsächlich die Nodes If, Set, Wait, Function und Aggregate sowie weitere Nodes. Asynchrones Batch-Webcrawling mit Bright Data und Webhook-Benachrichtigungen.

Voraussetzungen
  • Anmeldedaten für die Bright-Data-API (HTTP Header Auth) sind erforderlich
Workflow-Vorschau
Visualisierung der Node-Verbindungen, mit Zoom und Pan
Workflow exportieren
Kopieren Sie die folgende JSON-Konfiguration und importieren Sie sie in n8n
{
  "id": "OjwmaLrXhW4pO5ph",
  "meta": {
    "instanceId": "885b4fb4a6a9c2cb5621429a7b972df0d05bb724c20ac7dac7171b62f1c7ef40"
  },
  "name": "Structured Bulk Data Extract with Bright Data Web Scraper",
  "tags": [
    {
      "id": "Kujft2FOjmOVQAmJ",
      "name": "Engineering",
      "createdAt": "2025-04-09T01:31:00.558Z",
      "updatedAt": "2025-04-09T01:31:00.558Z"
    },
    {
      "id": "ZOwtAMLepQaGW76t",
      "name": "Building Blocks",
      "createdAt": "2025-04-13T15:23:40.462Z",
      "updatedAt": "2025-04-13T15:23:40.462Z"
    }
  ],
  "nodes": [
    {
      "id": "1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec",
      "name": "Bei Klick auf 'Workflow testen'",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -900,
        -395
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "533968cd-1329-4a86-8875-478600ed82b7",
      "name": "Wenn",
      "type": "n8n-nodes-base.if",
      "position": [
        200,
        -470
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "6a7e5360-4cb5-4806-892e-5c85037fa71c",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.status }}",
              "rightValue": "ready"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
      "name": "Snapshot-ID setzen",
      "type": "n8n-nodes-base.set",
      "position": [
        -240,
        -395
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "2c3369c6-9206-45d7-9349-f577baeaf189",
              "name": "snapshot_id",
              "type": "string",
              "value": "={{ $json.snapshot_id }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
      "name": "Snapshot herunterladen",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        640,
        -520
      ],
      "parameters": {
        "url": "=https://api.brightdata.com/datasets/v3/snapshot/{{ $json.snapshot_id }}",
        "options": {
          "timeout": 10000
        },
        "sendQuery": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "format",
              "value": "json"
            }
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
      "name": "Warten",
      "type": "n8n-nodes-base.wait",
      "position": [
        420,
        -295
      ],
      "webhookId": "631cd5de-36b3-4264-88ae-45b30e2c2ccc",
      "parameters": {
        "amount": 30
      },
      "typeVersion": 1.1
    },
    {
      "id": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
      "name": "Fehler überprüfen",
      "type": "n8n-nodes-base.if",
      "position": [
        420,
        -520
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "b267071c-7102-407b-a98d-f613bcb1a106",
              "operator": {
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.errors.toString() }}",
              "rightValue": "0"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "b648614e-c33e-4818-8348-e95df56928c7",
      "name": "Snapshot-Status prüfen",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -20,
        -395
      ],
      "parameters": {
        "url": "=https://api.brightdata.com/datasets/v3/progress/{{ $json.snapshot_id }}",
        "options": {},
        "sendHeaders": true,
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "408a1584-666f-471e-bfcd-c4d857319688",
      "name": "Webhook-Benachrichtigung auslösen",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        1080,
        -520
      ],
      "parameters": {
        "url": "https://webhook.site/daf9d591-a130-4010-b1d3-0c66f8fcf467",
        "options": {},
        "sendBody": true,
        "bodyParameters": {
          "parameters": [
            {
              "name": "response",
              "value": "={{ $json.data[0] }}"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "6548a794-a4fd-4050-b07d-bc7ca4517882",
      "name": "JSON-Antwort aggregieren",
      "type": "n8n-nodes-base.aggregate",
      "position": [
        860,
        -520
      ],
      "parameters": {
        "options": {},
        "aggregate": "aggregateAllItemData"
      },
      "typeVersion": 1
    },
    {
      "id": "c84e195c-edd2-4f59-8986-516d116b7352",
      "name": "Dataset-ID, Request-URL setzen",
      "type": "n8n-nodes-base.set",
      "position": [
        -680,
        -400
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "c16061c8-c829-4bd3-b335-e79c605665f2",
              "name": "dataset_id",
              "type": "string",
              "value": "gd_l7q7dkf244hwjntr0"
            },
            {
              "id": "a4594c55-e39e-4a9e-80d6-d39370001e20",
              "name": "request",
              "type": "string",
              "value": "[{     \"url\": \"https://www.amazon.com/Quencher-FlowState-Stainless-Insulated-Smoothie/dp/B0CRMZHDG8\"   }]"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "ceae108e-ed78-40c5-8e58-7013591ccaad",
      "name": "Haftnotiz",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -900,
        -700
      ],
      "parameters": {
        "width": 520,
        "height": 280,
        "content": "## Note\n\nDeals with the Amazon web scraping by utilizing Bright Data Web Scraper Product.\n\n\n**Please make sure to set the Bright Data \n -> Dataset Id, Request URL and update the Webhook Notification URL**\n\nRefer \n- https://brightdata.com/products/web-scraper/ai\n- https://brightdata.com/products/web-scraper"
      },
      "typeVersion": 1
    },
    {
      "id": "1f55cffa-abd9-437b-bc9d-3fe0d8b02454",
      "name": "Haftnotiz1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -120,
        -600
      ],
      "parameters": {
        "color": 5,
        "width": 720,
        "height": 500,
        "content": "## Wait until the Snapshot is ready"
      },
      "typeVersion": 1
    },
    {
      "id": "d8ba0f62-80a9-4e66-b70c-086ee5992df6",
      "name": "Haftnotiz2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -900,
        -220
      ],
      "parameters": {
        "color": 4,
        "width": 660,
        "content": "## Who can benefit?\nData analysts, scientists, engineers, and developers seeking efficient methods to collect and analyze web data for AI, ML, big data applications, and more will find Scraper APIs particularly beneficial."
      },
      "typeVersion": 1
    },
    {
      "id": "7fdffafd-f256-4760-b001-a42b5198dbad",
      "name": "Binärdaten erstellen",
      "type": "n8n-nodes-base.function",
      "position": [
        1100,
        -720
      ],
      "parameters": {
        "functionCode": "// Serialize the first item's JSON as pretty-printed text and attach it\n// as base64 under the binary property `data`.\n// Buffer.from() replaces the deprecated `new Buffer()` constructor; the\n// resulting bytes are identical (UTF-8).\nitems[0].binary = {\n  data: {\n    data: Buffer.from(JSON.stringify(items[0].json, null, 2), 'utf8').toString('base64')\n  }\n};\nreturn items;"
      },
      "typeVersion": 1
    },
    {
      "id": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
      "name": "Datei auf Festplatte schreiben",
      "type": "n8n-nodes-base.readWriteFile",
      "position": [
        1320,
        -720
      ],
      "parameters": {
        "options": {},
        "fileName": "d:\\bulk_data.json",
        "operation": "write"
      },
      "typeVersion": 1
    },
    {
      "id": "1130523a-b598-425e-acf1-417ae8699f66",
      "name": "HTTP-Anfrage an spezifizierte URL",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -460,
        -395
      ],
      "parameters": {
        "url": "https://api.brightdata.com/datasets/v3/trigger",
        "method": "POST",
        "options": {},
        "jsonBody": "={{ $json.request }}",
        "sendBody": true,
        "sendQuery": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth",
        "queryParameters": {
          "parameters": [
            {
              "name": "dataset_id",
              "value": "={{ $json.dataset_id }}"
            },
            {
              "name": "format",
              "value": "json"
            },
            {
              "name": "uncompressed_webhook",
              "value": "true"
            }
          ]
        },
        "headerParameters": {
          "parameters": [
            {}
          ]
        }
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "kdbqXuxIR8qIxF7y",
          "name": "Header Auth account"
        }
      },
      "typeVersion": 4.2
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "8fb2eb85-ffd6-4632-9668-00f29bc91c34",
  "connections": {
    "533968cd-1329-4a86-8875-478600ed82b7": {
      "main": [
        [
          {
            "node": "c9cf847a-6399-4c93-a901-30f1c0e7408a",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "9d6cd882-c287-46ca-bc1e-df6b995fc422",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "9d6cd882-c287-46ca-bc1e-df6b995fc422": {
      "main": [
        [
          {
            "node": "b648614e-c33e-4818-8348-e95df56928c7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "83991fdf-0402-4de3-bbb5-7050e3e9fb62": {
      "main": [
        [
          {
            "node": "b648614e-c33e-4818-8348-e95df56928c7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "408a36af-decb-49b3-a95e-a2df0b6eea5f": {
      "main": [
        [
          {
            "node": "6548a794-a4fd-4050-b07d-bc7ca4517882",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c9cf847a-6399-4c93-a901-30f1c0e7408a": {
      "main": [
        [
          {
            "node": "408a36af-decb-49b3-a95e-a2df0b6eea5f",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "7fdffafd-f256-4760-b001-a42b5198dbad": {
      "main": [
        [
          {
            "node": "934ab31a-cfb9-4e97-8d86-92cd95dd219c",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "b648614e-c33e-4818-8348-e95df56928c7": {
      "main": [
        [
          {
            "node": "533968cd-1329-4a86-8875-478600ed82b7",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "6548a794-a4fd-4050-b07d-bc7ca4517882": {
      "main": [
        [
          {
            "node": "408a1584-666f-471e-bfcd-c4d857319688",
            "type": "main",
            "index": 0
          },
          {
            "node": "7fdffafd-f256-4760-b001-a42b5198dbad",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "c84e195c-edd2-4f59-8986-516d116b7352": {
      "main": [
        [
          {
            "node": "1130523a-b598-425e-acf1-417ae8699f66",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1130523a-b598-425e-acf1-417ae8699f66": {
      "main": [
        [
          {
            "node": "83991fdf-0402-4de3-bbb5-7050e3e9fb62",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "1bdca5ae-1e56-4cf2-a8dc-e135a6a2dfec": {
      "main": [
        [
          {
            "node": "c84e195c-edd2-4f59-8986-516d116b7352",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
Häufig gestellte Fragen

Wie verwende ich diesen Workflow?

Kopieren Sie den obigen JSON-Code, erstellen Sie einen neuen Workflow in Ihrer n8n-Instanz und wählen Sie "Aus JSON importieren". Fügen Sie die Konfiguration ein und passen Sie die Anmeldedaten nach Bedarf an.

Für welche Szenarien ist dieser Workflow geeignet?

Experte - Engineering, Produkt

Ist es kostenpflichtig?

Dieser Workflow ist völlig kostenlos. Beachten Sie jedoch, dass Drittanbieterdienste (wie die Bright Data API), die im Workflow verwendet werden, möglicherweise kostenpflichtig sind.

Workflow-Informationen
Schwierigkeitsgrad
Experte
Anzahl der Nodes: 16
Kategorie: 2
Node-Typen: 9
Schwierigkeitsbeschreibung

Für fortgeschrittene Benutzer, komplexe Workflows mit 16+ Nodes

Externe Links
Auf n8n.io ansehen

Diesen Workflow teilen

Kategorien

Kategorien: 34