Jina.ai ベースのマルチページウェブサイトクローラー

上級

これはAI分野の自動化ワークフローで、16個のノードを含みます。主にSet、Xml、Code、Wait、Limitなどのノードを使用し、AI技術を活用したスマート自動化を実現します。Jina.aiを使用した複数ページ対応のウェブサイトクローラーツールで、サイトマップから取得したURLを絞り込み、各ページのタイトルとMarkdownコンテンツを抽出してGoogle Driveに保存します。

前提条件
  • Google Drive API認証情報
  • ターゲットAPIの認証情報が必要な場合あり

カテゴリー

ワークフロープレビュー
ノード接続関係を可視化、ズームとパンをサポート
ワークフローをエクスポート
以下のJSON設定をn8nにインポートして、このワークフローを使用できます
{
  "id": "xEij0kj2I1DHbL3I",
  "meta": {
    "instanceId": "31e69f7f4a77bf465b805824e303232f0227212ae922d12133a0f96ffeab4fef",
    "templateCredsSetupCompleted": true
  },
  "name": "💡🌐 Essential Multipage Website Scraper with Jina.ai",
  "tags": [],
  "nodes": [
    {
      "id": "3a503859-ef0a-492d-81c6-37e4f0c4c25e",
      "name": "付箋",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -840,
        0
      ],
      "parameters": {
        "color": 3,
        "width": 340,
        "height": 320,
        "content": "## Jina.ai Web Scraper\n### No API Key Required\n"
      },
      "typeVersion": 1
    },
    {
      "id": "c5217a1a-f074-409b-8340-72afdc5fc8b5",
      "name": "「ワークフローテスト」クリック時",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -1500,
        -300
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "72af3b00-2632-4877-a0b6-7477e2f468f7",
      "name": "アイテムをループ処理",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -1080,
        20
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 3
    },
    {
      "id": "11f0fa02-51f8-41cc-b789-5c452b6899aa",
      "name": "待機",
      "type": "n8n-nodes-base.wait",
      "position": [
        80,
        220
      ],
      "webhookId": "081ce124-0cbf-4a21-a1e7-2c465f460448",
      "parameters": {},
      "typeVersion": 1.1
    },
    {
      "id": "cf3b5887-8ff2-46e0-ab33-384ab0987cbb",
      "name": "制限",
      "type": "n8n-nodes-base.limit",
      "position": [
        80,
        -300
      ],
      "parameters": {
        "maxItems": 20
      },
      "typeVersion": 1
    },
    {
      "id": "c4f04d82-aa33-46cf-a8e2-0b4e717e754a",
      "name": "ウェブサイトURLリストの取得",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -780,
        -300
      ],
      "parameters": {
        "url": "={{ $json.sitemap_url }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "7f507c38-1e9e-4c46-8dea-bd6daf65dc55",
      "name": "JSON に変換",
      "type": "n8n-nodes-base.xml",
      "position": [
        -560,
        -300
      ],
      "parameters": {
        "options": {}
      },
      "typeVersion": 1
    },
    {
      "id": "e21b55c2-8b0d-4c7c-ba91-a2d563a4c966",
      "name": "ウェブサイトURLリストの作成",
      "type": "n8n-nodes-base.splitOut",
      "position": [
        -340,
        -300
      ],
      "parameters": {
        "options": {},
        "fieldToSplitOut": "urlset.url"
      },
      "typeVersion": 1
    },
    {
      "id": "61555239-8a16-424e-8a60-700f6ebaa270",
      "name": "トピックまたはページでフィルタリング",
      "type": "n8n-nodes-base.filter",
      "position": [
        -120,
        -300
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "or",
          "conditions": [
            {
              "id": "d66c304d-879a-4dc4-908f-ab0665093672",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $json.loc }}",
              "rightValue": "=https://ai.pydantic.dev/"
            },
            {
              "id": "3c930950-bee4-442b-82e6-4437fd39a933",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.loc.toLowerCase() }}",
              "rightValue": "agent"
            },
            {
              "id": "aaeaf34e-ad5a-4673-b3bd-8bddf3500988",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json.loc.toLowerCase() }}",
              "rightValue": "tool"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "dd25fb57-64a3-4c47-be04-6eb66d16520a",
      "name": "ウェブサイトURLを設定",
      "type": "n8n-nodes-base.set",
      "position": [
        -1080,
        -300
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "1601dc3e-8024-4e19-b592-93a4e4f77641",
              "name": "sitemap_url",
              "type": "string",
              "value": "https://ai.pydantic.dev/sitemap.xml"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "14ac1c87-29fe-44c8-9c1e-f247a292dde5",
      "name": "Jina.ai Web Scraper",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -720,
        120
      ],
      "parameters": {
        "url": "=https://r.jina.ai/{{ $json.loc }}",
        "options": {}
      },
      "typeVersion": 4.2
    },
    {
      "id": "be253ec2-f088-4895-8ef2-61a3720cf68b",
      "name": "ウェブページコンテンツを Google Drive に保存",
      "type": "n8n-nodes-base.googleDrive",
      "position": [
        -120,
        120
      ],
      "parameters": {
        "name": "={{ $('アイテムをループ処理').item.json.loc }} - {{ $json.title }}",
        "content": "={{ $json.markdown }}",
        "driveId": {
          "__rl": true,
          "mode": "list",
          "value": "My Drive"
        },
        "options": {},
        "folderId": {
          "__rl": true,
          "mode": "list",
          "value": "root",
          "cachedResultName": "/ (Root folder)"
        },
        "operation": "createFromText"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "UhdXGYLTAJbsa0xX",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3
    },
    {
      "id": "95d808c7-a3ca-4f59-a385-cc77bdff322e",
      "name": "タイトルとMarkdownコンテンツを抽出",
      "type": "n8n-nodes-base.code",
      "position": [
        -380,
        120
      ],
      "parameters": {
        "jsCode": "// Get the text output from the previous node\nconst data = $input.first().json.data;\n\n// Regular expression to capture the title line\nconst titleRegex = /^Title:\\s*(.+)$/m;\n// Regular expression to capture everything after \"Markdown Content:\"\nconst markdownRegex = /Markdown Content:\\n([\\s\\S]+)/;\n\n// Extract the title using the first capture group\nconst titleMatch = data.match(titleRegex);\nconst title = titleMatch ? titleMatch[1].trim() : '';\n\n// Extract the markdown content using the first capture group\nconst markdownMatch = data.match(markdownRegex);\nconst markdown = markdownMatch ? markdownMatch[1].trim() : '';\n\n// Return a single object with title and markdown as unique values\nreturn { title, markdown };"
      },
      "typeVersion": 2
    },
    {
      "id": "2fb86c81-c144-4450-908c-559855deadef",
      "name": "付箋1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1240,
        -580
      ],
      "parameters": {
        "color": 7,
        "width": 1540,
        "height": 1080,
        "content": "# 💡🌐 Essential Multipage Website Scraper with Jina.ai\n## Scrape entire websites with this workflow\n**Use responsibly and follow local rules and regulations**"
      },
      "typeVersion": 1
    },
    {
      "id": "b470b294-95d0-4e51-a9cc-2fe17316a771",
      "name": "付箋2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1580,
        -400
      ],
      "parameters": {
        "color": 4,
        "width": 280,
        "height": 300,
        "content": "## 👍Try Me!"
      },
      "typeVersion": 1
    },
    {
      "id": "fafd0623-a423-4e73-9609-cee8e81f5c13",
      "name": "付箋3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1180,
        -400
      ],
      "parameters": {
        "width": 300,
        "height": 300,
        "content": "## 👇Add Website Sitemap URL"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "2e815787-d83b-4ab7-a959-2f33006a37a5",
  "connections": {
    "待機": {
      "main": [
        [
          {
            "node": "アイテムをループ処理",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "制限": {
      "main": [
        [
          {
            "node": "アイテムをループ処理",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "JSON に変換": {
      "main": [
        [
          {
            "node": "ウェブサイトURLリストの作成",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "アイテムをループ処理": {
      "main": [
        [],
        [
          {
            "node": "Jina.ai Web Scraper",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "ウェブサイトURLを設定": {
      "main": [
        [
          {
            "node": "ウェブサイトURLリストの取得",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Jina.ai Web Scraper": {
      "main": [
        [
          {
            "node": "タイトルとMarkdownコンテンツを抽出",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "ウェブサイトURLリストの取得": {
      "main": [
        [
          {
            "node": "JSON に変換",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "トピックまたはページでフィルタリング": {
      "main": [
        [
          {
            "node": "制限",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "ウェブサイトURLリストの作成": {
      "main": [
        [
          {
            "node": "トピックまたはページでフィルタリング",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "タイトルとMarkdownコンテンツを抽出": {
      "main": [
        [
          {
            "node": "ウェブページコンテンツを Google Drive に保存",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "「ワークフローテスト」クリック時": {
      "main": [
        [
          {
            "node": "ウェブサイトURLを設定",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "ウェブページコンテンツを Google Drive に保存": {
      "main": [
        [
          {
            "node": "待機",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
よくある質問

このワークフローの使い方は?

上記のJSON設定コードをコピーし、n8nインスタンスで新しいワークフローを作成して「JSONからインポート」を選択、設定を貼り付けて認証情報を必要に応じて変更してください。

このワークフローはどんな場面に適していますか?

上級者向けの人工知能(AI)カテゴリーのワークフローです。サイトマップから複数ページのURLを取得し、Jina.aiで各ページをMarkdownとして取得してGoogle Driveに保存したい場面に適しています。

有料ですか?

このワークフローは完全無料です。ただし、ワークフローで使用するサードパーティサービス(Jina.ai、Google Drive APIなど)は別途料金が発生する場合があります。

ワークフロー情報
難易度
上級
ノード数: 16
カテゴリー: 1
ノードタイプ: 12
難易度説明

上級者向け、16ノード以上の複雑なワークフロー

作成者
Joseph LePage

Joseph LePage

@joe

As an AI Automation consultant based in Canada, I partner with forward-thinking organizations to implement AI solutions that streamline operations and drive growth.

外部リンク
n8n.ioで表示

このワークフローを共有

カテゴリー

カテゴリー: 34