8
n8n 中文网amn8n.com

[1/3 - 异常检测] [1/2 - KNN分类] 批量上传数据集到Qdrant(农作物数据集)

高级

这是一个 AI 领域的自动化工作流,包含 25 个节点,主要使用 If、Set、Code、Filter、HttpRequest 等节点。它将图像数据集(农作物数据集)批量上传到 Qdrant,是 [1/3 异常检测] 和 [1/2 KNN 分类] 两个系列的第一步。

前置要求
  • 可能需要目标 API 的认证凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "id": "pPtCy6qPfEv1qNRn",
  "meta": {
    "instanceId": "205b3bc06c96f2dc835b4f00e1cbf9a937a74eeb3b47c99d0c30b0586dbf85aa"
  },
  "name": "[1/3 - 异常检测] [1/2 - KNN分类] 批量上传数据集到Qdrant(农作物数据集)",
  "tags": [
    {
      "id": "n3zAUYFhdqtjhcLf",
      "name": "qdrant",
      "createdAt": "2024-12-10T11:56:59.987Z",
      "updatedAt": "2024-12-10T11:56:59.987Z"
    }
  ],
  "nodes": [
    {
      "id": "53831410-b4f3-4374-8bdd-c2a33cd873cb",
      "name": "When clicking ‘Test workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -640,
        0
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "e303ccea-c0e0-4fe5-bd31-48380a0e438f",
      "name": "Google Cloud Storage",
      "type": "n8n-nodes-base.googleCloudStorage",
      "position": [
        820,
        160
      ],
      "parameters": {
        "resource": "object",
        "returnAll": true,
        "bucketName": "n8n-qdrant-demo",
        "listFilters": {
          "prefix": "agricultural-crops"
        },
        "requestOptions": {}
      },
      "credentials": {
        "googleCloudStorageOAuth2Api": {
          "id": "fn0sr7grtfprVQvL",
          "name": "Google Cloud Storage account"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "737bdb15-61cf-48eb-96af-569eb5986ee8",
      "name": "Get fields for Qdrant",
      "type": "n8n-nodes-base.set",
      "position": [
        1080,
        160
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "10d9147f-1c0c-4357-8413-3130829c2e24",
              "name": "=publicLink",
              "type": "string",
              "value": "=https://storage.googleapis.com/{{ $json.bucket }}/{{ $json.selfLink.split('/').splice(-1) }}"
            },
            {
              "id": "ff9e6a0b-e47a-4550-a13b-465507c75f8f",
              "name": "cropName",
              "type": "string",
              "value": "={{ $json.id.split('/').slice(-3, -2)[0].toLowerCase()}}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "2b18ed0c-38d3-49e9-be3d-4f7b35f4d9e5",
      "name": "Qdrant cluster variables",
      "type": "n8n-nodes-base.set",
      "position": [
        -360,
        0
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "58b7384d-fd0c-44aa-9f8e-0306a99be431",
              "name": "qdrantCloudURL",
              "type": "string",
              "value": "=https://152bc6e2-832a-415c-a1aa-fb529f8baf8d.eu-central-1-0.aws.cloud.qdrant.io"
            },
            {
              "id": "e34c4d88-b102-43cc-a09e-e0553f2da23a",
              "name": "collectionName",
              "type": "string",
              "value": "=agricultural-crops"
            },
            {
              "id": "33581e0a-307f-4380-9533-615791096de7",
              "name": "VoyageEmbeddingsDim",
              "type": "number",
              "value": 1024
            },
            {
              "id": "6e390343-2cd2-4559-aba9-82b13acb7f52",
              "name": "batchSize",
              "type": "number",
              "value": 4
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "f88d290e-3311-4322-b2a5-1350fc1f8768",
      "name": "Embed crop image",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        2120,
        160
      ],
      "parameters": {
        "url": "https://api.voyageai.com/v1/multimodalembeddings",
        "method": "POST",
        "options": {},
        "jsonBody": "={{\n{\n  \"inputs\": $json.batchVoyage,\n  \"model\": \"voyage-multimodal-3\",\n  \"input_type\": \"document\"\n}\n}}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "genericCredentialType",
        "genericAuthType": "httpHeaderAuth"
      },
      "credentials": {
        "httpHeaderAuth": {
          "id": "Vb0RNVDnIHmgnZOP",
          "name": "Voyage API"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "250c6a8d-f545-4037-8069-c834437bbe15",
      "name": "Create Qdrant Collection",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        320,
        160
      ],
      "parameters": {
        "url": "={{ $('Qdrant cluster variables').first().json.qdrantCloudURL }}/collections/{{ $('Qdrant cluster variables').first().json.collectionName }}",
        "method": "PUT",
        "options": {},
        "jsonBody": "={{\n{\n  \"vectors\": {\n    \"voyage\": { \n      \"size\": $('Qdrant cluster variables').first().json.VoyageEmbeddingsDim, \n      \"distance\": \"Cosine\" \n    } \n  }\n}\n}}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "qdrantApi"
      },
      "credentials": {
        "qdrantApi": {
          "id": "it3j3hP9FICqhgX6",
          "name": "QdrantApi account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "20b612ff-4794-43ef-bf45-008a16a2f30f",
      "name": "Check Qdrant Collection Existence",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        -100,
        0
      ],
      "parameters": {
        "url": "={{ $json.qdrantCloudURL }}/collections/{{ $json.collectionName }}/exists",
        "options": {},
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "qdrantApi"
      },
      "credentials": {
        "qdrantApi": {
          "id": "it3j3hP9FICqhgX6",
          "name": "QdrantApi account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "c067740b-5de3-452e-a614-bf14985a73a0",
      "name": "Batches in the API's format",
      "type": "n8n-nodes-base.set",
      "position": [
        1860,
        160
      ],
      "parameters": {
        "options": {},
        "assignments": {
          "assignments": [
            {
              "id": "f14db112-6f15-4405-aa47-8cb56bb8ae7a",
              "name": "=batchVoyage",
              "type": "array",
              "value": "={{ $json.batch.map(item => ({ \"content\": ([{\"type\": \"image_url\", \"image_url\": item[\"publicLink\"]}])}))}}"
            },
            {
              "id": "3885fd69-66f5-4435-86a4-b80eaa568ac1",
              "name": "=batchPayloadQdrant",
              "type": "array",
              "value": "={{ $json.batch.map(item => ({\"crop_name\":item[\"cropName\"], \"image_path\":item[\"publicLink\"]})) }}"
            },
            {
              "id": "8ea7a91e-af27-49cb-9a29-41dae15c4e33",
              "name": "uuids",
              "type": "array",
              "value": "={{ $json.uuids }}"
            }
          ]
        }
      },
      "typeVersion": 3.4
    },
    {
      "id": "bf9a9532-db64-4c02-b91d-47e708ded4d3",
      "name": "Batch Upload to Qdrant",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        2320,
        160
      ],
      "parameters": {
        "url": "={{ $('Qdrant cluster variables').first().json.qdrantCloudURL }}/collections/{{ $('Qdrant cluster variables').first().json.collectionName }}/points",
        "method": "PUT",
        "options": {},
        "jsonBody": "={{\n{\n  \"batch\": {\n      \"ids\" : $('Batches in the API\\'s format').item.json.uuids,\n      \"vectors\": {\"voyage\": $json.data.map(item => item[\"embedding\"]) },\n      \"payloads\": $('Batches in the API\\'s format').item.json.batchPayloadQdrant\n  }\n}\n}}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "qdrantApi"
      },
      "credentials": {
        "qdrantApi": {
          "id": "it3j3hP9FICqhgX6",
          "name": "QdrantApi account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "3c30373f-c84c-405f-bb84-ec8b4c7419f4",
      "name": "Split in batches, generate uuids for Qdrant points",
      "type": "n8n-nodes-base.code",
      "position": [
        1600,
        160
      ],
      "parameters": {
        "language": "python",
        "pythonCode": "import uuid\n\ncrops = [item.json for item in _input.all()]\nbatch_size = int(_('Qdrant cluster variables').first()['json']['batchSize'])\n\ndef split_into_batches_add_uuids(array, batch_size):\n    return [\n      {\n        \"batch\": array[i:i + batch_size],\n        \"uuids\": [str(uuid.uuid4()) for j in range(len(array[i:i + batch_size]))]\n      }\n       for i in range(0, len(array), batch_size)\n    ]\n\n# Split crops into batches\nbatched_crops = split_into_batches_add_uuids(crops, batch_size)\n\nreturn batched_crops"
      },
      "typeVersion": 2
    },
    {
      "id": "2b028f8c-0a4c-4a3a-9e2b-14b1c2401c6d",
      "name": "If collection exists",
      "type": "n8n-nodes-base.if",
      "position": [
        120,
        0
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "2104b862-667c-4a34-8888-9cb81a2e10f8",
              "operator": {
                "type": "boolean",
                "operation": "true",
                "singleValue": true
              },
              "leftValue": "={{ $json.result.exists }}",
              "rightValue": "true"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "768793f6-391e-4cc9-b637-f32ee2f77156",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        500,
        340
      ],
      "parameters": {
        "width": 280,
        "height": 200,
        "content": "在下一个工作流中,我们将使用Qdrant获取属于每个由`crop_name`定义的农作物类型(例如*\"黄瓜\"*)的图像数量;因此这里先在`crop_name`字段上创建有效载荷索引(`keyword`类型)。"
      },
      "typeVersion": 1
    },
    {
      "id": "0c8896f7-8c57-4add-bc4d-03c4a774bdf1",
      "name": "Payload index on crop_name",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        500,
        160
      ],
      "parameters": {
        "url": "={{ $('Qdrant cluster variables').first().json.qdrantCloudURL }}/collections/{{ $('Qdrant cluster variables').first().json.collectionName }}/index",
        "method": "PUT",
        "options": {},
        "jsonBody": "={\n  \"field_name\": \"crop_name\",\n  \"field_schema\": \"keyword\"\n}",
        "sendBody": true,
        "specifyBody": "json",
        "authentication": "predefinedCredentialType",
        "nodeCredentialType": "qdrantApi"
      },
      "credentials": {
        "qdrantApi": {
          "id": "it3j3hP9FICqhgX6",
          "name": "QdrantApi account"
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "342186f6-41bf-46be-9be8-a9b1ca290d55",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -360,
        -360
      ],
      "parameters": {
        "height": 300,
        "content": "设置变量"
      },
      "typeVersion": 1
    },
    {
      "id": "fae9248c-dbcc-4b6d-b977-0047f120a587",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -100,
        -220
      ],
      "parameters": {
        "content": "在Qdrant中,同名集合只能创建一次;如果尝试使用相同名称再次创建,将会报错,因此我在此添加了一个步骤,用于检查该名称的集合是否已存在。"
      },
      "typeVersion": 1
    },
    {
      "id": "f7aea242-3d98-4a1c-a98a-986ac2b4928b",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        180,
        340
      ],
      "parameters": {
        "height": 280,
        "content": "如果变量中设置的名称对应的集合尚不存在,我将创建一个空集合;"
      },
      "typeVersion": 1
    },
    {
      "id": "b84045c1-f66a-4543-8d42-1e76de0b6e91",
      "name": "便签4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        800,
        -280
      ],
      "parameters": {
        "height": 400,
        "content": "现在是时候嵌入我们的图像数据集并上传到Qdrant了;"
      },
      "typeVersion": 1
    },
    {
      "id": "255dfad8-c545-4d75-bc9c-529aa50447a9",
      "name": "便签5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1080,
        -140
      ],
      "parameters": {
        "height": 240,
        "content": "Google存储节点返回**mediaLink**,可直接用于下载图像;然而,我们只需要公共图像URL,以便Voyage API可以处理它;因此我们在这里构建这个公共链接,并从存储图像的文件夹中提取农作物名称(例如*\"黄瓜\"*)"
      },
      "typeVersion": 1
    },
    {
      "id": "a6acce75-cce0-4de3-bc64-37592c97359b",
      "name": "便签6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1600,
        -80
      ],
      "parameters": {
        "height": 180,
        "content": "我将图像重新分组为`batchSize`大小的批次,并为实现批量上传到Qdrant,生成UUID作为批次[点ID](https://qdrant.tech/documentation/concepts/points/#point-ids)(Qdrant不为用户设置ID;用户必须自行选择)"
      },
      "typeVersion": 1
    },
    {
      "id": "cab3cc83-b50c-41f4-8d51-59e04bba5556",
      "name": "便签7",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1340,
        -60
      ],
      "parameters": {
        "content": "由于我们基于农作物数据集构建异常检测,为了正确测试,我完全没有将番茄图片上传到Qdrant;我在此处将其过滤掉"
      },
      "typeVersion": 1
    },
    {
      "id": "e5cdcce5-efdc-41f2-9796-656bd345f783",
      "name": "便签8",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1860,
        -100
      ],
      "parameters": {
        "height": 200,
        "content": "由于Voyage API需要[特定的json结构](https://docs.voyageai.com/reference/multimodal-embeddings-api)进行批量嵌入,[Qdrant的API批量上传点](https://api.qdrant.tech/api-reference/points/upsert-points)也是如此,我正在调整json的结构"
      },
      "typeVersion": 1
    },
    {
      "id": "a7f15c44-3d5c-4b43-bfb2-94fe27a32071",
      "name": "便签11",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        2120,
        20
      ],
      "parameters": {
        "width": 180,
        "height": 80,
        "content": "使用Voyage模型嵌入图像(注意`input_type`)"
      },
      "typeVersion": 1
    },
    {
      "id": "01b92e7e-d954-4d58-85b1-109c336546c4",
      "name": "Filtering out tomato to test anomalies",
      "type": "n8n-nodes-base.filter",
      "position": [
        1340,
        160
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "f7953ae2-5333-4805-abe5-abf6da645c5e",
              "operator": {
                "type": "string",
                "operation": "notEquals"
              },
              "leftValue": "={{ $json.cropName }}",
              "rightValue": "tomato"
            }
          ]
        }
      },
      "typeVersion": 2.2
    },
    {
      "id": "8d564817-885e-453a-a087-900b34b84d9c",
      "name": "便签9",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -1160,
        -280
      ],
      "parameters": {
        "width": 440,
        "height": 460,
        "content": "## 批量上传数据集到Qdrant"
      },
      "typeVersion": 1
    },
    {
      "id": "0233d3d0-bbdf-4d5b-a366-53cbfa4b6f9c",
      "name": "便签10",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -860,
        360
      ],
      "parameters": {
        "color": 4,
        "width": 540,
        "height": 420,
        "content": "### 用于异常检测"
      },
      "typeVersion": 1
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "27776c4a-3bf9-4704-9c13-345b75ffacc0",
  "connections": {
    "Embed crop image": {
      "main": [
        [
          {
            "node": "Batch Upload to Qdrant",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Cloud Storage": {
      "main": [
        [
          {
            "node": "Get fields for Qdrant",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "If collection exists": {
      "main": [
        [
          {
            "node": "Google Cloud Storage",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Create Qdrant Collection",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get fields for Qdrant": {
      "main": [
        [
          {
            "node": "Filtering out tomato to test anomalies",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Batch Upload to Qdrant": {
      "main": [
        []
      ]
    },
    "Create Qdrant Collection": {
      "main": [
        [
          {
            "node": "Payload index on crop_name",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Qdrant cluster variables": {
      "main": [
        [
          {
            "node": "Check Qdrant Collection Existence",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Payload index on crop_name": {
      "main": [
        [
          {
            "node": "Google Cloud Storage",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Batches in the API's format": {
      "main": [
        [
          {
            "node": "Embed crop image",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Qdrant Collection Existence": {
      "main": [
        [
          {
            "node": "If collection exists",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Qdrant cluster variables",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Filtering out tomato to test anomalies": {
      "main": [
        [
          {
            "node": "Split in batches, generate uuids for Qdrant points",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Split in batches, generate uuids for Qdrant points": {
      "main": [
        [
          {
            "node": "Batches in the API's format",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

适合需要将图像数据集批量上传到 Qdrant、为后续异常检测或 KNN 分类做准备的场景(难度:高级;分类:人工智能)。

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Voyage AI API、Qdrant Cloud、Google Cloud Storage)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量:25
分类:1
节点类型:8
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

作者

Qdrant DevRel, ML/NLP/math nerd with yapping skills

外部链接
在 n8n.io 查看

分享此工作流