n8n 中文网 amn8n.com

使用 n8n、Scrapeless 和 Claude 构建 AI 驱动的网络数据管道

高级

这是一个 Engineering、AI 领域的自动化工作流，包含 20 个节点。主要使用 If、Set、Code、HttpRequest、ManualTrigger 等节点，结合人工智能技术实现智能自动化。使用 Claude、Ollama 和 Qdrant 从网络内容创建 AI 就绪的向量数据集

前置要求
  • 可能需要目标 API 的认证凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入，即可使用此工作流
{
  "id": "tTMZ2w3OvZFF1qDX",
  "meta": {
    "instanceId": "00078a12eb9132a267d925042ab574964c429326494199b73fefad612cec4111"
  },
  "name": "使用 n8n、Scrapeless 和 Claude 构建 AI 驱动的网络数据管道",
  "tags": [
    {
      "id": "Cu2uFDtw5wsdcHBH",
      "name": "Building Blocks",
      "createdAt": "2025-05-19T02:37:48.404Z",
      "updatedAt": "2025-05-19T02:37:48.404Z"
    },
    {
      "id": "PBConYPLh7mnOKsG",
      "name": "AI",
      "createdAt": "2025-05-19T02:37:48.399Z",
      "updatedAt": "2025-05-19T02:37:48.399Z"
    },
    {
      "id": "vhgqzFa23bYmJ6xM",
      "name": "Engineering",
      "createdAt": "2025-05-19T02:37:48.394Z",
      "updatedAt": "2025-05-19T02:37:48.394Z"
    }
  ],
  "nodes": [
    {
      "id": "05f02bd8-01d5-49fa-a6cf-989499d1b299",
      "name": "当点击\"测试工作流\"时",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -600,
        160
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "0102acf2-84f4-4bdb-939a-1f6653abd61f",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -420,
        500
      ],
      "parameters": {
        "width": 480,
        "height": 353,
        "content": "## 注意"
      },
      "typeVersion": 1
    },
    {
      "id": "279c7fef-a0fa-40c6-84e0-3f47c64f61d0",
      "name": "设置字段 - URL 和 Webhook URL",
      "type": "n8n-nodes-base.set",
      "notes": "Configure URL, webhook Discord, and Scrapeless parameters",
      "position": [
        140,
        200
      ],
      "parameters": {
        "options": {}
      },
      "notesInFlow": true,
      "typeVersion": 3.4
    },
    {
      "id": "9f4ae239-db55-418a-9984-0b7291432484",
      "name": "Scrapeless 网络请求",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        600,
        260
      ],
      "parameters": {
        "url": "https://api.scrapeless.com/api/v1/unlocker/request",
        "method": "POST",
        "options": {},
        "jsonBody": "{\n  \"actor\": \"unlocker.webunlocker\",\n  \"proxy\": {\n    \"country\": \"ANY\"\n  },\n  \"input\": {\n    \"url\": \"https://news.ycombinator.com/\",\n    \"method\": \"GET\",\n    \"redirect\": true,\n    \"js_render\": true,\n    \"js_instructions\": [\n      {\n        \"wait\": 100\n      }\n    ],\n    \"block\": {\n      \"resources\": [\n        \"image\",\n        \"font\",\n        \"script\"\n      ]\n    }\n  }\n}",
        "sendBody": true,
        "sendHeaders": true,
        "specifyBody": "json",
        "headerParameters": {
          "parameters": [
            {
              "name": "x-api-token",
              "value": "scrapeless_api_key"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "d3592464-2890-4a78-ad00-1f2744c33cb3",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1000,
        220
      ],
      "parameters": {
        "width": 299.4593773279841,
        "height": 275.17733400027635,
        "content": "## AI 数据格式化器"
      },
      "typeVersion": 1
    },
    {
      "id": "d1660d56-623b-4a13-b527-95f8304a7193",
      "name": "Sticky Note2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1820,
        640
      ],
      "parameters": {
        "color": 4,
        "width": 691.0849556663684,
        "height": 430.23565450317744,
        "content": "## 向量数据库持久化"
      },
      "typeVersion": 1
    },
    {
      "id": "e9cd437d-478a-40f4-9a27-df9f6ef84b3f",
      "name": "Sticky Note3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1840,
        160
      ],
      "parameters": {
        "color": 3,
        "width": 636.0351499864845,
        "height": 305.42311858115056,
        "content": "## Webhook Discord 处理器"
      },
      "typeVersion": 1
    },
    {
      "id": "d78741da-460d-4c27-9e9a-64be81c76513",
      "name": "便签4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1040,
        680
      ],
      "parameters": {
        "color": 5,
        "width": 720,
        "height": 392.5761165830749,
        "content": "## 使用 Claude AI Agent 进行数据提取/格式化"
      },
      "typeVersion": 1
    },
    {
      "id": "4bde24dc-931f-40ef-9453-7978fd04fc1a",
      "name": "格式化 Claude 输出",
      "type": "n8n-nodes-base.code",
      "position": [
        1620,
        860
      ],
      "parameters": {
        "jsCode": "// Format Claude Output - Parse and structure Claude response\n// Second node: Formats Claude API response for Qdrant and workflow\n\nconst claudeResponse = items[0].json;\n\nif (claudeResponse.error) {\n  console.error('❌ Received error from Claude extractor:', claudeResponse.message);\n  return [{\n    json: {\n      id: Math.random().toString(36).substr(2, 9),\n      page_type: \"error\",\n      metadata: {\n        title: \"Extraction Error\",\n        description: `Error during extraction: ${claudeResponse.message}`,\n        url: \"Unknown\",\n        extracted_at: new Date().toISOString(),\n        error: true\n      },\n      content: {\n        main_text: `Processing failed: ${claudeResponse.message}`,\n        summary: \"Data extraction failed\"\n      },\n      vector_ready: false,\n      processing_error: claudeResponse\n    }\n  }];\n}\n\nlet extractedData = {};\n\ntry {\n  if (claudeResponse.content && Array.isArray(claudeResponse.content)) {\n    const responseText = claudeResponse.content[0].text;\n    console.log('🔍 Processing Claude response text...');\n    \n    const jsonMatch = responseText.match(/```json\\n([\\s\\S]*?)\\n```/) || responseText.match(/\\{[\\s\\S]*\\}/);\n    \n    if (jsonMatch) {\n      try {\n        extractedData = JSON.parse(jsonMatch[1] || jsonMatch[0]);\n        console.log('✅ Successfully parsed Claude JSON response');\n      } catch (parseError) {\n        console.error('❌ JSON parsing error:', parseError);\n        \n        extractedData = {\n          page_type: \"parse_error\",\n          metadata: {\n            title: \"JSON Parse Error\",\n            description: \"Failed to parse Claude response as JSON\",\n            url: \"Unknown\",\n            extracted_at: new Date().toISOString(),\n            parse_error: parseError.message\n          },\n          content: {\n            main_text: responseText,\n            summary: \"Raw Claude response (unparseable)\",\n            raw_response: 
responseText\n          }\n        };\n      }\n    } else {\n      console.warn('⚠️ No JSON structure found in Claude response');\n      \n      extractedData = {\n        page_type: \"unstructured\",\n        metadata: {\n          title: \"Unstructured Response\",\n          description: \"Claude response without JSON structure\",\n          url: \"Unknown\",\n          extracted_at: new Date().toISOString()\n        },\n        content: {\n          main_text: responseText,\n          summary: \"Unstructured content from Claude\",\n          raw_response: responseText\n        }\n      };\n    }\n  } else {\n    throw new Error('Unexpected Claude response format');\n  }\n\n  if (!extractedData.id) {\n    extractedData.id = Math.random().toString(36).substr(2, 9);\n  }\n\n  extractedData.technical_metadata = {\n    extraction_source: \"scrapeless\",\n    ai_processor: \"claude-3-7-sonnet-20250219\",\n    processing_timestamp: new Date().toISOString(),\n    workflow_version: \"n8n-v2\",\n    data_quality: extractedData.page_type !== \"error\" && extractedData.page_type !== \"parse_error\" ? \"high\" : \"low\"\n  };\n\n  extractedData.vector_ready = extractedData.content && extractedData.content.main_text ? 
true : false;\n\n  if (extractedData.content && extractedData.content.main_text) {\n    if (extractedData.content.main_text.length < 50) {\n      extractedData.technical_metadata.content_warning = \"Content too short for meaningful vectorization\";\n    }\n    \n    extractedData.searchable_content = [\n      extractedData.metadata?.title || '',\n      extractedData.metadata?.description || '',\n      extractedData.content.main_text || '',\n      extractedData.content.summary || '',\n      (extractedData.content.key_points || []).join(' '),\n      (extractedData.entities?.topics || []).join(' ')\n    ].filter(text => text.length > 0).join(' ');\n  }\n\n  console.log('✅ Format processing complete:', {\n    page_type: extractedData.page_type,\n    has_content: !!extractedData.content?.main_text,\n    vector_ready: extractedData.vector_ready,\n    id: extractedData.id\n  });\n\n  return [{ json: extractedData }];\n\n} catch (error) {\n  console.error('❌ Error during Claude response formatting:', error);\n  \n  return [{\n    json: {\n      id: Math.random().toString(36).substr(2, 9),\n      page_type: \"format_error\",\n      metadata: {\n        title: \"Formatting Error\",\n        description: `Error during response formatting: ${error.message}`,\n        url: \"Unknown\",\n        extracted_at: new Date().toISOString(),\n        error: true\n      },\n      content: {\n        main_text: `Formatting failed: ${error.message}`,\n        summary: \"Failed to format Claude response\"\n      },\n      technical_metadata: {\n        extraction_source: \"claude_formatter\",\n        error_details: error.message,\n        raw_claude_response: claudeResponse,\n        processing_timestamp: new Date().toISOString()\n      },\n      vector_ready: false\n    }\n  }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "9b524862-ed1b-4601-bfa6-928fbebde0f9",
      "name": "检查集合是否存在",
      "type": "n8n-nodes-base.httpRequest",
      "onError": "continueRegularOutput",
      "position": [
        -420,
        20
      ],
      "parameters": {
        "url": "http://localhost:6333/collections/hacker-news",
        "options": {},
        "sendHeaders": true,
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        }
      },
      "typeVersion": 4.2,
      "alwaysOutputData": true
    },
    {
      "id": "0c6d1977-4812-4cd9-aa0a-b5c7adeb7e16",
      "name": "集合存在检查",
      "type": "n8n-nodes-base.if",
      "position": [
        -240,
        20
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 1,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "64e5c63b-c488-44cc-9d26-2027e059c4b2",
              "operator": {
                "name": "filter.operator.equals",
                "type": "string",
                "operation": "equals"
              },
              "leftValue": "={{ $node['Check Collection Exists'].json.result ? $node['Check Collection Exists'].json.status : 'not_found' }}",
              "rightValue": "ok"
            }
          ]
        }
      },
      "typeVersion": 2
    },
    {
      "id": "22104741-3314-42fb-bc94-3a742af94245",
      "name": "创建 Qdrant 集合",
      "type": "n8n-nodes-base.httpRequest",
      "position": [
        0,
        0
      ],
      "parameters": {
        "url": "http://localhost:6333/collections/hacker-news",
        "method": "PUT",
        "options": {},
        "sendBody": true,
        "sendHeaders": true,
        "bodyParameters": {
          "parameters": [
            {}
          ]
        },
        "headerParameters": {
          "parameters": [
            {
              "name": "Content-Type",
              "value": "application/json"
            }
          ]
        }
      },
      "typeVersion": 4.2
    },
    {
      "id": "2b7c493b-cb8f-45e3-9167-159ec5f8aa8b",
      "name": "Scrapeless 配置信息",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        440,
        80
      ],
      "parameters": {
        "color": 6,
        "width": 441.35610553772244,
        "height": 368.2417530681812,
        "content": "## Scrapeless 配置"
      },
      "typeVersion": 1
    },
    {
      "id": "0431e4e1-d5fe-404b-8891-e8b4dc157d5f",
      "name": "Claude 数据提取器",
      "type": "n8n-nodes-base.code",
      "position": [
        1080,
        860
      ],
      "parameters": {
        "jsCode": "// Claude Data Extractor - Raw extraction from HTML\n// First node: Makes API call to Claude for content extraction\n\nconst inputData = items[0].json;\n\nlet htmlContent = '';\nif (inputData.data && inputData.data.html) {\n  htmlContent = inputData.data.html;\n} else if (inputData.data && inputData.data.content) {\n  htmlContent = inputData.data.content;\n} else if (inputData.content) {\n  htmlContent = inputData.content;\n} else {\n  htmlContent = JSON.stringify(inputData);\n}\n\nconst pageUrl = inputData.url || inputData.data?.url || 'Unknown URL';\n\nconst extractionPrompt = `You are an expert web content extractor. Analyze this HTML content and extract important information in a structured JSON format.\n\n**INSTRUCTIONS:**\n1. Identify the content type (article, e-commerce, blog, news, documentation, etc.)\n2. Extract relevant information based on the type\n3. Create structured and consistent JSON output\n4. Ignore technical HTML (menus, ads, footers, etc.)\n\n**REQUIRED OUTPUT FORMAT:**\n\\`\\`\\`json\n{\n  \"page_type\": \"article|product|blog|news|documentation|listing|other\",\n  \"metadata\": {\n    \"title\": \"Main page title\",\n    \"description\": \"Description or summary\",\n    \"url\": \"${pageUrl}\",\n    \"extracted_at\": \"${new Date().toISOString()}\",\n    \"language\": \"en|fr|es|...\",\n    \"author\": \"Author if available\",\n    \"date_published\": \"Date if available\",\n    \"tags\": [\"tag1\", \"tag2\"]\n  },\n  \"content\": {\n    \"main_text\": \"Main content extracted and cleaned\",\n    \"summary\": \"Summary in 2-3 sentences\",\n    \"key_points\": [\"Point 1\", \"Point 2\", \"Point 3\"],\n    \"sections\": [\n      {\n        \"title\": \"Section 1\",\n        \"content\": \"Section content\"\n      }\n    ]\n  },\n  \"structured_data\": {\n    // For e-commerce\n    \"price\": \"Price if product\",\n    \"currency\": \"EUR|USD|...\",\n    \"availability\": \"In stock/Out of stock\",\n    \"rating\": \"Rating 
if available\",\n    \n    // For articles/news\n    \"category\": \"Category\",\n    \"reading_time\": \"Estimated reading time\",\n    \n    // For all types\n    \"images\": [\"Image URL 1\", \"Image URL 2\"],\n    \"links\": [\n      {\"text\": \"Link text\", \"url\": \"Link URL\"}\n    ]\n  },\n  \"entities\": {\n    \"people\": [\"Names of people mentioned\"],\n    \"organizations\": [\"Organizations/companies\"],\n    \"locations\": [\"Places mentioned\"],\n    \"technologies\": [\"Technologies/tools mentioned\"],\n    \"topics\": [\"Main topics\"]\n  }\n}\n\\`\\`\\`\n\n**HTML TO ANALYZE:**\n${htmlContent.substring(0, 15000)} ${htmlContent.length > 15000 ? '...[TRUNCATED]' : ''}\n\nReturn ONLY the structured JSON, without additional explanations.`;\n\nconst claudePayload = {\n  model: \"claude-3-7-sonnet-20250219\",\n  max_tokens: 4096,\n  messages: [\n    {\n      role: \"user\",\n      content: extractionPrompt\n    }\n  ]\n};\n\ntry {\n  const options = {\n    method: 'POST',\n    url: 'https://api.anthropic.com/v1/messages',\n    headers: {\n      'x-api-key': 'YOUR-API-KEY',\n      'content-type': 'application/json'\n    },\n    body: claudePayload,\n    json: true\n  };\n\n  const claudeResponse = await this.helpers.request(options);\n  console.log('✅ Claude extraction call successful');\n  \n  return [{ json: claudeResponse }];\n\n} catch (error) {\n  console.error('❌ Error during Claude extraction:', error);\n  \n  return [{\n    json: {\n      error: true,\n      message: error.message,\n      original_data: inputData,\n      timestamp: new Date().toISOString()\n    }\n  }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "b04dfca9-ebf0-46f7-b1e5-93ddf79e2451",
      "name": "Ollama 嵌入",
      "type": "n8n-nodes-base.code",
      "position": [
        1920,
        860
      ],
      "parameters": {
        "jsCode": "// Simple Ollama Embeddings\n// Gets text embeddings from Ollama using the all-minilm model (you can use other models)\n\nconst inputData = items[0].json;\n\nlet textToEmbed = '';\n\nif (inputData.content && typeof inputData.content === 'string') {\n  textToEmbed = inputData.content;\n} else if (inputData.content && inputData.content.main_text) {\n  textToEmbed = inputData.content.main_text;\n  \n  if (inputData.content.summary) {\n    textToEmbed += ' ' + inputData.content.summary;\n  }\n} else if (inputData.searchable_content) {\n  textToEmbed = inputData.searchable_content;\n} else if (inputData.metadata && inputData.metadata.title) {\n  textToEmbed = inputData.metadata.title;\n  if (inputData.metadata.description) {\n    textToEmbed += ' ' + inputData.metadata.description;\n  }\n} else {\n  textToEmbed = JSON.stringify(inputData).substring(0, 1000);\n}\n\ntextToEmbed = textToEmbed.substring(0, 2000);\n\ntry {\n  console.log('🔍 Getting embeddings for:', textToEmbed.substring(0, 100) + '...');\n  \n  const response = await this.helpers.request({\n    method: 'POST',\n    url: 'http://127.0.0.1:11434/api/embeddings',\n    headers: {\n      'Content-Type': 'application/json'\n    },\n    body: {\n      model: \"all-minilm\",\n      prompt: textToEmbed\n    },\n    json: true\n  });\n  \n  if (!response.embedding || !Array.isArray(response.embedding)) {\n    throw new Error('No valid embedding returned from Ollama');\n  }\n  \n  console.log(`✅ Got embedding with ${response.embedding.length} dimensions`);\n  \n  return [{\n    json: {\n      ...inputData,\n      vector: response.embedding,\n      vector_info: {\n        dimensions: response.embedding.length,\n        model: \"all-minilm\",\n        created_at: new Date().toISOString()\n      }\n    }\n  }];\n  \n} catch (error) {\n  console.error('❌ Error getting embeddings:', error);\n  \n  return [{\n    json: {\n      ...inputData,\n      error: true,\n      error_message: error.message,\n      
error_type: 'embedding_failed',\n      error_time: new Date().toISOString()\n    }\n  }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "17a38e65-1f04-4c2d-9fc7-fd05c2d7c14d",
      "name": "Qdrant 向量存储",
      "type": "n8n-nodes-base.code",
      "position": [
        2220,
        860
      ],
      "parameters": {
        "jsCode": "// Simple Qdrant Storage\n// Stores vectors in Qdrant\n\n// Get data with vector from Ollama\nconst inputData = items[0].json;\n\n// 1. Generate a valid Qdrant ID (must be integer)\nconst pointId = Math.floor(Math.random() * 1000000000);\n\n// 2. Extract basic metadata\nconst title = \n  (inputData.metadata && inputData.metadata.title) || \n  inputData.title || \n  'Untitled';\n\nconst url = \n  (inputData.metadata && inputData.metadata.url) || \n  inputData.url || \n  '';\n\n// 3. Check if we have a vector\nconst hasVector = inputData.vector && Array.isArray(inputData.vector) && inputData.vector.length > 0;\n\nif (!hasVector) {\n  console.error('❌ No valid vector found in input');\n  return [{\n    json: {\n      error: true,\n      message: 'No valid vector found',\n      id: pointId,\n      title: title\n    }\n  }];\n}\n\n// 4. Create Qdrant payload\nconst qdrantPayload = {\n  points: [\n    {\n      id: pointId,         \n      vector: inputData.vector,\n      payload: {\n        title: title,\n        url: url,\n        original_id: inputData.id || '',\n        \n        // Content\n        page_type: inputData.page_type || 'unknown',\n        content: typeof inputData.content === 'string' \n          ? inputData.content.substring(0, 1000) \n          : (inputData.content && inputData.content.main_text \n              ? inputData.content.main_text.substring(0, 1000) \n              : ''),\n        \n        author: (inputData.metadata && inputData.metadata.author) || '',\n        language: (inputData.metadata && inputData.metadata.language) || 'en',\n        tags: (inputData.metadata && inputData.metadata.tags) || [],\n        \n        vector_dimensions: inputData.vector.length,\n        stored_at: new Date().toISOString()\n      }\n    }\n  ]\n};\n\n// 5. 
Store in Qdrant\ntry {\n  console.log(`💾 Storing document \"${title}\" with ID ${pointId} in Qdrant`);\n  \n  const response = await this.helpers.request({\n    method: 'PUT',\n    url: 'http://127.0.0.1:6333/collections/hacker-news/points',\n    headers: {\n      'Content-Type': 'application/json'\n    },\n    body: qdrantPayload,\n    json: true\n  });\n  \n  console.log('✅ Successfully stored in Qdrant:', response);\n  \n  return [{\n    json: {\n      success: true,\n      id: pointId,\n      title: title,\n      vector_dimensions: inputData.vector.length,\n      qdrant_response: response,\n      timestamp: new Date().toISOString()\n    }\n  }];\n  \n} catch (error) {\n  console.error('❌ Error storing in Qdrant:', error);\n  \n  // Check if collection doesn't exist\n  if (error.message && (error.message.includes('404') || \n                         error.message.includes('collection not found'))) {\n    try {\n      // we already check if collection exist before but in case we verify it one more time\n      console.log('🔧 Creating collection \"hacker-news\"...');\n      \n      await this.helpers.request({\n        method: 'PUT',\n        url: 'http://127.0.0.1:6333/collections/hacker-news',\n        headers: {\n          'Content-Type': 'application/json'\n        },\n        body: {\n          vectors: {\n            size: inputData.vector.length,\n            distance: \"Cosine\"\n          }\n        },\n        json: true\n      });\n      \n      console.log('✅ Collection created, retrying storage...');\n      \n      const response = await this.helpers.request({\n        method: 'PUT',\n        url: 'http://127.0.0.1:6333/collections/hacker-news/points',\n        headers: {\n          'Content-Type': 'application/json'\n        },\n        body: qdrantPayload,\n        json: true\n      });\n      \n      return [{\n        json: {\n          success: true,\n          collection_created: true,\n          id: pointId,\n          title: title,\n          
vector_dimensions: inputData.vector.length,\n          qdrant_response: response,\n          timestamp: new Date().toISOString()\n        }\n      }];\n      \n    } catch (retryError) {\n      console.error('❌ Error creating collection:', retryError);\n      \n      return [{\n        json: {\n          error: true,\n          message: 'Failed to create collection: ' + retryError.message,\n          id: pointId,\n          title: title\n        }\n      }];\n    }\n  }\n  \n  return [{\n    json: {\n      error: true,\n      message: error.message,\n      id: pointId,\n      title: title,\n      timestamp: new Date().toISOString()\n    }\n  }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "c0939f66-cee8-44c2-9766-f33c1306dd45",
      "name": "Claude AI Agent",
      "type": "n8n-nodes-base.code",
      "position": [
        1360,
        920
      ],
      "parameters": {
        "jsCode": "// AI Agent - Enhanced Data Validation & Correction\n// Between Claude Data Extractor and Format Claude Output\n// Validates, enriches and corrects raw extraction\n\nconst claudeResponse = items[0].json;\n\nif (claudeResponse.error) {\n  console.log('⚠️ Received error from Claude Data Extractor, passing through...');\n  return [{ json: claudeResponse }];\n}\n\nlet extractedContent = '';\nif (claudeResponse.content && Array.isArray(claudeResponse.content)) {\n  extractedContent = claudeResponse.content[0].text;\n} else {\n  extractedContent = JSON.stringify(claudeResponse);\n}\n\nconst validationPrompt = `You are an AI data validator and enhancer. Analyze this raw extraction result and improve it.\n\n**ORIGINAL EXTRACTION RESULT:**\n${extractedContent}\n\n**YOUR TASKS:**\n1. **Validate the JSON Structure**: Ensure the extraction is valid JSON\n2. **Fix Parsing Errors**: Correct any malformed JSON or missing fields\n3. **Enhance Missing Data**: Fill in missing metadata when possible\n4. **Standardize Format**: Ensure consistent structure\n5. 
**Quality Check**: Verify content makes sense\n\n**VALIDATION & ENHANCEMENT RULES:**\n- If JSON is malformed, fix the syntax\n- If required fields are missing, add them with reasonable defaults\n- If content is too short, extract more from the raw data if available\n- If page_type is wrong, correct it based on content analysis\n- If dates are malformed, standardize them to ISO format\n- If URLs are partial, make them complete when possible\n\n**REQUIRED OUTPUT FORMAT:**\nReturn a VALID JSON object with this exact structure:\n\\`\\`\\`json\n{\n  \"page_type\": \"article|product|blog|news|documentation|listing|other\",\n  \"metadata\": {\n    \"title\": \"Actual page title (required)\",\n    \"description\": \"Actual description (required)\",\n    \"url\": \"Complete URL if available\",\n    \"extracted_at\": \"ISO timestamp\",\n    \"language\": \"en|fr|es|...\",\n    \"author\": \"Author name if found\",\n    \"date_published\": \"ISO date if found\",\n    \"tags\": [\"relevant\", \"tags\"]\n  },\n  \"content\": {\n    \"main_text\": \"Clean, readable main content (required)\",\n    \"summary\": \"2-3 sentence summary (required)\",\n    \"key_points\": [\"Important point 1\", \"Important point 2\"],\n    \"sections\": [\n      {\n        \"title\": \"Section title\",\n        \"content\": \"Section content\"\n      }\n    ]\n  },\n  \"structured_data\": {\n    \"price\": \"Product price if applicable\",\n    \"currency\": \"Currency code if applicable\", \n    \"availability\": \"Stock status if applicable\",\n    \"rating\": \"Rating if applicable\",\n    \"category\": \"Content category\",\n    \"reading_time\": \"Estimated reading time\",\n    \"images\": [\"Image URLs\"],\n    \"links\": [{\"text\": \"Link text\", \"url\": \"Link URL\"}]\n  },\n  \"entities\": {\n    \"people\": [\"Person names\"],\n    \"organizations\": [\"Company names\"],\n    \"locations\": [\"Place names\"],\n    \"technologies\": [\"Tech terms\"],\n    \"topics\": [\"Main topics\"]\n  
},\n  \"validation_info\": {\n    \"original_valid\": true/false,\n    \"corrections_made\": [\"List of fixes applied\"],\n    \"confidence_score\": 0.0-1.0,\n    \"quality_issues\": [\"Any remaining issues\"]\n  }\n}\n\\`\\`\\`\n\n**IMPORTANT:**\n- Return ONLY the corrected JSON, no explanations\n- Ensure ALL required fields have meaningful values\n- Fix any syntax errors in the original\n- If original is completely invalid, create a reasonable structure from available data`;\n\nconst enhancementPayload = {\n  model: \"claude-3-7-sonnet-20250219\",\n  max_tokens: 4096,\n  messages: [\n    {\n      role: \"user\",\n      content: validationPrompt\n    }\n  ]\n};\n\ntry {\n  const options = {\n    method: 'POST',\n    url: 'https://api.anthropic.com/v1/messages',\n    headers: {\n      'x-api-key': 'YOUR-API-KEY',\n      'content-type': 'application/json'\n    },\n    body: enhancementPayload,\n    json: true\n  };\n\n  console.log('🔍 AI Agent validating and enhancing extraction...');\n  \n  const aiResponse = await this.helpers.request(options);\n  \n  if (aiResponse.content && Array.isArray(aiResponse.content)) {\n    const enhancedText = aiResponse.content[0].text;\n    \n    const jsonMatch = enhancedText.match(/```json\\n([\\s\\S]*?)\\n```/) || enhancedText.match(/\\{[\\s\\S]*\\}/);\n    \n    if (jsonMatch) {\n      try {\n        const enhancedData = JSON.parse(jsonMatch[1] || jsonMatch[0]);\n        \n        enhancedData.ai_processing = {\n          processed_by: \"claude-ai-agent\",\n          processing_timestamp: new Date().toISOString(),\n          original_extraction_valid: !claudeResponse.error,\n          enhancements_applied: true\n        };\n        \n        console.log('✅ AI Agent enhancement successful:', {\n          page_type: enhancedData.page_type,\n          title: enhancedData.metadata?.title?.substring(0, 50) + '...',\n          confidence: enhancedData.validation_info?.confidence_score || 'unknown',\n          corrections: 
enhancedData.validation_info?.corrections_made?.length || 0\n        });\n        \n        return [{\n          json: {\n            content: [\n              {\n                text: JSON.stringify(enhancedData, null, 2)\n              }\n            ],\n            model: \"claude-3-7-sonnet-ai-agent\",\n            usage: aiResponse.usage || {}\n          }\n        }];\n        \n      } catch (parseError) {\n        console.error('❌ Failed to parse AI Agent response:', parseError);\n        return [{ json: claudeResponse }];\n      }\n    } else {\n      console.warn('⚠️ No JSON found in AI Agent response');\n      return [{ json: claudeResponse }];\n    }\n  } else {\n    throw new Error('Invalid AI Agent response format');\n  }\n\n} catch (error) {\n  console.error('❌ AI Agent error:', error);\n  \n  return [{\n    json: {\n      ...claudeResponse,\n      ai_agent_error: true,\n      ai_agent_error_message: error.message,\n      ai_agent_timestamp: new Date().toISOString()\n    }\n  }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "0cb93f10-3e59-4e38-bbc2-4bd7c809db27",
      "name": "用于结构化 AI agent 响应的 Webhook",
      "type": "n8n-nodes-base.code",
      "position": [
        2260,
        300
      ],
      "parameters": {
        "jsCode": "// Webhook Notification - Data Stored Success/Error\n\n// Get data from Qdrant Vector Store\nconst qdrantResult = items[0].json;\n\nconsole.log('📝 Qdrant result structure:', Object.keys(qdrantResult));\nconsole.log('📝 Full Qdrant result for debugging:', JSON.stringify(qdrantResult, null, 2).substring(0, 1000) + '...');\n\n// Configuration for webhooks - Add your URLs here\nconst webhooks = {\n  discord: \"\",\n  slack: \"\", \n  teams: \"\",\n  telegram: \"\",\n  custom: \"\"\n};\n\nlet isSuccess = false;\nlet errorDetails = {};\n\nif (qdrantResult.success === true) {\n  isSuccess = true;\n} else if (qdrantResult.qdrant_response && \n           qdrantResult.qdrant_response.status && \n           qdrantResult.qdrant_response.status.status === \"ok\") {\n  isSuccess = true;\n} else if (qdrantResult.status && qdrantResult.status.status === \"ok\") {\n  isSuccess = true;\n} else if (qdrantResult.qdrant_response && qdrantResult.qdrant_response.result) {\n  isSuccess = true;\n}\n\nif (!isSuccess) {\n  errorDetails = {\n    error_message: qdrantResult.message || qdrantResult.error_message || \"Unknown error\",\n    error_details: qdrantResult.error_details || {},\n    status_code: qdrantResult.status_code || qdrantResult.qdrant_response?.status_code,\n    raw_error: qdrantResult.error || qdrantResult.qdrant_response?.error || \"No specific error found\"\n  };\n  \n  console.log('❌ Detected error in Qdrant result:', errorDetails);\n}\n\nconst pointId = qdrantResult.point_info?.id || \n               (qdrantResult.qdrant_response?.result?.ids && qdrantResult.qdrant_response.result.ids[0]) || \n               qdrantResult.id ||\n               (isSuccess ? 
\"stored-but-no-id\" : \"not-stored\");\n\nconst itemTitle = qdrantResult.point_info?.title || \n                 qdrantResult.original_data?.title || \n                 qdrantResult.original_data?.metadata?.title ||\n                 qdrantResult.payload?.title ||\n                 qdrantResult.points?.[0]?.payload?.title ||\n                 (qdrantResult.points?.[0] ? \"Data without title\" : \"Untitled\");\n\nconst itemUrl = qdrantResult.original_data?.metadata?.url ||\n               qdrantResult.payload?.url ||\n               qdrantResult.points?.[0]?.payload?.url ||\n               qdrantResult.url ||\n               \"No URL available\";\n\nconst vectorDimensions = qdrantResult.point_info?.vector_dimensions || \n                        qdrantResult.vector?.length ||\n                        qdrantResult.points?.[0]?.vector?.length ||\n                        (qdrantResult.qdrant_response?.result?.vector_size) || \n                        \"unknown\";\n\nconst collectionName = qdrantResult.collection || \n                      (qdrantResult.qdrant_response?.collection_name) || \n                      \"hacker-news\";\n\nconst timestamp = new Date().toISOString();\nconst notificationData = {\n  status: isSuccess ? \"success\" : \"error\",\n  message: isSuccess \n    ? \"✅ Data successfully scraped and stored in vector database\" \n    : \"❌ Error storing data in vector database\",\n  details: {\n    id: pointId,\n    title: itemTitle?.substring(0, 100) + (itemTitle?.length > 100 ? \"...\" : \"\") || \"No title\",\n    url: itemUrl,\n    vector_size: vectorDimensions,\n    timestamp: timestamp,\n    collection: collectionName\n  },\n  error: !isSuccess ? 
errorDetails : undefined\n};\n\nfunction createMessageForPlatform(platform, data) {\n  switch (platform) {\n    case 'discord':\n      const fields = [\n        {\n          name: \"Item ID\",\n          value: data.details.id,\n          inline: true\n        },\n        {\n          name: \"Title\",\n          value: data.details.title || \"No title\",\n          inline: true\n        },\n        {\n          name: \"Collection\",\n          value: data.details.collection,\n          inline: true\n        },\n        {\n          name: \"Vector Size\",\n          value: `${data.details.vector_size} dimensions`,\n          inline: true\n        }\n      ];\n      \n      if (data.details.url && data.details.url !== \"No URL available\") {\n        fields.push({\n          name: \"URL\",\n          value: data.details.url,\n          inline: false\n        });\n      }\n      \n      if (data.error) {\n        fields.push({\n          name: \"Error Message\",\n          value: data.error.error_message || \"Unknown error\",\n          inline: false\n        });\n        \n        const errorDetailsStr = JSON.stringify(data.error.error_details, null, 2);\n        if (errorDetailsStr && errorDetailsStr !== \"{}\" && errorDetailsStr.length < 1000) {\n          fields.push({\n            name: \"Error Details\",\n            value: \"```json\\n\" + errorDetailsStr + \"\\n```\",\n            inline: false\n          });\n        }\n      }\n      \n      return {\n        embeds: [{\n          title: data.status === \"success\" ? \"✅ Vector Storage Success\" : \"❌ Vector Storage Error\",\n          description: data.message,\n          color: data.status === \"success\" ? 
0x00ff00 : 0xff0000,\n          fields: fields,\n          timestamp: data.details.timestamp,\n          footer: {\n            text: \"n8n Workflow - Vector DB\"\n          }\n        }]\n      };\n      \n    case 'slack':\n      const blocks = [\n        {\n          type: \"section\",\n          text: {\n            type: \"mrkdwn\",\n            text: `*${data.status === \"success\" ? \"✅ Vector Storage Success\" : \"❌ Vector Storage Error\"}*\\n${data.message}`\n          }\n        },\n        {\n          type: \"section\",\n          fields: [\n            {\n              type: \"mrkdwn\",\n              text: `*ID:*\\n${data.details.id}`\n            },\n            {\n              type: \"mrkdwn\",\n              text: `*Title:*\\n${data.details.title}`\n            },\n            {\n              type: \"mrkdwn\",\n              text: `*Collection:*\\n${data.details.collection}`\n            },\n            {\n              type: \"mrkdwn\",\n              text: `*Vector:*\\n${data.details.vector_size} dimensions`\n            }\n          ]\n        }\n      ];\n      \n      if (data.details.url && data.details.url !== \"No URL available\") {\n        blocks.push({\n          type: \"section\",\n          text: {\n            type: \"mrkdwn\",\n            text: `*URL:*\\n${data.details.url}`\n          }\n        });\n      }\n      \n      if (data.error) {\n        blocks.push({\n          type: \"section\",\n          text: {\n            type: \"mrkdwn\",\n            text: `*Error:*\\n${data.error.error_message}`\n          }\n        });\n      }\n      \n      blocks.push({\n        type: \"context\",\n        elements: [\n          {\n            type: \"mrkdwn\",\n            text: `⏰ ${data.details.timestamp}`\n          }\n        ]\n      });\n      \n      return { blocks };\n      \n    case 'teams':\n      const facts = [\n        {\n          name: \"ID\",\n          value: data.details.id\n        },\n        {\n          name: 
\"Title\",\n          value: data.details.title\n        },\n        {\n          name: \"Collection\",\n          value: data.details.collection\n        },\n        {\n          name: \"Vector Size\",\n          value: `${data.details.vector_size} dimensions`\n        },\n        {\n          name: \"Timestamp\",\n          value: data.details.timestamp\n        }\n      ];\n      \n      if (data.details.url && data.details.url !== \"No URL available\") {\n        facts.push({\n          name: \"URL\",\n          value: data.details.url\n        });\n      }\n      \n      if (data.error) {\n        facts.push({\n          name: \"Error\",\n          value: data.error.error_message\n        });\n      }\n      \n      return {\n        \"@type\": \"MessageCard\",\n        \"@context\": \"http://schema.org/extensions\",\n        \"themeColor\": data.status === \"success\" ? \"00FF00\" : \"FF0000\",\n        \"summary\": data.message,\n        \"sections\": [{\n          \"activityTitle\": data.status === \"success\" ? 
\"✅ Vector Storage Success\" : \"❌ Vector Storage Error\",\n          \"activitySubtitle\": data.message,\n          \"facts\": facts\n        }]\n      };\n      \n    default:\n      return {\n        status: data.status,\n        message: data.message,\n        details: data.details,\n        error: data.error,\n        timestamp: data.details.timestamp\n      };\n  }\n}\n\nasync function sendToWebhook(platform, webhookUrl, data) {\n  if (!webhookUrl || webhookUrl.trim() === \"\") {\n    console.log(`⚠️ No webhook URL for ${platform} - skipping`);\n    return { skipped: true, platform };\n  }\n  \n  try {\n    const message = createMessageForPlatform(platform, data);\n    \n    const options = {\n      method: 'POST',\n      url: webhookUrl,\n      headers: {\n        'Content-Type': 'application/json'\n      },\n      body: message,\n      json: true\n    };\n    \n    const response = await this.helpers.request(options);\n    console.log(`✅ Sent notification to ${platform}`);\n    \n    return {\n      success: true,\n      platform,\n      response: response\n    };\n  } catch (error) {\n    console.error(`❌ Error sending to ${platform}:`, error);\n    \n    return {\n      error: true,\n      platform,\n      message: error.message\n    };\n  }\n}\n\nasync function sendAllNotifications() {\n  const results = [];\n  \n  for (const [platform, webhookUrl] of Object.entries(webhooks)) {\n    const result = await sendToWebhook(platform, webhookUrl, notificationData);\n    results.push(result);\n  }\n  \n  return results;\n}\n\ntry {\n  const notificationResults = await sendAllNotifications();\n  \n  console.log('✅ Notification summary:', {\n    total: notificationResults.length,\n    success: notificationResults.filter(r => r.success).length,\n    skipped: notificationResults.filter(r => r.skipped).length,\n    errors: notificationResults.filter(r => r.error).length\n  });\n  \n  return [{\n    json: {\n      original_qdrant_result: qdrantResult,\n      
notification_results: notificationResults,\n      notification_data: notificationData,\n      is_success: isSuccess,\n      timestamp: new Date().toISOString()\n    }\n  }];\n  \n} catch (error) {\n  console.error('❌ Error in webhook notifications:', error);\n  \n  try {\n    const errorData = {\n      status: \"error\",\n      message: \"❌ Critical error in webhook notification\",\n      details: {\n        id: \"webhook-error\",\n        title: error.message,\n        url: \"N/A\",\n        vector_size: \"N/A\",\n        timestamp: new Date().toISOString(),\n        collection: \"N/A\"\n      },\n      error: {\n        error_message: error.message,\n        error_stack: error.stack\n      }\n    };\n    \n    if (webhooks.discord) {\n      const message = createMessageForPlatform('discord', errorData);\n      await this.helpers.request({\n        method: 'POST',\n        url: webhooks.discord,\n        headers: { 'Content-Type': 'application/json' },\n        body: message,\n        json: true\n      });\n    }\n  } catch (webhookError) {\n    console.error('💥 Critical error in error handler:', webhookError);\n  }\n  \n  return [{\n    json: {\n      error: true,\n      message: error.message,\n      original_data: qdrantResult\n    }\n  }];\n}"
      },
      "typeVersion": 2
    },
    {
      "id": "257f6f96-d02a-4fba-bd26-baf5aa3c3d89",
      "name": "导出数据 webhook",
      "type": "n8n-nodes-base.code",
      "position": [
        1900,
        320
      ],
      "parameters": {
        "jsCode": "const inputData = items[0].json;\n\nconst webhooks = {\n  discord: \"\",\n  slack: \"\",\n  linear: \"\",\n  teams: \"\",\n  telegram: \"\"\n};\n\nlet formattedData = {};\ntry {\n  if (inputData.content && Array.isArray(inputData.content)) {\n    const claudeText = inputData.content[0].text;\n    const jsonMatch = claudeText.match(/\\{[\\s\\S]*\\}/);\n    if (jsonMatch) {\n      formattedData = JSON.parse(jsonMatch[0]);\n    } else {\n      formattedData = { content: claudeText };\n    }\n  } else {\n    formattedData = inputData;\n  }\n} catch (parseError) {\n  console.error('Error parsing Claude response:', parseError);\n  formattedData = { \n    error: \"Parse error\", \n    raw_content: inputData \n  };\n}\n\nconst timestamp = new Date().toISOString().replace(/[:.]/g, '-');\nconst filename = `extracted-data-${timestamp}.txt`;\n\nconst fileContent = `🤖 EXTRACTED AND FORMATTED DATA\n=======================================\nTimestamp: ${new Date().toISOString()}\nSource: n8n Workflow (Scrapeless + Claude)\n=======================================\n\n📊 STRUCTURED DATA:\n${JSON.stringify(formattedData, null, 2)}\n\n=======================================\n🔍 RAW DATA (Debug):\n${JSON.stringify(inputData, null, 2)}\n=======================================`;\n\nasync function sendFileToWebhook(platform, webhookUrl, fileContent, filename) {\n  if (!webhookUrl || webhookUrl.trim() === \"\") {\n    console.log(`⚠️ ${platform} webhook URL empty - skipping`);\n    return { skipped: true, platform };\n  }\n  \n  try {\n    let formData;\n    let contentType;\n    \n    switch (platform) {\n      case 'discord':\n        formData = {\n          content: `🤖 **Extracted Data** - ${timestamp}`,\n          file: {\n            value: Buffer.from(fileContent, 'utf8'),\n            options: {\n              filename: filename,\n              contentType: 'text/plain'\n            }\n          }\n        };\n        contentType = 'multipart/form-data';\n        
break;\n        \n      case 'slack':\n        const slackMessage = {\n          text: `🤖 Extracted Data - ${timestamp}`,\n          blocks: [\n            {\n              type: \"section\",\n              text: {\n                type: \"mrkdwn\",\n                text: \"*📊 Extracted and Formatted Data*\"\n              }\n            },\n            {\n              type: \"section\",\n              text: {\n                type: \"mrkdwn\",\n                text: `\\`\\`\\`${fileContent.substring(0, 2800)}\\`\\`\\``\n              }\n            }\n          ]\n        };\n        \n        const response = await this.helpers.request({\n          method: 'POST',\n          url: webhookUrl,\n          headers: { 'Content-Type': 'application/json' },\n          body: slackMessage,\n          json: true\n        });\n        \n        return { success: true, platform, response, method: 'json_message' };\n        \n      case 'telegram':\n        formData = {\n          document: {\n            value: Buffer.from(fileContent, 'utf8'),\n            options: {\n              filename: filename,\n              contentType: 'text/plain'\n            }\n          },\n          caption: `🤖 Extracted Data - ${timestamp}`\n        };\n        contentType = 'multipart/form-data';\n        break;\n        \n      default:\n        const jsonMessage = {\n          text: `🤖 Extracted Data - ${timestamp}`,\n          attachment: {\n            filename: filename,\n            content: fileContent\n          },\n          metadata: {\n            timestamp: timestamp,\n            platform: platform\n          }\n        };\n        \n        const jsonResponse = await this.helpers.request({\n          method: 'POST',\n          url: webhookUrl,\n          headers: { 'Content-Type': 'application/json' },\n          body: jsonMessage,\n          json: true\n        });\n        \n        return { success: true, platform, response: jsonResponse, method: 'json_fallback' };\n    
}\n    \n    if (formData && contentType === 'multipart/form-data') {\n      const response = await this.helpers.request({\n        method: 'POST',\n        url: webhookUrl,\n        formData: formData,\n        headers: {}\n      });\n      \n      console.log(`✅ ${platform} file sent successfully`);\n      return { \n        success: true, \n        platform, \n        response: response,\n        method: 'file_upload',\n        filename: filename\n      };\n    }\n    \n  } catch (error) {\n    console.error(`❌ Error ${platform} webhook:`, error);\n    return { \n      error: true, \n      platform, \n      message: error.message || 'Unknown error'\n    };\n  }\n}\n\nconst results = [];\n\nfor (const [platform, webhookUrl] of Object.entries(webhooks)) {\n  const result = await sendFileToWebhook(platform, webhookUrl, fileContent, filename);\n  results.push(result);\n}\n\nreturn [{\n  json: {\n    webhook_results: results,\n    file_info: {\n      filename: filename,\n      size_bytes: Buffer.byteLength(fileContent, 'utf8'),\n      content_preview: fileContent.substring(0, 200) + '...'\n    },\n    formatted_data: formattedData,\n    timestamp: new Date().toISOString(),\n    summary: {\n      total_platforms: Object.keys(webhooks).length,\n      sent_successfully: results.filter(r => r.success).length,\n      skipped: results.filter(r => r.skipped).length,\n      errors: results.filter(r => r.error).length,\n      file_uploads: results.filter(r => r.method === 'file_upload').length,\n      json_messages: results.filter(r => r.method === 'json_message' || r.method === 'json_fallback').length\n    }\n  }\n}];"
      },
      "typeVersion": 2
    },
    {
      "id": "f704e1d8-2177-45f3-a34a-5e53b5fbe248",
      "name": "AI 数据检查器",
      "type": "n8n-nodes-base.code",
      "position": [
        1100,
        320
      ],
      "parameters": {
        "jsCode": "const inputData = items[0].json;\n\nlet htmlContent = '';\nif (inputData.data && inputData.data.html) {\n  htmlContent = inputData.data.html;\n} else if (inputData.data && inputData.data.content) {\n  htmlContent = inputData.data.content;\n} else if (inputData.content) {\n  htmlContent = inputData.content;\n} else if (inputData.data) {\n  htmlContent = JSON.stringify(inputData.data);\n} else {\n  htmlContent = JSON.stringify(inputData);\n}\n\nconst claudePayload = {\n  model: \"claude-3-7-sonnet-20250219\",\n  max_tokens: 4096,\n  messages: [\n    {\n      role: \"user\",\n      content: `Extract and format this HTML content into structured JSON. Focus on main articles, titles, and content. Return the data in this format:\n{\n  \"search_result\": {\n    \"title\": \"Page title or main heading\",\n    \"articles\": [\n      {\n        \"title\": \"Article title\",\n        \"content\": \"Article content/summary\",\n        \"url\": \"Article URL if available\"\n      }\n    ],\n    \"extracted_at\": \"${new Date().toISOString()}\"\n  }\n}\n\n\n\nHTML Content:\n${htmlContent}`\n    }\n  ]\n};\n\ntry {\n  const options = {\n    method: 'POST',\n    url: 'https://api.anthropic.com/v1/messages',\n    headers: {\n      'x-api-key': 'YOUR-API-KEY',\n      'content-type': 'application/json'\n    },\n    body: claudePayload,\n    json: true\n  };\n\n  const claudeResponse = await this.helpers.request(options);\n  \n  console.log('Claude Response:', JSON.stringify(claudeResponse, null, 2));\n  \n  return [{ json: claudeResponse }];\n  \n} catch (error) {\n  console.error('Error calling Claude API:', error);\n  \n  return [{\n    json: {\n      error: true,\n      message: error.message,\n      input_data: inputData\n    }\n  }];\n}"
      },
      "typeVersion": 2
    }
  ],
  "active": false,
  "pinData": {},
  "settings": {
    "executionOrder": "v1"
  },
  "versionId": "107aa993-f9c8-46a7-aafa-b75db5f66780",
  "connections": {
    "AI 数据检查器": {
      "main": [
        [
          {
            "node": "导出数据 webhook",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Claude AI Agent": {
      "main": [
        [
          {
            "node": "Format Claude Output",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Ollama Embeddings": {
      "main": [
        [
          {
            "node": "Qdrant Vector store",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Qdrant Vector store": {
      "main": [
        [
          {
            "node": "Webhook for structured AI agent response",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Format Claude Output": {
      "main": [
        [
          {
            "node": "Ollama Embeddings",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Claude Data extractor": {
      "main": [
        [
          {
            "node": "Claude AI Agent",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Scrapeless Web Request": {
      "main": [
        [
          {
            "node": "AI Data Checker",
            "type": "main",
            "index": 0
          },
          {
            "node": "Claude Data extractor",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Check Collection Exists": {
      "main": [
        [
          {
            "node": "Collection Exists Check",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Collection Exists Check": {
      "main": [
        [
          {
            "node": "设置字段 - URL 和 Webhook URL",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Create Qdrant Collection",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Create Qdrant Collection": {
      "main": [
        [
          {
            "node": "设置字段 - URL 和 Webhook URL",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "当点击\"测试工作流\"时": {
      "main": [
        [
          {
            "node": "Check Collection Exists",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "设置字段 - URL 和 Webhook URL": {
      "main": [
        [
          {
            "node": "Scrapeless Web Request",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 工程, 人工智能

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 Anthropic Claude API、Scrapeless 等)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量20
分类2
节点类型6
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

外部链接
在 n8n.io 查看

分享此工作流