8
n8n 中文网 · amn8n.com

法律基准测试的简单评估

高级

这是一个工程, 人工智能领域的自动化工作流,包含 19 个节点。主要使用 If, Wait, GoogleDrive, GoogleSheets, ManualTrigger 等节点,结合人工智能技术实现智能自动化。用途:法律基准测试的简单评估。

前置要求
  • Google Drive API 凭证
  • Google Sheets API 凭证
  • OpenRouter API 凭证
工作流预览
可视化展示节点连接关系,支持缩放和平移
导出工作流
复制以下 JSON 配置到 n8n 导入,即可使用此工作流
{
  "meta": {
    "instanceId": "45e293393b5dd8437fb351e5b1ef5511ef67e6e0826a1c10b9b68be850b67593"
  },
  "nodes": [
    {
      "id": "5c25edcc-987c-4b60-b472-6c476dc866c6",
      "name": "When clicking ‘Test workflow’",
      "type": "n8n-nodes-base.manualTrigger",
      "position": [
        -580,
        440
      ],
      "parameters": {},
      "typeVersion": 1
    },
    {
      "id": "60521a1e-3f00-4dd6-9651-a5f9ae1659ab",
      "name": "Loop Over Items",
      "type": "n8n-nodes-base.splitInBatches",
      "position": [
        -20,
        -180
      ],
      "parameters": {
        "options": {
          "reset": false
        },
        "batchSize": "=1"
      },
      "typeVersion": 3
    },
    {
      "id": "8e0fd8c4-51a1-44fb-8524-034df4fd4fe5",
      "name": "Wait",
      "type": "n8n-nodes-base.wait",
      "position": [
        1520,
        340
      ],
      "webhookId": "4e47e9cb-5e6d-4020-a2db-f60c04f2f727",
      "parameters": {
        "amount": 0.5
      },
      "typeVersion": 1.1
    },
    {
      "id": "0e457acc-5047-43ed-977f-f08714498ffc",
      "name": "OpenRouter Chat Model",
      "type": "@n8n/n8n-nodes-langchain.lmChatOpenRouter",
      "position": [
        740,
        320
      ],
      "parameters": {
        "model": "openai/gpt-4.1",
        "options": {}
      },
      "credentials": {
        "openRouterApi": {
          "id": "ipzDVYsZqbum9bX4",
          "name": "OpenRouter account 2"
        }
      },
      "typeVersion": 1
    },
    {
      "id": "46ce90ac-6b58-4770-a6bd-48fccefc8713",
      "name": "Extract from File",
      "type": "n8n-nodes-base.extractFromFile",
      "position": [
        500,
        20
      ],
      "parameters": {
        "options": {},
        "operation": "pdf"
      },
      "typeVersion": 1
    },
    {
      "id": "9e76d679-6f4b-419b-af36-771aba5b9e98",
      "name": "Google Drive",
      "type": "n8n-nodes-base.googleDrive",
      "onError": "continueRegularOutput",
      "position": [
        120,
        20
      ],
      "parameters": {
        "fileId": {
          "__rl": true,
          "mode": "id",
          "value": "={{ $json[\"URL\"].match(/[-\\w]{25,}/)[0] }}"
        },
        "options": {},
        "operation": "download"
      },
      "credentials": {
        "googleDriveOAuth2Api": {
          "id": "yej6mV2w6RslwOGo",
          "name": "Google Drive account"
        }
      },
      "typeVersion": 3,
      "alwaysOutputData": true
    },
    {
      "id": "8f2fc75f-ea5d-4700-b58b-b5f5e3e360e5",
      "name": "Get Tests",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        -420,
        440
      ],
      "parameters": {
        "options": {},
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": "gid=0",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/10l_gMtPsge00eTTltGrgvAo54qhh3_twEDsETrQLAGU/edit#gid=0",
          "cachedResultName": "Tests"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "10l_gMtPsge00eTTltGrgvAo54qhh3_twEDsETrQLAGU",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/10l_gMtPsge00eTTltGrgvAo54qhh3_twEDsETrQLAGU/edit?usp=drivesdk",
          "cachedResultName": "LLM Judge"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "04iXS2lwUVyzn6F2",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "49b0cc43-30d3-4331-a608-4f51c9ac0efd",
      "name": "Structured Output Parser",
      "type": "@n8n/n8n-nodes-langchain.outputParserStructured",
      "position": [
        900,
        320
      ],
      "parameters": {
        "jsonSchemaExample": "{\n  \"reasoning\": \"The Assistant fabricated a $1 million figure and a 12-month provision that are not found in the source. This breaches factual correctness and completeness. The output would mislead business stakeholders if used without correction.\",\n  \"decision\": \"Fail\"\n}"
      },
      "typeVersion": 1.2
    },
    {
      "id": "cf3cce00-33c4-4bcd-a2a2-75d7e49439d3",
      "name": "Update Results",
      "type": "n8n-nodes-base.googleSheets",
      "position": [
        1220,
        140
      ],
      "parameters": {
        "columns": {
          "value": {
            "ID": "={{ $('Loop Over Items').item.json[\"ID\"] }}",
            "URL": "={{ $('Loop Over Items').item.json[\"URL\"] }}",
            "Input": "={{ $('Loop Over Items').item.json[\"Input\"] }}",
            "Output": "={{ $('Loop Over Items').item.json[\"Output\"] }}",
            "Decision": "={{ $json.output.decision }}",
            "Test No.": "={{ $('Loop Over Items').item.json[\"Test No.\"] }}",
            "Reasoning": "={{ $json.output.reasoning }}",
            "AI Platform": "={{ $('Loop Over Items').item.json[\"AI Platform\"] }}",
            "Relevant Source Reference": "={{ $('Loop Over Items').item.json[\"Relevant Source Reference\"] }}"
          },
          "schema": [
            {
              "id": "ID",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "ID",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Test No.",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Test No.",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "AI Platform",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "AI Platform",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Relevant Source Reference",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Relevant Source Reference",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "URL",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "URL",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Input",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Input",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Output",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Output",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Decision",
              "type": "string",
              "display": true,
              "removed": false,
              "required": false,
              "displayName": "Decision",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            },
            {
              "id": "Reasoning",
              "type": "string",
              "display": true,
              "required": false,
              "displayName": "Reasoning",
              "defaultMatch": false,
              "canBeUsedToMatch": true
            }
          ],
          "mappingMode": "defineBelow",
          "matchingColumns": [],
          "attemptToConvertTypes": false,
          "convertFieldsToString": false
        },
        "options": {},
        "operation": "append",
        "sheetName": {
          "__rl": true,
          "mode": "list",
          "value": 537199982,
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/10l_gMtPsge00eTTltGrgvAo54qhh3_twEDsETrQLAGU/edit#gid=537199982",
          "cachedResultName": "Results"
        },
        "documentId": {
          "__rl": true,
          "mode": "list",
          "value": "10l_gMtPsge00eTTltGrgvAo54qhh3_twEDsETrQLAGU",
          "cachedResultUrl": "https://docs.google.com/spreadsheets/d/10l_gMtPsge00eTTltGrgvAo54qhh3_twEDsETrQLAGU/edit?usp=drivesdk",
          "cachedResultName": "Info Extraction Tasks (LLM Judge)"
        }
      },
      "credentials": {
        "googleSheetsOAuth2Api": {
          "id": "04iXS2lwUVyzn6F2",
          "name": "Google Sheets account"
        }
      },
      "typeVersion": 4.5
    },
    {
      "id": "80a9da50-93df-476a-a39f-ffece642a934",
      "name": "便签",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -540,
        220
      ],
      "parameters": {
        "color": 6,
        "width": 360,
        "height": 180,
        "content": "## 1. 获取测试用例"
      },
      "typeVersion": 1
    },
    {
      "id": "2bba0cc9-460d-48c3-bcc6-a79c2b00b23e",
      "name": "便签1",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -80,
        -400
      ],
      "parameters": {
        "color": 6,
        "width": 260,
        "height": 200,
        "content": "## 2. 遍历数据"
      },
      "typeVersion": 1
    },
    {
      "id": "7ccb6426-e79e-4cdc-ac5f-59c022f8978d",
      "name": "便签2",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        200,
        -180
      ],
      "parameters": {
        "color": 6,
        "width": 360,
        "content": "## 3. 将 PDF 作为文本抓取"
      },
      "typeVersion": 1
    },
    {
      "id": "6dc17e29-ea6c-460b-b0e0-99cc41977891",
      "name": "便签3",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        700,
        -180
      ],
      "parameters": {
        "color": 6,
        "width": 360,
        "height": 280,
        "content": "## 4. 评判 LLM 输出"
      },
      "typeVersion": 1
    },
    {
      "id": "1fd001a6-e67a-406c-b81c-c0a454ecd73e",
      "name": "LLM Judge",
      "type": "@n8n/n8n-nodes-langchain.chainLlm",
      "position": [
        740,
        140
      ],
      "parameters": {
        "text": "=INPUT:\n\n{\n  \"task\": {{ $('Loop Over Items').item.json['Input '] }},\n  \"source\": {{ $json.text }},\n  \"output\": {{ $('Loop Over Items').item.json['Output '] }}\n}\n\nOUTPUT:",
        "messages": {
          "messageValues": [
            {
              "message": "=You are an expert legal evaluator. Your role is to decide if each AI output for an information extraction task is fully accurate.\nInstructions:\nRead the extraction task and the source material carefully.\n\n\nExtract the correct answer for yourself, based only on the source material. (Do not include your answer in your output.)\n\n\nFor each AI output provided, do the following:\n\n\nCompare it against your own extraction.\n\n\nJudge “accuracy” as follows:\n\n\nPass: The output is complete (all required info present), faithful (statements fully supported by the source), and contains no hallucinated or omitted content.\n\n\nFail: Any omission, misstatement, unsupported addition, or distortion.\n\n\nIgnore differences in style, order, or formatting. Judge on factual substance only.\n\n\nFor each output, give:\n\n\nAccuracy: Pass or Fail\n\n\nReasoning: In 1-2 sentences, cite the specific content that was included, missed, or misstated. Be as objective and specific as possible.\n\n\nOutput Format:\n\nGive your answer in a JSON format. Don't include any other text than the JSON.\n\n{\n  \"decision\": \"Pass/Fail\",\n  \"reasoning\": \"[Concise explanation referencing the substance of the source]\"\n}\n\nGuidelines:\nUse your internal extraction for comparison, but do not include it in your output.\n\n\nFocus only on whether the output accurately and fully reflects the required information from the source.\n\n\nDo not consider writing style or format.\n\n\nJustify your decision with evidence from the source.\n\n\nExamples:\n\nAI Output 1:\n\n{\n  \"decision\": \"Pass\",\n  \"reasoning\": \"Correctly included the requirements for advance notice and refund as set out in the source, with no omissions.\"\n}\n\n\nAI Output 2:\n\n{\n  \"decision\": \"Fail\",\n  \"reasoning\": \"Missed the requirement that refunds are provided only as credit, not cash, making the output incomplete.\"\n}"
            }
          ]
        },
        "promptType": "define",
        "hasOutputParser": true
      },
      "typeVersion": 1.4
    },
    {
      "id": "2e94c252-9973-4915-ab21-838681e7ea5c",
      "name": "便签4",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1160,
        -80
      ],
      "parameters": {
        "color": 6,
        "width": 260,
        "height": 180,
        "content": "## 5. 更新结果"
      },
      "typeVersion": 1
    },
    {
      "id": "05d7a5c0-3ad5-4ec1-ae3f-f533e16f8027",
      "name": "便签5",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        1460,
        140
      ],
      "parameters": {
        "color": 6,
        "width": 220,
        "height": 180,
        "content": "## 6. 运行间暂停"
      },
      "typeVersion": 1
    },
    {
      "id": "099b10f3-c9cf-4fa0-9a07-ccb73ea11542",
      "name": "便签 6",
      "type": "n8n-nodes-base.stickyNote",
      "position": [
        -600,
        640
      ],
      "parameters": {
        "width": 460,
        "height": 280,
        "content": "## 数据格式"
      },
      "typeVersion": 1
    },
    {
      "id": "65532c6d-5638-432f-8f6a-a5a02b1aaf08",
      "name": "Is PDF?",
      "type": "n8n-nodes-base.if",
      "position": [
        -260,
        440
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "loose"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "1609d1f6-2142-4965-8d6b-01cfa53251c4",
              "operator": {
                "type": "string",
                "operation": "contains"
              },
              "leftValue": "={{ $json['Relevant Source Reference'] }}",
              "rightValue": ".pdf"
            }
          ]
        },
        "looseTypeValidation": true
      },
      "typeVersion": 2.2
    },
    {
      "id": "17aa2c76-7ea7-41fe-84ed-4e485ef4db5e",
      "name": "Is a file?",
      "type": "n8n-nodes-base.if",
      "position": [
        300,
        20
      ],
      "parameters": {
        "options": {},
        "conditions": {
          "options": {
            "version": 2,
            "leftValue": "",
            "caseSensitive": true,
            "typeValidation": "strict"
          },
          "combinator": "and",
          "conditions": [
            {
              "id": "ad7272d4-0840-4c71-bee4-64faf944e3ca",
              "operator": {
                "type": "object",
                "operation": "notEmpty",
                "singleValue": true
              },
              "leftValue": "={{ $binary }}",
              "rightValue": ""
            }
          ]
        }
      },
      "typeVersion": 2.2
    }
  ],
  "pinData": {},
  "connections": {
    "Wait": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Is PDF?": {
      "main": [
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Get Tests": {
      "main": [
        [
          {
            "node": "Is PDF?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "LLM Judge": {
      "main": [
        [
          {
            "node": "Update Results",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Is a file?": {
      "main": [
        [
          {
            "node": "Extract from File",
            "type": "main",
            "index": 0
          }
        ],
        [
          {
            "node": "Loop Over Items",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Google Drive": {
      "main": [
        [
          {
            "node": "Is a file?",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Update Results": {
      "main": [
        [
          {
            "node": "Wait",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Loop Over Items": {
      "main": [
        [],
        [
          {
            "node": "Google Drive",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "Extract from File": {
      "main": [
        [
          {
            "node": "LLM Judge",
            "type": "main",
            "index": 0
          }
        ]
      ]
    },
    "OpenRouter Chat Model": {
      "ai_languageModel": [
        [
          {
            "node": "LLM Judge",
            "type": "ai_languageModel",
            "index": 0
          }
        ]
      ]
    },
    "Structured Output Parser": {
      "ai_outputParser": [
        [
          {
            "node": "LLM Judge",
            "type": "ai_outputParser",
            "index": 0
          }
        ]
      ]
    },
    "When clicking ‘Test workflow’": {
      "main": [
        [
          {
            "node": "Get Tests",
            "type": "main",
            "index": 0
          }
        ]
      ]
    }
  }
}
常见问题

如何使用这个工作流?

复制上方的 JSON 配置代码,在您的 n8n 实例中创建新工作流并选择「从 JSON 导入」,粘贴配置后根据需要修改凭证设置即可。

这个工作流适合什么场景?

高级 - 工程, 人工智能

需要付费吗?

本工作流完全免费,您可以直接导入使用。但请注意,工作流中使用的第三方服务(如 OpenRouter API)可能需要您自行付费。

工作流信息
难度等级
高级
节点数量:19
分类:2
节点类型:11
难度说明

适合高级用户,包含 16+ 个节点的复杂工作流

作者
Adam Janes

Adam Janes

@adamjanes

I am a product-minded technologist with hacker DNA building things in AI automation. I have a broad and varied background - having worked in Product, Design, and Sales - combined with deep technical experience as a Senior Developer and Fractional CTO. I am also a best-selling Udemy instructor (with 25K+ students), and founder of WOOFCODE - a free coding camp for fullstack developers. I practice non-violent communication, motivational interviewing, and Tibetan Buddhist meditation.

外部链接
在 n8n.io 查看

分享此工作流