Usar OpenAI para evaluar la precisión de las respuestas RAG: métricas basadas en documentos
Este es un flujo de automatización de la categoría Engineering, AI, perteneciente al dominio de la automatización, que contiene 25 nodos. Utiliza principalmente nodos como Set, Evaluation, HttpRequest, ManualTrigger y Agent, combinando tecnología de inteligencia artificial para lograr una automatización inteligente. Evalúa la precisión de las respuestas RAG con OpenAI mediante métricas basadas en documentos.
- Pueden requerirse credenciales de autenticación para la API de destino
- Clave de API de OpenAI
Nodos utilizados (25)
Categoría
{
"meta": {
"instanceId": "408f9fb9940c3cb18ffdef0e0150fe342d6e655c3a9fac21f0f644e8bedabcd9",
"templateCredsSetupCompleted": true
},
"nodes": [
{
"id": "976ded9c-2080-484e-ad64-eb3c6e671961",
"name": "Al hacer clic en 'Ejecutar flujo de trabajo'",
"type": "n8n-nodes-base.manualTrigger",
"position": [
-760,
-320
],
"parameters": {},
"typeVersion": 1
},
{
"id": "348dfa43-be32-48a8-a9e1-a0641852517e",
"name": "Obtener hoja de datos",
"type": "n8n-nodes-base.httpRequest",
"position": [
-540,
-320
],
"parameters": {
"url": "https://bitcoin.org/bitcoin.pdf",
"options": {}
},
"typeVersion": 4.2
},
{
"id": "e4854cab-8bc9-4e63-911f-a9a4f5993855",
"name": "Almacén vectorial simple",
"type": "@n8n/n8n-nodes-langchain.vectorStoreInMemory",
"position": [
-300,
-320
],
"parameters": {
"mode": "insert",
"memoryKey": "evaluations_document_groundness",
"clearStore": true
},
"typeVersion": 1.1
},
{
"id": "f69492e2-c85a-451c-9e02-f5902f214da6",
"name": "Embeddings OpenAI",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
-320,
-100
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "8gccIjcuf3gvaoEr",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "65131cfb-ac44-48fa-a648-033c0611e8ea",
"name": "Cargador de datos predeterminado",
"type": "@n8n/n8n-nodes-langchain.documentDefaultDataLoader",
"position": [
-200,
-100
],
"parameters": {
"options": {
"metadata": {
"metadataValues": [
{
"name": "origin",
"value": "evaluations"
}
]
}
},
"dataType": "binary"
},
"typeVersion": 1
},
{
"id": "0800667d-7b33-4c8b-bc61-d12ccf94a640",
"name": "Separador de texto recursivo por caracteres",
"type": "@n8n/n8n-nodes-langchain.textSplitterRecursiveCharacterTextSplitter",
"position": [
-120,
100
],
"parameters": {
"options": {}
},
"typeVersion": 1
},
{
"id": "04a8a558-07cf-41f4-85a9-37e79be90b52",
"name": "Agente de IA",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
-300,
620
],
"parameters": {
"options": {
"systemMessage": "You are a helpful reference assistant who fully grounds their answer using retrieved knowledgebase document. It is important not to give any information from other sources as the goal is to be faithful to the contents of the knowledgebase.",
"returnIntermediateSteps": true
}
},
"typeVersion": 2
},
{
"id": "6f6961f7-73c3-4e16-942e-e2da096fec94",
"name": "Modelo de chat OpenAI",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
-320,
845
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4o-mini"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "8gccIjcuf3gvaoEr",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "a063f080-9227-471f-8330-3da574e275d1",
"name": "Almacén vectorial simple1",
"type": "@n8n/n8n-nodes-langchain.vectorStoreInMemory",
"position": [
-200,
847.5
],
"parameters": {
"mode": "retrieve-as-tool",
"toolName": "bitcoin_whitepaper",
"memoryKey": "evaluations_document_groundness",
"toolDescription": "Call this tool to query over the bitcoin whitepaper to answer technical questions about bitcoin."
},
"typeVersion": 1.1
},
{
"id": "85584b59-0970-4bd9-ac96-f31a0046f20e",
"name": "Embeddings OpenAI1",
"type": "@n8n/n8n-nodes-langchain.embeddingsOpenAi",
"position": [
-120,
980
],
"parameters": {
"options": {}
},
"credentials": {
"openAiApi": {
"id": "8gccIjcuf3gvaoEr",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "a99b3732-f8ac-45d3-a2f5-7a1177dc5742",
"name": "Al recibir mensaje de chat",
"type": "@n8n/n8n-nodes-langchain.chatTrigger",
"position": [
-540,
725
],
"webhookId": "638886a2-37cc-4edd-9b9f-95b09b255ffb",
"parameters": {
"options": {}
},
"typeVersion": 1.1
},
{
"id": "c21ee596-bc9c-4bb3-8cac-f243a2d5102b",
"name": "Evaluación",
"type": "n8n-nodes-base.evaluation",
"position": [
240,
620
],
"parameters": {
"operation": "checkIfEvaluating"
},
"typeVersion": 4.6
},
{
"id": "64104870-1796-4fcc-8c0d-cc4302de5167",
"name": "Sin operación, no hacer nada",
"type": "n8n-nodes-base.noOp",
"position": [
460,
720
],
"parameters": {},
"typeVersion": 1
},
{
"id": "91e0a42e-6afc-463b-8f25-7117b057159d",
"name": "Obtener documentos",
"type": "n8n-nodes-base.set",
"position": [
460,
480
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "a1e75813-76a9-49ba-a8c1-1f2cd62d647c",
"name": "output",
"type": "string",
"value": "={{ $json.output }}"
},
{
"id": "94f08596-6060-4fb6-9998-ac1b3f0e730d",
"name": "documents",
"type": "array",
"value": "={{\n$json.intermediateSteps\n .find(step => step.action.tool === 'bitcoin_whitepaper')\n .observation\n}}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "8fba17b7-c24b-4a26-a998-e3d2f7acc481",
"name": "Al obtener fila de conjunto de datos",
"type": "n8n-nodes-base.evaluationTrigger",
"position": [
-760,
525
],
"parameters": {
"sheetName": {
"__rl": true,
"mode": "list",
"value": 633230352,
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit#gid=633230352",
"cachedResultName": "RAG Document Groundedness"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=drivesdk",
"cachedResultName": "96. Evaluations Test"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "XHvC7jIRR8A2TlUl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "611a6e12-f52b-4b3c-b74c-238a4f75f612",
"name": "Reasignar entrada",
"type": "n8n-nodes-base.set",
"position": [
-540,
525
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "cc14c8c8-5a61-46db-b1c7-6d9ddfb2e4c8",
"name": "chatInput",
"type": "string",
"value": "={{ $json.input }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "0c1e25b8-9145-443e-8c9f-34eb62aa7192",
"name": "Modelo de chat OpenAI1",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"position": [
660,
660
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4.1-mini",
"cachedResultName": "gpt-4.1-mini"
},
"options": {}
},
"credentials": {
"openAiApi": {
"id": "8gccIjcuf3gvaoEr",
"name": "OpenAi account"
}
},
"typeVersion": 1.2
},
{
"id": "a9d8a911-9519-4497-9127-0a2e3da9ae1b",
"name": "Analizador de salida estructurada",
"type": "@n8n/n8n-nodes-langchain.outputParserStructured",
"position": [
860,
660
],
"parameters": {
"jsonSchemaExample": "{\n \"rating\": 1,\n \"reason\": \"The date of birth of Einstein is mentioned clearly in the context.\"\n}"
},
"typeVersion": 1.2
},
{
"id": "038f408d-51f0-4625-b39d-e326f24850b4",
"name": "Fundamentación documental",
"type": "@n8n/n8n-nodes-langchain.chainLlm",
"position": [
660,
480
],
"parameters": {
"text": "=# Document and AI-generated Response\n## Documents\n{{ $json.documents\n .map(doc => `* ${doc.text.parseJson().pageContent.replaceAll('\\n', ' ')}`)\n .join('\\n')\n}}\n\n## AI-generated Response\n{{ $('Obtener documentos').first().json.output }}",
"batching": {},
"messages": {
"messageValues": [
{
"message": "=# Instruction\nYou are an expert evaluator. Your task is to evaluate the quality of the responses generated by AI models.\nWe will provide you with the user input and an AI-generated responses.\nYou should first read the user input carefully for analyzing the task, and then evaluate the quality of the responses based on the criteria provided in the Evaluation section below.\nYou will assign the response a rating following the Rating Rubric and Evaluation Steps. Give step-by-step explanations for your rating, and only choose ratings from the Rating Rubric.\n\n# Evaluation\n## Metric Definition\nYou will be assessing groundedness, which measures the ability to provide or reference information included only in the user prompt.\n\n## Criteria\nGroundedness: The response contains information included only in the document. The response does not reference any outside information.\n\n## Rating Rubric\n1: (Fully grounded). All aspects of the response are attributable to the context.\n0: (Not fully grounded). The entire response or a portion of the response is not attributable to the context provided by the documents.\n\n## Evaluation Steps\nSTEP 1: Assess the response in aspects of Groundedness. Identify any information in the response not present in the documents and provide assessment according to the criterion. \nSTEP 2: Score based on the rating rubric. Give a brief rationale to explain your evaluation considering Groundedness."
}
]
},
"promptType": "define",
"hasOutputParser": true
},
"typeVersion": 1.7
},
{
"id": "e867ed3e-df63-40e2-a3f6-0d19bbace13a",
"name": "Establecer salidas",
"type": "n8n-nodes-base.evaluation",
"position": [
1020,
480
],
"parameters": {
"outputs": {
"values": [
{
"outputName": "output",
"outputValue": "={{ $('Obtener documentos').item.json.output }}"
},
{
"outputName": "documents",
"outputValue": "={{ $('Obtener documentos').item.json.documents.map(doc => doc.text).join('\\n') }}"
},
{
"outputName": "score",
"outputValue": "={{ $json.output.rating }}"
},
{
"outputName": "score_reason",
"outputValue": "={{ $json.output.reason }}"
}
]
},
"sheetName": {
"__rl": true,
"mode": "list",
"value": 633230352,
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit#gid=633230352",
"cachedResultName": "RAG Document Groundedness"
},
"documentId": {
"__rl": true,
"mode": "list",
"value": "1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y",
"cachedResultUrl": "https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=drivesdk",
"cachedResultName": "96. Evaluations Test"
}
},
"credentials": {
"googleSheetsOAuth2Api": {
"id": "XHvC7jIRR8A2TlUl",
"name": "Google Sheets account"
}
},
"typeVersion": 4.6
},
{
"id": "7350d6c6-1a9d-4663-b509-de1c9d73588a",
"name": "Establecer métricas",
"type": "n8n-nodes-base.evaluation",
"position": [
1220,
480
],
"parameters": {
"metrics": {
"assignments": [
{
"id": "e3f04944-ab59-4e2f-83f8-6efa36816671",
"name": "score",
"type": "number",
"value": "={{ $json.output.rating }}"
}
]
},
"operation": "setMetrics"
},
"typeVersion": 4.6
},
{
"id": "e5313dae-6eff-4b71-83b1-0c89ff319662",
"name": "Nota adhesiva",
"type": "n8n-nodes-base.stickyNote",
"position": [
-840,
-500
],
"parameters": {
"color": 7,
"width": 960,
"height": 780,
"content": "## 1. Ready your RAG Vector Store\n[Read more about the Simple Vector Store](https://docs.n8n.io/integrations/builtin/cluster-nodes/root-nodes/n8n-nodes-langchain.vectorstoreinmemory/)\n\nFor this exercise, we'll use the Bitcoin Whitepaper as a source of documents for our evaluation."
},
"typeVersion": 1
},
{
"id": "36da4904-2256-4d4a-9a1d-17ad3af939d4",
"name": "Nota adhesiva1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-840,
300
],
"parameters": {
"color": 7,
"width": 960,
"height": 900,
"content": "## 2. Set Up Your AI Workflow to Use Evaluations\n[Learn more about the Evaluations Trigger](https://docs.n8n.io/integrations/builtin/?utm_source=n8n_app&utm_medium=node_settings_modal-credential_link&utm_campaign=n8n-nodes-base.evaluationTrigger)\n\nThe Evaluations Trigger is a separate execution which does not affect your production workflow in any way. It is manually triggered and automatically pulls datasets from the assigned Google Sheet."
},
"typeVersion": 1
},
{
"id": "d892b15f-126d-4fac-8458-167a9f4bf1b1",
"name": "Nota adhesiva2",
"type": "n8n-nodes-base.stickyNote",
"position": [
160,
300
],
"parameters": {
"color": 7,
"width": 1260,
"height": 640,
"content": "## 3. Document Groundedness: Is the AI response based on the retrieved documents?\n[Learn more about the Evaluations Node](https://docs.n8n.io/integrations/builtin/?utm_source=n8n_app&utm_medium=node_settings_modal-credential_link&utm_campaign=n8n-nodes-base.evaluation)\n\nFor this evaluation, we simply want to check if the Agent's answer was grounded in any of the documents retrieved from our vector store.\nA higher score represents a greater alignment between the retrieved information and the expected output, indicating that the retriever is effectively sourcing relevant and accurate content to aid the generator in producing contextually appropriate responses."
},
"typeVersion": 1
},
{
"id": "c500b17e-0c27-4121-9f89-40531b83f48e",
"name": "Nota adhesiva3",
"type": "n8n-nodes-base.stickyNote",
"position": [
-1280,
-500
],
"parameters": {
"width": 380,
"height": 880,
"content": "## Try It Out!\n### This n8n template demonstrates how to calculate the evaluation metric \"RAG document groundedness\" which, in this scenario, measures the ability to provide or reference information included only in retrieved vector store documents.\n\nThe scoring approach is adapted from [https://cloud.google.com/vertex-ai/generative-ai/docs/models/metrics-templates#pointwise_groundedness](https://cloud.google.com/vertex-ai/generative-ai/docs/models/metrics-templates#pointwise_groundedness)\n\n### How it works\n* This evaluation works best for an agent that requires document retrieval from a vector store or similar source.\n* For our scoring, we need to collect the agent's response and the documents retrieved and use an LLM to assess if the former is based off the latter.\n* A key factor is to look out for information in the response which is not mentioned in the documents.\n* A high score indicates LLM adherence and alignment whereas a low score could signal inadequate prompt or model hallucination.\n\n### Requirements\n* n8n version 1.94+\n* Check out this Google Sheet for sample data [https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=sharing](https://docs.google.com/spreadsheets/d/1YOnu2JJjlxd787AuYcg-wKbkjyjyZFgASYVV0jsij5Y/edit?usp=sharing)\n\n\n### Need Help?\nJoin the [Discord](https://discord.com/invite/XPKeKXeB7d) or ask in the [Forum](https://community.n8n.io/)!\n\nHappy Hacking!"
},
"typeVersion": 1
}
],
"pinData": {},
"connections": {
"04a8a558-07cf-41f4-85a9-37e79be90b52": {
"main": [
[
{
"node": "c21ee596-bc9c-4bb3-8cac-f243a2d5102b",
"type": "main",
"index": 0
}
]
]
},
"c21ee596-bc9c-4bb3-8cac-f243a2d5102b": {
"main": [
[
{
"node": "91e0a42e-6afc-463b-8f25-7117b057159d",
"type": "main",
"index": 0
}
],
[
{
"node": "64104870-1796-4fcc-8c0d-cc4302de5167",
"type": "main",
"index": 0
}
]
]
},
"611a6e12-f52b-4b3c-b74c-238a4f75f612": {
"main": [
[
{
"node": "04a8a558-07cf-41f4-85a9-37e79be90b52",
"type": "main",
"index": 0
}
]
]
},
"e867ed3e-df63-40e2-a3f6-0d19bbace13a": {
"main": [
[
{
"node": "7350d6c6-1a9d-4663-b509-de1c9d73588a",
"type": "main",
"index": 0
}
]
]
},
"348dfa43-be32-48a8-a9e1-a0641852517e": {
"main": [
[
{
"node": "e4854cab-8bc9-4e63-911f-a9a4f5993855",
"type": "main",
"index": 0
}
]
]
},
"91e0a42e-6afc-463b-8f25-7117b057159d": {
"main": [
[
{
"node": "038f408d-51f0-4625-b39d-e326f24850b4",
"type": "main",
"index": 0
}
]
]
},
"f69492e2-c85a-451c-9e02-f5902f214da6": {
"ai_embedding": [
[
{
"node": "e4854cab-8bc9-4e63-911f-a9a4f5993855",
"type": "ai_embedding",
"index": 0
}
]
]
},
"6f6961f7-73c3-4e16-942e-e2da096fec94": {
"ai_languageModel": [
[
{
"node": "04a8a558-07cf-41f4-85a9-37e79be90b52",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"038f408d-51f0-4625-b39d-e326f24850b4": {
"main": [
[
{
"node": "e867ed3e-df63-40e2-a3f6-0d19bbace13a",
"type": "main",
"index": 0
}
]
]
},
"85584b59-0970-4bd9-ac96-f31a0046f20e": {
"ai_embedding": [
[
{
"node": "a063f080-9227-471f-8330-3da574e275d1",
"type": "ai_embedding",
"index": 0
}
]
]
},
"0c1e25b8-9145-443e-8c9f-34eb62aa7192": {
"ai_languageModel": [
[
{
"node": "038f408d-51f0-4625-b39d-e326f24850b4",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"65131cfb-ac44-48fa-a648-033c0611e8ea": {
"ai_document": [
[
{
"node": "e4854cab-8bc9-4e63-911f-a9a4f5993855",
"type": "ai_document",
"index": 0
}
]
]
},
"e4854cab-8bc9-4e63-911f-a9a4f5993855": {
"main": [
[]
]
},
"a063f080-9227-471f-8330-3da574e275d1": {
"ai_tool": [
[
{
"node": "04a8a558-07cf-41f4-85a9-37e79be90b52",
"type": "ai_tool",
"index": 0
}
]
]
},
"a9d8a911-9519-4497-9127-0a2e3da9ae1b": {
"ai_outputParser": [
[
{
"node": "038f408d-51f0-4625-b39d-e326f24850b4",
"type": "ai_outputParser",
"index": 0
}
]
]
},
"a99b3732-f8ac-45d3-a2f5-7a1177dc5742": {
"main": [
[
{
"node": "04a8a558-07cf-41f4-85a9-37e79be90b52",
"type": "main",
"index": 0
}
]
]
},
"8fba17b7-c24b-4a26-a998-e3d2f7acc481": {
"main": [
[
{
"node": "611a6e12-f52b-4b3c-b74c-238a4f75f612",
"type": "main",
"index": 0
}
]
]
},
"0800667d-7b33-4c8b-bc61-d12ccf94a640": {
"ai_textSplitter": [
[
{
"node": "65131cfb-ac44-48fa-a648-033c0611e8ea",
"type": "ai_textSplitter",
"index": 0
}
]
]
},
"976ded9c-2080-484e-ad64-eb3c6e671961": {
"main": [
[
{
"node": "348dfa43-be32-48a8-a9e1-a0641852517e",
"type": "main",
"index": 0
}
]
]
}
}
}
¿Cómo usar este flujo de trabajo?
Copie el código de configuración JSON de arriba, cree un nuevo flujo de trabajo en su instancia de n8n y seleccione "Importar desde JSON", pegue la configuración y luego modifique la configuración de credenciales según sea necesario.
¿En qué escenarios es adecuado este flujo de trabajo?
Avanzado - Ingeniería, Inteligencia Artificial
¿Es de pago?
Este flujo de trabajo es completamente gratuito, puede importarlo y usarlo directamente. Sin embargo, tenga en cuenta que los servicios de terceros utilizados en el flujo de trabajo (como la API de OpenAI) pueden requerir un pago por su cuenta.
Flujos de trabajo relacionados recomendados
Jimleuk
@jimleuk — Freelance consultant based in the UK specialising in AI-powered automations. I work with select clients tackling their most challenging projects. For business enquiries, send me an email at hello@jimle.uk. LinkedIn: https://www.linkedin.com/in/jimleuk/ · X/Twitter: https://x.com/jimle_uk
Compartir este flujo de trabajo