Create a Voice Assistant Interface with OpenAI GPT-4o-mini and Text-to-Speech
Intermediate
This is an automation workflow in the Support Chatbot and AI Chatbot categories, containing 14 nodes. It mainly uses the Html, Webhook, Agent, OpenAi, and RespondToWebhook nodes to create a voice assistant interface powered by OpenAI GPT-4o-mini and text-to-speech.
Prerequisites
- HTTP Webhook endpoint (generated automatically by n8n)
- OpenAI API Key (you can sanity-check the key with the sketch below)
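If you want to confirm the API key works before adding it to n8n, a quick call to OpenAI's public model-listing endpoint is enough. This is a minimal sketch, not part of the workflow; it assumes Node.js 18+ (for built-in fetch), run as an ES module, with the key exported as the OPENAI_API_KEY environment variable:

// check-key.mjs - sanity-check an OpenAI API key before wiring it into n8n.
// GET /v1/models is a lightweight endpoint that any valid key may call.
const res = await fetch("https://api.openai.com/v1/models", {
  headers: { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` },
});
console.log(res.ok ? "API key OK" : `Check failed: HTTP ${res.status}`);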
Export Workflow
You can use this workflow by importing the JSON configuration below into n8n.
{
"meta": {
"instanceId": "6052a1b29f061469e8139dae44556603650099c3365d7598798f132ae827fa1c"
},
"nodes": [
{
"id": "251e4d25-f04b-4861-b4fb-c9aa63654d2e",
"name": "音声インターフェースエンドポイント",
"type": "n8n-nodes-base.webhook",
"position": [
880,
180
],
"webhookId": "71ac230d-5949-41ba-b05e-761cb5cb07f3",
"parameters": {
"path": "voice-assistant",
"options": {},
"responseMode": "responseNode"
},
"typeVersion": 2
},
{
"id": "299de4f0-2bdd-46da-bfdb-35128a6240e0",
"name": "音声アシスタントUI",
"type": "n8n-nodes-base.html",
"position": [
1100,
180
],
"parameters": {
"html": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <title>AI Voice Assistant</title>\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <style>\n body {\n background-color: #000;\n color: white;\n font-family: 'Segoe UI', sans-serif;\n display: flex;\n flex-direction: column;\n justify-content: center;\n align-items: center;\n height: 100vh;\n margin: 0;\n overflow: hidden;\n }\n\n .orb-container {\n position: relative;\n }\n\n .orb {\n width: 200px;\n height: 200px;\n background: radial-gradient(circle at 30% 30%, #00ffff, #004d4d);\n border-radius: 50%;\n box-shadow: 0 0 40px #00ffff88;\n cursor: pointer;\n display: flex;\n align-items: center;\n justify-content: center;\n z-index: 2; /* Ensures clickability */\n }\n\n .ring {\n position: absolute;\n top: -20px;\n left: -20px;\n width: 240px;\n height: 240px;\n border-radius: 50%;\n box-shadow: 0 0 30px #00ffff55;\n animation: pulse 2s infinite;\n z-index: 1;\n pointer-events: none; /* Prevents click interference */\n }\n \n .orb.listening {\n background: radial-gradient(circle at 30% 30%, #00ffcc, #005c5c);\n box-shadow: 0 0 50px #00ffff;\n }\n\n .orb.playing {\n background: radial-gradient(circle at 30% 30%, #ff00ff, #520052);\n box-shadow: 0 0 50px #ff00ff99;\n }\n\n .ring.listening {\n animation: vibrate 0.4s infinite;\n }\n\n .ring.playing {\n animation: glow 1s ease-in-out infinite alternate;\n }\n\n @keyframes pulse {\n 0%, 100% { transform: scale(1); opacity: 0.6; }\n 50% { transform: scale(1.2); opacity: 1; }\n }\n\n @keyframes vibrate {\n 0% { transform: scale(1.05); }\n 50% { transform: scale(1.15); }\n 100% { transform: scale(1.05); }\n }\n\n @keyframes glow {\n 0% { box-shadow: 0 0 40px #ff00ff88; }\n 100% { box-shadow: 0 0 60px #ff00ffcc; }\n }\n\n .status-text {\n margin-top: 20px;\n font-size: 18px;\n color: #aaa;\n font-weight: 300;\n font-style: italic;\n }\n </style>\n</head>\n<body>\n <div class=\"orb-container\">\n <div id=\"ring\" class=\"ring\"></div>\n <div id=\"orb\" class=\"orb\" title=\"Click to speak with AI\"></div>\n </div>\n <div id=\"status\" class=\"status-text\">Click to speak</div>\n\n <audio id=\"beep\" src=\"https://www.soundjay.com/button/sounds/button-3.mp3\" preload=\"auto\"></audio>\n\n <script>\n const orb = document.getElementById('orb');\n const ring = document.getElementById('ring');\n const statusText = document.getElementById('status');\n const beep = document.getElementById('beep');\n\n let recognition;\n let lastTranscript = \"\";\n let silenceTimeout;\n\n // Initialize speech recognition\n function startRecognition() {\n if (!('webkitSpeechRecognition' in window)) {\n alert('This browser does not support speech recognition.');\n return;\n }\n\n beep.play();\n recognition = new webkitSpeechRecognition();\n recognition.lang = 'en-US'; // Change to your preferred language\n recognition.interimResults = true;\n recognition.continuous = true;\n lastTranscript = \"\";\n\n recognition.onstart = () => {\n orb.classList.add('listening');\n ring.classList.add('listening');\n statusText.textContent = \"Listening...\";\n };\n\n recognition.onresult = (event) => {\n for (let i = event.resultIndex; i < event.results.length; ++i) {\n if (event.results[i].isFinal) {\n lastTranscript += event.results[i][0].transcript;\n }\n }\n\n // Reset silence timeout\n clearTimeout(silenceTimeout);\n silenceTimeout = setTimeout(stopRecognition, 1000);\n };\n\n recognition.onerror = (event) => {\n console.error('Error:', event.error);\n 
orb.classList.remove('listening');\n ring.classList.remove('listening');\n statusText.textContent = \"Error: \" + event.error;\n };\n\n recognition.onend = () => {\n orb.classList.remove('listening');\n ring.classList.remove('listening');\n };\n\n recognition.start();\n }\n\n // Stop recognition and send to webhook\n async function stopRecognition() {\n if (recognition) {\n recognition.stop();\n orb.classList.remove('listening');\n ring.classList.remove('listening');\n statusText.textContent = \"Processing...\";\n\n if (lastTranscript.trim() !== '') {\n try {\n // IMPORTANT: Replace YOUR_WEBHOOK_URL_HERE with the actual webhook URL from the 'Audio Processing Endpoint' node\n const response = await fetch(\"YOUR_WEBHOOK_URL_HERE\", {\n method: \"POST\",\n headers: { \"Content-Type\": \"application/json\" },\n body: JSON.stringify({ question: lastTranscript.trim() })\n });\n\n if (!response.ok) throw new Error(\"Server response error\");\n\n const blob = await response.blob();\n const audioURL = URL.createObjectURL(blob);\n const audio = new Audio(audioURL);\n\n audio.onplay = () => {\n orb.classList.add('playing');\n ring.classList.add('playing');\n statusText.textContent = \"Responding...\";\n };\n\n audio.onended = () => {\n orb.classList.remove('playing');\n ring.classList.remove('playing');\n statusText.textContent = \"Click to speak\";\n };\n\n audio.play();\n\n } catch (err) {\n console.error(\"Error sending or processing response:\", err);\n statusText.textContent = \"Error communicating with AI\";\n }\n }\n }\n }\n\n // Click handler for the orb\n orb.addEventListener('click', () => {\n if (recognition && recognition.running) {\n stopRecognition();\n } else {\n startRecognition();\n }\n });\n </script>\n</body>\n</html>\n"
},
"typeVersion": 1.2
},
{
"id": "cbfca6b1-5b62-414d-a71b-e5a6b03236f1",
"name": "HTMLインターフェース送信",
"type": "n8n-nodes-base.respondToWebhook",
"position": [
1300,
180
],
"parameters": {
"options": {},
"respondWith": "text",
"responseBody": "={{ $json.html }}"
},
"typeVersion": 1.1
},
{
"id": "028000ca-2232-4168-867b-88f53eab9760",
"name": "オーディオ処理エンドポイント",
"type": "n8n-nodes-base.webhook",
"position": [
720,
720
],
"webhookId": "287d40b1-4172-4ba0-9a1d-6d971dd9cf68",
"parameters": {
"path": "process-audio",
"options": {},
"httpMethod": "POST",
"responseMode": "responseNode"
},
"typeVersion": 2
},
{
"id": "d86e58ed-a0be-4853-a1dd-7d59bd6d2c1f",
"name": "ユーザークエリ処理",
"type": "@n8n/n8n-nodes-langchain.agent",
"position": [
940,
720
],
"parameters": {
"text": "={{ $json.body.question }}",
"options": {
"systemMessage": "You are a helpful AI assistant. Respond in a friendly and conversational manner."
},
"promptType": "define"
},
"typeVersion": 1.8
},
{
"id": "9f336bfd-1dfc-4d4a-9fad-74d3df57bf0c",
"name": "会話メモリ",
"type": "@n8n/n8n-nodes-langchain.memoryBufferWindow",
"position": [
1040,
920
],
"parameters": {
"sessionKey": "voice-assistant-session",
"sessionIdType": "customKey",
"contextWindowLength": 30
},
"typeVersion": 1.3
},
{
"id": "4e400995-440d-4a2b-927c-9e612a649fe8",
"name": "音声応答送信",
"type": "n8n-nodes-base.respondToWebhook",
"position": [
1520,
720
],
"parameters": {
"options": {},
"respondWith": "binary"
},
"typeVersion": 1.1
},
{
"id": "e468576e-22b6-44ef-9b0e-d2a95d72d2aa",
"name": "音声応答生成",
"type": "@n8n/n8n-nodes-langchain.openAi",
"disabled": true,
"position": [
1300,
720
],
"parameters": {
"input": "={{ $json.output }}",
"voice": "onyx",
"options": {},
"resource": "audio"
},
"typeVersion": 1.8
},
{
"id": "be60d217-d893-497f-87ff-a882ef11afc9",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
740,
-20
],
"parameters": {
"color": 5,
"width": 840,
"height": 420,
"content": "## VOICE ASSISTANT INTERFACE\n\nThis webhook serves the HTML interface with the interactive orb that users click to speak with the AI assistant.\n\nAccess this webhook URL in your browser to use the voice assistant."
},
"typeVersion": 1
},
{
"id": "12cca86e-0868-4569-b89c-7f0d638254d1",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
620,
500
],
"parameters": {
"color": 3,
"width": 1100,
"height": 580,
"content": "## BACKEND PROCESSING\n\nThis section handles:\n1. Receiving transcribed speech from the frontend\n2. Processing through AI with conversation memory\n3. Converting response to speech\n4. Sending audio back to the browser"
},
"typeVersion": 1
},
{
"id": "e5a3196b-2865-4a62-b2d9-d755a67fcb38",
"name": "テンプレート説明",
"type": "n8n-nodes-base.stickyNote",
"position": [
-120,
-20
],
"parameters": {
"color": 6,
"width": 600,
"height": 1460,
"content": "## Voice Assistant Interface with n8n and OpenAI\n\nThis workflow creates a voice-activated AI assistant interface that runs directly in your browser. Users can click on a glowing orb to speak with the AI, which responds with voice using OpenAI's text-to-speech capabilities.\n\n## Who is it for?\n\nThis template is perfect for:\n- Developers looking to add voice interfaces to their applications\n- Customer service teams wanting to create voice-enabled support systems\n- Content creators building interactive voice experiences\n- Anyone interested in creating their own \"Alexa-like\" assistant\n\n## How it works\n\nThe workflow consists of two main parts:\n\n1. **Frontend Interface**: A beautiful animated orb that users click to activate voice recording\n2. **Backend Processing**: Receives the audio transcription, processes it through an AI agent with memory, and returns voice responses\n\nThe system uses:\n- Web Speech API for voice recognition (browser-based)\n- OpenAI GPT-4o-mini for intelligent responses\n- OpenAI Text-to-Speech for voice synthesis\n- Session memory to maintain conversation context\n\n## Setup requirements\n\n- n8n instance (self-hosted or cloud)\n- OpenAI API key with access to:\n - GPT-4o-mini model\n - Text-to-Speech API\n- Modern web browser with Web Speech API support (Chrome, Edge, Safari)\n\n## How to set up\n\n1. Import the workflow into your n8n instance\n2. Add your OpenAI credentials to both OpenAI nodes\n3. Copy the webhook URL from the \"Audio Processing Endpoint\" node\n4. Edit the \"Voice Assistant UI\" node and replace `YOUR_WEBHOOK_URL_HERE` with your webhook URL\n5. Access the \"Voice Interface Endpoint\" webhook URL in your browser\n6. Click the orb and start talking!\n\n## How to customize the workflow\n\n- **Change the AI personality**: Edit the system message in the \"Process User Query\" node\n- **Modify the visual style**: Customize the CSS in the \"Voice Assistant UI\" node\n- **Add more capabilities**: Connect additional tools to the AI Agent\n- **Change the voice**: Select a different voice in the \"Generate Voice Response\" node\n- **Adjust memory**: Modify the context window length in the \"Conversation Memory\" node\n\n## Demo\n\nWatch the template in action: https://youtu.be/0bMdJcRMnZY"
},
"typeVersion": 1
},
{
"id": "eba88594-9e7e-47b1-b1de-5e19e4607035",
"name": "セットアップ手順",
"type": "n8n-nodes-base.stickyNote",
"position": [
1820,
-40
],
"parameters": {
"color": 7,
"width": 400,
"height": 500,
"content": "## ⚙️ SETUP INSTRUCTIONS\n\n1. **Add OpenAI Credentials**:\n - Click on \"GPT-4o-mini Model\" node\n - Add your OpenAI API credentials\n - Do the same for \"Generate Voice Response\" node\n\n2. **Configure Webhook URL**:\n - Copy the webhook URL from \"Audio Processing Endpoint\"\n - Edit \"Voice Assistant UI\" node\n - Replace YOUR_WEBHOOK_URL_HERE with the copied URL\n\n3. **Test the Assistant**:\n - Open the \"Voice Interface Endpoint\" webhook URL in your browser\n - Click the glowing orb\n - Allow microphone permissions\n - Start speaking!"
},
"typeVersion": 1
},
{
"id": "8eb03b82-cc02-46d9-b9b6-873718202e32",
"name": "カスタマイズオプション",
"type": "n8n-nodes-base.stickyNote",
"position": [
1820,
560
],
"parameters": {
"color": 7,
"width": 400,
"height": 440,
"content": "## 🎨 CUSTOMIZATION OPTIONS\n\n**Language Support**:\n- Change `recognition.lang = 'en-US'` in the HTML\n- Options: 'pt-BR', 'es-ES', 'fr-FR', etc.\n\n**Voice Options**:\n- alloy: Neutral and balanced\n- echo: Warm and conversational\n- fable: Expressive and dynamic\n- onyx: Deep and authoritative\n- nova: Friendly and upbeat\n- shimmer: Soft and gentle\n\n**Visual Themes**:\n- Modify CSS colors for different moods\n- Adjust animation speeds\n- Change orb size and effects"
},
"typeVersion": 1
},
{
"id": "c7d2aac4-5cb2-405c-8a4f-0f1020d76eec",
"name": "GPT-4o-miniモデル",
"type": "@n8n/n8n-nodes-langchain.lmChatOpenAi",
"disabled": true,
"position": [
900,
920
],
"parameters": {
"model": {
"__rl": true,
"mode": "list",
"value": "gpt-4o-mini",
"cachedResultName": "gpt-4o-mini"
},
"options": {}
},
"typeVersion": 1.2
}
],
"pinData": {},
"connections": {
"c7d2aac4-5cb2-405c-8a4f-0f1020d76eec": {
"ai_languageModel": [
[
{
"node": "d86e58ed-a0be-4853-a1dd-7d59bd6d2c1f",
"type": "ai_languageModel",
"index": 0
}
]
]
},
"d86e58ed-a0be-4853-a1dd-7d59bd6d2c1f": {
"main": [
[
{
"node": "e468576e-22b6-44ef-9b0e-d2a95d72d2aa",
"type": "main",
"index": 0
}
]
]
},
"299de4f0-2bdd-46da-bfdb-35128a6240e0": {
"main": [
[
{
"node": "cbfca6b1-5b62-414d-a71b-e5a6b03236f1",
"type": "main",
"index": 0
}
]
]
},
"9f336bfd-1dfc-4d4a-9fad-74d3df57bf0c": {
"ai_memory": [
[
{
"node": "d86e58ed-a0be-4853-a1dd-7d59bd6d2c1f",
"type": "ai_memory",
"index": 0
}
]
]
},
"e468576e-22b6-44ef-9b0e-d2a95d72d2aa": {
"main": [
[
{
"node": "4e400995-440d-4a2b-927c-9e612a649fe8",
"type": "main",
"index": 0
}
]
]
},
"251e4d25-f04b-4861-b4fb-c9aa63654d2e": {
"main": [
[
{
"node": "299de4f0-2bdd-46da-bfdb-35128a6240e0",
"type": "main",
"index": 0
}
]
]
},
"028000ca-2232-4168-867b-88f53eab9760": {
"main": [
[
{
"node": "d86e58ed-a0be-4853-a1dd-7d59bd6d2c1f",
"type": "main",
"index": 0
}
]
]
}
}
}
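After importing, remember to enable the two nodes that ship disabled in this export ("GPT-4o-miniモデル" and "音声応答生成") and activate the workflow. You can then exercise the backend without the orb UI: the sketch below, pasted into a browser console, posts a question to the process-audio webhook and plays back the binary audio the workflow returns. The URL is an assumption for a default local install on port 5678; copy the real one from the "オーディオ処理エンドポイント" (Audio Processing Endpoint) node.

// Post a transcribed question to the backend webhook and play the TTS reply.
// Replace the URL with the one n8n shows on the webhook node.
const res = await fetch("http://localhost:5678/webhook/process-audio", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({ question: "Hello, can you hear me?" }),
});
if (!res.ok) throw new Error(`HTTP ${res.status}`);
// The respond-to-webhook node returns binary audio, so play the blob directly.
new Audio(URL.createObjectURL(await res.blob())).play();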
FAQ

How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, select "Import from JSON", paste the configuration, and update the credentials as needed. (For a scripted alternative, see the sketch below.)
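If you would rather script the import than click through the editor, n8n also exposes a public REST API. This is a hedged sketch, not an official recipe: it assumes an API key created under Settings → n8n API, a local instance on port 5678, the export saved as voice-assistant.json, and that the create endpoint accepts only the name, nodes, connections, and settings fields (so the export is trimmed to those):

// import-workflow.mjs - create the workflow via n8n's public API (Node 18+).
import { readFile } from "node:fs/promises";

const exported = JSON.parse(await readFile("voice-assistant.json", "utf8"));
const res = await fetch("http://localhost:5678/api/v1/workflows", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "X-N8N-API-KEY": process.env.N8N_API_KEY, // key from Settings → n8n API
  },
  body: JSON.stringify({
    name: "Voice Assistant",           // any name you like
    nodes: exported.nodes,             // node definitions from the export above
    connections: exported.connections,
    settings: {},                      // assumed required by the schema; empty is fine
  }),
});
console.log(res.ok ? "Workflow created" : `Import failed: HTTP ${res.status}`);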
What scenarios is this workflow suited for?
Intermediate difficulty; suited to support chatbot and AI chatbot use cases.
Is it paid?
The workflow itself is completely free. However, third-party services it depends on (such as the OpenAI API) may incur separate charges.
Related Workflows

AI-Powered Facebook Page Support Chatbot: Automated Replies and Intelligent Customer Service
A Facebook Messenger chatbot built on GPT-4.1 with human handoff
Nodes include If, Set, Wait · 32 nodes · SpaGreen Creative · Support Chatbot

HVAC Scheduling Agent with AI-Driven Multi-Step Tool Execution
Automate HVAC service scheduling with an AI agent, Google Calendar, and Gmail
Nodes include Wait, Gmail, Webhook · 30 nodes · Bhuvanesh R · Support Chatbot

AI Agent - Short URL Generator
Generate and manage short URLs with GPT-4.1 and a data store
Nodes include Set, Code, Html · 18 nodes · Nghia Nguyen · Other

Automate Customer Support with a WhatsApp AI Assistant, Google Docs, and Gemini
Nodes include If, Code, Webhook · 14 nodes · Jamot · Support Chatbot

Workframe 4
A polished web interface for GPT-4 multi-agent chat with specialist assistants
Nodes include Code, Switch, Webhook · 24 nodes · Hugo · Support Chatbot

J.A.R.V.I.S.
Build a comprehensive multimodal assistant on Telegram with OpenAI, SERP, and vector stores
Nodes include If, Set, Code · 48 nodes · FabioInTech · Support Chatbot