Speech-to-Text API Using OpenAI GPT-4o-mini
Intermediate
This automation workflow falls under the Document Extraction and Multimodal AI categories and contains 10 nodes, chiefly Set, Webhook, HttpRequest, and RespondToWebhook. It exposes a speech-to-text API powered by OpenAI GPT-4o-mini.
Prerequisites
- An HTTP webhook endpoint (generated automatically by n8n)
- Credentials for the target API where required (this workflow calls the OpenAI API)
Workflow Export
Import the JSON configuration below into n8n to use this workflow:
```json
{
"nodes": [
{
"id": "6b7dd876-ed21-47f6-877b-d6c45f8bc9b3",
"name": "OpenAIで文字起こし",
"type": "n8n-nodes-base.httpRequest",
"position": [
560,
140
],
"parameters": {
"url": "https://api.openai.com/v1/audio/transcriptions",
"method": "POST",
"options": {},
"sendBody": true,
"contentType": "multipart-form-data",
"authentication": "predefinedCredentialType",
"bodyParameters": {
"parameters": [
{
"name": "file",
"parameterType": "formBinaryData",
"inputDataFieldName": "audio_file"
},
{
"name": "model",
"value": "gpt-4o-mini-transcribe"
}
]
},
"nodeCredentialType": "openAiApi"
},
"credentials": {
"openAiApi": {
"id": "dMiSy27YCK6c6rra",
"name": "Duv's OpenAI"
}
},
"typeVersion": 4.2
},
{
"id": "26543502-9e91-4d70-af12-df78ac5ba630",
"name": "文字起こし結果を抽出",
"type": "n8n-nodes-base.set",
"position": [
840,
140
],
"parameters": {
"options": {},
"assignments": {
"assignments": [
{
"id": "51b3d82e-6ef0-4b0b-86aa-33cf8203a24e",
"name": "Transcript",
"type": "string",
"value": "={{ $json.text }}"
}
]
}
},
"typeVersion": 3.4
},
{
"id": "d12de568-f2b8-4757-b45f-f79bc579ee36",
"name": "変換対象の音声を含むWebhook",
"type": "n8n-nodes-base.webhook",
"position": [
220,
140
],
"webhookId": "6a06f5e4-9105-4780-9840-9b7619a25647",
"parameters": {
"path": "audio-to-transcribe",
"options": {},
"httpMethod": "POST",
"responseMode": "responseNode"
},
"typeVersion": 2
},
{
"id": "e5de7e17-64a7-4466-a381-0dfb2e9d9711",
"name": "文字起こし結果でWebhookに応答",
"type": "n8n-nodes-base.respondToWebhook",
"position": [
1100,
140
],
"parameters": {
"options": {
"responseCode": 200
}
},
"typeVersion": 1.3
},
{
"id": "883dbfbe-7330-41e6-bc7e-6dda8385250c",
"name": "付箋",
"type": "n8n-nodes-base.stickyNote",
"position": [
120,
700
],
"parameters": {
"color": 4,
"width": 580,
"height": 120,
"content": "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n <meta charset=\"UTF-8\">\n <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n <title>Transcribe Your Audio</title>\n <script src=\"https://cdn.tailwindcss.com\"></script>\n <style>\n body {\n font-family: sans-serif;\n background-color: #f0f4f8;\n display: flex;\n justify-content: center;\n align-items: center;\n min-height: 100vh;\n margin: 0;\n }\n .container {\n background-color: #ffffff;\n border-radius: 0.75rem; /* Equivalent to rounded-xl */\n box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); /* Equivalent to shadow-md */\n padding: 2rem; /* Equivalent to p-8 */\n width: 100%;\n max-width: 28rem; /* Equivalent to max-w-sm */\n position: relative;\n }\n .loading-overlay {\n position: absolute;\n top: 0;\n left: 0;\n right: 0;\n bottom: 0;\n background: rgba(255, 255, 255, 0.8);\n backdrop-filter: blur(4px);\n display: flex;\n flex-direction: column;\n align-items: center;\n justify-content: center;\n border-radius: 0.75rem;\n z-index: 10;\n opacity: 0;\n visibility: hidden;\n transition: opacity 0.3s ease, visibility 0.3s ease;\n }\n .loading-overlay.visible {\n opacity: 1;\n visibility: visible;\n }\n .spinner {\n border: 4px solid rgba(0, 0, 0, 0.1);\n border-left-color: #000;\n border-radius: 50%;\n width: 32px;\n height: 32px;\n animation: spin 1s linear infinite;\n }\n @keyframes spin {\n to { transform: rotate(360deg); }\n }\n </style>\n</head>\n<body>\n <div class=\"container\">\n <h2 class=\"text-2xl font-bold text-center mb-6\">Transcribe Your Audio</h2>\n\n <!-- Audio Recording Section -->\n <div id=\"audio-section\" class=\"space-y-4\">\n <div class=\"flex flex-col items-center\">\n <button id=\"record-btn\" class=\"bg-blue-500 hover:bg-blue-600 text-white font-bold py-3 px-6 rounded-full text-lg mb-4 transition-colors\">\n Start Recording\n </button>\n <p id=\"recording-status\" class=\"text-gray-600 text-sm\">Press to record</p>\n <p id=\"timer\" class=\"text-xl font-mono text-gray-700 mt-2\">00:00</p>\n </div>\n\n <!-- Audio Playback -->\n <div id=\"audio-playback-container\" class=\"hidden flex flex-col items-center mt-4\">\n <audio id=\"audio-player\" controls class=\"w-full\"></audio>\n <div class=\"flex space-x-4 mt-4\">\n <button id=\"re-record-btn\" class=\"bg-gray-200 hover:bg-gray-300 text-gray-800 font-semibold py-2 px-4 rounded-md transition-colors\">\n Re-record\n </button>\n <button id=\"submit-audio-btn\" class=\"bg-green-500 hover:bg-green-600 text-white font-semibold py-2 px-4 rounded-md transition-colors\">\n Transcribe\n </button>\n </div>\n </div>\n </div>\n\n <!-- Transcript Display Section -->\n <div id=\"transcript-section\" class=\"hidden mt-6 space-y-4\">\n <label for=\"transcript-input\" class=\"block text-gray-700 font-semibold mb-1\">Your Transcript</label>\n <textarea id=\"transcript-input\" rows=\"6\" class=\"w-full p-3 border border-gray-300 rounded-md focus:ring-2 focus:ring-blue-500 focus:border-transparent resize-y\"></textarea>\n <button id=\"transcribe-another-btn\" class=\"w-full bg-blue-500 hover:bg-blue-600 text-white font-bold py-2 px-4 rounded-md transition-colors\">\n Transcribe Another Audio\n </button>\n </div>\n\n <!-- Loading Overlay -->\n <div id=\"loading-overlay\" class=\"loading-overlay\">\n <div class=\"spinner\"></div>\n <p id=\"loading-text\" class=\"mt-4 text-gray-700\">Processing...</p>\n </div>\n </div>\n\n <script>\n const recordBtn = document.getElementById('record-btn');\n const recordingStatus = 
document.getElementById('recording-status');\n const timerEl = document.getElementById('timer');\n const audioPlaybackContainer = document.getElementById('audio-playback-container');\n const audioPlayer = document.getElementById('audio-player');\n const reRecordBtn = document.getElementById('re-record-btn');\n const submitAudioBtn = document.getElementById('submit-audio-btn');\n const transcriptSection = document.getElementById('transcript-section');\n const transcriptInput = document.getElementById('transcript-input');\n const transcribeAnotherBtn = document.getElementById('transcribe-another-btn'); // Re-added\n const loadingOverlay = document.getElementById('loading-overlay');\n const loadingText = document.getElementById('loading-text');\n\n let mediaRecorder;\n let audioChunks = [];\n let recordedAudioBlob = null;\n let timerInterval;\n let seconds = 0;\n let isRecording = false;\n\n const WEBHOOK_URL = 'YOUR WEBHOOK URL';\n\n // --- Section Management ---\n const sections = {\n 'audio-section': document.getElementById('audio-section'),\n 'transcript-section': document.getElementById('transcript-section')\n };\n\n const showSection = (sectionId) => {\n for (const id in sections) {\n if (sections.hasOwnProperty(id)) {\n if (id === sectionId) {\n sections[id].classList.remove('hidden');\n } else {\n sections[id].classList.add('hidden');\n }\n }\n }\n };\n\n // --- Utility Functions ---\n const formatTime = (time) => {\n const minutes = Math.floor(time / 60).toString().padStart(2, '0');\n const seconds = Math.floor(time % 60).toString().padStart(2, '0');\n return `${minutes}:${seconds}`;\n };\n\n const showLoading = (message) => {\n loadingText.textContent = message;\n loadingOverlay.classList.add('visible');\n };\n\n const hideLoading = () => {\n loadingOverlay.classList.remove('visible');\n };\n\n const updateTimer = () => {\n seconds++;\n timerEl.textContent = formatTime(seconds);\n };\n\n const resetRecordingState = () => {\n isRecording = false;\n clearInterval(timerInterval);\n seconds = 0;\n timerEl.textContent = '00:00';\n recordBtn.textContent = 'Start Recording';\n recordBtn.classList.remove('bg-red-500', 'hover:bg-red-600');\n recordBtn.classList.add('bg-blue-500', 'hover:bg-blue-600');\n recordingStatus.textContent = 'Press to record';\n audioPlaybackContainer.classList.add('hidden');\n recordBtn.style.display = 'block';\n transcriptInput.value = '';\n };\n\n // --- Event Handlers ---\n recordBtn.addEventListener('click', async () => {\n if (isRecording) {\n mediaRecorder.stop();\n } else if (recordBtn.textContent === 'Record New Audio') {\n resetRecordingState();\n showSection('audio-section');\n } else {\n try {\n const stream = await navigator.mediaDevices.getUserMedia({ audio: true });\n mediaRecorder = new MediaRecorder(stream, { mimeType: 'audio/webm' });\n audioChunks = [];\n\n mediaRecorder.ondataavailable = (event) => {\n audioChunks.push(event.data);\n };\n\n mediaRecorder.onstop = () => {\n recordedAudioBlob = new Blob(audioChunks, { type: 'audio/webm' });\n const audioURL = URL.createObjectURL(recordedAudioBlob);\n audioPlayer.src = audioURL;\n stream.getTracks().forEach(track => track.stop());\n\n recordBtn.style.display = 'none';\n recordingStatus.textContent = 'Recording finished.';\n audioPlaybackContainer.classList.remove('hidden');\n clearInterval(timerInterval);\n };\n\n mediaRecorder.start();\n isRecording = true;\n recordBtn.textContent = 'Stop Recording';\n recordBtn.classList.remove('bg-blue-500', 'hover:bg-blue-600');\n 
recordBtn.classList.add('bg-red-500', 'hover:bg-red-600');\n recordingStatus.textContent = 'Recording...';\n seconds = 0;\n timerEl.textContent = '00:00';\n timerInterval = setInterval(updateTimer, 1000);\n } catch (error) {\n console.error('Error accessing microphone:', error);\n alert('Could not access microphone. Please allow access.'); // Using alert for simplicity as per previous instructions\n recordingStatus.textContent = 'Error: Microphone access denied.';\n }\n }\n });\n\n reRecordBtn.addEventListener('click', () => {\n resetRecordingState();\n showSection('audio-section');\n });\n\n submitAudioBtn.addEventListener('click', async () => {\n if (!recordedAudioBlob) {\n alert('Please record audio first.');\n return;\n }\n\n showLoading('Transcribing audio...');\n\n const formData = new FormData();\n formData.append('audio_file', recordedAudioBlob, 'audio.webm');\n\n try {\n const response = await fetch(WEBHOOK_URL, {\n method: 'POST',\n body: formData,\n });\n\n if (!response.ok) {\n throw new Error(`HTTP error! status: ${response.status}`);\n }\n\n const result = await response.json();\n console.log(\"Webhook response:\", result);\n\n transcriptInput.value = result.Transcript || 'No transcript received.';\n showSection('transcript-section');\n \n audioPlaybackContainer.classList.add('hidden');\n \n recordBtn.style.display = 'block'; \n recordBtn.textContent = 'Record New Audio'; \n recordBtn.classList.remove('bg-red-500', 'hover:bg-red-600');\n recordBtn.classList.add('bg-blue-500', 'hover:bg-blue-600');\n recordingStatus.textContent = 'Audio transcribed!';\n timerEl.textContent = '00:00';\n \n } catch (error) {\n console.error('Error sending audio to webhook:', error);\n alert(`Failed to transcribe audio: ${error.message}`);\n resetRecordingState();\n showSection('audio-section');\n } finally {\n hideLoading();\n }\n });\n\n // Event listener for the new \"Transcribe Another Audio\" button\n transcribeAnotherBtn.addEventListener('click', () => {\n resetRecordingState(); // Reset recording state\n showSection('audio-section'); // Go back to the audio recording section\n });\n\n // Initial setup when the page loads\n document.addEventListener('DOMContentLoaded', () => {\n showSection('audio-section');\n resetRecordingState(); // Ensure initial state is clean\n });\n </script>\n</body>\n</html>\n"
},
"typeVersion": 1
},
{
"id": "9c06f4c8-ae6e-43a9-9eda-a1452d81e17f",
"name": "付箋1",
"type": "n8n-nodes-base.stickyNote",
"position": [
-320,
-40
],
"parameters": {
"width": 380,
"height": 860,
"content": "## Speech Transcription API Endpoint\n\nThis workflow exposes a webhook that transcribes any audio file sent to it.\n\n**How to use**\n\n1. **Add credentials:** Select the **Transcribe with OpenAI** node and add your OpenAI API key.\n2. **Get your endpoint URL:**\n\n * Make sure the workflow is **Active**.\n\n * Click the **Webhook** node and copy the **Production URL**.\n\n3. **Connect the frontend:**\n\n * Find the sticky note labeled \"Example Frontend Code Below\". Copy the code from the note beneath it.\n\n * In the code, replace the `YOUR WEBHOOK URL` placeholder with the URL you copied in step 2.\n\n\nThe provided snippet below is a great starting point. Feel free to adapt it and build the interface you need!\n\n"
},
"typeVersion": 1
},
{
"id": "b92cc8ab-64c9-4b24-a222-aa542b4bb710",
"name": "付箋2",
"type": "n8n-nodes-base.stickyNote",
"position": [
120,
380
],
"parameters": {
"color": 4,
"width": 580,
"height": 300,
"content": "## Example Frontend Code Below**\nThe sticky note directly below contains a complete HTML file that creates a functional audio recording interface. It's a plug-and-play example to demonstrate how to call this webhook.\n\n**To use it:**\n\n1. Copy the entire code block from the note below.\n2. Save it as an `.html` file.\n3. **Remember to replace** the `YOUR WEBHOOK URL` placeholder inside the code with your actual URL from the Webhook node.\n4. Open the file in your browser to test."
},
"typeVersion": 1
},
{
"id": "883ba3ee-2a32-477f-8493-da931847a9cb",
"name": "付箋3",
"type": "n8n-nodes-base.stickyNote",
"position": [
120,
-40
],
"parameters": {
"color": 7,
"width": 280,
"height": 360,
"content": "## The webhook to call from your app\nPOST the audio as \"audio_file\" to this webhook to start the workflow."
},
"typeVersion": 1
},
{
"id": "40e86f5a-c472-4801-9235-3a2f8e3b0088",
"name": "付箋4",
"type": "n8n-nodes-base.stickyNote",
"position": [
460,
-40
],
"parameters": {
"color": 7,
"width": 280,
"height": 360,
"content": "## AI transcription with OpenAI GPT4o-mini transcribe"
},
"typeVersion": 1
},
{
"id": "4d9d11f7-ebfa-4277-bf41-9070b6d052b1",
"name": "付箋5",
"type": "n8n-nodes-base.stickyNote",
"position": [
1020,
-20
],
"parameters": {
"color": 7,
"width": 280,
"height": 340,
"content": "## Sending the transcript back to your app\nYour app should expect the key \"Transcript\" in the body of the webhook response."
},
"typeVersion": 1
}
],
"connections": {
"26543502-9e91-4d70-af12-df78ac5ba630": {
"main": [
[
{
"node": "e5de7e17-64a7-4466-a381-0dfb2e9d9711",
"type": "main",
"index": 0
}
]
]
},
"6b7dd876-ed21-47f6-877b-d6c45f8bc9b3": {
"main": [
[
{
"node": "26543502-9e91-4d70-af12-df78ac5ba630",
"type": "main",
"index": 0
}
]
]
},
"d12de568-f2b8-4757-b45f-f79bc579ee36": {
"main": [
[
{
"node": "6b7dd876-ed21-47f6-877b-d6c45f8bc9b3",
"type": "main",
"index": 0
}
]
]
}
}
}
```
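Before building against the endpoint, you may want to confirm that your OpenAI key works with the transcription model this workflow calls. The sketch below reproduces the "Transcribe with OpenAI" node's request outside n8n. The endpoint URL, model name, and multipart field names come from the workflow JSON above; the `OPENAI_API_KEY` environment variable and the local `audio.webm` file are assumptions for the example.

```typescript
// verify-openai.ts -- reproduce the "Transcribe with OpenAI" node's HTTP call.
// Assumes Node 18+ (built-in fetch, FormData, Blob) and OPENAI_API_KEY in the env.
import { readFile } from "node:fs/promises";

async function transcribe(path: string): Promise<string> {
  const form = new FormData();
  // Same multipart fields the HTTP Request node sends:
  form.append("file", new Blob([await readFile(path)], { type: "audio/webm" }), "audio.webm");
  form.append("model", "gpt-4o-mini-transcribe");

  const res = await fetch("https://api.openai.com/v1/audio/transcriptions", {
    method: "POST",
    headers: { Authorization: `Bearer ${process.env.OPENAI_API_KEY}` },
    body: form,
  });
  if (!res.ok) throw new Error(`OpenAI returned ${res.status}: ${await res.text()}`);

  // The API responds with { "text": "..." }, the same field the
  // "Extract transcript" Set node maps into "Transcript".
  const { text } = (await res.json()) as { text: string };
  return text;
}

transcribe("audio.webm").then(console.log).catch(console.error);
```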
Frequently Asked Questions
How do I use this workflow?
Copy the JSON configuration above, create a new workflow in your n8n instance, choose "Import from JSON", paste the configuration, and adjust the credentials as needed. Once the workflow is active, you can call the endpoint from any HTTP client, as in the sketch below.
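For example, here is a minimal Node.js client for the endpoint. This is a sketch: it assumes Node 18+ and a local `audio.webm` file, and the placeholder URL must be replaced with the Production URL copied from the Webhook node (the `audio-to-transcribe` path and the `audio_file` field name are taken from the workflow JSON).

```typescript
// call-webhook.ts -- send an audio file to the workflow and print the transcript.
import { readFile } from "node:fs/promises";

// Placeholder: substitute your own n8n instance's Production URL.
const WEBHOOK_URL = "https://your-n8n-instance/webhook/audio-to-transcribe";

async function main() {
  const form = new FormData();
  // The Webhook node expects the audio in a multipart field named "audio_file".
  form.append(
    "audio_file",
    new Blob([await readFile("audio.webm")], { type: "audio/webm" }),
    "audio.webm",
  );

  const res = await fetch(WEBHOOK_URL, { method: "POST", body: form });
  if (!res.ok) throw new Error(`Webhook returned ${res.status}`);

  // The workflow responds with { "Transcript": "..." }.
  const { Transcript } = (await res.json()) as { Transcript: string };
  console.log(Transcript);
}

main().catch(console.error);
```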
What scenarios is this workflow suited for?
Intermediate level: document extraction and multimodal AI use cases.
Is it free to use?
The workflow itself is completely free, but third-party services it relies on (such as the OpenAI API) may bill you separately.
Related Workflows
- Multi-level document approval and audit workflow automation with Supabase and Gmail (38 nodes, by Alok Kumar; Document Extraction)
- Extract text from images and PDFs to Markdown using Mistral OCR via Telegram (40 nodes, by Rostislav; Document Extraction)
- Automatically document and back up your workflows using GPT-4 and Airtable (38 nodes, by Guillaume Duvernay; AI Summarization)
- Create fact-based articles from your knowledge sources using Lookio and OpenAI GPT (19 nodes, by Guillaume Duvernay; Content Creation)
- Create fact-based articles from your knowledge sources using Super RAG and GPT-5 (19 nodes, by Guillaume Duvernay; AI RAG / Retrieval Augmentation)
- Create scheduled AI-powered data summaries in Slack with Super Assistant (9 nodes, by Guillaume Duvernay; AI RAG / Retrieval Augmentation)