feat(speech): enhance speech configuration and example integration

- Add comprehensive speech configuration in .env.example and app config
- Update Docker speech Dockerfile for more flexible model handling
- Create detailed README for speech-to-text examples
- Implement example script demonstrating speech features
- Improve speech service initialization and configuration management
2025-02-04 19:35:50 +01:00
parent 60f18f8e71
commit 3a6f79c9a8
14 changed files with 669 additions and 86 deletions
--- a/examples/README.md
+++ b/examples/README.md
@@ -0,0 +1,91 @@
+# Speech-to-Text Examples
+
+This directory contains examples demonstrating how to use the speech-to-text integration with wake word detection.
+
+## Prerequisites
+
+1. Make sure you have Docker installed and running
+2. Build and start the services:
+   ```bash
+   docker-compose up -d
+   ```
+
+## Running the Example
+
+1. Install dependencies:
+   ```bash
+   npm install
+   ```
+
+2. Run the example:
+   ```bash
+   npm run example:speech
+   ```
+
+   Or using `ts-node` directly:
+   ```bash
+   npx ts-node examples/speech-to-text-example.ts
+   ```
+
+## Features Demonstrated
+
+1. **Wake Word Detection**
+   - Listens for wake words: "hey jarvis", "ok google", "alexa"
+   - Automatically saves audio when a wake word is detected
+   - Transcribes the detected speech
+
+2. **Manual Transcription**
+   - Example of how to transcribe audio files manually
+   - Supports different models and configurations
+
+3. **Event Handling**
+   - Wake word detection events
+   - Transcription results
+   - Progress updates
+   - Error handling
+
+## Example Output
+
+When a wake word is detected, you'll see output like this:
+
+```
+🎤 Wake word detected!
+  Timestamp: 20240203_123456
+  Audio file: /path/to/audio/wake_word_20240203_123456.wav
+  Metadata file: /path/to/audio/wake_word_20240203_123456.wav.json
+
+📝 Transcription result:
+  Full text: This is what was said after the wake word.
+
+  Segments:
+    1. [0.00s - 1.52s] (95.5% confidence)
+       "This is what was said"
+    2. [1.52s - 2.34s] (98.2% confidence)
+       "after the wake word."
+```
+
+## Customization
+
+You can customize the behavior by:
+
+1. Changing the wake word models in `docker/speech/Dockerfile`
+2. Modifying transcription options in the example file
+3. Adding your own event handlers
+4. Implementing different audio processing logic
+
+## Troubleshooting
+
+1. **Docker Issues**
+   - Make sure Docker is running
+   - Check container logs: `docker-compose logs fast-whisper`
+   - Verify the container is up: `docker ps`
+
+2. **Audio Issues**
+   - Check audio device permissions
+   - Verify audio file format (WAV files recommended)
+   - Check audio file permissions
+
+3. **Performance Issues**
+   - Try using a smaller model (tiny.en or base.en)
+   - Adjust beam size and patience parameters
+   - Consider using GPU acceleration if available 
--- a/examples/speech-to-text-example.ts
+++ b/examples/speech-to-text-example.ts
@@ -0,0 +1,91 @@
+import fs from 'fs';
+import path from 'path';
+import { SpeechToText, TranscriptionResult, WakeWordEvent } from '../src/speech/speechToText';
+
+/**
+ * Runs the speech-to-text example.
+ *
+ * Steps, in order:
+ *  1. Creates a `SpeechToText` client for the `fast-whisper` container and aborts with
+ *     exit code 1 when its health check reports the service as unavailable.
+ *  2. Registers console-logging handlers for the `wake_word`, `transcription`,
+ *     `progress` and `error` events.
+ *  3. Ensures the `audio` directory one level above this script exists and starts
+ *     wake word detection on it.
+ *  4. Installs a SIGINT (Ctrl+C) handler that stops detection and exits with code 0.
+ *
+ * @returns A promise that resolves once detection has been started and the SIGINT
+ *          handler is installed; it rejects if the health check or setup throws.
+ */
+async function main(): Promise<void> {
+    // Initialize the speech-to-text service
+    const speech = new SpeechToText('fast-whisper');
+
+    // Bail out early when the service is not reachable: nothing below can work without it.
+    const isHealthy = await speech.checkHealth();
+    if (!isHealthy) {
+        console.error('Speech service is not available. Make sure Docker is running and the fast-whisper container is up.');
+        console.error('Run: docker-compose up -d');
+        process.exit(1);
+    }
+
+    console.log('Speech service is ready!');
+    console.log('Listening for wake words: "hey jarvis", "ok google", "alexa"');
+    console.log('Press Ctrl+C to exit');
+
+    // Event handlers are registered before detection starts so no event is missed.
+    speech.on('wake_word', (event: WakeWordEvent) => {
+        console.log('\n🎤 Wake word detected!');
+        console.log('  Timestamp:', event.timestamp);
+        console.log('  Audio file:', event.audioFile);
+        console.log('  Metadata file:', event.metadataFile);
+    });
+
+    // Print the full transcript followed by each segment with its time range and confidence.
+    speech.on('transcription', (event: { audioFile: string; result: TranscriptionResult }) => {
+        console.log('\n📝 Transcription result:');
+        console.log('  Full text:', event.result.text);
+        console.log('\n  Segments:');
+        event.result.segments.forEach((segment, index) => {
+            console.log(`    ${index + 1}. [${segment.start.toFixed(2)}s - ${segment.end.toFixed(2)}s] (${(segment.confidence * 100).toFixed(1)}% confidence)`);
+            console.log(`       "${segment.text}"`);
+        });
+    });
+
+    // Surface stderr output as an error, except the 'Loading model' notice which is filtered out.
+    speech.on('progress', (event: { type: string; data: string }) => {
+        if (event.type === 'stderr' && !event.data.includes('Loading model')) {
+            console.error('❌ Error:', event.data);
+        }
+    });
+
+    speech.on('error', (error: Error) => {
+        console.error('❌ Error:', error.message);
+    });
+
+    /**
+     * Example of manual transcription: transcribes a single audio file and prints the text.
+     * Failures are logged rather than rethrown so the example keeps running.
+     * Intentionally unused by default; see the commented-out call below.
+     *
+     * @param filepath Path of the audio file to transcribe.
+     */
+    async function transcribeFile(filepath: string): Promise<void> {
+        try {
+            console.log(`\n🎯 Manually transcribing: ${filepath}`);
+            const result = await speech.transcribeAudio(filepath, {
+                model: 'base.en',  // You can change this to tiny.en, small.en, medium.en, or large-v2
+                language: 'en',
+                temperature: 0,
+                beamSize: 5
+            });
+
+            console.log('\n📝 Transcription result:');
+            console.log('  Text:', result.text);
+        } catch (error) {
+            console.error('❌ Transcription failed:', error instanceof Error ? error.message : error);
+        }
+    }
+
+    // Create the audio directory if it doesn't exist. With `recursive: true`, mkdirSync
+    // succeeds when the directory is already present, so no separate existence check
+    // (and no check-then-create race) is needed.
+    const audioDir = path.join(__dirname, '..', 'audio');
+    fs.mkdirSync(audioDir, { recursive: true });
+
+    // Start wake word detection
+    speech.startWakeWordDetection(audioDir);
+
+    // Example: You can also manually transcribe files
+    // Uncomment the following line and replace with your audio file:
+    // await transcribeFile('/path/to/your/audio.wav');
+
+    // Graceful shutdown: on Ctrl+C stop wake word detection, then exit cleanly.
+    process.on('SIGINT', () => {
+        console.log('\nStopping speech service...');
+        speech.stopWakeWordDetection();
+        process.exit(0);
+    });
+}
+
+/**
+ * Last-resort handler for the example: logs whatever escaped `main()` and
+ * terminates the process with a non-zero exit code.
+ */
+function reportFatal(failure: unknown): void {
+    console.error('Fatal error:', failure);
+    process.exit(1);
+}
+
+// Run the example
+main().catch(reportFatal);