feat(speech): enhance speech configuration and example integration
- Add comprehensive speech configuration in .env.example and app config - Update Docker speech Dockerfile for more flexible model handling - Create detailed README for speech-to-text examples - Implement example script demonstrating speech features - Improve speech service initialization and configuration management
This commit is contained in:
91
examples/README.md
Normal file
91
examples/README.md
Normal file
@@ -0,0 +1,91 @@
|
||||
# Speech-to-Text Examples
|
||||
|
||||
This directory contains examples demonstrating how to use the speech-to-text integration with wake word detection.
|
||||
|
||||
## Prerequisites
|
||||
|
||||
1. Make sure you have Docker installed and running
|
||||
2. Build and start the services:
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
## Running the Example
|
||||
|
||||
1. Install dependencies:
|
||||
```bash
|
||||
npm install
|
||||
```
|
||||
|
||||
2. Run the example:
|
||||
```bash
|
||||
npm run example:speech
|
||||
```
|
||||
|
||||
Or using `ts-node` directly:
|
||||
```bash
|
||||
npx ts-node examples/speech-to-text-example.ts
|
||||
```
|
||||
|
||||
## Features Demonstrated
|
||||
|
||||
1. **Wake Word Detection**
|
||||
- Listens for wake words: "hey jarvis", "ok google", "alexa"
|
||||
- Automatically saves audio when wake word is detected
|
||||
- Transcribes the detected speech
|
||||
|
||||
2. **Manual Transcription**
|
||||
- Example of how to transcribe audio files manually
|
||||
- Supports different models and configurations
|
||||
|
||||
3. **Event Handling**
|
||||
- Wake word detection events
|
||||
- Transcription results
|
||||
- Progress updates
|
||||
- Error handling
|
||||
|
||||
## Example Output
|
||||
|
||||
When a wake word is detected, you'll see output like this:
|
||||
|
||||
```
|
||||
🎤 Wake word detected!
|
||||
Timestamp: 20240203_123456
|
||||
Audio file: /path/to/audio/wake_word_20240203_123456.wav
|
||||
Metadata file: /path/to/audio/wake_word_20240203_123456.wav.json
|
||||
|
||||
📝 Transcription result:
|
||||
Full text: This is what was said after the wake word.
|
||||
|
||||
Segments:
|
||||
1. [0.00s - 1.52s] (95.5% confidence)
|
||||
"This is what was said"
|
||||
2. [1.52s - 2.34s] (98.2% confidence)
|
||||
"after the wake word."
|
||||
```
|
||||
|
||||
## Customization
|
||||
|
||||
You can customize the behavior by:
|
||||
|
||||
1. Changing the wake word models in `docker/speech/Dockerfile`
|
||||
2. Modifying transcription options in the example file
|
||||
3. Adding your own event handlers
|
||||
4. Implementing different audio processing logic
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
1. **Docker Issues**
|
||||
- Make sure Docker is running
|
||||
- Check container logs: `docker-compose logs fast-whisper`
|
||||
- Verify container is up: `docker ps`
|
||||
|
||||
2. **Audio Issues**
|
||||
- Check audio device permissions
|
||||
- Verify audio file format (WAV files recommended)
|
||||
- Check audio file permissions
|
||||
|
||||
3. **Performance Issues**
|
||||
- Try using a smaller model (tiny.en or base.en)
|
||||
- Adjust beam size and patience parameters
|
||||
- Consider using GPU acceleration if available
|
||||
91
examples/speech-to-text-example.ts
Normal file
91
examples/speech-to-text-example.ts
Normal file
@@ -0,0 +1,91 @@
|
||||
import { SpeechToText, TranscriptionResult, WakeWordEvent } from '../src/speech/speechToText';
|
||||
import path from 'path';
|
||||
|
||||
async function main() {
|
||||
// Initialize the speech-to-text service
|
||||
const speech = new SpeechToText('fast-whisper');
|
||||
|
||||
// Check if the service is available
|
||||
const isHealthy = await speech.checkHealth();
|
||||
if (!isHealthy) {
|
||||
console.error('Speech service is not available. Make sure Docker is running and the fast-whisper container is up.');
|
||||
console.error('Run: docker-compose up -d');
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
console.log('Speech service is ready!');
|
||||
console.log('Listening for wake words: "hey jarvis", "ok google", "alexa"');
|
||||
console.log('Press Ctrl+C to exit');
|
||||
|
||||
// Set up event handlers
|
||||
speech.on('wake_word', (event: WakeWordEvent) => {
|
||||
console.log('\n🎤 Wake word detected!');
|
||||
console.log(' Timestamp:', event.timestamp);
|
||||
console.log(' Audio file:', event.audioFile);
|
||||
console.log(' Metadata file:', event.metadataFile);
|
||||
});
|
||||
|
||||
speech.on('transcription', (event: { audioFile: string; result: TranscriptionResult }) => {
|
||||
console.log('\n📝 Transcription result:');
|
||||
console.log(' Full text:', event.result.text);
|
||||
console.log('\n Segments:');
|
||||
event.result.segments.forEach((segment, index) => {
|
||||
console.log(` ${index + 1}. [${segment.start.toFixed(2)}s - ${segment.end.toFixed(2)}s] (${(segment.confidence * 100).toFixed(1)}% confidence)`);
|
||||
console.log(` "${segment.text}"`);
|
||||
});
|
||||
});
|
||||
|
||||
speech.on('progress', (event: { type: string; data: string }) => {
|
||||
if (event.type === 'stderr' && !event.data.includes('Loading model')) {
|
||||
console.error('❌ Error:', event.data);
|
||||
}
|
||||
});
|
||||
|
||||
speech.on('error', (error: Error) => {
|
||||
console.error('❌ Error:', error.message);
|
||||
});
|
||||
|
||||
// Example of manual transcription
|
||||
async function transcribeFile(filepath: string) {
|
||||
try {
|
||||
console.log(`\n🎯 Manually transcribing: ${filepath}`);
|
||||
const result = await speech.transcribeAudio(filepath, {
|
||||
model: 'base.en', // You can change this to tiny.en, small.en, medium.en, or large-v2
|
||||
language: 'en',
|
||||
temperature: 0,
|
||||
beamSize: 5
|
||||
});
|
||||
|
||||
console.log('\n📝 Transcription result:');
|
||||
console.log(' Text:', result.text);
|
||||
} catch (error) {
|
||||
console.error('❌ Transcription failed:', error instanceof Error ? error.message : error);
|
||||
}
|
||||
}
|
||||
|
||||
// Create audio directory if it doesn't exist
|
||||
const audioDir = path.join(__dirname, '..', 'audio');
|
||||
if (!require('fs').existsSync(audioDir)) {
|
||||
require('fs').mkdirSync(audioDir, { recursive: true });
|
||||
}
|
||||
|
||||
// Start wake word detection
|
||||
speech.startWakeWordDetection(audioDir);
|
||||
|
||||
// Example: You can also manually transcribe files
|
||||
// Uncomment the following line and replace with your audio file:
|
||||
// await transcribeFile('/path/to/your/audio.wav');
|
||||
|
||||
// Keep the process running
|
||||
process.on('SIGINT', () => {
|
||||
console.log('\nStopping speech service...');
|
||||
speech.stopWakeWordDetection();
|
||||
process.exit(0);
|
||||
});
|
||||
}
|
||||
|
||||
// Run the example
|
||||
main().catch(error => {
|
||||
console.error('Fatal error:', error);
|
||||
process.exit(1);
|
||||
});
|
||||
Reference in New Issue
Block a user