feat: Enhance speech-to-text example with live microphone transcription

- Add live microphone recording and transcription functionality
- Implement audio buffer processing with 5-second intervals
- Update SpeechToText initialization with more flexible configuration
- Add TypeScript type definitions for node-record-lpcm16
- Improve error handling and process management for audio recording
This commit is contained in:
jango-blockchained
2025-02-06 12:55:15 +01:00
parent 9d125a87d9
commit 9d50395dc5
2 changed files with 71 additions and 13 deletions

View File

@@ -1,9 +1,15 @@
import { SpeechToText, TranscriptionResult, WakeWordEvent } from '../src/speech/speechToText'; import { SpeechToText, TranscriptionResult, WakeWordEvent } from '../src/speech/speechToText';
import path from 'path'; import path from 'path';
import recorder from 'node-record-lpcm16';
import { Writable } from 'stream';
async function main() { async function main() {
// Initialize the speech-to-text service // Initialize the speech-to-text service
const speech = new SpeechToText('fast-whisper'); const speech = new SpeechToText({
modelPath: 'base.en',
modelType: 'whisper',
containerName: 'fast-whisper'
});
// Check if the service is available // Check if the service is available
const isHealthy = await speech.checkHealth(); const isHealthy = await speech.checkHealth();
@@ -45,12 +51,51 @@ async function main() {
console.error('❌ Error:', error.message); console.error('❌ Error:', error.message);
}); });
// Create the audio directory if it doesn't exist. With `recursive: true`,
// mkdirSync succeeds silently when the directory is already there, so a
// separate existsSync check (and its check-then-create race) is unnecessary.
const audioDir = path.join(__dirname, '..', 'audio');
require('fs').mkdirSync(audioDir, { recursive: true });
// Start microphone recording: 16 kHz mono audio is piped into an in-memory
// sink that appends every incoming chunk to `audioBuffer`.
console.log('Starting microphone recording...');
let audioBuffer = Buffer.alloc(0);
const audioStream = new Writable({
  write: (chunk: Buffer, _encoding, done) => {
    // Accumulate the raw bytes; the periodic transcription job drains them.
    audioBuffer = Buffer.concat([audioBuffer, chunk]);
    done();
  }
});
const recording = recorder.record({ sampleRate: 16000, channels: 1, audioType: 'wav' });
recording.stream().pipe(audioStream);
// Process audio every 5 seconds.
// NOTE(review): with audioType 'wav' presumably only the first chunk carries
// a WAV header — confirm speech.transcribe() accepts the later, headerless
// chunks.
setInterval(async () => {
  if (audioBuffer.length === 0) {
    return;
  }
  // Take ownership of the buffered audio *before* awaiting. The recording
  // stream keeps appending to `audioBuffer` while the transcription is in
  // flight; clearing the buffer afterwards would silently drop those chunks.
  const pending = audioBuffer;
  audioBuffer = Buffer.alloc(0);
  try {
    const result = await speech.transcribe(pending);
    console.log('\n🎤 Live transcription:', result);
  } catch (error) {
    // Put the failed audio back in front of whatever arrived meanwhile so it
    // is retried on the next tick, matching the previous retry behaviour.
    audioBuffer = Buffer.concat([pending, audioBuffer]);
    console.error('❌ Transcription error:', error);
  }
}, 5000);
// Example of manual transcription // Example of manual transcription
async function transcribeFile(filepath: string) { async function transcribeFile(filepath: string) {
try { try {
console.log(`\n🎯 Manually transcribing: ${filepath}`); console.log(`\n🎯 Manually transcribing: ${filepath}`);
const result = await speech.transcribeAudio(filepath, { const result = await speech.transcribeAudio(filepath, {
model: 'base.en', // You can change this to tiny.en, small.en, medium.en, or large-v2 model: 'base.en',
language: 'en', language: 'en',
temperature: 0, temperature: 0,
beamSize: 5 beamSize: 5
@@ -63,22 +108,13 @@ async function main() {
} }
} }
// Create audio directory if it doesn't exist
const audioDir = path.join(__dirname, '..', 'audio');
if (!require('fs').existsSync(audioDir)) {
require('fs').mkdirSync(audioDir, { recursive: true });
}
// Start wake word detection // Start wake word detection
speech.startWakeWordDetection(audioDir); speech.startWakeWordDetection(audioDir);
// Example: You can also manually transcribe files // Handle cleanup on exit
// Uncomment the following line and replace with your audio file:
// await transcribeFile('/path/to/your/audio.wav');
// Keep the process running
process.on('SIGINT', () => { process.on('SIGINT', () => {
console.log('\nStopping speech service...'); console.log('\nStopping speech service...');
recording.stop();
speech.stopWakeWordDetection(); speech.stopWakeWordDetection();
process.exit(0); process.exit(0);
}); });

22
src/types/node-record-lpcm16.d.ts vendored Normal file
View File

@@ -0,0 +1,22 @@
/**
 * Ambient typings for the untyped `node-record-lpcm16` package.
 *
 * Covers the `record()` factory and the recording handle it returns. Option
 * and method names follow the package's v1.x README; the two legacy option
 * names at the end are kept so existing callers continue to type-check.
 */
declare module 'node-record-lpcm16' {
  import { Readable } from 'stream';

  /** Options accepted by `record()`; every field is optional. */
  interface RecordOptions {
    sampleRate?: number;
    channels?: number;
    audioType?: string;
    threshold?: number;
    thresholdStart?: number | null;
    thresholdEnd?: number | null;
    /** The README passes this as a string (e.g. '1.0'); numbers stay allowed. */
    silence?: number | string;
    endOnSilence?: boolean;
    /** Name of the recording backend to use (e.g. 'sox'). */
    recorder?: string;
    /** Recording device identifier (e.g. 'plughw:1'). */
    device?: string | null;
    // NOTE(review): `verbose` and `recordProgram` look like pre-1.0 option
    // names — confirm against the installed package version.
    verbose?: boolean;
    recordProgram?: string;
  }

  /** Handle for a recording created by `record()`. */
  interface Recording {
    /** Readable stream of the captured audio data. */
    stream(): Readable;
    stop(): void;
    pause(): void;
    resume(): void;
    isPaused(): boolean;
  }

  export function record(options?: RecordOptions): Recording;
}