feat: Enhance speech-to-text example with live microphone transcription
- Add live microphone recording and transcription functionality
- Implement audio buffer processing at 5-second intervals
- Update SpeechToText initialization with a more flexible configuration
- Add TypeScript type definitions for node-record-lpcm16
- Improve error handling and process management for audio recording
This commit is contained in:
@@ -1,9 +1,15 @@
|
|||||||
import { SpeechToText, TranscriptionResult, WakeWordEvent } from '../src/speech/speechToText';
|
import { SpeechToText, TranscriptionResult, WakeWordEvent } from '../src/speech/speechToText';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
|
import recorder from 'node-record-lpcm16';
|
||||||
|
import { Writable } from 'stream';
|
||||||
|
|
||||||
async function main() {
|
async function main() {
|
||||||
// Initialize the speech-to-text service
|
// Initialize the speech-to-text service
|
||||||
const speech = new SpeechToText('fast-whisper');
|
const speech = new SpeechToText({
|
||||||
|
modelPath: 'base.en',
|
||||||
|
modelType: 'whisper',
|
||||||
|
containerName: 'fast-whisper'
|
||||||
|
});
|
||||||
|
|
||||||
// Check if the service is available
|
// Check if the service is available
|
||||||
const isHealthy = await speech.checkHealth();
|
const isHealthy = await speech.checkHealth();
|
||||||
@@ -45,12 +51,51 @@ async function main() {
|
|||||||
console.error('❌ Error:', error.message);
|
console.error('❌ Error:', error.message);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Create audio directory if it doesn't exist
|
||||||
|
const audioDir = path.join(__dirname, '..', 'audio');
|
||||||
|
if (!require('fs').existsSync(audioDir)) {
|
||||||
|
require('fs').mkdirSync(audioDir, { recursive: true });
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start microphone recording
|
||||||
|
console.log('Starting microphone recording...');
|
||||||
|
let audioBuffer = Buffer.alloc(0);
|
||||||
|
|
||||||
|
const audioStream = new Writable({
|
||||||
|
write(chunk: Buffer, encoding, callback) {
|
||||||
|
audioBuffer = Buffer.concat([audioBuffer, chunk]);
|
||||||
|
callback();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
const recording = recorder.record({
|
||||||
|
sampleRate: 16000,
|
||||||
|
channels: 1,
|
||||||
|
audioType: 'wav'
|
||||||
|
});
|
||||||
|
|
||||||
|
recording.stream().pipe(audioStream);
|
||||||
|
|
||||||
|
// Process audio every 5 seconds
|
||||||
|
setInterval(async () => {
|
||||||
|
if (audioBuffer.length > 0) {
|
||||||
|
try {
|
||||||
|
const result = await speech.transcribe(audioBuffer);
|
||||||
|
console.log('\n🎤 Live transcription:', result);
|
||||||
|
// Reset buffer after processing
|
||||||
|
audioBuffer = Buffer.alloc(0);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('❌ Transcription error:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}, 5000);
|
||||||
|
|
||||||
// Example of manual transcription
|
// Example of manual transcription
|
||||||
async function transcribeFile(filepath: string) {
|
async function transcribeFile(filepath: string) {
|
||||||
try {
|
try {
|
||||||
console.log(`\n🎯 Manually transcribing: ${filepath}`);
|
console.log(`\n🎯 Manually transcribing: ${filepath}`);
|
||||||
const result = await speech.transcribeAudio(filepath, {
|
const result = await speech.transcribeAudio(filepath, {
|
||||||
model: 'base.en', // You can change this to tiny.en, small.en, medium.en, or large-v2
|
model: 'base.en',
|
||||||
language: 'en',
|
language: 'en',
|
||||||
temperature: 0,
|
temperature: 0,
|
||||||
beamSize: 5
|
beamSize: 5
|
||||||
@@ -63,22 +108,13 @@ async function main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create audio directory if it doesn't exist
|
|
||||||
const audioDir = path.join(__dirname, '..', 'audio');
|
|
||||||
if (!require('fs').existsSync(audioDir)) {
|
|
||||||
require('fs').mkdirSync(audioDir, { recursive: true });
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start wake word detection
|
// Start wake word detection
|
||||||
speech.startWakeWordDetection(audioDir);
|
speech.startWakeWordDetection(audioDir);
|
||||||
|
|
||||||
// Example: You can also manually transcribe files
|
// Handle cleanup on exit
|
||||||
// Uncomment the following line and replace with your audio file:
|
|
||||||
// await transcribeFile('/path/to/your/audio.wav');
|
|
||||||
|
|
||||||
// Keep the process running
|
|
||||||
process.on('SIGINT', () => {
|
process.on('SIGINT', () => {
|
||||||
console.log('\nStopping speech service...');
|
console.log('\nStopping speech service...');
|
||||||
|
recording.stop();
|
||||||
speech.stopWakeWordDetection();
|
speech.stopWakeWordDetection();
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
});
|
});
|
||||||
|
|||||||
22
src/types/node-record-lpcm16.d.ts
vendored
Normal file
22
src/types/node-record-lpcm16.d.ts
vendored
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
/**
 * Minimal ambient typings for the untyped `node-record-lpcm16` package.
 *
 * Only the surface used by this project plus the options/methods listed in
 * the package README are declared; extend as needed.
 */
declare module 'node-record-lpcm16' {
  import { Readable } from 'stream';

  /** Options accepted by `record()`. Every field is optional. */
  interface RecordOptions {
    /** Audio sample rate (e.g. 16000). */
    sampleRate?: number;
    /** Number of audio channels (e.g. 1 for mono). */
    channels?: number;
    /** Audio container/format to record, e.g. 'wav'. */
    audioType?: string;
    /** Silence threshold. */
    threshold?: number;
    /** Silence threshold used to start recording. */
    thresholdStart?: number;
    /** Silence threshold used to end recording. */
    thresholdEnd?: number;
    /**
     * Seconds of silence before ending.
     * NOTE(review): the package README shows a string default ('1.0'), so
     * both forms are accepted here — confirm against the installed version.
     */
    silence?: number | string;
    /** Automatically end the recording on silence (if the recorder supports it). */
    endOnSilence?: boolean;
    /** Recording backend to spawn, e.g. 'sox', 'rec' or 'arecord' (1.x option name). */
    recorder?: string;
    /** Recording device identifier, e.g. 'plughw:1'. */
    device?: string | null;
    /**
     * NOTE(review): appears to be the pre-1.0 option; kept so existing
     * callers still type-check — confirm whether it is still honoured.
     */
    verbose?: boolean;
    /**
     * NOTE(review): appears to be the pre-1.0 name of `recorder`; kept so
     * existing callers still type-check — confirm whether it is still honoured.
     */
    recordProgram?: string;
  }

  /** Handle for an in-progress recording returned by `record()`. */
  interface Recording {
    /** Returns the readable stream of captured audio data. */
    stream(): Readable;
    /** Stops the recording. */
    stop(): void;
    /** Pauses the recording. */
    pause(): void;
    /** Resumes a paused recording. */
    resume(): void;
  }

  /**
   * Starts a recording.
   *
   * @param options - Recorder configuration; library defaults apply when omitted.
   * @returns A handle exposing the audio stream and lifecycle controls.
   */
  export function record(options?: RecordOptions): Recording;
}
|
||||||
Reference in New Issue
Block a user