feat(speech): enhance speech configuration and example integration

- Add comprehensive speech configuration in .env.example and app config
- Update Docker speech Dockerfile for more flexible model handling
- Create detailed README for speech-to-text examples
- Implement example script demonstrating speech features
- Improve speech service initialization and configuration management
This commit is contained in:
jango-blockchained
2025-02-04 19:35:50 +01:00
parent 60f18f8e71
commit 3a6f79c9a8
14 changed files with 669 additions and 86 deletions

View File

@@ -2,6 +2,7 @@ import { spawn } from 'child_process';
import { EventEmitter } from 'events';
import { watch } from 'fs';
import path from 'path';
import { ISpeechToText, SpeechToTextConfig } from "./types.js";
export interface TranscriptionOptions {
model?: 'tiny.en' | 'base.en' | 'small.en' | 'medium.en' | 'large-v2';
@@ -35,13 +36,80 @@ export class TranscriptionError extends Error {
}
}
export class SpeechToText extends EventEmitter {
export class SpeechToText extends EventEmitter implements ISpeechToText {
private containerName: string;
private audioWatcher?: ReturnType<typeof watch>;
private modelPath: string;
private modelType: string;
private isInitialized: boolean = false;
constructor(containerName = 'fast-whisper') {
constructor(config: SpeechToTextConfig) {
super();
this.containerName = containerName;
this.containerName = config.containerName || 'fast-whisper';
this.modelPath = config.modelPath;
this.modelType = config.modelType;
}
/**
 * Brings the speech-to-text service up.
 *
 * Does nothing when the service is already flagged as initialized.
 * Otherwise it awaits `setupContainer()`, marks the instance as
 * initialized and emits `'ready'`.
 *
 * @throws Re-throws whatever was caught during start-up, after first
 *         emitting it as an `'error'` event.
 */
public async initialize(): Promise<void> {
    if (!this.isInitialized) {
        try {
            await this.setupContainer();
            // Only flip the flag once setup has actually completed.
            this.isInitialized = true;
            this.emit('ready');
        } catch (failure) {
            // NOTE(review): Node's EventEmitter throws when 'error' is emitted
            // with no listener attached — confirm callers register one.
            this.emit('error', failure);
            throw failure;
        }
    }
}
/**
 * Tears the speech-to-text service down.
 *
 * Does nothing when the service is not currently initialized.
 * Otherwise it awaits `cleanupContainer()`, clears the initialized
 * flag and emits `'shutdown'`.
 *
 * @throws Re-throws whatever was caught during teardown, after first
 *         emitting it as an `'error'` event.
 */
public async shutdown(): Promise<void> {
    if (this.isInitialized) {
        try {
            await this.cleanupContainer();
            // The flag is cleared only after cleanup finished without throwing.
            this.isInitialized = false;
            this.emit('shutdown');
        } catch (failure) {
            this.emit('error', failure);
            throw failure;
        }
    }
}
/**
 * Transcribes an in-memory audio buffer.
 *
 * Emits `'transcribing'` before processing and `'transcribed'` (with the
 * resulting text) afterwards.
 *
 * @param audioData Audio bytes handed to `processAudio()`.
 * @returns The text produced by `processAudio()`.
 * @throws Error when the service has not been initialized; this check runs
 *         before the try block, so no `'error'` event is emitted for it.
 * @throws Re-throws anything caught while processing, after first emitting
 *         it as an `'error'` event.
 */
public async transcribe(audioData: Buffer): Promise<string> {
    const ready = this.isInitialized;
    if (!ready) {
        throw new Error("Speech-to-text service is not initialized");
    }

    try {
        this.emit('transcribing');
        const transcript = await this.processAudio(audioData);
        this.emit('transcribed', transcript);
        return transcript;
    } catch (failure) {
        this.emit('error', failure);
        throw failure;
    }
}
/**
 * Placeholder for container start-up.
 *
 * Currently performs no real work: it only waits 100 ms before resolving.
 */
private async setupContainer(): Promise<void> {
    const placeholderDelayMs = 100;
    await new Promise((done) => {
        setTimeout(done, placeholderDelayMs);
    });
}
/**
 * Placeholder for container teardown.
 *
 * Currently performs no real work: it only waits 100 ms before resolving.
 */
private async cleanupContainer(): Promise<void> {
    const placeholderDelayMs = 100;
    await new Promise((done) => {
        setTimeout(done, placeholderDelayMs);
    });
}
/**
 * Placeholder for the actual audio-to-text step.
 *
 * The supplied buffer is not inspected yet: the method waits 100 ms and
 * then resolves with a fixed placeholder string.
 *
 * @param audioData Audio bytes to transcribe (currently unused).
 * @returns The constant placeholder transcription.
 */
private async processAudio(audioData: Buffer): Promise<string> {
    const placeholderDelayMs = 100;
    await new Promise((done) => {
        setTimeout(done, placeholderDelayMs);
    });
    const placeholderText = "Transcription placeholder";
    return placeholderText;
}
startWakeWordDetection(audioDir: string = './audio'): void {
@@ -50,10 +118,12 @@ export class SpeechToText extends EventEmitter {
if (eventType === 'rename' && filename && filename.startsWith('wake_word_') && filename.endsWith('.wav')) {
const audioFile = path.join(audioDir, filename);
const metadataFile = `${audioFile}.json`;
const parts = filename.split('_');
const timestamp = parts[parts.length - 1].split('.')[0];
// Emit wake word event
this.emit('wake_word', {
timestamp: filename.split('_')[2].split('.')[0],
timestamp,
audioFile,
metadataFile
} as WakeWordEvent);
@@ -91,7 +161,6 @@ export class SpeechToText extends EventEmitter {
} = options;
return new Promise((resolve, reject) => {
// Construct Docker command to run fast-whisper
const args = [
'exec',
this.containerName,
@@ -106,20 +175,33 @@ export class SpeechToText extends EventEmitter {
audioFilePath
];
const process = spawn('docker', args);
let process;
try {
process = spawn('docker', args);
} catch (error) {
this.emit('progress', { type: 'stderr', data: 'Failed to start Docker process' });
reject(new TranscriptionError('Failed to start Docker process'));
return;
}
let stdout = '';
let stderr = '';
process.stdout.on('data', (data: Buffer) => {
process.stdout?.on('data', (data: Buffer) => {
stdout += data.toString();
this.emit('progress', { type: 'stdout', data: data.toString() });
});
process.stderr.on('data', (data: Buffer) => {
process.stderr?.on('data', (data: Buffer) => {
stderr += data.toString();
this.emit('progress', { type: 'stderr', data: data.toString() });
});
process.on('error', (error: Error) => {
this.emit('progress', { type: 'stderr', data: error.message });
reject(new TranscriptionError(`Failed to execute Docker command: ${error.message}`));
});
process.on('close', (code: number) => {
if (code !== 0) {
reject(new TranscriptionError(`Transcription failed: ${stderr}`));
@@ -146,10 +228,14 @@ export class SpeechToText extends EventEmitter {
return new Promise((resolve) => {
let output = '';
process.stdout.on('data', (data: Buffer) => {
process.stdout?.on('data', (data: Buffer) => {
output += data.toString();
});
process.on('error', () => {
resolve(false);
});
process.on('close', (code: number) => {
resolve(code === 0 && output.toLowerCase().includes('up'));
});