feat(speech): enhance speech configuration and example integration

- Add comprehensive speech configuration in .env.example and app config - Update Docker speech Dockerfile for more flexible model handling - Create detailed README for speech-to-text examples - Implement example script demonstrating speech features - Improve speech service initialization and configuration management
2025-02-04 19:35:50 +01:00
parent 60f18f8e71
commit 3a6f79c9a8
14 changed files with 669 additions and 86 deletions
--- a/src/speech/speechToText.ts
+++ b/src/speech/speechToText.ts
@@ -2,6 +2,7 @@ import { spawn } from 'child_process';
 import { EventEmitter } from 'events';
 import { watch } from 'fs';
 import path from 'path';
+import { ISpeechToText, SpeechToTextConfig } from "./types.js";

 export interface TranscriptionOptions {
    model?: 'tiny.en' | 'base.en' | 'small.en' | 'medium.en' | 'large-v2';
@@ -35,13 +36,80 @@ export class TranscriptionError extends Error {
    }
 }

-export class SpeechToText extends EventEmitter {
+export class SpeechToText extends EventEmitter implements ISpeechToText {
    private containerName: string;
    private audioWatcher?: ReturnType<typeof watch>;
+    private modelPath: string;
+    private modelType: string;
+    private isInitialized: boolean = false;

-    constructor(containerName = 'fast-whisper') {
+    constructor(config: SpeechToTextConfig) {
        super();
-        this.containerName = containerName;
+        this.containerName = config.containerName || 'fast-whisper';
+        this.modelPath = config.modelPath;
+        this.modelType = config.modelType;
+    }
+
+    public async initialize(): Promise<void> {
+        if (this.isInitialized) {
+            return;
+        }
+        try {
+            // Initialization logic will be implemented here
+            await this.setupContainer();
+            this.isInitialized = true;
+            this.emit('ready');
+        } catch (error) {
+            this.emit('error', error);
+            throw error;
+        }
+    }
+
+    public async shutdown(): Promise<void> {
+        if (!this.isInitialized) {
+            return;
+        }
+        try {
+            // Cleanup logic will be implemented here
+            await this.cleanupContainer();
+            this.isInitialized = false;
+            this.emit('shutdown');
+        } catch (error) {
+            this.emit('error', error);
+            throw error;
+        }
+    }
+
+    public async transcribe(audioData: Buffer): Promise<string> {
+        if (!this.isInitialized) {
+            throw new Error("Speech-to-text service is not initialized");
+        }
+        try {
+            // Transcription logic will be implemented here
+            this.emit('transcribing');
+            const result = await this.processAudio(audioData);
+            this.emit('transcribed', result);
+            return result;
+        } catch (error) {
+            this.emit('error', error);
+            throw error;
+        }
+    }
+
+    private async setupContainer(): Promise<void> {
+        // Container setup logic will be implemented here
+        await new Promise(resolve => setTimeout(resolve, 100)); // Placeholder
+    }
+
+    private async cleanupContainer(): Promise<void> {
+        // Container cleanup logic will be implemented here
+        await new Promise(resolve => setTimeout(resolve, 100)); // Placeholder
+    }
+
+    private async processAudio(audioData: Buffer): Promise<string> {
+        // Audio processing logic will be implemented here
+        await new Promise(resolve => setTimeout(resolve, 100)); // Placeholder
+        return "Transcription placeholder";
    }

    startWakeWordDetection(audioDir: string = './audio'): void {
@@ -50,10 +118,12 @@ export class SpeechToText extends EventEmitter {
            if (eventType === 'rename' && filename && filename.startsWith('wake_word_') && filename.endsWith('.wav')) {
                const audioFile = path.join(audioDir, filename);
                const metadataFile = `${audioFile}.json`;
+                const parts = filename.split('_');
+                const timestamp = parts[parts.length - 1].split('.')[0];

                // Emit wake word event
                this.emit('wake_word', {
-                    timestamp: filename.split('_')[2].split('.')[0],
+                    timestamp,
                    audioFile,
                    metadataFile
                } as WakeWordEvent);
@@ -91,7 +161,6 @@ export class SpeechToText extends EventEmitter {
        } = options;

        return new Promise((resolve, reject) => {
-            // Construct Docker command to run fast-whisper
            const args = [
                'exec',
                this.containerName,
@@ -106,20 +175,33 @@ export class SpeechToText extends EventEmitter {
                audioFilePath
            ];

-            const process = spawn('docker', args);
+            let process;
+            try {
+                process = spawn('docker', args);
+            } catch (error) {
+                this.emit('progress', { type: 'stderr', data: 'Failed to start Docker process' });
+                reject(new TranscriptionError('Failed to start Docker process'));
+                return;
+            }
+
            let stdout = '';
            let stderr = '';

-            process.stdout.on('data', (data: Buffer) => {
+            process.stdout?.on('data', (data: Buffer) => {
                stdout += data.toString();
                this.emit('progress', { type: 'stdout', data: data.toString() });
            });

-            process.stderr.on('data', (data: Buffer) => {
+            process.stderr?.on('data', (data: Buffer) => {
                stderr += data.toString();
                this.emit('progress', { type: 'stderr', data: data.toString() });
            });

+            process.on('error', (error: Error) => {
+                this.emit('progress', { type: 'stderr', data: error.message });
+                reject(new TranscriptionError(`Failed to execute Docker command: ${error.message}`));
+            });
+
            process.on('close', (code: number) => {
                if (code !== 0) {
                    reject(new TranscriptionError(`Transcription failed: ${stderr}`));
@@ -146,10 +228,14 @@ export class SpeechToText extends EventEmitter {

            return new Promise((resolve) => {
                let output = '';
-                process.stdout.on('data', (data: Buffer) => {
+                process.stdout?.on('data', (data: Buffer) => {
                    output += data.toString();
                });

+                process.on('error', () => {
+                    resolve(false);
+                });
+
                process.on('close', (code: number) => {
                    resolve(code === 0 && output.toLowerCase().includes('up'));
                });