feat(speech): add speech-to-text and wake word detection modules

- Implement SpeechToText class with Docker-based transcription capabilities
- Add wake word detection using OpenWakeWord and faster-whisper models
- Create Dockerfile for speech processing container
- Develop comprehensive test suite for speech recognition functionality
- Include audio processing and event-driven transcription features
2025-02-04 19:08:01 +01:00
parent 47f11b3d95
commit 60f18f8e71
5 changed files with 649 additions and 246 deletions
--- a/docker/speech/Dockerfile
+++ b/docker/speech/Dockerfile
@@ -0,0 +1,39 @@
+FROM python:3.10-slim
+
+# System packages for the pip-installed audio libraries; skip recommended extras and clear apt lists in the same layer
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    git \
+    portaudio19-dev \
+    python3-pyaudio \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install CPU-only torch wheels, then faster-whisper (PyPI name "faster-whisper", imported as faster_whisper)
+RUN pip install --no-cache-dir torch torchaudio --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir faster-whisper
+
+# Install wake word detection and audio capture libraries
+RUN pip install --no-cache-dir openwakeword pyaudio sounddevice
+
+# Create the model and audio directories
+RUN mkdir -p /models /audio
+
+# Pre-download the base.en model into /models (download_model is a module-level function, not a WhisperModel method)
+RUN python -c "from faster_whisper import download_model; download_model('base.en', cache_dir='/models')"
+
+# Download OpenWakeWord models via openwakeword.utils; NOTE(review): 'ok_google' may not be an official model name and is presumably skipped - verify
+RUN mkdir -p /models/wake_word && \
+    python -c "import openwakeword.utils; openwakeword.utils.download_models(['hey_jarvis', 'ok_google', 'alexa'], '/models/wake_word')"
+
+WORKDIR /app
+
+# Add the wake word detector script to the working directory
+COPY wake_word_detector.py ./
+
+# Runtime configuration: model locations and unbuffered Python output
+ENV WHISPER_MODEL_PATH=/models \
+    WAKEWORD_MODEL_PATH=/models/wake_word \
+    PYTHONUNBUFFERED=1
+
+# Default command: start the wake word detection service
+CMD ["python", "wake_word_detector.py"]