feat(speech): enhance speech processing with advanced audio setup and detection

- Add audio setup script for PulseAudio configuration
- Improve wake word detection with advanced noise filtering
- Implement continuous transcription and command processing
- Update speech Dockerfile with additional audio dependencies
- Enhance logging and error handling in wake word detector
2025-02-04 22:51:06 +01:00
parent e1db799b1d
commit b9727981cc
4 changed files with 374 additions and 103 deletions
--- a/docker/speech/Dockerfile
+++ b/docker/speech/Dockerfile
@@ -13,9 +13,10 @@ RUN python -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"

 # Install Python dependencies with specific versions and CPU-only variants
-RUN pip install --no-cache-dir numpy==1.24.3
-RUN pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu
-RUN pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6
+RUN pip install --no-cache-dir "numpy>=1.24.3,<2.0.0" && \
+    pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu && \
+    pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6 requests==2.31.0 && \
+    pip freeze > /opt/venv/requirements.txt

 # Create final image
 FROM python:3.10-slim
@@ -24,10 +25,14 @@ FROM python:3.10-slim
 COPY --from=builder /opt/venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"

-# Install only runtime dependencies
+# Install audio dependencies
 RUN apt-get update && apt-get install -y \
    portaudio19-dev \
    python3-pyaudio \
+    alsa-utils \
+    libasound2 \
+    libasound2-plugins \
+    pulseaudio \
    && rm -rf /var/lib/apt/lists/*

 # Create necessary directories
@@ -55,5 +60,9 @@ ENV PYTHONMALLOC=malloc \
 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD ps aux | grep '[p]ython' || exit 1

-# Run the wake word detection service with resource constraints
-CMD ["python", "-X", "faulthandler", "wake_word_detector.py"] 
+# Copy audio setup script
+COPY setup-audio.sh /setup-audio.sh
+RUN chmod +x /setup-audio.sh
+
+# Start command: run the audio setup script, then exec Python so it replaces the bash wrapper and receives container signals directly
+CMD ["/bin/bash", "-c", "/setup-audio.sh && exec python -u wake_word_detector.py"]