refactor: improve Docker speech container audio configuration and user permissions

- Update Dockerfile to enhance audio setup and user management
- Modify setup-audio.sh to add robust PulseAudio socket and device checks
- Add proper user and directory permissions for audio and model directories
- Simplify container startup process and improve audio device detection
fix: correct Mermaid diagram syntax for better rendering
2025-02-05 03:30:15 +01:00 · 2025-02-05 03:10:25 +01:00
4 changed files with 46 additions and 27 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -87,4 +87,6 @@ site/
 # Python
 __pycache__/
 *.py[cod]
-*$py.class
+*$py.class
+
+models/
--- a/README.md
+++ b/README.md
@@ -58,17 +58,17 @@ Our architecture is engineered for performance, scalability, and security. The f
 ```mermaid
 graph TD
    subgraph Client
-       A[Client Application<br>(Web / Mobile / Voice)]
+        A["Client Application (Web/Mobile/Voice)"]
    end
    subgraph CDN
-       B[CDN / Cache]
+        B["CDN / Cache"]
    end
    subgraph Server
-       C[Bun Native Server]
-       E[NLP Engine &<br>Language Processing Module]
+        C["Bun Native Server"]
+        E["NLP Engine & Language Processing Module"]
    end
    subgraph Integration
-       D[Home Assistant<br>(Devices, Lights, Thermostats)]
+        D["Home Assistant (Devices, Lights, Thermostats)"]
    end

    A -->|HTTP Request| B
--- a/docker/speech/Dockerfile
+++ b/docker/speech/Dockerfile
@@ -33,36 +33,35 @@ RUN apt-get update && apt-get install -y \
    libasound2 \
    libasound2-plugins \
    pulseaudio \
-    && rm -rf /var/lib/apt/lists/*
+    pulseaudio-utils \
+    libpulse0 \
+    libportaudio2 \
+    && rm -rf /var/lib/apt/lists/* \
+    && mkdir -p /var/run/pulse /var/lib/pulse

 # Create necessary directories
-RUN mkdir -p /models/wake_word /audio
+RUN mkdir -p /models/wake_word /audio && \
+    chown -R 1000:1000 /models /audio && \
+    mkdir -p /home/user/.config/pulse && \
+    chown -R 1000:1000 /home/user

 # Set working directory
 WORKDIR /app

-# Copy the wake word detection script
+# Copy the wake word detection script and audio setup script
 COPY wake_word_detector.py .
+COPY setup-audio.sh /setup-audio.sh
+RUN chmod +x /setup-audio.sh

 # Set environment variables
 ENV WHISPER_MODEL_PATH=/models \
    WAKEWORD_MODEL_PATH=/models/wake_word \
    PYTHONUNBUFFERED=1 \
-    ASR_MODEL=base.en \
-    ASR_MODEL_PATH=/models
+    PULSE_SERVER=unix:/run/user/1000/pulse/native \
+    HOME=/home/user

-# Add resource limits to Python
-ENV PYTHONMALLOC=malloc \
-    MALLOC_TRIM_THRESHOLD_=100000 \
-    PYTHONDEVMODE=1
+# Run as the host user
+USER 1000:1000

-# Add healthcheck
-HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
-    CMD ps aux | grep '[p]ython' || exit 1
-
-# Copy audio setup script
-COPY setup-audio.sh /setup-audio.sh
-RUN chmod +x /setup-audio.sh
-
-# Start command
-CMD ["/bin/bash", "-c", "/setup-audio.sh && python -u wake_word_detector.py"] 
+# Start the application
+CMD ["/setup-audio.sh"] 
--- a/docker/speech/setup-audio.sh
+++ b/docker/speech/setup-audio.sh
@@ -1,7 +1,25 @@
 #!/bin/bash

-# Wait for PulseAudio to be ready
-sleep 2
+# Wait for PulseAudio socket to be available
+while [ ! -e /run/user/1000/pulse/native ]; do
+    echo "Waiting for PulseAudio socket..."
+    sleep 1
+done
+
+# Test PulseAudio connection
+pactl info || {
+    echo "Failed to connect to PulseAudio server"
+    exit 1
+}
+
+# List audio devices
+pactl list sources || {
+    echo "Failed to list audio devices"
+    exit 1
+}

 # Mute the monitor to prevent feedback
 pactl set-source-mute alsa_output.pci-0000_00_1b.0.analog-stereo.monitor 1
+
+# Start the wake word detector last (it runs in the foreground); exec so it receives stop signals
+exec python /app/wake_word_detector.py