docs: update project documentation with simplified, focused content

- Streamline README, API, architecture, and usage documentation
- Reduce complexity and focus on core functionality
- Update roadmap with more pragmatic, near-term goals
- Simplify contributing guidelines
- Improve overall documentation clarity and readability
2025-02-05 10:40:27 +01:00
parent 8f8e3bd85e
commit 3e7f3920b2
10 changed files with 502 additions and 1451 deletions
--- a/docker/speech/Dockerfile
+++ b/docker/speech/Dockerfile
@@ -1,22 +1,29 @@
 # Use Python slim image as builder
-FROM python:3.10-slim as builder
+FROM python:3.10-slim AS builder

 # Install build dependencies
 RUN apt-get update && apt-get install -y \
    git \
-    build-essential \
-    portaudio19-dev \
-    && rm -rf /var/lib/apt/lists/*
+    curl \
+    wget

 # Create and activate virtual environment
 RUN python -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"

 # Install Python dependencies with specific versions and CPU-only variants
-RUN pip install --no-cache-dir "numpy>=1.24.3,<2.0.0" && \
-    pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu && \
-    pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6 requests==2.31.0 && \
-    pip freeze > /opt/venv/requirements.txt
+RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu \
+    "numpy>=1.24.3,<2.0" \
+    "sounddevice" \
+    "openwakeword" \
+    "faster-whisper" \
+    "transformers" \
+    "torch" \
+    "torchaudio" \
+    "huggingface_hub" \
+    "requests" \
+    "soundfile" \
+    "tflite-runtime"

 # Create final image
 FROM python:3.10-slim
@@ -28,31 +35,49 @@ ENV PATH="/opt/venv/bin:$PATH"
 # Install audio dependencies
 RUN apt-get update && apt-get install -y \
    portaudio19-dev \
-    python3-pyaudio \
-    alsa-utils \
-    libasound2 \
-    libasound2-plugins \
    pulseaudio \
-    pulseaudio-utils \
-    libpulse0 \
-    libportaudio2 \
-    && rm -rf /var/lib/apt/lists/* \
-    && mkdir -p /var/run/pulse /var/lib/pulse
+    alsa-utils \
+    curl \
+    wget \
+    && rm -rf /var/lib/apt/lists/*

-# Create necessary directories
-RUN mkdir -p /models/wake_word /audio && \
-    chown -R 1000:1000 /models /audio && \
-    mkdir -p /home/user/.config/pulse && \
-    chown -R 1000:1000 /home/user
+# Create necessary directories, world-writable for now (NOTE(review): ownership is set to 1000:1000 below; tighten 777 if no other UID must write here)
+RUN mkdir -p /models/wake_word /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models \
+    && chmod -R 777 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
+
+# Download wake word models
+RUN wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/alexa_v0.1.tflite \
+    https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/alexa_v0.1.tflite \
+    && wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
+    https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
+    && chmod 644 /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/*.tflite
+
+# Set environment variables for model caching
+ENV HF_HOME=/models/cache
+ENV TRANSFORMERS_CACHE=/models/cache
+ENV HUGGINGFACE_HUB_CACHE=/models/cache
+
+# Copy scripts and set permissions explicitly
+COPY wake_word_detector.py /app/wake_word_detector.py
+COPY setup-audio.sh /setup-audio.sh
+
+# Ensure scripts are executable by any user
+RUN chmod 755 /setup-audio.sh /app/wake_word_detector.py
+
+# Create a non-root user with explicit UID and GID
+RUN addgroup --gid 1000 user && \
+    adduser --uid 1000 --gid 1000 --disabled-password --gecos '' user
+
+# Change ownership of directories
+RUN chown -R 1000:1000 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base \
+    /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
+
+# Switch to non-root user
+USER user

 # Set working directory
 WORKDIR /app

-# Copy the wake word detection script and audio setup script
-COPY wake_word_detector.py .
-COPY setup-audio.sh /setup-audio.sh
-RUN chmod +x /setup-audio.sh
-
 # Set environment variables
 ENV WHISPER_MODEL_PATH=/models \
    WAKEWORD_MODEL_PATH=/models/wake_word \
@@ -60,8 +85,5 @@ ENV WHISPER_MODEL_PATH=/models \
    PULSE_SERVER=unix:/run/user/1000/pulse/native \
    HOME=/home/user

-# Run as the host user
-USER 1000:1000
-
 # Start the application
 CMD ["/setup-audio.sh"] 
--- a/docker/speech/setup-audio.sh
+++ b/docker/speech/setup-audio.sh
@@ -1,25 +1,58 @@
 #!/bin/bash
+set -e  # Abort on the first command that fails
+set -x  # Trace every command so container logs show how far startup got
+
+echo "Starting audio setup script at $(date)"
+echo "Current user: $(whoami)"
+echo "Current directory: $(pwd)"
+
+# Print environment variables related to audio and speech
+echo "ENABLE_WAKE_WORD: ${ENABLE_WAKE_WORD}"
+echo "PULSE_SERVER: ${PULSE_SERVER}"
+echo "WHISPER_MODEL_PATH: ${WHISPER_MODEL_PATH}"

 # Wait for PulseAudio socket to be available
+max_wait=30
+wait_count=0
 while [ ! -e /run/user/1000/pulse/native ]; do
-    echo "Waiting for PulseAudio socket..."
+    echo "Waiting for PulseAudio socket... (${wait_count}/${max_wait})"
    sleep 1
+    wait_count=$((wait_count + 1))
+    if [ "$wait_count" -ge "$max_wait" ]; then
+        echo "ERROR: PulseAudio socket not available after ${max_wait} seconds"
+        exit 1
+    fi
 done

-# Test PulseAudio connection
-pactl info || {
-    echo "Failed to connect to PulseAudio server"
+# Verify PulseAudio connection; the extra listings are best-effort diagnostics and must not trip set -e
+if ! pactl info; then
+    echo "ERROR: Failed to connect to PulseAudio server"
+    pactl list short modules || true
+    pactl list short clients || true
    exit 1
-}
+fi

-# List audio devices
-pactl list sources || {
-    echo "Failed to list audio devices"
+# List audio devices with error handling
+if ! pactl list sources; then
+    echo "ERROR: Failed to list audio devices"
    exit 1
-}
+fi

-# Start the wake word detector
-python /app/wake_word_detector.py
+# Best effort only: the detector is launched via `python`, so a failed chmod (e.g. read-only mount) must not abort startup
+chmod +x /app/wake_word_detector.py || true
+
+# Start the detector in the background; process substitution (not a pipe) keeps $! pointing at python rather than tee
+echo "Starting wake word detector at $(date)"
+python /app/wake_word_detector.py > >(tee /audio/wake_word_detector.log) 2>&1 &
+wake_word_pid=$!
+
+# Give the detector a moment, then confirm the python process itself is still alive
+sleep 5
+if ! kill -0 "$wake_word_pid" 2>/dev/null; then
+    echo "ERROR: Wake word detector process died immediately"
+    cat /audio/wake_word_detector.log
+    exit 1
+fi

 # Mute the monitor to prevent feedback
 pactl set-source-mute alsa_output.pci-0000_00_1b.0.analog-stereo.monitor 1
@@ -30,5 +63,6 @@ pactl set-source-volume alsa_input.pci-0000_00_1b.0.analog-stereo 65%
 # Set speaker volume to 40%
 pactl set-sink-volume alsa_output.pci-0000_00_1b.0.analog-stereo 40%

-# Make the script executable
-chmod +x /setup-audio.sh 
+# Block on the background detector so the container stays up while it runs and exits (allowing a restart) when it stops
+echo "Audio setup complete. Keeping container alive."
+wait "$wake_word_pid"
--- a/docker/speech/wake_word_detector.py
+++ b/docker/speech/wake_word_detector.py
@@ -53,8 +53,8 @@ HASS_TOKEN = os.environ.get('HASS_TOKEN')

 def initialize_asr_model():
    """Initialize the ASR model with retries and timeout"""
-    model_path = os.environ.get('ASR_MODEL_PATH', '/models')
-    model_name = os.environ.get('ASR_MODEL', 'large-v3')
+    model_path = os.environ.get('WHISPER_MODEL_PATH', '/models')
+    model_name = os.environ.get('WHISPER_MODEL_TYPE', 'base')
    
    start_time = time.time()
    for attempt in range(MAX_MODEL_LOAD_RETRIES):