docs: update project documentation with simplified, focused content

- Streamline README, API, architecture, and usage documentation
- Reduce complexity and focus on core functionality
- Update roadmap with more pragmatic, near-term goals
- Simplify contributing guidelines
- Improve overall documentation clarity and readability
2025-02-05 10:40:27 +01:00
parent 8f8e3bd85e
commit 3e7f3920b2
10 changed files with 502 additions and 1451 deletions
--- a/docker/speech/Dockerfile
+++ b/docker/speech/Dockerfile
@@ -1,22 +1,29 @@
 # Use Python slim image as builder
-FROM python:3.10-slim as builder
+FROM python:3.10-slim AS builder

 # Install build dependencies
 RUN apt-get update && apt-get install -y \
    git \
-    build-essential \
-    portaudio19-dev \
-    && rm -rf /var/lib/apt/lists/*
+    curl \
+    wget

 # Create and activate virtual environment
 RUN python -m venv /opt/venv
 ENV PATH="/opt/venv/bin:$PATH"

 # Install Python dependencies with specific versions and CPU-only variants
-RUN pip install --no-cache-dir "numpy>=1.24.3,<2.0.0" && \
-    pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu && \
-    pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6 requests==2.31.0 && \
-    pip freeze > /opt/venv/requirements.txt
+RUN pip install --no-cache-dir \
+    "numpy>=1.24.3,<2.0" \
+    "sounddevice" \
+    "openwakeword" \
+    "faster-whisper" \
+    "transformers" \
+    "torch" \
+    "torchaudio" \
+    "huggingface_hub" \
+    "requests" \
+    "soundfile" \
+    "tflite-runtime"

 # Create final image
 FROM python:3.10-slim
@@ -28,31 +35,48 @@ ENV PATH="/opt/venv/bin:$PATH"
 # Install audio dependencies
 RUN apt-get update && apt-get install -y \
    portaudio19-dev \
-    python3-pyaudio \
-    alsa-utils \
-    libasound2 \
-    libasound2-plugins \
    pulseaudio \
-    pulseaudio-utils \
-    libpulse0 \
-    libportaudio2 \
-    && rm -rf /var/lib/apt/lists/* \
-    && mkdir -p /var/run/pulse /var/lib/pulse
+    alsa-utils \
+    curl \
+    wget

-# Create necessary directories
-RUN mkdir -p /models/wake_word /audio && \
-    chown -R 1000:1000 /models /audio && \
-    mkdir -p /home/user/.config/pulse && \
-    chown -R 1000:1000 /home/user
+# Create necessary directories with explicit permissions
+RUN mkdir -p /models/wake_word /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models \
+    && chmod -R 777 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
+
+# Download wake word models
+RUN wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/alexa_v0.1.tflite \
+    https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/alexa_v0.1.tflite \
+    && wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
+    https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
+    && chmod 644 /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/*.tflite
+
+# Set environment variables for model caching
+ENV HF_HOME=/models/cache
+ENV TRANSFORMERS_CACHE=/models/cache
+ENV HUGGINGFACE_HUB_CACHE=/models/cache
+
+# Copy scripts and set permissions explicitly
+COPY wake_word_detector.py /app/wake_word_detector.py
+COPY setup-audio.sh /setup-audio.sh
+
+# Ensure scripts are executable by any user
+RUN chmod 755 /setup-audio.sh /app/wake_word_detector.py
+
+# Create a non-root user with explicit UID and GID
+RUN addgroup --gid 1000 user && \
+    adduser --uid 1000 --gid 1000 --disabled-password --gecos '' user
+
+# Change ownership of directories
+RUN chown -R 1000:1000 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base \
+    /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
+
+# Switch to non-root user
+USER user

 # Set working directory
 WORKDIR /app

-# Copy the wake word detection script and audio setup script
-COPY wake_word_detector.py .
-COPY setup-audio.sh /setup-audio.sh
-RUN chmod +x /setup-audio.sh
-
 # Set environment variables
 ENV WHISPER_MODEL_PATH=/models \
    WAKEWORD_MODEL_PATH=/models/wake_word \
@@ -60,8 +84,5 @@ ENV WHISPER_MODEL_PATH=/models \
    PULSE_SERVER=unix:/run/user/1000/pulse/native \
    HOME=/home/user

-# Run as the host user
-USER 1000:1000
-
 # Start the application
 CMD ["/setup-audio.sh"]