docs: update project documentation with simplified, focused content

- Streamline README, API, architecture, and usage documentation
- Reduce complexity and focus on core functionality
- Update roadmap with more pragmatic, near-term goals
- Simplify contributing guidelines
- Improve overall documentation clarity and readability
This commit is contained in:
jango-blockchained
2025-02-05 10:40:27 +01:00
parent 8f8e3bd85e
commit 3e7f3920b2
10 changed files with 502 additions and 1451 deletions

View File

@@ -1,22 +1,29 @@
# Use Python slim image as builder
FROM python:3.10-slim as builder
FROM python:3.10-slim AS builder
# Install build dependencies
RUN apt-get update && apt-get install -y \
git \
build-essential \
portaudio19-dev \
&& rm -rf /var/lib/apt/lists/*
curl \
wget
# Create and activate virtual environment
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"
# Install Python dependencies with specific versions and CPU-only variants
RUN pip install --no-cache-dir "numpy>=1.24.3,<2.0.0" && \
pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu && \
pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6 requests==2.31.0 && \
pip freeze > /opt/venv/requirements.txt
RUN pip install --no-cache-dir \
"numpy>=1.24.3,<2.0" \
"sounddevice" \
"openwakeword" \
"faster-whisper" \
"transformers" \
"torch" \
"torchaudio" \
"huggingface_hub" \
"requests" \
"soundfile" \
"tflite-runtime"
# Create final image
FROM python:3.10-slim
@@ -28,31 +35,48 @@ ENV PATH="/opt/venv/bin:$PATH"
# Install audio dependencies
RUN apt-get update && apt-get install -y \
portaudio19-dev \
python3-pyaudio \
alsa-utils \
libasound2 \
libasound2-plugins \
pulseaudio \
pulseaudio-utils \
libpulse0 \
libportaudio2 \
&& rm -rf /var/lib/apt/lists/* \
&& mkdir -p /var/run/pulse /var/lib/pulse
alsa-utils \
curl \
wget
# Create necessary directories
RUN mkdir -p /models/wake_word /audio && \
chown -R 1000:1000 /models /audio && \
mkdir -p /home/user/.config/pulse && \
chown -R 1000:1000 /home/user
# Create necessary directories with explicit permissions
RUN mkdir -p /models/wake_word /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models \
&& chmod -R 777 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
# Download wake word models
RUN wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/alexa_v0.1.tflite \
https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/alexa_v0.1.tflite \
&& wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
&& chmod 644 /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/*.tflite
# Set environment variables for model caching
ENV HF_HOME=/models/cache
ENV TRANSFORMERS_CACHE=/models/cache
ENV HUGGINGFACE_HUB_CACHE=/models/cache
# Copy scripts and set permissions explicitly
COPY wake_word_detector.py /app/wake_word_detector.py
COPY setup-audio.sh /setup-audio.sh
# Ensure scripts are executable by any user
RUN chmod 755 /setup-audio.sh /app/wake_word_detector.py
# Create a non-root user with explicit UID and GID
RUN addgroup --gid 1000 user && \
adduser --uid 1000 --gid 1000 --disabled-password --gecos '' user
# Change ownership of directories
RUN chown -R 1000:1000 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base \
/opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
# Switch to non-root user
USER user
# Set working directory
WORKDIR /app
# Copy the wake word detection script and audio setup script
COPY wake_word_detector.py .
COPY setup-audio.sh /setup-audio.sh
RUN chmod +x /setup-audio.sh
# Set environment variables
ENV WHISPER_MODEL_PATH=/models \
WAKEWORD_MODEL_PATH=/models/wake_word \
@@ -60,8 +84,5 @@ ENV WHISPER_MODEL_PATH=/models \
PULSE_SERVER=unix:/run/user/1000/pulse/native \
HOME=/home/user
# Run as the host user
USER 1000:1000
# Start the application
CMD ["/setup-audio.sh"]

View File

@@ -1,25 +1,58 @@
#!/bin/bash
set -e # Exit immediately if a command exits with a non-zero status
set -x # Print commands and their arguments as they are executed
echo "Starting audio setup script at $(date)"
echo "Current user: $(whoami)"
echo "Current directory: $(pwd)"
# Print environment variables related to audio and speech
echo "ENABLE_WAKE_WORD: ${ENABLE_WAKE_WORD}"
echo "PULSE_SERVER: ${PULSE_SERVER}"
echo "WHISPER_MODEL_PATH: ${WHISPER_MODEL_PATH}"
# Wait for PulseAudio socket to be available
max_wait=30
wait_count=0
while [ ! -e /run/user/1000/pulse/native ]; do
echo "Waiting for PulseAudio socket..."
echo "Waiting for PulseAudio socket... (${wait_count}/${max_wait})"
sleep 1
wait_count=$((wait_count + 1))
if [ $wait_count -ge $max_wait ]; then
echo "ERROR: PulseAudio socket not available after ${max_wait} seconds"
exit 1
fi
done
# Test PulseAudio connection
pactl info || {
echo "Failed to connect to PulseAudio server"
# Verify PulseAudio connection with detailed error handling
if ! pactl info; then
echo "ERROR: Failed to connect to PulseAudio server"
pactl list short modules
pactl list short clients
exit 1
}
fi
# List audio devices
pactl list sources || {
echo "Failed to list audio devices"
# List audio devices with error handling
if ! pactl list sources; then
echo "ERROR: Failed to list audio devices"
exit 1
}
fi
# Start the wake word detector
python /app/wake_word_detector.py
# Ensure wake word detector script is executable
chmod +x /app/wake_word_detector.py
# Start the wake word detector with logging
echo "Starting wake word detector at $(date)"
python /app/wake_word_detector.py 2>&1 | tee /audio/wake_word_detector.log &
wake_word_pid=$!
# Wait and check if the process is still running
sleep 5
if ! kill -0 $wake_word_pid 2>/dev/null; then
echo "ERROR: Wake word detector process died immediately"
cat /audio/wake_word_detector.log
exit 1
fi
# Mute the monitor to prevent feedback
pactl set-source-mute alsa_output.pci-0000_00_1b.0.analog-stereo.monitor 1
@@ -30,5 +63,6 @@ pactl set-source-volume alsa_input.pci-0000_00_1b.0.analog-stereo 65%
# Set speaker volume to 40%
pactl set-sink-volume alsa_output.pci-0000_00_1b.0.analog-stereo 40%
# Make the script executable
chmod +x /setup-audio.sh
# Keep the script running to prevent container exit
echo "Audio setup complete. Keeping container alive."
tail -f /dev/null

View File

@@ -53,8 +53,8 @@ HASS_TOKEN = os.environ.get('HASS_TOKEN')
def initialize_asr_model():
"""Initialize the ASR model with retries and timeout"""
model_path = os.environ.get('ASR_MODEL_PATH', '/models')
model_name = os.environ.get('ASR_MODEL', 'large-v3')
model_path = os.environ.get('WHISPER_MODEL_PATH', '/models')
model_name = os.environ.get('WHISPER_MODEL_TYPE', 'base')
start_time = time.time()
for attempt in range(MAX_MODEL_LOAD_RETRIES):