docs: update project documentation with simplified, focused content
- Streamline README, API, architecture, and usage documentation - Reduce complexity and focus on core functionality - Update roadmap with more pragmatic, near-term goals - Simplify contributing guidelines - Improve overall documentation clarity and readability
This commit is contained in:
@@ -1,22 +1,29 @@
|
||||
# Use Python slim image as builder
|
||||
FROM python:3.10-slim as builder
|
||||
FROM python:3.10-slim AS builder
|
||||
|
||||
# Install build dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
git \
|
||||
build-essential \
|
||||
portaudio19-dev \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
curl \
|
||||
wget
|
||||
|
||||
# Create and activate virtual environment
|
||||
RUN python -m venv /opt/venv
|
||||
ENV PATH="/opt/venv/bin:$PATH"
|
||||
|
||||
# Install Python dependencies with specific versions and CPU-only variants
|
||||
RUN pip install --no-cache-dir "numpy>=1.24.3,<2.0.0" && \
|
||||
pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu && \
|
||||
pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6 requests==2.31.0 && \
|
||||
pip freeze > /opt/venv/requirements.txt
|
||||
RUN pip install --no-cache-dir \
|
||||
"numpy>=1.24.3,<2.0" \
|
||||
"sounddevice" \
|
||||
"openwakeword" \
|
||||
"faster-whisper" \
|
||||
"transformers" \
|
||||
"torch" \
|
||||
"torchaudio" \
|
||||
"huggingface_hub" \
|
||||
"requests" \
|
||||
"soundfile" \
|
||||
"tflite-runtime"
|
||||
|
||||
# Create final image
|
||||
FROM python:3.10-slim
|
||||
@@ -28,31 +35,48 @@ ENV PATH="/opt/venv/bin:$PATH"
|
||||
# Install audio dependencies
|
||||
RUN apt-get update && apt-get install -y \
|
||||
portaudio19-dev \
|
||||
python3-pyaudio \
|
||||
alsa-utils \
|
||||
libasound2 \
|
||||
libasound2-plugins \
|
||||
pulseaudio \
|
||||
pulseaudio-utils \
|
||||
libpulse0 \
|
||||
libportaudio2 \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& mkdir -p /var/run/pulse /var/lib/pulse
|
||||
alsa-utils \
|
||||
curl \
|
||||
wget
|
||||
|
||||
# Create necessary directories
|
||||
RUN mkdir -p /models/wake_word /audio && \
|
||||
chown -R 1000:1000 /models /audio && \
|
||||
mkdir -p /home/user/.config/pulse && \
|
||||
chown -R 1000:1000 /home/user
|
||||
# Create necessary directories with explicit permissions
|
||||
RUN mkdir -p /models/wake_word /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models \
|
||||
&& chmod -R 777 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
|
||||
|
||||
# Download wake word models
|
||||
RUN wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/alexa_v0.1.tflite \
|
||||
https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/alexa_v0.1.tflite \
|
||||
&& wget -O /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
|
||||
https://github.com/dscripka/openWakeWord/raw/main/openwakeword/resources/models/hey_jarvis_v0.1.tflite \
|
||||
&& chmod 644 /opt/venv/lib/python3.10/site-packages/openwakeword/resources/models/*.tflite
|
||||
|
||||
# Set environment variables for model caching
|
||||
ENV HF_HOME=/models/cache
|
||||
ENV TRANSFORMERS_CACHE=/models/cache
|
||||
ENV HUGGINGFACE_HUB_CACHE=/models/cache
|
||||
|
||||
# Copy scripts and set permissions explicitly
|
||||
COPY wake_word_detector.py /app/wake_word_detector.py
|
||||
COPY setup-audio.sh /setup-audio.sh
|
||||
|
||||
# Ensure scripts are executable by any user
|
||||
RUN chmod 755 /setup-audio.sh /app/wake_word_detector.py
|
||||
|
||||
# Create a non-root user with explicit UID and GID
|
||||
RUN addgroup --gid 1000 user && \
|
||||
adduser --uid 1000 --gid 1000 --disabled-password --gecos '' user
|
||||
|
||||
# Change ownership of directories
|
||||
RUN chown -R 1000:1000 /models /audio /app /models/cache /models/models--Systran--faster-whisper-base \
|
||||
/opt/venv/lib/python3.10/site-packages/openwakeword/resources/models
|
||||
|
||||
# Switch to non-root user
|
||||
USER user
|
||||
|
||||
# Set working directory
|
||||
WORKDIR /app
|
||||
|
||||
# Copy the wake word detection script and audio setup script
|
||||
COPY wake_word_detector.py .
|
||||
COPY setup-audio.sh /setup-audio.sh
|
||||
RUN chmod +x /setup-audio.sh
|
||||
|
||||
# Set environment variables
|
||||
ENV WHISPER_MODEL_PATH=/models \
|
||||
WAKEWORD_MODEL_PATH=/models/wake_word \
|
||||
@@ -60,8 +84,5 @@ ENV WHISPER_MODEL_PATH=/models \
|
||||
PULSE_SERVER=unix:/run/user/1000/pulse/native \
|
||||
HOME=/home/user
|
||||
|
||||
# Run as the host user
|
||||
USER 1000:1000
|
||||
|
||||
# Start the application
|
||||
CMD ["/setup-audio.sh"]
|
||||
@@ -1,25 +1,58 @@
|
||||
#!/bin/bash
|
||||
set -e # Exit immediately if a command exits with a non-zero status
|
||||
set -x # Print commands and their arguments as they are executed
|
||||
|
||||
echo "Starting audio setup script at $(date)"
|
||||
echo "Current user: $(whoami)"
|
||||
echo "Current directory: $(pwd)"
|
||||
|
||||
# Print environment variables related to audio and speech
|
||||
echo "ENABLE_WAKE_WORD: ${ENABLE_WAKE_WORD}"
|
||||
echo "PULSE_SERVER: ${PULSE_SERVER}"
|
||||
echo "WHISPER_MODEL_PATH: ${WHISPER_MODEL_PATH}"
|
||||
|
||||
# Wait for PulseAudio socket to be available
|
||||
max_wait=30
|
||||
wait_count=0
|
||||
while [ ! -e /run/user/1000/pulse/native ]; do
|
||||
echo "Waiting for PulseAudio socket..."
|
||||
echo "Waiting for PulseAudio socket... (${wait_count}/${max_wait})"
|
||||
sleep 1
|
||||
wait_count=$((wait_count + 1))
|
||||
if [ $wait_count -ge $max_wait ]; then
|
||||
echo "ERROR: PulseAudio socket not available after ${max_wait} seconds"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
# Test PulseAudio connection
|
||||
pactl info || {
|
||||
echo "Failed to connect to PulseAudio server"
|
||||
# Verify PulseAudio connection with detailed error handling
|
||||
if ! pactl info; then
|
||||
echo "ERROR: Failed to connect to PulseAudio server"
|
||||
pactl list short modules
|
||||
pactl list short clients
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
# List audio devices
|
||||
pactl list sources || {
|
||||
echo "Failed to list audio devices"
|
||||
# List audio devices with error handling
|
||||
if ! pactl list sources; then
|
||||
echo "ERROR: Failed to list audio devices"
|
||||
exit 1
|
||||
}
|
||||
fi
|
||||
|
||||
# Start the wake word detector
|
||||
python /app/wake_word_detector.py
|
||||
# Ensure wake word detector script is executable
|
||||
chmod +x /app/wake_word_detector.py
|
||||
|
||||
# Start the wake word detector with logging
|
||||
echo "Starting wake word detector at $(date)"
|
||||
python /app/wake_word_detector.py 2>&1 | tee /audio/wake_word_detector.log &
|
||||
wake_word_pid=$!
|
||||
|
||||
# Wait and check if the process is still running
|
||||
sleep 5
|
||||
if ! kill -0 $wake_word_pid 2>/dev/null; then
|
||||
echo "ERROR: Wake word detector process died immediately"
|
||||
cat /audio/wake_word_detector.log
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Mute the monitor to prevent feedback
|
||||
pactl set-source-mute alsa_output.pci-0000_00_1b.0.analog-stereo.monitor 1
|
||||
@@ -30,5 +63,6 @@ pactl set-source-volume alsa_input.pci-0000_00_1b.0.analog-stereo 65%
|
||||
# Set speaker volume to 40%
|
||||
pactl set-sink-volume alsa_output.pci-0000_00_1b.0.analog-stereo 40%
|
||||
|
||||
# Make the script executable
|
||||
chmod +x /setup-audio.sh
|
||||
# Keep the script running to prevent container exit
|
||||
echo "Audio setup complete. Keeping container alive."
|
||||
tail -f /dev/null
|
||||
@@ -53,8 +53,8 @@ HASS_TOKEN = os.environ.get('HASS_TOKEN')
|
||||
|
||||
def initialize_asr_model():
|
||||
"""Initialize the ASR model with retries and timeout"""
|
||||
model_path = os.environ.get('ASR_MODEL_PATH', '/models')
|
||||
model_name = os.environ.get('ASR_MODEL', 'large-v3')
|
||||
model_path = os.environ.get('WHISPER_MODEL_PATH', '/models')
|
||||
model_name = os.environ.get('WHISPER_MODEL_TYPE', 'base')
|
||||
|
||||
start_time = time.time()
|
||||
for attempt in range(MAX_MODEL_LOAD_RETRIES):
|
||||
|
||||
Reference in New Issue
Block a user