feat: Enhance speech and AI configuration with advanced environment settings

- Update `.env.example` with comprehensive speech and AI configuration options - Modify Docker Compose speech configuration for more flexible audio and ASR settings - Enhance Dockerfile to support Python virtual environment and speech dependencies - Refactor environment loading to use Bun's file system utilities - Improve device listing tool with more detailed device statistics - Add support for multiple AI models and dynamic configuration
2025-02-10 03:28:58 +01:00
parent 986b1949cd
commit b6bd53b01a
10 changed files with 764 additions and 283 deletions
--- a/docker-compose.speech.yml
+++ b/docker-compose.speech.yml
@@ -4,17 +4,27 @@ services:
  homeassistant-mcp:
    image: homeassistant-mcp:latest
    environment:
+      # Speech Feature Flags
      - ENABLE_SPEECH_FEATURES=${ENABLE_SPEECH_FEATURES:-true}
      - ENABLE_WAKE_WORD=${ENABLE_WAKE_WORD:-true}
      - ENABLE_SPEECH_TO_TEXT=${ENABLE_SPEECH_TO_TEXT:-true}

+      # Audio Configuration
+      - NOISE_THRESHOLD=${NOISE_THRESHOLD:-0.05}
+      - MIN_SPEECH_DURATION=${MIN_SPEECH_DURATION:-1.0}
+      - SILENCE_DURATION=${SILENCE_DURATION:-0.5}
+      - SAMPLE_RATE=${SAMPLE_RATE:-16000}
+      - CHANNELS=${CHANNELS:-1}
+      - CHUNK_SIZE=${CHUNK_SIZE:-1024}
+      - PULSE_SERVER=${PULSE_SERVER:-unix:/run/user/1000/pulse/native}
+
  fast-whisper:
    image: onerahmet/openai-whisper-asr-webservice:latest
    volumes:
      - whisper-models:/models
      - audio-data:/audio
    environment:
-      - ASR_MODEL=base
+      - ASR_MODEL=${WHISPER_MODEL_TYPE:-base}
      - ASR_ENGINE=faster_whisper
      - WHISPER_BEAM_SIZE=5
      - COMPUTE_TYPE=float32
@@ -27,7 +37,7 @@ services:
          cpus: '4.0'
          memory: 2G
    healthcheck:
-      test: [ "CMD", "curl", "-f", "http://localhost:9000/asr/health" ]
+      test: [ "CMD", "curl", "-f", "http://localhost:9000/health" ]
      interval: 30s
      timeout: 10s
      retries: 3
@@ -40,10 +50,23 @@ services:
    volumes:
      - /run/user/1000/pulse/native:/run/user/1000/pulse/native
    environment:
-      - PULSE_SERVER=unix:/run/user/1000/pulse/native
+      - PULSE_SERVER=${PULSE_SERVER:-unix:/run/user/1000/pulse/native}
+      - PULSE_COOKIE=/run/user/1000/pulse/cookie
+      - PYTHONUNBUFFERED=1
+      - OPENWAKEWORD_MODEL=hey_jarvis
+      - OPENWAKEWORD_THRESHOLD=0.5
+      - MICROPHONE_COMMAND=arecord -D hw:0,0 -f S16_LE -c 1 -r 16000 -t raw
    group_add:
-      - audio
+      - "${AUDIO_GID:-29}"
    network_mode: host
+    privileged: true
+    entrypoint: >
+      /bin/bash -c " apt-get update && apt-get install -y pulseaudio alsa-utils && rm -rf /var/lib/apt/lists/* && /run.sh"
+    healthcheck:
+      test: [ "CMD-SHELL", "pactl info > /dev/null 2>&1 || exit 1" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3

 volumes:
  whisper-models: