# homeassistant-mcp/docker-compose.speech.yml

# Compose file format version declared for this file.
version: '3.8'

# Service definitions: homeassistant-mcp, fast-whisper and wake-word.
services:
  # Speech-related environment for the homeassistant-mcp container.
  # Each value uses ${NAME:-default} substitution: the variable's value is used
  # when it is set and non-empty, otherwise the default after ":-" applies.
  homeassistant-mcp:
    image: homeassistant-mcp:latest
    environment:
      # Speech feature flags (all default to "true")
      ENABLE_SPEECH_FEATURES: "${ENABLE_SPEECH_FEATURES:-true}"
      ENABLE_WAKE_WORD: "${ENABLE_WAKE_WORD:-true}"
      ENABLE_SPEECH_TO_TEXT: "${ENABLE_SPEECH_TO_TEXT:-true}"

      # Audio configuration
      NOISE_THRESHOLD: "${NOISE_THRESHOLD:-0.05}"
      MIN_SPEECH_DURATION: "${MIN_SPEECH_DURATION:-1.0}"
      SILENCE_DURATION: "${SILENCE_DURATION:-0.5}"
      SAMPLE_RATE: "${SAMPLE_RATE:-16000}"
      CHANNELS: "${CHANNELS:-1}"
      CHUNK_SIZE: "${CHUNK_SIZE:-1024}"
      # Defaults to the PulseAudio native socket under /run/user/1000.
      PULSE_SERVER: "${PULSE_SERVER:-unix:/run/user/1000/pulse/native}"

  # Speech-to-text container built from the openai-whisper-asr-webservice
  # image, published on host port 9000 and capped at 4 CPUs / 2G of memory.
  fast-whisper:
    image: onerahmet/openai-whisper-asr-webservice:latest
    environment:
      # Model name is taken from WHISPER_MODEL_TYPE, defaulting to "base".
      ASR_MODEL: "${WHISPER_MODEL_TYPE:-base}"
      ASR_ENGINE: "faster_whisper"
      WHISPER_BEAM_SIZE: "5"
      COMPUTE_TYPE: "float32"
      LANGUAGE: "en"
    volumes:
      - whisper-models:/models
      - audio-data:/audio
    ports:
      - "9000:9000"
    deploy:
      resources:
        limits:
          cpus: '4.0'
          memory: 2G
    healthcheck:
      # Requests /health on port 9000 from inside the container.
      # NOTE(review): assumes the image ships curl and that the service
      # exposes a /health endpoint - confirm against the image.
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:9000/health"
      interval: 30s
      timeout: 10s
      retries: 3

  # Wake-word container built from the rhasspy/wyoming-openwakeword image.
  # Runs on the host network with the host's /dev/snd devices and the
  # PulseAudio native socket made available inside the container.
  wake-word:
    image: rhasspy/wyoming-openwakeword:latest
    restart: unless-stopped
    network_mode: host
    # NOTE(review): privileged mode is broader than the explicit device mapping
    # and audio group below - confirm whether it is still required.
    privileged: true
    devices:
      - /dev/snd:/dev/snd
    group_add:
      # Supplementary group for audio device access; AUDIO_GID defaults to 29.
      - "${AUDIO_GID:-29}"
    volumes:
      # NOTE(review): this path is hard-coded for UID 1000 and does not follow
      # an overridden PULSE_SERVER value.
      - /run/user/1000/pulse/native:/run/user/1000/pulse/native
    environment:
      - PULSE_SERVER=${PULSE_SERVER:-unix:/run/user/1000/pulse/native}
      # NOTE(review): only the native socket is bind-mounted above; confirm
      # that this cookie file actually exists inside the container.
      - PULSE_COOKIE=/run/user/1000/pulse/cookie
      - PYTHONUNBUFFERED=1
      - OPENWAKEWORD_MODEL=hey_jarvis
      - OPENWAKEWORD_THRESHOLD=0.5
      - MICROPHONE_COMMAND=arecord -D hw:0,0 -f S16_LE -c 1 -r 16000 -t raw
    # Installs pulseaudio and alsa-utils on every container start, then hands
    # over to /run.sh. "exec" replaces the wrapper shell with /run.sh so that
    # stop signals reach the service instead of the wrapper shell.
    entrypoint:
      - /bin/bash
      - "-c"
      - >-
        apt-get update
        && apt-get install -y pulseaudio alsa-utils
        && rm -rf /var/lib/apt/lists/*
        && exec /run.sh
    healthcheck:
      # Healthy while "pactl info" succeeds inside the container.
      # NOTE(review): presumably pactl is provided by the packages installed in
      # the entrypoint - confirm.
      test: [ "CMD-SHELL", "pactl info > /dev/null 2>&1 || exit 1" ]
      interval: 30s
      timeout: 10s
      retries: 3

# Named volumes mounted by the fast-whisper service; no options are set.
volumes:
  whisper-models: {}
  audio-data: {}