feat: Enhance speech and AI configuration with advanced environment settings

- Update `.env.example` with comprehensive speech and AI configuration options
- Modify Docker Compose speech configuration for more flexible audio and ASR settings
- Enhance Dockerfile to support Python virtual environment and speech dependencies
- Refactor environment loading to use Bun's file system utilities
- Improve device listing tool with more detailed device statistics
- Add support for multiple AI models and dynamic configuration
This commit is contained in:
jango-blockchained
2025-02-10 03:28:58 +01:00
parent 986b1949cd
commit b6bd53b01a
10 changed files with 764 additions and 283 deletions

View File

@@ -4,17 +4,27 @@ services:
homeassistant-mcp:
image: homeassistant-mcp:latest
environment:
# Speech Feature Flags
- ENABLE_SPEECH_FEATURES=${ENABLE_SPEECH_FEATURES:-true}
- ENABLE_WAKE_WORD=${ENABLE_WAKE_WORD:-true}
- ENABLE_SPEECH_TO_TEXT=${ENABLE_SPEECH_TO_TEXT:-true}
# Audio Configuration
- NOISE_THRESHOLD=${NOISE_THRESHOLD:-0.05}
- MIN_SPEECH_DURATION=${MIN_SPEECH_DURATION:-1.0}
- SILENCE_DURATION=${SILENCE_DURATION:-0.5}
- SAMPLE_RATE=${SAMPLE_RATE:-16000}
- CHANNELS=${CHANNELS:-1}
- CHUNK_SIZE=${CHUNK_SIZE:-1024}
- PULSE_SERVER=${PULSE_SERVER:-unix:/run/user/1000/pulse/native}
fast-whisper:
image: onerahmet/openai-whisper-asr-webservice:latest
volumes:
- whisper-models:/models
- audio-data:/audio
environment:
- ASR_MODEL=base
- ASR_MODEL=${WHISPER_MODEL_TYPE:-base}
- ASR_ENGINE=faster_whisper
- WHISPER_BEAM_SIZE=5
- COMPUTE_TYPE=float32
@@ -27,7 +37,7 @@ services:
cpus: '4.0'
memory: 2G
healthcheck:
test: [ "CMD", "curl", "-f", "http://localhost:9000/asr/health" ]
test: [ "CMD", "curl", "-f", "http://localhost:9000/health" ]
interval: 30s
timeout: 10s
retries: 3
@@ -40,10 +50,23 @@ services:
volumes:
- /run/user/1000/pulse/native:/run/user/1000/pulse/native
environment:
- PULSE_SERVER=unix:/run/user/1000/pulse/native
- PULSE_SERVER=${PULSE_SERVER:-unix:/run/user/1000/pulse/native}
- PULSE_COOKIE=/run/user/1000/pulse/cookie
- PYTHONUNBUFFERED=1
- OPENWAKEWORD_MODEL=hey_jarvis
- OPENWAKEWORD_THRESHOLD=0.5
- MICROPHONE_COMMAND=arecord -D hw:0,0 -f S16_LE -c 1 -r 16000 -t raw
group_add:
- audio
- "${AUDIO_GID:-29}"
network_mode: host
privileged: true
entrypoint: >
/bin/bash -c " apt-get update && apt-get install -y pulseaudio alsa-utils && rm -rf /var/lib/apt/lists/* && /run.sh"
healthcheck:
test: [ "CMD-SHELL", "pactl info > /dev/null 2>&1 || exit 1" ]
interval: 30s
timeout: 10s
retries: 3
volumes:
whisper-models: