feat(speech): enhance speech processing with advanced audio setup and detection

- Add audio setup script for PulseAudio configuration
- Improve wake word detection with advanced noise filtering
- Implement continuous transcription and command processing
- Update speech Dockerfile with additional audio dependencies
- Enhance logging and error handling in wake word detector
This commit is contained in:
jango-blockchained
2025-02-04 22:51:06 +01:00
parent e1db799b1d
commit b9727981cc
4 changed files with 374 additions and 103 deletions

View File

@@ -13,9 +13,10 @@ RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# Install the Python dependencies into the virtualenv in one layer.
# - numpy is held below 2.0.0.
# - torch/torchaudio are fetched from the PyTorch CPU-only wheel index.
# - The remaining packages are pinned to exact versions.
# The resolved set is written to /opt/venv/requirements.txt so the exact
# versions that ended up in the image can be inspected later.
RUN pip install --no-cache-dir "numpy>=1.24.3,<2.0.0" && \
    pip install --no-cache-dir torch==2.1.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cpu && \
    pip install --no-cache-dir faster-whisper==0.10.0 openwakeword==0.4.0 pyaudio==0.2.14 sounddevice==0.4.6 requests==2.31.0 && \
    pip freeze > /opt/venv/requirements.txt

# Final (runtime) stage: starts from a fresh slim Python base image; the
# virtualenv is copied in from the builder stage rather than rebuilt here.
FROM python:3.10-slim
@@ -24,10 +25,14 @@ FROM python:3.10-slim
# Copy the virtualenv from the builder stage to the same path in this stage.
COPY --from=builder /opt/venv /opt/venv
# Put the virtualenv's bin directory first on PATH so its python/pip are the
# ones resolved by later instructions and at container start.
ENV PATH="/opt/venv/bin:$PATH"
# Install runtime audio dependencies (ALSA, PulseAudio, PortAudio).
# --no-install-recommends keeps optional recommended packages out of the
# image; the apt lists are removed in the same layer so they never persist.
# NOTE(review): portaudio19-dev is a development package; the runtime library
# alone may be sufficient here — confirm before slimming further.
# NOTE(review): python3-pyaudio presumably targets the distribution's own
# Python rather than /opt/venv (which already has pyaudio from pip) — verify
# whether it is still needed.
RUN apt-get update && apt-get install -y --no-install-recommends \
    alsa-utils \
    libasound2 \
    libasound2-plugins \
    portaudio19-dev \
    pulseaudio \
    python3-pyaudio \
    && rm -rf /var/lib/apt/lists/*

# Create necessary directories
@@ -55,5 +60,9 @@ ENV PYTHONMALLOC=malloc \
# Liveness probe: healthy while some process has "python" in its command line.
# The probe reads /proc directly instead of calling `ps`, because `ps` comes
# from the procps package, which the visible package install list does not add.
# The [p] bracket expression keeps the probe's own shell and grep command
# lines (which contain the literal text "[p]ython") from matching the pattern.
# -a treats the NUL-separated cmdline files as text; -s silences errors for
# processes that exit while the glob is being scanned.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD grep -qas '[p]ython' /proc/[0-9]*/cmdline || exit 1

# Copy the audio setup script into the image and make it executable.
COPY setup-audio.sh /setup-audio.sh
RUN chmod +x /setup-audio.sh

# Start command: run the audio setup script, and only if it succeeds start the
# wake word detector with unbuffered output (-u).
# `exec` replaces the bash wrapper with the python process, so the detector
# itself receives the stop signal sent by `docker stop` instead of relying on
# the intermediate shell to pass it along.
# Only one CMD takes effect per image, so this is the sole CMD instruction.
CMD ["/bin/bash", "-c", "/setup-audio.sh && exec python -u wake_word_detector.py"]