From 60f18f8e710a2277bef64cf4f7590df08a6882d0 Mon Sep 17 00:00:00 2001 From: jango-blockchained Date: Tue, 4 Feb 2025 19:08:01 +0100 Subject: [PATCH] feat(speech): add speech-to-text and wake word detection modules - Implement SpeechToText class with Docker-based transcription capabilities - Add wake word detection using OpenWakeWord and fast-whisper models - Create Dockerfile for speech processing container - Develop comprehensive test suite for speech recognition functionality - Include audio processing and event-driven transcription features --- README.md | 477 +++++++++++----------- docker/speech/Dockerfile | 39 ++ docker/speech/wake_word_detector.py | 104 +++++ src/speech/__tests__/speechToText.test.ts | 114 ++++++ src/speech/speechToText.ts | 161 ++++++++ 5 files changed, 649 insertions(+), 246 deletions(-) create mode 100644 docker/speech/Dockerfile create mode 100644 docker/speech/wake_word_detector.py create mode 100644 src/speech/__tests__/speechToText.test.ts create mode 100644 src/speech/speechToText.ts diff --git a/README.md b/README.md index 36219ee..f3c9009 100644 --- a/README.md +++ b/README.md @@ -1,303 +1,288 @@ -# ๐Ÿš€ Model Context Protocol (MCP) Server for Home Assistant +# ๐Ÿš€ MCP Server for Home Assistant - Bringing AI-Powered Smart Homes to Life! -The **Model Context Protocol (MCP) Server** is a robust, secure, and high-performance bridge that integrates Home Assistant with Language Learning Models (LLMs), enabling natural language control and real-time monitoring of your smart home devices. Unlock advanced automation, control, and analytics for your Home Assistant ecosystem. 
- -![License](https://img.shields.io/badge/license-MIT-blue.svg) -![Bun](https://img.shields.io/badge/bun-%3E%3D1.0.26-black) -![TypeScript](https://img.shields.io/badge/typescript-%5E5.0.0-blue.svg) -![Test Coverage](https://img.shields.io/badge/coverage-95%25-brightgreen.svg) -[![Documentation](https://img.shields.io/badge/docs-github.io-blue.svg)](https://jango-blockchained.github.io/homeassistant-mcp/) -![Docker](https://img.shields.io/badge/docker-%3E%3D20.10.8-blue) - -## ๐ŸŒŸ Key Benefits - -### ๐ŸŽฎ Device Control & Monitoring -- **Voice-like Control:** "Dim living room lights to 50%" ๐ŸŒ‡ -- **Real-time Updates:** WebSocket/SSE with <100ms latency โšก -- **Cross-Device Automation:** Create scene-based rules ๐ŸŽญ - -### ๐Ÿค– AI-Powered Features -- Natural language processing for commands -- Predictive automation suggestions -- Anomaly detection in device behavior - -## ๐Ÿ— Architecture Overview - -```mermaid -graph TD - A[User Interface] --> B{MCP Server} - B --> C[Home Assistant] - B --> D[LLM Integration] - B --> E[Cache Layer] - E --> F[Redis] - B --> G[Security Middleware] - C --> H[Smart Devices] -``` - -## ๐Ÿ›  Installation - -### ๐Ÿณ Docker Setup (Recommended) - -```bash -# 1. Clone repo with caching -git clone --depth 1 https://github.com/jango-blockchained/homeassistant-mcp.git - -# 2. Configure environment -cp .env.example .env # Edit with your HA details ๐Ÿ”ง - -# 3. 
Start with compose -docker compose up -d --build # Auto-scaling enabled ๐Ÿ“ˆ - -# View real-time logs ๐Ÿ“œ -docker compose logs -f --tail=50 -``` - -### ๐Ÿ“ฆ Bare Metal Installation - -```bash -# Install Bun (if missing) -curl -fsSL https://bun.sh/install | bash # ๐Ÿ‡ Fast runtime - -# Install dependencies with cache -bun install --frozen-lockfile # โ™ป๏ธ Reliable dep tree - -# Start in dev mode with hot-reload ๐Ÿ”ฅ -bun run dev --watch -``` | - -## ๐Ÿ’ก Example Usage - -```javascript -// Real-time device monitoring ๐ŸŒ -const ws = new WebSocket('wss://mcp.yourha.com/ws'); - -ws.onmessage = ({ data }) => { - const update = JSON.parse(data); - if(update.entity_id === 'light.kitchen') { - smartBulb(update.state); // ๐ŸŽ›๏ธ Update UI - } -}; -``` - -## ๐Ÿ”„ Update Strategy - -```bash -# Zero-downtime updates ๐Ÿ•’ -docker compose pull -docker compose up -d --build -docker system prune # Clean old images ๐Ÿงน -``` - -## ๐Ÿ›ก Security Features - -- JWT authentication with refresh tokens ๐Ÿ”‘ -- Automatic request sanitization ๐Ÿงผ -- IP-based rate limiting with fail2ban integration ๐Ÿšซ -- End-to-end encryption support ๐Ÿ”’ - -## ๐ŸŒ Community & Support - -| Platform | Link | Response Time | -|----------------|-------------------------------|---------------| -| ๐Ÿ“š Docs | [API Reference](docs/api.md) | Instant | -| ๐Ÿ› GitHub | [Issues](#) | <24hr | - -## ๐Ÿšง Troubleshooting Guide - -```bash -# Check service health ๐Ÿฉบ -docker compose ps - -# Test API endpoints ๐Ÿ”Œ -curl -I http://localhost:3000/healthcheck # Should return 200 โœ… - -# Inspect cache status ๐Ÿ’พ -docker exec mcp_redis redis-cli info memory -``` - -## ๐Ÿ”ฎ Roadmap Highlights - -- [ ] **AI Assistant Integration** (Q4 2024) ๐Ÿค– -- [ ] **Predictive Automation** (Q1 2025) ๐Ÿ”ฎ -- [x] **Real-time Analytics** (Shipped! ๐Ÿš€) -- [ ] **Energy Optimization** (Q3 2024) ๐ŸŒฑ - -## ๐Ÿค Contributing - -I love community input! Here's how to help: - -1. ๐Ÿด Fork the repository -2. 
๐ŸŒฟ Create a feature branch -3. ๐Ÿ’ป Make your changes -4. ๐Ÿงช Run tests: `bun test --coverage` -5. ๐Ÿ“ฆ Commit using [Conventional Commits](https://www.conventionalcommits.org) -6. ๐Ÿ”€ Open a Pull Request +[![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE) +[![Bun](https://img.shields.io/badge/bun-%3E%3D1.0.26-black)](https://bun.sh) +[![TypeScript](https://img.shields.io/badge/typescript-%5E5.0.0-blue.svg)](https://www.typescriptlang.org) +[![Test Coverage](https://img.shields.io/badge/coverage-95%25-brightgreen.svg)](#) +[![Documentation](https://img.shields.io/badge/docs-github.io-blue.svg)](https://jango-blockchained.github.io/homeassistant-mcp/) +[![Docker](https://img.shields.io/badge/docker-%3E%3D20.10.8-blue)](https://www.docker.com) --- -**๐Ÿ“ข Note:** This project adheres to [Semantic Versioning](https://semver.org). Always check breaking changes in release notes before upgrading! โš ๏ธ +## Overview ๐ŸŒ -## Table of Contents +Welcome to the **Model Context Protocol (MCP) Server for Home Assistant**! This robust platform bridges Home Assistant with cutting-edge Language Learning Models (LLMs), enabling natural language interactions and real-time automation of your smart devices. Imagine entering your home, saying: -- [Overview](#overview) -- [Key Features](#key-features) -- [Architecture & Design](#architecture--design) -- [Installation](#installation) - - [Basic Setup](#basic-setup) - - [Docker Setup (Recommended)](#docker-setup-recommended) -- [Usage](#usage) -- [API & Documentation](#api--documentation) -- [Development](#development) -- [Roadmap & Future Plans](#roadmap--future-plans) -- [Community & Support](#community--support) -- [Contributing](#contributing) -- [Troubleshooting & FAQ](#troubleshooting--faq) -- [License](#license) +> โ€œHey MCP, dim the lights and start my evening playlist,โ€ -## Overview +and watching your home transform instantlyโ€”that's the magic that MCP Server delivers! 
-The MCP Server bridges Home Assistant with advanced LLM integrations to deliver intuitive control, automation, and state monitoring. Leveraging a high-performance runtime and real-time communication protocols, MCP offers a seamless experience for managing your smart home. +--- -## Key Features +## Key Benefits โœจ -### Device Control & Monitoring -- **Smart Device Control:** Manage lights, climate, covers, switches, sensors, media players, fans, locks, vacuums, and cameras using natural language commands. -- **Real-time Updates:** Receive instant notifications and updates via Server-Sent Events (SSE). +### ๐ŸŽฎ Device Control & Monitoring +- **Voice-Controlled Automation:** + Use simple commands like "Turn on the kitchen lights" or "Set the thermostat to 22ยฐC" without touching a switch. + **Real-World Example:** + In the morning, say "Good morning! Open the blinds and start the coffee machine" to kickstart your day automatically. -### System & Automation Management -- **Automation Engine:** Create, modify, and trigger custom automation rules with ease. -- **Add-on & Package Management:** Integrates with HACS for deploying custom integrations, themes, scripts, and applications. -- **Robust System Management:** Features advanced state monitoring, error handling, and security safeguards. +- **Real-Time Communication:** + Experience sub-100ms latency updates via Server-Sent Events (SSE) or WebSocket connections, ensuring your dashboard is always current. + **Real-World Example:** + Monitor energy usage instantly during peak hours and adjust remotely for efficient consumption. -## Architecture & Design +- **Seamless Automation:** + Create scene-based rules to synchronize multiple devices effortlessly. + **Real-World Example:** + For movie nights, have MCP dim the lights, adjust the sound system, and launch your favorite streaming app with just one command. 
-The MCP Server is built with scalability, resilience, and security in mind: +### ๐Ÿค– AI-Powered Enhancements +- **Natural Language Processing (NLP):** + Convert everyday speech into actionable commandsโ€”just say, "Prepare the house for dinner," and MCP will adjust lighting, temperature, and even play soft background music. -- **High-Performance Runtime:** Powered by Bun for fast startup, efficient memory utilization, and native TypeScript support. -- **Real-time Communication:** Employs Server-Sent Events (SSE) for continuous, real-time data updates. -- **Modular & Extensible:** Designed to support plugins, add-ons, and custom automation scripts, allowing for easy expansion. -- **Secure API Integration:** Implements token-based authentication, rate limiting, and adherence to best security practices. +- **Predictive Automation & Suggestions:** + Receive proactive recommendations based on usage habits and environmental trends. + **Real-World Example:** + When home temperature fluctuates unexpectedly, MCP suggests an optimal setting and notifies you immediately. -For a deeper dive into the system architecture, please refer to our [Architecture Documentation](docs/architecture.md). +- **Anomaly Detection:** + Continuously monitor device activity and alert you to unusual behavior, helping prevent malfunctions or potential security breaches. -## Usage +--- -Once the server is running, open your browser at [http://localhost:3000](http://localhost:3000). For real-time device updates, integrate the SSE endpoint in your application: +## Architectural Overview ๐Ÿ— + +Our architecture is engineered for performance, scalability, and security. The following Mermaid diagram illustrates the data flow and component interactions: + +```mermaid +graph TD + subgraph Client + A[Client Application
(Web / Mobile / Voice)] + end + subgraph CDN + B[CDN / Cache] + end + subgraph Server + C[Bun Native Server] + E[NLP Engine
& Language Processing Module] + end + subgraph Integration + D[Home Assistant
(Devices, Lights, Thermostats)] + end + + A -->|HTTP Request| B + B -- Cache Miss --> C + C -->|Interpret Command| E + E -->|Determine Action| D + D -->|Return State/Action| C + C -->|Response| B + B -->|Cached/Processed Response| A +``` + +Learn more about our architecture in the [Architecture Documentation](docs/architecture.md). + +--- + +## Technical Stack ๐Ÿ”ง + +Our solution is built on a modern, high-performance stack that powers every feature: + +- **Bun:** + A next-generation JavaScript runtime offering rapid startup times, native TypeScript support, and high performance. + ๐Ÿ‘‰ [Learn about Bun](https://bun.sh) + +- **Bun Native Server:** + Utilizes Bun's built-in HTTP server to efficiently process API requests with sub-100ms response times. + ๐Ÿ‘‰ See the [Installation Guide](docs/getting-started/installation.md) for details. + +- **Natural Language Processing (NLP) & LLM Integration:** + Processes and interprets natural language commands using state-of-the-art LLMs and custom NLP modules. + ๐Ÿ‘‰ Find API usage details in the [API Documentation](docs/api.md). + +- **Home Assistant Integration:** + Provides seamless connectivity with Home Assistant, ensuring flawless communication with your smart devices. + ๐Ÿ‘‰ Refer to the [Usage Guide](docs/usage.md) for more information. + +- **Redis Cache:** + Enables rapid data retrieval and session persistence essential for real-time updates. + +- **TypeScript:** + Enhances type safety and developer productivity across the entire codebase. + +- **JWT & Security Middleware:** + Protects your ecosystem with JWT-based authentication, request sanitization, rate-limiting, and encryption. + +- **Containerization with Docker:** + Enables scalable, isolated deployments for production environments. + +For further technical details, check out our [Documentation Index](docs/index.md). + +--- + +## Installation ๐Ÿ›  + +### ๐Ÿณ Docker Setup (Recommended) + +For a hassle-free, containerized deployment: + +```bash +# 1. 
Clone the repository (using a shallow copy for efficiency) +git clone --depth 1 https://github.com/jango-blockchained/homeassistant-mcp.git + +# 2. Configure your environment: copy the example file and edit it with your Home Assistant credentials +cp .env.example .env # Modify .env with your Home Assistant host, tokens, etc. + +# 3. Build and run the Docker containers +docker compose up -d --build + +# 4. View real-time logs (last 50 log entries) +docker compose logs -f --tail=50 +``` + +๐Ÿ‘‰ Refer to our [Installation Guide](docs/getting-started/installation.md) for full details. + +### ๐Ÿ’ป Bare Metal Installation + +For direct deployment on your host machine: + +```bash +# 1. Install Bun (if not already installed) +curl -fsSL https://bun.sh/install | bash + +# 2. Install project dependencies with caching support +bun install --frozen-lockfile + +# 3. Launch the server in development mode with hot-reload enabled +bun run dev --watch +``` + +--- + +## Real-World Usage Examples ๐Ÿ” + +### ๐Ÿ“ฑ Smart Home Dashboard Integration +Integrate MCP's real-time updates into your custom dashboard for a dynamic smart home experience: ```javascript const eventSource = new EventSource('http://localhost:3000/subscribe_events?token=YOUR_TOKEN&domain=light'); eventSource.onmessage = (event) => { - const data = JSON.parse(event.data); - console.log('Update received:', data); + const data = JSON.parse(event.data); + console.log('Real-time update:', data); + // Update your UI dashboard, e.g., refresh a light intensity indicator. 
}; ``` -## API & Documentation +### ๐Ÿ  Voice-Activated Control +Utilize voice commands to trigger actions with minimal effort: -Access comprehensive API details and guides in the docs directory: +```javascript +// Establish a WebSocket connection for real-time command processing +const ws = new WebSocket('wss://mcp.yourha.com/ws'); -- **API Reference:** [API Documentation](docs/api.md) -- **SSE Documentation:** [SSE API](docs/sse-api.md) -- **Troubleshooting Guide:** [Troubleshooting](docs/troubleshooting.md) -- **Architecture Details:** [Architecture Documentation](docs/architecture.md) +ws.onmessage = ({ data }) => { + const update = JSON.parse(data); + if (update.entity_id === 'light.living_room') { + console.log('Adjusting living room lighting based on voice command...'); + // Additional logic to update your UI or trigger further actions can go here. + } +}; -## Development +// Simulate processing a voice command +function simulateVoiceCommand(command) { + console.log("Processing voice command:", command); + // Integrate with your actual voice-to-text system as needed. +} -### Running in Development Mode - -```bash -bun run dev +simulateVoiceCommand("Turn off all the lights for bedtime"); ``` -### Running Tests +๐Ÿ‘‰ Learn more in our [Usage Guide](docs/usage.md). -- Execute all tests: - ```bash - bun test - ``` +--- -- Run tests with coverage: - ```bash - bun test --coverage - ``` +## Update Strategy ๐Ÿ”„ -### Production Build & Start +Maintain a seamless operation with zero downtime updates: ```bash -bun run build -bun start +# 1. Pull the latest Docker images +docker compose pull + +# 2. Rebuild and restart containers smoothly +docker compose up -d --build + +# 3. Clean up unused Docker images to free up space +docker system prune -f ``` -## Roadmap & Future Plans +For more details, review our [Troubleshooting & Updates](docs/troubleshooting.md). -The MCP Server is under active development and improvement. 
Planned enhancements include: +--- -- **Advanced Automation Capabilities:** Introducing more complex automation rules and conditional logic. -- **Enhanced Security Features:** Additional authentication layers, encryption enhancements, and security monitoring tools. -- **User Interface Improvements:** Development of a more intuitive web dashboard for easier device management. -- **Expanded Integrations:** Support for a wider array of smart home devices and third-party services. -- **Performance Optimizations:** Continued efforts to reduce latency and improve resource efficiency. +## Security Features ๐Ÿ” -For additional details, check out our [Roadmap](docs/roadmap.md). +We prioritize the security of your smart home with multiple layers of defense: +- **JWT Authentication ๐Ÿ”‘:** Secure, token-based API access to prevent unauthorized usage. +- **Request Sanitization ๐Ÿงผ:** Automatic filtering and validation of API requests to combat injection attacks. +- **Rate Limiting & Fail2Ban ๐Ÿšซ:** Monitors requests to prevent brute force and DDoS attacks. +- **End-to-End Encryption ๐Ÿ”’:** Ensures that your commands and data remain private during transmission. -## Community & Support +--- -Join the community to stay updated, share ideas, and get help: +## Contributing ๐Ÿค -- **GitHub Issues:** Report bugs or suggest features on the [GitHub Issues Page](https://github.com/jango-blockchained/homeassistant-mcp/issues). -- **Discussion Forums:** Connect with other users and contributors in the community forums. -- **Chat Platforms:** Join real-time discussions on [Discord](#) or [Slack](#). +We value community contributions! Here's how you can help improve MCP Server: +1. **Fork the Repository ๐Ÿด** + Create your own copy of the project. +2. **Create a Feature Branch ๐ŸŒฟ** + ```bash + git checkout -b feature/your-feature-name + ``` +3. **Install Dependencies & Run Tests ๐Ÿงช** + ```bash + bun install + bun test --coverage + ``` +4. 
**Make Your Changes & Commit ๐Ÿ“** + Follow the [Conventional Commits](https://www.conventionalcommits.org) guidelines. +5. **Open a Pull Request ๐Ÿ”€** + Submit your changes for review. -## Contributing +Read more in our [Contribution Guidelines](docs/contributing.md). -I welcome your contributions! To get started: +--- -1. Fork the repository. -2. Create your feature branch: - ```bash - git checkout -b feature/your-feature-name - ``` -3. Install dependencies: - ```bash - bun install - ``` -4. Make your changes and run tests: - ```bash - bun test - ``` -5. Commit and push your changes, then open a Pull Request. +## Roadmap & Future Enhancements ๐Ÿ”ฎ -For detailed guidelines, see [Contributing Guide](docs/contributing.md). +We're continuously evolving MCP Server. Upcoming features include: +- **AI Assistant Integration (Q4 2024):** + Smarter, context-aware voice commands and personalized automation. +- **Predictive Automation (Q1 2025):** + Enhanced scheduling capabilities powered by advanced AI. +- **Enhanced Security (Q2 2024):** + Introduction of multi-factor authentication, advanced monitoring, and rigorous encryption methods. +- **Performance Optimizations (Q3 2024):** + Reducing latency further, optimizing caching, and improving load balancing. -## Troubleshooting & FAQ +For more details, see our [Roadmap](docs/roadmap.md). -### Common Issues +--- -- **Connection Problems:** Ensure that your `HASS_HOST`, authentication token, and WebSocket URL are correctly configured. -- **Docker Deployment:** Confirm that Docker is running and that your `.env` file contains the correct settings. -- **Automation Errors:** Verify entity availability and review your automation configurations for potential issues. +## Community & Support ๐ŸŒ -For more troubleshooting details, refer to [Troubleshooting Guide](docs/troubleshooting.md). +Your feedback and collaboration are vital! 
Join our community: +- **GitHub Issues:** Report bugs or request features via our [Issues Page](https://github.com/jango-blockchained/homeassistant-mcp/issues). +- **Discord & Slack:** Connect with fellow users and developers in real-time. +- **Documentation:** Find comprehensive guides on the [MCP Documentation Website](https://jango-blockchained.github.io/homeassistant-mcp/). -### Frequently Asked Questions +--- -**Q: What platforms does MCP Server support?** +## License ๐Ÿ“œ -A: MCP Server runs on Linux, macOS, and Windows (Docker is recommended for Windows environments). +This project is licensed under the MIT License. See [LICENSE](LICENSE) for full details. -**Q: How do I report a bug or request a feature?** +--- -A: Please use the [GitHub Issues Page](https://github.com/jango-blockchained/homeassistant-mcp/issues) to report bugs or request new features. - -**Q: Can I contribute to the project?** - -A: Absolutely! I welcome contributions from the community. See the [Contributing](#contributing) section for more details. - -## License - -This project is licensed under the MIT License. See [LICENSE](LICENSE) for the full license text. - -## Documentation - -Full documentation is available at: [https://jango-blockchained.github.io/homeassistant-mcp/](https://jango-blockchained.github.io/homeassistant-mcp/) \ No newline at end of file +๐Ÿ”‹ Batteries included. 
\ No newline at end of file
diff --git a/docker/speech/Dockerfile b/docker/speech/Dockerfile
new file mode 100644
index 0000000..ad48b8b
--- /dev/null
+++ b/docker/speech/Dockerfile
@@ -0,0 +1,39 @@
+FROM python:3.10-slim
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    build-essential \
+    portaudio19-dev \
+    python3-pyaudio \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install faster-whisper and its dependencies
+RUN pip install --no-cache-dir torch torchaudio --index-url https://download.pytorch.org/whl/cpu
+RUN pip install --no-cache-dir faster-whisper
+
+# Install wake word detection
+RUN pip install --no-cache-dir openwakeword pyaudio sounddevice
+
+# Create directories
+RUN mkdir -p /models /audio
+
+# Download the base model by default
+RUN python -c "from faster_whisper import download_model; download_model('base.en', cache_dir='/models')"
+
+# Download OpenWakeWord models
+RUN mkdir -p /models/wake_word && \
+    python -c "import openwakeword; openwakeword.download_models(['hey_jarvis', 'ok_google', 'alexa'], '/models/wake_word')"
+
+WORKDIR /app
+
+# Copy the wake word detection script
+COPY wake_word_detector.py .
+ +# Set environment variables +ENV WHISPER_MODEL_PATH=/models +ENV WAKEWORD_MODEL_PATH=/models/wake_word +ENV PYTHONUNBUFFERED=1 + +# Run the wake word detection service +CMD ["python", "wake_word_detector.py"] \ No newline at end of file diff --git a/docker/speech/wake_word_detector.py b/docker/speech/wake_word_detector.py new file mode 100644 index 0000000..3857ebd --- /dev/null +++ b/docker/speech/wake_word_detector.py @@ -0,0 +1,104 @@ +import os +import json +import queue +import threading +import numpy as np +import sounddevice as sd +from openwakeword import Model +from datetime import datetime +import wave + +# Configuration +SAMPLE_RATE = 16000 +CHANNELS = 1 +CHUNK_SIZE = 1024 +BUFFER_DURATION = 30 # seconds to keep in buffer +DETECTION_THRESHOLD = 0.5 + +class AudioProcessor: + def __init__(self): + self.wake_word_model = Model( + wakeword_models=["hey_jarvis", "ok_google", "alexa"], + model_path=os.environ.get('WAKEWORD_MODEL_PATH', '/models/wake_word') + ) + self.audio_buffer = queue.Queue() + self.recording = False + self.buffer = np.zeros(SAMPLE_RATE * BUFFER_DURATION) + self.buffer_lock = threading.Lock() + + def audio_callback(self, indata, frames, time, status): + """Callback for audio input""" + if status: + print(f"Audio callback status: {status}") + + # Convert to mono if necessary + if CHANNELS > 1: + audio_data = np.mean(indata, axis=1) + else: + audio_data = indata.flatten() + + # Update circular buffer + with self.buffer_lock: + self.buffer = np.roll(self.buffer, -len(audio_data)) + self.buffer[-len(audio_data):] = audio_data + + # Process for wake word detection + prediction = self.wake_word_model.predict(audio_data) + + # Check if wake word detected + for wake_word, score in prediction.items(): + if score > DETECTION_THRESHOLD: + print(f"Wake word detected: {wake_word} (confidence: {score:.2f})") + self.save_audio_segment() + break + + def save_audio_segment(self): + """Save the audio buffer when wake word is detected""" + timestamp = 
datetime.now().strftime("%Y%m%d_%H%M%S")
+        filename = f"/audio/wake_word_{timestamp}.wav"
+
+        # Save the audio buffer to a WAV file
+        with wave.open(filename, 'wb') as wf:
+            wf.setnchannels(CHANNELS)
+            wf.setsampwidth(2)  # 16-bit audio
+            wf.setframerate(SAMPLE_RATE)
+
+            # Convert float32 to int16
+            audio_data = (self.buffer * 32767).astype(np.int16)
+            wf.writeframes(audio_data.tobytes())
+
+        print(f"Saved audio segment to {filename}")
+
+        # Write metadata
+        metadata = {
+            "timestamp": timestamp,
+            "sample_rate": SAMPLE_RATE,
+            "channels": CHANNELS,
+            "duration": BUFFER_DURATION
+        }
+
+        with open(f"{filename}.json", 'w') as f:
+            json.dump(metadata, f, indent=2)
+
+    def start(self):
+        """Start audio processing"""
+        try:
+            with sd.InputStream(
+                channels=CHANNELS,
+                samplerate=SAMPLE_RATE,
+                blocksize=CHUNK_SIZE,
+                callback=self.audio_callback
+            ):
+                print("Wake word detection started. Listening...")
+                while True:
+                    sd.sleep(1000)  # Sleep for 1 second
+
+        except KeyboardInterrupt:
+            print("\nStopping wake word detection...")
+        except Exception as e:
+            print(f"Error in audio processing: {e}")
+
+if __name__ == "__main__":
+    print("Initializing wake word detection...")
+    processor = AudioProcessor()
+    processor.start()
\ No newline at end of file
diff --git a/src/speech/__tests__/speechToText.test.ts b/src/speech/__tests__/speechToText.test.ts
new file mode 100644
index 0000000..5e7268d
--- /dev/null
+++ b/src/speech/__tests__/speechToText.test.ts
@@ -0,0 +1,114 @@
+import { SpeechToText, WakeWordEvent } from '../speechToText';
+import fs from 'fs';
+import path from 'path';
+
+describe('SpeechToText', () => {
+    let speechToText: SpeechToText;
+    const testAudioDir = path.join(__dirname, 'test_audio');
+
+    beforeEach(() => {
+        speechToText = new SpeechToText('fast-whisper');
+        // Create test audio directory if it doesn't exist
+        if (!fs.existsSync(testAudioDir)) {
+            fs.mkdirSync(testAudioDir, { recursive: true });
+        }
+    });
+
+    afterEach(() => {
+        
speechToText.stopWakeWordDetection(); + // Clean up test files + if (fs.existsSync(testAudioDir)) { + fs.rmSync(testAudioDir, { recursive: true, force: true }); + } + }); + + describe('checkHealth', () => { + it('should return true when the container is running', async () => { + const isHealthy = await speechToText.checkHealth(); + expect(isHealthy).toBeDefined(); + }); + }); + + describe('wake word detection', () => { + it('should detect new audio files and emit wake word events', (done) => { + const testFile = path.join(testAudioDir, 'wake_word_20240203_123456.wav'); + const testMetadata = `${testFile}.json`; + + speechToText.startWakeWordDetection(testAudioDir); + + speechToText.on('wake_word', (event: WakeWordEvent) => { + expect(event).toBeDefined(); + expect(event.audioFile).toBe(testFile); + expect(event.metadataFile).toBe(testMetadata); + expect(event.timestamp).toBe('123456'); + done(); + }); + + // Create a test audio file to trigger the event + fs.writeFileSync(testFile, 'test audio content'); + }); + + it('should automatically transcribe detected wake word audio', (done) => { + const testFile = path.join(testAudioDir, 'wake_word_20240203_123456.wav'); + + speechToText.startWakeWordDetection(testAudioDir); + + speechToText.on('transcription', (event) => { + expect(event).toBeDefined(); + expect(event.audioFile).toBe(testFile); + expect(event.result).toBeDefined(); + done(); + }); + + // Create a test audio file to trigger the event + fs.writeFileSync(testFile, 'test audio content'); + }); + + it('should handle errors during wake word audio transcription', (done) => { + const testFile = path.join(testAudioDir, 'wake_word_20240203_123456.wav'); + + speechToText.startWakeWordDetection(testAudioDir); + + speechToText.on('error', (error) => { + expect(error).toBeDefined(); + expect(error.message).toContain('Transcription failed'); + done(); + }); + + // Create an invalid audio file to trigger an error + fs.writeFileSync(testFile, 'invalid audio content'); + 
}); + }); + + describe('transcribeAudio', () => { + it('should transcribe an audio file', async () => { + const result = await speechToText.transcribeAudio('/audio/test.wav'); + + expect(result).toBeDefined(); + expect(result.text).toBeDefined(); + expect(result.segments).toBeDefined(); + expect(Array.isArray(result.segments)).toBe(true); + }, 30000); + + it('should handle transcription errors', async () => { + await expect( + speechToText.transcribeAudio('/audio/nonexistent.wav') + ).rejects.toThrow(); + }); + + it('should emit progress events', (done) => { + const progressEvents: Array<{ type: string; data: string }> = []; + + speechToText.on('progress', (event: { type: string; data: string }) => { + progressEvents.push(event); + if (event.type === 'stderr' && event.data.includes('error')) { + expect(progressEvents.length).toBeGreaterThan(0); + done(); + } + }); + + // Trigger an error to test progress events + speechToText.transcribeAudio('/audio/nonexistent.wav').catch(() => { }); + }); + }); +}); \ No newline at end of file diff --git a/src/speech/speechToText.ts b/src/speech/speechToText.ts new file mode 100644 index 0000000..6550610 --- /dev/null +++ b/src/speech/speechToText.ts @@ -0,0 +1,161 @@ +import { spawn } from 'child_process'; +import { EventEmitter } from 'events'; +import { watch } from 'fs'; +import path from 'path'; + +export interface TranscriptionOptions { + model?: 'tiny.en' | 'base.en' | 'small.en' | 'medium.en' | 'large-v2'; + language?: string; + temperature?: number; + beamSize?: number; + patience?: number; + device?: 'cpu' | 'cuda'; +} + +export interface TranscriptionResult { + text: string; + segments: Array<{ + text: string; + start: number; + end: number; + confidence: number; + }>; +} + +export interface WakeWordEvent { + timestamp: string; + audioFile: string; + metadataFile: string; +} + +export class TranscriptionError extends Error { + constructor(message: string) { + super(message); + this.name = 'TranscriptionError'; + } +} + 
+export class SpeechToText extends EventEmitter {
+    private containerName: string;
+    private audioWatcher?: ReturnType<typeof watch>;
+
+    constructor(containerName = 'fast-whisper') {
+        super();
+        this.containerName = containerName;
+    }
+
+    startWakeWordDetection(audioDir: string = './audio'): void {
+        // Watch for new audio files from wake word detection
+        this.audioWatcher = watch(audioDir, (eventType, filename) => {
+            if (eventType === 'rename' && filename && filename.startsWith('wake_word_') && filename.endsWith('.wav')) {
+                const audioFile = path.join(audioDir, filename);
+                const metadataFile = `${audioFile}.json`;
+
+                // Emit wake word event
+                this.emit('wake_word', {
+                    timestamp: filename.split('_')[3].split('.')[0],
+                    audioFile,
+                    metadataFile
+                } as WakeWordEvent);
+
+                // Automatically transcribe the wake word audio
+                this.transcribeAudio(audioFile)
+                    .then(result => {
+                        this.emit('transcription', { audioFile, result });
+                    })
+                    .catch(error => {
+                        this.emit('error', error);
+                    });
+            }
+        });
+    }
+
+    stopWakeWordDetection(): void {
+        if (this.audioWatcher) {
+            this.audioWatcher.close();
+            this.audioWatcher = undefined;
+        }
+    }
+
+    async transcribeAudio(
+        audioFilePath: string,
+        options: TranscriptionOptions = {}
+    ): Promise<TranscriptionResult> {
+        const {
+            model = 'base.en',
+            language = 'en',
+            temperature = 0,
+            beamSize = 5,
+            patience = 1,
+            device = 'cpu'
+        } = options;
+
+        return new Promise((resolve, reject) => {
+            // Construct Docker command to run fast-whisper
+            const args = [
+                'exec',
+                this.containerName,
+                'fast-whisper',
+                '--model', model,
+                '--language', language,
+                '--temperature', temperature.toString(),
+                '--beam-size', beamSize.toString(),
+                '--patience', patience.toString(),
+                '--device', device,
+                '--output-json',
+                audioFilePath
+            ];
+
+            const process = spawn('docker', args);
+            let stdout = '';
+            let stderr = '';
+
+            process.stdout.on('data', (data: Buffer) => {
+                stdout += data.toString();
+                this.emit('progress', { type: 'stdout', data: data.toString() });
+            });
+
+            process.stderr.on('data', (data: Buffer) => {
+                stderr += data.toString();
+                this.emit('progress', { type: 'stderr', data: data.toString() });
+            });
+
+            process.on('close', (code: number) => {
+                if (code !== 0) {
+                    reject(new TranscriptionError(`Transcription failed: ${stderr}`));
+                    return;
+                }
+
+                try {
+                    const result = JSON.parse(stdout) as TranscriptionResult;
+                    resolve(result);
+                } catch (error: unknown) {
+                    if (error instanceof Error) {
+                        reject(new TranscriptionError(`Failed to parse transcription result: ${error.message}`));
+                    } else {
+                        reject(new TranscriptionError('Failed to parse transcription result: Unknown error'));
+                    }
+                }
+            });
+        });
+    }
+
+    async checkHealth(): Promise<boolean> {
+        try {
+            const process = spawn('docker', ['ps', '--filter', `name=${this.containerName}`, '--format', '{{.Status}}']);
+
+            return new Promise((resolve) => {
+                let output = '';
+                process.stdout.on('data', (data: Buffer) => {
+                    output += data.toString();
+                });
+
+                process.on('close', (code: number) => {
+                    resolve(code === 0 && output.toLowerCase().includes('up'));
+                });
+            });
+        } catch (error) {
+            return false;
+        }
+    }
+}
\ No newline at end of file