# Maix Duino Voice Assistant Client
# Path: maix_voice_client.py (upload to Maix Duino SD card)
#
# Purpose and usage:
# This script runs on the Maix Duino board and handles:
# - Wake word detection using KPU
# - Audio capture from I2S microphone
# - Streaming audio to voice processing server
# - Playing back TTS responses
# - LED feedback for user interaction
#
# Requirements:
# - MaixPy firmware (latest version)
# - I2S microphone connected
# - Speaker or audio output connected
# - WiFi configured (see config below)
#
# Upload to board:
# 1. Copy this file to SD card as boot.py or main.py
# 2. Update WiFi credentials below
# 3. Update server URL to your Heimdall IP
# 4. Power cycle the board
#
# NOTE(review): string formatting uses str.format() rather than f-strings.
# MaixPy firmware is presumably built on a MicroPython release that predates
# f-string support -- confirm against the firmware in use. The produced text
# is identical either way.

import time
import audio
import image
from Maix import GPIO
from fpioa_manager import fm
from machine import I2S
import KPU as kpu
import sensor
import lcd
import gc

# ----- Configuration -----
# Load credentials from secrets.py (copy secrets.py.example -> secrets.py, gitignored)
try:
    from secrets import SECRETS
except ImportError:
    SECRETS = {}

WIFI_SSID = SECRETS.get("wifi_ssid", "YourSSID")
WIFI_PASSWORD = SECRETS.get("wifi_password", "")

# Server Settings
VOICE_SERVER_URL = SECRETS.get("voice_server_url", "http://10.1.10.71:5000")
PROCESS_ENDPOINT = "/process"

# Audio Settings
SAMPLE_RATE = 16000  # 16kHz for Whisper
CHANNELS = 1  # Mono
SAMPLE_WIDTH = 2  # 16-bit
CHUNK_SIZE = 1024

# Wake Word Settings
WAKE_WORD_THRESHOLD = 0.7  # Confidence threshold (0.0-1.0)
WAKE_WORD_MODEL = "/sd/models/wake_word.kmodel"  # Path to wake word model
# Mean absolute amplitude that triggers the placeholder (non-KPU) wake
# detector. Adjust based on your microphone.
WAKE_AMPLITUDE_THRESHOLD = 3000

# LED Pin for feedback
LED_PIN = 13  # Onboard LED (adjust if needed)

# Recording Settings
MAX_RECORD_TIME = 10  # Maximum seconds to record after wake word
SILENCE_THRESHOLD = 500  # Amplitude threshold for silence detection
SILENCE_DURATION = 2  # Seconds of silence before stopping recording

# ----- Color definitions for LCD -----
COLOR_RED = (255, 0, 0)
COLOR_GREEN = (0, 255, 0)
COLOR_BLUE = (0, 0, 255)
COLOR_YELLOW = (255, 255, 0)
COLOR_BLACK = (0, 0, 0)
COLOR_WHITE = (255, 255, 255)

# ----- Global Variables -----
led = None
i2s_dev = None
kpu_task = None
listening = False


def init_hardware():
    """Initialize the LED, the LCD and the I2S microphone.

    Populates the module-level ``led`` and ``i2s_dev`` handles that the rest
    of the script uses.
    """
    global led, i2s_dev

    # Initialize LED
    fm.register(LED_PIN, fm.fpioa.GPIO0)
    led = GPIO(GPIO.GPIO0, GPIO.OUT)
    led.value(0)  # Turn off initially

    # Initialize LCD
    lcd.init()
    lcd.clear(COLOR_BLACK)
    lcd.draw_string(lcd.width() // 2 - 50, lcd.height() // 2,
                    "Initializing...", lcd.WHITE, lcd.BLACK)

    # Initialize I2S for audio (microphone)
    # Note: Pin configuration may vary based on your specific hardware
    fm.register(20, fm.fpioa.I2S0_IN_D0)
    fm.register(19, fm.fpioa.I2S0_WS)
    fm.register(18, fm.fpioa.I2S0_SCLK)

    i2s_dev = I2S(I2S.DEVICE_0)
    i2s_dev.channel_config(I2S.CHANNEL_0, I2S.RECEIVER,
                           align_mode=I2S.STANDARD_MODE,
                           data_width=I2S.RESOLUTION_16_BIT)
    i2s_dev.set_sample_rate(SAMPLE_RATE)

    print("Hardware initialized")


def init_network():
    """Connect to WiFi using WIFI_SSID / WIFI_PASSWORD.

    Waits up to 20 seconds for the link to come up and reports progress on
    the console and the LCD.

    Returns:
        True when the interface reports a connection, False otherwise.
    """
    # NOTE(review): network.WLAN / network.STA_IF is the generic MicroPython
    # WiFi API; confirm that the MaixPy build on this board exposes it.
    import network

    lcd.clear(COLOR_BLACK)
    lcd.draw_string(10, 50, "Connecting to WiFi...", COLOR_WHITE, COLOR_BLACK)

    wlan = network.WLAN(network.STA_IF)
    wlan.active(True)

    if not wlan.isconnected():
        print("Connecting to {}...".format(WIFI_SSID))
        wlan.connect(WIFI_SSID, WIFI_PASSWORD)

        # Wait for connection
        timeout = 20
        while not wlan.isconnected() and timeout > 0:
            time.sleep(1)
            timeout -= 1
            print("Waiting for connection... {}s".format(timeout))

    if not wlan.isconnected():
        print("Failed to connect to WiFi")
        lcd.clear(COLOR_BLACK)
        lcd.draw_string(10, 50, "WiFi Failed!", COLOR_RED, COLOR_BLACK)
        return False

    print("Network connected:", wlan.ifconfig())
    lcd.clear(COLOR_BLACK)
    lcd.draw_string(10, 50, "WiFi Connected", COLOR_GREEN, COLOR_BLACK)
    lcd.draw_string(10, 70, "IP: {}".format(wlan.ifconfig()[0]),
                    COLOR_WHITE, COLOR_BLACK)
    time.sleep(2)
    return True


def load_wake_word_model():
    """Load the wake word detection model (currently a no-op placeholder).

    Returns:
        True on success (always, for now), False if loading raised.
    """
    global kpu_task

    try:
        # This is a placeholder - you'll need to train and convert a wake word model
        # For now, we'll skip KPU wake word and use a simpler approach
        print("Wake word model loading skipped (implement after model training)")
        return True
    except Exception as e:
        print("Failed to load wake word model: {}".format(e))
        return False


def _read_pcm_chunk():
    """Record one CHUNK_SIZE block from the microphone and return raw bytes.

    Returns a bytes-like object of PCM data, or a falsy value when nothing
    was recorded.
    """
    chunk = i2s_dev.record(CHUNK_SIZE)
    # NOTE(review): MaixPy's I2S.record() presumably returns an Audio object
    # whose to_bytes() yields the raw PCM -- confirm against the firmware
    # docs. A plain bytes-like result has no to_bytes() and passes through.
    if chunk and hasattr(chunk, "to_bytes"):
        chunk = chunk.to_bytes()
    return chunk


def _mean_amplitude(pcm):
    """Return the mean absolute value of signed 16-bit little-endian samples.

    Samples are decoded by hand so negative values are sign-extended
    correctly on both MicroPython and CPython. A trailing odd byte is
    ignored, and a buffer with no complete sample yields 0 instead of a
    division by zero.
    """
    sample_count = len(pcm) // 2
    if sample_count == 0:
        return 0

    total = 0
    for offset in range(0, sample_count * 2, 2):
        sample = pcm[offset] | (pcm[offset + 1] << 8)
        if sample & 0x8000:  # top bit set -> negative in two's complement
            sample -= 0x10000
        total += abs(sample)
    return total / sample_count


def detect_wake_word():
    """
    Detect wake word in audio stream

    Returns:
        True if wake word detected, False otherwise

    Note: This is a simplified version. For production, you should:
    1. Train a wake word model using Mycroft Precise or similar
    2. Convert the model to .kmodel format for K210
    3. Load and run inference using KPU

    For now, we'll use a simple amplitude-based trigger
    """
    # Simple amplitude-based detection (placeholder)
    # Replace with actual KPU inference
    audio_data = _read_pcm_chunk()
    if not audio_data:
        return False

    # Simple threshold detection (replace with KPU inference)
    return _mean_amplitude(audio_data) > WAKE_AMPLITUDE_THRESHOLD


def record_audio(max_duration=MAX_RECORD_TIME):
    """
    Record audio until silence or max duration

    Args:
        max_duration: Upper bound, in seconds, on the recording length.

    Returns:
        Recorded audio data in WAV format (whatever create_wav() returns)
    """
    print("Recording audio (max {}s)...".format(max_duration))

    audio_buffer = bytearray()
    start_time = time.time()
    silence_start = None
    shown_second = -1  # last value drawn on the LCD; -1 forces first draw

    # Record in chunks
    while True:
        elapsed = time.time() - start_time

        # Check max duration
        if elapsed > max_duration:
            print("Max recording duration reached")
            break

        # Record chunk
        chunk = _read_pcm_chunk()

        if chunk:
            audio_buffer.extend(chunk)

            # Silence detection: stop once the level has stayed below the
            # threshold for longer than SILENCE_DURATION.
            if _mean_amplitude(chunk) < SILENCE_THRESHOLD:
                if silence_start is None:
                    silence_start = time.time()
                elif time.time() - silence_start > SILENCE_DURATION:
                    print("Silence detected, stopping recording")
                    break
            else:
                silence_start = None

        # Update LCD with recording time, only when the displayed whole
        # second changes (avoids clearing the screen on every chunk).
        current_second = int(elapsed)
        if current_second != shown_second:
            shown_second = current_second
            lcd.clear(COLOR_BLACK)
            lcd.draw_string(10, 50,
                            "Recording... {}s".format(current_second),
                            COLOR_RED, COLOR_BLACK)

    print("Recorded {} bytes".format(len(audio_buffer)))

    # Convert to WAV format
    return create_wav(audio_buffer)


def create_wav(audio_data):
    """Create WAV file header and combine with audio data

    Args:
        audio_data: Raw PCM bytes laid out per SAMPLE_RATE / CHANNELS /
            SAMPLE_WIDTH.

    Returns:
        bytearray holding a canonical 44-byte RIFF/WAVE PCM header followed
        by ``audio_data``.
    """
    # NOTE(review): everything after the first struct.pack() call was lost in
    # the source this file was restored from. The header below follows the
    # standard PCM WAV layout; whether the original returned bytes or a
    # bytearray is unknown -- verify against the upload code and server.
    import struct

    # WAV header
    sample_rate = SAMPLE_RATE
    channels = CHANNELS
    sample_width = SAMPLE_WIDTH
    data_size = len(audio_data)

    # RIFF header
    wav = bytearray(b'RIFF')
    wav.extend(struct.pack('<I', 36 + data_size))  # file size minus 8
    wav.extend(b'WAVE')

    # fmt chunk
    wav.extend(b'fmt ')
    wav.extend(struct.pack('<I', 16))  # fmt chunk size for PCM
    wav.extend(struct.pack('<H', 1))  # audio format 1 = PCM
    wav.extend(struct.pack('<H', channels))
    wav.extend(struct.pack('<I', sample_rate))
    wav.extend(struct.pack('<I', sample_rate * channels * sample_width))  # byte rate
    wav.extend(struct.pack('<H', channels * sample_width))  # block align
    wav.extend(struct.pack('<H', sample_width * 8))  # bits per sample

    # data chunk
    wav.extend(b'data')
    wav.extend(struct.pack('<I', data_size))
    wav.extend(audio_data)

    return wav


def send_audio_to_server(audio_data):
    """Send recorded WAV audio to the voice server and return its reply.

    Args:
        audio_data: WAV data as produced by create_wav().

    Returns:
        The decoded JSON reply (main_loop reads the keys 'success',
        'transcription' and 'response' from it), or None on any failure.
    """
    # NOTE(review): the original body of this function was lost in the source
    # this file was restored from. This is a reconstruction from the call
    # site in main_loop(). The transport (urequests) and the wire format (raw
    # WAV body with Content-Type audio/wav) are ASSUMPTIONS -- confirm both
    # against the server's /process handler before relying on this.
    url = VOICE_SERVER_URL + PROCESS_ENDPOINT
    response = None
    try:
        # Imported inside the try so a missing module is reported like any
        # other upload failure instead of crashing the main loop.
        import urequests

        response = urequests.post(url, data=audio_data,
                                  headers={"Content-Type": "audio/wav"})
        if response.status_code != 200:
            print("Server error: {}".format(response.status_code))
            return None
        return response.json()
    except Exception as e:
        print("Error sending audio: {}".format(e))
        return None
    finally:
        # Always release the underlying socket.
        if response is not None:
            response.close()


def display_response(text):
    """Show the response text on the LCD, word-wrapped to at most 5 lines.

    Args:
        text: The response string to display.
    """
    # NOTE(review): the opening lines of this function (up to the wrap-width
    # comparison) were lost in the source this file was restored from and
    # are reconstructed; the original may have drawn a heading above the
    # text (the first line starts at y=30) -- verify.
    lcd.clear(COLOR_BLACK)

    # Greedy word wrap
    words = text.split()
    lines = []
    current_line = ""

    for word in words:
        test_line = current_line + word + " "
        if len(test_line) * 8 > lcd.width() - 20:  # Rough character width
            if current_line:
                lines.append(current_line.strip())
            current_line = word + " "
        else:
            current_line = test_line

    if current_line:
        lines.append(current_line.strip())

    # Display lines
    y = 30
    for line in lines[:5]:  # Max 5 lines
        lcd.draw_string(10, y, line, COLOR_GREEN, COLOR_BLACK)
        y += 20


def set_led(state):
    """Control LED state (no-op until init_hardware() has set up the LED)."""
    if led:
        led.value(1 if state else 0)


def _show_ready_prompt():
    """Draw the idle "Say wake word..." prompt on a cleared LCD."""
    lcd.clear(COLOR_BLACK)
    lcd.draw_string(10, lcd.height() // 2 - 10, "Say wake word...",
                    COLOR_BLUE, COLOR_BLACK)


def _handle_command():
    """Record one spoken command, send it to the server, show the result."""
    lcd.clear(COLOR_BLACK)
    lcd.draw_string(10, 50, "Listening...", COLOR_RED, COLOR_BLACK)

    # Small delay to skip the wake word itself
    time.sleep(0.5)

    # Record command
    audio_data = record_audio()

    # Send to server
    response = send_audio_to_server(audio_data)

    if response and response.get('success'):
        transcription = response.get('transcription', '')
        response_text = response.get('response', 'No response')

        print("You said: {}".format(transcription))
        print("Response: {}".format(response_text))

        # Display response
        display_response(response_text)

        # TODO: Play TTS audio response
    else:
        lcd.clear(COLOR_BLACK)
        lcd.draw_string(10, 50, "Error processing", COLOR_RED, COLOR_BLACK)


def main_loop():
    """Main voice assistant loop

    Polls for the wake trigger; on detection records a command, sends it to
    the server and displays the reply. Runs until KeyboardInterrupt; any
    other exception is logged and the loop continues after a short pause.
    """
    # Show ready status
    _show_ready_prompt()
    print("Voice assistant ready. Listening for wake word...")

    while True:
        try:
            # Listen for wake word
            if detect_wake_word():
                print("Wake word detected!")

                # Visual feedback
                set_led(True)
                try:
                    _handle_command()
                finally:
                    # Turn off LED even when recording or upload raised, so
                    # it never stays lit after a failed interaction.
                    set_led(False)

                # Pause before listening again
                time.sleep(2)

                # Reset display
                _show_ready_prompt()

            # Small delay to prevent tight loop
            time.sleep(0.1)

            # Garbage collection
            if gc.mem_free() < 100000:  # If free memory < 100KB
                gc.collect()

        except KeyboardInterrupt:
            print("Exiting...")
            break
        except Exception as e:
            print("Error in main loop: {}".format(e))
            time.sleep(1)


def main():
    """Main entry point"""
    print("=" * 40)
    print("Maix Duino Voice Assistant")
    print("=" * 40)

    # Initialize hardware
    init_hardware()

    # Connect to network
    if not init_network():
        print("Failed to initialize network. Exiting.")
        return

    # Load wake word model (optional)
    load_wake_word_model()

    # Start main loop
    try:
        main_loop()
    except Exception as e:
        print("Fatal error: {}".format(e))
    finally:
        # Cleanup
        set_led(False)
        lcd.clear(COLOR_BLACK)
        lcd.draw_string(10, lcd.height() // 2, "Stopped", COLOR_RED, COLOR_BLACK)


# Run main program
if __name__ == "__main__":
    main()