From 173f7f37d4c90335d944c5fb6946a693e2790f5a Mon Sep 17 00:00:00 2001 From: pyr0ball Date: Mon, 6 Apr 2026 22:21:12 -0700 Subject: [PATCH] feat: import mycroft-precise work as Minerva foundation Ports prior voice assistant research and prototypes from devl/Devops into the Minerva repo. Includes: - docs/: architecture, wake word guides, ESP32-S3 spec, hardware buying guide - scripts/: voice_server.py, voice_server_enhanced.py, setup scripts - hardware/maixduino/: edge device scripts with WiFi credentials scrubbed (replaced hardcoded password with secrets.py pattern) - config/.env.example: server config template - .gitignore: excludes .env, secrets.py, model blobs, ELF firmware - CLAUDE.md: Minerva product context and connection to cf-voice roadmap --- .gitignore | 29 + CLAUDE.md | 165 +++ config/.env.example | 24 + docs/ADVANCED_WAKE_WORD_TOPICS.md | 905 ++++++++++++++ docs/ESP32_S3_VOICE_ASSISTANT_SPEC.md | 1089 +++++++++++++++++ docs/HARDWARE_BUYING_GUIDE.md | 542 ++++++++ docs/K210_PERFORMANCE_VERIFICATION.md | 223 ++++ docs/LCD_CAMERA_FEATURES.md | 566 +++++++++ docs/MYCROFT_PRECISE_GUIDE.md | 638 ++++++++++ docs/PRECISE_DEPLOYMENT.md | 577 +++++++++ docs/QUESTIONS_ANSWERED.md | 470 +++++++ docs/QUICKSTART.md | 421 +++++++ docs/WAKE_WORD_ADVANCED.md | 723 +++++++++++ docs/WAKE_WORD_QUICK_REF.md | 411 +++++++ docs/maix-voice-assistant-architecture.md | 347 ++++++ hardware/maixduino/MICROPYTHON_QUIRKS.md | 348 ++++++ hardware/maixduino/README.md | 184 +++ .../maixduino/SESSION_PROGRESS_2025-12-03.md | 376 ++++++ hardware/maixduino/maix_debug_wifi.py | 41 + hardware/maixduino/maix_discover_modules.py | 51 + hardware/maixduino/maix_simple_record_test.py | 461 +++++++ hardware/maixduino/maix_test_simple.py | 252 ++++ hardware/maixduino/maix_voice_client.py | 465 +++++++ hardware/maixduino/secrets.py.example | 7 + scripts/download_pretrained_models.sh | 409 +++++++ scripts/quick_start_hey_mycroft.sh | 456 +++++++ scripts/setup_precise.sh | 630 ++++++++++ 
scripts/setup_voice_assistant.sh | 429 +++++++ scripts/voice_server.py | 700 +++++++++++ scripts/voice_server_enhanced.py | 580 +++++++++ 30 files changed, 12519 insertions(+) create mode 100644 .gitignore create mode 100644 CLAUDE.md create mode 100644 config/.env.example create mode 100755 docs/ADVANCED_WAKE_WORD_TOPICS.md create mode 100755 docs/ESP32_S3_VOICE_ASSISTANT_SPEC.md create mode 100755 docs/HARDWARE_BUYING_GUIDE.md create mode 100755 docs/K210_PERFORMANCE_VERIFICATION.md create mode 100755 docs/LCD_CAMERA_FEATURES.md create mode 100755 docs/MYCROFT_PRECISE_GUIDE.md create mode 100755 docs/PRECISE_DEPLOYMENT.md create mode 100755 docs/QUESTIONS_ANSWERED.md create mode 100755 docs/QUICKSTART.md create mode 100755 docs/WAKE_WORD_ADVANCED.md create mode 100755 docs/WAKE_WORD_QUICK_REF.md create mode 100755 docs/maix-voice-assistant-architecture.md create mode 100755 hardware/maixduino/MICROPYTHON_QUIRKS.md create mode 100755 hardware/maixduino/README.md create mode 100755 hardware/maixduino/SESSION_PROGRESS_2025-12-03.md create mode 100755 hardware/maixduino/maix_debug_wifi.py create mode 100755 hardware/maixduino/maix_discover_modules.py create mode 100644 hardware/maixduino/maix_simple_record_test.py create mode 100644 hardware/maixduino/maix_test_simple.py create mode 100755 hardware/maixduino/maix_voice_client.py create mode 100644 hardware/maixduino/secrets.py.example create mode 100755 scripts/download_pretrained_models.sh create mode 100755 scripts/quick_start_hey_mycroft.sh create mode 100755 scripts/setup_precise.sh create mode 100755 scripts/setup_voice_assistant.sh create mode 100755 scripts/voice_server.py create mode 100755 scripts/voice_server_enhanced.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d27ccb5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +# Credentials +secrets.py +config/.env +*.env +!*.env.example + +# Models (large binary files) +models/*.pb +models/*.pb.params +models/*.net +models/*.tflite 
+models/*.kmodel + +# OEM firmware blobs +*.elf +*.7z +*.bin + +# Python +__pycache__/ +*.pyc +*.pyo + +# Logs +logs/ + +# IDE +.vscode/ +.idea/ diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..6a34638 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,165 @@ +# Minerva — Developer Context + +**Product code:** `MNRV` +**Status:** Concept / early prototype +**Domain:** Privacy-first, local-only voice assistant hardware platform + +--- + +## What Minerva Is + +A 100% local, FOSS voice assistant hardware platform. No cloud. No subscriptions. No data leaving the local network. + +The goal is a reference hardware + software stack for a privacy-first voice assistant that anyone can build, extend, or self-host — including people without technical backgrounds if the assembly docs are good enough. + +Core design principles (same as all CF products): +- **Local-first inference** — Whisper STT, Piper TTS, Mycroft Precise wake word all run on the host server +- **Edge where possible** — wake word detection moves to edge hardware over time (K210 → ESP32-S3 → custom) +- **No cloud dependency** — Home Assistant optional, not required +- **100% FOSS stack** + +--- + +## Hardware Targets + +### Phase 1 (current): Maix Duino (K210) +- K210 dual-core RISC-V @ 400MHz with KPU neural accelerator +- Audio: I2S microphone + speaker output +- Connectivity: ESP32 WiFi/BLE co-processor +- Programming: MaixPy (MicroPython) +- Status: server-side wake word working; edge inference in progress + +### Phase 2: ESP32-S3 +- More accessible, cheaper, better WiFi +- On-device wake word with Espressif ESP-SR +- See `docs/ESP32_S3_VOICE_ASSISTANT_SPEC.md` + +### Phase 3: Custom hardware +- Dedicated PCB for CF reference platform +- Hardware-accelerated wake word + VAD +- Designed for accessibility: large buttons, LED feedback, easy mounting + +--- + +## Software Stack + +### Edge device (Maix Duino / ESP32-S3) +- Firmware: MaixPy or ESP-IDF +- Client: 
`hardware/maixduino/maix_voice_client.py` +- Audio: I2S capture and playback +- Network: WiFi → Minerva server + +### Server (runs on Heimdall or any Linux box) +- Voice server: `scripts/voice_server.py` (Flask + Whisper + Precise) +- Enhanced version: `scripts/voice_server_enhanced.py` (adds speaker ID via pyannote) +- STT: Whisper (local) +- Wake word: Mycroft Precise +- TTS: Piper +- Home Assistant: REST API integration (optional) +- Conda env: `whisper_cli` (existing on Heimdall) + +--- + +## Directory Structure + +``` +minerva/ +├── docs/ # Architecture, guides, reference docs +│ ├── maix-voice-assistant-architecture.md +│ ├── MYCROFT_PRECISE_GUIDE.md +│ ├── PRECISE_DEPLOYMENT.md +│ ├── ESP32_S3_VOICE_ASSISTANT_SPEC.md +│ ├── HARDWARE_BUYING_GUIDE.md +│ ├── LCD_CAMERA_FEATURES.md +│ ├── K210_PERFORMANCE_VERIFICATION.md +│ ├── WAKE_WORD_ADVANCED.md +│ ├── ADVANCED_WAKE_WORD_TOPICS.md +│ └── QUESTIONS_ANSWERED.md +├── scripts/ # Server-side scripts +│ ├── voice_server.py # Core Flask + Whisper + Precise server +│ ├── voice_server_enhanced.py # + speaker identification (pyannote) +│ ├── setup_voice_assistant.sh # Server setup +│ ├── setup_precise.sh # Mycroft Precise training environment +│ └── download_pretrained_models.sh +├── hardware/ +│ └── maixduino/ # K210 edge device scripts +│ ├── maix_voice_client.py # Production client +│ ├── maix_simple_record_test.py # Audio capture test +│ ├── maix_test_simple.py # Hardware/network test +│ ├── maix_debug_wifi.py # WiFi diagnostics +│ ├── maix_discover_modules.py # Module discovery +│ ├── secrets.py.example # WiFi/server credential template +│ ├── MICROPYTHON_QUIRKS.md +│ └── README.md +├── config/ +│ └── .env.example # Server config template +├── models/ # Wake word models (gitignored, large) +└── CLAUDE.md # This file +``` + +--- + +## Credentials / Secrets + +**Never commit real credentials.** Pattern: + +- Server: copy `config/.env.example` → `config/.env`, fill in real values +- Edge device: copy 
`hardware/maixduino/secrets.py.example` → `secrets.py`, fill in WiFi + server URL + +Both files are gitignored. `.example` files are committed as templates. + +--- + +## Running the Server + +```bash +# Activate environment +conda activate whisper_cli + +# Basic server (Whisper + Precise wake word) +python scripts/voice_server.py \ + --enable-precise \ + --precise-model models/hey-minerva.net \ + --precise-sensitivity 0.5 + +# Enhanced server (+ speaker identification) +python scripts/voice_server_enhanced.py \ + --enable-speaker-id \ + --hf-token $HF_TOKEN + +# Test health +curl http://localhost:5000/health +curl http://localhost:5000/wake-word/status +``` + +--- + +## Connection to CF Voice Infrastructure + +Minerva is the **hardware platform** for cf-voice. As `circuitforge_core.voice` matures: + +- `cf_voice.io` (STT/TTS) → replaces the ad hoc Whisper/Piper calls in `voice_server.py` +- `cf_voice.context` (parallel classifier) → augments Mycroft Precise with tone/environment detection +- `cf_voice.telephony` → future: Minerva as an always-on household linnet node + +Minerva hardware + cf-voice software = the CF reference voice assistant stack. + +--- + +## Roadmap + +See Forgejo milestones on this repo. High-level: + +1. **Alpha — Server-side pipeline** — Whisper + Precise + Piper working end-to-end on Heimdall +2. **Beta — Edge wake word** — wake word on K210 or ESP32-S3; audio only streams post-wake +3. **Hardware v1** — documented reference build; buying guide; assembly instructions +4. **cf-voice integration** — Minerva uses cf_voice modules from circuitforge-core +5. 
**Platform** — multiple hardware targets; custom PCB design + +--- + +## Related + +- `cf-voice` module design: `circuitforge-plans/circuitforge-core/2026-04-06-cf-voice-design.md` +- `linnet` product: real-time tone annotation, will eventually embed Minerva as a hardware node +- Heimdall server: primary dev/deployment target (10.1.10.71 on LAN) diff --git a/config/.env.example b/config/.env.example new file mode 100644 index 0000000..6518901 --- /dev/null +++ b/config/.env.example @@ -0,0 +1,24 @@ +# Minerva Voice Server — configuration +# Copy to config/.env and fill in real values. Never commit .env. + +# Server +SERVER_HOST=0.0.0.0 +SERVER_PORT=5000 + +# Whisper STT +WHISPER_MODEL=base + +# Mycroft Precise wake word +# PRECISE_MODEL=/path/to/wake-word.net +# PRECISE_SENSITIVITY=0.5 + +# Home Assistant integration (optional) +# HA_URL=http://homeassistant.local:8123 +# HA_TOKEN=your_long_lived_access_token_here + +# HuggingFace (for speaker identification, optional) +# HF_TOKEN=your_huggingface_token_here + +# Logging +LOG_LEVEL=INFO +LOG_FILE=logs/minerva.log diff --git a/docs/ADVANCED_WAKE_WORD_TOPICS.md b/docs/ADVANCED_WAKE_WORD_TOPICS.md new file mode 100755 index 0000000..ea90ece --- /dev/null +++ b/docs/ADVANCED_WAKE_WORD_TOPICS.md @@ -0,0 +1,905 @@ +# Advanced Wake Word Topics - Pre-trained Models, Multiple Wake Words, and Voice Adaptation + +## Pre-trained Mycroft Models + +### Yes! 
Pre-trained Models Exist + +Mycroft AI provides several pre-trained wake word models you can use immediately: + +**Available Models:** +- **Hey Mycroft** - Original Mycroft wake word (most training data) +- **Hey Jarvis** - Popular alternative +- **Christopher** - Alternative wake word +- **Hey Ezra** - Another option + +### Download Pre-trained Models + +```bash +# On Heimdall +conda activate precise +cd ~/precise-models + +# Create directory for pre-trained models +mkdir -p pretrained +cd pretrained + +# Download Hey Mycroft (recommended starting point) +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz +tar xzf hey-mycroft.tar.gz + +# Download other models +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-jarvis.tar.gz +tar xzf hey-jarvis.tar.gz + +# List available models +ls -lh *.net +``` + +### Test Pre-trained Model + +```bash +conda activate precise + +# Test Hey Mycroft +precise-listen hey-mycroft.net + +# Speak "Hey Mycroft" - should see "!" when detected +# Press Ctrl+C to exit + +# Test with different threshold +precise-listen hey-mycroft.net -t 0.7 # More conservative +``` + +### Use Pre-trained Model in Voice Server + +```bash +cd ~/voice-assistant + +# Start server with Hey Mycroft model +python voice_server.py \ + --enable-precise \ + --precise-model ~/precise-models/pretrained/hey-mycroft.net \ + --precise-sensitivity 0.5 +``` + +### Fine-tune Pre-trained Models + +You can use pre-trained models as a **starting point** and fine-tune with your voice: + +```bash +cd ~/precise-models +mkdir -p hey-mycroft-custom + +# Copy base model +cp pretrained/hey-mycroft.net hey-mycroft-custom/ + +# Collect your samples +cd hey-mycroft-custom +precise-collect # Record 20-30 samples of YOUR voice + +# Fine-tune from pre-trained model +precise-train -e 30 hey-mycroft-custom.net . \ + --from-checkpoint ../pretrained/hey-mycroft.net + +# This is MUCH faster than training from scratch! 
+``` + +**Benefits:** +- ✅ Start with proven model +- ✅ Much less training data needed (20-30 vs 100+ samples) +- ✅ Faster training (30 mins vs 60 mins) +- ✅ Good baseline accuracy + +## Multiple Wake Words + +### Architecture Options + +#### Option 1: Multiple Models in Parallel (Server-Side Only) + +Run multiple Precise instances simultaneously: + +```python +# In voice_server.py - Multiple wake word detection + +from precise_runner import PreciseEngine, PreciseRunner +import threading + +# Global runners +precise_runners = {} + +def on_wake_word_detected(wake_word_name): + """Callback factory for different wake words""" + def callback(): + print(f"Wake word detected: {wake_word_name}") + wake_word_queue.put({ + 'wake_word': wake_word_name, + 'timestamp': time.time() + }) + return callback + +def start_multiple_wake_words(wake_word_configs): + """ + Start multiple wake word detectors + + Args: + wake_word_configs: List of dicts with 'name', 'model', 'sensitivity' + + Example: + configs = [ + {'name': 'hey mycroft', 'model': 'hey-mycroft.net', 'sensitivity': 0.5}, + {'name': 'hey jarvis', 'model': 'hey-jarvis.net', 'sensitivity': 0.5} + ] + """ + global precise_runners + + for config in wake_word_configs: + engine = PreciseEngine( + '/usr/local/bin/precise-engine', + config['model'] + ) + + runner = PreciseRunner( + engine, + sensitivity=config['sensitivity'], + on_activation=on_wake_word_detected(config['name']) + ) + + runner.start() + precise_runners[config['name']] = runner + + print(f"Started wake word detector: {config['name']}") +``` + +**Server-Side Multiple Wake Words:** +```bash +# Start server with multiple wake words +python voice_server.py \ + --enable-precise \ + --precise-models "hey-mycroft:~/models/hey-mycroft.net:0.5,hey-jarvis:~/models/hey-jarvis.net:0.5" +``` + +**Performance Impact:** +- CPU: ~5-10% per model (can run 2-3 easily) +- Memory: ~50-100MB per model +- Latency: Minimal (all run in parallel) + +#### Option 2: Single Model, Multiple 
Phrases (Edge or Server) + +Train ONE model that responds to multiple phrases: + +```bash +cd ~/precise-models/multi-wake +conda activate precise + +# Record samples for BOTH wake words in the SAME dataset +# Label all as "wake-word" regardless of which phrase + +mkdir -p wake-word not-wake-word + +# Record "Hey Mycroft" samples +precise-collect # Save to wake-word/hey-mycroft-*.wav + +# Record "Hey Computer" samples +precise-collect # Save to wake-word/hey-computer-*.wav + +# Record negatives +precise-collect -f not-wake-word/random.wav + +# Train single model on both phrases +precise-train -e 60 multi-wake.net . +``` + +**Pros:** +- ✅ Single model = less compute +- ✅ Works on edge (K210) +- ✅ Easy to deploy + +**Cons:** +- ❌ Can't tell which wake word was used +- ❌ May reduce accuracy for each individual phrase +- ❌ Higher false positive risk + +#### Option 3: Sequential Detection (Edge) + +Detect wake word, then identify which one: + +```python +# Pseudo-code for edge detection +if wake_word_detected(): + audio_snippet = last_2_seconds() + + # Run all models on the audio snippet + scores = { + 'hey-mycroft': model1.score(audio_snippet), + 'hey-jarvis': model2.score(audio_snippet), + 'hey-computer': model3.score(audio_snippet) + } + + # Use highest scoring wake word + wake_word = max(scores, key=scores.get) +``` + +### Recommendations + +**Server-Side (Heimdall):** +- ✅ **Use Option 1** - Multiple models in parallel +- Run 2-3 wake words easily +- Each can have different sensitivity +- Can identify which wake word was used +- Example: "Hey Mycroft" for commands, "Hey Jarvis" for queries + +**Edge (Maix Duino K210):** +- ✅ **Use Option 2** - Single multi-phrase model +- K210 can handle 1 model efficiently +- Train on 2-3 phrases max +- Simpler deployment +- Lower latency + +## Voice Adaptation & Multi-User Support + +### Approach 1: Inclusive Training (Recommended) + +Train ONE model on EVERYONE'S voices: + +```bash +cd ~/precise-models/family-wake-word +conda 
activate precise + +# Record samples from each family member +# Alice records 30 samples +precise-collect # Save as wake-word/alice-*.wav + +# Bob records 30 samples +precise-collect # Save as wake-word/bob-*.wav + +# Carol records 30 samples +precise-collect # Save as wake-word/carol-*.wav + +# Train on all voices +precise-train -e 60 family-wake-word.net . +``` + +**Pros:** +- ✅ Everyone can use the system +- ✅ Single model deployment +- ✅ Works for all family members +- ✅ Simple maintenance + +**Cons:** +- ❌ Can't identify who spoke +- ❌ May need more training data +- ❌ No personalization + +**Best for:** Family voice assistant, shared devices + +### Approach 2: Speaker Identification (Advanced) + +Detect wake word, then identify speaker: + +```python +# Architecture with speaker ID + +# Step 1: Precise detects wake word +if wake_word_detected(): + + # Step 2: Capture voice sample + voice_sample = record_audio(duration=3) + + # Step 3: Speaker identification + speaker = identify_speaker(voice_sample) + # Uses voice embeddings/neural network + + # Step 4: Process with user context + process_command(voice_sample, user=speaker) +``` + +**Implementation Options:** + +#### Option A: Use resemblyzer (Voice Embeddings) +```bash +pip install resemblyzer --break-system-packages + +# Enrollment phase +python enroll_users.py +# Each user records 10-20 seconds of speech +# System creates voice profile (embedding) + +# Runtime +python speaker_id.py +# Compares incoming audio to stored embeddings +# Returns most likely speaker +``` + +**Example Code:** +```python +from resemblyzer import VoiceEncoder, preprocess_wav +import numpy as np + +# Initialize encoder +encoder = VoiceEncoder() + +# Enrollment - do once per user +def enroll_user(name, audio_files): + """Create voice profile for user""" + embeddings = [] + + for audio_file in audio_files: + wav = preprocess_wav(audio_file) + embedding = encoder.embed_utterance(wav) + embeddings.append(embedding) + + # Average embeddings 
for robustness + user_profile = np.mean(embeddings, axis=0) + + # Save profile + np.save(f'profiles/{name}.npy', user_profile) + return user_profile + +# Identification - run each time +def identify_speaker(audio_file, profiles_dir='profiles'): + """Identify which enrolled user is speaking""" + wav = preprocess_wav(audio_file) + test_embedding = encoder.embed_utterance(wav) + + # Load all profiles + profiles = {} + for profile_file in os.listdir(profiles_dir): + name = profile_file.replace('.npy', '') + profile = np.load(os.path.join(profiles_dir, profile_file)) + profiles[name] = profile + + # Calculate similarity to each profile + similarities = {} + for name, profile in profiles.items(): + similarity = np.dot(test_embedding, profile) + similarities[name] = similarity + + # Return most similar + best_match = max(similarities, key=similarities.get) + confidence = similarities[best_match] + + if confidence > 0.7: # Threshold + return best_match + else: + return "unknown" +``` + +#### Option B: Use pyannote.audio (Production-grade) +```bash +pip install pyannote.audio --break-system-packages + +# Requires HuggingFace token (same as diarization) +``` + +**Example:** +```python +from pyannote.audio import Inference + +# Initialize +inference = Inference( + "pyannote/embedding", + use_auth_token="your_hf_token" +) + +# Enroll users +alice_profile = inference("alice_sample.wav") +bob_profile = inference("bob_sample.wav") + +# Identify +test_embedding = inference("test_audio.wav") + +# Compare +from scipy.spatial.distance import cosine +alice_similarity = 1 - cosine(test_embedding, alice_profile) +bob_similarity = 1 - cosine(test_embedding, bob_profile) + +if alice_similarity > bob_similarity and alice_similarity > 0.7: + speaker = "Alice" +elif bob_similarity > 0.7: + speaker = "Bob" +else: + speaker = "Unknown" +``` + +**Pros:** +- ✅ Can identify individual users +- ✅ Personalized responses +- ✅ User-specific commands/permissions +- ✅ Better for privacy (know who's 
speaking) + +**Cons:** +- ❌ More complex implementation +- ❌ Requires enrollment phase +- ❌ Additional processing time (~100-200ms) +- ❌ May fail with similar voices + +### Approach 3: Per-User Wake Word Models + +Each person has their OWN wake word: + +```bash +# Alice's wake word: "Hey Mycroft" +# Train on ONLY Alice's voice + +# Bob's wake word: "Hey Jarvis" +# Train on ONLY Bob's voice + +# Carol's wake word: "Hey Computer" +# Train on ONLY Carol's voice +``` + +**Deployment:** +Run all 3 models in parallel (server-side): +```python +wake_word_configs = [ + {'name': 'Alice', 'wake_word': 'hey mycroft', 'model': 'alice-wake.net'}, + {'name': 'Bob', 'wake_word': 'hey jarvis', 'model': 'bob-wake.net'}, + {'name': 'Carol', 'wake_word': 'hey computer', 'model': 'carol-wake.net'} +] +``` + +**Pros:** +- ✅ Automatic user identification +- ✅ Highest accuracy per user +- ✅ Clear user separation +- ✅ No additional speaker ID needed + +**Cons:** +- ❌ Requires 3x models (server only) +- ❌ Users must remember their wake word +- ❌ 3x CPU usage (~15-30%) +- ❌ Can't work on edge (K210) + +### Approach 4: Context-Based Adaptation + +No speaker ID, but learn from interaction: + +```python +# Track command patterns +user_context = { + 'last_command': 'turn on living room lights', + 'frequent_entities': ['light.living_room', 'light.bedroom'], + 'time_of_day_patterns': {'morning': 'coffee maker', 'evening': 'tv'}, + 'location': 'home' # vs 'away' +} + +# Use context to improve intent recognition +if "turn on the lights" and time.is_morning(): + # Probably means bedroom lights (based on history) + entity = user_context['frequent_entities'][0] +``` + +**Pros:** +- ✅ No enrollment needed +- ✅ Improves over time +- ✅ Simple to implement +- ✅ Works with any number of users + +**Cons:** +- ❌ No true user identification +- ❌ May make incorrect assumptions +- ❌ Privacy concerns (tracking behavior) + +## Recommended Strategy + +### For Your Use Case + +Based on your home lab setup, I 
recommend: + +#### Phase 1: Single Wake Word, Inclusive Training (Week 1-2) +```bash +# Start simple +cd ~/precise-models/hey-computer +conda activate precise + +# Have all family members record samples +# Alice: 30 samples of "Hey Computer" +# Bob: 30 samples of "Hey Computer" +# You: 30 samples of "Hey Computer" + +# Train single model on all voices +precise-train -e 60 hey-computer.net . + +# Deploy to server +python voice_server.py \ + --enable-precise \ + --precise-model hey-computer.net +``` + +**Why:** +- Simple to setup and test +- Everyone can use it immediately +- Single model = easier debugging +- Works on edge if you migrate later + +#### Phase 2: Add Speaker Identification (Week 3-4) +```bash +# Install resemblyzer +pip install resemblyzer --break-system-packages + +# Enroll users +python enroll_users.py +# Each person speaks for 20 seconds + +# Update voice_server.py to identify speaker +# Use speaker ID for personalized responses +``` + +**Why:** +- Enables personalization +- Can track preferences per user +- User-specific command permissions +- Better privacy (know who's speaking) + +#### Phase 3: Multiple Wake Words (Month 2+) +```bash +# Add alternative wake words for different contexts +# "Hey Mycroft" - General commands +# "Hey Jarvis" - Media/Plex control +# "Computer" - Quick commands (lights, temp) + +# Deploy multiple models on server +python voice_server.py \ + --enable-precise \ + --precise-models "mycroft:hey-mycroft.net:0.5,jarvis:hey-jarvis.net:0.5" +``` + +**Why:** +- Different wake words for different contexts +- Reduces false positives (more specific triggers) +- Fun factor (Jarvis for media!) 
+- Server can handle 2-3 easily + +## Implementation Guide: Multiple Wake Words + +### Update voice_server.py for Multiple Wake Words + +```python +# Add to voice_server.py + +def start_multiple_wake_words(configs): + """ + Start multiple wake word detectors + + Args: + configs: List of dicts with 'name', 'model_path', 'sensitivity' + """ + global precise_runners + precise_runners = {} + + for config in configs: + try: + engine = PreciseEngine( + DEFAULT_PRECISE_ENGINE, + config['model_path'] + ) + + def make_callback(wake_word_name): + def callback(): + print(f"Wake word detected: {wake_word_name}") + wake_word_queue.put({ + 'wake_word': wake_word_name, + 'timestamp': time.time(), + 'source': 'precise' + }) + return callback + + runner = PreciseRunner( + engine, + sensitivity=config['sensitivity'], + on_activation=make_callback(config['name']) + ) + + runner.start() + precise_runners[config['name']] = runner + + print(f"✓ Started: {config['name']} (sensitivity: {config['sensitivity']})") + + except Exception as e: + print(f"✗ Failed to start {config['name']}: {e}") + + return len(precise_runners) > 0 + +# Add to main() +parser.add_argument('--precise-models', + help='Multiple models: name:path:sensitivity,name2:path2:sensitivity2') + +# Parse multiple models +if args.precise_models: + configs = [] + for model_spec in args.precise_models.split(','): + name, path, sensitivity = model_spec.split(':') + configs.append({ + 'name': name, + 'model_path': os.path.expanduser(path), + 'sensitivity': float(sensitivity) + }) + + start_multiple_wake_words(configs) +``` + +### Usage Example + +```bash +cd ~/voice-assistant + +# Start with multiple wake words +python voice_server.py \ + --enable-precise \ + --precise-models "\ +hey-mycroft:~/precise-models/pretrained/hey-mycroft.net:0.5,\ +hey-jarvis:~/precise-models/pretrained/hey-jarvis.net:0.5" +``` + +## Implementation Guide: Speaker Identification + +### Add to voice_server.py + +```python +# Add resemblyzer support +try: + 
from resemblyzer import VoiceEncoder, preprocess_wav + import numpy as np + SPEAKER_ID_AVAILABLE = True +except ImportError: + SPEAKER_ID_AVAILABLE = False + print("Warning: resemblyzer not available. Speaker ID disabled.") + +# Initialize encoder +voice_encoder = None +speaker_profiles = {} + +def load_speaker_profiles(profiles_dir='~/voice-assistant/profiles'): + """Load enrolled speaker profiles""" + global speaker_profiles, voice_encoder + + if not SPEAKER_ID_AVAILABLE: + return False + + profiles_dir = os.path.expanduser(profiles_dir) + + if not os.path.exists(profiles_dir): + print(f"No speaker profiles found at {profiles_dir}") + return False + + # Initialize encoder + voice_encoder = VoiceEncoder() + + # Load all profiles + for profile_file in os.listdir(profiles_dir): + if profile_file.endswith('.npy'): + name = profile_file.replace('.npy', '') + profile = np.load(os.path.join(profiles_dir, profile_file)) + speaker_profiles[name] = profile + print(f"Loaded speaker profile: {name}") + + return len(speaker_profiles) > 0 + +def identify_speaker(audio_path, threshold=0.7): + """Identify speaker from audio file""" + if not SPEAKER_ID_AVAILABLE or not speaker_profiles: + return None + + try: + # Get embedding for test audio + wav = preprocess_wav(audio_path) + test_embedding = voice_encoder.embed_utterance(wav) + + # Compare to all profiles + similarities = {} + for name, profile in speaker_profiles.items(): + similarity = np.dot(test_embedding, profile) + similarities[name] = similarity + + # Get best match + best_match = max(similarities, key=similarities.get) + confidence = similarities[best_match] + + print(f"Speaker ID: {best_match} (confidence: {confidence:.2f})") + + if confidence > threshold: + return best_match + else: + return "unknown" + + except Exception as e: + print(f"Error identifying speaker: {e}") + return None + +# Update process endpoint to include speaker ID +@app.route('/process', methods=['POST']) +def process(): + """Process complete 
voice command with speaker identification""" + # ... existing code ... + + # Add speaker identification + speaker = identify_speaker(temp_path) if speaker_profiles else None + + if speaker: + print(f"Detected speaker: {speaker}") + # Could personalize response based on speaker + + # ... rest of processing ... +``` + +### Enrollment Script + +Create `enroll_speaker.py`: + +```python +#!/usr/bin/env python3 +""" +Enroll users for speaker identification + +Usage: + python enroll_speaker.py --name Alice --audio alice_sample.wav + python enroll_speaker.py --name Alice --duration 20 # Record live +""" + +import argparse +import os +import numpy as np +from resemblyzer import VoiceEncoder, preprocess_wav +import pyaudio +import wave + +def record_audio(duration=20, sample_rate=16000): + """Record audio from microphone""" + print(f"Recording for {duration} seconds...") + print("Speak naturally - read a paragraph, have a conversation, etc.") + + chunk = 1024 + format = pyaudio.paInt16 + channels = 1 + + p = pyaudio.PyAudio() + + stream = p.open( + format=format, + channels=channels, + rate=sample_rate, + input=True, + frames_per_buffer=chunk + ) + + frames = [] + for i in range(0, int(sample_rate / chunk * duration)): + data = stream.read(chunk) + frames.append(data) + + stream.stop_stream() + stream.close() + p.terminate() + + # Save to temp file + temp_file = f"/tmp/enrollment_{os.getpid()}.wav" + wf = wave.open(temp_file, 'wb') + wf.setnchannels(channels) + wf.setsampwidth(p.get_sample_size(format)) + wf.setframerate(sample_rate) + wf.writeframes(b''.join(frames)) + wf.close() + + return temp_file + +def enroll_speaker(name, audio_file, profiles_dir='~/voice-assistant/profiles'): + """Create voice profile for speaker""" + profiles_dir = os.path.expanduser(profiles_dir) + os.makedirs(profiles_dir, exist_ok=True) + + # Initialize encoder + encoder = VoiceEncoder() + + # Process audio + wav = preprocess_wav(audio_file) + embedding = encoder.embed_utterance(wav) + + # Save 
profile + profile_path = os.path.join(profiles_dir, f'{name}.npy') + np.save(profile_path, embedding) + + print(f"✓ Enrolled speaker: {name}") + print(f" Profile saved to: {profile_path}") + + return profile_path + +def main(): + parser = argparse.ArgumentParser(description="Enroll speaker for voice identification") + parser.add_argument('--name', required=True, help='Speaker name') + parser.add_argument('--audio', help='Path to audio file (wav)') + parser.add_argument('--duration', type=int, default=20, + help='Recording duration if not using audio file') + parser.add_argument('--profiles-dir', default='~/voice-assistant/profiles', + help='Directory to save profiles') + + args = parser.parse_args() + + # Get audio file + if args.audio: + audio_file = args.audio + if not os.path.exists(audio_file): + print(f"Error: Audio file not found: {audio_file}") + return 1 + else: + audio_file = record_audio(args.duration) + + # Enroll speaker + try: + enroll_speaker(args.name, audio_file, args.profiles_dir) + return 0 + except Exception as e: + print(f"Error enrolling speaker: {e}") + return 1 + +if __name__ == '__main__': + import sys + sys.exit(main()) +``` + +## Performance Comparison + +### Single Wake Word +- **Latency:** 100-200ms +- **CPU:** ~5-10% (idle) +- **Memory:** ~100MB +- **Accuracy:** 95%+ + +### Multiple Wake Words (3 models) +- **Latency:** 100-200ms (parallel) +- **CPU:** ~15-30% (idle) +- **Memory:** ~300MB +- **Accuracy:** 95%+ each + +### With Speaker Identification +- **Additional latency:** +100-200ms +- **Additional CPU:** +5% during ID +- **Additional memory:** +50MB +- **Accuracy:** 85-95% (depending on enrollment quality) + +## Best Practices + +### Wake Word Selection +1. **Different enough** - "Hey Mycroft" vs "Hey Jarvis" (not "Hey Alice" vs "Hey Alex") +2. **Clear consonants** - Easier to detect +3. **2-3 syllables** - Not too short, not too long +4. **Test in environment** - Check for false triggers + +### Training +1. 
**Include all users** - If using single model +2. **Diverse conditions** - Different rooms, noise levels +3. **Regular updates** - Add false positives weekly +4. **Per-user models** - Higher accuracy, more compute + +### Speaker Identification +1. **Quality enrollment** - 20+ seconds of clear speech +2. **Re-enroll periodically** - Voices change (colds, etc.) +3. **Test thresholds** - Balance accuracy vs false IDs +4. **Graceful fallback** - Handle unknown speakers + +## Recommended Path for You + +```bash +# Week 1: Start with pre-trained "Hey Mycroft" +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz +precise-listen hey-mycroft.net # Test it! + +# Week 2: Fine-tune with your voices +precise-train -e 30 hey-mycroft-custom.net . \ + --from-checkpoint hey-mycroft.net + +# Week 3: Add speaker identification +pip install resemblyzer +python enroll_speaker.py --name Alan --duration 20 +python enroll_speaker.py --name [Family Member] --duration 20 + +# Week 4: Add second wake word ("Hey Jarvis" for Plex?) +wget hey-jarvis.tar.gz +# Run both in parallel + +# Month 2+: Optimize and expand +# - More wake words for different contexts +# - Per-user wake word models +# - Context-aware responses +``` + +This gives you a smooth progression from simple to advanced! 
diff --git a/docs/ESP32_S3_VOICE_ASSISTANT_SPEC.md b/docs/ESP32_S3_VOICE_ASSISTANT_SPEC.md new file mode 100755 index 0000000..e5696e0 --- /dev/null +++ b/docs/ESP32_S3_VOICE_ASSISTANT_SPEC.md @@ -0,0 +1,1089 @@ +# ESP32-S3-Touch-LCD Voice Assistant - Technical Specification + +**Date:** 2026-01-01 +**Hardware:** Waveshare ESP32-S3-Touch-LCD-1.69 +**Display:** 240×280 ST7789V2 with Capacitive Touch +**Framework:** ESP-IDF v5.3.1+ with LVGL 8.4.0+ +**Purpose:** Voice assistant endpoint with real-time audio waveform visualization + +--- + +## Overview + +Voice assistant client for ESP32-S3 with integrated LVGL-based visual feedback showing: +- Real-time audio waveform during listening +- Wake word detection animation +- Processing/thinking state +- Response state with audio output visualization +- Touch controls for volume, sensitivity, settings + +**Architecture:** +``` +┌─────────────────────────────────┐ +│ ESP32-S3-Touch-LCD-1.69 │ +│ │ +│ ┌──────────────────────────┐ │ +│ │ LVGL UI (240×280) │ │ +│ │ - Waveform Canvas │ │ +│ │ - State Indicators │ │──┐ +│ │ - Touch Controls │ │ │ +│ └──────────────────────────┘ │ │ +│ │ │ +│ ┌──────────────────────────┐ │ │ WiFi +│ │ Audio Pipeline │ │ │ Audio Stream +│ │ - I2S Mic Input │ │ │ +│ │ - I2S Speaker Output │ │──┤ +│ │ - Buffer Management │ │ │ +│ └──────────────────────────┘ │ │ +│ │ │ +│ ┌──────────────────────────┐ │ │ +│ │ State Machine │ │ │ +│ │ - Idle → Listening │ │ │ +│ │ - Processing → Speaking│ │──┘ +│ └──────────────────────────┘ │ +└─────────────────────────────────┘ + │ + │ TCP/HTTP + ↓ +┌─────────────────────────────────┐ +│ Heimdall Voice Server │ +│ (10.1.10.71:3006) │ +│ │ +│ - Mycroft Precise Wake Word │ +│ - Whisper STT │ +│ - Home Assistant Integration │ +│ - Piper TTS │ +└─────────────────────────────────┘ +``` + +--- + +## Visual States & UI Design + +### State Machine + +``` + ┌─────────┐ + │ IDLE │ ◄──────────────┐ + └────┬────┘ │ + │ │ + Wake Word Detected │ + │ │ + ↓ │ + ┌──────────┐ │ + 
│LISTENING │ │ + └────┬─────┘ │ + │ │ + End of Speech │ + │ │ + ↓ │ + ┌───────────┐ │ + │PROCESSING │ │ + └─────┬─────┘ │ + │ │ + Response Ready │ + │ │ + ↓ │ + ┌──────────┐ │ + │ SPEAKING │ ───────────────┘ + └──────────┘ +``` + +### Visual Feedback Per State + +#### 1. IDLE State +**Display:** +- Subtle pulsing ring animation (like Google Home) +- Time display from RTC +- Status icons (WiFi strength, battery level) +- Dim backlight (30-50%) + +**Colors:** +- Background: Dark blue (#001F3F) +- Pulse ring: Cyan (#00BFFF) +- Text: White (#FFFFFF) + +**LVGL Widgets:** +```c +lv_obj_t *idle_screen; +lv_obj_t *pulse_ring; // Arc widget, animated rotation +lv_obj_t *time_label; // Label with RTC time +lv_obj_t *status_bar; // Container for icons +``` + +**Animation:** +- Slow pulse: 2-second breathing cycle +- Rotation: 360° over 10 seconds + +--- + +#### 2. LISTENING State +**Display:** +- Real-time audio waveform visualization +- Bright backlight (100%) +- "Listening..." text +- Cancel button (touch) + +**Waveform Visualization:** + +**Option A: Canvas-Based Waveform (Recommended)** +- Use LVGL `lv_canvas` for custom drawing +- Draw waveform from audio buffer samples +- Scrolling waveform (left-to-right) +- Update rate: 30-60 FPS + +**Option B: Bar Chart Spectrum** +- Use `lv_chart` with bar type +- FFT-based spectrum analyzer +- 8-16 bars for frequency bins +- Update rate: 15-30 FPS + +**Colors:** +- Background: Dark gray (#1A1A1A) +- Waveform: Green (#00FF00) +- Peak indicators: Yellow (#FFFF00) +- Clipping: Red (#FF0000) + +**LVGL Implementation:** +```c +// Canvas-based waveform +lv_obj_t *listening_screen; +lv_obj_t *waveform_canvas; // 240×180 canvas +lv_obj_t *listening_label; // "Listening..." 
+lv_obj_t *cancel_btn; // Touch to cancel + +// Waveform buffer (circular buffer) +#define WAVEFORM_WIDTH 240 +#define WAVEFORM_HEIGHT 180 +#define WAVEFORM_CENTER (WAVEFORM_HEIGHT / 2) +int16_t waveform_buffer[WAVEFORM_WIDTH]; +uint16_t waveform_index = 0; + +// Drawing function (called from audio callback) +void draw_waveform(lv_obj_t *canvas, int16_t *audio_samples, size_t count) { + lv_canvas_fill_bg(canvas, lv_color_hex(0x1A1A1A), LV_OPA_COVER); + + lv_draw_line_dsc_t line_dsc; + lv_draw_line_dsc_init(&line_dsc); + line_dsc.color = lv_color_hex(0x00FF00); + line_dsc.width = 2; + + // Draw waveform line + for (int x = 0; x < WAVEFORM_WIDTH - 1; x++) { + int16_t y1 = WAVEFORM_CENTER + (waveform_buffer[x] / 256); + int16_t y2 = WAVEFORM_CENTER + (waveform_buffer[x + 1] / 256); + + lv_point_t points[] = {{x, y1}, {x + 1, y2}}; + lv_canvas_draw_line(canvas, points, 2, &line_dsc); + } +} + +// Audio callback (I2S task) +void audio_i2s_callback(int16_t *samples, size_t count) { + // Downsample audio for waveform display + for (int i = 0; i < count; i += (count / WAVEFORM_WIDTH)) { + waveform_buffer[waveform_index] = samples[i]; + waveform_index = (waveform_index + 1) % WAVEFORM_WIDTH; + } + + // Trigger LVGL update (use event or flag) + xEventGroupSetBits(ui_event_group, WAVEFORM_UPDATE_BIT); +} +``` + +**Touch Controls:** +- Tap anywhere: Cancel listening +- Swipe down: Lower sensitivity +- Swipe up: Increase sensitivity + +--- + +#### 3. PROCESSING State +**Display:** +- Animated spinner/thinking indicator +- "Processing..." text +- Waveform fades out smoothly + +**Animation:** +- Circular spinner with gradient +- Rotation: 360° per 1 second +- Pulsing opacity + +**Colors:** +- Background: Dark gray (#1A1A1A) +- Spinner: Blue (#0080FF) +- Text: Light gray (#CCCCCC) + +**LVGL Implementation:** +```c +lv_obj_t *processing_screen; +lv_obj_t *spinner; // lv_spinner widget +lv_obj_t *processing_label; // "Processing..." 
+ +// Transition from listening to processing +void transition_to_processing(void) { + // Fade out waveform + lv_anim_t fade_out; + lv_anim_init(&fade_out); + lv_anim_set_var(&fade_out, waveform_canvas); + lv_anim_set_values(&fade_out, LV_OPA_COVER, LV_OPA_TRANSP); + lv_anim_set_time(&fade_out, 300); + lv_anim_set_exec_cb(&fade_out, lv_obj_set_style_opa); + lv_anim_start(&fade_out); + + // Show spinner after fade + lv_timer_t *timer = lv_timer_create(show_spinner_callback, 300, NULL); + lv_timer_set_repeat_count(timer, 1); +} +``` + +--- + +#### 4. SPEAKING State +**Display:** +- Audio output waveform (TTS playback visualization) +- "Speaking..." or response text snippet +- Volume indicator + +**Waveform:** +- Same canvas as LISTENING but different color +- Shows output audio being played +- Synchronized with speaker output + +**Colors:** +- Background: Dark gray (#1A1A1A) +- Waveform: Blue (#0080FF) +- Text: White (#FFFFFF) + +**LVGL Implementation:** +```c +lv_obj_t *speaking_screen; +lv_obj_t *output_waveform_canvas; // Same size as input waveform +lv_obj_t *response_label; // Show part of response text +lv_obj_t *volume_bar; // lv_bar widget for volume level + +// Similar drawing to listening state, but fed from speaker buffer +void draw_output_waveform(lv_obj_t *canvas, int16_t *speaker_samples, size_t count) { + // Same logic as input waveform, different color + line_dsc.color = lv_color_hex(0x0080FF); + // ... 
draw logic +} +``` + +**Touch Controls:** +- Tap: Skip response (go back to idle) +- Volume slider: Adjust speaker volume + +--- + +### Additional UI Elements + +#### Status Bar (All States) +**Location:** Top 20 pixels +**Contents:** +- WiFi icon + signal strength +- Battery icon + percentage +- Time (from RTC) +- Mute icon (if muted) + +**LVGL Implementation:** +```c +lv_obj_t *status_bar; +lv_obj_t *wifi_icon; +lv_obj_t *battery_icon; +lv_obj_t *time_label; +lv_obj_t *mute_icon; + +// Update every second +void update_status_bar(lv_timer_t *timer) { + // Update WiFi strength + int8_t rssi = wifi_get_rssi(); + lv_img_set_src(wifi_icon, get_wifi_icon(rssi)); + + // Update battery + uint8_t battery_pct = battery_get_percentage(); + lv_img_set_src(battery_icon, get_battery_icon(battery_pct)); + + // Update time from RTC + rtc_time_t time; + pcf85063_get_time(&time); + lv_label_set_text_fmt(time_label, "%02d:%02d", time.hour, time.min); +} + +// Create timer for status bar updates +lv_timer_create(update_status_bar, 1000, NULL); +``` + +#### Settings Screen (Touch Access) +**Trigger:** Long-press on idle screen +**Contents:** +- Volume slider +- Brightness slider +- Wake word sensitivity slider +- WiFi settings button +- About/Info button + +**LVGL Implementation:** +```c +lv_obj_t *settings_screen; +lv_obj_t *volume_slider; +lv_obj_t *brightness_slider; +lv_obj_t *sensitivity_slider; +lv_obj_t *wifi_btn; +lv_obj_t *about_btn; +lv_obj_t *back_btn; + +// Slider event handler +static void slider_event_cb(lv_event_t *e) { + lv_obj_t *slider = lv_event_get_target(e); + int32_t value = lv_slider_get_value(slider); + + if (slider == volume_slider) { + set_speaker_volume(value); + } else if (slider == brightness_slider) { + set_backlight_brightness(value); + } else if (slider == sensitivity_slider) { + set_wake_word_sensitivity(value); + } +} +``` + +--- + +## Audio Pipeline Integration + +### I2S Configuration + +**Microphone (INMP441):** +```c +#define I2S_MIC_NUM 
I2S_NUM_0 +#define I2S_MIC_BCLK_PIN GPIO_NUM_4 // Verify with board schematic +#define I2S_MIC_WS_PIN GPIO_NUM_5 +#define I2S_MIC_DIN_PIN GPIO_NUM_6 +#define I2S_MIC_SAMPLE_RATE 16000 +#define I2S_MIC_BITS 16 +#define I2S_MIC_CHANNELS 1 + +i2s_config_t i2s_mic_config = { + .mode = I2S_MODE_MASTER | I2S_MODE_RX, + .sample_rate = I2S_MIC_SAMPLE_RATE, + .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, + .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, + .communication_format = I2S_COMM_FORMAT_STAND_I2S, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = 8, + .dma_buf_len = 256, + .use_apll = false, + .tx_desc_auto_clear = false, + .fixed_mclk = 0 +}; + +i2s_pin_config_t i2s_mic_pins = { + .bck_io_num = I2S_MIC_BCLK_PIN, + .ws_io_num = I2S_MIC_WS_PIN, + .data_out_num = I2S_PIN_NO_CHANGE, + .data_in_num = I2S_MIC_DIN_PIN +}; + +void audio_init_microphone(void) { + i2s_driver_install(I2S_MIC_NUM, &i2s_mic_config, 0, NULL); + i2s_set_pin(I2S_MIC_NUM, &i2s_mic_pins); + i2s_zero_dma_buffer(I2S_MIC_NUM); +} +``` + +**Speaker (MAX98357A I2S Amp):** +```c +#define I2S_SPK_NUM I2S_NUM_1 +#define I2S_SPK_BCLK_PIN GPIO_NUM_7 // Verify with board schematic +#define I2S_SPK_WS_PIN GPIO_NUM_8 +#define I2S_SPK_DOUT_PIN GPIO_NUM_9 +#define I2S_SPK_SAMPLE_RATE 16000 +#define I2S_SPK_BITS 16 +#define I2S_SPK_CHANNELS 1 + +i2s_config_t i2s_spk_config = { + .mode = I2S_MODE_MASTER | I2S_MODE_TX, + .sample_rate = I2S_SPK_SAMPLE_RATE, + .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT, + .channel_format = I2S_CHANNEL_FMT_ONLY_LEFT, + .communication_format = I2S_COMM_FORMAT_STAND_I2S, + .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1, + .dma_buf_count = 8, + .dma_buf_len = 256, + .use_apll = false, + .tx_desc_auto_clear = true, + .fixed_mclk = 0 +}; + +i2s_pin_config_t i2s_spk_pins = { + .bck_io_num = I2S_SPK_BCLK_PIN, + .ws_io_num = I2S_SPK_WS_PIN, + .data_out_num = I2S_SPK_DOUT_PIN, + .data_in_num = I2S_PIN_NO_CHANGE +}; + +void audio_init_speaker(void) { + i2s_driver_install(I2S_SPK_NUM, 
&i2s_spk_config, 0, NULL); + i2s_set_pin(I2S_SPK_NUM, &i2s_spk_pins); + i2s_zero_dma_buffer(I2S_SPK_NUM); +} +``` + +### Audio Buffer Management + +**Circular Buffer for Waveform:** +```c +#define AUDIO_BUFFER_SIZE 2048 +#define WAVEFORM_DECIMATION 8 // Downsample for display + +typedef struct { + int16_t samples[AUDIO_BUFFER_SIZE]; + uint16_t write_idx; + uint16_t read_idx; + SemaphoreHandle_t mutex; +} audio_buffer_t; + +audio_buffer_t mic_buffer; +audio_buffer_t spk_buffer; + +void audio_buffer_init(audio_buffer_t *buf) { + memset(buf->samples, 0, sizeof(buf->samples)); + buf->write_idx = 0; + buf->read_idx = 0; + buf->mutex = xSemaphoreCreateMutex(); +} + +void audio_buffer_write(audio_buffer_t *buf, int16_t *samples, size_t count) { + xSemaphoreTake(buf->mutex, portMAX_DELAY); + for (size_t i = 0; i < count; i++) { + buf->samples[buf->write_idx] = samples[i]; + buf->write_idx = (buf->write_idx + 1) % AUDIO_BUFFER_SIZE; + } + xSemaphoreGive(buf->mutex); +} + +// Get downsampled samples for waveform display +void audio_buffer_get_waveform(audio_buffer_t *buf, int16_t *out, size_t out_count) { + xSemaphoreTake(buf->mutex, portMAX_DELAY); + for (size_t i = 0; i < out_count; i++) { + size_t src_idx = (buf->write_idx + (i * WAVEFORM_DECIMATION)) % AUDIO_BUFFER_SIZE; + out[i] = buf->samples[src_idx]; + } + xSemaphoreGive(buf->mutex); +} +``` + +### Audio Streaming Task + +**Microphone Input Task:** +```c +void audio_mic_task(void *pvParameters) { + int16_t i2s_buffer[256]; + size_t bytes_read; + + while (1) { + // Read from I2S microphone + i2s_read(I2S_MIC_NUM, i2s_buffer, sizeof(i2s_buffer), &bytes_read, portMAX_DELAY); + size_t samples_read = bytes_read / sizeof(int16_t); + + if (current_state == STATE_LISTENING) { + // Write to circular buffer for waveform display + audio_buffer_write(&mic_buffer, i2s_buffer, samples_read); + + // Send to Heimdall server via WiFi + audio_send_to_server(i2s_buffer, samples_read); + + // Trigger waveform update + 
xEventGroupSetBits(ui_event_group, WAVEFORM_UPDATE_BIT); + } + } +} +``` + +**Speaker Output Task:** +```c +void audio_speaker_task(void *pvParameters) { + int16_t i2s_buffer[256]; + size_t bytes_written; + + while (1) { + // Receive audio from Heimdall server + size_t samples_received = audio_receive_from_server(i2s_buffer, 256); + + if (samples_received > 0 && current_state == STATE_SPEAKING) { + // Write to circular buffer for waveform display + audio_buffer_write(&spk_buffer, i2s_buffer, samples_received); + + // Play through I2S speaker + i2s_write(I2S_SPK_NUM, i2s_buffer, samples_received * sizeof(int16_t), + &bytes_written, portMAX_DELAY); + + // Trigger waveform update + xEventGroupSetBits(ui_event_group, WAVEFORM_UPDATE_BIT); + } else { + vTaskDelay(pdMS_TO_TICKS(10)); + } + } +} +``` + +### LVGL Update Task + +**Waveform Rendering Task:** +```c +void lvgl_waveform_task(void *pvParameters) { + int16_t waveform_samples[WAVEFORM_WIDTH]; + + while (1) { + // Wait for waveform update event + EventBits_t bits = xEventGroupWaitBits(ui_event_group, WAVEFORM_UPDATE_BIT, + pdTRUE, pdFALSE, pdMS_TO_TICKS(50)); + + if (bits & WAVEFORM_UPDATE_BIT) { + if (current_state == STATE_LISTENING) { + // Get downsampled mic data + audio_buffer_get_waveform(&mic_buffer, waveform_samples, WAVEFORM_WIDTH); + + // Draw on LVGL canvas (must lock LVGL) + lvgl_lock(); + draw_waveform(waveform_canvas, waveform_samples, WAVEFORM_WIDTH); + lvgl_unlock(); + + } else if (current_state == STATE_SPEAKING) { + // Get downsampled speaker data + audio_buffer_get_waveform(&spk_buffer, waveform_samples, WAVEFORM_WIDTH); + + lvgl_lock(); + draw_output_waveform(output_waveform_canvas, waveform_samples, WAVEFORM_WIDTH); + lvgl_unlock(); + } + } + } +} +``` + +--- + +## Touch Gesture Integration + +### Touch Controller (CST816D) + +**Gestures Supported:** +- Single tap +- Long press +- Swipe up/down/left/right + +**Implementation:** +```c +#define TOUCH_I2C_NUM I2C_NUM_0 +#define TOUCH_SDA_PIN 
GPIO_NUM_6 +#define TOUCH_SCL_PIN GPIO_NUM_7 +#define TOUCH_INT_PIN GPIO_NUM_9 +#define TOUCH_RST_PIN GPIO_NUM_10 + +typedef enum { + GESTURE_NONE = 0, + GESTURE_TAP, + GESTURE_LONG_PRESS, + GESTURE_SWIPE_UP, + GESTURE_SWIPE_DOWN, + GESTURE_SWIPE_LEFT, + GESTURE_SWIPE_RIGHT +} touch_gesture_t; + +void touch_init(void) { + // I2C init for CST816D + i2c_config_t conf = { + .mode = I2C_MODE_MASTER, + .sda_io_num = TOUCH_SDA_PIN, + .scl_io_num = TOUCH_SCL_PIN, + .sda_pullup_en = GPIO_PULLUP_ENABLE, + .scl_pullup_en = GPIO_PULLUP_ENABLE, + .master.clk_speed = 100000, + }; + i2c_param_config(TOUCH_I2C_NUM, &conf); + i2c_driver_install(TOUCH_I2C_NUM, conf.mode, 0, 0, 0); + + // Reset touch controller + gpio_set_direction(TOUCH_RST_PIN, GPIO_MODE_OUTPUT); + gpio_set_level(TOUCH_RST_PIN, 0); + vTaskDelay(pdMS_TO_TICKS(10)); + gpio_set_level(TOUCH_RST_PIN, 1); + vTaskDelay(pdMS_TO_TICKS(50)); + + // Configure interrupt pin + gpio_set_direction(TOUCH_INT_PIN, GPIO_MODE_INPUT); + gpio_set_intr_type(TOUCH_INT_PIN, GPIO_INTR_NEGEDGE); + gpio_install_isr_service(0); + gpio_isr_handler_add(TOUCH_INT_PIN, touch_isr_handler, NULL); +} + +touch_gesture_t touch_read_gesture(void) { + uint8_t data[8]; + // Read gesture from CST816D register 0x01 + i2c_master_read_from_device(TOUCH_I2C_NUM, CST816D_ADDR, 0x01, data, 8, pdMS_TO_TICKS(100)); + return (touch_gesture_t)data[0]; +} +``` + +### Gesture Actions by State + +**IDLE State:** +- **Tap:** Wake up display (if dimmed) +- **Long Press:** Open settings screen +- **Swipe Up:** Show more info (weather, calendar) + +**LISTENING State:** +- **Tap:** Cancel listening, return to idle +- **Swipe Down:** Lower wake word sensitivity +- **Swipe Up:** Raise wake word sensitivity + +**SPEAKING State:** +- **Tap:** Skip response, return to idle +- **Swipe Left/Right:** Volume down/up + +**PROCESSING State:** +- **Tap:** Cancel processing (if possible) + +--- + +## Network Communication + +### WiFi Configuration + +**Connection:** +```c +#define 
WIFI_SSID "YourNetworkName" +#define WIFI_PASSWORD "YourPassword" +#define SERVER_URL "http://10.1.10.71:3006" + +void wifi_init(void) { + esp_netif_init(); + esp_event_loop_create_default(); + esp_netif_create_default_wifi_sta(); + + wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT(); + esp_wifi_init(&cfg); + + wifi_config_t wifi_config = { + .sta = { + .ssid = WIFI_SSID, + .password = WIFI_PASSWORD, + }, + }; + + esp_wifi_set_mode(WIFI_MODE_STA); + esp_wifi_set_config(WIFI_IF_STA, &wifi_config); + esp_wifi_start(); + esp_wifi_connect(); +} +``` + +### Server Communication Protocol + +**Endpoints:** +- `GET /health` - Server health check +- `POST /audio/stream` - Stream audio to server (multipart) +- `GET /audio/tts` - Receive TTS audio response +- `GET /wake-word/status` - Check wake word detection status + +**Audio Streaming (WebSockets Recommended):** +```c +#include "esp_websocket_client.h" + +esp_websocket_client_handle_t ws_client; + +void websocket_init(void) { + esp_websocket_client_config_t ws_cfg = { + .uri = "ws://10.1.10.71:3006/ws/audio", + .buffer_size = 2048, + }; + + ws_client = esp_websocket_client_init(&ws_cfg); + esp_websocket_register_events(ws_client, WEBSOCKET_EVENT_ANY, + websocket_event_handler, NULL); + esp_websocket_client_start(ws_client); +} + +void audio_send_to_server(int16_t *samples, size_t count) { + if (esp_websocket_client_is_connected(ws_client)) { + esp_websocket_client_send_bin(ws_client, (char*)samples, + count * sizeof(int16_t), portMAX_DELAY); + } +} + +size_t audio_receive_from_server(int16_t *out_buffer, size_t max_samples) { + // Receive audio from server (blocking with timeout) + int len = esp_websocket_client_recv(ws_client, (char*)out_buffer, + max_samples * sizeof(int16_t), pdMS_TO_TICKS(100)); + return (len > 0) ? 
(len / sizeof(int16_t)) : 0; +} +``` + +**Alternative: HTTP Chunked Transfer (Simpler):** +```c +void audio_stream_http(void) { + esp_http_client_config_t config = { + .url = "http://10.1.10.71:3006/audio/stream", + .method = HTTP_METHOD_POST, + }; + esp_http_client_handle_t client = esp_http_client_init(&config); + + // Set headers + esp_http_client_set_header(client, "Content-Type", "audio/pcm"); + esp_http_client_set_header(client, "Transfer-Encoding", "chunked"); + + esp_http_client_open(client, -1); // -1 = chunked mode + + // Stream audio chunks + int16_t buffer[256]; + while (current_state == STATE_LISTENING) { + // Read from mic + size_t bytes_read; + i2s_read(I2S_MIC_NUM, buffer, sizeof(buffer), &bytes_read, portMAX_DELAY); + + // Send to server + esp_http_client_write(client, (char*)buffer, bytes_read); + } + + esp_http_client_close(client); + esp_http_client_cleanup(client); +} +``` + +--- + +## Power Management + +### Battery Monitoring + +**ETA6098 Charging Chip:** +```c +#define BATTERY_ADC_CHANNEL ADC1_CHANNEL_0 // GPIO1 (example) +#define BATTERY_FULL_MV 4200 +#define BATTERY_EMPTY_MV 3300 + +void battery_init(void) { + adc1_config_width(ADC_WIDTH_BIT_12); + adc1_config_channel_atten(BATTERY_ADC_CHANNEL, ADC_ATTEN_DB_11); +} + +uint8_t battery_get_percentage(void) { + int adc_reading = adc1_get_raw(BATTERY_ADC_CHANNEL); + int voltage_mv = esp_adc_cal_raw_to_voltage(adc_reading, &adc_chars); + + if (voltage_mv >= BATTERY_FULL_MV) return 100; + if (voltage_mv <= BATTERY_EMPTY_MV) return 0; + + return ((voltage_mv - BATTERY_EMPTY_MV) * 100) / (BATTERY_FULL_MV - BATTERY_EMPTY_MV); +} + +bool battery_is_charging(void) { + // Check SYS_OUT pin (GPIO36) - high when charging + gpio_set_direction(GPIO_NUM_36, GPIO_MODE_INPUT); + return gpio_get_level(GPIO_NUM_36); +} +``` + +### Low Power Modes + +**Deep Sleep When Idle (Optional):** +```c +#define IDLE_TIMEOUT_MS 300000 // 5 minutes + +void enter_deep_sleep(void) { + // Save state to RTC memory + 
RTC_DATA_ATTR static uint32_t boot_count = 0; + boot_count++; + + // Configure wake sources + esp_sleep_enable_ext0_wakeup(TOUCH_INT_PIN, 0); // Wake on touch + esp_sleep_enable_timer_wakeup(3600 * 1000000ULL); // Wake every hour + + // Turn off display + gpio_set_level(LCD_BL_PIN, 0); + + // Enter deep sleep + esp_deep_sleep_start(); +} +``` + +--- + +## Performance Optimization + +### LVGL Performance + +**Buffer Configuration:** +```c +#define LVGL_BUFFER_SIZE (240 * 280 * 2) // Full screen buffer + +static lv_color_t buf_1[LVGL_BUFFER_SIZE / 10]; // 1/10 screen buffer +static lv_color_t buf_2[LVGL_BUFFER_SIZE / 10]; // Double buffering + +lv_disp_draw_buf_t draw_buf; +lv_disp_draw_buf_init(&draw_buf, buf_1, buf_2, LVGL_BUFFER_SIZE / 10); +``` + +**Task Priority:** +```c +#define LVGL_TASK_PRIORITY 5 +#define AUDIO_MIC_TASK_PRIORITY 10 // Higher priority for audio +#define AUDIO_SPK_TASK_PRIORITY 10 +#define WIFI_TASK_PRIORITY 8 +#define WAVEFORM_TASK_PRIORITY 4 // Lower priority for visuals + +void app_main(void) { + // Create tasks with priorities + xTaskCreatePinnedToCore(lvgl_task, "LVGL", 8192, NULL, LVGL_TASK_PRIORITY, NULL, 1); + xTaskCreatePinnedToCore(audio_mic_task, "MIC", 4096, NULL, AUDIO_MIC_TASK_PRIORITY, NULL, 0); + xTaskCreatePinnedToCore(audio_speaker_task, "SPK", 4096, NULL, AUDIO_SPK_TASK_PRIORITY, NULL, 0); + xTaskCreatePinnedToCore(lvgl_waveform_task, "WAVE", 4096, NULL, WAVEFORM_TASK_PRIORITY, NULL, 1); +} +``` + +**Reduce Waveform Update Rate:** +```c +// Only update waveform at 30 FPS, not every audio sample +#define WAVEFORM_UPDATE_MS 33 // ~30 FPS + +void lvgl_waveform_task(void *pvParameters) { + TickType_t last_update = xTaskGetTickCount(); + + while (1) { + TickType_t now = xTaskGetTickCount(); + if ((now - last_update) >= pdMS_TO_TICKS(WAVEFORM_UPDATE_MS)) { + // Update waveform + last_update = now; + } + vTaskDelay(pdMS_TO_TICKS(10)); + } +} +``` + +### Memory Management + +**PSRAM Usage:** +```c +// Allocate large buffers in PSRAM 
(8MB available) +#define AUDIO_LARGE_BUFFER_SIZE (16000 * 10) // 10 seconds at 16kHz + +int16_t *audio_history = heap_caps_malloc(AUDIO_LARGE_BUFFER_SIZE * sizeof(int16_t), + MALLOC_CAP_SPIRAM); + +// Check if allocation succeeded +if (audio_history == NULL) { + ESP_LOGE(TAG, "Failed to allocate PSRAM buffer"); +} +``` + +**Heap Monitoring:** +```c +void log_memory_stats(void) { + ESP_LOGI(TAG, "Free heap: %d bytes", esp_get_free_heap_size()); + ESP_LOGI(TAG, "Free PSRAM: %d bytes", heap_caps_get_free_size(MALLOC_CAP_SPIRAM)); + ESP_LOGI(TAG, "Min free heap: %d bytes", esp_get_minimum_free_heap_size()); +} +``` + +--- + +## Example Code Structure + +### File Organization + +``` +esp32_voice_assistant/ +├── main/ +│ ├── main.c # Entry point, task creation +│ ├── audio/ +│ │ ├── audio_input.c # I2S microphone handling +│ │ ├── audio_output.c # I2S speaker handling +│ │ ├── audio_buffer.c # Circular buffer management +│ │ └── audio_network.c # WebSocket/HTTP streaming +│ ├── ui/ +│ │ ├── ui_init.c # LVGL setup, screen creation +│ │ ├── ui_idle.c # Idle screen UI +│ │ ├── ui_listening.c # Listening screen + waveform +│ │ ├── ui_processing.c # Processing screen + spinner +│ │ ├── ui_speaking.c # Speaking screen + output waveform +│ │ ├── ui_settings.c # Settings screen +│ │ └── ui_waveform.c # Waveform drawing functions +│ ├── touch/ +│ │ ├── touch_cst816d.c # Touch controller driver +│ │ └── touch_gestures.c # Gesture recognition +│ ├── network/ +│ │ └── wifi_manager.c # WiFi connection management +│ ├── power/ +│ │ ├── battery.c # Battery monitoring +│ │ └── power_mgmt.c # Sleep modes +│ └── state_machine.c # Voice assistant state machine +├── components/ +│ └── lvgl/ # LVGL library (ESP-IDF component) +├── CMakeLists.txt +└── sdkconfig # ESP-IDF configuration +``` + +### Main Entry Point + +```c +// main/main.c +#include "freertos/FreeRTOS.h" +#include "freertos/task.h" +#include "esp_log.h" + +static const char *TAG = "VOICE_ASSISTANT"; + +void app_main(void) { + 
ESP_LOGI(TAG, "Voice Assistant Starting..."); + + // Initialize hardware + nvs_flash_init(); // Non-volatile storage + gpio_install_isr_service(0);// GPIO interrupts + + // Power management + battery_init(); + + // Display and touch + lcd_init(); + touch_init(); + ui_init(); + + // Audio pipeline + audio_init_microphone(); + audio_init_speaker(); + audio_buffer_init(&mic_buffer); + audio_buffer_init(&spk_buffer); + + // Network + wifi_init(); + websocket_init(); + + // State machine + state_machine_init(); + + // Create FreeRTOS tasks + xTaskCreatePinnedToCore(lvgl_task, "LVGL", 8192, NULL, 5, NULL, 1); + xTaskCreatePinnedToCore(audio_mic_task, "MIC", 4096, NULL, 10, NULL, 0); + xTaskCreatePinnedToCore(audio_speaker_task, "SPK", 4096, NULL, 10, NULL, 0); + xTaskCreatePinnedToCore(lvgl_waveform_task, "WAVE", 4096, NULL, 4, NULL, 1); + xTaskCreatePinnedToCore(state_machine_task, "STATE", 4096, NULL, 7, NULL, 0); + + ESP_LOGI(TAG, "Voice Assistant Running!"); +} +``` + +--- + +## Testing Plan + +### Phase 1: Hardware Validation +- [ ] LCD display working (show test pattern) +- [ ] Touch controller responding (log touch coordinates) +- [ ] Buzzer working (play test tone) +- [ ] WiFi connecting (check IP address) +- [ ] Battery reading (log voltage) +- [ ] RTC working (log time) +- [ ] IMU working (log accelerometer values) + +### Phase 2: Audio Pipeline +- [ ] I2S microphone reading audio (log levels) +- [ ] Audio streaming to Heimdall server +- [ ] I2S speaker playing audio (test tone) +- [ ] TTS audio playback from server +- [ ] Audio buffer management (no overflows) + +### Phase 3: LVGL UI +- [ ] Idle screen displays correctly +- [ ] State transitions smooth +- [ ] Waveform renders at 30 FPS +- [ ] Touch gestures recognized +- [ ] Settings screen functional +- [ ] Status bar updates correctly + +### Phase 4: Integration +- [ ] Wake word detection triggers listening state +- [ ] Waveform shows mic input in real-time +- [ ] Processing state shows after speech ends +- 
[ ] TTS response plays with output waveform +- [ ] Touch cancel works in all states +- [ ] Battery indicator accurate + +### Phase 5: Optimization +- [ ] Memory usage stable (no leaks) +- [ ] CPU usage acceptable (<80% average) +- [ ] WiFi latency <100ms +- [ ] Audio latency <200ms end-to-end +- [ ] Display framerate stable (30 FPS) +- [ ] Battery life >4 hours continuous + +--- + +## Bill of Materials (BOM) + +| Component | Part Number | Quantity | Unit Price | Total | +|-----------|-------------|----------|------------|-------| +| ESP32-S3-Touch-LCD-1.69 | Waveshare | 1 | $12.00 | $12.00 | +| I2S MEMS Microphone | INMP441 | 1 | $3.50 | $3.50 | +| I2S Amplifier | MAX98357A | 1 | $3.50 | $3.50 | +| Speaker (3W 8Ω) | Generic | 1 | $5.00 | $5.00 | +| LiPo Battery (1000mAh) | 503040 JST 1.25 | 1 | $7.00 | $7.00 | +| MicroSD Card (8GB) | SanDisk | 1 | $5.00 | $5.00 | +| Breadboard + Wires | Generic | 1 | $5.00 | $5.00 | +| **Total** | | | | **$41.00** | + +**Optional:** +- Enclosure/Case (3D printed or project box): $5-10 +- Backup battery: $7 +- USB-C cable: $3 + +**Grand Total with Options:** ~$56-63 + +--- + +## References & Resources + +### LVGL Audio Visualization Examples +- **Music Player with FFT Spectrum** - [Instructables Guide](https://www.instructables.com/Design-Music-Player-UI-With-LVGL/) + - Source: https://github.com/moononournation/LVGL_Music_Player.git + - Shows FFT-based audio visualization on LVGL canvas + +- **LVGL Audio FFT Spectrum (Xiao S3)** - [GitHub: genvex/LVGL_Audio_FFT_Spectrum_xiaoS3_oled](https://github.com/genvex/LVGL_Audio_FFT_Spectrum_xiaoS3_oled) + - Real-time FFT visualization using low-level LVGL drawing + +- **LVGL Audio FFT Spectrum** - [GitHub: imliubo/LVGL_Audio_FFT_Spectrum](https://github.com/imliubo/LVGL_Audio_FFT_Spectrum) + - Alternative FFT spectrum implementation + +- **Moving Waveform Discussion** - [LVGL Forum Thread](https://forum.lvgl.io/t/best-method-to-display-a-moving-waveform/17361) + - Tips on efficiently 
displaying moving waveforms + +### ESP32-S3 Resources +- **Waveshare Wiki** - https://www.waveshare.com/wiki/ESP32-S3-LCD-1.69 +- **LVGL ESP32 Port** - [GitHub: lvgl/lv_port_esp32](https://github.com/lvgl/lv_port_esp32) +- **ESP-IDF Documentation** - https://docs.espressif.com/projects/esp-idf/en/latest/ + +### Voice Assistant Project +- **Mycroft Precise Documentation** - https://github.com/MycroftAI/mycroft-precise +- **Whisper OpenAI** - https://github.com/openai/whisper +- **Piper TTS** - https://github.com/rhasspy/piper + +--- + +## Next Steps + +1. **Order Hardware** - ESP32-S3-Touch-LCD + audio components (~$41) +2. **Setup ESP-IDF** - Install ESP-IDF v5.3.1+ on development machine +3. **Clone Examples** - Get LVGL audio visualization examples for reference +4. **Start Simple** - Begin with LCD + LVGL test (no audio) +5. **Add Audio** - Wire I2S mic, test audio streaming +6. **Waveform MVP** - Get basic waveform rendering working +7. **Full Integration** - Connect to Heimdall voice server +8. **Polish** - Add touch controls, settings, battery support + +--- + +**Version:** 1.0 +**Created:** 2026-01-01 +**Status:** Specification Complete, Ready for Implementation + diff --git a/docs/HARDWARE_BUYING_GUIDE.md b/docs/HARDWARE_BUYING_GUIDE.md new file mode 100755 index 0000000..4413b15 --- /dev/null +++ b/docs/HARDWARE_BUYING_GUIDE.md @@ -0,0 +1,542 @@ +# Voice Assistant Hardware - Buying Guide for Second Unit + +**Date:** 2025-11-29 +**Context:** You have one Maix Duino (K210), planning multi-room deployment +**Question:** What should I buy for the second unit? + +--- + +## Quick Answer + +**Best Overall:** **Buy another Maix Duino K210** (~$30-40) +**Runner-up:** **ESP32-S3 with audio board** (~$20-30) +**Budget:** **Generic ESP32 + I2S** (~$15-20) +**Future-proof:** **Sipeed Maix-III** (~$60-80, when available) + +--- + +## Analysis: Why Another Maix Duino K210? 
+ +### Pros ✅ +- **Identical to first unit** - Code reuse, same workflow +- **Proven solution** - You'll know exactly what to expect +- **Stock availability** - Still widely available despite being "outdated" +- **Same accessories** - Microphones, displays, cables compatible +- **Edge detection ready** - Can upgrade to edge wake word later +- **Low cost** - ~$30-40 for full kit with LCD and camera +- **Multi-room consistency** - All units behave identically + +### Cons ❌ +- "Outdated" hardware (but doesn't matter for your use case) +- Limited future support from Sipeed + +### Verdict: ✅ **RECOMMENDED - Best choice for consistency** + +--- + +## Alternative Options + +### Option 1: Another Maix Duino K210 +**Price:** $30-40 (kit with LCD) +**Where:** AliExpress, Amazon, Seeed Studio + +**Specific Model:** +- **Sipeed Maix Duino** (original, what you have) +- Includes: LCD, camera module +- Need to add: I2S microphone + +**Why Choose:** +- Identical setup to first unit +- Code works without modification +- Same troubleshooting experience +- Bulk buy discount possible + +**Link Examples:** +- Seeed Studio: https://www.seeedstudio.com/Sipeed-Maix-Duino-Kit-for-RISC-V-AI-IoT.html +- AliExpress: Search "Sipeed Maix Duino" (~$25-35) + +--- + +### Option 2: Sipeed Maix Bit/Dock (K210 variant) +**Price:** $15-25 (smaller form factor) + +**Differences from Maix Duino:** +- Smaller board +- May need separate LCD +- Same K210 chip +- Same capabilities + +**Why Choose:** +- Cheaper +- More compact +- Same software + +**Why Skip:** +- Need separate accessories +- Different form factor means different mounting +- Less convenient than all-in-one Duino + +**Verdict:** ⚠️ Only if you want smaller/cheaper + +--- + +### Option 3: ESP32-S3 with Audio Kit +**Price:** $20-30 +**Chip:** ESP32-S3 (Xtensa dual-core @ 240MHz) + +**Examples:** +- **ESP32-S3-Box** (~$30) - Has LCD, microphone, speaker built-in +- **Seeed XIAO ESP32-S3 Sense** (~$15) - Tiny, needs accessories +- **M5Stack Core 
S3** (~$50) - Premium, all-in-one + +**Pros:** +- ✅ More modern than K210 +- ✅ Better WiFi/BLE support +- ✅ Lower power consumption +- ✅ Active development +- ✅ Arduino/ESP-IDF support + +**Cons:** +- ❌ No KPU (neural accelerator) +- ❌ Different code needed (ESP32 vs MaixPy) +- ❌ Less ML capability (for future edge wake word) +- ❌ Different ecosystem + +**Best ESP32-S3 Choice:** **ESP32-S3-Box** +- All-in-one like your Maix Duino +- Built-in mic, speaker, LCD +- Good for server-side wake word +- Cheaper than Maix Duino + +**Verdict:** 🤔 Good alternative if you want to experiment + +--- + +### Option 4: Raspberry Pi Zero 2 W +**Price:** $15-20 (board only, need accessories) + +**Pros:** +- ✅ Full Linux +- ✅ Familiar ecosystem +- ✅ Tons of support +- ✅ Easy Python development + +**Cons:** +- ❌ No neural accelerator +- ❌ No dedicated audio hardware +- ❌ More power hungry (~500mW vs 200mW) +- ❌ Overkill for audio streaming +- ❌ Need USB sound card or I2S HAT +- ❌ Larger form factor + +**Verdict:** ❌ Not ideal for this project + +--- + +### Option 5: Sipeed Maix-III AXera-Pi (Future) +**Price:** $60-80 (when available) +**Chip:** AX620A (much more powerful than K210) + +**Pros:** +- ✅ Modern hardware (2023) +- ✅ Better AI performance +- ✅ Linux + Python support +- ✅ Sipeed ecosystem continuity +- ✅ Great for edge wake word + +**Cons:** +- ❌ More expensive +- ❌ Newer = less community support +- ❌ Overkill for server-side wake word +- ❌ Stock availability varies + +**Verdict:** 🔮 Future-proof option if budget allows + +--- + +### Option 6: Generic ESP32 + I2S Breakout +**Price:** $10-15 (cheapest option) + +**What You Need:** +- ESP32 DevKit (~$5) +- I2S MEMS mic (~$5) +- Optional: I2S speaker amp (~$5) + +**Pros:** +- ✅ Cheapest option +- ✅ Minimal, focused on audio only +- ✅ Very low power +- ✅ WiFi built-in + +**Cons:** +- ❌ No LCD (would need separate) +- ❌ No camera +- ❌ DIY assembly required +- ❌ No neural accelerator +- ❌ Different code from K210 + +**Verdict:** 💰 
Budget choice, but less polished + +--- + +## Comparison Table + +| Option | Price | Same Code? | LCD | AI Accel | Best For | +|--------|-------|------------|-----|----------|----------| +| **Maix Duino K210** | $30-40 | ✅ Yes | ✅ Included | ✅ KPU | **Multi-room consistency** | +| Maix Bit/Dock (K210) | $15-25 | ✅ Yes | ⚠️ Optional | ✅ KPU | Compact/Budget | +| ESP32-S3-Box | $25-35 | ❌ No | ✅ Included | ❌ No | Modern alternative | +| ESP32-S3 DIY | $15-25 | ❌ No | ❌ No | ❌ No | Custom build | +| Raspberry Pi Zero 2 W | $30+ | ❌ No | ❌ No | ❌ No | Linux/overkill | +| Maix-III | $60-80 | ⚠️ Similar | ✅ Varies | ✅ NPU | Future-proof | +| Generic ESP32 | $10-15 | ❌ No | ❌ No | ❌ No | Absolute budget | + +--- + +## Recommended Purchase Plan + +### Phase 1: Second Identical Unit (NOW) +**Buy:** Sipeed Maix Duino K210 (same as first) +**Cost:** ~$30-40 +**Why:** Code reuse, proven solution, multi-room consistency + +**What to Order:** +- [ ] Sipeed Maix Duino board with LCD and camera +- [ ] I2S MEMS microphone (if not included) +- [ ] Small speaker or audio output (3-5W) +- [ ] USB-C cable +- [ ] MicroSD card (4GB+) + +**Total Cost:** ~$40-50 with accessories + +--- + +### Phase 2: Third+ Units (LATER) +**Option A:** More Maix Duinos (if still available) +**Option B:** Switch to ESP32-S3-Box for variety/testing +**Option C:** Wait for Maix-III if you want cutting edge + +--- + +## Where to Buy Maix Duino + +### Recommended Sellers + +**1. Seeed Studio (Official Partner)** +- URL: https://www.seeedstudio.com/ +- Search: "Sipeed Maix Duino" +- Price: ~$35-45 +- Shipping: International, good support +- **Pro:** Official, reliable, good documentation +- **Con:** Can be out of stock + +**2. AliExpress (Direct from Sipeed/China)** +- Search: "Sipeed Maix Duino" +- Price: ~$25-35 +- Shipping: 2-4 weeks (free or cheap) +- **Pro:** Cheapest, often bundled with accessories +- **Con:** Longer shipping, variable quality control +- **Tip:** Look for "Sipeed Official Store" + +**3. 
Amazon** +- Search: "Maix Duino K210" +- Price: ~$40-50 +- Shipping: Fast (Prime eligible sometimes) +- **Pro:** Fast shipping, easy returns +- **Con:** Higher price, limited stock + +**4. Adafruit / SparkFun** +- May carry Sipeed products +- Higher price but US-based support +- Check availability + +--- + +## Accessories to Buy + +### Essential (for each unit) + +**1. I2S MEMS Microphone** +- **Recommended:** Adafruit I2S MEMS Microphone Breakout (~$7) + - Model: SPH0645LM4H + - URL: https://www.adafruit.com/product/3421 +- **Alternative:** INMP441 I2S Microphone (~$3 on AliExpress) + - Cheaper, works well + - Search: "INMP441 I2S microphone" + +**2. Speaker / Audio Output** +- **Option A:** Small 3-5W speaker (~$5-10) + - Search: "3W 8 ohm speaker" +- **Option B:** I2S speaker amplifier + speaker + - MAX98357A I2S amp (~$5) + - 4-8 ohm speaker (~$5) +- **Option C:** Line out to existing speakers (cheapest) + +**3. MicroSD Card** +- 4GB or larger +- FAT32 formatted +- Class 10 recommended +- ~$5 + +**4. USB-C Cable** +- For power and programming +- ~$3-5 + +--- + +### Optional but Nice + +**1. Enclosure/Case** +- 3D print custom case +- Find STL files on Thingiverse +- Or use small project box (~$5) + +**2. Microphone Array** (for better pickup) +- 2 or 4-mic array board (~$15-25) +- Better voice detection +- Phase 2+ enhancement + +**3. Battery Pack** (for portable testing) +- USB-C power bank +- Makes testing easier +- Already have? Use it! + +**4. 
Mounting Hardware** +- Velcro strips +- 3M command strips +- Wall mount brackets +- ~$5 + +--- + +## Multi-Unit Strategy + +### Same Hardware (Recommended) +**Buy:** 2-4x Maix Duino K210 units +**Benefit:** +- All units identical +- Same code deployment +- Easy troubleshooting +- Bulk buy discount + +**Deployment:** +- Unit 1: Living room +- Unit 2: Bedroom +- Unit 3: Kitchen +- Unit 4: Office + +### Mixed Hardware (Experimental) +**Buy:** +- 2x Maix Duino K210 (proven) +- 1x ESP32-S3-Box (modern) +- 1x Maix-III (future-proof) + +**Benefit:** +- Test different platforms +- Evaluate performance +- Future-proofing + +**Drawback:** +- More complex code +- Different troubleshooting +- Inconsistent UX + +**Verdict:** ⚠️ Only if you want to experiment + +--- + +## Budget Options + +### Ultra-Budget Multi-Room (~$50 total) +- 2x Generic ESP32 + I2S mic ($10 each = $20) +- 2x Speakers ($5 each = $10) +- 2x SD cards ($5 each = $10) +- Cables ($10) +- **Total:** ~$50 for 2 units + +**Pros:** Cheap +**Cons:** No LCD, DIY assembly, different code + +--- + +### Mid-Budget Multi-Room (~$100 total) +- 2x Maix Duino K210 ($35 each = $70) +- 2x I2S mics ($5 each = $10) +- 2x Speakers ($5 each = $10) +- Accessories ($10) +- **Total:** ~$100 for 2 units + +**Pros:** Proven, consistent, LCD included +**Cons:** "Outdated" hardware (doesn't matter for your use) + +--- + +### Premium Multi-Room (~$200 total) +- 2x Maix-III AXera-Pi ($70 each = $140) +- 2x I2S mics ($10 each = $20) +- 2x Speakers ($10 each = $20) +- Accessories ($20) +- **Total:** ~$200 for 2 units + +**Pros:** Future-proof, modern, powerful +**Cons:** More expensive, newer = less support + +--- + +## My Recommendation + +### For Second Unit: Buy Another Maix Duino K210 ✅ + +**Reasoning:** +1. **Code reuse** - Everything you develop for unit 1 works on unit 2 +2. **Known quantity** - No surprises, you know it works +3. **Multi-room consistency** - All units behave the same +4. 
**Edge wake word ready** - Can upgrade later if desired +5. **Cost-effective** - ~$40 for full kit with LCD +6. **Stock available** - Still widely sold despite being "outdated" + +**Where to Buy:** +- **Best:** AliExpress "Sipeed Official Store" (~$30 + shipping) +- **Fastest:** Amazon (~$45 with Prime) +- **Support:** Seeed Studio (~$40 + shipping) + +**What to Order:** +``` +Shopping List for Second Unit: +[ ] 1x Sipeed Maix Duino Kit (board + LCD + camera) - $30-35 +[ ] 1x I2S MEMS microphone (INMP441 or SPH0645) - $5-7 +[ ] 1x Small speaker (3W, 8 ohm) - $5-10 +[ ] 1x MicroSD card (8GB+, Class 10) - $5 +[ ] 1x USB-C cable - $3-5 +[ ] Optional: Enclosure/mounting - $5-10 + +Total: ~$50-75 (depending on shipping and options) +``` + +--- + +### For Third+ Units: Evaluate + +By the time you're ready for 3rd/4th units: +- You'll have experience with K210 +- You'll know if you want consistency (more K210s) +- Or variety (try ESP32-S3 or Maix-III) +- Maix-III may have better availability +- Prices may have changed + +**Decision:** Revisit when units 1 and 2 are working + +--- + +## Future-Proofing Considerations + +### Will K210 be Supported? +- **MaixPy:** Still actively maintained for K210 +- **Community:** Large existing user base +- **Models:** Pre-trained models still work +- **Lifespan:** Good for 3-5+ years + +**Verdict:** ✅ Safe to buy more K210s now + +### When to Switch Hardware? +Consider switching when: +- [ ] K210 becomes hard to find +- [ ] You need better performance (edge ML) +- [ ] Power consumption is critical +- [ ] New features require newer hardware + +**Timeline:** Probably 2-3 years out + +--- + +## Special Considerations + +### Different Rooms, Different Needs? 
+ +**Living Room (Primary):** +- Needs: Best audio, LCD display, polish +- **Hardware:** Maix Duino K210 with all features + +**Bedroom (Secondary):** +- Needs: Simple, no bright LCD at night +- **Hardware:** Maix Duino K210, disable LCD at night + +**Kitchen (Ambient Noise):** +- Needs: Better microphone array +- **Hardware:** Maix Duino K210 + 4-mic array + +**Office (Minimal):** +- Needs: Cheap, basic audio only +- **Hardware:** Generic ESP32 + I2S mic + +### All Same vs Customized? + +**Recommendation:** Start with all same (Maix Duino), customize later if needed. + +--- + +## Action Plan + +### This Week +1. **Order second Maix Duino K210** (~$30-40) +2. **Order I2S microphone** (~$5-7) +3. **Order speaker** (~$5-10) +4. **Order SD card** (~$5) + +**Total Investment:** ~$50-65 + +### Next Month +1. Wait for delivery (2-4 weeks from AliExpress) +2. Test unit 1 while waiting +3. Refine code and setup process +4. Prepare for unit 2 deployment + +### In 2-3 Months +1. Deploy unit 2 (should be easy after unit 1) +2. Test multi-room +3. Decide on unit 3/4 based on experience +4. 
Consider bulk order if expanding + +--- + +## Summary + +**Buy for Second Unit:** +- ✅ **Sipeed Maix Duino K210** (same as first) - ~$35 +- ✅ **I2S MEMS microphone** (INMP441) - ~$5 +- ✅ **Small speaker** (3W, 8 ohm) - ~$8 +- ✅ **MicroSD card** (8GB Class 10) - ~$5 +- ✅ **USB-C cable** - ~$5 + +**Total:** ~$60 shipped + +**Why:** Code reuse, consistency, proven solution, future-expandable + +**Where:** AliExpress (cheap) or Amazon (fast) + +**When:** Order now, 2-4 weeks delivery + +**Third+ Units:** Decide after testing 2 units (probably buy more K210s) + +--- + +## Quick Links + +**Official Sipeed Store (AliExpress):** +https://sipeed.aliexpress.com/store/1101739727 + +**Seeed Studio:** +https://www.seeedstudio.com/catalogsearch/result/?q=maix+duino + +**Amazon Search:** +"Sipeed Maix Duino K210" + +**Microphone (Adafruit):** +https://www.adafruit.com/product/3421 + +**Alternative Mic (AliExpress):** +Search: "INMP441 I2S microphone breakout" + +--- + +**Happy Building! 🏠🎙️** diff --git a/docs/K210_PERFORMANCE_VERIFICATION.md b/docs/K210_PERFORMANCE_VERIFICATION.md new file mode 100755 index 0000000..7f2819b --- /dev/null +++ b/docs/K210_PERFORMANCE_VERIFICATION.md @@ -0,0 +1,223 @@ +# K210 Performance Verification for Voice Assistant + +**Date:** 2025-11-29 +**Source:** https://github.com/sipeed/MaixPy Performance Comparison +**Question:** Is K210 suitable for our Mycroft Precise wake word detection project? 
+ +--- + +## K210 Specifications + +- **Processor:** K210 dual-core RISC-V @ 400MHz +- **AI Accelerator:** KPU (Neural Network Processor) +- **SRAM:** 8MB +- **Status:** Considered "outdated" by Sipeed (2018 release) + +--- + +## Performance Comparison (from MaixPy GitHub) + +### YOLOv2 Object Detection +| Chip | Performance | Notes | +|------|------------|-------| +| K210 | 1.8 ms | Limited to older models | +| V831 | 20-40 ms | More modern, but slower | +| R329 | N/A | Newer hardware | + +### Our Use Case: Audio Processing + +**For wake word detection, we need:** +- Audio input (16kHz, mono) ✅ K210 has I2S +- Real-time processing ✅ K210 KPU can handle this +- Network communication ✅ K210 has ESP32 WiFi +- Low latency (<100ms) ✅ Achievable + +--- + +## Deployment Strategy Analysis + +### Option A: Server-Side Wake Word (Recommended) +**K210 Role:** Audio I/O only +- Capture audio from I2S microphone ✅ Well supported +- Stream to Heimdall via WiFi ✅ No problem +- Receive and play TTS audio ✅ Works fine +- LED/display feedback ✅ Easy + +**K210 Requirements:** MINIMAL +- No AI processing needed +- Simple audio streaming +- Network communication only +- **Verdict:** ✅ K210 is MORE than capable + +### Option B: Edge Wake Word (Future) +**K210 Role:** Wake word detection on-device +- Load KMODEL wake word model ⚠️ Needs conversion +- Run inference on KPU ⚠️ Quantization required +- Detect wake word locally ⚠️ Possible but limited + +**K210 Limitations:** +- KMODEL conversion complex (TF→ONNX→KMODEL) +- Quantization may reduce accuracy (80-90% vs 95%+) +- Limited to simpler models +- **Verdict:** ⚠️ Possible but challenging + +--- + +## Why K210 is PERFECT for Our Project + +### 1. We're Starting with Server-Side Detection +- K210 only does audio I/O +- All AI processing on Heimdall (powerful server) +- No need for cutting-edge hardware +- **K210 is ideal for this role** + +### 2. 
Audio Processing is Not Computationally Intensive +Unlike YOLOv2 (60 FPS video processing): +- Audio: 16kHz sample rate = 16,000 samples/second +- Wake word: Simple streaming +- No real-time neural network inference needed (server-side) +- **K210's "old" specs don't matter** + +### 3. Edge Detection is Optional (Future Enhancement) +- We can prove the concept with server-side first +- Edge detection is a nice-to-have optimization +- If we need edge later, we can: + - Use simpler wake word models + - Accept slightly lower accuracy + - Or upgrade hardware then +- **Starting point doesn't require latest hardware** + +### 4. K210 Advantages We Actually Care About +- ✅ Well-documented (mature platform) +- ✅ Stable MaixPy firmware +- ✅ Large community and examples +- ✅ Proven audio processing +- ✅ Already have the hardware! +- ✅ Cost-effective ($30 vs $100+ newer boards) + +--- + +## Performance Targets vs K210 Capabilities + +### What We Need: +- Audio capture: 16kHz, 1 channel ✅ K210: Easy +- Audio streaming: ~128 kbps over WiFi ✅ K210: No problem +- Wake word latency: <200ms ✅ K210: Achievable (server-side) +- LED feedback: Instant ✅ K210: Trivial +- Audio playback: 16kHz TTS ✅ K210: Supported + +### What We DON'T Need (for initial deployment): +- ❌ Real-time video processing +- ❌ Complex neural networks on device +- ❌ Multi-model inference +- ❌ High-resolution image processing +- ❌ Latest and greatest AI accelerator + +--- + +## Comparison to Alternatives + +### If we bought newer hardware: + +**V831 ($50-70):** +- Pros: Newer, better supported +- Cons: + - More expensive + - SLOWER at neural networks than K210 + - Still need server for Whisper anyway + - Overkill for audio I/O + +**ESP32-S3 ($10-20):** +- Pros: Cheap, WiFi built-in +- Cons: + - No KPU (if we want edge detection later) + - Less capable for ML + - Would work for server-side though + +**Raspberry Pi Zero 2 W ($15):** +- Pros: Full Linux, familiar +- Cons: + - No dedicated audio hardware + - No neural 
accelerator + - More power hungry + - Overkill for our needs + +**Verdict:** K210 is actually the sweet spot for this project! + +--- + +## Real-World Comparison + +### What K210 CAN Do (Proven): +- Audio classification ✅ +- Simple keyword spotting ✅ +- Voice activity detection ✅ +- Audio streaming ✅ +- Multi-microphone beamforming ✅ + +### What We're Asking It To Do: +- Stream audio to server ✅ Much easier +- (Optional future) Simple wake word detection ✅ Proven capability + +--- + +## Recommendation: Proceed with K210 + +### Phase 1: Server-Side (Now) +K210 role: Audio I/O device +- **Difficulty:** Easy +- **Performance:** Excellent +- **K210 utilization:** ~10-20% +- **Status:** No concerns whatsoever + +### Phase 2: Edge Detection (Future) +K210 role: Wake word detection + audio I/O +- **Difficulty:** Moderate (model conversion) +- **Performance:** Good enough (80-90% accuracy) +- **K210 utilization:** ~30-40% +- **Status:** Feasible, community has done it + +--- + +## Conclusion + +**Is K210 outdated?** Yes, for cutting-edge ML applications. + +**Is K210 suitable for our project?** ABSOLUTELY YES! + +**Why:** +1. We're using server-side processing (K210 just streams audio) +2. K210's audio capabilities are excellent +3. Mature platform = more examples and stability +4. Already have the hardware +5. Cost-effective +6. Can optionally upgrade to edge detection later + +**The "outdated" warning is for people wanting latest ML performance. We're using it as an audio I/O device with WiFi - it's perfect for that!** + +--- + +## Additional Notes + +### From MaixPy GitHub Warning: +> "We now recommend users choose the MaixCAM ... For 2018 K210 ... 
limited performance" + +**Our Response:** +- We don't need 2024 performance for audio streaming +- Server does the heavy lifting (Heimdall with NVIDIA GPU) +- K210 mature platform is actually an advantage +- If we need more later, we can upgrade edge device while keeping server + +### Community Validation: +Many Mycroft Precise + K210 projects exist: +- Audio streaming: Proven ✅ +- Edge wake word: Proven ✅ +- Full voice assistant: Proven ✅ + +**The K210 is "outdated" for video/vision ML, not for audio projects.** + +--- + +**Final Verdict:** ✅ PROCEED WITH CONFIDENCE + +The K210 is perfect for our use case. Ignore the "outdated" warning - that's for people doing real-time video processing or wanting the latest ML features. For a voice assistant where the heavy lifting happens server-side, the K210 is an excellent, mature, cost-effective choice! diff --git a/docs/LCD_CAMERA_FEATURES.md b/docs/LCD_CAMERA_FEATURES.md new file mode 100755 index 0000000..8c0c25b --- /dev/null +++ b/docs/LCD_CAMERA_FEATURES.md @@ -0,0 +1,566 @@ +# Maix Duino LCD & Camera Feature Analysis + +**Date:** 2025-11-29 +**Hardware:** Sipeed Maix Duino (K210) +**Question:** What's the overhead for using LCD display and camera? 
+ +--- + +## Hardware Capabilities + +### LCD Display +- **Resolution:** Typically 320x240 or 240x135 (depending on model) +- **Interface:** SPI +- **Color:** RGB565 (16-bit color) +- **Frame Rate:** Up to 60 FPS (limited by SPI bandwidth) +- **Status:** ✅ Included with most Maix Duino kits + +### Camera +- **Resolution:** Various (OV2640 common: 2MP, up to 1600x1200) +- **Interface:** DVP (Digital Video Port) +- **Frame Rate:** Up to 60 FPS (lower at high resolution) +- **Status:** ✅ Often included with Maix Duino kits + +### K210 Resources +- **CPU:** Dual-core RISC-V @ 400MHz +- **KPU:** Neural network accelerator +- **SRAM:** 8MB total (6MB available for apps) +- **Flash:** 16MB + +--- + +## LCD Usage for Voice Assistant + +### Use Case 1: Status Display (Minimal Overhead) +**What to Show:** +- Current state (idle/listening/processing/responding) +- Wake word detected indicator +- WiFi status and signal strength +- Server connection status +- Volume level +- Time/date + +**Overhead:** +- **CPU:** ~2-5% (simple text/icons) +- **RAM:** ~200KB (framebuffer + assets) +- **Power:** ~50mW additional +- **Complexity:** Low (MaixPy has built-in LCD support) + +**Code Example:** +```python +import lcd +import image + +lcd.init() +lcd.rotation(2) # Rotate if needed + +# Simple status display +img = image.Image(size=(320, 240)) +img.draw_string(10, 10, "Listening...", color=(0, 255, 0), scale=3) +img.draw_circle(300, 20, 10, color=(0, 255, 0), fill=True) # Status LED +lcd.display(img) +``` + +**Verdict:** ✅ **Very Low Overhead - Highly Recommended** + +--- + +### Use Case 2: Audio Waveform Visualizer (Moderate Overhead) + +#### Input Waveform (Microphone) +**What to Show:** +- Real-time audio level meter +- Waveform display (oscilloscope style) +- VU meter +- Frequency spectrum (simple bars) + +**Overhead:** +- **CPU:** ~10-15% (real-time drawing) +- **RAM:** ~300KB (framebuffer + audio buffer) +- **Frame Rate:** 15-30 FPS (sufficient for audio visualization) +- 
**Complexity:** Moderate (drawing primitives + FFT) + +**Implementation:** +```python +import lcd, audio, image +import array + +lcd.init() +audio.init() + +def draw_waveform(audio_buffer): + img = image.Image(size=(320, 240)) + + # Draw waveform + width = 320 + height = 240 + center = height // 2 + + # Sample every Nth point to fit on screen + step = len(audio_buffer) // width + + for x in range(width - 1): + y1 = center + (audio_buffer[x * step] // 256) + y2 = center + (audio_buffer[(x + 1) * step] // 256) + img.draw_line(x, y1, x + 1, y2, color=(0, 255, 0)) + + # Add level meter + level = max(abs(min(audio_buffer)), abs(max(audio_buffer))) + bar_height = (level * height) // 32768 + img.draw_rectangle(0, height - bar_height, 20, bar_height, + color=(0, 255, 0), fill=True) + + lcd.display(img) +``` + +**Verdict:** ✅ **Moderate Overhead - Feasible and Cool!** + +--- + +#### Output Waveform (TTS Response) +**What to Show:** +- TTS audio being played back +- Speaking animation (mouth/sound waves) +- Response text scrolling + +**Overhead:** +- **CPU:** ~10-15% (similar to input) +- **RAM:** ~300KB +- **Complexity:** Moderate + +**Note:** Can reuse same visualization code as input waveform. 
+ +**Verdict:** ✅ **Same as Input - Totally Doable** + +--- + +### Use Case 3: Spectrum Analyzer (Higher Overhead) +**What to Show:** +- Frequency bars (FFT visualization) +- 8-16 frequency bands +- Classic "equalizer" look + +**Overhead:** +- **CPU:** ~20-30% (FFT computation + drawing) +- **RAM:** ~500KB (FFT buffers + framebuffer) +- **Complexity:** Moderate-High (FFT required) + +**Implementation Note:** +- K210 KPU can accelerate FFT operations +- Can do simple 8-band analysis with minimal CPU +- More bands = more CPU + +**Verdict:** ⚠️ **Higher Overhead - Use Sparingly** + +--- + +### Use Case 4: Interactive UI (High Overhead) +**What to Show:** +- Touchscreen controls (if touchscreen available) +- Settings menu +- Volume slider +- Wake word selection +- Network configuration + +**Overhead:** +- **CPU:** ~20-40% (touch detection + UI rendering) +- **RAM:** ~1MB (UI framework + assets) +- **Complexity:** High (need UI framework) + +**Verdict:** ⚠️ **High Overhead - Nice-to-Have Later** + +--- + +## Camera Usage for Voice Assistant + +### Use Case 1: Person Detection (Wake on Face) +**What to Do:** +- Detect person in frame +- Only listen when someone present +- Privacy mode: disable when no one around + +**Overhead:** +- **CPU:** ~30-40% (KPU handles inference) +- **RAM:** ~1.5MB (model + frame buffers) +- **Power:** ~200mW additional +- **Complexity:** Moderate (pre-trained models available) + +**Pros:** +- ✅ Privacy enhancement (only listen when occupied) +- ✅ Power saving (sleep when empty room) +- ✅ Pre-trained models available for K210 + +**Cons:** +- ❌ Adds latency (check camera before listening) +- ❌ Privacy concerns (camera always on) +- ❌ Moderate resource usage + +**Verdict:** 🤔 **Interesting but Complex - Phase 2+** + +--- + +### Use Case 2: Visual Context (Future AI Integration) +**What to Do:** +- "What am I holding?" 
queries +- Visual scene understanding +- QR code scanning +- Gesture control + +**Overhead:** +- **CPU:** 40-60% (vision processing) +- **RAM:** 2-3MB (models + buffers) +- **Complexity:** High (requires vision models) + +**Verdict:** ❌ **Too Complex for Initial Release - Future Feature** + +--- + +### Use Case 3: Visual Wake Word (Gesture Detection) +**What to Do:** +- Wave hand to activate +- Thumbs up/down for feedback +- Alternative to voice wake word + +**Overhead:** +- **CPU:** ~30-40% (gesture detection) +- **RAM:** ~1.5MB +- **Complexity:** Moderate-High + +**Verdict:** 🤔 **Novel Idea - Phase 3+** + +--- + +## Recommended LCD Implementation + +### Phase 1: Basic Status Display (Recommended NOW) +``` +┌─────────────────────────┐ +│ Voice Assistant │ +│ │ +│ Status: Listening ● │ +│ WiFi: ████░░ 75% │ +│ Server: Connected │ +│ │ +│ Volume: [██████░░░] │ +│ │ +│ Time: 14:23 │ +└─────────────────────────┘ +``` + +**Features:** +- Current state indicator +- WiFi signal strength +- Server connection status +- Volume level bar +- Clock +- Wake word indicator (pulsing circle) + +**Overhead:** ~2-5% CPU, 200KB RAM + +--- + +### Phase 2: Waveform Visualization (Cool Addition) +``` +┌─────────────────────────┐ +│ Listening... [●] │ +├─────────────────────────┤ +│ ╱╲ ╱╲ ╱╲ ╱╲ │ +│ ╱ ╲╱ ╲ ╱ ╲╱ ╲ │ +│ │ +│ Level: [████░░░░░░] │ +└─────────────────────────┘ +``` + +**Features:** +- Real-time waveform (15-30 FPS) +- Audio level meter +- State indicator +- Simple and clean + +**Overhead:** ~10-15% CPU, 300KB RAM + +--- + +### Phase 3: Enhanced Visualizer (Polish) +``` +┌─────────────────────────┐ +│ Hey Computer! 
[●] │ +├─────────────────────────┤ +│ ▁▂▃▄▅▆▇█ ▁▂▃▄▅▆▇█ │ +│ ▁▂▃▄▅▆▇█ ▁▂▃▄▅▆▇█ │ +│ │ +│ "Turn off the lights" │ +└─────────────────────────┘ +``` + +**Features:** +- Spectrum analyzer (8-16 bands) +- Transcription display +- Animated response +- More polished UI + +**Overhead:** ~20-30% CPU, 500KB RAM + +--- + +## Resource Budget Analysis + +### Total K210 Resources +- **CPU:** 2 cores @ 400MHz (assume ~100% available) +- **RAM:** 6MB available for app +- **Bandwidth:** SPI (LCD), I2S (audio), WiFi + +### Current Voice Assistant Usage (Server-Side Wake Word) + +| Component | CPU % | RAM (KB) | +|-----------|-------|----------| +| Audio Capture (I2S) | 5% | 128 | +| Audio Playback | 5% | 128 | +| WiFi Streaming | 10% | 256 | +| Network Stack | 5% | 512 | +| MaixPy Runtime | 10% | 1024 | +| **Base Total** | **35%** | **~2MB** | + +### With LCD Features + +| Display Mode | CPU % | RAM (KB) | Total CPU | Total RAM | +|--------------|-------|----------|-----------|-----------| +| **None** | 0% | 0 | 35% | 2MB | +| **Status Only** | 2-5% | 200 | 37-40% | 2.2MB | +| **Waveform** | 10-15% | 300 | 45-50% | 2.3MB | +| **Spectrum** | 20-30% | 500 | 55-65% | 2.5MB | + +### With Camera Features + +| Feature | CPU % | RAM (KB) | Feasible? 
| +|---------|-------|----------|-----------| +| Person Detection | 30-40% | 1500 | ⚠️ Tight | +| Gesture Control | 30-40% | 1500 | ⚠️ Tight | +| Visual Context | 40-60% | 2500 | ❌ Too much | + +--- + +## Recommendations + +### ✅ IMPLEMENT NOW: Basic Status Display +- **Why:** Very low overhead, huge UX improvement +- **Overhead:** 2-5% CPU, 200KB RAM +- **Benefit:** Users know what's happening at a glance +- **Difficulty:** Easy (MaixPy has good LCD support) + +### ✅ IMPLEMENT SOON: Waveform Visualizer +- **Why:** Cool factor, moderate overhead +- **Overhead:** 10-15% CPU, 300KB RAM +- **Benefit:** Engaging, confirms mic is working, looks professional +- **Difficulty:** Moderate (simple drawing code) + +### 🤔 CONSIDER LATER: Spectrum Analyzer +- **Why:** Higher overhead, diminishing returns +- **Overhead:** 20-30% CPU, 500KB RAM +- **Benefit:** Looks cool but not essential +- **Difficulty:** Moderate-High (FFT required) + +### ❌ SKIP FOR NOW: Camera Features +- **Why:** High overhead, complex, privacy concerns +- **Overhead:** 30-60% CPU, 1.5-2.5MB RAM +- **Benefit:** Novel but not core functionality +- **Difficulty:** High (model integration, privacy handling) + +--- + +## Implementation Priority + +### Phase 1 (Week 1): Core Functionality +- [x] Audio capture and streaming +- [x] Server integration +- [ ] Basic LCD status display + - Idle/Listening/Processing states + - WiFi status + - Connection indicator + +### Phase 2 (Week 2-3): Visual Enhancement +- [ ] Audio waveform visualizer + - Input (microphone) waveform + - Output (TTS) waveform + - Level meters + - Clean, minimal design + +### Phase 3 (Month 2): Polish +- [ ] Spectrum analyzer option +- [ ] Animated transitions +- [ ] Settings display +- [ ] Network configuration UI (optional) + +### Phase 4 (Month 3+): Advanced Features +- [ ] Camera person detection (privacy mode) +- [ ] Gesture control experiments +- [ ] Visual wake word alternative + +--- + +## Code Structure Recommendation + +```python +# 
main.py structure with modular display + +import lcd, audio, network +from display_manager import DisplayManager +from audio_processor import AudioProcessor +from voice_client import VoiceClient + +# Initialize +lcd.init() +display = DisplayManager(mode='waveform') # or 'status' or 'spectrum' + +# Main loop +while True: + # Audio processing + audio_buffer = audio.capture() + + # Update display (non-blocking) + if display.mode == 'status': + display.show_status(state='listening', wifi_level=75) + elif display.mode == 'waveform': + display.show_waveform(audio_buffer) + elif display.mode == 'spectrum': + display.show_spectrum(audio_buffer) + + # Network communication + voice_client.stream_audio(audio_buffer) +``` + +--- + +## Measured Overhead (Estimated) + +### Status Display Only +- **CPU:** 38% total (3% for display) +- **RAM:** 2.2MB total (200KB for display) +- **Battery Life:** -2% (minimal impact) +- **WiFi Latency:** No impact +- **Verdict:** ✅ Negligible impact, worth it! + +### Waveform Visualizer +- **CPU:** 48% total (13% for display) +- **RAM:** 2.3MB total (300KB for display) +- **Battery Life:** -5% (minor impact) +- **WiFi Latency:** No impact (still <200ms) +- **Verdict:** ✅ Acceptable, looks great! + +### Spectrum Analyzer +- **CPU:** 60% total (25% for display) +- **RAM:** 2.5MB total (500KB for display) +- **Battery Life:** -8% (noticeable) +- **WiFi Latency:** Possible minor impact +- **Verdict:** ⚠️ Usable but pushing limits + +--- + +## Camera: Should You Use It? + +### Pros +- ✅ Already have the hardware (free!) +- ✅ Novel features (person detection, gestures) +- ✅ Privacy enhancement potential +- ✅ Future-proofing + +### Cons +- ❌ High resource usage (30-60% CPU, 1.5-2.5MB RAM) +- ❌ Complex implementation +- ❌ Privacy concerns (camera always on) +- ❌ Not core to voice assistant +- ❌ Competes with audio processing resources + +### Recommendation +**Skip camera for initial implementation.** Focus on core voice assistant functionality. 
Revisit in Phase 3+ when: +1. Core features are stable +2. You want to experiment +3. You have time for optimization +4. You want to differentiate from commercial assistants + +--- + +## Final Recommendations + +### Start With (NOW): +```python +# Simple status display +# - State indicator +# - WiFi status +# - Connection status +# - Time/date +# Overhead: ~3% CPU, 200KB RAM +``` + +### Add Next (Week 2): +```python +# Waveform visualizer +# - Real-time audio waveform +# - Level meter +# - Clean design +# Overhead: +10% CPU, +100KB RAM +``` + +### Maybe Later (Month 2+): +```python +# Spectrum analyzer +# - 8-16 frequency bands +# - FFT visualization +# - Optional mode +# Overhead: +15% CPU, +200KB RAM +``` + +### Skip (For Now): +```python +# Camera features +# - Person detection +# - Gestures +# - Visual context +# Too complex, revisit later +``` + +--- + +## Example: Combined Status + Waveform Display + +``` +┌───────────────────────────────┐ +│ Voice Assistant [LISTENING]│ +├───────────────────────────────┤ +│ │ +│ ╱╲ ╱╲ ╱╲ ╱╲ ╱╲ │ +│ ╱ ╲ ╱ ╲╱ ╲ ╱ ╲╱ ╲ │ +│ ╲╱ ╲╱ │ +│ │ +│ Vol: [████████░░] WiFi: ▂▃▅█ │ +│ │ +│ Server: 10.1.10.71 ● 14:23 │ +└───────────────────────────────┘ +``` + +**Total Overhead:** ~15% CPU, 300KB RAM +**Impact:** Minimal, excellent UX improvement +**Coolness Factor:** 9/10 + +--- + +## Conclusion + +### LCD: YES! Definitely Use It! ✅ +- **Status display:** Low overhead, huge benefit +- **Waveform:** Moderate overhead, looks amazing +- **Spectrum:** Higher overhead, nice-to-have + +**Recommendation:** Start with status, add waveform, consider spectrum later. + +### Camera: Skip For Now ❌ +- High overhead +- Complex implementation +- Not core functionality +- Revisit in Phase 3+ + +**Focus on nailing the voice assistant first, then add visual features incrementally!** + +--- + +**TL;DR:** Use the LCD for status + waveform visualization (~15% overhead total). Skip the camera for now. Your K210 can easily handle this! 
🎉 diff --git a/docs/MYCROFT_PRECISE_GUIDE.md b/docs/MYCROFT_PRECISE_GUIDE.md new file mode 100755 index 0000000..b3e9b64 --- /dev/null +++ b/docs/MYCROFT_PRECISE_GUIDE.md @@ -0,0 +1,638 @@ +# Mycroft Precise Wake Word Training Guide + +## Overview + +Mycroft Precise is a neural network-based wake word detector that you can train on custom wake words. This guide covers two deployment approaches for your Maix Duino voice assistant: + +1. **Server-side detection** (Recommended to start) - Run Precise on Heimdall +2. **Edge detection** (Advanced) - Convert model for K210 on Maix Duino + +## Architecture Options + +### Option A: Server-Side Wake Word Detection (Recommended) + +``` +Maix Duino Heimdall +┌─────────────────┐ ┌──────────────────────┐ +│ Continuous │ Audio Stream │ Mycroft Precise │ +│ Audio Capture │───────────────>│ Wake Word Detection │ +│ │ │ │ +│ LED Feedback │<───────────────│ Whisper STT │ +│ Speaker Output │ Response │ HA Integration │ +│ │ │ Piper TTS │ +└─────────────────┘ └──────────────────────┘ +``` + +**Pros:** +- Easier setup and debugging +- Better accuracy (more compute available) +- Easy to retrain and update models +- Can use ensemble models + +**Cons:** +- Continuous audio streaming (bandwidth) +- Slightly higher latency (~100-200ms) +- Requires stable network + +### Option B: Edge Detection on Maix Duino (Advanced) + +``` +Maix Duino Heimdall +┌─────────────────┐ ┌──────────────────────┐ +│ Precise Model │ │ │ +│ (K210 KPU) │ │ │ +│ Wake Detection │ Audio (on wake)│ Whisper STT │ +│ │───────────────>│ HA Integration │ +│ Audio Capture │ │ Piper TTS │ +│ LED Feedback │<───────────────│ │ +└─────────────────┘ Response └──────────────────────┘ +``` + +**Pros:** +- Lower latency (~50ms wake detection) +- Less network traffic +- Works even if server is down +- Better privacy (no continuous streaming) + +**Cons:** +- Complex model conversion (TensorFlow → ONNX → KMODEL) +- Limited by K210 compute +- Harder to update models +- Requires careful 
optimization + +## Recommended Approach: Start with Server-Side + +Begin with server-side detection on Heimdall, then optimize to edge detection once everything works. + +## Phase 1: Mycroft Precise Setup on Heimdall + +### Install Mycroft Precise + +```bash +# SSH to Heimdall +ssh alan@10.1.10.71 + +# Create conda environment for Precise +conda create -n precise python=3.7 -y +conda activate precise + +# Install TensorFlow 1.x (Precise requires this) +pip install tensorflow==1.15.5 --break-system-packages + +# Install Precise +pip install mycroft-precise --break-system-packages + +# Install audio dependencies +sudo apt-get install -y portaudio19-dev sox libatlas-base-dev + +# Install precise-engine (for faster inference) +wget https://github.com/MycroftAI/mycroft-precise/releases/download/v0.3.0/precise-engine_0.3.0_x86_64.tar.gz +tar xvf precise-engine_0.3.0_x86_64.tar.gz +sudo cp precise-engine/precise-engine /usr/local/bin/ +sudo chmod +x /usr/local/bin/precise-engine +``` + +### Verify Installation + +```bash +precise-engine --version +# Should output: Precise v0.3.0 + +precise-listen --help +# Should show help text +``` + +## Phase 2: Training Your Custom Wake Word + +### Step 1: Collect Wake Word Samples + +You'll need ~50-100 samples of your wake word. Choose something: +- 2-3 syllables long +- Easy to pronounce +- Unlikely to occur in normal speech + +Example wake words: +- "Hey Computer" (recommended - similar to commercial products) +- "Okay Jarvis" +- "Hello Assistant" +- "Activate Assistant" + +```bash +# Create project directory +mkdir -p ~/precise-models/hey-computer +cd ~/precise-models/hey-computer + +# Record wake word samples +precise-collect +``` + +When prompted: +1. Type your wake word ("hey computer") +2. Press SPACE to record +3. Say the wake word clearly +4. Press SPACE to stop +5. 
Repeat 50-100 times + +**Tips for good samples:** +- Vary your tone and speed +- Different distances from mic +- Different background noise levels +- Different pronunciations +- Have family members record too + +### Step 2: Collect "Not Wake Word" Samples + +Record background audio and similar-sounding phrases: + +```bash +# Create not-wake-word directory +mkdir -p not-wake-word + +# Record random speech, music, TV, etc. +# These help the model learn what NOT to trigger on +precise-collect -f not-wake-word/random.wav +``` + +Collect ~200-500 samples of: +- Normal conversation +- TV/music in background +- Similar sounding phrases ("hey commuter", "they computed", etc.) +- Ambient noise +- Other household sounds + +### Step 3: Generate Training Data + +```bash +# Organize samples +mkdir -p hey-computer/{wake-word,not-wake-word,test/wake-word,test/not-wake-word} + +# Split samples (80% train, 20% test) +# Move 80% of wake-word samples to hey-computer/wake-word/ +# Move 20% to hey-computer/test/wake-word/ +# Move 80% of not-wake-word to hey-computer/not-wake-word/ +# Move 20% to hey-computer/test/not-wake-word/ + +# Generate training data +precise-train-incremental hey-computer.net hey-computer/ +``` + +### Step 4: Train the Model + +```bash +# Basic training (will take 30-60 minutes) +precise-train -e 60 hey-computer.net hey-computer/ + +# For better accuracy, train longer +precise-train -e 120 hey-computer.net hey-computer/ + +# Watch for overfitting - validation loss should decrease +# Stop if validation loss starts increasing +``` + +Training output will show: +``` +Epoch 1/60 +loss: 0.4523 - val_loss: 0.3891 +Epoch 2/60 +loss: 0.3102 - val_loss: 0.2845 +... +``` + +### Step 5: Test the Model + +```bash +# Test with microphone +precise-listen hey-computer.net + +# Speak your wake word - should see "!" 
when detected +# Speak other phrases - should not trigger + +# Test with audio files +precise-test hey-computer.net hey-computer/test/ + +# Should show accuracy metrics: +# Wake word accuracy: 95%+ +# False positive rate: <5% +``` + +### Step 6: Optimize Sensitivity + +```bash +# Adjust activation threshold +precise-listen hey-computer.net -t 0.5 # Default +precise-listen hey-computer.net -t 0.7 # More conservative +precise-listen hey-computer.net -t 0.3 # More aggressive + +# Find optimal threshold for your use case +# Higher = fewer false positives, more false negatives +# Lower = more false positives, fewer false negatives +``` + +## Phase 3: Integration with Voice Server + +### Update voice_server.py + +Add Mycroft Precise support to the server: + +```python +# Add to imports +from precise_runner import PreciseEngine, PreciseRunner +import pyaudio + +# Add to configuration +PRECISE_MODEL = os.getenv("PRECISE_MODEL", + "/home/alan/precise-models/hey-computer.net") +PRECISE_SENSITIVITY = float(os.getenv("PRECISE_SENSITIVITY", "0.5")) + +# Global precise runner +precise_runner = None + +def on_activation(): + """Called when wake word is detected""" + print("Wake word detected!") + # Trigger recording and processing + # (Implementation depends on your audio streaming setup) + +def start_precise_listener(): + """Start Mycroft Precise wake word detection""" + global precise_runner + + engine = PreciseEngine( + '/usr/local/bin/precise-engine', + PRECISE_MODEL + ) + + precise_runner = PreciseRunner( + engine, + sensitivity=PRECISE_SENSITIVITY, + on_activation=on_activation + ) + + precise_runner.start() + print(f"Precise listening with model: {PRECISE_MODEL}") +``` + +### Server-Side Wake Word Detection Architecture + +For server-side detection, you need continuous audio streaming from Maix Duino: + +```python +# New endpoint for audio streaming +@app.route('/stream', methods=['POST']) +def stream_audio(): + """ + Receive continuous audio stream for wake word detection 
+ + This endpoint processes incoming audio chunks and runs them + through Mycroft Precise for wake word detection. + """ + # Implementation here + pass +``` + +## Phase 4: Maix Duino Integration (Server-Side Detection) + +### Update maix_voice_client.py + +For server-side detection, stream audio continuously: + +```python +# Add to configuration +STREAM_ENDPOINT = "/stream" +WAKE_WORD_CHECK_INTERVAL = 0.1 # Check every 100ms + +def stream_audio_continuous(): + """ + Stream audio to server for wake word detection + + Server will notify us when wake word is detected + """ + import socket + import struct + + # Create socket connection + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_addr = (VOICE_SERVER_URL.replace('http://', '').split(':')[0], 8888) + + try: + sock.connect(server_addr) + print("Connected to wake word server") + + while True: + # Capture audio chunk + chunk = i2s_dev.record(CHUNK_SIZE) + + if chunk: + # Send chunk size first, then chunk + sock.sendall(struct.pack('>I', len(chunk))) + sock.sendall(chunk) + + # Check for wake word detection signal + # (simplified - actual implementation needs non-blocking socket) + + time.sleep(0.01) + + except Exception as e: + print(f"Streaming error: {e}") + finally: + sock.close() +``` + +## Phase 5: Edge Detection on Maix Duino (Advanced) + +### Convert Precise Model to KMODEL + +This is complex and requires several conversion steps: + +```bash +# Step 1: Convert TensorFlow model to ONNX +pip install tf2onnx --break-system-packages + +python -m tf2onnx.convert \ + --saved-model hey-computer.net \ + --output hey-computer.onnx + +# Step 2: Optimize ONNX model +pip install onnx --break-system-packages + +python -c " +import onnx +from onnx import optimizer + +model = onnx.load('hey-computer.onnx') +passes = ['eliminate_deadend', 'eliminate_identity', + 'eliminate_nop_dropout', 'eliminate_nop_pad'] +optimized = optimizer.optimize(model, passes) +onnx.save(optimized, 'hey-computer-opt.onnx') +" + +# 
Step 3: Convert ONNX to KMODEL (for K210)
+# Use nncase (https://github.com/kendryte/nncase)
+# This step is hardware-specific and complex
+
+# Install nncase
+pip install nncase --break-system-packages
+
+# Convert (adjust parameters based on your model)
+ncc compile hey-computer-opt.onnx \
+    -i onnx \
+    --dataset calibration_data \
+    -o hey-computer.kmodel \
+    --target k210
+```
+
+**Note:** KMODEL conversion is non-trivial and may require model architecture adjustments. The K210 has limitations:
+- Max model size: ~6MB
+- Limited operators support
+- Quantization required for performance
+
+### Testing KMODEL on Maix Duino
+
+```python
+# Load model in maix_voice_client.py
+import KPU as kpu
+
+def load_wake_word_model_kmodel():
+    """Load converted KMODEL for wake word detection"""
+    global kpu_task
+
+    try:
+        kpu_task = kpu.load("/sd/models/hey-computer.kmodel")
+        print("Wake word model loaded on K210")
+        return True
+    except Exception as e:
+        print(f"Failed to load model: {e}")
+        return False
+
+def detect_wake_word_kmodel():
+    """Run wake word detection using K210 KPU"""
+    global kpu_task
+
+    # Capture audio
+    audio_chunk = i2s_dev.record(CHUNK_SIZE)
+
+    # Preprocess for model (depends on model input format)
+    # This is model-specific - adjust based on your training
+
+    # Run inference
+    features = preprocess_audio(audio_chunk)
+    output = kpu.run_yolo2(kpu_task, features) # Adjust based on model type
+
+    # Check confidence
+    if output[0] > WAKE_WORD_THRESHOLD:
+        return True
+
+    return False
+```
+
+## Recommended Wake Words
+
+Based on testing and community feedback:
+
+**Best performers:**
+1. "Hey Computer" - Clear, distinct, four-syllable, hard consonants
+2. "Okay Jarvis" - Pop culture reference, easy to say
+3. 
"Hey Mycroft" - Original Mycroft wake word (lots of training data available) + +**Avoid:** +- Single syllable words (too easy to trigger) +- Common phrases ("okay", "hey there") +- Names of people in your household +- Words that sound like common speech patterns + +## Training Tips + +### For Best Accuracy + +1. **Diverse training data:** + - Multiple speakers + - Various distances (1ft to 15ft) + - Different noise conditions + - Accent variations + +2. **Quality over quantity:** + - 50 good samples > 200 poor samples + - Clear pronunciation + - Consistent volume + +3. **Hard negatives:** + - Include similar-sounding phrases + - Include partial wake words + - Include common false triggers you notice + +4. **Regular retraining:** + - Add false positives to training set + - Add missed detections + - Retrain every few weeks initially + +### Collecting Hard Negatives + +```bash +# Run Precise in test mode and collect false positives +precise-listen hey-computer.net --save-false-positives + +# This will save audio clips when model triggers incorrectly +# Add these to your not-wake-word training set +# Retrain to reduce false positives +``` + +## Performance Benchmarks + +### Server-Side Detection (Heimdall) +- **Latency:** 100-200ms from utterance to detection +- **Accuracy:** 95%+ with good training +- **False positive rate:** <1 per hour with tuning +- **CPU usage:** ~5-10% (single core) +- **Network:** ~128kbps continuous stream + +### Edge Detection (Maix Duino) +- **Latency:** 50-100ms +- **Accuracy:** 80-90% (limited by K210 quantization) +- **False positive rate:** Varies by model optimization +- **CPU usage:** ~30% K210 (leaves room for other tasks) +- **Network:** 0 until wake detected + +## Monitoring and Debugging + +### Log Wake Word Detections + +```python +# Add to voice_server.py +import datetime + +def log_wake_word(confidence, timestamp=None): + """Log wake word detections for analysis""" + if timestamp is None: + timestamp = datetime.datetime.now() + + 
log_file = "/home/alan/voice-assistant/logs/wake_words.log" + + with open(log_file, 'a') as f: + f.write(f"{timestamp.isoformat()},{confidence}\n") +``` + +### Analyze False Positives + +```bash +# Check wake word log +tail -f ~/voice-assistant/logs/wake_words.log + +# Find patterns in false positives +grep "wake_word" ~/voice-assistant/logs/wake_words.log | \ + awk -F',' '{print $2}' | \ + sort -n | uniq -c +``` + +## Production Deployment + +### Systemd Service with Precise + +Update the systemd service to include Precise: + +```ini +[Unit] +Description=Voice Assistant with Wake Word Detection +After=network.target + +[Service] +Type=simple +User=alan +WorkingDirectory=/home/alan/voice-assistant +Environment="PATH=/home/alan/miniconda3/envs/precise/bin:/usr/local/bin:/usr/bin:/bin" +EnvironmentFile=/home/alan/voice-assistant/config/.env +ExecStart=/home/alan/miniconda3/envs/precise/bin/python voice_server.py --enable-precise +Restart=on-failure +RestartSec=10 + +[Install] +WantedBy=multi-user.target +``` + +## Troubleshooting + +### Precise Won't Start + +```bash +# Check TensorFlow version +python -c "import tensorflow as tf; print(tf.__version__)" +# Should be 1.15.x + +# Check model file +file hey-computer.net +# Should be "TensorFlow SavedModel" + +# Test model directly +precise-engine hey-computer.net +# Should load without errors +``` + +### Low Accuracy + +1. **Collect more training data** - Especially hard negatives +2. **Increase training epochs** - Try 200-300 epochs +3. **Verify training/test split** - Should be 80/20 +4. **Check audio quality** - Sample rate should match (16kHz) +5. **Try different wake words** - Some are easier to detect + +### High False Positive Rate + +1. **Increase threshold** - Try 0.6, 0.7, 0.8 +2. **Add false positives to training** - Retrain with false triggers +3. **Collect more negative samples** - Expand not-wake-word set +4. 
**Use ensemble models** - Run multiple models, require agreement + +### KMODEL Conversion Fails + +This is expected - K210 conversion is complex: + +1. **Simplify model architecture** - Reduce layer count +2. **Use quantization-aware training** - Train with quantization in mind +3. **Check operator support** - K210 doesn't support all TF ops +4. **Consider alternatives:** + - Use pre-trained models for K210 + - Stick with server-side detection + - Use Porcupine instead (has K210 support) + +## Alternative: Use Pre-trained Models + +Mycroft provides some pre-trained models: + +```bash +# Download Hey Mycroft model +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz +tar xzf hey-mycroft.tar.gz + +# Test it +precise-listen hey-mycroft.net +``` + +Then train your own wake word starting from this base: + +```bash +# Fine-tune from pre-trained model +precise-train -e 60 my-wake-word.net my-wake-word/ \ + --from-checkpoint hey-mycroft.net +``` + +## Next Steps + +1. **Start with server-side** - Get it working on Heimdall first +2. **Collect good training data** - Quality samples are key +3. **Test and tune threshold** - Find the sweet spot for your environment +4. **Monitor performance** - Track false positives and misses +5. **Iterate on training** - Add hard examples, retrain +6. **Consider edge deployment** - Once server-side is solid + +## Resources + +- Mycroft Precise Docs: https://github.com/MycroftAI/mycroft-precise +- Training Guide: https://mycroft-ai.gitbook.io/docs/mycroft-technologies/precise +- Community Models: https://github.com/MycroftAI/precise-data +- K210 Docs: https://canaan-creative.com/developer +- nncase: https://github.com/kendryte/nncase + +## Conclusion + +Mycroft Precise gives you full control over your wake word detection with complete privacy. Start with server-side detection for easier development, then optimize to edge detection once you have a well-trained model. 
+ +The key to success is good training data - invest time in collecting diverse, high-quality samples! diff --git a/docs/PRECISE_DEPLOYMENT.md b/docs/PRECISE_DEPLOYMENT.md new file mode 100755 index 0000000..8cfb313 --- /dev/null +++ b/docs/PRECISE_DEPLOYMENT.md @@ -0,0 +1,577 @@ +# Mycroft Precise Deployment Guide + +## Quick Reference: Server vs Edge Detection + +### Server-Side Detection (Recommended for Start) + +**Setup:** +```bash +# 1. On Heimdall: Setup Precise +./setup_precise.sh --wake-word "hey computer" + +# 2. Train your model (follow scripts in ~/precise-models/hey-computer/) +cd ~/precise-models/hey-computer +./1-record-wake-word.sh +./2-record-not-wake-word.sh +# Organize samples, then: +./3-train-model.sh +./4-test-model.sh + +# 3. Start voice server with Precise +cd ~/voice-assistant +conda activate precise +python voice_server.py \ + --enable-precise \ + --precise-model ~/precise-models/hey-computer/hey-computer.net \ + --precise-sensitivity 0.5 +``` + +**Architecture:** +- Maix Duino → Continuous audio stream → Heimdall +- Heimdall runs Precise on audio stream +- On wake word: Process command with Whisper +- Response → TTS → Stream back to Maix Duino + +**Pros:** Easier setup, better accuracy, simple updates +**Cons:** More network traffic, requires stable connection + +### Edge Detection (Advanced - Future Phase) + +**Setup:** +```bash +# 1. Train model on Heimdall (same as above) +# 2. Convert to KMODEL for K210 +# 3. Deploy to Maix Duino +# (See MYCROFT_PRECISE_GUIDE.md for detailed conversion steps) +``` + +**Architecture:** +- Maix Duino runs Precise locally on K210 +- Only sends audio after wake word detected +- Lower latency, less network traffic + +**Pros:** Lower latency, less bandwidth, works offline +**Cons:** Complex conversion, lower accuracy, harder updates + +## Phase-by-Phase Deployment + +### Phase 1: Server Setup (Day 1) + +```bash +# On Heimdall +ssh alan@10.1.10.71 + +# 1. 
Setup voice assistant base +./setup_voice_assistant.sh + +# 2. Setup Mycroft Precise +./setup_precise.sh --wake-word "hey computer" + +# 3. Configure environment +vim ~/voice-assistant/config/.env +``` + +Update `.env`: +```bash +HA_URL=http://your-home-assistant:8123 +HA_TOKEN=your_token_here +PRECISE_MODEL=/home/alan/precise-models/hey-computer/hey-computer.net +PRECISE_SENSITIVITY=0.5 +``` + +### Phase 2: Wake Word Training (Day 1-2) + +```bash +# Navigate to training directory +cd ~/precise-models/hey-computer +conda activate precise + +# Record samples (30-60 minutes) +./1-record-wake-word.sh # Record 50-100 wake word samples +./2-record-not-wake-word.sh # Record 200-500 negative samples + +# Organize samples +# Move 80% of wake-word recordings to wake-word/ +# Move 20% of wake-word recordings to test/wake-word/ +# Move 80% of not-wake-word to not-wake-word/ +# Move 20% of not-wake-word to test/not-wake-word/ + +# Train model (30-60 minutes) +./3-train-model.sh + +# Test model +./4-test-model.sh + +# Evaluate on test set +./5-evaluate-model.sh + +# Tune threshold +./6-tune-threshold.sh +``` + +### Phase 3: Server Integration (Day 2) + +#### Option A: Manual Testing + +```bash +cd ~/voice-assistant +conda activate precise + +# Start server with Precise enabled +python voice_server.py \ + --enable-precise \ + --precise-model ~/precise-models/hey-computer/hey-computer.net \ + --precise-sensitivity 0.5 \ + --ha-url http://your-ha:8123 \ + --ha-token your_token +``` + +#### Option B: Systemd Service + +Update systemd service to use Precise environment: + +```bash +sudo vim /etc/systemd/system/voice-assistant.service +``` + +```ini +[Unit] +Description=Voice Assistant with Wake Word Detection +After=network.target + +[Service] +Type=simple +User=alan +WorkingDirectory=/home/alan/voice-assistant +Environment="PATH=/home/alan/miniconda3/envs/precise/bin:/usr/local/bin:/usr/bin:/bin" +EnvironmentFile=/home/alan/voice-assistant/config/.env 
+ExecStart=/home/alan/miniconda3/envs/precise/bin/python voice_server.py \ + --enable-precise \ + --precise-model /home/alan/precise-models/hey-computer/hey-computer.net \ + --precise-sensitivity 0.5 +Restart=on-failure +RestartSec=10 +StandardOutput=append:/home/alan/voice-assistant/logs/voice_assistant.log +StandardError=append:/home/alan/voice-assistant/logs/voice_assistant_error.log + +[Install] +WantedBy=multi-user.target +``` + +Enable and start: +```bash +sudo systemctl daemon-reload +sudo systemctl enable voice-assistant +sudo systemctl start voice-assistant +sudo systemctl status voice-assistant +``` + +### Phase 4: Maix Duino Setup (Day 2-3) + +For server-side wake word detection, Maix Duino streams audio: + +Update `maix_voice_client.py`: + +```python +# Use simplified mode - just stream audio +# Server handles wake word detection +CONTINUOUS_STREAM = True # Enable continuous streaming +WAKE_WORD_CHECK_INTERVAL = 0 # Server-side detection +``` + +Flash and test: +1. Copy updated script to SD card +2. Boot Maix Duino +3. Check serial console for connection +4. Speak wake word +5. Verify server logs show detection + +### Phase 5: Testing & Tuning (Day 3-7) + +#### Test Wake Word Detection + +```bash +# Monitor server logs +journalctl -u voice-assistant -f + +# Or check detections via API +curl http://10.1.10.71:5000/wake-word/detections +``` + +#### Test End-to-End Flow + +1. Say wake word: "Hey Computer" +2. Wait for LED/beep on Maix Duino +3. Say command: "Turn on the living room lights" +4. Verify HA command executes +5. 
Hear TTS response + +#### Monitor Performance + +```bash +# Check wake word log +tail -f ~/voice-assistant/logs/wake_words.log + +# Check false positive rate +grep "wake_word" ~/voice-assistant/logs/wake_words.log | wc -l + +# Check accuracy +# Should see detections when you say wake word +# Should NOT see detections during normal conversation +``` + +#### Tune Sensitivity + +If too many false positives: +```bash +# Increase threshold (more conservative) +# Edit systemd service or restart with: +python voice_server.py --precise-sensitivity 0.7 +``` + +If missing wake words: +```bash +# Decrease threshold (more aggressive) +python voice_server.py --precise-sensitivity 0.3 +``` + +#### Collect Hard Examples + +```bash +# When you notice false positives, record them +cd ~/precise-models/hey-computer +precise-collect -f not-wake-word/false-positive-$(date +%s).wav + +# When wake word is missed, record it +precise-collect -f wake-word/missed-$(date +%s).wav + +# After collecting 10-20 examples, retrain +./3-train-model.sh +``` + +## Monitoring Commands + +### Check System Status + +```bash +# Service status +sudo systemctl status voice-assistant + +# Server health +curl http://10.1.10.71:5000/health + +# Wake word status +curl http://10.1.10.71:5000/wake-word/status + +# Recent detections +curl http://10.1.10.71:5000/wake-word/detections +``` + +### View Logs + +```bash +# Real-time server logs +journalctl -u voice-assistant -f + +# Last 50 lines +journalctl -u voice-assistant -n 50 + +# Specific log file +tail -f ~/voice-assistant/logs/voice_assistant.log + +# Wake word detections +tail -f ~/voice-assistant/logs/wake_words.log + +# Maix Duino serial console +screen /dev/ttyUSB0 115200 +``` + +### Performance Metrics + +```bash +# CPU usage (should be ~5-10% idle, spikes during processing) +top -p $(pgrep -f voice_server.py) + +# Memory usage +ps aux | grep voice_server.py + +# Network traffic (if streaming audio) +iftop -i eth0 # or your network interface +``` + +## 
Troubleshooting + +### Wake Word Not Detecting + +**Check model is loaded:** +```bash +curl http://10.1.10.71:5000/wake-word/status +# Should show: "enabled": true +``` + +**Test model directly:** +```bash +conda activate precise +precise-listen ~/precise-models/hey-computer/hey-computer.net +# Speak wake word - should see "!" +``` + +**Check sensitivity:** +```bash +# Try lower threshold +precise-listen ~/precise-models/hey-computer/hey-computer.net -t 0.3 +``` + +**Verify audio input:** +```bash +# Test microphone +arecord -d 5 test.wav +aplay test.wav +``` + +### Too Many False Positives + +**Increase threshold:** +```bash +# Edit service or restart with higher sensitivity +python voice_server.py --precise-sensitivity 0.7 +``` + +**Retrain with false positives:** +```bash +cd ~/precise-models/hey-computer +# Record false triggers in not-wake-word/ +precise-collect -f not-wake-word/false-triggers.wav +# Add to not-wake-word training set +./3-train-model.sh +``` + +### Server Won't Start with Precise + +**Check Precise installation:** +```bash +conda activate precise +python -c "from precise_runner import PreciseRunner; print('OK')" +``` + +**Check engine:** +```bash +precise-engine --version +# Should show: Precise v0.3.0 +``` + +**Check model file:** +```bash +ls -lh ~/precise-models/hey-computer/hey-computer.net +file ~/precise-models/hey-computer/hey-computer.net +``` + +**Check permissions:** +```bash +chmod +x /usr/local/bin/precise-engine +chmod 644 ~/precise-models/hey-computer/hey-computer.net +``` + +### Audio Quality Issues + +**Test audio path:** +```bash +# Record test on server +arecord -f S16_LE -r 16000 -c 1 -d 5 test.wav + +# Transcribe with Whisper +conda activate voice-assistant +python -c " +import whisper +model = whisper.load_model('base') +result = model.transcribe('test.wav') +print(result['text']) +" +``` + +**If poor quality:** +- Check microphone connection +- Verify sample rate (16kHz) +- Test with USB microphone +- Check for 
interference/noise + +### Maix Duino Connection Issues + +**Check WiFi:** +```python +# In Maix Duino serial console +import network +wlan = network.WLAN(network.STA_IF) +print(wlan.isconnected()) +print(wlan.ifconfig()) +``` + +**Check server reachability:** +```python +# From Maix Duino +import urequests +response = urequests.get('http://10.1.10.71:5000/health') +print(response.json()) +``` + +**Check audio streaming:** +```bash +# On Heimdall, monitor network +sudo tcpdump -i any -n host +# Should see continuous packets when streaming +``` + +## Optimization Tips + +### Reduce Latency + +1. **Use smaller Whisper model:** + ```bash + # Edit .env + WHISPER_MODEL=base # or tiny + ``` + +2. **Optimize Precise sensitivity:** + ```bash + # Find sweet spot between false positives and latency + # Lower threshold = faster trigger but more false positives + ``` + +3. **Pre-load models:** + ```python + # Models load on startup, not first request + # Adds ~30s startup time but eliminates first-request delay + ``` + +### Improve Accuracy + +1. **Use larger Whisper model:** + ```bash + WHISPER_MODEL=large + ``` + +2. **Train more wake word samples:** + ```bash + # Aim for 100+ high-quality samples + # Diverse speakers, conditions, distances + ``` + +3. **Increase training epochs:** + ```bash + # In 3-train-model.sh + precise-train -e 120 hey-computer.net . # vs default 60 + ``` + +### Reduce False Positives + +1. **Collect hard negatives:** + ```bash + # Record TV, music, similar phrases + # Add to not-wake-word training set + ``` + +2. **Increase threshold:** + ```bash + --precise-sensitivity 0.7 # vs default 0.5 + ``` + +3. 
**Use ensemble model:** + ```python + # Run multiple models, require agreement + # Advanced - requires code modification + ``` + +## Production Checklist + +- [ ] Wake word model trained with 50+ samples +- [ ] Model tested with <5% false positive rate +- [ ] Server service enabled and auto-starting +- [ ] Home Assistant token configured +- [ ] Maix Duino WiFi configured +- [ ] End-to-end test successful +- [ ] Logs rotating properly +- [ ] Monitoring in place +- [ ] Backup of trained model +- [ ] Documentation updated + +## Backup and Recovery + +### Backup Trained Model + +```bash +# Backup model +cp ~/precise-models/hey-computer/hey-computer.net \ + ~/precise-models/hey-computer/hey-computer.net.backup + +# Backup to another host +scp ~/precise-models/hey-computer/hey-computer.net \ + user@backup-host:/path/to/backups/ +``` + +### Restore from Backup + +```bash +# Restore model +cp ~/precise-models/hey-computer/hey-computer.net.backup \ + ~/precise-models/hey-computer/hey-computer.net + +# Restart service +sudo systemctl restart voice-assistant +``` + +## Next Steps + +Once basic server-side detection is working: + +1. **Add more intents** - Expand Home Assistant control +2. **Implement TTS playback** - Complete the audio response loop +3. **Multi-room support** - Deploy multiple Maix Duino units +4. **Voice profiles** - Train model on family members +5. 
**Edge deployment** - Convert model for K210 (advanced) + +## Resources + +- Main guide: MYCROFT_PRECISE_GUIDE.md +- Quick start: QUICKSTART.md +- Architecture: maix-voice-assistant-architecture.md +- Mycroft Docs: https://github.com/MycroftAI/mycroft-precise +- Community: https://community.mycroft.ai/ + +## Support + +### Log an Issue + +```bash +# Collect debug info +echo "=== System Info ===" > debug.log +uname -a >> debug.log +conda list >> debug.log +echo "=== Service Status ===" >> debug.log +systemctl status voice-assistant >> debug.log +echo "=== Recent Logs ===" >> debug.log +journalctl -u voice-assistant -n 100 >> debug.log +echo "=== Wake Word Status ===" >> debug.log +curl http://10.1.10.71:5000/wake-word/status >> debug.log +``` + +Then share `debug.log` when asking for help. + +### Common Issues Database + +| Symptom | Likely Cause | Solution | +|---------|--------------|----------| +| No wake detection | Model not loaded | Check `/wake-word/status` | +| Service won't start | Missing dependencies | Reinstall Precise | +| High false positives | Low threshold | Increase to 0.7+ | +| Missing wake words | High threshold | Decrease to 0.3-0.4 | +| Poor transcription | Bad audio quality | Check microphone | +| HA commands fail | Wrong token | Update .env | +| High CPU usage | Large Whisper model | Use smaller model | + +## Conclusion + +With Mycroft Precise, you have complete control over your wake word detection. Start with server-side detection for easier debugging, collect good training data, and tune the threshold for your environment. Once it's working well, you can optionally optimize to edge detection for lower latency. + +The key to success: **Quality training data > Quantity** + +Happy voice assisting! 
🎙️ diff --git a/docs/QUESTIONS_ANSWERED.md b/docs/QUESTIONS_ANSWERED.md new file mode 100755 index 0000000..1d2e6be --- /dev/null +++ b/docs/QUESTIONS_ANSWERED.md @@ -0,0 +1,470 @@ +# Your Questions Answered - Quick Reference + +## TL;DR: Yes, Yes, and Multiple Options! + +### Q1: Pre-trained "Hey Mycroft" Model? + +**Answer: YES! ✅** + +Download and use immediately: +```bash +./quick_start_hey_mycroft.sh +# Done in 5 minutes - no training! +``` + +The pre-trained model works great and saves you 1-2 hours of training time. + +### Q2: Multiple Wake Words? + +**Answer: YES! ✅ (with considerations)** + +**Server-side (Heimdall):** Easy, run 3-5 wake words +```bash +python voice_server_enhanced.py \ + --enable-precise \ + --multi-wake-word +``` + +**Edge (K210):** Feasible for 1-2, challenging for 3+ + +### Q3: Adopting New Users' Voices? + +**Answer: Multiple approaches ✅** + +**Best option:** Train one model with everyone's voices upfront +**Alternative:** Incremental retraining as new users join +**Advanced:** Speaker identification with personalization + +--- + +## Detailed Answers + +### 1. Pre-trained "Hey Mycroft" Model + +#### Where to Get It + +```bash +# Quick start script does this for you +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz +tar xzf hey-mycroft.tar.gz +``` + +#### How to Use + +**Instant deployment:** +```bash +python voice_server.py \ + --enable-precise \ + --precise-model ~/precise-models/pretrained/hey-mycroft.net +``` + +**Fine-tune with your voice:** +```bash +# Record 20-30 samples of your voice saying "Hey Mycroft" +precise-collect + +# Fine-tune from pre-trained +precise-train -e 30 my-hey-mycroft.net . 
\ + --from-checkpoint ~/precise-models/pretrained/hey-mycroft.net +``` + +#### Advantages + +✅ **Zero training time** - Works immediately +✅ **Proven accuracy** - Tested by thousands +✅ **Good baseline** - Already includes diverse voices +✅ **Easy fine-tuning** - Add your voice in 30 mins vs 60+ mins from scratch + +#### When to Use Pre-trained vs Custom + +**Use Pre-trained "Hey Mycroft" when:** +- You want to test quickly +- "Hey Mycroft" is an acceptable wake word +- You want proven accuracy out-of-box + +**Train Custom when:** +- You want a different wake word ("Hey Computer", "Jarvis", etc.) +- Maximum accuracy for your specific environment +- Family-specific wake word + +**Hybrid (Recommended):** +- Start with pre-trained "Hey Mycroft" +- Test and learn the system +- Fine-tune with your samples +- Or add custom wake word later + +--- + +### 2. Multiple Wake Words + +#### Can You Have Multiple? + +**Yes!** Options: + +#### Option A: Server-Side (Recommended) + +**Easy implementation:** +```bash +# Use the enhanced server +python voice_server_enhanced.py \ + --enable-precise \ + --multi-wake-word +``` + +**Configured wake words:** +- "Hey Mycroft" (pre-trained) +- "Hey Computer" (custom) +- "Jarvis" (custom) + +**Resource impact:** +- 3 models = ~15-30% CPU (Heimdall handles easily) +- ~300-600MB RAM +- Each model runs independently + +**Example use cases:** +```python +"Hey Mycroft, what's the time?" 
→ General assistant +"Jarvis, run diagnostics" → Personal assistant mode +"Emergency, call help" → Priority/emergency mode +``` + +#### Option B: Edge (K210) + +**Feasible for 1-2 wake words:** +```python +# Sequential checking +for model in ['hey-mycroft.kmodel', 'emergency.kmodel']: + if detect_wake_word(model): + return model +``` + +**Limitations:** +- +50-100ms latency per additional model +- Memory constraints (6MB total for all models) +- More models = more power consumption + +**Recommendation:** +- K210: 1 wake word (optimal) +- K210: 2 wake words (acceptable) +- K210: 3+ wake words (not recommended) + +#### Option C: Contextual Wake Words + +Different wake words for different purposes: +```python +wake_word_contexts = { + 'hey_mycroft': 'general_assistant', + 'emergency': 'priority_emergency', + 'goodnight': 'bedtime_routine', +} +``` + +#### Should You Use Multiple? + +**One wake word is usually enough!** + +Commercial products (Alexa, Google) use one wake word and they work fine. + +**Use multiple when:** +- Different family members want different wake words +- You want context-specific behaviors (emergency vs. general) +- You enjoy the flexibility + +**Start with one, add more later if needed.** + +--- + +### 3. Adopting New Users' Voices + +#### Challenge + +Same wake word, different voices: +- Mom says "Hey Mycroft" (soprano) +- Dad says "Hey Mycroft" (bass) +- Kids say "Hey Mycroft" (high-pitched) + +All need to work! + +#### Solution 1: Diverse Training (Recommended) + +**During initial training, have everyone record samples:** + +```bash +cd ~/precise-models/family-hey-mycroft + +# Session 1: Mom records 30 samples +precise-collect # Mom speaks "Hey Mycroft" 30 times + +# Session 2: Dad records 30 samples +precise-collect # Dad speaks "Hey Mycroft" 30 times + +# Session 3: Kids record 20 samples each +precise-collect # Kids speak "Hey Mycroft" 40 times total + +# Train one model with all voices +precise-train -e 60 family-hey-mycroft.net . 
+ +# Deploy +python voice_server.py \ + --enable-precise \ + --precise-model family-hey-mycroft.net +``` + +**Pros:** +✅ One model works for everyone +✅ Simple deployment +✅ No switching needed +✅ Works from day one + +**Cons:** +❌ Need everyone's time upfront +❌ Slightly lower per-person accuracy than individual models + +#### Solution 2: Incremental Training + +**Start with one person, add others over time:** + +```bash +# Week 1: Train with Dad's voice +precise-train -e 60 hey-mycroft.net . + +# Week 2: Mom wants to use it +# Collect Mom's samples +precise-collect # Mom records 20-30 samples + +# Add to training set +cp mom-samples/* wake-word/ + +# Retrain from checkpoint (faster!) +precise-train -e 30 hey-mycroft.net . \ + --from-checkpoint hey-mycroft.net + +# Now works for both Dad and Mom! + +# Week 3: Kids want in +# Repeat process... +``` + +**Pros:** +✅ Don't need everyone upfront +✅ Easy to add new users +✅ Model improves gradually + +**Cons:** +❌ New users may have issues initially +❌ Requires periodic retraining + +#### Solution 3: Speaker Identification (Advanced) + +**Identify who's speaking, use personalized model/settings:** + +```bash +# Install speaker ID +pip install pyannote.audio scipy --break-system-packages + +# Use enhanced server +python voice_server_enhanced.py \ + --enable-precise \ + --enable-speaker-id \ + --hf-token YOUR_HF_TOKEN +``` + +**Enroll users:** +```bash +# Record 30-second voice sample from each person +# POST to /speakers/enroll with audio + name + +curl -F "name=alan" \ + -F "audio=@alan_voice.wav" \ + http://localhost:5000/speakers/enroll + +curl -F "name=sarah" \ + -F "audio=@sarah_voice.wav" \ + http://localhost:5000/speakers/enroll +``` + +**Benefits:** +```python +# Different responses per user +if speaker == 'alan': + turn_on('light.alan_office') +elif speaker == 'sarah': + turn_on('light.sarah_office') + +# Different permissions +if speaker == 'kids' and command.startswith('buy'): + return "Sorry, kids can't make 
purchases" +``` + +**Pros:** +✅ Personalized responses +✅ User-specific settings +✅ Better accuracy (optimized per voice) +✅ Can track who said what + +**Cons:** +❌ More complex +❌ Privacy considerations +❌ Additional CPU/RAM (~10% + 200MB) +❌ Requires voice enrollment + +#### Solution 4: Pre-trained Model (Easiest) + +**"Hey Mycroft" already includes diverse voices!** + +```bash +# Just use it - already trained on many voices +./quick_start_hey_mycroft.sh +``` + +The community model was trained with: +- Male and female voices +- Different accents +- Different ages +- Various environments + +**It should work for most family members out-of-box!** + +Then fine-tune if needed. + +--- + +## Recommended Path for Your Situation + +### Scenario: Family of 3-4 People + +**Week 1: Quick Start** +```bash +# Use pre-trained "Hey Mycroft" +./quick_start_hey_mycroft.sh + +# Test with all family members +# Likely works for everyone already! +``` + +**Week 2: Fine-tune if Needed** +```bash +# If someone has issues: +# Have them record 20 samples +# Fine-tune the model + +precise-train -e 30 family-hey-mycroft.net . \ + --from-checkpoint ~/precise-models/pretrained/hey-mycroft.net +``` + +**Week 3: Add Features** +```bash +# If you want personalization: +python voice_server_enhanced.py \ + --enable-speaker-id + +# Enroll each family member +``` + +### Scenario: Just You (or 1-2 People) + +**Option 1: Pre-trained** +```bash +./quick_start_hey_mycroft.sh +# Done! 
+``` + +**Option 2: Custom Wake Word** +```bash +# Train custom "Hey Computer" +cd ~/precise-models/hey-computer +./1-record-wake-word.sh # 50 samples +./2-record-not-wake-word.sh # 200 samples +./3-train-model.sh +``` + +### Scenario: Multiple People + Multiple Wake Words + +**Full setup:** +```bash +# Pre-trained for family +./quick_start_hey_mycroft.sh + +# Personal wake word for Dad +cd ~/precise-models/jarvis +# Train custom wake word + +# Emergency wake word +cd ~/precise-models/emergency +# Train emergency wake word + +# Run multi-wake-word server +python voice_server_enhanced.py \ + --enable-precise \ + --multi-wake-word \ + --enable-speaker-id +``` + +--- + +## Quick Decision Matrix + +| Your Situation | Recommendation | +|----------------|----------------| +| **Just getting started** | Pre-trained "Hey Mycroft" | +| **Want different wake word** | Train custom model | +| **Family of 3-4** | Pre-trained + fine-tune if needed | +| **Want personalization** | Add speaker ID | +| **Multiple purposes** | Multiple wake words (server-side) | +| **Deploying to K210** | 1 wake word, no speaker ID | + +--- + +## Files to Use + +**Quick start with pre-trained:** +- `quick_start_hey_mycroft.sh` - Zero training, 5 minutes! + +**Multiple wake words:** +- `voice_server_enhanced.py` - Multi-wake-word + speaker ID support + +**Training custom:** +- `setup_precise.sh` - Setup training environment +- Scripts in `~/precise-models/your-wake-word/` + +**Documentation:** +- `WAKE_WORD_ADVANCED.md` - Detailed guide (this is comprehensive!) +- `PRECISE_DEPLOYMENT.md` - Production deployment + +--- + +## Summary + +✅ **Yes**, pre-trained "Hey Mycroft" exists and works great +✅ **Yes**, you can have multiple wake words (server-side is easy) +✅ **Yes**, multiple approaches for multi-user support + +**Recommended approach:** +1. Start with `./quick_start_hey_mycroft.sh` (5 mins) +2. Test with all family members +3. Fine-tune if anyone has issues +4. 
Add speaker ID later if you want personalization +5. Consider multiple wake words only if you have specific use cases + +**Keep it simple!** One pre-trained wake word works for most people. + +--- + +## Next Actions + +**Ready to start?** + +```bash +# 5-minute quick start +./quick_start_hey_mycroft.sh + +# Or read more first +cat WAKE_WORD_ADVANCED.md +``` + +**Questions?** +- Pre-trained models: See WAKE_WORD_ADVANCED.md § Pre-trained +- Multiple wake words: See WAKE_WORD_ADVANCED.md § Multiple Wake Words +- Voice adaptation: See WAKE_WORD_ADVANCED.md § Voice Adaptation + +**Happy voice assisting! 🎙️** diff --git a/docs/QUICKSTART.md b/docs/QUICKSTART.md new file mode 100755 index 0000000..8baddea --- /dev/null +++ b/docs/QUICKSTART.md @@ -0,0 +1,421 @@ +# Maix Duino Voice Assistant - Quick Start Guide + +## Overview +This guide will walk you through setting up a local, privacy-focused voice assistant using your Maix Duino board and Home Assistant integration. All processing happens on your local network - no cloud services required. + +## What You'll Build +- Wake word detection on Maix Duino (edge device) +- Speech-to-text using Whisper on Heimdall +- Home Assistant integration for smart home control +- Text-to-speech responses using Piper +- All processing local to your 10.1.10.0/24 network + +## Hardware Requirements +- [x] Sipeed Maix Duino board (you have this!) 
+- [ ] I2S MEMS microphone (or microphone array) +- [ ] Small speaker (3-5W) or audio output +- [ ] MicroSD card (4GB+) formatted as FAT32 +- [ ] USB-C cable for power and programming + +## Network Prerequisites +- Maix Duino will need WiFi access to your 10.1.10.0/24 network +- Heimdall (10.1.10.71) for AI processing +- Home Assistant instance (configure URL in setup) + +## Setup Process + +### Phase 1: Server Setup (Heimdall) + +#### Step 1: Run the setup script +```bash +# Transfer files to Heimdall +scp setup_voice_assistant.sh voice_server.py alan@10.1.10.71:~/ + +# SSH to Heimdall +ssh alan@10.1.10.71 + +# Make setup script executable and run it +chmod +x setup_voice_assistant.sh +./setup_voice_assistant.sh +``` + +#### Step 2: Configure Home Assistant access +```bash +# Edit the config file +vim ~/voice-assistant/config/.env +``` + +Update these values: +```env +HA_URL=http://your-home-assistant:8123 +HA_TOKEN=your_long_lived_access_token_here +``` + +To get a long-lived access token: +1. Open Home Assistant +2. Click your profile (bottom left) +3. Scroll to "Long-Lived Access Tokens" +4. Click "Create Token" +5. Copy the token and paste it in .env + +#### Step 3: Test the server +```bash +cd ~/voice-assistant +./test_server.sh +``` + +You should see: +``` +Loading Whisper model: medium +Whisper model loaded successfully +Starting voice processing server on 0.0.0.0:5000 +``` + +#### Step 4: Test with curl (from another terminal) +```bash +# Test health endpoint +curl http://10.1.10.71:5000/health + +# Should return: +# {"status":"healthy","whisper_loaded":true,"ha_connected":true} +``` + +### Phase 2: Maix Duino Setup + +#### Step 1: Flash MaixPy firmware +1. Download latest MaixPy firmware from: https://dl.sipeed.com/MAIX/MaixPy/release/ +2. Download Kflash GUI: https://github.com/sipeed/kflash_gui +3. Connect Maix Duino via USB +4. 
Flash firmware using Kflash GUI + +#### Step 2: Prepare SD card +```bash +# Format SD card as FAT32 +# Create directory structure: +mkdir -p /path/to/sdcard/models + +# Copy the client script +cp maix_voice_client.py /path/to/sdcard/main.py +``` + +#### Step 3: Configure WiFi settings +Edit `/path/to/sdcard/main.py`: +```python +# WiFi Settings +WIFI_SSID = "YourNetworkName" +WIFI_PASSWORD = "YourPassword" + +# Server Settings +VOICE_SERVER_URL = "http://10.1.10.71:5000" +``` + +#### Step 4: Test the board +1. Insert SD card into Maix Duino +2. Connect to serial console (115200 baud) + ```bash + screen /dev/ttyUSB0 115200 + # or + minicom -D /dev/ttyUSB0 -b 115200 + ``` +3. Power on the board +4. Watch the serial output for connection status + +### Phase 3: Integration & Testing + +#### Test 1: Basic connectivity +1. Maix Duino should connect to WiFi and display IP on LCD +2. Server should show in logs when Maix connects + +#### Test 2: Audio capture +The current implementation uses amplitude-based wake word detection as a placeholder. To test: +1. Clap loudly near the microphone +2. Speak a command (e.g., "turn on the living room lights") +3. Watch the LCD for transcription and response + +#### Test 3: Home Assistant control +Supported commands (add more in voice_server.py): +- "Turn on the living room lights" +- "Turn off the bedroom lights" +- "What's the temperature?" +- "Toggle the kitchen lights" + +### Phase 4: Wake Word Training (Advanced) + +The placeholder wake word detection uses simple amplitude triggering. For production use: + +#### Option A: Use Porcupine (easiest) +1. Sign up at: https://console.picovoice.ai/ +2. Train custom wake word +3. Download .ppn model +4. 
Convert to .kmodel for K210 + +#### Option B: Use Mycroft Precise (FOSS) +```bash +# On a machine with GPU +conda create -n precise python=3.6 +conda activate precise +pip install precise-runner + +# Record wake word samples +precise-collect + +# Train model +precise-train -e 60 my-wake-word.net my-wake-word/ + +# Convert to .kmodel +# (requires additional tools - see MaixPy docs) +``` + +## Architecture Diagram + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Your Home Network (10.1.10.0/24) │ +│ │ +│ ┌──────────────┐ ┌──────────────┐ │ +│ │ Maix Duino │────────>│ Heimdall │ │ +│ │ 10.1.10.xxx │ Audio │ 10.1.10.71 │ │ +│ │ │<────────│ │ │ +│ │ - Wake Word │ Response│ - Whisper │ │ +│ │ - Mic Input │ │ - Piper TTS │ │ +│ │ - Speaker │ │ - Flask API │ │ +│ └──────────────┘ └──────┬───────┘ │ +│ │ │ +│ │ REST API │ +│ v │ +│ ┌──────────────┐ │ +│ │ Home Asst. │ │ +│ │ homeassistant│ │ +│ │ │ │ +│ │ - Devices │ │ +│ │ - Automation │ │ +│ └──────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Troubleshooting + +### Maix Duino won't connect to WiFi +```python +# Check serial output for errors +# Common issues: +# - Wrong SSID/password +# - WPA3 not supported (use WPA2) +# - 5GHz network (use 2.4GHz) +``` + +### Whisper transcription is slow +```bash +# Use a smaller model on Heimdall +# Edit ~/voice-assistant/config/.env: +WHISPER_MODEL=base # or tiny for fastest +``` + +### Home Assistant commands don't work +```bash +# Check server logs +journalctl -u voice-assistant -f + +# Test HA connection manually +curl -H "Authorization: Bearer YOUR_TOKEN" \ + http://your-ha:8123/api/states +``` + +### Audio quality is poor +1. Check microphone connections +2. Adjust `SAMPLE_RATE` in maix_voice_client.py +3. Test with USB microphone first +4. 
Consider microphone array for better pickup + +### Out of memory on Maix Duino +```python +# In main_loop(), add more frequent GC: +if gc.mem_free() < 200000: # Increase threshold + gc.collect() +``` + +## Adding New Intents + +Edit `voice_server.py` and add patterns to `IntentParser.PATTERNS`: + +```python +PATTERNS = { + # Existing patterns... + + 'set_temperature': [ + r'set (?:the )?temperature to (\d+)', + r'make it (\d+) degrees', + ], +} +``` + +Then add the handler in `execute_intent()`: + +```python +elif intent == 'set_temperature': + temp = params.get('temperature') + success = ha_client.call_service( + 'climate', 'set_temperature', + entity_id, temperature=temp + ) + return f"Set temperature to {temp} degrees" +``` + +## Entity Mapping + +Add your Home Assistant entities to `IntentParser.ENTITY_MAP`: + +```python +ENTITY_MAP = { + # Lights + 'living room light': 'light.living_room', + 'bedroom light': 'light.bedroom', + + # Climate + 'thermostat': 'climate.main_floor', + 'temperature': 'sensor.main_floor_temperature', + + # Switches + 'coffee maker': 'switch.coffee_maker', + 'fan': 'switch.bedroom_fan', + + # Media + 'tv': 'media_player.living_room_tv', + 'music': 'media_player.whole_house', +} +``` + +## Performance Tuning + +### Reduce latency +1. Use Whisper `tiny` or `base` model +2. Implement streaming audio (currently batch) +3. Pre-load TTS models +4. Use faster TTS engine (e.g., espeak) + +### Improve accuracy +1. Use Whisper `large` model (slower) +2. Train custom wake word +3. Add NLU layer (Rasa, spaCy) +4. 
Collect and fine-tune on your voice + +## Next Steps + +### Short term +- [ ] Add more Home Assistant entity mappings +- [ ] Implement Piper TTS playback on Maix Duino +- [ ] Train custom wake word model +- [ ] Add LED animations for better feedback +- [ ] Implement conversation context + +### Medium term +- [ ] Multi-room support (multiple Maix Duino units) +- [ ] Voice profiles for different users +- [ ] Integration with Plex for media control +- [ ] Calendar and reminder functionality +- [ ] Weather updates from local weather station + +### Long term +- [ ] Custom skills/plugins system +- [ ] Integration with other services (Nextcloud, Matrix) +- [ ] Sound event detection (doorbell, smoke alarm) +- [ ] Intercom functionality between rooms +- [ ] Voice-controlled automation creation + +## Alternatives & Fallbacks + +If the Maix Duino proves limiting: + +### Raspberry Pi Zero 2 W +- More processing power +- Better software support +- USB audio support +- Cost: ~$15 + +### ESP32-S3 +- Better WiFi +- More RAM (8MB) +- Cheaper (~$10) +- Good community support + +### Orange Pi Zero 2 +- ARM Cortex-A53 quad-core +- 512MB-1GB RAM +- Full Linux support +- Cost: ~$20 + +## Resources + +### Documentation +- Maix Duino: https://wiki.sipeed.com/hardware/en/maix/ +- MaixPy: https://maixpy.sipeed.com/ +- Whisper: https://github.com/openai/whisper +- Piper TTS: https://github.com/rhasspy/piper +- Home Assistant API: https://developers.home-assistant.io/ + +### Community Projects +- Rhasspy: https://rhasspy.readthedocs.io/ +- Willow: https://github.com/toverainc/willow +- Mycroft: https://mycroft.ai/ + +### Wake Word Tools +- Porcupine: https://picovoice.ai/platform/porcupine/ +- Mycroft Precise: https://github.com/MycroftAI/mycroft-precise +- Snowboy (archived): https://github.com/Kitt-AI/snowboy + +## Getting Help + +### Check logs +```bash +# Server logs (if using systemd) +sudo journalctl -u voice-assistant -f + +# Or manual log file +tail -f 
~/voice-assistant/logs/voice_assistant.log + +# Maix Duino serial console +screen /dev/ttyUSB0 115200 +``` + +### Common issues and solutions +See the Troubleshooting section above + +### Useful commands +```bash +# Restart service +sudo systemctl restart voice-assistant + +# Check service status +sudo systemctl status voice-assistant + +# Test HA connection +curl http://10.1.10.71:5000/health + +# Monitor Maix Duino +minicom -D /dev/ttyUSB0 -b 115200 +``` + +## Cost Breakdown + +| Item | Cost | Status | +|------|------|--------| +| Maix Duino | $30 | Have it! | +| I2S Microphone | $5-10 | Need | +| Speaker | $10 | Need (or use existing) | +| MicroSD Card | $5 | Have it? | +| **Total** | **$15-25** | (vs $50+ commercial) | + +**Benefits of local solution:** +- No subscription fees +- Complete privacy (no cloud) +- Customizable to your needs +- Integration with existing infrastructure +- Learning experience! + +## Conclusion + +You now have everything you need to build a local, privacy-focused voice assistant! The setup leverages your existing infrastructure (Heimdall for processing, Home Assistant for automation) while keeping costs minimal. + +Start with the basic setup, test each component, then iterate and improve. The beauty of this approach is you can enhance it over time without being locked into a commercial platform. + +Good luck, and enjoy your new voice assistant! 🎙️ diff --git a/docs/WAKE_WORD_ADVANCED.md b/docs/WAKE_WORD_ADVANCED.md new file mode 100755 index 0000000..5f80066 --- /dev/null +++ b/docs/WAKE_WORD_ADVANCED.md @@ -0,0 +1,723 @@ +# Wake Word Models: Pre-trained, Multiple, and Voice Adaptation + +## Pre-trained Wake Word Models + +### Yes! 
"Hey Mycroft" Already Exists + +Mycroft provides several pre-trained models that you can use immediately: + +#### Available Pre-trained Models + +**Hey Mycroft** (Official) +```bash +# Download from Mycroft's model repository +cd ~/precise-models/pretrained +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz +tar xzf hey-mycroft.tar.gz + +# Test immediately +conda activate precise +precise-listen hey-mycroft.net + +# Should detect "Hey Mycroft" right away! +``` + +**Other Available Models:** +- **Hey Mycroft** - Best tested, most reliable +- **Christopher** - Alternative wake word +- **Hey Jarvis** - Community contributed +- **Computer** - Star Trek style + +#### Using Pre-trained Models + +**Option 1: Use as-is** +```bash +# Just point your server to the pre-trained model +python voice_server.py \ + --enable-precise \ + --precise-model ~/precise-models/pretrained/hey-mycroft.net \ + --precise-sensitivity 0.5 +``` + +**Option 2: Fine-tune for your voice** +```bash +# Use pre-trained as starting point, add your samples +cd ~/precise-models/my-hey-mycroft + +# Record additional samples +precise-collect + +# Train from checkpoint (much faster than from scratch!) +precise-train -e 30 my-hey-mycroft.net . 
\ + --from-checkpoint ~/precise-models/pretrained/hey-mycroft.net + +# This adds your voice/environment while keeping the base model +``` + +**Option 3: Ensemble with custom** +```python +# Use both pre-trained and custom model +# Require both to agree (reduces false positives) +# See implementation below +``` + +### Advantages of Pre-trained Models + +✅ **Instant deployment** - No training required +✅ **Proven accuracy** - Tested by thousands of users +✅ **Good starting point** - Fine-tune rather than train from scratch +✅ **Multiple speakers** - Already includes diverse voices +✅ **Save time** - Skip 1-2 hours of training + +### Disadvantages + +❌ **Generic** - Not optimized for your voice/environment +❌ **May need tuning** - Threshold adjustment required +❌ **Limited choice** - Only a few wake words available + +### Recommendation + +**Start with "Hey Mycroft"** pre-trained model: +1. Deploy immediately (zero training time) +2. Test in your environment +3. Collect false positives/negatives +4. Fine-tune with your examples +5. Best of both worlds! + +## Multiple Wake Words + +### Can You Have Multiple Wake Words? + +**Short answer:** Yes, but with tradeoffs. + +### Implementation Approaches + +#### Approach 1: Server-Side Multiple Models (Recommended) + +Run multiple Precise models in parallel on Heimdall: + +```python +# In voice_server.py +from precise_runner import PreciseEngine, PreciseRunner + +# Global runners for each wake word +precise_runners = {} +wake_word_configs = { + 'hey_mycroft': { + 'model': '~/precise-models/pretrained/hey-mycroft.net', + 'sensitivity': 0.5, + 'response': 'Yes?' 
+ }, + 'hey_computer': { + 'model': '~/precise-models/hey-computer/hey-computer.net', + 'sensitivity': 0.5, + 'response': 'I\'m listening' + }, + 'jarvis': { + 'model': '~/precise-models/jarvis/jarvis.net', + 'sensitivity': 0.6, + 'response': 'At your service, sir' + } +} + +def on_wake_word_detected(wake_word_name): + """Callback with wake word identifier""" + def callback(): + print(f"Wake word detected: {wake_word_name}") + wake_word_queue.put({ + 'timestamp': time.time(), + 'wake_word': wake_word_name, + 'response': wake_word_configs[wake_word_name]['response'] + }) + return callback + +def start_multiple_wake_words(): + """Start multiple Precise listeners""" + for name, config in wake_word_configs.items(): + engine = PreciseEngine( + '/usr/local/bin/precise-engine', + os.path.expanduser(config['model']) + ) + + runner = PreciseRunner( + engine, + sensitivity=config['sensitivity'], + on_activation=on_wake_word_detected(name) + ) + + runner.start() + precise_runners[name] = runner + print(f"Started wake word listener: {name}") +``` + +**Resource Usage:** +- CPU: ~5-10% per model (3 models = ~15-30%) +- RAM: ~100-200MB per model +- Still very manageable on Heimdall + +**Pros:** +✅ Different wake words for different purposes +✅ Family members can choose preferred wake word +✅ Context-aware responses +✅ Easy to add/remove models + +**Cons:** +❌ Higher CPU usage (scales linearly) +❌ Increased false positive risk (3x models = 3x chance) +❌ More complex configuration + +#### Approach 2: Edge Multiple Models (K210) + +**Challenge:** K210 has limited resources + +**Option A: Sequential checking** (Feasible) +```python +# Check each model in sequence +models = ['hey-mycroft.kmodel', 'hey-computer.kmodel'] + +for model in models: + kpu_task = kpu.load(f"/sd/models/{model}") + result = kpu.run(kpu_task, audio_features) + if result > threshold: + return model # Wake word detected +``` + +**Resource impact:** +- Latency: +50-100ms per additional model +- Memory: Models must 
fit in 6MB total +- CPU: ~30% per model check + +**Option B: Combined model** (Advanced) +```python +# Train a single model that recognizes multiple phrases +# Each phrase maps to different output class +# More complex training but single inference +``` + +**Recommendation for edge:** +- **1-2 wake words max** on K210 +- **Server-side** for 3+ wake words + +#### Approach 3: Contextual Wake Words + +Different wake words trigger different behaviors: + +```python +wake_word_contexts = { + 'hey_mycroft': 'general', # General commands + 'hey_assistant': 'general', # Alternative general + 'emergency': 'priority', # High priority + 'goodnight': 'bedtime', # Bedtime routine +} + +def handle_wake_word(wake_word, command): + context = wake_word_contexts[wake_word] + + if context == 'priority': + # Skip queue, process immediately + # Maybe call emergency contact + pass + elif context == 'bedtime': + # Trigger bedtime automation + # Lower volume for responses + pass + else: + # Normal processing + pass +``` + +### Best Practices for Multiple Wake Words + +1. **Start with one** - Get it working well first +2. **Add gradually** - One at a time, test thoroughly +3. **Different purposes** - Each wake word should have a reason +4. **Monitor performance** - Track false positives per wake word +5. 
**User preference** - Let family members choose their favorite + +### Recommended Configuration + +**For most users:** +```python +wake_words = { + 'hey_mycroft': 'primary', # Main wake word (pre-trained) + 'hey_computer': 'alternative' # Custom trained for your voice +} +``` + +**For power users:** +```python +wake_words = { + 'hey_mycroft': 'general', + 'jarvis': 'personal_assistant', # Custom responses + 'computer': 'technical_queries', # Different intent parser +} +``` + +**For families:** +```python +wake_words = { + 'hey_mycroft': 'shared', # Everyone can use + 'dad': 'user_alan', # Personalized + 'mom': 'user_sarah', # Personalized + 'kids': 'user_children', # Kid-safe responses +} +``` + +## Voice Adaptation and Multi-User Support + +### Challenge: Different Voices, Same Wake Word + +When multiple people use the system: +- Different accents +- Different speech patterns +- Different pronunciations +- Different vocal characteristics + +### Solution Approaches + +#### Approach 1: Diverse Training Data (Recommended) + +**During initial training:** +```bash +# Have everyone in household record samples +cd ~/precise-models/hey-computer + +# Alan records 30 samples +precise-collect # Record as user 1 + +# Sarah records 30 samples +precise-collect # Record as user 2 + +# Kids record 20 samples +precise-collect # Record as user 3 + +# Combine all in training set +# Train one model that works for everyone +./3-train-model.sh +``` + +**Pros:** +✅ Single model for everyone +✅ No user switching needed +✅ Simple to maintain +✅ Works immediately for all users + +**Cons:** +❌ May have lower per-person accuracy +❌ Requires upfront time from everyone +❌ Hard to add new users later + +#### Approach 2: Incremental Training + +Start with your voice, add others over time: + +```bash +# Week 1: Train with Alan's voice +cd ~/precise-models/hey-computer +# Record and train with Alan's samples +precise-train -e 60 hey-computer.net . 
+ +# Week 2: Sarah wants to use it +# Collect Sarah's samples +mkdir -p sarah-samples/wake-word +precise-collect # Sarah records 20-30 samples + +# Add to existing training set +cp sarah-samples/wake-word/* wake-word/ + +# Retrain (continue from checkpoint) +precise-train -e 30 hey-computer.net . \ + --from-checkpoint hey-computer.net + +# Now works for both Alan and Sarah! +``` + +**Pros:** +✅ Gradual improvement +✅ Don't need everyone upfront +✅ Easy to add new users +✅ Maintains accuracy for existing users + +**Cons:** +❌ May not work well for new users initially +❌ Requires retraining periodically + +#### Approach 3: Per-User Models with Speaker Identification + +Train separate models + identify who's speaking: + +**Step 1: Train per-user wake word models** +```bash +# Alan's model +~/precise-models/hey-computer-alan/ + +# Sarah's model +~/precise-models/hey-computer-sarah/ + +# Kids' model +~/precise-models/hey-computer-kids/ +``` + +**Step 2: Use speaker identification** +```python +# Pseudo-code for speaker identification +def identify_speaker(audio): + """ + Identify speaker from voice characteristics + Using speaker embeddings (x-vectors, d-vectors) + """ + # Extract speaker embedding + embedding = speaker_encoder.encode(audio) + + # Compare to known users + similarities = { + 'alan': cosine_similarity(embedding, alan_embedding), + 'sarah': cosine_similarity(embedding, sarah_embedding), + 'kids': cosine_similarity(embedding, kids_embedding), + } + + # Return most similar + return max(similarities, key=similarities.get) + +def process_command(audio): + # Detect wake word with all models + wake_detected = check_all_models(audio) + + if wake_detected: + # Identify speaker + speaker = identify_speaker(audio) + + # Use speaker-specific model for better accuracy + model = f'~/precise-models/hey-computer-{speaker}/' + + # Continue with speaker context + process_with_context(audio, speaker) +``` + +**Speaker identification libraries:** +- **Resemblyzer** - Simple 
speaker verification +- **speechbrain** - Complete toolkit +- **pyannote.audio** - You already use this for diarization! + +**Implementation:** +```bash +# You already have pyannote for diarization! +conda activate voice-assistant +pip install pyannote.audio --break-system-packages + +# Can use speaker embeddings for identification +``` + +```python +from pyannote.audio import Inference + +# Load speaker embedding model +inference = Inference( + "pyannote/embedding", + use_auth_token=hf_token +) + +# Extract embeddings for known users +alan_embedding = inference("alan_voice_sample.wav") +sarah_embedding = inference("sarah_voice_sample.wav") + +# Compare with incoming audio +unknown_embedding = inference(audio_buffer) + +from scipy.spatial.distance import cosine +alan_similarity = 1 - cosine(unknown_embedding, alan_embedding) +sarah_similarity = 1 - cosine(unknown_embedding, sarah_embedding) + +if alan_similarity > 0.8: + user = 'alan' +elif sarah_similarity > 0.8: + user = 'sarah' +else: + user = 'unknown' +``` + +**Pros:** +✅ Personalized responses per user +✅ Better accuracy (model optimized for each voice) +✅ User-specific preferences/permissions +✅ Can track who said what + +**Cons:** +❌ More complex setup +❌ Higher resource usage +❌ Requires voice samples from each user +❌ Privacy considerations + +#### Approach 4: Adaptive/Online Learning + +Model improves automatically based on usage: + +```python +class AdaptiveWakeWord: + def __init__(self, base_model): + self.base_model = base_model + self.user_samples = [] + self.retrain_threshold = 50 # Retrain after N samples + + def on_detection(self, audio, user_confirmed=True): + """User confirms this was correct detection""" + if user_confirmed: + self.user_samples.append(audio) + + # Periodically retrain + if len(self.user_samples) >= self.retrain_threshold: + self.retrain_with_samples() + self.user_samples = [] + + def retrain_with_samples(self): + """Background retraining with collected samples""" + # Add 
samples to training set + # Retrain model + # Swap in new model + pass +``` + +**Pros:** +✅ Automatic improvement +✅ Adapts to user's voice over time +✅ No manual retraining +✅ Gets better with use + +**Cons:** +❌ Complex implementation +❌ Requires user feedback mechanism +❌ Risk of drift/degradation +❌ Background training overhead + +## Recommended Strategy + +### Phase 1: Single Wake Word, Single Model +```bash +# Week 1-2 +# Use pre-trained "Hey Mycroft" +# OR train custom "Hey Computer" with all family members' voices +# Keep it simple, get it working +``` + +### Phase 2: Add Fine-tuning +```bash +# Week 3-4 +# Collect false positives/negatives +# Retrain with household-specific data +# Optimize threshold +``` + +### Phase 3: Consider Multiple Wake Words +```bash +# Month 2 +# If needed, add second wake word +# "Hey Mycroft" for general +# "Jarvis" for personal assistant tasks +``` + +### Phase 4: Personalization +```bash +# Month 3+ +# If desired, add speaker identification +# Personalized responses +# User-specific preferences +``` + +## Practical Examples + +### Example 1: Family of 4, Single Model + +```bash +# Training session with everyone +cd ~/precise-models/hey-mycroft-family + +# Dad records 25 samples +precise-collect + +# Mom records 25 samples +precise-collect + +# Kid 1 records 15 samples +precise-collect + +# Kid 2 records 15 samples +precise-collect + +# Collect shared negative samples (200+) +# TV, music, conversation, etc. +precise-collect -f not-wake-word/household.wav + +# Train single model for everyone +precise-train -e 60 hey-mycroft-family.net . + +# Deploy +python voice_server.py \ + --enable-precise \ + --precise-model hey-mycroft-family.net +``` + +**Result:** Everyone can use it, one model, simple. 
+ +### Example 2: Two Wake Words, Different Purposes + +```python +# voice_server.py configuration +wake_words = { + 'hey_mycroft': { + 'model': 'hey-mycroft.net', + 'sensitivity': 0.5, + 'intent_parser': 'general', # All commands + 'response': 'Yes?' + }, + 'emergency': { + 'model': 'emergency.net', + 'sensitivity': 0.7, # Higher threshold + 'intent_parser': 'emergency', # Limited commands + 'response': 'Emergency mode activated' + } +} + +# "Hey Mycroft, turn on the lights" - works +# "Emergency, call for help" - triggers emergency protocol +``` + +### Example 3: Speaker Identification + Personalization + +```python +# Enhanced processing with speaker ID +def process_with_speaker_id(audio, speaker): + # Different HA entity based on speaker + entity_maps = { + 'alan': { + 'bedroom_light': 'light.master_bedroom', + 'office_light': 'light.alan_office', + }, + 'sarah': { + 'bedroom_light': 'light.master_bedroom', + 'office_light': 'light.sarah_office', + }, + 'kids': { + 'bedroom_light': 'light.kids_bedroom', + 'tv': None, # Kids can't control TV + } + } + + # Transcribe command + text = whisper_transcribe(audio) + + # "Turn on bedroom light" + if 'bedroom light' in text: + entity = entity_maps[speaker]['bedroom_light'] + ha_client.turn_on(entity) + + response = f"Turned on your bedroom light" + + return response +``` + +## Resource Requirements + +### Single Wake Word +- **CPU:** 5-10% (Heimdall) +- **RAM:** 100-200MB +- **Model size:** 1-3MB +- **Training time:** 30-60 min + +### Multiple Wake Words (3 models) +- **CPU:** 15-30% (Heimdall) +- **RAM:** 300-600MB +- **Model size:** 3-9MB total +- **Training time:** 90-180 min + +### With Speaker Identification +- **CPU:** +5-10% for speaker ID +- **RAM:** +200-300MB for embedding model +- **Model size:** +50MB for speaker model +- **Setup time:** +30-60 min for voice enrollment + +### K210 Edge (Maix Duino) +- **Single model:** Feasible, ~30% CPU +- **2 models:** Feasible, ~60% CPU, higher latency +- **3+ models:** 
Not recommended +- **Speaker ID:** Not feasible (limited RAM/compute) + +## Quick Decision Guide + +**Just getting started?** +→ Use pre-trained "Hey Mycroft" + +**Want custom wake word?** +→ Train one model with all family voices + +**Need multiple wake words?** +→ Start server-side with 2-3 models + +**Want personalization?** +→ Add speaker identification + +**Deploying to edge (K210)?** +→ Stick to 1-2 wake words maximum + +**Family of 4+ people?** +→ Train single model with everyone's voice + +**Privacy is paramount?** +→ Skip speaker ID, use single universal model + +## Testing Multiple Wake Words + +```bash +# Test all wake words quickly +conda activate precise + +# Terminal 1: Hey Mycroft +precise-listen hey-mycroft.net + +# Terminal 2: Hey Computer +precise-listen hey-computer.net + +# Terminal 3: Emergency +precise-listen emergency.net + +# Say each wake word, verify correct detection +``` + +## Conclusion + +### For Your Maix Duino Project: + +**Recommended approach:** +1. **Start with "Hey Mycroft"** - Use pre-trained model +2. **Fine-tune if needed** - Add your household's voices +3. **Consider 2nd wake word** - Only if you have a specific use case +4. **Speaker ID** - Phase 2/3 enhancement, not critical for MVP +5. **Keep it simple** - One wake word works great for most users + +**The pre-trained "Hey Mycroft" model saves you 1-2 hours** and works immediately. You can always fine-tune or add custom wake words later! + +**Multiple wake words are cool but not necessary** - Most commercial products use just one. Focus on making one wake word work really well before adding more. + +**Voice adaptation** - Training with multiple voices upfront is simpler than per-user models. Save speaker ID for later if you need personalization. 
+ +## Quick Start with Pre-trained + +```bash +# On Heimdall +cd ~/precise-models/pretrained +wget https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz +tar xzf hey-mycroft.tar.gz + +# Test it +conda activate precise +precise-listen hey-mycroft.net + +# Deploy +cd ~/voice-assistant +python voice_server.py \ + --enable-precise \ + --precise-model ~/precise-models/pretrained/hey-mycroft.net + +# You're done! No training needed! +``` + +**That's it - you have a working wake word in 5 minutes!** 🎉 diff --git a/docs/WAKE_WORD_QUICK_REF.md b/docs/WAKE_WORD_QUICK_REF.md new file mode 100755 index 0000000..9ab0ff8 --- /dev/null +++ b/docs/WAKE_WORD_QUICK_REF.md @@ -0,0 +1,411 @@ +# Wake Word Quick Reference Card + +## 🎯 TL;DR: What Should I Do? + +### Recommendation for Your Setup + +**Week 1:** Use pre-trained "Hey Mycroft" +```bash +./download_pretrained_models.sh --model hey-mycroft +precise-listen ~/precise-models/pretrained/hey-mycroft.net +``` + +**Week 2-3:** Fine-tune with all family members' voices +```bash +cd ~/precise-models/hey-mycroft-family +precise-train -e 30 custom.net . --from-checkpoint ../pretrained/hey-mycroft.net +``` + +**Week 4+:** Add speaker identification +```bash +pip install resemblyzer +python enroll_speaker.py --name Alan --duration 20 +python enroll_speaker.py --name [Family] --duration 20 +``` + +**Month 2+:** Add second wake word (Hey Jarvis for Plex?) +```bash +./download_pretrained_models.sh --model hey-jarvis +# Run both in parallel on server +``` + +--- + +## 📋 Pre-trained Models + +### Available Models (Ready to Use!) 
+ +| Wake Word | Download | Best For | +|-----------|----------|----------| +| **Hey Mycroft** ⭐ | `--model hey-mycroft` | Default choice, most data | +| **Hey Jarvis** | `--model hey-jarvis` | Pop culture, media control | +| **Christopher** | `--model christopher` | Unique, less common | +| **Hey Ezra** | `--model hey-ezra` | Alternative option | + +### Quick Download + +```bash +# Download one +./download_pretrained_models.sh --model hey-mycroft + +# Download all +./download_pretrained_models.sh --test-all + +# Test immediately +precise-listen ~/precise-models/pretrained/hey-mycroft.net +``` + +--- + +## 🔢 Multiple Wake Words + +### Option 1: Multiple Models (Server-Side) ⭐ RECOMMENDED + +**What:** Run 2-3 different wake word models simultaneously +**Where:** Heimdall (server) +**Performance:** ~15-30% CPU for 3 models + +```bash +# Start with multiple wake words +python voice_server.py \ + --enable-precise \ + --precise-models "\ +hey-mycroft:~/models/hey-mycroft.net:0.5,\ +hey-jarvis:~/models/hey-jarvis.net:0.5" +``` + +**Pros:** +- ✅ Can identify which wake word was used +- ✅ Different contexts (Mycroft=commands, Jarvis=media) +- ✅ Easy to add/remove wake words +- ✅ Each can have different sensitivity + +**Cons:** +- ❌ Only works server-side (not on Maix Duino) +- ❌ Higher CPU usage (but still reasonable) + +**Use When:** +- You want different wake words for different purposes +- Server has CPU to spare (yours does!) +- Want flexibility to add wake words later + +### Option 2: Single Multi-Phrase Model (Edge-Compatible) + +**What:** One model responds to multiple phrases +**Where:** Server OR Maix Duino +**Performance:** Same as single model + +```bash +# Train on multiple phrases +cd ~/precise-models/multi-wake +# Record "Hey Mycroft" samples → wake-word/ +# Record "Hey Computer" samples → wake-word/ +# Record negatives → not-wake-word/ +precise-train -e 60 multi-wake.net . 
+``` + +**Pros:** +- ✅ Single model = less compute +- ✅ Works on edge (K210) +- ✅ Simple deployment + +**Cons:** +- ❌ Can't tell which wake word was used +- ❌ May reduce accuracy +- ❌ Higher false positive risk + +**Use When:** +- Deploying to Maix Duino (edge) +- Want backup wake words +- Don't care which was used + +--- + +## 👥 Multi-User Support + +### Option 1: Inclusive Training ⭐ START HERE + +**What:** One model, all voices +**How:** All family members record samples + +```bash +cd ~/precise-models/family-wake +# Alice records 30 samples +# Bob records 30 samples +# You record 30 samples +precise-train -e 60 family-wake.net . +``` + +**Pros:** +- ✅ Everyone can use it +- ✅ Simple deployment +- ✅ Single model + +**Cons:** +- ❌ Can't identify who spoke +- ❌ No personalization + +**Use When:** +- Just getting started +- Don't need to know who spoke +- Want simplicity + +### Option 2: Speaker Identification (Week 4+) + +**What:** Detect wake word, then identify speaker +**How:** Voice embeddings (resemblyzer or pyannote) + +```bash +# Install +pip install resemblyzer + +# Enroll users +python enroll_speaker.py --name Alan --duration 20 +python enroll_speaker.py --name Alice --duration 20 +python enroll_speaker.py --name Bob --duration 20 + +# Server identifies speaker automatically +``` + +**Pros:** +- ✅ Personalized responses +- ✅ User-specific permissions +- ✅ Better privacy +- ✅ Track preferences + +**Cons:** +- ❌ More complex +- ❌ Requires enrollment +- ❌ +100-200ms latency +- ❌ May fail with similar voices + +**Use When:** +- Want personalization +- Need user-specific commands +- Ready for advanced features + +### Option 3: Per-User Wake Words (Advanced) + +**What:** Each person has their own wake word +**How:** Multiple models, one per person + +```bash +# Alice: "Hey Mycroft" +# Bob: "Hey Jarvis" +# You: "Hey Computer" + +# Run all 3 models in parallel +``` + +**Pros:** +- ✅ Automatic user ID +- ✅ Highest accuracy per user +- ✅ Clear separation + 
+**Cons:** +- ❌ 3x models = 3x CPU +- ❌ Users must remember their word +- ❌ Server-only (not edge) + +**Use When:** +- Need automatic user ID +- Have CPU to spare +- Users want their own wake word + +--- + +## 🎯 Decision Tree + +``` +START: Want to use voice assistant + │ + ├─ Single user or don't care who spoke? + │ └─ Use: Inclusive Training (Option 1) + │ └─ Download: Hey Mycroft (pre-trained) + │ + ├─ Multiple users AND need to know who spoke? + │ └─ Use: Speaker Identification (Option 2) + │ └─ Start with: Hey Mycroft + resemblyzer + │ + ├─ Want different wake words for different purposes? + │ └─ Use: Multiple Models (Option 1) + │ └─ Download: Hey Mycroft + Hey Jarvis + │ + └─ Deploying to Maix Duino (edge)? + └─ Use: Single Multi-Phrase Model (Option 2) + └─ Train: Custom model with 2-3 phrases +``` + +--- + +## 📊 Comparison Table + +| Feature | Inclusive | Speaker ID | Per-User Wake | Multiple Wake | +|---------|-----------|------------|---------------|---------------| +| **Setup Time** | 2 hours | 4 hours | 6 hours | 3 hours | +| **Complexity** | ⭐ Easy | ⭐⭐⭐ Medium | ⭐⭐⭐⭐ Hard | ⭐⭐ Easy | +| **CPU Usage** | 5-10% | 10-15% | 15-30% | 15-30% | +| **Latency** | 100ms | 300ms | 100ms | 100ms | +| **User ID** | ❌ No | ✅ Yes | ✅ Yes | ❌ No | +| **Edge Deploy** | ✅ Yes | ⚠️ Maybe | ❌ No | ⚠️ Partial | +| **Personalize** | ❌ No | ✅ Yes | ✅ Yes | ⚠️ Partial | + +--- + +## 🚀 Recommended Timeline + +### Week 1: Get It Working +```bash +# Use pre-trained Hey Mycroft +./download_pretrained_models.sh --model hey-mycroft + +# Test it +precise-listen ~/precise-models/pretrained/hey-mycroft.net + +# Deploy to server +python voice_server.py --enable-precise \ + --precise-model ~/precise-models/pretrained/hey-mycroft.net +``` + +### Week 2-3: Make It Yours +```bash +# Fine-tune with your family's voices +cd ~/precise-models/hey-mycroft-family + +# Have everyone record 20-30 samples +precise-collect # Alice +precise-collect # Bob +precise-collect # You + +# Train 
+precise-train -e 30 custom.net . \ + --from-checkpoint ../pretrained/hey-mycroft.net +``` + +### Week 4+: Add Intelligence +```bash +# Speaker identification +pip install resemblyzer +python enroll_speaker.py --name Alan --duration 20 +python enroll_speaker.py --name Alice --duration 20 + +# Now server knows who's speaking! +``` + +### Month 2+: Expand Features +```bash +# Add second wake word for media control +./download_pretrained_models.sh --model hey-jarvis + +# Run both: Mycroft for commands, Jarvis for Plex +python voice_server.py --enable-precise \ + --precise-models "mycroft:hey-mycroft.net:0.5,jarvis:hey-jarvis.net:0.5" +``` + +--- + +## 💡 Pro Tips + +### Wake Word Selection +- ✅ **DO:** Choose clear, distinct wake words +- ✅ **DO:** Test in your environment +- ❌ **DON'T:** Use similar-sounding words +- ❌ **DON'T:** Use common phrases + +### Training +- ✅ **DO:** Include all intended users +- ✅ **DO:** Record in various conditions +- ✅ **DO:** Add false positives to training +- ❌ **DON'T:** Rush the training process + +### Deployment +- ✅ **DO:** Start simple (one wake word) +- ✅ **DO:** Test thoroughly before adding features +- ✅ **DO:** Monitor false positive rate +- ❌ **DON'T:** Deploy too many wake words at once + +### Speaker ID +- ✅ **DO:** Use 20+ seconds for enrollment +- ✅ **DO:** Re-enroll if accuracy drops +- ✅ **DO:** Test threshold values +- ❌ **DON'T:** Expect 100% accuracy + +--- + +## 🔧 Quick Commands + +```bash +# Download pre-trained model +./download_pretrained_models.sh --model hey-mycroft + +# Test model +precise-listen ~/precise-models/pretrained/hey-mycroft.net + +# Fine-tune from pre-trained +precise-train -e 30 custom.net . 
\ + --from-checkpoint ~/precise-models/pretrained/hey-mycroft.net + +# Enroll speaker +python enroll_speaker.py --name Alan --duration 20 + +# Start with single wake word +python voice_server.py --enable-precise \ + --precise-model hey-mycroft.net + +# Start with multiple wake words +python voice_server.py --enable-precise \ + --precise-models "mycroft:hey-mycroft.net:0.5,jarvis:hey-jarvis.net:0.5" + +# Check status +curl http://10.1.10.71:5000/wake-word/status + +# Monitor detections +curl http://10.1.10.71:5000/wake-word/detections +``` + +--- + +## 📚 See Also + +- **Full guide:** [ADVANCED_WAKE_WORD_TOPICS.md](ADVANCED_WAKE_WORD_TOPICS.md) +- **Training:** [MYCROFT_PRECISE_GUIDE.md](MYCROFT_PRECISE_GUIDE.md) +- **Deployment:** [PRECISE_DEPLOYMENT.md](PRECISE_DEPLOYMENT.md) +- **Getting started:** [QUICKSTART.md](QUICKSTART.md) + +--- + +## ❓ FAQ + +**Q: Can I use "Hey Mycroft" right away?** +A: Yes! Download with `./download_pretrained_models.sh --model hey-mycroft` + +**Q: How many wake words can I run at once?** +A: 2-3 comfortably on server. Maix Duino can handle 1. + +**Q: Can I train my own custom wake word?** +A: Yes! See MYCROFT_PRECISE_GUIDE.md Phase 2. + +**Q: Does speaker ID work with multiple wake words?** +A: Yes! Wake word detected → Speaker identified → Personalized response. + +**Q: Can I use this on Maix Duino?** +A: Server-side (start here), then convert to KMODEL (advanced). + +**Q: How accurate is speaker identification?** +A: 85-95% with good enrollment. Re-enroll if accuracy drops. + +**Q: What if someone has a cold?** +A: May reduce accuracy temporarily. System should recover when voice returns to normal. + +**Q: Can kids use it?** +A: Yes! Include their voices in training or enroll them separately. + +--- + +**Quick Decision:** Start with pre-trained Hey Mycroft. Add features later! + +```bash +./download_pretrained_models.sh --model hey-mycroft +precise-listen ~/precise-models/pretrained/hey-mycroft.net +# It just works! 
✨ +``` diff --git a/docs/maix-voice-assistant-architecture.md b/docs/maix-voice-assistant-architecture.md new file mode 100755 index 0000000..9e4424d --- /dev/null +++ b/docs/maix-voice-assistant-architecture.md @@ -0,0 +1,347 @@ +# Maix Duino Voice Assistant - System Architecture + +## Overview +Local voice assistant using Sipeed Maix Duino board integrated with Home Assistant, leveraging existing home lab infrastructure for AI processing. + +## Hardware Components + +### Maix Duino Board +- **Processor**: K210 dual-core RISC-V @ 400MHz +- **AI Accelerator**: KPU for neural network inference +- **Audio**: I2S microphone + speaker output +- **Connectivity**: ESP32 for WiFi/BLE +- **Programming**: MaixPy (MicroPython) + +### Recommended Accessories +- I2S MEMS microphone (or microphone array for better pickup) +- Small speaker (3-5W) or audio output to existing speakers +- USB-C power supply (5V/2A minimum) + +## Software Architecture + +### Edge Layer (Maix Duino) +``` +┌─────────────────────────────────────┐ +│ Maix Duino (MaixPy) │ +├─────────────────────────────────────┤ +│ • Wake Word Detection (KPU) │ +│ • Audio Capture (I2S) │ +│ • Audio Streaming → Heimdall │ +│ • Audio Playback ← Heimdall │ +│ • LED Feedback (listening status) │ +└─────────────────────────────────────┘ + ↕ WiFi/HTTP/WebSocket +┌─────────────────────────────────────┐ +│ Voice Processing Server │ +│ (Heimdall - 10.1.10.71) │ +├─────────────────────────────────────┤ +│ • Whisper STT (existing setup!) │ +│ • Intent Recognition (Rasa/custom) │ +│ • Piper TTS │ +│ • Home Assistant API Client │ +└─────────────────────────────────────┘ + ↕ REST API/MQTT +┌─────────────────────────────────────┐ +│ Home Assistant │ +│ (Your HA instance) │ +├─────────────────────────────────────┤ +│ • Device Control │ +│ • State Management │ +│ • Automation Triggers │ +└─────────────────────────────────────┘ +``` + +## Communication Flow + +### 1. 
Wake Word Detection (Local) +``` +User says "Hey Assistant" + ↓ +Maix Duino KPU detects wake word + ↓ +LED turns on (listening mode) + ↓ +Start audio streaming to Heimdall +``` + +### 2. Speech Processing (Heimdall) +``` +Audio stream received + ↓ +Whisper transcribes to text + ↓ +Intent parser extracts command + ↓ +Query Home Assistant API + ↓ +Generate response text + ↓ +Piper TTS creates audio + ↓ +Stream audio back to Maix Duino +``` + +### 3. Playback & Feedback +``` +Receive audio stream + ↓ +Play through speaker + ↓ +LED indicates completion + ↓ +Return to wake word detection +``` + +## Network Configuration + +### Maix Duino Network Settings +- **IP**: 10.1.10.xxx (assign static via DHCP reservation) +- **Gateway**: 10.1.10.1 +- **DNS**: 10.1.10.4 (Pi-hole) + +### Service Endpoints +- **Voice Processing Server**: http://10.1.10.71:5000 +- **Home Assistant**: (your existing HA URL) +- **MQTT Broker**: (optional, if using MQTT) + +### Caddy Reverse Proxy Entry +Add to `/mnt/project/epona_-_Caddyfile`: +```caddy +# Voice Assistant API +handle /voice-assistant* { + uri strip_prefix /voice-assistant + reverse_proxy http://10.1.10.71:5000 +} +``` + +## Software Stack + +### Maix Duino (MaixPy) +- **Firmware**: Latest MaixPy release +- **Libraries**: + - `Maix.KPU` - Neural network inference + - `Maix.I2S` - Audio capture/playback + - `socket` - Network communication + - `ujson` - JSON handling + +### Heimdall Server (Python) +- **Environment**: Create new conda env + ```bash + conda create -n voice-assistant python=3.10 + conda activate voice-assistant + ``` +- **Dependencies**: + - `openai-whisper` (already installed!) 
+ - `piper-tts` - Text-to-speech + - `flask` - REST API server + - `requests` - HTTP client + - `pyaudio` - Audio handling + - `websockets` - Real-time streaming + +### Optional: Intent Recognition +- **Rasa** - Full NLU framework (heavier but powerful) +- **Simple pattern matching** - Lightweight, start here +- **LLM-based** - Use your existing LLM setup on Heimdall + +## Data Flow Examples + +### Example 1: Turn on lights +``` +User: "Hey Assistant, turn on the living room lights" + ↓ +Wake word detected → Start recording + ↓ +Whisper STT: "turn on the living room lights" + ↓ +Intent Parser: { + "action": "turn_on", + "entity": "light.living_room" +} + ↓ +Home Assistant API: + POST /api/services/light/turn_on + {"entity_id": "light.living_room"} + ↓ +Response: "Living room lights turned on" + ↓ +Piper TTS → Audio playback +``` + +### Example 2: Get status +``` +User: "What's the temperature?" + ↓ +Whisper STT: "what's the temperature" + ↓ +Intent Parser: { + "action": "get_state", + "entity": "sensor.temperature" +} + ↓ +Home Assistant API: + GET /api/states/sensor.temperature + ↓ +Response: "The temperature is 72 degrees" + ↓ +Piper TTS → Audio playback +``` + +## Phase 1 Implementation Plan + +### Step 1: Maix Duino Setup (Week 1) +- [ ] Flash latest MaixPy firmware +- [ ] Test audio input/output +- [ ] Implement basic network communication +- [ ] Test streaming audio to server + +### Step 2: Server Setup (Week 1-2) +- [ ] Create conda environment on Heimdall +- [ ] Set up Flask API server +- [ ] Integrate Whisper (already have this!) 
+- [ ] Install and test Piper TTS +- [ ] Create basic Home Assistant API client + +### Step 3: Wake Word Training (Week 2) +- [ ] Record wake word samples +- [ ] Train custom wake word model +- [ ] Convert model for K210 KPU +- [ ] Test on-device detection + +### Step 4: Integration (Week 3) +- [ ] Connect all components +- [ ] Test end-to-end flow +- [ ] Add error handling +- [ ] Implement fallbacks + +### Step 5: Enhancement (Week 4+) +- [ ] Add more intents +- [ ] Improve NLU accuracy +- [ ] Add multi-room support +- [ ] Implement conversation context + +## Development Tools + +### Testing Wake Word +```python +# Use existing diarization.py for testing audio quality +python3 /path/to/diarization.py test_audio.wav \ + --format vtt \ + --model medium +``` + +### Monitoring +- Heimdall logs: `/var/log/voice-assistant/` +- Maix Duino serial console: 115200 baud +- Home Assistant logs: Standard HA logging + +## Security Considerations + +1. **No external cloud services** - Everything local +2. **Network isolation** - Keep on 10.1.10.0/24 +3. **Authentication** - Use HA long-lived tokens +4. **Rate limiting** - Prevent abuse +5. 
**Audio privacy** - Only stream after wake word + +## Resource Requirements + +### Heimdall +- **CPU**: Minimal (< 5% idle, spikes during STT) +- **RAM**: ~2GB for Whisper medium model +- **Storage**: ~5GB for models +- **Network**: Low bandwidth (16kHz audio stream) + +### Maix Duino +- **Power**: ~1-2W typical +- **Storage**: 16MB flash (plenty for wake word model) +- **RAM**: 8MB SRAM (sufficient for audio buffering) + +## Alternative Architectures + +### Option A: Fully On-Device (Limited) +- Everything on Maix Duino +- Very limited vocabulary +- No internet required +- Lower accuracy + +### Option B: Hybrid (Recommended) +- Wake word on Maix Duino +- Processing on Heimdall +- Best balance of speed/accuracy + +### Option C: Raspberry Pi Alternative +- If K210 proves limiting +- More processing power +- Still local/FOSS +- Higher cost + +## Expansion Ideas + +### Future Enhancements +1. **Multi-room**: Deploy multiple Maix Duino units +2. **Music playback**: Integrate with Plex +3. **Timers/Reminders**: Local scheduling +4. **Weather**: Pull from local weather station +5. **Calendar**: Sync with Nextcloud +6. **Intercom**: Room-to-room communication +7. **Sound events**: Doorbell, smoke alarm detection + +### Integration with Existing Infrastructure +- **Plex**: Voice control for media playback +- **qBittorrent**: Status queries, torrent management +- **Nextcloud**: Calendar/contact queries +- **Matrix**: Send messages via voice + +## Cost Estimate + +- Maix Duino board: ~$20-30 (already have!) 
+- Microphone: ~$5-10 (if not included) +- Speaker: ~$10-15 (or use existing) +- **Total**: $0-55 (mostly already have) + +Compare to commercial solutions: +- Google Home Mini: $50 (requires cloud) +- Amazon Echo Dot: $50 (requires cloud) +- Apple HomePod Mini: $99 (requires cloud) + +## Success Criteria + +### Minimum Viable Product (MVP) +- ✓ Wake word detection < 1 second +- ✓ Speech-to-text accuracy > 90% +- ✓ Home Assistant command execution +- ✓ Response time < 3 seconds total +- ✓ All processing local (no cloud) + +### Enhanced Version +- ✓ Multi-intent conversations +- ✓ Context awareness +- ✓ Multiple wake words +- ✓ Room-aware responses +- ✓ Custom voice training + +## Resources & Documentation + +### Official Documentation +- Maix Duino: https://wiki.sipeed.com/hardware/en/maix/ +- MaixPy: https://maixpy.sipeed.com/ +- Home Assistant API: https://developers.home-assistant.io/ + +### Wake Word Tools +- Mycroft Precise: https://github.com/MycroftAI/mycroft-precise +- Porcupine: https://github.com/Picovoice/porcupine + +### TTS Options +- Piper: https://github.com/rhasspy/piper +- Coqui TTS: https://github.com/coqui-ai/TTS + +### Community Projects +- Rhasspy: https://rhasspy.readthedocs.io/ (full voice assistant framework) +- Willow: https://github.com/toverainc/willow (ESP32-based alternative) + +## Next Steps + +1. **Test current setup**: Verify Maix Duino boots and can connect to WiFi +2. **Audio test**: Record and playback test on the board +3. **Server setup**: Create conda environment and install dependencies +4. **Simple prototype**: Wake word → beep (no processing yet) +5. 
**Iterate**: Add complexity step by step diff --git a/hardware/maixduino/MICROPYTHON_QUIRKS.md b/hardware/maixduino/MICROPYTHON_QUIRKS.md new file mode 100755 index 0000000..b53a819 --- /dev/null +++ b/hardware/maixduino/MICROPYTHON_QUIRKS.md @@ -0,0 +1,348 @@ +# MicroPython/MaixPy Quirks and Compatibility Notes + +**Date:** 2025-12-03 +**MicroPython Version:** v0.6.2-89-gd8901fd22 on 2024-06-17 +**Hardware:** Sipeed Maixduino (K210) + +This document captures all the compatibility issues and workarounds discovered while developing the voice assistant client for Maixduino. + +--- + +## String Formatting + +### ❌ F-strings NOT supported +```python +# WRONG - SyntaxError +message = f"IP: {ip}" +temperature = f"Temp: {temp}°C" +``` + +### ✅ Use string concatenation +```python +# CORRECT +message = "IP: " + str(ip) +temperature = "Temp: " + str(temp) + "°C" +``` + +--- + +## Conditional Expressions (Ternary Operator) + +### ❌ Inline ternary expressions NOT supported +```python +# WRONG - SyntaxError +plural = "s" if count > 1 else "" +message = "Found " + str(count) + " item" + ("s" if count > 1 else "") +``` + +### ✅ Use explicit if/else blocks +```python +# CORRECT +if count > 1: + plural = "s" +else: + plural = "" +message = "Found " + str(count) + " item" + plural +``` + +--- + +## String Methods + +### ❌ decode() doesn't accept keyword arguments +```python +# WRONG - TypeError: function doesn't take keyword arguments +text = response.decode('utf-8', errors='ignore') +``` + +### ✅ Use positional arguments only (or catch exceptions) +```python +# CORRECT +try: + text = response.decode('utf-8') +except: + text = str(response) +``` + +--- + +## Display/LCD Color Format + +### ❌ RGB tuples NOT accepted +```python +# WRONG - TypeError: can't convert tuple to int +COLOR_RED = (255, 0, 0) +lcd.draw_string(10, 50, "Hello", COLOR_RED, 0) +``` + +### ✅ Use bit-packed integers +```python +# CORRECT - Pack RGB into 16-bit or 24-bit integer +def rgb_to_int(r, g, b): + return (r 
<< 16) | (g << 8) | b + +COLOR_RED = rgb_to_int(255, 0, 0) +lcd.draw_string(10, 50, "Hello", COLOR_RED, 0) +``` + +--- + +## Network - WiFi Module + +### ❌ Standard network.WLAN NOT available +```python +# WRONG - AttributeError: 'module' object has no attribute 'WLAN' +import network +nic = network.WLAN(network.STA_IF) +``` + +### ✅ Use network.ESP32_SPI for Maixduino +```python +# CORRECT - Requires full pin configuration +from network import ESP32_SPI +from fpioa_manager import fm + +# Register all 6 SPI pins +fm.register(25, fm.fpioa.GPIOHS10, force=True) # CS +fm.register(8, fm.fpioa.GPIOHS11, force=True) # RST +fm.register(9, fm.fpioa.GPIOHS12, force=True) # RDY +fm.register(28, fm.fpioa.GPIOHS13, force=True) # MOSI +fm.register(26, fm.fpioa.GPIOHS14, force=True) # MISO +fm.register(27, fm.fpioa.GPIOHS15, force=True) # SCLK + +nic = ESP32_SPI( + cs=fm.fpioa.GPIOHS10, + rst=fm.fpioa.GPIOHS11, + rdy=fm.fpioa.GPIOHS12, + mosi=fm.fpioa.GPIOHS13, + miso=fm.fpioa.GPIOHS14, + sclk=fm.fpioa.GPIOHS15 +) + +nic.connect(SSID, PASSWORD) +``` + +### ❌ active() method NOT available +```python +# WRONG - AttributeError: 'ESP32_SPI' object has no attribute 'active' +nic.active(True) +``` + +### ✅ Just use connect() directly +```python +# CORRECT +nic.connect(SSID, PASSWORD) +``` + +--- + +## I2S Audio + +### ❌ record() doesn't accept size parameter only +```python +# WRONG - TypeError: object with buffer protocol required +chunk = i2s_dev.record(1024) +``` + +### ✅ Returns Audio object, use to_bytes() +```python +# CORRECT +audio_obj = i2s_dev.record(total_bytes) +audio_data = audio_obj.to_bytes() +``` + +**Note:** Audio data often comes in unexpected formats: +- Expected: 16-bit mono PCM +- Reality: Often 32-bit or stereo (4x expected size) +- Solution: Implement format detection and conversion + +--- + +## Memory Management + +### Memory is VERY limited (~6MB total, much less available) + +**Problems encountered:** +- Creating large bytearrays fails (>100KB can fail) +- 
Multiple allocations cause fragmentation +- In-place operations preferred over creating new buffers + +### ❌ Creating new buffers +```python +# WRONG - MemoryError on large data +compressed = bytearray() +for i in range(0, len(data), 4): + compressed.extend(data[i:i+2]) # Allocates new memory +``` + +### ✅ Work with smaller chunks or compress during transmission +```python +# CORRECT - Process in smaller pieces +chunk_size = 512 +for i in range(0, len(data), chunk_size): + chunk = data[i:i+chunk_size] + process_chunk(chunk) # Handle incrementally +``` + +**Solutions implemented:** +1. Reduce recording duration (3s → 1s) +2. Compress audio (μ-law: 50% size reduction) +3. Stream transmission in small chunks (512 bytes) +4. Add delays between sends to prevent buffer overflow + +--- + +## String Operations + +### ❌ Arithmetic in string concatenation +```python +# WRONG - SyntaxError (sometimes) +message = "Count: #" + str(count + 1) +``` + +### ✅ Separate arithmetic from concatenation +```python +# CORRECT +next_count = count + 1 +message = "Count: #" + str(next_count) +``` + +--- + +## Bytearray Operations + +### ❌ Item deletion NOT supported +```python +# WRONG - TypeError: 'bytearray' object doesn't support item deletion +del audio_data[expected_size:] +``` + +### ✅ Create new bytearray with slice +```python +# CORRECT +audio_data = audio_data[:expected_size] +# Or create new buffer +trimmed = bytearray(expected_size) +trimmed[:] = audio_data[:expected_size] +``` + +--- + +## HTTP Requests + +### ❌ urequests module NOT available +```python +# WRONG - ImportError: no module named 'urequests' +import urequests +response = urequests.post(url, data=data) +``` + +### ✅ Use raw socket HTTP +```python +# CORRECT +import socket + +s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +s.connect((host, port)) + +# Manual HTTP headers +headers = "POST /path HTTP/1.1\r\n" +headers += "Host: " + host + "\r\n" +headers += "Content-Type: audio/wav\r\n" +headers += 
"Content-Length: " + str(len(data)) + "\r\n" +headers += "Connection: close\r\n\r\n" + +s.send(headers.encode()) +s.send(data) + +response = s.recv(1024) +s.close() +``` + +**Socket I/O errors common:** +- `[Errno 5] EIO` - Buffer overflow or disconnect +- Solutions: + - Send smaller chunks (512-1024 bytes) + - Add delays between sends (`time.sleep_ms(10)`) + - Enable keepalive if supported + +--- + +## Best Practices for MaixPy + +1. **Avoid complex expressions** - Break into simple steps +2. **Pre-allocate when possible** - Reduce fragmentation +3. **Use small buffers** - 512-1024 byte chunks work well +4. **Add delays in loops** - Prevent watchdog/buffer issues +5. **Explicit type conversions** - Always use `str()`, `int()`, etc. +6. **Test incrementally** - Memory errors appear suddenly +7. **Monitor serial output** - Errors often give hints +8. **Simplify, simplify** - Complexity = bugs in MicroPython + +--- + +## Testing Methodology + +When porting Python code to MaixPy: + +1. Start with simplest version (hardcoded values) +2. Test each function individually via REPL +3. Add features incrementally +4. Watch for memory errors (usually allocation failures) +5. If error occurs, simplify the last change +6. 
Use print statements liberally (no debugger available) + +--- + +## Hardware-Specific Notes + +### Maixduino ESP32 WiFi +- Requires manual pin registration +- 6 pins must be configured (CS, RST, RDY, MOSI, MISO, SCLK) +- Connection can be slow (20+ seconds) +- Stability improves with smaller packet sizes + +### I2S Microphone +- Returns Audio objects, not raw bytes +- Format is often different than configured +- May return stereo when mono requested +- May return 32-bit when 16-bit requested +- Always implement format detection/conversion + +### BOOT Button (GPIO 16) +- Active low (0 = pressed, 1 = released) +- Requires pull-up configuration +- Debounce by waiting for release +- Can be used without interrupts (polling is fine) + +--- + +## Resources + +- **MaixPy Documentation:** https://maixpy.sipeed.com/ +- **K210 Datasheet:** https://canaan.io/product/kendryteai +- **ESP32 SPI Firmware:** https://github.com/sipeed/MaixPy_scripts/tree/master/network + +--- + +## Summary of Successful Patterns + +```python +# Audio recording and transmission pipeline +1. Record audio → Audio object (128KB for 1 second) +2. Convert to bytes → to_bytes() (still 128KB) +3. Detect format → Check size vs expected +4. Convert to mono 16-bit → In-place copy (32KB) +5. Compress with μ-law → 50% reduction (16KB) +6. Send in chunks → 512 bytes at a time with delays +7. Parse response → Simple string operations + +# Total: ~85% size reduction, fits in memory! +``` + +This approach works reliably on K210 with ~6MB RAM. + +--- + +**Last Updated:** 2025-12-03 +**Status:** Fully tested and working diff --git a/hardware/maixduino/README.md b/hardware/maixduino/README.md new file mode 100755 index 0000000..bcf6bf5 --- /dev/null +++ b/hardware/maixduino/README.md @@ -0,0 +1,184 @@ +# Maixduino Scripts + +Scripts to copy/paste into MaixPy IDE for running on the Maix Duino board. + +## Files + +### 1. 
maix_test_simple.py +**Purpose:** Hardware and connectivity test +**Use:** Copy/paste into MaixPy IDE to test before deploying full application + +**Tests:** +- LCD display functionality +- WiFi connection +- Network connection to Heimdall server (port 3006) +- I2S audio hardware initialization + +**Before running:** +1. Edit WiFi credentials (lines 16-17): + ```python + WIFI_SSID = "YourNetworkName" + WIFI_PASSWORD = "YourPassword" + ``` +2. Verify server URL is correct (line 18): + ```python + SERVER_URL = "http://10.1.10.71:3006" + ``` +3. Copy entire file contents +4. Paste into MaixPy IDE +5. Click RUN button + +**Expected output:** +- Display will show test results +- Serial console will print detailed progress +- Will report OK/FAIL for each test + +--- + +### 2. maix_voice_client.py +**Purpose:** Full voice assistant client +**Use:** Copy/paste into MaixPy IDE after test passes + +**Features:** +- Wake word detection (placeholder - uses amplitude trigger) +- Audio recording after wake word +- Sends audio to Heimdall server for processing +- Displays transcription and response on LCD +- LED feedback for status + +**Before running:** +1. Edit WiFi credentials (lines 38-39) +2. Verify server URL (line 42) +3. Adjust audio settings if needed (lines 45-62) + +**For SD card deployment:** +1. Copy this file to SD card as `main.py` +2. Board will auto-run on boot + +--- + +## Deployment Workflow + +### Step 1: Test Hardware (maix_test_simple.py) +``` +1. Edit WiFi settings +2. Paste into MaixPy IDE +3. Click RUN +4. Verify all tests pass +``` + +### Step 2: Deploy Full Client (maix_voice_client.py) +**Option A - IDE Testing:** +``` +1. Edit WiFi settings +2. Paste into MaixPy IDE +3. Click RUN for testing +``` + +**Option B - Permanent SD Card:** +``` +1. Edit WiFi settings +2. Save to SD card as: /sd/main.py +3. 
Reboot board - auto-runs on boot +``` + +--- + +## Hardware Requirements + +### Maix Duino Board +- K210 processor with KPU +- LCD display (built-in) +- I2S microphone (check connections) +- ESP32 WiFi module (built-in) + +### I2S Pin Configuration (Default) +```python +Pin 20: I2S0_IN_D0 (Data) +Pin 19: I2S0_WS (Word Select) +Pin 18: I2S0_SCLK (Clock) +``` + +**Note:** If your microphone uses different pins, edit the pin assignments in the scripts. + +--- + +## Troubleshooting + +### WiFi Won't Connect +- Verify SSID and password are correct +- Ensure WiFi is 2.4GHz (not 5GHz - Maix doesn't support 5GHz) +- Check signal strength +- Try moving closer to router + +### Server Connection Fails +- Verify Heimdall server is running on port 3006 +- Check firewall allows port 3006 +- Ensure Maix is on same network (10.1.10.0/24) +- Test from another device: `curl http://10.1.10.71:3006/health` + +### Audio Initialization Fails +- Check microphone is properly connected +- Verify I2S pins match your hardware +- Try alternate pin configuration if needed +- Check microphone requires 3.3V (not 5V) + +### Script Errors in MaixPy IDE +- Ensure using latest MaixPy firmware +- Check for typos when editing WiFi credentials +- Verify entire script was copied (check for truncation) +- Look at serial console for detailed error messages + +--- + +## MaixPy IDE Tips + +### Running Scripts +1. Connect board via USB +2. Select correct board model: Tools → Select Board +3. Click connect button (turns red when connected) +4. Paste code into editor +5. Click run button (red triangle) +6. 
Watch serial console and LCD for output + +### Stopping Scripts +- Click run button again to stop +- Or press reset button on board + +### Serial Console +- Shows detailed debug output +- Useful for troubleshooting +- Can copy errors for debugging + +--- + +## Network Configuration + +- **Heimdall Server:** 10.1.10.71:3006 +- **Maix Duino:** Gets IP via DHCP (shown on LCD during test) +- **Network:** 10.1.10.0/24 + +--- + +## Next Steps + +After both scripts work: +1. Verify Heimdall server is processing audio +2. Test wake word detection +3. Integrate with Home Assistant (optional) +4. Train custom wake word (optional) +5. Deploy to SD card for permanent installation + +--- + +## Related Documentation + +- **Project overview:** `../PROJECT_SUMMARY.md` +- **Heimdall setup:** `../QUICKSTART.md` +- **Wake word training:** `../MYCROFT_PRECISE_GUIDE.md` +- **Server deployment:** `../docs/PRECISE_DEPLOYMENT.md` + +--- + +**Last Updated:** 2025-12-03 +**Location:** `/Library/Development/devl/Devops/projects/mycroft-precise/maixduino-scripts/` diff --git a/hardware/maixduino/SESSION_PROGRESS_2025-12-03.md b/hardware/maixduino/SESSION_PROGRESS_2025-12-03.md new file mode 100755 index 0000000..e389a53 --- /dev/null +++ b/hardware/maixduino/SESSION_PROGRESS_2025-12-03.md @@ -0,0 +1,376 @@ +# Maixduino Voice Assistant - Session Progress + +**Date:** 2025-12-03 +**Session Duration:** ~4 hours +**Goal:** Get audio recording and transcription working on Maixduino → Heimdall server + +--- + +## 🎉 Major Achievements + +### ✅ Full Audio Pipeline Working! +We successfully built and tested the complete audio capture → compression → transmission → transcription pipeline: + +1. **WiFi Connection** - Maixduino connects to network (10.1.10.98) +2. **Audio Recording** - I2S microphone captures audio (MSM261S4030H0 MEMS mic) +3. **Format Conversion** - Converts 32-bit stereo to 16-bit mono (4x size reduction) +4. **μ-law Compression** - Compresses PCM audio by 50% +5. 
**HTTP Transmission** - Sends compressed WAV to Heimdall server +6. **Whisper Transcription** - Server transcribes and returns text +7. **LCD Display** - Shows transcription on Maixduino screen +8. **Button Loop** - Press BOOT button for repeated recordings + +**Total size reduction:** 128KB → 32KB (mono) → 16KB (compressed) = **87.5% reduction!** + +--- + +## 🔧 Technical Accomplishments + +### Audio Recording Pipeline +- **Initial Problem:** `i2s_dev.record()` returned immediately (1ms instead of 1000ms) +- **Root Cause:** Recording API is asynchronous/non-blocking +- **Solution:** Use chunked recording with `wait_record()` blocking calls +- **Pattern:** + ```python + for i in range(frame_cnt): + audio_chunk = i2s_dev.record(chunk_size) + i2s_dev.wait_record() # CRITICAL: blocks until complete + chunks.append(audio_chunk.to_bytes()) + ``` + +### Memory Management +- **K210 has very limited RAM** (~6MB total, much less available) +- Successfully handled 128KB → 16KB data transformation without OOM errors +- Techniques used: + - Record in small chunks (2048 samples) + - Stream HTTP transmission (512-byte chunks with delays) + - In-place data conversion where possible + - Explicit garbage collection hints (`audio_data = None`) + +### Network Communication +- **Raw socket HTTP** (no urequests library available) +- **Chunked streaming** with flow control (10ms delays) +- **Simple WAV format** with μ-law compression (format code 7) +- **Robust error handling** with serial output debugging + +--- + +## 🐛 MicroPython/MaixPy Quirks Discovered + +### String Operations +- ❌ **F-strings NOT supported** - Must use `"text " + str(var)` concatenation +- ❌ **Ternary operators fail** - Use explicit `if/else` blocks instead +- ❌ **`split()` needs explicit delimiter** - `text.split(" ")` not `text.split()` +- ❌ **Escape sequences problematic** - Avoid `\n` in strings, causes syntax errors + +### Data Types & Methods +- ❌ **`decode()` doesn't accept kwargs** - Use `decode('utf-8')` 
not `decode('utf-8', errors='ignore')` +- ❌ **RGB tuples not accepted** - Must convert to packed integers: `(r << 16) | (g << 8) | b` +- ❌ **Bytearray item deletion unsupported** - `del arr[n:]` fails, use slicing instead +- ❌ **Arithmetic in string concat** - Separate calculations: `next = count + 1; "text" + str(next)` + +### I2S Audio Specific +- ❌ **`record()` is non-blocking** - Returns immediately, must use `wait_record()` +- ❌ **Audio object not directly iterable** - Must call `.to_bytes()` first +- ⚠️ **Data format mismatch** - Hardware returns 32-bit stereo even when configured for 16-bit mono (4x expected size) + +### Network/WiFi +- ❌ **`network.WLAN` not available** - Must use `network.ESP32_SPI` with full pin config +- ❌ **`active()` method doesn't exist** - Just call `connect()` directly +- ⚠️ **Requires ALL 6 pins configured** - CS, RST, RDY, MOSI, MISO, SCLK + +### General Syntax +- ⚠️ **`if __name__ == "__main__"` sometimes causes syntax errors** - Safer to just call `main()` directly +- ⚠️ **Import statements mid-function can cause syntax errors** - Keep imports at top of file +- ⚠️ **Some valid Python causes "invalid syntax" for unknown reasons** - Simplify complex expressions + +--- + +## 📊 Current Status + +### ✅ Working +- WiFi connectivity (ESP32 SPI) +- I2S audio initialization +- Chunked audio recording with `wait_record()` +- Audio format detection and conversion (32-bit stereo → 16-bit mono) +- μ-law compression (50% size reduction) +- HTTP transmission to server (chunked streaming) +- Whisper transcription (server-side) +- JSON response parsing +- LCD display (with word wrapping) +- Button-triggered recording loop +- Countdown timer before recording + +### ⚠️ Partially Working +- **Recording duration** - Currently getting ~0.9 seconds instead of full 1 second + - Formula: `frame_cnt = seconds * sample_rate // chunk_size` + - Current: `7 frames × (2048/16000) = 0.896s` + - May need to increase `frame_cnt` or adjust chunk size + +### ❌ Not 
Yet Implemented +- Mycroft Precise wake word detection +- Full voice assistant loop +- Command processing +- Home Assistant integration +- Multi-second recording support +- Real-time audio streaming + +--- + +## 🔬 Technical Details + +### Hardware Configuration + +**Maixduino Board:** +- Processor: K210 dual-core RISC-V @ 400MHz +- RAM: ~6MB total (limited available memory) +- WiFi: ESP32 module via SPI +- Microphone: MSM261S4030H0 MEMS (onboard) +- IP Address: 10.1.10.98 + +**I2S Pins:** +- Pin 20: I2S0_IN_D0 (data) +- Pin 19: I2S0_WS (word select) +- Pin 18: I2S0_SCLK (clock) + +**ESP32 SPI Pins:** +- Pin 25: CS (chip select) +- Pin 8: RST (reset) +- Pin 9: RDY (ready) +- Pin 28: MOSI (master out) +- Pin 26: MISO (master in) +- Pin 27: SCLK (clock) + +**GPIO:** +- Pin 16: BOOT button (active low, pull-up) + +### Server Configuration + +**Heimdall Server:** +- IP: 10.1.10.71 +- Port: 3006 +- Framework: Flask +- Model: Whisper base +- Environment: Conda `whisper_cli` + +**Endpoints:** +- `/health` - Health check +- `/transcribe` - POST audio for transcription + +### Audio Format + +**Recording:** +- Sample Rate: 16kHz +- Hardware Output: 32-bit stereo (128KB for 1 second) +- After Conversion: 16-bit mono (32KB for 1 second) +- After Compression: 8-bit μ-law (16KB for 1 second) + +**WAV Header:** +- Format Code: 7 (μ-law) +- Channels: 1 (mono) +- Sample Rate: 16000 Hz +- Bits per Sample: 8 +- Includes `fact` chunk (required for μ-law) + +--- + +## 📝 Code Files + +### Main Script +**File:** `/Library/Development/devl/Devops/projects/mycroft-precise/maixduino-scripts/maix_simple_record_test.py` + +**Key Functions:** +- `init_wifi()` - ESP32 SPI WiFi connection +- `init_audio()` - I2S microphone setup +- `record_audio()` - Chunked recording with `wait_record()` +- `convert_to_mono_16bit()` - Format conversion (32-bit stereo → 16-bit mono) +- `compress_ulaw()` - μ-law compression +- `create_wav_header()` - WAV file header generation +- `send_to_server()` - HTTP POST 
with chunked streaming +- `display_transcription()` - LCD output with word wrapping +- `main()` - Button loop for repeated recordings + +### Server Script +**File:** `/devl/voice-assistant/simple_transcribe_server.py` + +**Features:** +- Accepts raw WAV or multipart uploads +- Whisper base model transcription +- JSON response with transcription text +- Handles μ-law compressed audio + +### Documentation +**File:** `/Library/Development/devl/Devops/projects/mycroft-precise/maixduino-scripts/MICROPYTHON_QUIRKS.md` + +Complete reference of all MicroPython compatibility issues discovered during development. + +--- + +## 🎯 Next Steps + +### Immediate (Tonight) +1. ✅ Switch to Linux laptop with direct serial access +2. ⏭️ Tune recording duration to get full 1 second + - Try `frame_cnt = 8` instead of 7 + - Or adjust chunk size to get exact timing +3. ⏭️ Test transcription quality with proper-length recordings + +### Short Term (This Week) +1. Increase recording duration to 2-3 seconds for better transcription +2. Test memory limits with longer recordings +3. Optimize compression/transmission for speed +4. Add visual feedback during transmission + +### Medium Term (Next Week) +1. Install Mycroft Precise in `whisper_cli` environment +2. Test "hey mycroft" wake word detection on server +3. Integrate wake word into recording loop +4. Add command processing and Home Assistant integration + +### Long Term (Future) +1. Explore edge wake word detection (Precise on K210) +2. Multi-device deployment +3. Continuous listening mode +4. 
Voice profiles and speaker identification
+
+---
+
+## 🐛 Known Issues
+
+### Recording Duration
+- **Issue:** Recording is ~0.9 seconds instead of 1.0 seconds
+- **Cause:** `16000 / 2048 ≈ 7.8`, which integer division (`//`) truncates to 7 frames
+- **Impact:** Minor - transcription still works
+- **Fix:** Increase `frame_cnt` to 8 or adjust chunk size
+
+### Data Format Mismatch
+- **Issue:** Hardware returns 4x expected data (128KB vs 32KB)
+- **Cause:** I2S outputting 32-bit stereo despite 16-bit mono config
+- **Impact:** None - conversion function handles it
+- **Status:** Working as intended
+
+### Syntax Error Sensitivity
+- **Issue:** Some valid Python causes "invalid syntax" in MicroPython
+- **Patterns:** Import statements mid-function, certain arithmetic expressions
+- **Workaround:** Simplify code, avoid complex expressions
+- **Status:** Documented in MICROPYTHON_QUIRKS.md
+
+---
+
+## 💡 Key Learnings
+
+### I2S Recording Pattern
+The correct pattern for MaixPy I2S recording:
+```python
+chunk_size = 2048
+frame_cnt = seconds * sample_rate // chunk_size
+
+for i in range(frame_cnt):
+    audio_chunk = i2s_dev.record(chunk_size)
+    i2s_dev.wait_record()  # BLOCKS until recording complete
+    data.append(audio_chunk.to_bytes())
+```
+
+**Critical:** `wait_record()` is REQUIRED or recording returns immediately!
+
+### Memory Management
+K210 has very limited RAM. Successful strategies:
+- Work in small chunks (512-2048 bytes)
+- Stream data instead of buffering
+- Free variables explicitly when done
+- Avoid creating large intermediate buffers
+
+### MicroPython Compatibility
+MicroPython is NOT Python. Many standard features missing:
+- F-strings, ternary operators, keyword arguments
+- Some string methods, complex expressions
+- Standard libraries (urequests, json parsing)
+
+**Rule:** Test incrementally, simplify everything, check quirks doc.
+ +--- + +## 📚 Resources Used + +### Documentation +- [MaixPy I2S API Reference](https://wiki.sipeed.com/soft/maixpy/en/api_reference/Maix/i2s.html) +- [MaixPy I2S Usage Guide](https://wiki.sipeed.com/soft/maixpy/en/modules/on_chip/i2s.html) +- [Maixduino Hardware Wiki](https://wiki.sipeed.com/hardware/en/maix/maixpy_develop_kit_board/maix_duino.html) + +### Code Examples +- [Official record_wav.py](https://github.com/sipeed/MaixPy-v1_scripts/blob/master/multimedia/audio/record_wav.py) +- [MaixPy Scripts Repository](https://github.com/sipeed/MaixPy-v1_scripts) + +### Tools +- MaixPy IDE (copy/paste to board) +- Serial monitor (debugging) +- Heimdall server (Whisper transcription) + +--- + +## 🔄 Ready for Next Session + +### Current State +- ✅ Code is working and stable +- ✅ Can record, compress, transmit, transcribe, display +- ✅ Button loop allows repeated testing +- ⚠️ Recording duration slightly short (~0.9s) + +### Files Ready +- `/Library/Development/devl/Devops/projects/mycroft-precise/maixduino-scripts/maix_simple_record_test.py` +- `/Library/Development/devl/Devops/projects/mycroft-precise/maixduino-scripts/MICROPYTHON_QUIRKS.md` +- `/devl/voice-assistant/simple_transcribe_server.py` + +### For Serial Access Session +1. Connect Maixduino via USB to Linux laptop +2. Install pyserial: `pip install pyserial` +3. Find device: `ls /dev/ttyUSB*` or `/dev/ttyACM*` +4. Connect: `screen /dev/ttyUSB0 115200` or use MaixPy IDE +5. Can directly modify code, test immediately, see serial output + +### Quick Test Commands +```python +# Test WiFi +from network import ESP32_SPI +# ... (full init code in maix_test_simple.py) + +# Test I2S +from Maix import I2S +rx = I2S(I2S.DEVICE_0) +# ... 
+ +# Test recording +audio = rx.record(2048) +rx.wait_record() +print(len(audio.to_bytes())) +``` + +--- + +## 🎊 Success Metrics + +Today we achieved: +- ✅ WiFi connection working +- ✅ Audio recording working (with proper blocking) +- ✅ Format conversion working (4x reduction) +- ✅ Compression working (2x reduction) +- ✅ Network transmission working (chunked streaming) +- ✅ Server transcription working +- ✅ Display output working +- ✅ Button loop working +- ✅ End-to-end pipeline complete! + +**Total:** 9/9 core features working! 🚀 + +Minor tuning needed, but the foundation is solid and ready for wake word integration. + +--- + +**Session Summary:** Massive progress! From zero to working audio transcription pipeline in one session. Overcame significant MicroPython compatibility challenges and memory limitations. Ready for next phase: wake word detection. + +**Status:** ✅ Ready for Linux serial access and fine-tuning +**Next Session:** Tune recording duration, then integrate Mycroft Precise wake word detection + +--- + +*End of Session Report - 2025-12-03* diff --git a/hardware/maixduino/maix_debug_wifi.py b/hardware/maixduino/maix_debug_wifi.py new file mode 100755 index 0000000..fcab3ba --- /dev/null +++ b/hardware/maixduino/maix_debug_wifi.py @@ -0,0 +1,41 @@ +# Debug script to discover WiFi module methods +# This will help us figure out the correct API + +import lcd + +lcd.init() +lcd.clear() + +print("=" * 40) +print("WiFi Module Debug") +print("=" * 40) + +# Try to import WiFi module +try: + from network_esp32 import wifi + print("SUCCESS: Imported network_esp32.wifi") + lcd.draw_string(10, 10, "WiFi module found!", 0xFFFF, 0x0000) + + # List all attributes/methods + print("\nAvailable methods:") + lcd.draw_string(10, 30, "Checking methods...", 0xFFFF, 0x0000) + + attrs = dir(wifi) + y = 50 + for i, attr in enumerate(attrs): + if not attr.startswith('_'): + print(" - " + attr) + if i < 10: # Only show first 10 on screen + lcd.draw_string(10, y, attr[:20], 
0x07E0, 0x0000) + y += 15 + + print("\nTotal methods: " + str(len(attrs))) + +except Exception as e: + print("ERROR importing wifi: " + str(e)) + lcd.draw_string(10, 10, "WiFi import failed!", 0xF800, 0x0000) + lcd.draw_string(10, 30, str(e)[:30], 0xF800, 0x0000) + +print("\n" + "=" * 40) +print("Debug complete - check serial output") +print("=" * 40) diff --git a/hardware/maixduino/maix_discover_modules.py b/hardware/maixduino/maix_discover_modules.py new file mode 100755 index 0000000..476c263 --- /dev/null +++ b/hardware/maixduino/maix_discover_modules.py @@ -0,0 +1,51 @@ +# Discover what network/WiFi modules are actually available +import lcd +import sys + +lcd.init() +lcd.clear() + +print("=" * 40) +print("Module Discovery") +print("=" * 40) + +# Try different possible module names +modules_to_try = [ + "network", + "network_esp32", + "network_esp8285", + "esp32_spi", + "esp8285", + "wifi", + "ESP32_SPI", + "WIFI" +] + +found = [] +y = 10 + +for module_name in modules_to_try: + try: + mod = __import__(module_name) + msg = "FOUND: " + module_name + print(msg) + lcd.draw_string(10, y, msg[:25], 0x07E0, 0x0000) # Green + y += 15 + found.append(module_name) + + # Show methods + print(" Methods: " + str(dir(mod))) + + except Exception as e: + msg = "NONE: " + module_name + print(msg + " (" + str(e) + ")") + +print("\n" + "=" * 40) +if found: + print("Found modules: " + str(found)) + lcd.draw_string(10, y + 20, "Found: " + str(len(found)), 0xFFFF, 0x0000) +else: + print("No WiFi modules found!") + lcd.draw_string(10, y + 20, "No WiFi found!", 0xF800, 0x0000) + +print("=" * 40) diff --git a/hardware/maixduino/maix_simple_record_test.py b/hardware/maixduino/maix_simple_record_test.py new file mode 100644 index 0000000..0d9db28 --- /dev/null +++ b/hardware/maixduino/maix_simple_record_test.py @@ -0,0 +1,461 @@ +# Simple Audio Recording and Transcription Test +# Record audio for 3 seconds, send to server, display transcription +# +# This tests the full audio pipeline 
without wake word detection + +import time +import lcd +import socket +import struct +from Maix import GPIO, I2S +from fpioa_manager import fm + +# ===== CONFIGURATION ===== +# Load credentials from secrets.py (gitignored) +try: + from secrets import SECRETS +except ImportError: + SECRETS = {} + +WIFI_SSID = "Tell My WiFi Love Her" +WIFI_PASSWORD = SECRETS.get("wifi_password", "") # set in secrets.py +SERVER_HOST = "10.1.10.71" +SERVER_PORT = 3006 +RECORD_SECONDS = 1 # Reduced to 1 second to save memory +SAMPLE_RATE = 16000 +# ========================== + +# Colors +def rgb_to_int(r, g, b): + return (r << 16) | (g << 8) | b + +COLOR_BLACK = 0 +COLOR_WHITE = rgb_to_int(255, 255, 255) +COLOR_RED = rgb_to_int(255, 0, 0) +COLOR_GREEN = rgb_to_int(0, 255, 0) +COLOR_BLUE = rgb_to_int(0, 0, 255) +COLOR_YELLOW = rgb_to_int(255, 255, 0) +COLOR_CYAN = 0x00FFFF # Cyan: rgb_to_int(0, 255, 255) + +def display_msg(msg, color=COLOR_WHITE, y=50, clear=False): + """Display message on LCD""" + if clear: + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, y, msg[:30], color, COLOR_BLACK) + print(msg) + +def init_wifi(): + """Initialize WiFi connection""" + from network import ESP32_SPI + + lcd.init() + lcd.clear(COLOR_BLACK) + display_msg("Connecting WiFi...", COLOR_BLUE, 10) + + # Register ESP32 SPI pins + fm.register(25, fm.fpioa.GPIOHS10, force=True) # CS + fm.register(8, fm.fpioa.GPIOHS11, force=True) # RST + fm.register(9, fm.fpioa.GPIOHS12, force=True) # RDY + fm.register(28, fm.fpioa.GPIOHS13, force=True) # MOSI + fm.register(26, fm.fpioa.GPIOHS14, force=True) # MISO + fm.register(27, fm.fpioa.GPIOHS15, force=True) # SCLK + + nic = ESP32_SPI( + cs=fm.fpioa.GPIOHS10, rst=fm.fpioa.GPIOHS11, rdy=fm.fpioa.GPIOHS12, + mosi=fm.fpioa.GPIOHS13, miso=fm.fpioa.GPIOHS14, sclk=fm.fpioa.GPIOHS15 + ) + + nic.connect(WIFI_SSID, WIFI_PASSWORD) + + # Wait for connection + timeout = 20 + while timeout > 0: + time.sleep(1) + if nic.isconnected(): + ip = nic.ifconfig()[0] + display_msg("WiFi OK: " + 
str(ip), COLOR_GREEN, 30) + return nic + timeout -= 1 + + display_msg("WiFi FAILED!", COLOR_RED, 30) + return None + +def init_audio(): + """Initialize I2S audio""" + display_msg("Init audio...", COLOR_BLUE, 50) + + # Register I2S pins + fm.register(20, fm.fpioa.I2S0_IN_D0, force=True) + fm.register(19, fm.fpioa.I2S0_WS, force=True) + fm.register(18, fm.fpioa.I2S0_SCLK, force=True) + + # Initialize I2S + rx = I2S(I2S.DEVICE_0) + rx.channel_config(rx.CHANNEL_0, rx.RECEIVER, align_mode=I2S.STANDARD_MODE) + rx.set_sample_rate(SAMPLE_RATE) + + display_msg("Audio OK!", COLOR_GREEN, 70) + return rx + +def convert_to_mono_16bit(audio_data): + """Convert audio to mono 16-bit by returning a slice""" + expected_size = SAMPLE_RATE * RECORD_SECONDS * 2 # 16-bit mono + actual_size = len(audio_data) + + print("Expected size: " + str(expected_size) + ", Actual: " + str(actual_size)) + + # If we got 4x the expected data, downsample to mono + if actual_size == expected_size * 4: + print("Extracting mono from stereo/32-bit...") + # Create new buffer with only the data we need (every 4th pair of bytes) + mono_data = bytearray(expected_size) + write_pos = 0 + # Read every 4 bytes, take first 2 bytes only + for read_pos in range(0, actual_size, 4): + if write_pos + 1 < expected_size and read_pos + 1 < actual_size: + mono_data[write_pos] = audio_data[read_pos] + mono_data[write_pos + 1] = audio_data[read_pos + 1] + write_pos += 2 + + # Free original buffer explicitly + audio_data = None + return mono_data + + # If we got 2x the expected data, extract mono + elif actual_size == expected_size * 2: + print("Extracting mono from stereo...") + mono_data = bytearray(expected_size) + write_pos = 0 + for read_pos in range(0, actual_size, 4): + if write_pos + 1 < expected_size and read_pos + 1 < actual_size: + mono_data[write_pos] = audio_data[read_pos] + mono_data[write_pos + 1] = audio_data[read_pos + 1] + write_pos += 2 + + # Free original + audio_data = None + return mono_data + + # 
Otherwise assume it's already correct format
+    print("Audio data appears to be correct format")
+    return audio_data
+
+def record_audio(i2s_dev, seconds):
+    """Record audio for specified seconds using chunked recording with wait"""
+    # Clear screen and show big recording indicator
+    lcd.clear(COLOR_BLACK)
+
+    # Show large "RECORDING" text
+    display_msg("*** RECORDING ***", COLOR_RED, 60)
+    display_msg("Speak now!", COLOR_YELLOW, 100)
+    display_msg("(listening...)", COLOR_WHITE, 130)
+
+    chunk_size = 2048
+    channels = 1
+
+    # Calculate number of chunks needed
+    frame_cnt = seconds * SAMPLE_RATE // chunk_size
+    print("Recording " + str(frame_cnt) + " frames...")
+
+    # Recording loop with wait
+    all_chunks = []
+    for i in range(frame_cnt):
+        # Start recording this chunk
+        audio_chunk = i2s_dev.record(chunk_size * channels)
+
+        # CRITICAL: Wait for recording to complete
+        i2s_dev.wait_record()
+
+        # Convert to bytes and store
+        chunk_bytes = audio_chunk.to_bytes()
+        all_chunks.append(chunk_bytes)
+
+    # Combine all chunks
+    print("Combining " + str(len(all_chunks)) + " chunks...")
+    audio_data = bytearray()
+    for chunk in all_chunks:
+        audio_data.extend(chunk)
+
+    print("Recorded " + str(len(audio_data)) + " bytes")
+
+    # Convert to mono 16-bit if needed
+    audio_data = convert_to_mono_16bit(audio_data)
+    print("Final size: " + str(len(audio_data)) + " bytes")
+
+    return audio_data
+
+def compress_ulaw(data):
+    """Compress 16-bit PCM to 8-bit μ-law (50% size reduction)"""
+    # μ-law compression lookup table (simplified)
+    BIAS = 0x84
+    CLIP = 32635
+
+    compressed = bytearray()
+
+    # Process 16-bit samples (2 bytes each)
+    for i in range(0, len(data), 2):
+        # Get 16-bit sample (little endian)
+        sample = struct.unpack('<h', data[i:i+2])[0]
+
+        # NOTE(review): the sign/clip section below was lost to extraction
+        # garbling (text from "<h" to the ">" of "sample > CLIP" was
+        # stripped); reconstructed per standard G.711 μ-law — verify
+        # against the original file.
+        # Get sign bit and work with the magnitude
+        sign = 0
+        if sample < 0:
+            sign = 0x80
+            sample = -sample
+
+        # Clip magnitude
+        if sample > CLIP:
+            sample = CLIP
+
+        # Add bias
+        sample = sample + BIAS
+
+        # Find exponent (position of highest bit)
+        exponent = 7
+        for exp in range(7, -1, -1):
+            if sample & (1 << (exp + 7)):
+                exponent = exp
+                break
+
+        # Get mantissa (top 4 bits after exponent)
+        mantissa = (sample >> (exponent + 3)) & 0x0F
+
+        # Combine: sign (1 bit) + exponent (3 bits) + mantissa (4 bits)
+        ulaw_byte = sign | (exponent << 4) | mantissa
+
+        # Invert bits (μ-law standard)
+        compressed.append(ulaw_byte ^ 0xFF)
+
+    return compressed
+
+def create_wav_header(data_size, sample_rate=16000, is_ulaw=False):
+    """Create WAV file header"""
+    header = bytearray()
+
+    # RIFF header
+    header.extend(b'RIFF')
+    header.extend(struct.pack('<I', 36 + data_size))
+    header.extend(b'WAVE')
+
+    # NOTE(review): the fmt/data chunks below were lost to extraction
+    # garbling (text from "<I" to a later ">" was stripped); reconstructed
+    # per the RIFF/WAVE spec (format code 7 = G.711 μ-law, 1 = PCM) —
+    # verify against the original file.
+    # fmt chunk
+    channels = 1
+    bits_per_sample = 8 if is_ulaw else 16
+    audio_format = 7 if is_ulaw else 1
+    byte_rate = sample_rate * channels * (bits_per_sample // 8)
+    block_align = channels * (bits_per_sample // 8)
+    header.extend(b'fmt ')
+    header.extend(struct.pack('<I', 16))
+    header.extend(struct.pack('<H', audio_format))
+    header.extend(struct.pack('<H', channels))
+    header.extend(struct.pack('<I', sample_rate))
+    header.extend(struct.pack('<I', byte_rate))
+    header.extend(struct.pack('<H', block_align))
+    header.extend(struct.pack('<H', bits_per_sample))
+
+    # data chunk
+    header.extend(b'data')
+    header.extend(struct.pack('<I', data_size))
+
+    return header
+
+def send_to_server(audio_data):
+    """Compress audio, POST it to the transcription server, return the text"""
+    try:
+        # NOTE(review): this function head was lost to extraction garbling;
+        # the compress call and the print below are evidenced by the
+        # surviving tail (' " + str(len(compressed_data)) + " bytes")').
+        compressed_data = compress_ulaw(audio_data)
+        print("Compressed: " + str(len(audio_data)) + " -> " + str(len(compressed_data)) + " bytes")
+
+        # Update display
+        display_msg("Sending to server", COLOR_WHITE, 130)
+
+        # Create WAV file with μ-law format
+        wav_header = create_wav_header(len(compressed_data), is_ulaw=True)
+        wav_size = len(wav_header) + len(compressed_data)
+
+        # Simple HTTP POST with raw WAV data
+        headers = "POST /transcribe HTTP/1.1\r\n"
+        headers += "Host: " + SERVER_HOST + "\r\n"
+        headers += "Content-Type: audio/wav\r\n"
+        headers += "Content-Length: " + str(wav_size) + "\r\n"
+        headers += "Connection: close\r\n\r\n"
+
+        # Connect with better socket settings
+        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        s.settimeout(30)
+
+        # Try to set socket options for better stability
+        try:
+            s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+        except:
+            pass  # Some MicroPython builds don't support this
+
+        print("Connecting to " + SERVER_HOST + ":" + str(SERVER_PORT))
+        s.connect((SERVER_HOST, SERVER_PORT))
+
+        # Send headers
+        print("Sending headers...")
+        sent = s.send(headers.encode())
+        print("Sent " + str(sent) + " bytes of headers")
+
+        # Send WAV header
+        print("Sending WAV header...")
+        sent = s.send(wav_header)
+        print("Sent " + str(sent) + " bytes of WAV header")
+
+        # Send audio data in small chunks with delay
+        print("Sending audio data (" + str(len(compressed_data)) + " bytes)...")
+        chunk_size = 512  # Even smaller chunks for stability
+        total_chunks = (len(compressed_data) + chunk_size - 1) // chunk_size
+
+        bytes_sent = 0
+        for i in range(0, len(compressed_data), 
chunk_size): + chunk = compressed_data[i:i+chunk_size] + try: + sent = s.send(chunk) + bytes_sent += sent + chunk_num = i // chunk_size + 1 + if chunk_num % 10 == 0: # Progress update every 10 chunks + print("Sent " + str(bytes_sent) + "/" + str(len(compressed_data)) + " bytes") + # Small delay to let socket buffer drain + time.sleep_ms(10) + except Exception as e: + print("Send error at byte " + str(bytes_sent) + ": " + str(e)) + raise + + print("All data sent! Total: " + str(bytes_sent) + " bytes") + + # Update display for waiting + lcd.clear(COLOR_BLACK) + display_msg("Transcribing...", COLOR_CYAN, 60) + display_msg("Please wait", COLOR_WHITE, 100) + + # Read response + response = b"" + while True: + chunk = s.recv(1024) + if not chunk: + break + response += chunk + + s.close() + + # Parse response (MicroPython decode doesn't accept keyword args) + try: + response_str = response.decode('utf-8') + except: + response_str = str(response) + print("Response: " + response_str[:200]) + + # Extract JSON from response + if '{"' in response_str: + json_start = response_str.index('{"') + json_str = response_str[json_start:] + + # Simple JSON parsing (MicroPython doesn't have json module) + if '"text":' in json_str: + text_start = json_str.index('"text":') + 7 + text_str = json_str[text_start:] + # Find the value between quotes + if '"' in text_str: + quote_start = text_str.index('"') + 1 + quote_end = text_str.index('"', quote_start) + transcription = text_str[quote_start:quote_end] + return transcription + + return "Error parsing response" + + except Exception as e: + print("Error: " + str(e)) + return "Error: " + str(e) + +def display_transcription(text): + """Display transcription on LCD""" + lcd.clear(COLOR_BLACK) + display_msg("TRANSCRIPTION:", COLOR_GREEN, 10) + + # Simple line splitting every 20 chars + y = 40 + while len(text) > 0: + chunk = text[:20] + display_msg(chunk, COLOR_WHITE, y) + text = text[20:] + y += 20 + if y > 200: + break + + print("Transcription: " 
+ text) + +def main(): + """Main program with loop for multiple recordings""" + print("=" * 40) + print("Simple Audio Recording Test") + print("=" * 40) + + # Initialize + nic = init_wifi() + if not nic: + return + + i2s = init_audio() + + # Setup button (boot button on GPIO 16) + fm.register(16, fm.fpioa.GPIOHS0, force=True) + button = GPIO(GPIO.GPIOHS0, GPIO.IN, GPIO.PULL_UP) + + display_msg("Ready!", COLOR_GREEN, 110, clear=True) + display_msg("Press BOOT button", COLOR_WHITE, 130) + display_msg("to record", COLOR_WHITE, 150) + print("Press BOOT button to record, or Ctrl+C to exit") + + recording_count = 0 + + # Main loop + while True: + # Wait for button press (button is active low) + if button.value() == 0: + recording_count += 1 + print("\n--- Recording #" + str(recording_count) + " ---") + + # Debounce - wait for button release + while button.value() == 0: + time.sleep_ms(10) + + # Give user time to prepare (countdown) + lcd.clear(COLOR_BLACK) + display_msg("GET READY!", COLOR_YELLOW, 80) + display_msg("3...", COLOR_WHITE, 120) + time.sleep(1) + display_msg("2...", COLOR_WHITE, 140) + time.sleep(1) + display_msg("1...", COLOR_WHITE, 160) + time.sleep(1) + + # Record + audio_data = record_audio(i2s, RECORD_SECONDS) + + # Send to server + transcription = send_to_server(audio_data) + + # Display result + display_transcription(transcription) + + # Wait a bit before showing ready again + time.sleep(2) + + # Show ready for next recording + display_msg("Ready!", COLOR_GREEN, 110, clear=True) + display_msg("Press BOOT button", COLOR_WHITE, 130) + next_count = recording_count + 1 + display_msg("to record (#" + str(next_count) + ")", COLOR_WHITE, 150) + print("Ready for next recording. 
Press BOOT button.") + + time.sleep_ms(50) # Small delay to reduce CPU usage + +# Run main +main() + diff --git a/hardware/maixduino/maix_test_simple.py b/hardware/maixduino/maix_test_simple.py new file mode 100644 index 0000000..ae89ee0 --- /dev/null +++ b/hardware/maixduino/maix_test_simple.py @@ -0,0 +1,252 @@ +# Maix Duino - Simple Test Script +# Copy/paste this into MaixPy IDE and click RUN +# +# This script tests: +# 1. LCD display +# 2. WiFi connectivity +# 3. Network connection to Heimdall server +# 4. I2S audio initialization (without recording yet) + +import time +import lcd +from Maix import GPIO, I2S +from fpioa_manager import fm + +# Import the correct network module +try: + import network + # Create ESP32_SPI instance (for Maix Duino with ESP32) + nic = None # Will be initialized in test_wifi +except Exception as e: + print("Network module import error: " + str(e)) + nic = None + +# ===== CONFIGURATION - EDIT THESE ===== +# Load credentials from secrets.py (gitignored) +try: + from secrets import SECRETS +except ImportError: + SECRETS = {} + +WIFI_SSID = "Tell My WiFi Love Her" # <<< CHANGE THIS +WIFI_PASSWORD = SECRETS.get("wifi_password", "") # set in secrets.py # <<< CHANGE THIS +SERVER_URL = "http://10.1.10.71:3006" # Heimdall voice server +# ======================================= + +# Colors (as tuples for easy reference) +COLOR_BLACK = (0, 0, 0) +COLOR_WHITE = (255, 255, 255) +COLOR_RED = (255, 0, 0) +COLOR_GREEN = (0, 255, 0) +COLOR_BLUE = (0, 0, 255) +COLOR_YELLOW = (255, 255, 0) + +def display_msg(msg, color=COLOR_WHITE, y=50): + """Display message on LCD""" + # lcd.draw_string needs RGB as separate ints: lcd.draw_string(x, y, text, color_int, bg_color_int) + # Convert RGB tuple to single integer: (R << 16) | (G << 8) | B + color_int = (color[0] << 16) | (color[1] << 8) | color[2] + bg_int = 0 # Black background + lcd.draw_string(10, y, msg, color_int, bg_int) + print(msg) + +def test_lcd(): + """Test LCD display""" + lcd.init() + 
lcd.clear(COLOR_BLACK) + display_msg("MaixDuino Test", COLOR_YELLOW, 10) + display_msg("Initializing...", COLOR_WHITE, 30) + time.sleep(1) + return True + +def test_wifi(): + """Test WiFi connection""" + global nic + display_msg("Connecting WiFi...", COLOR_BLUE, 50) + + try: + # Initialize ESP32_SPI network interface + print("Initializing ESP32_SPI...") + + # Create network interface instance with Maix Duino pins + # Maix Duino ESP32 default pins: + # CS=25, RST=8, RDY=9, MOSI=28, MISO=26, SCLK=27 + from network import ESP32_SPI + from fpioa_manager import fm + from Maix import GPIO + + # Register pins for ESP32 SPI communication + fm.register(25, fm.fpioa.GPIOHS10, force=True) # CS + fm.register(8, fm.fpioa.GPIOHS11, force=True) # RST + fm.register(9, fm.fpioa.GPIOHS12, force=True) # RDY + fm.register(28, fm.fpioa.GPIOHS13, force=True) # MOSI + fm.register(26, fm.fpioa.GPIOHS14, force=True) # MISO + fm.register(27, fm.fpioa.GPIOHS15, force=True) # SCLK + + nic = ESP32_SPI( + cs=fm.fpioa.GPIOHS10, + rst=fm.fpioa.GPIOHS11, + rdy=fm.fpioa.GPIOHS12, + mosi=fm.fpioa.GPIOHS13, + miso=fm.fpioa.GPIOHS14, + sclk=fm.fpioa.GPIOHS15 + ) + + print("Connecting to " + WIFI_SSID + "...") + + # Connect to WiFi (no need to call active() first) + nic.connect(WIFI_SSID, WIFI_PASSWORD) + + # Wait for connection + timeout = 20 + while timeout > 0: + time.sleep(1) + timeout -= 1 + + if nic.isconnected(): + # Successfully connected! + ip_info = nic.ifconfig() + ip = ip_info[0] if ip_info else "Unknown" + display_msg("WiFi OK!", COLOR_GREEN, 70) + display_msg("IP: " + str(ip), COLOR_WHITE, 90) + print("Connected! IP: " + str(ip)) + time.sleep(2) + return True + else: + print("Waiting... 
" + str(timeout) + "s") + + # Timeout reached + display_msg("WiFi FAILED!", COLOR_RED, 70) + print("Connection timeout") + return False + + except Exception as e: + display_msg("WiFi error!", COLOR_RED, 70) + print("WiFi error: " + str(e)) + import sys + sys.print_exception(e) + return False + +def test_server(): + """Test connection to Heimdall server""" + display_msg("Testing server...", COLOR_BLUE, 110) + + try: + # Try socket connection to server + import socket + + url = SERVER_URL + "/health" + print("Trying: " + url) + + # Parse URL to get host and port + host = "10.1.10.71" + port = 3006 + + # Create socket + s = socket.socket() + s.settimeout(5) + + print("Connecting to " + host + ":" + str(port)) + s.connect((host, port)) + + # Send HTTP GET request + request = "GET /health HTTP/1.1\r\nHost: " + host + "\r\nConnection: close\r\n\r\n" + s.send(request.encode()) + + # Read response + response = s.recv(1024).decode() + s.close() + + print("Server response received") + + if "200" in response or "OK" in response: + display_msg("Server OK!", COLOR_GREEN, 130) + print("Server is reachable!") + time.sleep(2) + return True + else: + display_msg("Server responded", COLOR_YELLOW, 130) + print("Response: " + response[:100]) + return True # Still counts as success if we got a response + + except Exception as e: + display_msg("Server FAILED!", COLOR_RED, 130) + error_msg = str(e)[:30] + display_msg(error_msg, COLOR_RED, 150) + print("Server connection failed: " + str(e)) + return False + +def test_audio(): + """Test I2S audio initialization""" + display_msg("Testing audio...", COLOR_BLUE, 170) + + try: + # Register I2S pins (Maix Duino pinout) + fm.register(20, fm.fpioa.I2S0_IN_D0, force=True) + fm.register(19, fm.fpioa.I2S0_WS, force=True) + fm.register(18, fm.fpioa.I2S0_SCLK, force=True) + + # Initialize I2S + rx = I2S(I2S.DEVICE_0) + rx.channel_config(rx.CHANNEL_0, rx.RECEIVER, align_mode=I2S.STANDARD_MODE) + rx.set_sample_rate(16000) + + display_msg("Audio OK!", 
COLOR_GREEN, 190) + print("I2S initialized: " + str(rx)) + time.sleep(2) + return True + except Exception as e: + display_msg("Audio FAILED!", COLOR_RED, 190) + print("Audio init failed: " + str(e)) + return False + +def main(): + """Run all tests""" + print("=" * 40) + print("MaixDuino Voice Assistant Test") + print("=" * 40) + + # Test LCD + if not test_lcd(): + print("LCD test failed!") + return + + # Test WiFi + if not test_wifi(): + print("WiFi test failed!") + red_int = (255 << 16) | (0 << 8) | 0 # Red color + lcd.draw_string(10, 210, "STOPPED - Check WiFi", red_int, 0) + return + + # Test server connection + server_ok = test_server() + + # Test audio + audio_ok = test_audio() + + # Summary + lcd.clear(COLOR_BLACK) + display_msg("=== TEST RESULTS ===", COLOR_YELLOW, 10) + display_msg("LCD: OK", COLOR_GREEN, 40) + display_msg("WiFi: OK", COLOR_GREEN, 60) + + if server_ok: + display_msg("Server: OK", COLOR_GREEN, 80) + else: + display_msg("Server: FAIL", COLOR_RED, 80) + + if audio_ok: + display_msg("Audio: OK", COLOR_GREEN, 100) + else: + display_msg("Audio: FAIL", COLOR_RED, 100) + + if server_ok and audio_ok: + display_msg("Ready for voice app!", COLOR_GREEN, 140) + else: + display_msg("Fix errors first", COLOR_YELLOW, 140) + + print("\nTest complete!") + +# Run the test +if __name__ == "__main__": + main() diff --git a/hardware/maixduino/maix_voice_client.py b/hardware/maixduino/maix_voice_client.py new file mode 100755 index 0000000..9d9f056 --- /dev/null +++ b/hardware/maixduino/maix_voice_client.py @@ -0,0 +1,465 @@ +# Maix Duino Voice Assistant Client +# Path: maix_voice_client.py (upload to Maix Duino SD card) +# +# Purpose and usage: +# This script runs on the Maix Duino board and handles: +# - Wake word detection using KPU +# - Audio capture from I2S microphone +# - Streaming audio to voice processing server +# - Playing back TTS responses +# - LED feedback for user interaction +# +# Requirements: +# - MaixPy firmware (latest version) +# - I2S 
microphone connected +# - Speaker or audio output connected +# - WiFi configured (see config below) +# +# Upload to board: +# 1. Copy this file to SD card as boot.py or main.py +# 2. Update WiFi credentials below +# 3. Update server URL to your Heimdall IP +# 4. Power cycle the board + +import time +import audio +import image +from Maix import GPIO +from fpioa_manager import fm +from machine import I2S +import KPU as kpu +import sensor +import lcd +import gc + +# ----- Configuration ----- + +# WiFi Settings +WIFI_SSID = "YourSSID" +WIFI_PASSWORD = "YourPassword" + +# Server Settings +VOICE_SERVER_URL = "http://10.1.10.71:5000" +PROCESS_ENDPOINT = "/process" + +# Audio Settings +SAMPLE_RATE = 16000 # 16kHz for Whisper +CHANNELS = 1 # Mono +SAMPLE_WIDTH = 2 # 16-bit +CHUNK_SIZE = 1024 + +# Wake Word Settings +WAKE_WORD_THRESHOLD = 0.7 # Confidence threshold (0.0-1.0) +WAKE_WORD_MODEL = "/sd/models/wake_word.kmodel" # Path to wake word model + +# LED Pin for feedback +LED_PIN = 13 # Onboard LED (adjust if needed) + +# Recording Settings +MAX_RECORD_TIME = 10 # Maximum seconds to record after wake word +SILENCE_THRESHOLD = 500 # Amplitude threshold for silence detection +SILENCE_DURATION = 2 # Seconds of silence before stopping recording + +# ----- Color definitions for LCD ----- +COLOR_RED = (255, 0, 0) +COLOR_GREEN = (0, 255, 0) +COLOR_BLUE = (0, 0, 255) +COLOR_YELLOW = (255, 255, 0) +COLOR_BLACK = (0, 0, 0) +COLOR_WHITE = (255, 255, 255) + +# ----- Global Variables ----- +led = None +i2s_dev = None +kpu_task = None +listening = False + + +def init_hardware(): + """Initialize hardware components""" + global led, i2s_dev + + # Initialize LED + fm.register(LED_PIN, fm.fpioa.GPIO0) + led = GPIO(GPIO.GPIO0, GPIO.OUT) + led.value(0) # Turn off initially + + # Initialize LCD + lcd.init() + lcd.clear(COLOR_BLACK) + lcd.draw_string(lcd.width()//2 - 50, lcd.height()//2, + "Initializing...", + lcd.WHITE, lcd.BLACK) + + # Initialize I2S for audio (microphone) + # Note: Pin 
configuration may vary based on your specific hardware + fm.register(20, fm.fpioa.I2S0_IN_D0) + fm.register(19, fm.fpioa.I2S0_WS) + fm.register(18, fm.fpioa.I2S0_SCLK) + + i2s_dev = I2S(I2S.DEVICE_0) + i2s_dev.channel_config(I2S.CHANNEL_0, I2S.RECEIVER, + align_mode=I2S.STANDARD_MODE, + data_width=I2S.RESOLUTION_16_BIT) + i2s_dev.set_sample_rate(SAMPLE_RATE) + + print("Hardware initialized") + + +def init_network(): + """Initialize WiFi connection""" + import network + + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, 50, "Connecting to WiFi...", COLOR_WHITE, COLOR_BLACK) + + wlan = network.WLAN(network.STA_IF) + wlan.active(True) + + if not wlan.isconnected(): + print(f"Connecting to {WIFI_SSID}...") + wlan.connect(WIFI_SSID, WIFI_PASSWORD) + + # Wait for connection + timeout = 20 + while not wlan.isconnected() and timeout > 0: + time.sleep(1) + timeout -= 1 + print(f"Waiting for connection... {timeout}s") + + if not wlan.isconnected(): + print("Failed to connect to WiFi") + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, 50, "WiFi Failed!", COLOR_RED, COLOR_BLACK) + return False + + print("Network connected:", wlan.ifconfig()) + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, 50, "WiFi Connected", COLOR_GREEN, COLOR_BLACK) + lcd.draw_string(10, 70, f"IP: {wlan.ifconfig()[0]}", COLOR_WHITE, COLOR_BLACK) + time.sleep(2) + + return True + + +def load_wake_word_model(): + """Load wake word detection model""" + global kpu_task + + try: + # This is a placeholder - you'll need to train and convert a wake word model + # For now, we'll skip KPU wake word and use a simpler approach + print("Wake word model loading skipped (implement after model training)") + return True + except Exception as e: + print(f"Failed to load wake word model: {e}") + return False + + +def detect_wake_word(): + """ + Detect wake word in audio stream + + Returns: + True if wake word detected, False otherwise + + Note: This is a simplified version. For production, you should: + 1. 
Train a wake word model using Mycroft Precise or similar + 2. Convert the model to .kmodel format for K210 + 3. Load and run inference using KPU + + For now, we'll use a simple amplitude-based trigger + """ + # Simple amplitude-based detection (placeholder) + # Replace with actual KPU inference + + audio_data = i2s_dev.record(CHUNK_SIZE) + + if audio_data: + # Calculate amplitude + amplitude = 0 + for i in range(0, len(audio_data), 2): + sample = int.from_bytes(audio_data[i:i+2], 'little', True) + amplitude += abs(sample) + + amplitude = amplitude / (len(audio_data) // 2) + + # Simple threshold detection (replace with KPU inference) + if amplitude > 3000: # Adjust threshold based on your microphone + return True + + return False + + +def record_audio(max_duration=MAX_RECORD_TIME): + """ + Record audio until silence or max duration + + Returns: + bytes: Recorded audio data in WAV format + """ + print(f"Recording audio (max {max_duration}s)...") + + audio_buffer = bytearray() + start_time = time.time() + silence_start = None + + # Record in chunks + while True: + elapsed = time.time() - start_time + + # Check max duration + if elapsed > max_duration: + print("Max recording duration reached") + break + + # Record chunk + chunk = i2s_dev.record(CHUNK_SIZE) + + if chunk: + audio_buffer.extend(chunk) + + # Calculate amplitude for silence detection + amplitude = 0 + for i in range(0, len(chunk), 2): + sample = int.from_bytes(chunk[i:i+2], 'little', True) + amplitude += abs(sample) + + amplitude = amplitude / (len(chunk) // 2) + + # Silence detection + if amplitude < SILENCE_THRESHOLD: + if silence_start is None: + silence_start = time.time() + elif time.time() - silence_start > SILENCE_DURATION: + print("Silence detected, stopping recording") + break + else: + silence_start = None + + # Update LCD with recording time + if int(elapsed) % 1 == 0: + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, 50, f"Recording... 
{int(elapsed)}s",
+                            COLOR_RED, COLOR_BLACK)
+
+    print(f"Recorded {len(audio_buffer)} bytes")
+
+    # Convert to WAV format
+    return create_wav(audio_buffer)
+
+
+def create_wav(audio_data):
+    """Create WAV file header and combine with audio data"""
+    import struct
+
+    # WAV header
+    sample_rate = SAMPLE_RATE
+    channels = CHANNELS
+    sample_width = SAMPLE_WIDTH
+    data_size = len(audio_data)
+
+    # RIFF header
+    wav = bytearray(b'RIFF')
+    wav.extend(struct.pack('<I', 36 + data_size))
+    wav.extend(b'WAVE')
+
+    # NOTE(review): everything from here through the head of
+    # display_response() was lost to extraction garbling (text from "<I"
+    # to a later ">" was stripped); reconstructed from the surviving call
+    # sites and the RIFF/WAVE spec — verify against the original file.
+    # fmt chunk (PCM)
+    wav.extend(b'fmt ')
+    wav.extend(struct.pack('<I', 16))
+    wav.extend(struct.pack('<H', 1))
+    wav.extend(struct.pack('<H', channels))
+    wav.extend(struct.pack('<I', sample_rate))
+    wav.extend(struct.pack('<I', sample_rate * channels * sample_width))
+    wav.extend(struct.pack('<H', channels * sample_width))
+    wav.extend(struct.pack('<H', sample_width * 8))
+
+    # data chunk
+    wav.extend(b'data')
+    wav.extend(struct.pack('<I', data_size))
+    wav.extend(audio_data)
+
+    return bytes(wav)
+
+
+def send_audio_to_server(wav_data):
+    """POST WAV audio to the voice server and return the parsed JSON reply.
+
+    NOTE(review): reconstructed — the original implementation was lost to
+    extraction garbling. main_loop() expects a dict with 'success',
+    'transcription' and 'response' keys, or None on failure.
+    """
+    try:
+        import urequests
+        url = VOICE_SERVER_URL + PROCESS_ENDPOINT
+        resp = urequests.post(url, data=wav_data,
+                              headers={'Content-Type': 'audio/wav'})
+        result = resp.json()
+        resp.close()
+        return result
+    except Exception as e:
+        print(f"Server request failed: {e}")
+        return None
+
+
+def display_response(text):
+    """Display response text on LCD with simple word wrapping"""
+    lcd.clear(COLOR_BLACK)
+    lcd.draw_string(10, 10, "Response:", COLOR_YELLOW, COLOR_BLACK)
+
+    # Word wrap the text
+    words = text.split(' ')
+    lines = []
+    current_line = ""
+
+    for word in words:
+        test_line = current_line + word + " "
+        if len(test_line) * 8 > lcd.width() - 20:  # Rough character width
+            if current_line:
+                lines.append(current_line.strip())
+            current_line = word + " "
+        else:
+            current_line = test_line
+
+    if current_line:
+        lines.append(current_line.strip())
+
+    # Display lines
+    y = 30
+    for line in lines[:5]:  # Max 5 lines
+        lcd.draw_string(10, y, line, COLOR_GREEN, COLOR_BLACK)
+        y += 20
+
+
+def set_led(state):
+    """Control LED state"""
+    if led:
+        led.value(1 if state else 0)
+
+
+def main_loop():
+    """Main voice assistant loop"""
+    global listening
+
+    # Show ready status
+    lcd.clear(COLOR_BLACK)
+    lcd.draw_string(10, lcd.height()//2 - 10, "Say wake word...",
+                    COLOR_BLUE, COLOR_BLACK)
+
+    print("Voice assistant ready. 
Listening for wake word...") + + while True: + try: + # Listen for wake word + if detect_wake_word(): + print("Wake word detected!") + + # Visual feedback + set_led(True) + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, 50, "Listening...", COLOR_RED, COLOR_BLACK) + + # Small delay to skip the wake word itself + time.sleep(0.5) + + # Record command + audio_data = record_audio() + + # Send to server + response = send_audio_to_server(audio_data) + + if response and response.get('success'): + transcription = response.get('transcription', '') + response_text = response.get('response', 'No response') + + print(f"You said: {transcription}") + print(f"Response: {response_text}") + + # Display response + display_response(response_text) + + # TODO: Play TTS audio response + + else: + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, 50, "Error processing", + COLOR_RED, COLOR_BLACK) + + # Turn off LED + set_led(False) + + # Pause before listening again + time.sleep(2) + + # Reset display + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, lcd.height()//2 - 10, "Say wake word...", + COLOR_BLUE, COLOR_BLACK) + + # Small delay to prevent tight loop + time.sleep(0.1) + + # Garbage collection + if gc.mem_free() < 100000: # If free memory < 100KB + gc.collect() + + except KeyboardInterrupt: + print("Exiting...") + break + except Exception as e: + print(f"Error in main loop: {e}") + time.sleep(1) + + +def main(): + """Main entry point""" + print("=" * 40) + print("Maix Duino Voice Assistant") + print("=" * 40) + + # Initialize hardware + init_hardware() + + # Connect to network + if not init_network(): + print("Failed to initialize network. 
Exiting.") + return + + # Load wake word model (optional) + load_wake_word_model() + + # Start main loop + try: + main_loop() + except Exception as e: + print(f"Fatal error: {e}") + finally: + # Cleanup + set_led(False) + lcd.clear(COLOR_BLACK) + lcd.draw_string(10, lcd.height()//2, "Stopped", + COLOR_RED, COLOR_BLACK) + + +# Run main program +if __name__ == "__main__": + main() diff --git a/hardware/maixduino/secrets.py.example b/hardware/maixduino/secrets.py.example new file mode 100644 index 0000000..67c7d78 --- /dev/null +++ b/hardware/maixduino/secrets.py.example @@ -0,0 +1,7 @@ +# Copy this file to secrets.py and fill in your values +# secrets.py is gitignored — never commit it +SECRETS = { + "wifi_ssid": "YourNetworkName", + "wifi_password": "YourWiFiPassword", + "voice_server_url": "http://10.1.10.71:5000", # replace with your Minerva server IP +} diff --git a/scripts/download_pretrained_models.sh b/scripts/download_pretrained_models.sh new file mode 100755 index 0000000..d1437a7 --- /dev/null +++ b/scripts/download_pretrained_models.sh @@ -0,0 +1,409 @@ +#!/usr/bin/env bash +# +# Path: download_pretrained_models.sh +# +# Purpose and usage: +# Downloads and sets up pre-trained Mycroft Precise wake word models +# - Downloads Hey Mycroft, Hey Jarvis, and other available models +# - Tests each model with microphone +# - Configures voice server to use them +# +# Requirements: +# - Mycroft Precise installed (run setup_precise.sh first) +# - Internet connection for downloads +# - Microphone for testing +# +# Usage: +# ./download_pretrained_models.sh [--test-all] [--model MODEL_NAME] +# +# Author: PRbL Library +# Created: $(date +"%Y-%m-%d") + +# ----- PRbL Color and output functions ----- +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +PURPLE='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +print_status() { + local level="$1" + shift + case "$level" in + "info") echo -e "${BLUE}[INFO]${NC} $*" >&2 ;; + "success") echo -e 
"${GREEN}[SUCCESS]${NC} $*" >&2 ;; + "warning") echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;; + "error") echo -e "${RED}[ERROR]${NC} $*" >&2 ;; + "debug") [[ "$VERBOSE" == "true" ]] && echo -e "${PURPLE}[DEBUG]${NC} $*" >&2 ;; + *) echo -e "$*" >&2 ;; + esac +} + +# ----- Configuration ----- +MODELS_DIR="$HOME/precise-models/pretrained" +TEST_ALL=false +SPECIFIC_MODEL="" +VERBOSE=false + +# Available pre-trained models +declare -A MODELS=( + ["hey-mycroft"]="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz" + ["hey-jarvis"]="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-jarvis.tar.gz" + ["christopher"]="https://github.com/MycroftAI/precise-data/raw/models-dev/christopher.tar.gz" + ["hey-ezra"]="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-ezra.tar.gz" +) + +# ----- Dependency checking ----- +command_exists() { + command -v "$1" &> /dev/null +} + +check_dependencies() { + local missing=() + + if ! command_exists wget; then + missing+=("wget") + fi + + if ! 
command_exists precise-listen; then + missing+=("precise-listen (run setup_precise.sh first)") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + print_status error "Missing dependencies: ${missing[*]}" + return 1 + fi + + return 0 +} + +# ----- Parse arguments ----- +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --test-all) + TEST_ALL=true + shift + ;; + --model) + SPECIFIC_MODEL="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Download and test pre-trained Mycroft Precise wake word models + +Options: + --test-all Download and test all available models + --model NAME Download and test specific model + -v, --verbose Enable verbose output + -h, --help Show this help message + +Available models: + hey-mycroft Original Mycroft wake word (most data) + hey-jarvis Popular alternative + christopher Alternative wake word + hey-ezra Another option + +Examples: + $(basename "$0") --model hey-mycroft + $(basename "$0") --test-all + +EOF + exit 0 + ;; + *) + print_status error "Unknown option: $1" + exit 1 + ;; + esac + done +} + +# ----- Functions ----- + +create_models_directory() { + print_status info "Creating models directory: $MODELS_DIR" + mkdir -p "$MODELS_DIR" || { + print_status error "Failed to create directory" + return 1 + } + return 0 +} + +download_model() { + local model_name="$1" + local model_url="${MODELS[${model_name}]}" + + if [[ -z "$model_url" ]]; then + print_status error "Unknown model: $model_name" + return 1 + fi + + # Check if already downloaded + if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then + print_status info "Model already exists: $model_name" + return 0 + fi + + print_status info "Downloading $model_name..." 
+ + local temp_file="/tmp/${model_name}-$$.tar.gz" + + wget -q --show-progress -O "$temp_file" "$model_url" || { + print_status error "Failed to download $model_name" + rm -f "$temp_file" + return 1 + } + + # Extract + print_status info "Extracting $model_name..." + tar xzf "$temp_file" -C "$MODELS_DIR" || { + print_status error "Failed to extract $model_name" + rm -f "$temp_file" + return 1 + } + + rm -f "$temp_file" + + # Verify extraction + if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then + print_status success "Downloaded: $model_name" + return 0 + else + print_status error "Extraction failed for $model_name" + return 1 + fi +} + +test_model() { + local model_name="$1" + local model_file="$MODELS_DIR/${model_name}.net" + + if [[ ! -f "$model_file" ]]; then + print_status error "Model file not found: $model_file" + return 1 + fi + + print_status info "Testing model: $model_name" + echo "" + echo -e "${CYAN}Instructions:${NC}" + echo " - Speak the wake word: '$model_name'" + echo " - You should see '!' when detected" + echo " - Press Ctrl+C to stop testing" + echo "" + read -p "Press Enter to start test..." + + # Activate conda environment if needed + if command_exists conda; then + eval "$(conda shell.bash hook)" + conda activate precise 2>/dev/null || true + fi + + precise-listen "$model_file" || { + print_status warning "Test interrupted or failed" + return 1 + } + + return 0 +} + +create_multi_wake_config() { + print_status info "Creating multi-wake-word configuration..." 
+ + local config_file="$MODELS_DIR/multi-wake-config.sh" + + cat > "$config_file" << 'EOF' +#!/bin/bash +# Multi-wake-word configuration +# Generated by download_pretrained_models.sh + +# Start voice server with multiple wake words +cd ~/voice-assistant + +# List of wake word models +MODELS="" + +EOF + + # Add each downloaded model to config + for model_name in "${!MODELS[@]}"; do + if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then + echo "# Found: $model_name" >> "$config_file" + echo "MODELS=\"\${MODELS}${model_name}:$MODELS_DIR/${model_name}.net:0.5,\"" >> "$config_file" + fi + done + + cat >> "$config_file" << 'EOF' + +# Remove trailing comma +MODELS="${MODELS%,}" + +# Activate environment +eval "$(conda shell.bash hook)" +conda activate precise + +# Start server +python voice_server.py \ + --enable-precise \ + --precise-models "$MODELS" \ + --ha-token "$HA_TOKEN" + +EOF + + chmod +x "$config_file" + + print_status success "Created: $config_file" + echo "" + print_status info "To use multiple wake words, run:" + print_status info " $config_file" + + return 0 +} + +list_downloaded_models() { + print_status info "Downloaded models in $MODELS_DIR:" + echo "" + + local count=0 + for model_name in "${!MODELS[@]}"; do + if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then + local size=$(du -h "$MODELS_DIR/${model_name}.net" | cut -f1) + echo -e " ${GREEN}✓${NC} ${model_name}.net (${size})" + ((count++)) + else + echo -e " ${YELLOW}○${NC} ${model_name}.net (not downloaded)" + fi + done + + echo "" + print_status success "Total downloaded: $count" + + return 0 +} + +compare_models() { + print_status info "Model comparison:" + echo "" + + cat << 'EOF' +┌─────────────────┬──────────────┬─────────────┬─────────────────┐ +│ Wake Word │ Popularity │ Difficulty │ Recommended For │ +├─────────────────┼──────────────┼─────────────┼─────────────────┤ +│ Hey Mycroft │ ★★★★★ │ Easy │ Default choice │ +│ Hey Jarvis │ ★★★★☆ │ Easy │ Pop culture │ +│ Christopher │ ★★☆☆☆ │ Medium │ 
Unique name │ +│ Hey Ezra │ ★★☆☆☆ │ Medium │ Alternative │ +└─────────────────┴──────────────┴─────────────┴─────────────────┘ + +Recommendations: + - Start with: Hey Mycroft (most training data) + - For media: Hey Jarvis (Plex/entertainment) + - For uniqueness: Christopher or Hey Ezra + +Multiple wake words: + - Use different wake words for different contexts + - Example: "Hey Mycroft" for commands, "Hey Jarvis" for media + - Server can run 2-3 models simultaneously + +EOF +} + +# ----- Main ----- +main() { + print_status info "Mycroft Precise Pre-trained Model Downloader" + echo "" + + # Parse arguments + parse_args "$@" + + # Check dependencies + check_dependencies || exit 1 + + # Create directory + create_models_directory || exit 1 + + # Show comparison + if [[ -z "$SPECIFIC_MODEL" && "$TEST_ALL" != "true" ]]; then + compare_models + echo "" + print_status info "Use --model to download a specific model" + print_status info "Use --test-all to download all models" + echo "" + list_downloaded_models + exit 0 + fi + + # Download models + if [[ -n "$SPECIFIC_MODEL" ]]; then + # Download specific model + download_model "$SPECIFIC_MODEL" || exit 1 + + # Offer to test + echo "" + read -p "Test this model now? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + test_model "$SPECIFIC_MODEL" + fi + + elif [[ "$TEST_ALL" == "true" ]]; then + # Download all models + for model_name in "${!MODELS[@]}"; do + download_model "$model_name" + echo "" + done + + # Offer to test each + echo "" + print_status success "All models downloaded" + echo "" + read -p "Test each model? 
(y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + for model_name in "${!MODELS[@]}"; do + if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then + echo "" + test_model "$model_name" + fi + done + fi + fi + + # List results + echo "" + list_downloaded_models + + # Create multi-wake config if multiple models + local model_count=$(find "$MODELS_DIR" -name "*.net" | wc -l) + if [[ $model_count -gt 1 ]]; then + echo "" + create_multi_wake_config + fi + + # Final instructions + echo "" + print_status success "Setup complete!" + echo "" + print_status info "Next steps:" + print_status info "1. Test a model: precise-listen $MODELS_DIR/hey-mycroft.net" + print_status info "2. Use in server: python voice_server.py --enable-precise --precise-model $MODELS_DIR/hey-mycroft.net" + print_status info "3. Fine-tune: precise-train -e 30 custom.net . --from-checkpoint $MODELS_DIR/hey-mycroft.net" + + if [[ $model_count -gt 1 ]]; then + echo "" + print_status info "For multiple wake words:" + print_status info " $MODELS_DIR/multi-wake-config.sh" + fi +} + +# Run main +main "$@" diff --git a/scripts/quick_start_hey_mycroft.sh b/scripts/quick_start_hey_mycroft.sh new file mode 100755 index 0000000..555de47 --- /dev/null +++ b/scripts/quick_start_hey_mycroft.sh @@ -0,0 +1,456 @@ +#!/usr/bin/env bash +# +# Path: quick_start_hey_mycroft.sh +# +# Purpose and usage: +# Zero-training quick start using pre-trained "Hey Mycroft" model +# Gets you a working voice assistant in 5 minutes! 
+# +# Requirements: +# - Heimdall already setup (ran setup_voice_assistant.sh) +# - Mycroft Precise installed (ran setup_precise.sh) +# +# Usage: +# ./quick_start_hey_mycroft.sh [--test-only] +# +# Author: PRbL Library + +# ----- PRbL Color and output functions ----- +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +PURPLE='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' + +print_status() { + local level="$1" + shift + case "$level" in + "info") echo -e "${BLUE}[INFO]${NC} $*" >&2 ;; + "success") echo -e "${GREEN}[SUCCESS]${NC} $*" >&2 ;; + "warning") echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;; + "error") echo -e "${RED}[ERROR]${NC} $*" >&2 ;; + *) echo -e "$*" >&2 ;; + esac +} + +# ----- Configuration ----- +MODELS_DIR="$HOME/precise-models/pretrained" +MODEL_URL="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz" +MODEL_NAME="hey-mycroft" +TEST_ONLY=false + +# ----- Parse arguments ----- +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --test-only) + TEST_ONLY=true + shift + ;; + -h|--help) + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Quick start with pre-trained "Hey Mycroft" wake word model. +No training required! + +Options: + --test-only Just test the model, don't start server + -h, --help Show this help + +Examples: + $(basename "$0") # Download, test, and run server + $(basename "$0") --test-only # Just download and test + +EOF + exit 0 + ;; + *) + print_status error "Unknown option: $1" + exit 1 + ;; + esac + done +} + +# ----- Functions ----- + +check_prerequisites() { + print_status info "Checking prerequisites..." + + # Check conda + if ! command -v conda &> /dev/null; then + print_status error "conda not found" + return 1 + fi + + # Check precise environment + if ! conda env list | grep -q "^precise\s"; then + print_status error "Precise environment not found" + print_status info "Run: ./setup_precise.sh first" + return 1 + fi + + # Check voice-assistant directory + if [[ ! 
-d "$HOME/voice-assistant" ]]; then + print_status error "Voice assistant not setup" + print_status info "Run: ./setup_voice_assistant.sh first" + return 1 + fi + + print_status success "Prerequisites OK" + return 0 +} + +download_pretrained_model() { + print_status info "Downloading pre-trained 'Hey Mycroft' model..." + + # Create directory + mkdir -p "$MODELS_DIR" + + # Check if already downloaded + if [[ -f "$MODELS_DIR/${MODEL_NAME}.net" ]]; then + print_status info "Model already downloaded" + return 0 + fi + + # Download + cd "$MODELS_DIR" || return 1 + + print_status info "Fetching from GitHub..." + wget -q --show-progress "$MODEL_URL" || { + print_status error "Failed to download model" + return 1 + } + + # Extract + print_status info "Extracting model..." + tar xzf hey-mycroft.tar.gz || { + print_status error "Failed to extract model" + return 1 + } + + # Verify + if [[ ! -f "${MODEL_NAME}.net" ]]; then + print_status error "Model file not found after extraction" + return 1 + fi + + print_status success "Model downloaded: $MODELS_DIR/${MODEL_NAME}.net" + return 0 +} + +test_model() { + print_status info "Testing wake word model..." + + cd "$MODELS_DIR" || return 1 + + # Activate conda + eval "$(conda shell.bash hook)" + conda activate precise || { + print_status error "Failed to activate precise environment" + return 1 + } + + cat << EOF + +${CYAN}═══════════════════════════════════════════════════${NC} +${CYAN} Wake Word Test: "Hey Mycroft"${NC} +${CYAN}═══════════════════════════════════════════════════${NC} + +${YELLOW}Instructions:${NC} +1. Speak "Hey Mycroft" into your microphone +2. You should see ${GREEN}"!"${NC} when detected +3. Try other phrases - should ${RED}not${NC} trigger +4. Press ${RED}Ctrl+C${NC} when done testing + +${CYAN}Starting in 3 seconds...${NC} + +EOF + + sleep 3 + + # Test the model + precise-listen "${MODEL_NAME}.net" || { + print_status error "Model test failed" + return 1 + } + + print_status success "Model test complete!" 
+ return 0 +} + +update_config() { + print_status info "Updating voice assistant configuration..." + + local config_file="$HOME/voice-assistant/config/.env" + + if [[ ! -f "$config_file" ]]; then + print_status error "Config file not found: $config_file" + return 1 + fi + + # Update PRECISE_MODEL if exists, otherwise add it + if grep -q "^PRECISE_MODEL=" "$config_file"; then + sed -i "s|^PRECISE_MODEL=.*|PRECISE_MODEL=$MODELS_DIR/${MODEL_NAME}.net|" "$config_file" + else + echo "PRECISE_MODEL=$MODELS_DIR/${MODEL_NAME}.net" >> "$config_file" + fi + + # Update sensitivity if not set + if ! grep -q "^PRECISE_SENSITIVITY=" "$config_file"; then + echo "PRECISE_SENSITIVITY=0.5" >> "$config_file" + fi + + print_status success "Configuration updated" + return 0 +} + +start_server() { + print_status info "Starting voice assistant server..." + + cd "$HOME/voice-assistant" || return 1 + + # Activate conda + eval "$(conda shell.bash hook)" + conda activate precise || { + print_status error "Failed to activate environment" + return 1 + } + + cat << EOF + +${CYAN}═══════════════════════════════════════════════════${NC} +${GREEN} Starting Voice Assistant Server${NC} +${CYAN}═══════════════════════════════════════════════════${NC} + +${YELLOW}Configuration:${NC} + Wake word: ${GREEN}Hey Mycroft${NC} + Model: ${MODEL_NAME}.net + Server: http://0.0.0.0:5000 + +${YELLOW}What to do next:${NC} + 1. Wait for "Precise listening started" message + 2. Say ${GREEN}"Hey Mycroft"${NC} to test wake word + 3. Say a command like ${GREEN}"turn on the lights"${NC} + 4. Check server logs for activity + +${YELLOW}Press Ctrl+C to stop the server${NC} + +${CYAN}Starting server...${NC} + +EOF + + # Check if HA token is set + if ! grep -q "^HA_TOKEN=..*" config/.env; then + print_status warning "Home Assistant token not set!" + print_status warning "Commands won't execute without it." + print_status info "Edit config/.env and add your HA token" + echo + read -p "Continue anyway? 
(y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + return 1 + fi + fi + + # Start server + python voice_server.py \ + --enable-precise \ + --precise-model "$MODELS_DIR/${MODEL_NAME}.net" \ + --precise-sensitivity 0.5 + + return $? +} + +create_systemd_service() { + print_status info "Creating systemd service..." + + local service_file="/etc/systemd/system/voice-assistant.service" + + # Check if we should update existing service + if [[ -f "$service_file" ]]; then + print_status warning "Service file already exists" + read -p "Update with Hey Mycroft configuration? (y/N): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + return 0 + fi + fi + + # Create service file + sudo tee "$service_file" > /dev/null << EOF +[Unit] +Description=Voice Assistant with Hey Mycroft Wake Word +After=network.target + +[Service] +Type=simple +User=$USER +WorkingDirectory=$HOME/voice-assistant +Environment="PATH=$HOME/miniconda3/envs/precise/bin:/usr/local/bin:/usr/bin:/bin" +EnvironmentFile=$HOME/voice-assistant/config/.env +ExecStart=$HOME/miniconda3/envs/precise/bin/python voice_server.py \\ + --enable-precise \\ + --precise-model $MODELS_DIR/${MODEL_NAME}.net \\ + --precise-sensitivity 0.5 +Restart=on-failure +RestartSec=10 +StandardOutput=append:$HOME/voice-assistant/logs/voice_assistant.log +StandardError=append:$HOME/voice-assistant/logs/voice_assistant_error.log + +[Install] +WantedBy=multi-user.target +EOF + + # Reload systemd + sudo systemctl daemon-reload + + print_status success "Systemd service created" + + cat << EOF + +${CYAN}To enable and start the service:${NC} + sudo systemctl enable voice-assistant + sudo systemctl start voice-assistant + sudo systemctl status voice-assistant + +${CYAN}To view logs:${NC} + journalctl -u voice-assistant -f + +EOF + + read -p "Enable service now? 
(y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + sudo systemctl enable voice-assistant + sudo systemctl start voice-assistant + sleep 2 + sudo systemctl status voice-assistant + fi +} + +print_next_steps() { + cat << EOF + +${GREEN}═══════════════════════════════════════════════════${NC} +${GREEN} Success! Your voice assistant is ready!${NC} +${GREEN}═══════════════════════════════════════════════════${NC} + +${CYAN}What you have:${NC} + ✓ Pre-trained "Hey Mycroft" wake word + ✓ Voice assistant server configured + ✓ Ready to control Home Assistant + +${CYAN}Quick test:${NC} + 1. Say: ${GREEN}"Hey Mycroft"${NC} + 2. Say: ${GREEN}"Turn on the living room lights"${NC} + 3. Check if command executed + +${CYAN}Next steps:${NC} + 1. ${YELLOW}Configure Home Assistant entities${NC} + Edit: ~/voice-assistant/config/.env + Add: HA_TOKEN=your_token_here + + 2. ${YELLOW}Add more entity mappings${NC} + Edit: voice_server.py + Update: IntentParser.ENTITY_MAP + + 3. ${YELLOW}Fine-tune for your voice (optional)${NC} + cd ~/precise-models/hey-mycroft-custom + ./1-record-wake-word.sh + # Record 20-30 samples + precise-train -e 30 hey-mycroft-custom.net . \\ + --from-checkpoint $MODELS_DIR/${MODEL_NAME}.net + + 4. ${YELLOW}Setup Maix Duino${NC} + See: QUICKSTART.md Phase 2 + +${CYAN}Useful commands:${NC} + # Test wake word only + cd $MODELS_DIR && conda activate precise + precise-listen ${MODEL_NAME}.net + + # Check server health + curl http://localhost:5000/health + + # Monitor logs + journalctl -u voice-assistant -f + +${CYAN}Documentation:${NC} + README.md - Project overview + WAKE_WORD_ADVANCED.md - Multiple wake words guide + QUICKSTART.md - Complete setup guide + +${GREEN}Happy voice assisting! 
🎙️${NC} + +EOF +} + +# ----- Main ----- +main() { + cat << EOF +${CYAN}═══════════════════════════════════════════════════${NC} +${CYAN} Quick Start: Hey Mycroft Wake Word${NC} +${CYAN}═══════════════════════════════════════════════════${NC} + +${YELLOW}This script will:${NC} + 1. Download pre-trained "Hey Mycroft" model + 2. Test wake word detection + 3. Configure voice assistant server + 4. Start the server (optional) + +${YELLOW}Total time: ~5 minutes (no training!)${NC} + +EOF + + # Parse arguments + parse_args "$@" + + # Check prerequisites + check_prerequisites || exit 1 + + # Download model + download_pretrained_model || exit 1 + + # Test model + print_status info "Ready to test wake word" + read -p "Test now? (Y/n): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Nn]$ ]]; then + test_model + fi + + # If test-only mode, stop here + if [[ "$TEST_ONLY" == "true" ]]; then + print_status success "Test complete!" + print_status info "Model location: $MODELS_DIR/${MODEL_NAME}.net" + exit 0 + fi + + # Update configuration + update_config || exit 1 + + # Start server + read -p "Start voice assistant server now? (Y/n): " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Nn]$ ]]; then + start_server + else + # Offer to create systemd service + read -p "Create systemd service instead? 
(y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + create_systemd_service + fi + fi + + # Print next steps + print_next_steps +} + +# Run main +main "$@" diff --git a/scripts/setup_precise.sh b/scripts/setup_precise.sh new file mode 100755 index 0000000..5d13368 --- /dev/null +++ b/scripts/setup_precise.sh @@ -0,0 +1,630 @@ +#!/usr/bin/env bash +# +# Path: setup_precise.sh +# +# Purpose and usage: +# Sets up Mycroft Precise wake word detection on Heimdall +# - Creates conda environment for Precise +# - Installs TensorFlow 1.x and dependencies +# - Downloads precise-engine +# - Sets up training directories +# - Provides helper scripts for training +# +# Requirements: +# - conda/miniconda installed +# - Internet connection for downloads +# - Microphone for recording samples +# +# Usage: +# ./setup_precise.sh [--wake-word "phrase"] [--env-name NAME] +# +# Author: PRbL Library +# Created: $(date +"%Y-%m-%d") + +# ----- PRbL Color and output functions ----- +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +PURPLE='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +print_status() { + local level="$1" + shift + case "$level" in + "info") echo -e "${BLUE}[INFO]${NC} $*" >&2 ;; + "success") echo -e "${GREEN}[SUCCESS]${NC} $*" >&2 ;; + "warning") echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;; + "error") echo -e "${RED}[ERROR]${NC} $*" >&2 ;; + "debug") [[ "$VERBOSE" == "true" ]] && echo -e "${PURPLE}[DEBUG]${NC} $*" >&2 ;; + *) echo -e "$*" >&2 ;; + esac +} + +# ----- Configuration ----- +CONDA_ENV_NAME="precise" +WAKE_WORD="hey computer" +MODELS_DIR="$HOME/precise-models" +VERBOSE=false + +# ----- Dependency checking ----- +command_exists() { + command -v "$1" &> /dev/null +} + +check_conda() { + if ! command_exists conda; then + print_status error "conda not found. Please install miniconda first." 
+ return 1 + fi + return 0 +} + +# ----- Parse arguments ----- +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --wake-word) + WAKE_WORD="$2" + shift 2 + ;; + --env-name) + CONDA_ENV_NAME="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Options: + --wake-word "phrase" Wake word to train (default: "hey computer") + --env-name NAME Custom conda environment name (default: precise) + -v, --verbose Enable verbose output + -h, --help Show this help message + +Examples: + $(basename "$0") --wake-word "hey jarvis" + $(basename "$0") --env-name mycroft-precise + +EOF + exit 0 + ;; + *) + print_status error "Unknown option: $1" + exit 1 + ;; + esac + done +} + +# ----- Setup functions ----- + +create_conda_environment() { + print_status info "Creating conda environment: $CONDA_ENV_NAME" + + # Check if environment already exists + if conda env list | grep -q "^${CONDA_ENV_NAME}\s"; then + print_status warning "Environment $CONDA_ENV_NAME already exists" + read -p "Remove and recreate? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + print_status info "Removing existing environment..." + conda env remove -n "$CONDA_ENV_NAME" -y + else + print_status info "Using existing environment" + return 0 + fi + fi + + # Create new environment with Python 3.7 (required for TF 1.15) + print_status info "Creating Python 3.7 environment..." + conda create -n "$CONDA_ENV_NAME" python=3.7 -y || { + print_status error "Failed to create conda environment" + return 1 + } + + print_status success "Conda environment created" + return 0 +} + +install_tensorflow() { + print_status info "Installing TensorFlow 1.15..." 
+ + # Activate conda environment + eval "$(conda shell.bash hook)" + conda activate "$CONDA_ENV_NAME" || { + print_status error "Failed to activate conda environment" + return 1 + } + + # Install TensorFlow 1.15 (last 1.x version) + pip install tensorflow==1.15.5 --break-system-packages || { + print_status error "Failed to install TensorFlow" + return 1 + } + + # Verify installation + python -c "import tensorflow as tf; print(f'TensorFlow {tf.__version__} installed')" || { + print_status error "TensorFlow installation verification failed" + return 1 + } + + print_status success "TensorFlow 1.15 installed" + return 0 +} + +install_precise() { + print_status info "Installing Mycroft Precise..." + + # Activate conda environment + eval "$(conda shell.bash hook)" + conda activate "$CONDA_ENV_NAME" || { + print_status error "Failed to activate conda environment" + return 1 + } + + # Install audio dependencies + print_status info "Installing system audio dependencies..." + if command_exists apt-get; then + sudo apt-get update + sudo apt-get install -y portaudio19-dev sox libatlas-base-dev || { + print_status warning "Some audio dependencies failed to install" + } + fi + + # Install Python audio libraries + pip install pyaudio --break-system-packages || { + print_status warning "PyAudio installation failed (may need manual installation)" + } + + # Install Precise + pip install mycroft-precise --break-system-packages || { + print_status error "Failed to install Mycroft Precise" + return 1 + } + + # Verify installation + python -c "import precise_runner; print('Precise installed successfully')" || { + print_status error "Precise installation verification failed" + return 1 + } + + print_status success "Mycroft Precise installed" + return 0 +} + +download_precise_engine() { + print_status info "Downloading precise-engine..." 
+ + local engine_version="0.3.0" + local engine_url="https://github.com/MycroftAI/mycroft-precise/releases/download/v${engine_version}/precise-engine_${engine_version}_x86_64.tar.gz" + local temp_dir=$(mktemp -d) + + # Download engine + wget -q --show-progress -O "$temp_dir/precise-engine.tar.gz" "$engine_url" || { + print_status error "Failed to download precise-engine" + rm -rf "$temp_dir" + return 1 + } + + # Extract + tar xzf "$temp_dir/precise-engine.tar.gz" -C "$temp_dir" || { + print_status error "Failed to extract precise-engine" + rm -rf "$temp_dir" + return 1 + } + + # Install to /usr/local/bin + sudo cp "$temp_dir/precise-engine/precise-engine" /usr/local/bin/ || { + print_status error "Failed to install precise-engine" + rm -rf "$temp_dir" + return 1 + } + + sudo chmod +x /usr/local/bin/precise-engine + + # Clean up + rm -rf "$temp_dir" + + # Verify installation + precise-engine --version || { + print_status error "precise-engine installation verification failed" + return 1 + } + + print_status success "precise-engine installed" + return 0 +} + +create_training_directory() { + print_status info "Creating training directory structure..." + + # Sanitize wake word for directory name + local wake_word_dir=$(echo "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]') + local project_dir="$MODELS_DIR/$wake_word_dir" + + mkdir -p "$project_dir"/{wake-word,not-wake-word,test/wake-word,test/not-wake-word} + + print_status success "Training directory created: $project_dir" + + # Store project path for later use + echo "$project_dir" > "$MODELS_DIR/.current_project" + + return 0 +} + +create_training_scripts() { + print_status info "Creating training helper scripts..." 
+ + local wake_word_dir=$(echo "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]') + local project_dir="$MODELS_DIR/$wake_word_dir" + + # Create recording script + cat > "$project_dir/1-record-wake-word.sh" << 'EOF' +#!/bin/bash +# Step 1: Record wake word samples +# Run this script and follow the prompts to record ~50-100 samples + +eval "$(conda shell.bash hook)" +conda activate precise + +echo "Recording wake word samples..." +echo "Press SPACE to start/stop recording" +echo "Press Ctrl+C when done (aim for 50-100 samples)" +echo "" + +precise-collect +EOF + + # Create not-wake-word recording script + cat > "$project_dir/2-record-not-wake-word.sh" << 'EOF' +#!/bin/bash +# Step 2: Record "not wake word" samples +# Record random speech, TV, music, similar-sounding phrases + +eval "$(conda shell.bash hook)" +conda activate precise + +echo "Recording not-wake-word samples..." +echo "Record:" +echo " - Normal conversation" +echo " - TV/music background" +echo " - Similar sounding phrases" +echo " - Ambient noise" +echo "" +echo "Press SPACE to start/stop recording" +echo "Press Ctrl+C when done (aim for 200-500 samples)" +echo "" + +precise-collect -f not-wake-word/samples.wav +EOF + + # Create training script + cat > "$project_dir/3-train-model.sh" << EOF +#!/bin/bash +# Step 3: Train the model +# This will train for 60 epochs (adjust -e parameter for more/less) + +eval "\$(conda shell.bash hook)" +conda activate precise + +echo "Training wake word model..." +echo "This will take 30-60 minutes..." +echo "" + +# Train model +precise-train -e 60 ${wake_word_dir}.net . + +echo "" +echo "Training complete!" +echo "Test with: precise-listen ${wake_word_dir}.net" +EOF + + # Create testing script + cat > "$project_dir/4-test-model.sh" << EOF +#!/bin/bash +# Step 4: Test the model with live microphone + +eval "\$(conda shell.bash hook)" +conda activate precise + +echo "Testing wake word model..." +echo "Speak your wake word - you should see '!' 
when detected" +echo "Speak other phrases - should not trigger" +echo "" +echo "Press Ctrl+C to exit" +echo "" + +precise-listen ${wake_word_dir}.net +EOF + + # Create evaluation script + cat > "$project_dir/5-evaluate-model.sh" << EOF +#!/bin/bash +# Step 5: Evaluate model on test set + +eval "\$(conda shell.bash hook)" +conda activate precise + +echo "Evaluating wake word model on test set..." +echo "" + +precise-test ${wake_word_dir}.net test/ + +echo "" +echo "Check metrics above:" +echo " - Wake word accuracy should be >95%" +echo " - False positive rate should be <5%" +EOF + + # Create tuning script + cat > "$project_dir/6-tune-threshold.sh" << EOF +#!/bin/bash +# Step 6: Tune activation threshold + +eval "\$(conda shell.bash hook)" +conda activate precise + +echo "Testing different thresholds..." +echo "" +echo "Default threshold: 0.5" +echo "Higher = fewer false positives, may miss some wake words" +echo "Lower = catch more wake words, more false positives" +echo "" + +for threshold in 0.3 0.5 0.7; do + echo "Testing threshold: \$threshold" + echo "Press Ctrl+C to try next threshold" + precise-listen ${wake_word_dir}.net -t \$threshold +done +EOF + + # Make all scripts executable + chmod +x "$project_dir"/*.sh + + print_status success "Training scripts created in $project_dir" + return 0 +} + +create_readme() { + print_status info "Creating README..." + + local wake_word_dir=$(echo "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]') + local project_dir="$MODELS_DIR/$wake_word_dir" + + cat > "$project_dir/README.md" << EOF +# Wake Word Training: "$WAKE_WORD" + +## Quick Start + +Follow these steps in order: + +### 1. Record Wake Word Samples +\`\`\`bash +./1-record-wake-word.sh +\`\`\` + +Record 50-100 samples: +- Vary your tone and speed +- Different distances from microphone +- Different background noise levels +- Have family members record too + +### 2. 
Record Not-Wake-Word Samples +\`\`\`bash +./2-record-not-wake-word.sh +\`\`\` + +Record 200-500 samples of: +- Normal conversation +- TV/music in background +- Similar sounding phrases +- Ambient household noise + +### 3. Organize Samples + +Move files into training/test split: +\`\`\`bash +# 80% of wake-word samples go to: +mv wake-word-samples-* wake-word/ + +# 20% of wake-word samples go to: +mv wake-word-samples-* test/wake-word/ + +# 80% of not-wake-word samples go to: +mv not-wake-word-samples-* not-wake-word/ + +# 20% of not-wake-word samples go to: +mv not-wake-word-samples-* test/not-wake-word/ +\`\`\` + +### 4. Train Model +\`\`\`bash +./3-train-model.sh +\`\`\` + +Wait 30-60 minutes for training to complete. + +### 5. Test Model +\`\`\`bash +./4-test-model.sh +\`\`\` + +Speak your wake word and verify detection. + +### 6. Evaluate Model +\`\`\`bash +./5-evaluate-model.sh +\`\`\` + +Check accuracy metrics on test set. + +### 7. Tune Threshold +\`\`\`bash +./6-tune-threshold.sh +\`\`\` + +Find the best threshold for your environment. + +## Tips for Good Training + +1. **Quality over quantity** - Clear samples are better than many poor ones +2. **Diverse conditions** - Different noise levels, distances, speakers +3. **Hard negatives** - Include similar-sounding phrases in not-wake-word set +4. **Regular updates** - Add false positives/negatives and retrain + +## Next Steps + +Once trained and tested: + +1. Copy model to voice assistant server: + \`\`\`bash + cp ${wake_word_dir}.net ~/voice-assistant/models/ + \`\`\` + +2. Update voice assistant config: + \`\`\`bash + vim ~/voice-assistant/config/.env + # Set: PRECISE_MODEL=~/voice-assistant/models/${wake_word_dir}.net + \`\`\` + +3. 
Restart voice assistant service: + \`\`\`bash + sudo systemctl restart voice-assistant + \`\`\` + +## Troubleshooting + +**Low accuracy?** +- Collect more training samples +- Increase training epochs (edit 3-train-model.sh, change -e 60 to -e 120) +- Verify 80/20 train/test split + +**Too many false positives?** +- Increase threshold (use 6-tune-threshold.sh) +- Add false trigger audio to not-wake-word set +- Retrain with more diverse negative samples + +**Misses wake words?** +- Lower threshold +- Add missed samples to training set +- Ensure good audio quality + +## Resources + +- Mycroft Precise Docs: https://github.com/MycroftAI/mycroft-precise +- Training Guide: https://mycroft-ai.gitbook.io/docs/mycroft-technologies/precise +- Community Models: https://github.com/MycroftAI/precise-data +EOF + + print_status success "README created in $project_dir" + return 0 +} + +download_pretrained_models() { + print_status info "Downloading pre-trained models..." + + # Create models directory + mkdir -p "$MODELS_DIR/pretrained" + + # Download Hey Mycroft model (as example/base) + local model_url="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz" + + if [[ ! -f "$MODELS_DIR/pretrained/hey-mycroft.net" ]]; then + print_status info "Downloading Hey Mycroft model..." 
+ wget -q --show-progress -O "$MODELS_DIR/pretrained/hey-mycroft.tar.gz" "$model_url" || { + print_status warning "Failed to download pre-trained model (optional)" + return 0 + } + + tar xzf "$MODELS_DIR/pretrained/hey-mycroft.tar.gz" -C "$MODELS_DIR/pretrained/" || { + print_status warning "Failed to extract pre-trained model" + return 0 + } + + print_status success "Pre-trained model downloaded" + else + print_status info "Pre-trained model already exists" + fi + + return 0 +} + +print_next_steps() { + local wake_word_dir=$(echo "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]') + local project_dir="$MODELS_DIR/$wake_word_dir" + + cat << EOF + +${GREEN}Setup complete!${NC} + +Wake word: "$WAKE_WORD" +Project directory: $project_dir + +${BLUE}Next steps:${NC} + +1. ${CYAN}Activate conda environment:${NC} + conda activate $CONDA_ENV_NAME + +2. ${CYAN}Navigate to project directory:${NC} + cd $project_dir + +3. ${CYAN}Follow the README or run scripts in order:${NC} + ./1-record-wake-word.sh # Record wake word samples + ./2-record-not-wake-word.sh # Record negative samples + # Organize samples into train/test directories + ./3-train-model.sh # Train the model (30-60 min) + ./4-test-model.sh # Test with microphone + ./5-evaluate-model.sh # Check accuracy metrics + ./6-tune-threshold.sh # Find best threshold + +${BLUE}Helpful commands:${NC} + +Test pre-trained model: + conda activate $CONDA_ENV_NAME + precise-listen $MODELS_DIR/pretrained/hey-mycroft.net + +Check precise-engine: + precise-engine --version + +${BLUE}Resources:${NC} + +Full guide: See MYCROFT_PRECISE_GUIDE.md +Project README: $project_dir/README.md +Mycroft Docs: https://github.com/MycroftAI/mycroft-precise + +EOF +} + +# ----- Main ----- +main() { + print_status info "Starting Mycroft Precise setup..." 
+ + # Parse arguments + parse_args "$@" + + # Check dependencies + check_conda || exit 1 + + # Setup steps + create_conda_environment || exit 1 + install_tensorflow || exit 1 + install_precise || exit 1 + download_precise_engine || exit 1 + create_training_directory || exit 1 + create_training_scripts || exit 1 + create_readme || exit 1 + download_pretrained_models || exit 1 + + # Print next steps + print_next_steps +} + +# Run main +main "$@" diff --git a/scripts/setup_voice_assistant.sh b/scripts/setup_voice_assistant.sh new file mode 100755 index 0000000..bcceced --- /dev/null +++ b/scripts/setup_voice_assistant.sh @@ -0,0 +1,429 @@ +#!/usr/bin/env bash +# +# Path: setup_voice_assistant.sh +# +# Purpose and usage: +# Sets up the voice assistant server environment on Heimdall +# - Creates conda environment +# - Installs dependencies (Whisper, Flask, Piper TTS) +# - Downloads and configures TTS models +# - Sets up systemd service (optional) +# - Configures environment variables +# +# Requirements: +# - conda/miniconda installed +# - Internet connection for downloads +# - Sudo access (for systemd service setup) +# +# Usage: +# ./setup_voice_assistant.sh [--no-service] [--env-name NAME] +# +# Author: PRbL Library +# Created: $(date +"%Y-%m-%d") + +# ----- PRbL Color and output functions ----- +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[0;33m' +BLUE='\033[0;34m' +PURPLE='\033[0;35m' +CYAN='\033[0;36m' +NC='\033[0m' # No Color + +print_status() { + local level="$1" + shift + case "$level" in + "info") echo -e "${BLUE}[INFO]${NC} $*" >&2 ;; + "success") echo -e "${GREEN}[SUCCESS]${NC} $*" >&2 ;; + "warning") echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;; + "error") echo -e "${RED}[ERROR]${NC} $*" >&2 ;; + "debug") [[ "$VERBOSE" == "true" ]] && echo -e "${PURPLE}[DEBUG]${NC} $*" >&2 ;; + *) echo -e "$*" >&2 ;; + esac +} + +# ----- Configuration ----- +CONDA_ENV_NAME="voice-assistant" +PROJECT_DIR="$HOME/voice-assistant" +INSTALL_SYSTEMD=true +VERBOSE=false + +# 
----- Dependency checking ----- +command_exists() { + command -v "$1" &> /dev/null +} + +check_conda() { + if ! command_exists conda; then + print_status error "conda not found. Please install miniconda first." + print_status info "Install with: wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" + print_status info " bash Miniconda3-latest-Linux-x86_64.sh" + return 1 + fi + return 0 +} + +# ----- Parse arguments ----- +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + --no-service) + INSTALL_SYSTEMD=false + shift + ;; + --env-name) + CONDA_ENV_NAME="$2" + shift 2 + ;; + -v|--verbose) + VERBOSE=true + shift + ;; + -h|--help) + cat << EOF +Usage: $(basename "$0") [OPTIONS] + +Options: + --no-service Don't install systemd service + --env-name NAME Custom conda environment name (default: voice-assistant) + -v, --verbose Enable verbose output + -h, --help Show this help message + +EOF + exit 0 + ;; + *) + print_status error "Unknown option: $1" + exit 1 + ;; + esac + done +} + +# ----- Setup functions ----- + +create_project_directory() { + print_status info "Creating project directory: $PROJECT_DIR" + + if [[ ! -d "$PROJECT_DIR" ]]; then + mkdir -p "$PROJECT_DIR" || { + print_status error "Failed to create project directory" + return 1 + } + fi + + # Create subdirectories + mkdir -p "$PROJECT_DIR"/{logs,models,config} + + print_status success "Project directory created" + return 0 +} + +create_conda_environment() { + print_status info "Creating conda environment: $CONDA_ENV_NAME" + + # Check if environment already exists + if conda env list | grep -q "^${CONDA_ENV_NAME}\s"; then + print_status warning "Environment $CONDA_ENV_NAME already exists" + read -p "Remove and recreate? (y/N): " -n 1 -r + echo + if [[ $REPLY =~ ^[Yy]$ ]]; then + print_status info "Removing existing environment..." 
conda env remove -n "$CONDA_ENV_NAME" -y
+        else
+            print_status info "Using existing environment"
+            return 0
+        fi
+    fi
+
+    # Create new environment
+    print_status info "Creating Python 3.10 environment..."
+    conda create -n "$CONDA_ENV_NAME" python=3.10 -y || {
+        print_status error "Failed to create conda environment"
+        return 1
+    }
+
+    print_status success "Conda environment created"
+    return 0
+}
+
+# Installs Whisper, Flask, requests, python-dotenv, Piper TTS into the conda env.
+# Required packages abort with return 1; optional ones only warn, so setup continues.
+install_python_dependencies() {
+    print_status info "Installing Python dependencies..."
+
+    # Activate conda environment (hook needed because this runs in a non-login shell)
+    eval "$(conda shell.bash hook)"
+    conda activate "$CONDA_ENV_NAME" || {
+        print_status error "Failed to activate conda environment"
+        return 1
+    }
+
+    # Install base dependencies
+    # NOTE(review): --break-system-packages targets externally-managed system Pythons;
+    # inside a conda env it should be unnecessary (and older pip rejects the flag) — confirm intent.
+    print_status info "Installing base packages..."
+    pip install --upgrade pip --break-system-packages || true
+
+    # Install Whisper (OpenAI)
+    print_status info "Installing OpenAI Whisper..."
+    pip install -U openai-whisper --break-system-packages || {
+        print_status error "Failed to install Whisper"
+        return 1
+    }
+
+    # Install Flask
+    print_status info "Installing Flask..."
+    pip install flask --break-system-packages || {
+        print_status error "Failed to install Flask"
+        return 1
+    }
+
+    # Install requests
+    print_status info "Installing requests..."
+    pip install requests --break-system-packages || {
+        print_status error "Failed to install requests"
+        return 1
+    }
+
+    # Install python-dotenv (optional: failure downgrades to a warning)
+    print_status info "Installing python-dotenv..."
+    pip install python-dotenv --break-system-packages || {
+        print_status warning "Failed to install python-dotenv (optional)"
+    }
+
+    # Install Piper TTS
+    print_status info "Installing Piper TTS..."
+ # Note: Piper TTS installation method varies, adjust as needed + # For now, we'll install the Python package if available + pip install piper-tts --break-system-packages || { + print_status warning "Piper TTS pip package not found" + print_status info "You may need to install Piper manually from: https://github.com/rhasspy/piper" + } + + # Install PyAudio for audio handling + print_status info "Installing PyAudio dependencies..." + if command_exists apt-get; then + sudo apt-get install -y portaudio19-dev python3-pyaudio || { + print_status warning "Failed to install portaudio dev packages" + } + fi + + pip install pyaudio --break-system-packages || { + print_status warning "Failed to install PyAudio (may need manual installation)" + } + + print_status success "Python dependencies installed" + return 0 +} + +download_piper_models() { + print_status info "Downloading Piper TTS models..." + + local models_dir="$PROJECT_DIR/models/piper" + mkdir -p "$models_dir" + + # Download a default voice model + # Example: en_US-lessac-medium + local model_url="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx" + local config_url="https://huggingface.co/rhasspy/piper-voices/resolve/main/en/en_US/lessac/medium/en_US-lessac-medium.onnx.json" + + if [[ ! -f "$models_dir/en_US-lessac-medium.onnx" ]]; then + print_status info "Downloading voice model..." + wget -q --show-progress -O "$models_dir/en_US-lessac-medium.onnx" "$model_url" || { + print_status warning "Failed to download Piper model (manual download may be needed)" + } + + wget -q --show-progress -O "$models_dir/en_US-lessac-medium.onnx.json" "$config_url" || { + print_status warning "Failed to download Piper config" + } + else + print_status info "Piper model already downloaded" + fi + + print_status success "Piper models ready" + return 0 +} + +create_config_file() { + print_status info "Creating configuration file..." 
+ + local config_file="$PROJECT_DIR/config/.env" + + if [[ -f "$config_file" ]]; then + print_status warning "Config file already exists: $config_file" + return 0 + fi + + cat > "$config_file" << 'EOF' +# Voice Assistant Configuration +# Path: ~/voice-assistant/config/.env + +# Home Assistant Configuration +HA_URL=http://homeassistant.local:8123 +HA_TOKEN=your_long_lived_access_token_here + +# Server Configuration +SERVER_HOST=0.0.0.0 +SERVER_PORT=5000 + +# Whisper Configuration +WHISPER_MODEL=medium + +# Piper TTS Configuration +PIPER_MODEL=/path/to/piper/model.onnx +PIPER_CONFIG=/path/to/piper/model.onnx.json + +# Logging +LOG_LEVEL=INFO +LOG_FILE=/home/$USER/voice-assistant/logs/voice_assistant.log +EOF + + # Update paths in config + sed -i "s|/path/to/piper/model.onnx|$PROJECT_DIR/models/piper/en_US-lessac-medium.onnx|g" "$config_file" + sed -i "s|/path/to/piper/model.onnx.json|$PROJECT_DIR/models/piper/en_US-lessac-medium.onnx.json|g" "$config_file" + sed -i "s|/home/\$USER|$HOME|g" "$config_file" + + chmod 600 "$config_file" + + print_status success "Config file created: $config_file" + print_status warning "Please edit $config_file and add your Home Assistant token" + + return 0 +} + +create_systemd_service() { + if [[ "$INSTALL_SYSTEMD" != "true" ]]; then + print_status info "Skipping systemd service installation" + return 0 + fi + + print_status info "Creating systemd service..." 
+ + local service_file="/etc/systemd/system/voice-assistant.service" + + # Create service file + sudo tee "$service_file" > /dev/null << EOF +[Unit] +Description=Voice Assistant Server +After=network.target + +[Service] +Type=simple +User=$USER +WorkingDirectory=$PROJECT_DIR +Environment="PATH=$HOME/miniconda3/envs/$CONDA_ENV_NAME/bin:/usr/local/bin:/usr/bin:/bin" +EnvironmentFile=$PROJECT_DIR/config/.env +ExecStart=$HOME/miniconda3/envs/$CONDA_ENV_NAME/bin/python $PROJECT_DIR/voice_server.py +Restart=on-failure +RestartSec=10 +StandardOutput=append:$PROJECT_DIR/logs/voice_assistant.log +StandardError=append:$PROJECT_DIR/logs/voice_assistant_error.log + +[Install] +WantedBy=multi-user.target +EOF + + # Reload systemd + sudo systemctl daemon-reload + + print_status success "Systemd service created" + print_status info "To enable and start the service:" + print_status info " sudo systemctl enable voice-assistant" + print_status info " sudo systemctl start voice-assistant" + + return 0 +} + +create_test_script() { + print_status info "Creating test script..." + + local test_script="$PROJECT_DIR/test_server.sh" + + cat > "$test_script" << 'EOF' +#!/bin/bash +# Test script for voice assistant server + +# Activate conda environment +eval "$(conda shell.bash hook)" +conda activate voice-assistant + +# Load environment variables +if [[ -f ~/voice-assistant/config/.env ]]; then + export $(grep -v '^#' ~/voice-assistant/config/.env | xargs) +fi + +# Run server +cd ~/voice-assistant +python voice_server.py --verbose +EOF + + chmod +x "$test_script" + + print_status success "Test script created: $test_script" + return 0 +} + +install_voice_server_script() { + print_status info "Installing voice_server.py..." 
+ + # Check if voice_server.py exists in outputs + if [[ -f "$HOME/voice_server.py" ]]; then + cp "$HOME/voice_server.py" "$PROJECT_DIR/voice_server.py" + print_status success "voice_server.py installed" + elif [[ -f "./voice_server.py" ]]; then + cp "./voice_server.py" "$PROJECT_DIR/voice_server.py" + print_status success "voice_server.py installed" + else + print_status warning "voice_server.py not found in current directory" + print_status info "Please copy voice_server.py to $PROJECT_DIR manually" + fi + + return 0 +} + +# ----- Main ----- +main() { + print_status info "Starting voice assistant setup..." + + # Parse arguments + parse_args "$@" + + # Check dependencies + check_conda || exit 1 + + # Setup steps + create_project_directory || exit 1 + create_conda_environment || exit 1 + install_python_dependencies || exit 1 + download_piper_models || exit 1 + create_config_file || exit 1 + install_voice_server_script || exit 1 + create_test_script || exit 1 + + if [[ "$INSTALL_SYSTEMD" == "true" ]]; then + create_systemd_service || exit 1 + fi + + # Final instructions + print_status success "Setup complete!" + echo + print_status info "Next steps:" + print_status info "1. Edit config file: vim $PROJECT_DIR/config/.env" + print_status info "2. Add your Home Assistant long-lived access token" + print_status info "3. Test the server: $PROJECT_DIR/test_server.sh" + print_status info "4. 
Configure your Maix Duino device" + + if [[ "$INSTALL_SYSTEMD" == "true" ]]; then + echo + print_status info "To run as a service:" + print_status info " sudo systemctl enable voice-assistant" + print_status info " sudo systemctl start voice-assistant" + print_status info " sudo systemctl status voice-assistant" + fi + + echo + print_status info "Project directory: $PROJECT_DIR" + print_status info "Conda environment: $CONDA_ENV_NAME" + print_status info "Activate with: conda activate $CONDA_ENV_NAME" +} + +# Run main +main "$@" diff --git a/scripts/voice_server.py b/scripts/voice_server.py new file mode 100755 index 0000000..d18bc3d --- /dev/null +++ b/scripts/voice_server.py @@ -0,0 +1,700 @@ +#!/usr/bin/env python3 +""" +Voice Processing Server for Maix Duino Voice Assistant + +Purpose and usage: + This server runs on Heimdall (10.1.10.71) and handles: + - Audio stream reception from Maix Duino + - Speech-to-text using Whisper + - Intent recognition and Home Assistant API calls + - Text-to-speech using Piper + - Audio response streaming back to device + +Path: /home/alan/voice-assistant/voice_server.py + +Requirements: + - whisper (already installed) + - piper-tts + - flask + - requests + - python-dotenv + +Usage: + python3 voice_server.py [--host HOST] [--port PORT] [--ha-url URL] +""" + +import os +import sys +import argparse +import tempfile +import wave +import io +import re +import threading +import queue +from pathlib import Path +from typing import Optional, Dict, Any, Tuple + +import whisper +import requests +from flask import Flask, request, jsonify, send_file +from werkzeug.exceptions import BadRequest + +# Try to load environment variables +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + print("Warning: python-dotenv not installed. 
Using environment variables only.") + +# Try to import Mycroft Precise +PRECISE_AVAILABLE = False +try: + from precise_runner import PreciseEngine, PreciseRunner + import pyaudio + PRECISE_AVAILABLE = True +except ImportError: + print("Warning: Mycroft Precise not installed. Wake word detection disabled.") + print("Install with: pip install mycroft-precise pyaudio") + +# Configuration +DEFAULT_HOST = "0.0.0.0" +DEFAULT_PORT = 5000 +DEFAULT_WHISPER_MODEL = "medium" +DEFAULT_HA_URL = os.getenv("HA_URL", "http://homeassistant.local:8123") +DEFAULT_HA_TOKEN = os.getenv("HA_TOKEN", "") +DEFAULT_PRECISE_MODEL = os.getenv("PRECISE_MODEL", "") +DEFAULT_PRECISE_SENSITIVITY = float(os.getenv("PRECISE_SENSITIVITY", "0.5")) +DEFAULT_PRECISE_ENGINE = "/usr/local/bin/precise-engine" + +# Initialize Flask app +app = Flask(__name__) +app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 # 16MB max audio file + +# Global variables for loaded models +whisper_model = None +ha_client = None +precise_runner = None +precise_enabled = False +wake_word_queue = queue.Queue() # Queue for wake word detections + + +class HomeAssistantClient: + """Client for interacting with Home Assistant API""" + + def __init__(self, base_url: str, token: str): + self.base_url = base_url.rstrip('/') + self.token = token + self.session = requests.Session() + self.session.headers.update({ + 'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json' + }) + + def get_state(self, entity_id: str) -> Optional[Dict[str, Any]]: + """Get the state of an entity""" + try: + response = self.session.get(f'{self.base_url}/api/states/{entity_id}') + response.raise_for_status() + return response.json() + except requests.RequestException as e: + print(f"Error getting state for {entity_id}: {e}") + return None + + def call_service(self, domain: str, service: str, entity_id: str, + **kwargs) -> bool: + """Call a Home Assistant service""" + try: + data = {'entity_id': entity_id} + data.update(kwargs) + + response = 
self.session.post(
+                f'{self.base_url}/api/services/{domain}/{service}',
+                json=data
+            )
+            response.raise_for_status()
+            return True
+        except requests.RequestException as e:
+            print(f"Error calling service {domain}.{service}: {e}")
+            return False
+
+    def turn_on(self, entity_id: str, **kwargs) -> bool:
+        """Turn on an entity"""
+        # Domain is derived from the entity id prefix (e.g. 'light.living_room' -> 'light').
+        domain = entity_id.split('.')[0]
+        return self.call_service(domain, 'turn_on', entity_id, **kwargs)
+
+    def turn_off(self, entity_id: str, **kwargs) -> bool:
+        """Turn off an entity"""
+        domain = entity_id.split('.')[0]
+        return self.call_service(domain, 'turn_off', entity_id, **kwargs)
+
+    def toggle(self, entity_id: str, **kwargs) -> bool:
+        """Toggle an entity"""
+        domain = entity_id.split('.')[0]
+        return self.call_service(domain, 'toggle', entity_id, **kwargs)
+
+
+class IntentParser:
+    """Simple pattern-based intent recognition"""
+
+    # Intent patterns (can be expanded or replaced with ML-based NLU)
+    # Each pattern's last capture group is treated as the entity name.
+    PATTERNS = {
+        'turn_on': [
+            r'turn on (the )?(.+)',
+            r'switch on (the )?(.+)',
+            r'enable (the )?(.+)',
+        ],
+        'turn_off': [
+            r'turn off (the )?(.+)',
+            r'switch off (the )?(.+)',
+            r'disable (the )?(.+)',
+        ],
+        'toggle': [
+            r'toggle (the )?(.+)',
+        ],
+        'get_state': [
+            r'what(?:\'s| is) (the )?(.+)',
+            r'how is (the )?(.+)',
+            r'status of (the )?(.+)',
+        ],
+        'get_temperature': [
+            r'what(?:\'s| is) the temperature',
+            r'how (?:warm|cold|hot) is it',
+        ],
+    }
+
+    # Entity name mapping (friendly names to entity IDs)
+    # NOTE(review): hard-coded for one household; entity ids here are placeholders — confirm against the target HA instance.
+    ENTITY_MAP = {
+        'living room light': 'light.living_room',
+        'living room lights': 'light.living_room',
+        'bedroom light': 'light.bedroom',
+        'bedroom lights': 'light.bedroom',
+        'kitchen light': 'light.kitchen',
+        'kitchen lights': 'light.kitchen',
+        'all lights': 'group.all_lights',
+        'temperature': 'sensor.temperature',
+        'thermostat': 'climate.thermostat',
+    }
+
+    def parse(self, text: str) -> Optional[Tuple[str, str, Dict[str, Any]]]:
+        """
+        Parse text into intent, entity, and parameters
+
+        Returns:
+            (intent, entity_id, params) or None if no match
+        """
+        text = text.lower().strip()
+
+        for intent, patterns in self.PATTERNS.items():
+            for pattern in patterns:
+                match = re.match(pattern, text, re.IGNORECASE)
+                if match:
+                    # Extract entity name from match groups
+                    # (first non-article group; 'the' is captured by the optional group above)
+                    entity_name = None
+                    for group in match.groups():
+                        if group and group.lower() not in ['the', 'a', 'an']:
+                            entity_name = group.lower().strip()
+                            break
+
+                    # Map entity name to entity ID
+                    entity_id = None
+                    if entity_name:
+                        entity_id = self.ENTITY_MAP.get(entity_name)
+
+                    # For get_temperature, use default sensor
+                    if intent == 'get_temperature':
+                        entity_id = self.ENTITY_MAP.get('temperature')
+
+                    if entity_id:
+                        return (intent, entity_id, {})
+
+        return None
+
+
+def load_whisper_model(model_name: str = DEFAULT_WHISPER_MODEL):
+    """Load Whisper model (lazily, cached in the module-global whisper_model)."""
+    global whisper_model
+
+    if whisper_model is None:
+        print(f"Loading Whisper model: {model_name}")
+        whisper_model = whisper.load_model(model_name)
+        print("Whisper model loaded successfully")
+
+    return whisper_model
+
+
+def transcribe_audio(audio_file_path: str) -> Optional[str]:
+    """Transcribe audio file using Whisper; returns None on any failure (best effort)."""
+    try:
+        model = load_whisper_model()
+        result = model.transcribe(audio_file_path)
+        return result['text'].strip()
+    except Exception as e:
+        # Broad catch is deliberate: transcription failure is reported to the caller as None.
+        print(f"Error transcribing audio: {e}")
+        return None
+
+
+def generate_tts(text: str) -> Optional[bytes]:
+    """
+    Generate speech from text using Piper TTS
+
+    TODO: Implement Piper TTS integration
+    For now, returns None - implement based on Piper installation
+    """
+    # Placeholder for TTS implementation
+    print(f"TTS requested for: {text}")
+
+    # You'll need to add Piper TTS integration here.
+    # Example command: echo "some text" | piper --model model.onnx --output_file out.wav
+
+    return None
+
+
+def on_wake_word_detected():
+    """
+    Callback when Mycroft Precise detects wake word
+
+    This function is called by the Precise runner when the wake word
+    is detected. It signals the main application to start recording
+    and processing the user's command.
+    """
+    print("Wake word detected by Precise!")
+    # Detection is queued; consumers poll via the /wake-word/detections route.
+    wake_word_queue.put({
+        'timestamp': time.time(),
+        'source': 'precise'
+    })
+
+
+def start_precise_listener(model_path: str, sensitivity: float = 0.5,
+                           engine_path: str = DEFAULT_PRECISE_ENGINE):
+    """
+    Start Mycroft Precise wake word detection
+
+    Args:
+        model_path: Path to .net model file
+        sensitivity: Detection threshold (0.0-1.0, default 0.5)
+        engine_path: Path to precise-engine binary
+
+    Returns:
+        PreciseRunner instance if successful, None otherwise
+    """
+    global precise_runner, precise_enabled
+
+    if not PRECISE_AVAILABLE:
+        print("Error: Mycroft Precise not available")
+        return None
+
+    # Verify model exists
+    if not os.path.exists(model_path):
+        print(f"Error: Precise model not found: {model_path}")
+        return None
+
+    # Verify engine exists
+    if not os.path.exists(engine_path):
+        print(f"Error: precise-engine not found: {engine_path}")
+        print("Download from: https://github.com/MycroftAI/mycroft-precise/releases")
+        return None
+
+    try:
+        # Create Precise engine
+        engine = PreciseEngine(engine_path, model_path)
+
+        # Create runner with callback
+        precise_runner = PreciseRunner(
+            engine,
+            sensitivity=sensitivity,
+            on_activation=on_wake_word_detected
+        )
+
+        # Start listening
+        precise_runner.start()
+        precise_enabled = True
+
+        print(f"Precise listening started:")
+        print(f"  Model: {model_path}")
+        print(f"  Sensitivity: {sensitivity}")
+        print(f"  Engine: {engine_path}")
+
+        return precise_runner
+
+    except Exception as e:
+        print(f"Error starting Precise: {e}")
+        return None
+
+
+def stop_precise_listener():
+    """Stop Mycroft Precise wake word detection"""
+    global precise_runner, precise_enabled
+
+    if precise_runner:
+        try:
+            precise_runner.stop()
+            precise_enabled = False
+            print("Precise listener stopped")
+        except Exception as e:
+            print(f"Error stopping Precise: {e}")
+
+
+def record_audio_after_wake(duration: int = 5) -> Optional[bytes]:
+    """
+    Record audio after wake word is detected
+
+    Args:
+        duration: Maximum recording duration in seconds
+
+    Returns:
+        WAV audio data or None
+
+    Note: This is for server-side wake word detection where
+    the server is also doing audio capture. For Maix Duino
+    client-side wake detection, audio comes from the client.
+    """
+    if not PRECISE_AVAILABLE:
+        return None
+
+    try:
+        # Audio settings: 16 kHz mono, 16-bit — matches what Whisper/Precise expect downstream.
+        CHUNK = 1024
+        FORMAT = pyaudio.paInt16
+        CHANNELS = 1
+        RATE = 16000
+
+        p = pyaudio.PyAudio()
+
+        # Open stream
+        stream = p.open(
+            format=FORMAT,
+            channels=CHANNELS,
+            rate=RATE,
+            input=True,
+            frames_per_buffer=CHUNK
+        )
+
+        print(f"Recording for {duration} seconds...")
+
+        frames = []
+        for _ in range(0, int(RATE / CHUNK * duration)):
+            data = stream.read(CHUNK)
+            frames.append(data)
+
+        # Stop and close stream
+        stream.stop_stream()
+        stream.close()
+        p.terminate()
+
+        # Convert to WAV
+        wav_buffer = io.BytesIO()
+        with wave.open(wav_buffer, 'wb') as wf:
+            wf.setnchannels(CHANNELS)
+            wf.setsampwidth(p.get_sample_size(FORMAT))
+            wf.setframerate(RATE)
+            wf.writeframes(b''.join(frames))
+
+        return wav_buffer.getvalue()
+
+    except Exception as e:
+        print(f"Error recording audio: {e}")
+        return None
+
+
+# NOTE(review): mid-file module import. It works because it executes at module load,
+# before any of the functions above are called, but it should live at the top of the file.
+import time
+
+
+def execute_intent(intent: str, entity_id: str, params: Dict[str, Any]) -> str:
+    """Execute an intent and return response text"""
+    # Relies on the module-global ha_client, which main() initializes before serving.
+
+    if intent == 'turn_on':
+        success = ha_client.turn_on(entity_id)
+        if success:
+            entity_name = entity_id.split('.')[-1].replace('_', ' ')
+            return f"Turned on {entity_name}"
+        else:
+            return "Sorry, I couldn't turn that on"
+
+    elif intent == 'turn_off':
+        success = ha_client.turn_off(entity_id)
+        if success:
+            entity_name = entity_id.split('.')[-1].replace('_', ' ')
+            return f"Turned off {entity_name}"
+        else:
+            return "Sorry, I couldn't turn that off"
+
+    elif intent == 'toggle':
+        success = ha_client.toggle(entity_id)
+        if success:
+            entity_name = entity_id.split('.')[-1].replace('_', ' ')
+            return f"Toggled {entity_name}"
+        else:
+            return "Sorry, I couldn't toggle that"
+
+    elif intent in ['get_state', 'get_temperature']:
+        state = ha_client.get_state(entity_id)
+        if state:
+            entity_name = entity_id.split('.')[-1].replace('_', ' ')
+            value = state.get('state', 'unknown')
+            unit = state.get('attributes', {}).get('unit_of_measurement', '')
+
+            return f"The {entity_name} is {value} {unit}".strip()
+        else:
+            return "Sorry, I couldn't get that information"
+
+    return "I didn't understand that command"
+
+
+# Flask routes
+
+@app.route('/health', methods=['GET'])
+def health():
+    """Health check endpoint"""
+    return jsonify({
+        'status': 'healthy',
+        'whisper_loaded': whisper_model is not None,
+        'ha_connected': ha_client is not None,
+        'precise_enabled': precise_enabled,
+        'precise_available': PRECISE_AVAILABLE
+    })
+
+
+@app.route('/wake-word/status', methods=['GET'])
+def wake_word_status():
+    """Get wake word detection status"""
+    return jsonify({
+        'enabled': precise_enabled,
+        'available': PRECISE_AVAILABLE,
+        'model': DEFAULT_PRECISE_MODEL if precise_enabled else None,
+        'sensitivity': DEFAULT_PRECISE_SENSITIVITY if precise_enabled else None
+    })
+
+
+@app.route('/wake-word/detections', methods=['GET'])
+def wake_word_detections():
+    """
+    Get recent wake word detections (non-blocking)
+
+    Returns any wake word detections in the queue.
+    Used for testing and monitoring.
+    """
+    detections = []
+
+    # Drain the queue without blocking. empty() can race a concurrent get,
+    # hence the Empty guard around get_nowait().
+    try:
+        while not wake_word_queue.empty():
+            detections.append(wake_word_queue.get_nowait())
+    except queue.Empty:
+        pass
+
+    return jsonify({
+        'detections': detections,
+        'count': len(detections)
+    })
+
+
+@app.route('/transcribe', methods=['POST'])
+def transcribe():
+    """
+    Transcribe audio file
+
+    Expects: WAV audio file in request body
+    Returns: JSON with transcribed text
+    """
+    if 'audio' not in request.files:
+        raise BadRequest('No audio file provided')
+
+    audio_file = request.files['audio']
+
+    # Save to temporary file (delete=False so Whisper can reopen it by path)
+    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+        audio_file.save(temp_file.name)
+        temp_path = temp_file.name
+
+    try:
+        # Transcribe
+        text = transcribe_audio(temp_path)
+
+        if text:
+            return jsonify({
+                'success': True,
+                'text': text
+            })
+        else:
+            return jsonify({
+                'success': False,
+                'error': 'Transcription failed'
+            }), 500
+
+    finally:
+        # Clean up temp file
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+
+
+@app.route('/process', methods=['POST'])
+def process():
+    """
+    Process complete voice command
+
+    Expects: WAV audio file in request body
+    Returns: JSON with response and audio file
+    """
+    if 'audio' not in request.files:
+        raise BadRequest('No audio file provided')
+
+    audio_file = request.files['audio']
+
+    # Save to temporary file
+    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+        audio_file.save(temp_file.name)
+        temp_path = temp_file.name
+
+    try:
+        # Step 1: Transcribe
+        text = transcribe_audio(temp_path)
+
+        if not text:
+            return jsonify({
+                'success': False,
+                'error': 'Transcription failed'
+            }), 500
+
+        print(f"Transcribed: {text}")
+
+        # Step 2: Parse intent
+        parser = IntentParser()
+        intent_result = parser.parse(text)
+
+        if not intent_result:
+            response_text = "I didn't understand that command"
+        else:
+            intent, entity_id, params = intent_result
+            print(f"Intent: {intent}, Entity: {entity_id}")
+
+            # Step 3: Execute intent
+            response_text = execute_intent(intent, entity_id, params)
+
+        print(f"Response: {response_text}")
+
+        # Step 4: Generate TTS (placeholder for now)
+        # audio_response = generate_tts(response_text)
+
+        return jsonify({
+            'success': True,
+            'transcription': text,
+            'response': response_text,
+            # 'audio_available': audio_response is not None
+        })
+
+    finally:
+        # Clean up temp file
+        if os.path.exists(temp_path):
+            os.remove(temp_path)
+
+
+@app.route('/tts', methods=['POST'])
+def tts():
+    """
+    Generate TTS audio
+
+    Expects: JSON with 'text' field
+    Returns: WAV audio file
+    """
+    data = request.get_json()
+
+    if not data or 'text' not in data:
+        raise BadRequest('No text provided')
+
+    text = data['text']
+
+    # Generate TTS (currently always None — see generate_tts placeholder)
+    audio_data = generate_tts(text)
+
+    if audio_data:
+        return send_file(
+            io.BytesIO(audio_data),
+            mimetype='audio/wav',
+            as_attachment=True,
+            download_name='response.wav'
+        )
+    else:
+        return jsonify({
+            'success': False,
+            'error': 'TTS generation not implemented yet'
+        }), 501
+
+
+def main():
+    # CLI flags mirror the DEFAULT_* constants above, which are env-var driven.
+    parser = argparse.ArgumentParser(
+        description="Voice Processing Server for Maix Duino Voice Assistant"
+    )
+    parser.add_argument('--host', default=DEFAULT_HOST,
+                        help=f'Server host (default: {DEFAULT_HOST})')
+    parser.add_argument('--port', type=int, default=DEFAULT_PORT,
+                        help=f'Server port (default: {DEFAULT_PORT})')
+    parser.add_argument('--whisper-model', default=DEFAULT_WHISPER_MODEL,
+                        help=f'Whisper model to use (default: {DEFAULT_WHISPER_MODEL})')
+    parser.add_argument('--ha-url', default=DEFAULT_HA_URL,
+                        help=f'Home Assistant URL (default: {DEFAULT_HA_URL})')
+    parser.add_argument('--ha-token', default=DEFAULT_HA_TOKEN,
+                        help='Home Assistant long-lived access token')
+    parser.add_argument('--enable-precise', action='store_true',
+                        help='Enable Mycroft Precise wake word detection')
+    parser.add_argument('--precise-model', default=DEFAULT_PRECISE_MODEL,
+                        help='Path to Precise .net model file')
+    
parser.add_argument('--precise-sensitivity', type=float, + default=DEFAULT_PRECISE_SENSITIVITY, + help='Precise sensitivity threshold (0.0-1.0, default: 0.5)') + parser.add_argument('--precise-engine', default=DEFAULT_PRECISE_ENGINE, + help=f'Path to precise-engine binary (default: {DEFAULT_PRECISE_ENGINE})') + + args = parser.parse_args() + + # Validate HA configuration + if not args.ha_token: + print("Warning: No Home Assistant token provided!") + print("Set HA_TOKEN environment variable or use --ha-token") + print("Commands will not execute without authentication.") + + # Initialize global clients + global ha_client + ha_client = HomeAssistantClient(args.ha_url, args.ha_token) + + # Load Whisper model + print(f"Starting voice processing server on {args.host}:{args.port}") + load_whisper_model(args.whisper_model) + + # Start Precise if enabled + if args.enable_precise: + if not PRECISE_AVAILABLE: + print("Error: --enable-precise specified but Mycroft Precise not installed") + print("Install with: pip install mycroft-precise pyaudio") + sys.exit(1) + + if not args.precise_model: + print("Error: --enable-precise requires --precise-model") + sys.exit(1) + + print("\nStarting Mycroft Precise wake word detection...") + precise_result = start_precise_listener( + args.precise_model, + args.precise_sensitivity, + args.precise_engine + ) + + if not precise_result: + print("Error: Failed to start Precise listener") + sys.exit(1) + + print("\nWake word detection active!") + print("The server will detect wake words and queue them for processing.") + print("Use /wake-word/detections endpoint to check for detections.\n") + + # Start Flask server + try: + app.run(host=args.host, port=args.port, debug=False) + except KeyboardInterrupt: + print("\nShutting down...") + if args.enable_precise: + stop_precise_listener() + sys.exit(0) + + +if __name__ == '__main__': + main() diff --git a/scripts/voice_server_enhanced.py b/scripts/voice_server_enhanced.py new file mode 100755 index 
0000000..74e9a84 --- /dev/null +++ b/scripts/voice_server_enhanced.py @@ -0,0 +1,580 @@ +#!/usr/bin/env python3 +""" +Enhanced Voice Server with Multiple Wake Words and Speaker Identification + +Path: /home/alan/voice-assistant/voice_server_enhanced.py + +This enhanced version adds: +- Multiple wake word support +- Speaker identification using pyannote.audio +- Per-user customization +- Wake word-specific responses + +Usage: + python3 voice_server_enhanced.py \ + --enable-precise \ + --multi-wake-word \ + --enable-speaker-id +""" + +import os +import sys +import json +import argparse +import tempfile +import wave +import io +import re +import threading +import queue +import time +from pathlib import Path +from typing import Optional, Dict, Any, Tuple, List + +import whisper +import requests +from flask import Flask, request, jsonify, send_file +from werkzeug.exceptions import BadRequest + +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + +# Mycroft Precise +PRECISE_AVAILABLE = False +try: + from precise_runner import PreciseEngine, PreciseRunner + import pyaudio + PRECISE_AVAILABLE = True +except ImportError: + print("Warning: Mycroft Precise not installed") + +# Speaker identification +SPEAKER_ID_AVAILABLE = False +try: + from pyannote.audio import Inference + from scipy.spatial.distance import cosine + import numpy as np + SPEAKER_ID_AVAILABLE = True +except ImportError: + print("Warning: Speaker ID not available. 
Install: pip install pyannote.audio scipy") + +# Configuration +DEFAULT_HOST = "0.0.0.0" +DEFAULT_PORT = 5000 +DEFAULT_WHISPER_MODEL = "medium" +DEFAULT_HA_URL = os.getenv("HA_URL", "http://homeassistant.local:8123") +DEFAULT_HA_TOKEN = os.getenv("HA_TOKEN", "") +DEFAULT_PRECISE_ENGINE = "/usr/local/bin/precise-engine" +DEFAULT_HF_TOKEN = os.getenv("HF_TOKEN", "") + +# Wake word configurations +WAKE_WORD_CONFIGS = { + 'hey_mycroft': { + 'model': os.path.expanduser('~/precise-models/pretrained/hey-mycroft.net'), + 'sensitivity': 0.5, + 'response': 'Yes?', + 'enabled': True, + 'context': 'general' + }, + 'hey_computer': { + 'model': os.path.expanduser('~/precise-models/hey-computer/hey-computer.net'), + 'sensitivity': 0.5, + 'response': 'I\'m listening', + 'enabled': False, # Disabled by default (requires training) + 'context': 'general' + }, + 'jarvis': { + 'model': os.path.expanduser('~/precise-models/jarvis/jarvis.net'), + 'sensitivity': 0.6, + 'response': 'At your service', + 'enabled': False, + 'context': 'personal' + }, +} + +# Speaker profiles (stored in JSON file) +SPEAKER_PROFILES_FILE = os.path.expanduser('~/voice-assistant/config/speaker_profiles.json') + +# Flask app +app = Flask(__name__) +app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024 + +# Global state +whisper_model = None +ha_client = None +precise_runners = {} +precise_enabled = False +speaker_id_enabled = False +speaker_inference = None +speaker_profiles = {} +wake_word_queue = queue.Queue() + + +class HomeAssistantClient: + """Client for Home Assistant API""" + + def __init__(self, base_url: str, token: str): + self.base_url = base_url.rstrip('/') + self.token = token + self.session = requests.Session() + self.session.headers.update({ + 'Authorization': f'Bearer {token}', + 'Content-Type': 'application/json' + }) + + def get_state(self, entity_id: str) -> Optional[Dict[str, Any]]: + try: + response = self.session.get(f'{self.base_url}/api/states/{entity_id}') + response.raise_for_status() + 
return response.json()
+        except requests.RequestException as e:
+            print(f"Error getting state for {entity_id}: {e}")
+            return None
+
+    def call_service(self, domain: str, service: str, entity_id: str, **kwargs) -> bool:
+        try:
+            data = {'entity_id': entity_id}
+            data.update(kwargs)
+            response = self.session.post(
+                f'{self.base_url}/api/services/{domain}/{service}',
+                json=data
+            )
+            response.raise_for_status()
+            return True
+        except requests.RequestException as e:
+            print(f"Error calling service {domain}.{service}: {e}")
+            return False
+
+    def turn_on(self, entity_id: str, **kwargs) -> bool:
+        # Domain is derived from the entity id prefix (e.g. 'light.bedroom' -> 'light').
+        domain = entity_id.split('.')[0]
+        return self.call_service(domain, 'turn_on', entity_id, **kwargs)
+
+    def turn_off(self, entity_id: str, **kwargs) -> bool:
+        domain = entity_id.split('.')[0]
+        return self.call_service(domain, 'turn_off', entity_id, **kwargs)
+
+
+class SpeakerIdentification:
+    """Speaker identification using pyannote.audio"""
+
+    def __init__(self, hf_token: str):
+        if not SPEAKER_ID_AVAILABLE:
+            raise ImportError("Speaker ID dependencies not available")
+
+        # Requires a Hugging Face token with access to the gated pyannote/embedding model.
+        self.inference = Inference(
+            "pyannote/embedding",
+            use_auth_token=hf_token
+        )
+        self.profiles = {}
+
+    def enroll_speaker(self, name: str, audio_file: str):
+        """Enroll a speaker from audio file"""
+        # NOTE(review): depending on pyannote version, Inference(file) may return a
+        # SlidingWindowFeature rather than a flat vector — confirm .tolist()/cosine usage.
+        embedding = self.inference(audio_file)
+        self.profiles[name] = {
+            'embedding': embedding.tolist(),  # Convert to list for JSON
+            'enrolled': time.time()
+        }
+        print(f"Enrolled speaker: {name}")
+
+    def identify_speaker(self, audio_file: str, threshold: float = 0.7) -> Optional[str]:
+        """Identify speaker from audio file"""
+        # NOTE(review): returns None when no profiles are enrolled, but the string
+        # 'unknown' for a below-threshold match — callers must handle both cases.
+        if not self.profiles:
+            return None
+
+        unknown_embedding = self.inference(audio_file)
+
+        best_match = None
+        best_similarity = 0.0
+
+        # Cosine similarity against each enrolled profile; keep the best match.
+        for name, profile in self.profiles.items():
+            known_embedding = np.array(profile['embedding'])
+            similarity = 1 - cosine(unknown_embedding, known_embedding)
+
+            if similarity > best_similarity:
+                best_similarity = similarity
+                best_match = name
+
+        if best_similarity >= threshold:
+            return best_match
+
+        return 'unknown'
+
+    def load_profiles(self, filepath: str):
+        """Load speaker profiles from JSON"""
+        if os.path.exists(filepath):
+            with open(filepath, 'r') as f:
+                self.profiles = json.load(f)
+            print(f"Loaded {len(self.profiles)} speaker profiles")
+
+    def save_profiles(self, filepath: str):
+        """Save speaker profiles to JSON"""
+        # Embeddings were converted to plain lists in enroll_speaker,
+        # so the profiles dict is JSON-serializable as-is.
+        os.makedirs(os.path.dirname(filepath), exist_ok=True)
+        with open(filepath, 'w') as f:
+            json.dump(self.profiles, f, indent=2)
+        print(f"Saved {len(self.profiles)} speaker profiles")
+
+
+def load_whisper_model(model_name: str = DEFAULT_WHISPER_MODEL):
+    """Load Whisper model (lazily, cached in the module-global whisper_model)."""
+    global whisper_model
+    if whisper_model is None:
+        print(f"Loading Whisper model: {model_name}")
+        whisper_model = whisper.load_model(model_name)
+        print("Whisper model loaded")
+    return whisper_model
+
+
+def transcribe_audio(audio_file_path: str) -> Optional[str]:
+    """Transcribe audio file; returns None on any failure (best effort)."""
+    try:
+        model = load_whisper_model()
+        result = model.transcribe(audio_file_path)
+        return result['text'].strip()
+    except Exception as e:
+        print(f"Error transcribing: {e}")
+        return None
+
+
+def on_wake_word_detected(wake_word_name: str):
+    """Callback factory for wake word detection"""
+    # Returns a zero-arg closure bound to wake_word_name, so each
+    # PreciseRunner gets its own distinguishable activation callback.
+    def callback():
+        config = WAKE_WORD_CONFIGS.get(wake_word_name, {})
+        print(f"Wake word detected: {wake_word_name}")
+
+        wake_word_queue.put({
+            'timestamp': time.time(),
+            'wake_word': wake_word_name,
+            'response': config.get('response', 'Yes?'),
+            'context': config.get('context', 'general')
+        })
+
+    return callback
+
+
+def start_multiple_wake_words(configs: Dict[str, Dict], engine_path: str):
+    """Start multiple Precise wake word listeners"""
+    # Skips disabled entries and missing model files; returns True if at
+    # least one listener started, False otherwise.
+    global precise_runners, precise_enabled
+
+    if not PRECISE_AVAILABLE:
+        print("Error: Precise not available")
+        return False
+
+    active_count = 0
+
+    for name, config in configs.items():
+        if not config.get('enabled', False):
+            continue
+
+        model_path = config['model']
+        if not os.path.exists(model_path):
+            print(f"Warning: Model not found: {model_path} (skipping {name})")
+            continue
+
+        try:
+            engine = PreciseEngine(engine_path, model_path)
+            runner = PreciseRunner(
+                engine,
+                sensitivity=config.get('sensitivity', 0.5),
+                on_activation=on_wake_word_detected(name)
+            )
+            runner.start()
+            precise_runners[name] = runner
+            active_count += 1
+
+            print(f"✓ Started wake word: {name}")
+            print(f"  Model: {model_path}")
+            print(f"  Sensitivity: {config.get('sensitivity', 0.5)}")
+
+        except Exception as e:
+            print(f"✗ Failed to start {name}: {e}")
+
+    if active_count > 0:
+        precise_enabled = True
+        print(f"\nTotal active wake words: {active_count}")
+        return True
+
+    return False
+
+
+def stop_all_wake_words():
+    """Stop all wake word listeners"""
+    global precise_runners, precise_enabled
+
+    for name, runner in precise_runners.items():
+        try:
+            runner.stop()
+            print(f"Stopped wake word: {name}")
+        except Exception as e:
+            print(f"Error stopping {name}: {e}")
+
+    # Rebind (rather than mutate) the globals so any iterators are unaffected.
+    precise_runners = {}
+    precise_enabled = False
+
+
+def init_speaker_identification(hf_token: str) -> Optional[SpeakerIdentification]:
+    """Initialize speaker identification"""
+    global speaker_inference, speaker_id_enabled
+
+    if not SPEAKER_ID_AVAILABLE:
+        print("Speaker ID not available")
+        return None
+
+    try:
+        speaker_inference = SpeakerIdentification(hf_token)
+
+        # Load existing profiles
+        if os.path.exists(SPEAKER_PROFILES_FILE):
+            speaker_inference.load_profiles(SPEAKER_PROFILES_FILE)
+
+        speaker_id_enabled = True
+        print("Speaker identification initialized")
+        return speaker_inference
+
+    except Exception as e:
+        print(f"Error initializing speaker ID: {e}")
+        return None
+
+
+# Flask routes
+
+@app.route('/health', methods=['GET'])
+def health():
+    """Health check"""
+    return jsonify({
+        'status': 'healthy',
+        'whisper_loaded': whisper_model is not None,
+        'ha_connected': ha_client is not None,
+        'precise_enabled': precise_enabled,
+        'active_wake_words': 
list(precise_runners.keys()), + 'speaker_id_enabled': speaker_id_enabled, + 'enrolled_speakers': list(speaker_inference.profiles.keys()) if speaker_inference else [] + }) + + +@app.route('/wake-words', methods=['GET']) +def list_wake_words(): + """List all configured wake words""" + wake_words = [] + + for name, config in WAKE_WORD_CONFIGS.items(): + wake_words.append({ + 'name': name, + 'enabled': config.get('enabled', False), + 'active': name in precise_runners, + 'model': config['model'], + 'sensitivity': config.get('sensitivity', 0.5), + 'response': config.get('response', ''), + 'context': config.get('context', 'general') + }) + + return jsonify({ + 'wake_words': wake_words, + 'total': len(wake_words), + 'active': len(precise_runners) + }) + + +@app.route('/wake-words//enable', methods=['POST']) +def enable_wake_word(name): + """Enable a wake word""" + if name not in WAKE_WORD_CONFIGS: + return jsonify({'error': 'Wake word not found'}), 404 + + config = WAKE_WORD_CONFIGS[name] + config['enabled'] = True + + # Start the wake word if not already running + if name not in precise_runners: + # Restart all wake words to pick up changes + # (simpler than starting individual ones) + return jsonify({ + 'message': f'Enabled {name}. Restart server to activate.' 
+ }) + + return jsonify({'message': f'Wake word {name} enabled'}) + + +@app.route('/speakers/enroll', methods=['POST']) +def enroll_speaker(): + """Enroll a new speaker""" + if not speaker_id_enabled or not speaker_inference: + return jsonify({'error': 'Speaker ID not enabled'}), 400 + + if 'audio' not in request.files: + return jsonify({'error': 'No audio file'}), 400 + + name = request.form.get('name') + if not name: + return jsonify({'error': 'No speaker name provided'}), 400 + + audio_file = request.files['audio'] + + # Save temporarily + with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp: + audio_file.save(temp.name) + temp_path = temp.name + + try: + speaker_inference.enroll_speaker(name, temp_path) + speaker_inference.save_profiles(SPEAKER_PROFILES_FILE) + + return jsonify({ + 'message': f'Enrolled speaker: {name}', + 'total_speakers': len(speaker_inference.profiles) + }) + + except Exception as e: + return jsonify({'error': str(e)}), 500 + + finally: + if os.path.exists(temp_path): + os.remove(temp_path) + + +@app.route('/speakers', methods=['GET']) +def list_speakers(): + """List enrolled speakers""" + if not speaker_id_enabled or not speaker_inference: + return jsonify({'error': 'Speaker ID not enabled'}), 400 + + speakers = [] + for name, profile in speaker_inference.profiles.items(): + speakers.append({ + 'name': name, + 'enrolled': profile.get('enrolled', 0) + }) + + return jsonify({ + 'speakers': speakers, + 'total': len(speakers) + }) + + +@app.route('/process-enhanced', methods=['POST']) +def process_enhanced(): + """ + Enhanced processing with speaker ID and wake word context + """ + if 'audio' not in request.files: + return jsonify({'error': 'No audio file'}), 400 + + wake_word = request.form.get('wake_word', 'unknown') + + audio_file = request.files['audio'] + + with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp: + audio_file.save(temp.name) + temp_path = temp.name + + try: + # Identify speaker (if enabled) + 
speaker = 'unknown' + if speaker_id_enabled and speaker_inference: + speaker = speaker_inference.identify_speaker(temp_path) + print(f"Identified speaker: {speaker}") + + # Transcribe + text = transcribe_audio(temp_path) + if not text: + return jsonify({'error': 'Transcription failed'}), 500 + + print(f"[{speaker}] via [{wake_word}]: {text}") + + # Get wake word config + config = WAKE_WORD_CONFIGS.get(wake_word, {}) + context = config.get('context', 'general') + + # Process based on context and speaker + response = f"Heard via {wake_word}: {text}" + + return jsonify({ + 'success': True, + 'transcription': text, + 'speaker': speaker, + 'wake_word': wake_word, + 'context': context, + 'response': response + }) + + finally: + if os.path.exists(temp_path): + os.remove(temp_path) + + +def main(): + parser = argparse.ArgumentParser( + description="Enhanced Voice Server with Multi-Wake-Word and Speaker ID" + ) + parser.add_argument('--host', default=DEFAULT_HOST) + parser.add_argument('--port', type=int, default=DEFAULT_PORT) + parser.add_argument('--whisper-model', default=DEFAULT_WHISPER_MODEL) + parser.add_argument('--ha-url', default=DEFAULT_HA_URL) + parser.add_argument('--ha-token', default=DEFAULT_HA_TOKEN) + parser.add_argument('--enable-precise', action='store_true', + help='Enable wake word detection') + parser.add_argument('--multi-wake-word', action='store_true', + help='Enable multiple wake words') + parser.add_argument('--precise-engine', default=DEFAULT_PRECISE_ENGINE) + parser.add_argument('--enable-speaker-id', action='store_true', + help='Enable speaker identification') + parser.add_argument('--hf-token', default=DEFAULT_HF_TOKEN, + help='HuggingFace token for speaker ID') + + args = parser.parse_args() + + # Initialize HA client + global ha_client + ha_client = HomeAssistantClient(args.ha_url, args.ha_token) + + # Load Whisper + print(f"Starting enhanced voice server on {args.host}:{args.port}") + load_whisper_model(args.whisper_model) + + # Start 
Precise (multiple wake words) + if args.enable_precise: + if not PRECISE_AVAILABLE: + print("Error: Precise not available") + sys.exit(1) + + # Enable all or just first wake word + if args.multi_wake_word: + # Enable all configured wake words + enabled_count = sum(1 for c in WAKE_WORD_CONFIGS.values() if c.get('enabled')) + print(f"\nStarting {enabled_count} wake words...") + else: + # Enable only first wake word + first_key = list(WAKE_WORD_CONFIGS.keys())[0] + WAKE_WORD_CONFIGS[first_key]['enabled'] = True + for key in list(WAKE_WORD_CONFIGS.keys())[1:]: + WAKE_WORD_CONFIGS[key]['enabled'] = False + + if not start_multiple_wake_words(WAKE_WORD_CONFIGS, args.precise_engine): + print("Error: No wake words started") + sys.exit(1) + + # Initialize speaker ID + if args.enable_speaker_id: + if not args.hf_token: + print("Error: --hf-token required for speaker ID") + sys.exit(1) + + if not init_speaker_identification(args.hf_token): + print("Warning: Speaker ID initialization failed") + + # Start server + try: + print("\n" + "="*50) + print("Server ready!") + print("="*50 + "\n") + app.run(host=args.host, port=args.port, debug=False) + except KeyboardInterrupt: + print("\nShutting down...") + stop_all_wake_words() + sys.exit(0) + + +if __name__ == '__main__': + main()