minerva/scripts/setup_precise.sh

#!/usr/bin/env bash
#
# Path: setup_precise.sh
#
# Purpose and usage:
#     Sets up Mycroft Precise wake word detection on Heimdall
#     - Creates conda environment for Precise
#     - Installs TensorFlow 1.x and dependencies
#     - Downloads precise-engine
#     - Sets up training directories
#     - Provides helper scripts for training
#
# Requirements:
#     - conda/miniconda installed
#     - Internet connection for downloads
#     - Microphone for recording samples
#
# Usage:
#     ./setup_precise.sh [--wake-word "phrase"] [--env-name NAME] [-v|--verbose] [-h|--help]
#
# Author: PRbL Library
# Created: $(date +"%Y-%m-%d")

# ----- PRbL Color and output functions -----
# ANSI color codes used by the output helpers in this script.
# NOTE: the values are single-quoted, so each variable holds the literal
# characters "\033[..." rather than a real ESC byte. They only become colors
# when printed through something that interprets backslash escapes
# (`echo -e`, `printf '%b'`); plain `echo`/`cat` prints them verbatim.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Print a leveled, colorized status message to stderr.
#
# Arguments:
#   $1   - level: info | success | warning | error | debug
#   $2.. - message words (joined with single spaces)
# Globals:
#   RED, GREEN, YELLOW, BLUE, PURPLE, NC (read) - color escape strings
#   VERBOSE (read) - debug messages are printed only when it equals "true"
# Outputs:
#   The formatted message on stderr. For any unrecognized level the level
#   word itself is dropped and only the remaining words are printed.
# Returns:
#   0 when the message was written or when a debug message was suppressed.
print_status() {
    local level="$1"
    shift
    case "$level" in
        "info")     echo -e "${BLUE}[INFO]${NC} $*" >&2 ;;
        "success")  echo -e "${GREEN}[SUCCESS]${NC} $*" >&2 ;;
        "warning")  echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;;
        "error")    echo -e "${RED}[ERROR]${NC} $*" >&2 ;;
        "debug")
            # A full `if` instead of `[[ ... ]] && echo`: the short-circuit
            # form leaves status 1 behind when VERBOSE is off, which makes a
            # harmless suppressed message look like a failure to callers
            # using `||` or running under `set -e`.
            if [[ "$VERBOSE" == "true" ]]; then
                echo -e "${PURPLE}[DEBUG]${NC} $*" >&2
            fi
            ;;
        *)          echo -e "$*" >&2 ;;
    esac
}

# ----- Configuration -----
# Script-wide defaults. parse_args may overwrite CONDA_ENV_NAME, WAKE_WORD and
# VERBOSE from the command line; MODELS_DIR has no command-line option.

# Name of the conda environment to create/activate (--env-name).
CONDA_ENV_NAME="precise"
# Wake phrase; the per-project directory name is derived from it (--wake-word).
WAKE_WORD="hey computer"
# Root directory that holds the per-wake-word project and pretrained models.
MODELS_DIR="$HOME/precise-models"
# "true" enables debug-level output in print_status (-v/--verbose).
VERBOSE=false

# ----- Dependency checking -----
# Report whether the shell can resolve a command name.
#
# Arguments:
#   $1 - name to look up
# Returns:
#   The status of `command -v`: 0 when the name resolves, non-zero otherwise.
#   Nothing is printed.
command_exists() {
    local tool="$1"
    command -v "$tool" > /dev/null 2>&1
}

# Verify that the `conda` command is available.
#
# Returns:
#   0 when command_exists finds conda; otherwise logs an error through
#   print_status and returns 1.
check_conda() {
    command_exists conda && return 0

    print_status error "conda not found. Please install miniconda first."
    return 1
}

# ----- Parse arguments -----
# Parse command-line options into the global configuration variables.
#
# Arguments:
#   "$@" - the script's command-line arguments
# Globals:
#   WAKE_WORD, CONDA_ENV_NAME, VERBOSE (written)
# Exits:
#   0 after printing the help text (-h/--help);
#   1 on an unknown option, or when --wake-word/--env-name is given without
#   a non-empty value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --wake-word|--env-name)
                # Both options consume a value. `shift 2` fails *without
                # shifting* when only one argument is left, so an option
                # given last with no value would otherwise loop forever.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    print_status error "Option $1 requires a non-empty argument"
                    exit 1
                fi
                if [[ "$1" == "--wake-word" ]]; then
                    WAKE_WORD="$2"
                else
                    CONDA_ENV_NAME="$2"
                fi
                shift 2
                ;;
            -v|--verbose)
                VERBOSE=true
                shift
                ;;
            -h|--help)
                cat << EOF
Usage: $(basename "$0") [OPTIONS]

Options:
    --wake-word "phrase"    Wake word to train (default: "hey computer")
    --env-name NAME         Custom conda environment name (default: precise)
    -v, --verbose           Enable verbose output
    -h, --help              Show this help message

Examples:
    $(basename "$0") --wake-word "hey jarvis"
    $(basename "$0") --env-name mycroft-precise

EOF
                exit 0
                ;;
            *)
                print_status error "Unknown option: $1"
                exit 1
                ;;
        esac
    done
}

# ----- Setup functions -----

# Create the conda environment named by CONDA_ENV_NAME (Python 3.7).
#
# If an environment with that exact name is already listed by
# `conda env list`, the user is asked (single keypress on stdin) whether to
# remove and recreate it; any answer other than y/Y keeps the existing one.
#
# Globals:
#   CONDA_ENV_NAME (read)
# Returns:
#   0 when the environment was created or an existing one is kept;
#   1 when removing the old environment or creating the new one fails.
create_conda_environment() {
    print_status info "Creating conda environment: $CONDA_ENV_NAME"

    # Check if environment already exists. The first column of each listed
    # line is compared literally, so regex metacharacters in the name (e.g.
    # the "." in "my.env") cannot match a different environment.
    local env_exists=false listed_name
    while read -r listed_name _; do
        if [[ "$listed_name" == "$CONDA_ENV_NAME" ]]; then
            env_exists=true
        fi
    done < <(conda env list)

    if [[ "$env_exists" == "true" ]]; then
        print_status warning "Environment $CONDA_ENV_NAME already exists"
        read -p "Remove and recreate? (y/N): " -n 1 -r
        echo
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            print_status info "Removing existing environment..."
            # Stop here on failure; creating over a half-removed environment
            # would only produce a more confusing error later.
            conda env remove -n "$CONDA_ENV_NAME" -y || {
                print_status error "Failed to remove existing conda environment"
                return 1
            }
        else
            print_status info "Using existing environment"
            return 0
        fi
    fi

    # Create new environment with Python 3.7 (required for TF 1.15)
    print_status info "Creating Python 3.7 environment..."
    conda create -n "$CONDA_ENV_NAME" python=3.7 -y || {
        print_status error "Failed to create conda environment"
        return 1
    }

    print_status success "Conda environment created"
    return 0
}

# Install TensorFlow 1.15.5 into the conda environment and verify the import.
#
# Globals:
#   CONDA_ENV_NAME (read)
# Side effects:
#   Activates the conda environment in the current shell.
# Returns:
#   0 on success; 1 when activation, installation or the import check fails.
install_tensorflow() {
    print_status info "Installing TensorFlow 1.15..."

    # Activate conda environment. The eval loads conda's shell integration so
    # that `conda activate` works in a non-interactive script.
    eval "$(conda shell.bash hook)"
    conda activate "$CONDA_ENV_NAME" || {
        print_status error "Failed to activate conda environment"
        return 1
    }

    # Install TensorFlow 1.15 (last 1.x version).
    # - `python -m pip` guarantees the activated environment's interpreter is
    #   the one being installed into, even if another `pip` is first on PATH.
    # - `--break-system-packages` is deliberately not passed: it only exists in
    #   newer pip releases (an older pip, as a Python 3.7 environment may
    #   ship, rejects it as an unknown option), and a conda environment is not
    #   an externally managed system Python, so the override is not needed.
    python -m pip install tensorflow==1.15.5 || {
        print_status error "Failed to install TensorFlow"
        return 1
    }

    # Verify installation
    python -c "import tensorflow as tf; print(f'TensorFlow {tf.__version__} installed')" || {
        print_status error "TensorFlow installation verification failed"
        return 1
    }

    print_status success "TensorFlow 1.15 installed"
    return 0
}

# Install Mycroft Precise and its audio dependencies into the conda
# environment, then verify that `precise_runner` can be imported.
#
# Globals:
#   CONDA_ENV_NAME (read)
# Side effects:
#   Activates the conda environment in the current shell; on systems with
#   apt-get, installs system audio packages via sudo.
# Returns:
#   0 on success; 1 when activation, the mycroft-precise install or the import
#   check fails. System package and PyAudio failures only produce warnings.
install_precise() {
    print_status info "Installing Mycroft Precise..."

    # Activate conda environment
    eval "$(conda shell.bash hook)"
    conda activate "$CONDA_ENV_NAME" || {
        print_status error "Failed to activate conda environment"
        return 1
    }

    # Install audio dependencies (best effort: a failure is only a warning)
    print_status info "Installing system audio dependencies..."
    if command_exists apt-get; then
        sudo apt-get update
        sudo apt-get install -y portaudio19-dev sox libatlas-base-dev || {
            print_status warning "Some audio dependencies failed to install"
        }
    fi

    # Install Python audio libraries.
    # `python -m pip` targets the activated environment's interpreter, and
    # `--break-system-packages` is deliberately not passed: an older pip (as a
    # Python 3.7 environment may ship) rejects it as an unknown option, and a
    # conda environment does not need the override.
    python -m pip install pyaudio || {
        print_status warning "PyAudio installation failed (may need manual installation)"
    }

    # Install Precise
    python -m pip install mycroft-precise || {
        print_status error "Failed to install Mycroft Precise"
        return 1
    }

    # Verify installation
    python -c "import precise_runner; print('Precise installed successfully')" || {
        print_status error "Precise installation verification failed"
        return 1
    }

    print_status success "Mycroft Precise installed"
    return 0
}

# Download the precise-engine release archive, install the executable to
# /usr/local/bin (via sudo) and verify it with `precise-engine --version`.
#
# The version (0.3.0) and architecture (x86_64) are fixed in the URL below.
#
# Returns:
#   0 on success; 1 when the temporary directory, download, extraction,
#   installation or verification fails. The temporary directory is removed on
#   every path after it has been created.
download_precise_engine() {
    print_status info "Downloading precise-engine..."

    local engine_version="0.3.0"
    local engine_url="https://github.com/MycroftAI/mycroft-precise/releases/download/v${engine_version}/precise-engine_${engine_version}_x86_64.tar.gz"
    local temp_dir

    # Declared and assigned separately: `local v=$(cmd)` hides cmd's exit
    # status, and an empty temp_dir would send the download to
    # "/precise-engine.tar.gz".
    temp_dir=$(mktemp -d) || {
        print_status error "Failed to create temporary directory"
        return 1
    }
    if [[ -z "$temp_dir" || ! -d "$temp_dir" ]]; then
        print_status error "Failed to create temporary directory"
        return 1
    fi

    # Download engine
    wget -q --show-progress -O "$temp_dir/precise-engine.tar.gz" "$engine_url" || {
        print_status error "Failed to download precise-engine"
        rm -rf "$temp_dir"
        return 1
    }

    # Extract
    tar xzf "$temp_dir/precise-engine.tar.gz" -C "$temp_dir" || {
        print_status error "Failed to extract precise-engine"
        rm -rf "$temp_dir"
        return 1
    }

    # Install to /usr/local/bin
    # NOTE(review): only the single executable is copied out of the extracted
    # precise-engine/ directory. If the release bundles supporting files next
    # to the binary, the --version check below is what will reveal it —
    # confirm against the release contents.
    sudo cp "$temp_dir/precise-engine/precise-engine" /usr/local/bin/ || {
        print_status error "Failed to install precise-engine"
        rm -rf "$temp_dir"
        return 1
    }

    sudo chmod +x /usr/local/bin/precise-engine || {
        print_status error "Failed to make precise-engine executable"
        rm -rf "$temp_dir"
        return 1
    }

    # Clean up
    rm -rf "$temp_dir"

    # Verify installation
    precise-engine --version || {
        print_status error "precise-engine installation verification failed"
        return 1
    }

    print_status success "precise-engine installed"
    return 0
}

# Create the per-wake-word project tree under MODELS_DIR and record its path.
#
# The directory name is WAKE_WORD with spaces turned into dashes and upper
# case folded to lower case. Four sample directories are created inside it:
# wake-word, not-wake-word, test/wake-word and test/not-wake-word.
#
# Globals:
#   WAKE_WORD, MODELS_DIR (read)
# Side effects:
#   Writes the project path to "$MODELS_DIR/.current_project".
# Returns:
#   0 on success; 1 when the directories or the marker file cannot be written.
create_training_directory() {
    print_status info "Creating training directory structure..."

    # Sanitize wake word for directory name
    local wake_word_dir project_dir
    wake_word_dir=$(printf '%s\n' "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]')
    project_dir="$MODELS_DIR/$wake_word_dir"

    # Report the failure instead of announcing success over a missing tree.
    mkdir -p "$project_dir"/{wake-word,not-wake-word,test/wake-word,test/not-wake-word} || {
        print_status error "Failed to create training directory: $project_dir"
        return 1
    }

    # Store project path for later use
    printf '%s\n' "$project_dir" > "$MODELS_DIR/.current_project" || {
        print_status error "Failed to record project path in $MODELS_DIR/.current_project"
        return 1
    }

    print_status success "Training directory created: $project_dir"
    return 0
}

# Generate the six numbered helper scripts (record, record negatives, train,
# test, evaluate, tune threshold) inside the wake word's project directory
# and make them executable.
#
# Globals:
#   WAKE_WORD, MODELS_DIR, CONDA_ENV_NAME (read)
# Returns:
#   0 on success; 1 when the project directory is missing or a script cannot
#   be written or made executable.
create_training_scripts() {
    print_status info "Creating training helper scripts..."

    local wake_word_dir project_dir
    wake_word_dir=$(printf '%s\n' "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]')
    project_dir="$MODELS_DIR/$wake_word_dir"

    if [[ ! -d "$project_dir" ]]; then
        print_status error "Training directory not found: $project_dir"
        return 1
    fi

    # Shell-quoted values for use as command arguments in the generated
    # scripts:
    # - env_arg makes every script activate the environment chosen with
    #   --env-name instead of a hard-coded "precise";
    # - model_arg keeps a wake word such as "what's up" from producing a
    #   script with an unbalanced quote.
    local env_arg model_arg script
    printf -v env_arg '%q' "$CONDA_ENV_NAME"
    printf -v model_arg '%q' "${wake_word_dir}.net"

    # Create recording script
    script="$project_dir/1-record-wake-word.sh"
    cat > "$script" << EOF || { print_status error "Failed to write $script"; return 1; }
#!/bin/bash
# Step 1: Record wake word samples
# Run this script and follow the prompts to record ~50-100 samples

eval "\$(conda shell.bash hook)"
conda activate ${env_arg}

echo "Recording wake word samples..."
echo "Press SPACE to start/stop recording"
echo "Press Ctrl+C when done (aim for 50-100 samples)"
echo ""

precise-collect
EOF

    # Create not-wake-word recording script
    script="$project_dir/2-record-not-wake-word.sh"
    cat > "$script" << EOF || { print_status error "Failed to write $script"; return 1; }
#!/bin/bash
# Step 2: Record "not wake word" samples
# Record random speech, TV, music, similar-sounding phrases

eval "\$(conda shell.bash hook)"
conda activate ${env_arg}

echo "Recording not-wake-word samples..."
echo "Record:"
echo "  - Normal conversation"
echo "  - TV/music background"
echo "  - Similar sounding phrases"
echo "  - Ambient noise"
echo ""
echo "Press SPACE to start/stop recording"
echo "Press Ctrl+C when done (aim for 200-500 samples)"
echo ""

precise-collect -f not-wake-word/samples.wav
EOF

    # Create training script
    script="$project_dir/3-train-model.sh"
    cat > "$script" << EOF || { print_status error "Failed to write $script"; return 1; }
#!/bin/bash
# Step 3: Train the model
# This will train for 60 epochs (adjust -e parameter for more/less)

eval "\$(conda shell.bash hook)"
conda activate ${env_arg}

echo "Training wake word model..."
echo "This will take 30-60 minutes..."
echo ""

# Train model
precise-train -e 60 ${model_arg} .

echo ""
echo "Training complete!"
echo "Test with: precise-listen ${wake_word_dir}.net"
EOF

    # Create testing script
    script="$project_dir/4-test-model.sh"
    cat > "$script" << EOF || { print_status error "Failed to write $script"; return 1; }
#!/bin/bash
# Step 4: Test the model with live microphone

eval "\$(conda shell.bash hook)"
conda activate ${env_arg}

echo "Testing wake word model..."
echo "Speak your wake word - you should see '!' when detected"
echo "Speak other phrases - should not trigger"
echo ""
echo "Press Ctrl+C to exit"
echo ""

precise-listen ${model_arg}
EOF

    # Create evaluation script
    script="$project_dir/5-evaluate-model.sh"
    cat > "$script" << EOF || { print_status error "Failed to write $script"; return 1; }
#!/bin/bash
# Step 5: Evaluate model on test set

eval "\$(conda shell.bash hook)"
conda activate ${env_arg}

echo "Evaluating wake word model on test set..."
echo ""

precise-test ${model_arg} test/

echo ""
echo "Check metrics above:"
echo "  - Wake word accuracy should be >95%"
echo "  - False positive rate should be <5%"
EOF

    # Create tuning script
    script="$project_dir/6-tune-threshold.sh"
    cat > "$script" << EOF || { print_status error "Failed to write $script"; return 1; }
#!/bin/bash
# Step 6: Tune activation threshold

eval "\$(conda shell.bash hook)"
conda activate ${env_arg}

echo "Testing different thresholds..."
echo ""
echo "Default threshold: 0.5"
echo "Higher = fewer false positives, may miss some wake words"
echo "Lower = catch more wake words, more false positives"
echo ""

for threshold in 0.3 0.5 0.7; do
    echo "Testing threshold: \$threshold"
    echo "Press Ctrl+C to try next threshold"
    precise-listen ${model_arg} -t \$threshold
done
EOF

    # Make all scripts executable
    chmod +x "$project_dir"/*.sh || {
        print_status error "Failed to make training scripts executable"
        return 1
    }

    print_status success "Training scripts created in $project_dir"
    return 0
}

# Write README.md (the step-by-step training guide) into the wake word's
# project directory.
#
# Globals:
#   WAKE_WORD, MODELS_DIR (read)
# Returns:
#   0 on success; 1 when the README cannot be written (for example because
#   the project directory does not exist).
create_readme() {
    print_status info "Creating README..."

    local wake_word_dir project_dir readme
    wake_word_dir=$(printf '%s\n' "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]')
    project_dir="$MODELS_DIR/$wake_word_dir"
    readme="$project_dir/README.md"

    # The here-document is unquoted so $WAKE_WORD and ${wake_word_dir} expand;
    # Markdown code fences are therefore written as escaped backticks.
    # A failed write is reported instead of being followed by a success line.
    cat > "$readme" << EOF || { print_status error "Failed to write $readme"; return 1; }
# Wake Word Training: "$WAKE_WORD"

## Quick Start

Follow these steps in order:

### 1. Record Wake Word Samples
\`\`\`bash
./1-record-wake-word.sh
\`\`\`

Record 50-100 samples:
- Vary your tone and speed
- Different distances from microphone
- Different background noise levels
- Have family members record too

### 2. Record Not-Wake-Word Samples
\`\`\`bash
./2-record-not-wake-word.sh
\`\`\`

Record 200-500 samples of:
- Normal conversation
- TV/music in background
- Similar sounding phrases
- Ambient household noise

### 3. Organize Samples

Move files into training/test split:
\`\`\`bash
# 80% of wake-word samples go to:
mv wake-word-samples-* wake-word/

# 20% of wake-word samples go to:
mv wake-word-samples-* test/wake-word/

# 80% of not-wake-word samples go to:
mv not-wake-word-samples-* not-wake-word/

# 20% of not-wake-word samples go to:
mv not-wake-word-samples-* test/not-wake-word/
\`\`\`

### 4. Train Model
\`\`\`bash
./3-train-model.sh
\`\`\`

Wait 30-60 minutes for training to complete.

### 5. Test Model
\`\`\`bash
./4-test-model.sh
\`\`\`

Speak your wake word and verify detection.

### 6. Evaluate Model
\`\`\`bash
./5-evaluate-model.sh
\`\`\`

Check accuracy metrics on test set.

### 7. Tune Threshold
\`\`\`bash
./6-tune-threshold.sh
\`\`\`

Find the best threshold for your environment.

## Tips for Good Training

1. **Quality over quantity** - Clear samples are better than many poor ones
2. **Diverse conditions** - Different noise levels, distances, speakers
3. **Hard negatives** - Include similar-sounding phrases in not-wake-word set
4. **Regular updates** - Add false positives/negatives and retrain

## Next Steps

Once trained and tested:

1. Copy model to voice assistant server:
   \`\`\`bash
   cp ${wake_word_dir}.net ~/voice-assistant/models/
   \`\`\`

2. Update voice assistant config:
   \`\`\`bash
   vim ~/voice-assistant/config/.env
   # Set: PRECISE_MODEL=~/voice-assistant/models/${wake_word_dir}.net
   \`\`\`

3. Restart voice assistant service:
   \`\`\`bash
   sudo systemctl restart voice-assistant
   \`\`\`

## Troubleshooting

**Low accuracy?**
- Collect more training samples
- Increase training epochs (edit 3-train-model.sh, change -e 60 to -e 120)
- Verify 80/20 train/test split

**Too many false positives?**
- Increase threshold (use 6-tune-threshold.sh)
- Add false trigger audio to not-wake-word set
- Retrain with more diverse negative samples

**Misses wake words?**
- Lower threshold
- Add missed samples to training set
- Ensure good audio quality

## Resources

- Mycroft Precise Docs: https://github.com/MycroftAI/mycroft-precise
- Training Guide: https://mycroft-ai.gitbook.io/docs/mycroft-technologies/precise
- Community Models: https://github.com/MycroftAI/precise-data
EOF

    print_status success "README created in $project_dir"
    return 0
}

# Download and unpack the "Hey Mycroft" pre-trained model into
# "$MODELS_DIR/pretrained" unless hey-mycroft.net is already there.
#
# This step is optional: every failure is logged as a warning and the
# function still returns 0 so the overall setup continues.
#
# Globals:
#   MODELS_DIR (read)
# Returns:
#   Always 0.
download_pretrained_models() {
    print_status info "Downloading pre-trained models..."

    local pretrained_dir="$MODELS_DIR/pretrained"
    local archive="$pretrained_dir/hey-mycroft.tar.gz"

    # Download Hey Mycroft model (as example/base)
    local model_url="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz"

    # Create models directory
    mkdir -p "$pretrained_dir" || {
        print_status warning "Could not create $pretrained_dir; skipping pre-trained model (optional)"
        return 0
    }

    if [[ -f "$pretrained_dir/hey-mycroft.net" ]]; then
        print_status info "Pre-trained model already exists"
        return 0
    fi

    print_status info "Downloading Hey Mycroft model..."
    wget -q --show-progress -O "$archive" "$model_url" || {
        # `wget -O` opens the output file before transferring, so a failed
        # download leaves an empty or truncated archive; do not keep it.
        rm -f -- "$archive"
        print_status warning "Failed to download pre-trained model (optional)"
        return 0
    }

    tar xzf "$archive" -C "$pretrained_dir/" || {
        # An archive that cannot be extracted is of no use to a later run.
        rm -f -- "$archive"
        print_status warning "Failed to extract pre-trained model"
        return 0
    }

    print_status success "Pre-trained model downloaded"
    return 0
}

# Print the post-setup summary and next-step instructions to stdout.
#
# Globals:
#   WAKE_WORD, MODELS_DIR, CONDA_ENV_NAME (read)
#   GREEN, BLUE, CYAN, NC (read) - color codes stored as backslash sequences
print_next_steps() {
    local wake_word_dir project_dir
    wake_word_dir=$(printf '%s\n' "$WAKE_WORD" | tr ' ' '-' | tr '[:upper:]' '[:lower:]')
    project_dir="$MODELS_DIR/$wake_word_dir"

    # The color variables contain the literal text "\033[...", which `cat`
    # would print verbatim. Expand them to real escape bytes first; only the
    # color codes are expanded, so backslashes in paths are left untouched.
    local green blue cyan reset
    printf -v green '%b' "${GREEN:-}"
    printf -v blue '%b' "${BLUE:-}"
    printf -v cyan '%b' "${CYAN:-}"
    printf -v reset '%b' "${NC:-}"

    cat << EOF

${green}Setup complete!${reset}

Wake word: "$WAKE_WORD"
Project directory: $project_dir

${blue}Next steps:${reset}

1. ${cyan}Activate conda environment:${reset}
   conda activate $CONDA_ENV_NAME

2. ${cyan}Navigate to project directory:${reset}
   cd $project_dir

3. ${cyan}Follow the README or run scripts in order:${reset}
   ./1-record-wake-word.sh      # Record wake word samples
   ./2-record-not-wake-word.sh  # Record negative samples
   # Organize samples into train/test directories
   ./3-train-model.sh           # Train the model (30-60 min)
   ./4-test-model.sh            # Test with microphone
   ./5-evaluate-model.sh        # Check accuracy metrics
   ./6-tune-threshold.sh        # Find best threshold

${blue}Helpful commands:${reset}

Test pre-trained model:
  conda activate $CONDA_ENV_NAME
  precise-listen $MODELS_DIR/pretrained/hey-mycroft.net

Check precise-engine:
  precise-engine --version

${blue}Resources:${reset}

Full guide: See MYCROFT_PRECISE_GUIDE.md
Project README: $project_dir/README.md
Mycroft Docs: https://github.com/MycroftAI/mycroft-precise

EOF
}

# ----- Main -----
# Script entry point: announce the run, parse options, then execute every
# setup step in order, exiting with status 1 at the first step that fails.
#
# Arguments:
#   "$@" - command-line arguments, handed to parse_args unchanged
main() {
    print_status info "Starting Mycroft Precise setup..."

    parse_args "$@"

    # Dependency check first, then the setup steps; order matters.
    local step
    for step in \
        check_conda \
        create_conda_environment \
        install_tensorflow \
        install_precise \
        download_precise_engine \
        create_training_directory \
        create_training_scripts \
        create_readme \
        download_pretrained_models; do
        "$step" || exit 1
    done

    print_next_steps
}

# Run main
# Forwards the script's command-line arguments to main unchanged; this runs
# whenever the file is executed or sourced.
main "$@"