minerva/scripts/download_pretrained_models.sh

#!/usr/bin/env bash
#
# Path: download_pretrained_models.sh
#
# Purpose and usage:
#     Downloads and sets up pre-trained Mycroft Precise wake word models
#     - Downloads Hey Mycroft, Hey Jarvis, and other available models
#     - Tests each model with microphone
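#     - Generates a launcher script (multi-wake-config.sh) that starts the
#       voice server with every downloaded model when more than one is present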
#
# Requirements:
#     - Mycroft Precise installed (run setup_precise.sh first)
#     - Internet connection for downloads
#     - Microphone for testing
#
# Usage:
#     ./download_pretrained_models.sh [--test-all] [--model MODEL_NAME] [-v|--verbose] [-h|--help]
#
# Author: PRbL Library
# Created: $(date +"%Y-%m-%d")

# ----- PRbL Color and output functions -----
# ANSI colour codes used by the output helpers. They are kept as literal
# backslash sequences (not real ESC bytes), so they only take effect when
# printed through `echo -e`.
_sgr='\033['
RED="${_sgr}0;31m"
GREEN="${_sgr}0;32m"
YELLOW="${_sgr}0;33m"
BLUE="${_sgr}0;34m"
PURPLE="${_sgr}0;35m"
CYAN="${_sgr}0;36m"
NC="${_sgr}0m" # No Color (reset)
unset _sgr

# Print a colour-tagged status message to stderr.
# Globals:   RED, GREEN, YELLOW, BLUE, PURPLE, NC (read); VERBOSE (read)
# Arguments: $1   - level: info | success | warning | error | debug;
#                   any other value prints the message without a tag
#            $2.. - message words, joined with single spaces
# Outputs:   the formatted message on stderr; "debug" messages are printed
#            only when VERBOSE is "true"
# Returns:   0 (the status of the echo), including for suppressed debug
#            messages
print_status() {
    local level="${1:-}"
    if [[ $# -gt 0 ]]; then
        shift
    fi
    case "$level" in
        "info")     echo -e "${BLUE}[INFO]${NC} $*" >&2 ;;
        "success")  echo -e "${GREEN}[SUCCESS]${NC} $*" >&2 ;;
        "warning")  echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;;
        "error")    echo -e "${RED}[ERROR]${NC} $*" >&2 ;;
        "debug")
            # An explicit `if` instead of `[[ ... ]] && echo`: the short-circuit
            # form made the whole function return 1 whenever debug output was
            # disabled, which callers could mistake for a failure.
            if [[ "${VERBOSE:-false}" == "true" ]]; then
                echo -e "${PURPLE}[DEBUG]${NC} $*" >&2
            fi
            ;;
        *)          echo -e "$*" >&2 ;;
    esac
}

# ----- Configuration -----
# Runtime settings; parse_args may override everything except MODELS_DIR.
MODELS_DIR="${HOME}/precise-models/pretrained"   # where *.net files are stored
TEST_ALL=false                                   # set by --test-all
SPECIFIC_MODEL=""                                # set by --model NAME
VERBOSE=false                                    # set by -v / --verbose

# Available pre-trained models: model name -> tarball URL.
# Every tarball sits under the same base URL and is named <model>.tar.gz.
declare -A MODELS=()
for _model in hey-mycroft hey-jarvis christopher hey-ezra; do
    MODELS["$_model"]="https://github.com/MycroftAI/precise-data/raw/models-dev/${_model}.tar.gz"
done
unset _model

# ----- Dependency checking -----
# Report whether a command can be resolved by the shell.
# Arguments: $1 - command name to look up
# Outputs:   nothing (lookup output is discarded)
# Returns:   0 if `command -v` finds it, non-zero otherwise
command_exists() {
    command -v "$1" > /dev/null 2>&1
}

# Verify that the external tools this script invokes are available.
# Checks wget and tar (both used by download_model) and precise-listen
# (used by test_model). tar was previously not checked, so a missing tar was
# only discovered after a model had already been downloaded.
# Globals:   none
# Arguments: none
# Outputs:   one error line (via print_status) naming every missing tool
# Returns:   0 if everything is present, 1 otherwise
check_dependencies() {
    local missing=()
    local tool

    for tool in wget tar; do
        if ! command_exists "$tool"; then
            missing+=("$tool")
        fi
    done

    # Listed separately because it carries an installation hint.
    if ! command_exists precise-listen; then
        missing+=("precise-listen (run setup_precise.sh first)")
    fi

    if [[ ${#missing[@]} -gt 0 ]]; then
        print_status error "Missing dependencies: ${missing[*]}"
        return 1
    fi

    return 0
}

# ----- Parse arguments -----
# Parse command-line options into the global settings.
# Globals:   TEST_ALL, SPECIFIC_MODEL, VERBOSE (written)
# Arguments: the script's command-line arguments
# Outputs:   help text on stdout for -h/--help; error messages via print_status
# Returns:   0 when all arguments were consumed; exits 0 after printing help;
#            exits 1 on an unknown option or when --model has no value
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --test-all)
                TEST_ALL=true
                shift
                ;;
            --model)
                # Validate before shifting: with only one argument left,
                # `shift 2` fails WITHOUT shifting anything, which would leave
                # "--model" in place and make this loop spin forever.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    print_status error "Option --model requires a model name"
                    exit 1
                fi
                SPECIFIC_MODEL="$2"
                shift 2
                ;;
            -v|--verbose)
                VERBOSE=true
                shift
                ;;
            -h|--help)
                cat << EOF
Usage: $(basename "$0") [OPTIONS]

Download and test pre-trained Mycroft Precise wake word models

Options:
    --test-all          Download and test all available models
    --model NAME        Download and test specific model
    -v, --verbose       Enable verbose output
    -h, --help          Show this help message

Available models:
    hey-mycroft         Original Mycroft wake word (most data)
    hey-jarvis          Popular alternative
    christopher         Alternative wake word
    hey-ezra            Another option

Examples:
    $(basename "$0") --model hey-mycroft
    $(basename "$0") --test-all

EOF
                exit 0
                ;;
            *)
                print_status error "Unknown option: $1"
                exit 1
                ;;
        esac
    done
}

# ----- Functions -----

# Make sure the models directory exists, creating parent directories as needed.
# Globals:   MODELS_DIR (read)
# Arguments: none
# Outputs:   progress and error messages via print_status
# Returns:   0 if the directory exists afterwards, 1 if mkdir failed
create_models_directory() {
    print_status info "Creating models directory: $MODELS_DIR"

    if mkdir -p "$MODELS_DIR"; then
        return 0
    fi

    print_status error "Failed to create directory"
    return 1
}

# Download one pre-trained model archive and unpack it into MODELS_DIR.
# Globals:   MODELS (read), MODELS_DIR (read), TMPDIR (read, optional)
# Arguments: $1 - model name; must be a key of MODELS
# Outputs:   progress and error messages via print_status; wget progress bar
# Returns:   0 if <MODELS_DIR>/<name>.net exists afterwards (already present
#            or freshly extracted); 1 on unknown model, temp-file, download
#            or extraction failure, or if the archive did not contain
#            <name>.net at its top level
download_model() {
    local model_name="${1:-}"
    local model_url=""
    local temp_file

    # An empty key is an invalid associative-array subscript, so only look
    # the URL up when a name was actually given.
    if [[ -n "$model_name" ]]; then
        model_url="${MODELS[$model_name]:-}"
    fi

    if [[ -z "$model_url" ]]; then
        print_status error "Unknown model: $model_name"
        return 1
    fi

    # Check if already downloaded
    if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
        print_status info "Model already exists: $model_name"
        return 0
    fi

    print_status info "Downloading $model_name..."

    # mktemp creates the file atomically under an unpredictable name; the old
    # "/tmp/<name>-<pid>" path could be pre-created or symlinked by another
    # local user before wget wrote to it.
    temp_file=$(mktemp "${TMPDIR:-/tmp}/precise-${model_name}.XXXXXX") || {
        print_status error "Failed to create temporary file"
        return 1
    }

    wget -q --show-progress -O "$temp_file" "$model_url" || {
        print_status error "Failed to download $model_name"
        rm -f "$temp_file"
        return 1
    }

    # Extract
    print_status info "Extracting $model_name..."
    tar xzf "$temp_file" -C "$MODELS_DIR" || {
        print_status error "Failed to extract $model_name"
        rm -f "$temp_file"
        return 1
    }

    rm -f "$temp_file"

    # Verify extraction: tar can succeed while the archive lacks the expected
    # file, so check for the .net file explicitly.
    if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
        print_status success "Downloaded: $model_name"
        return 0
    fi

    print_status error "Extraction failed for $model_name"
    return 1
}

# Run an interactive microphone test of one downloaded model.
# Globals:   MODELS_DIR, CYAN, NC (read); REPLY (written by the bare `read`)
# Arguments: $1 - model name (file <MODELS_DIR>/<name>.net must exist)
# Outputs:   instructions on stdout, status messages via print_status,
#            plus whatever precise-listen prints
# Returns:   0 if precise-listen exits successfully; 1 if the model file is
#            missing or precise-listen returns non-zero
test_model() {
    local name="$1"
    local net_path="$MODELS_DIR/${name}.net"

    if ! [[ -f "$net_path" ]]; then
        print_status error "Model file not found: $net_path"
        return 1
    fi

    print_status info "Testing model: $name"
    printf '\n'
    echo -e "${CYAN}Instructions:${NC}"
    printf '%s\n' \
        "  - Speak the wake word: '$name'" \
        "  - You should see '!' when detected" \
        "  - Press Ctrl+C to stop testing" \
        ""
    # Pause until the user confirms; the typed text itself is not used.
    read -p "Press Enter to start test..."

    # When conda is installed, try to switch to the "precise" environment.
    # Activation is best-effort: a failure is silenced and the test continues
    # with whatever precise-listen is on PATH.
    if command_exists conda; then
        eval "$(conda shell.bash hook)"
        conda activate precise 2>/dev/null || true
    fi

    if precise-listen "$net_path"; then
        return 0
    fi

    print_status warning "Test interrupted or failed"
    return 1
}

# Generate <MODELS_DIR>/multi-wake-config.sh, a launcher that starts
# voice_server.py with every known model that has been downloaded.
# Each model contributes a "name:path:0.5" entry to a comma-separated list
# passed via --precise-models.
# Globals:   MODELS (read, keys only), MODELS_DIR (read)
# Arguments: none
# Outputs:   status messages via print_status; writes the launcher file
# Returns:   0 on success; 1 if the file cannot be written or made executable
create_multi_wake_config() {
    print_status info "Creating multi-wake-word configuration..."

    local config_file="$MODELS_DIR/multi-wake-config.sh"
    local model_name

    # Emit the whole launcher through one redirected group so that a write
    # failure is detected once, instead of being ignored by each `cat`/`echo`
    # and still reported as success.
    {
        cat << 'EOF'
#!/bin/bash
# Multi-wake-word configuration
# Generated by download_pretrained_models.sh

# Start voice server with multiple wake words
cd ~/voice-assistant || exit 1

# List of wake word models
MODELS=""

EOF

        # Add each downloaded model to config
        for model_name in "${!MODELS[@]}"; do
            if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
                printf '# Found: %s\n' "$model_name"
                # %q shell-escapes the entry so a models path containing
                # quotes, '$' or backticks cannot break (or inject code into)
                # the generated script. "${MODELS}" is meant to stay literal.
                # shellcheck disable=SC2016
                printf 'MODELS="${MODELS}"%q\n' \
                    "${model_name}:${MODELS_DIR}/${model_name}.net:0.5,"
            fi
        done

        cat << 'EOF'

# Remove trailing comma
MODELS="${MODELS%,}"

# Activate environment
eval "$(conda shell.bash hook)"
conda activate precise

# Start server
python voice_server.py \
    --enable-precise \
    --precise-models "$MODELS" \
    --ha-token "$HA_TOKEN"

EOF
    } > "$config_file" || {
        print_status error "Failed to write $config_file"
        return 1
    }

    chmod +x "$config_file" || {
        print_status error "Failed to make $config_file executable"
        return 1
    }

    print_status success "Created: $config_file"
    echo ""
    print_status info "To use multiple wake words, run:"
    print_status info "  $config_file"

    return 0
}

# Print a checklist of all known models, marking which ones are present in
# MODELS_DIR (with their on-disk size as reported by `du -h`).
# Globals:   MODELS (read, keys only), MODELS_DIR, GREEN, YELLOW, NC (read)
# Arguments: none
# Outputs:   checklist on stdout; header and total via print_status
# Returns:   0
list_downloaded_models() {
    print_status info "Downloaded models in $MODELS_DIR:"
    echo ""

    # All loop state is local so the caller's variables are not clobbered.
    local count=0
    local model_name model_file size

    for model_name in "${!MODELS[@]}"; do
        model_file="$MODELS_DIR/${model_name}.net"
        if [[ -f "$model_file" ]]; then
            # Assigned separately from `local` so a failing command is not
            # masked; fall back to "?" if no size could be determined.
            size=$(du -h "$model_file" | cut -f1) || size=""
            [[ -n "$size" ]] || size="?"
            echo -e "  ${GREEN}✓${NC} ${model_name}.net (${size})"
            # $(( )) instead of ((count++)): the latter yields status 1 when
            # the old value is 0.
            count=$((count + 1))
        else
            echo -e "  ${YELLOW}○${NC} ${model_name}.net (not downloaded)"
        fi
    done

    echo ""
    print_status success "Total downloaded: $count"

    return 0
}

# Print a static comparison table of the available wake words together with
# usage recommendations.
# Globals:   none
# Arguments: none
# Outputs:   heading via print_status, then a blank line and the table on stdout
# Returns:   0
compare_models() {
    local report

    print_status info "Model comparison:"
    printf '\n'

    # Read the whole here-document into a variable first. `read -d ''` hits
    # end-of-input and returns non-zero; that is expected and the text is
    # still stored verbatim (IFS is emptied so no whitespace is trimmed).
    IFS= read -r -d '' report << 'EOF'
┌─────────────────┬──────────────┬─────────────┬─────────────────┐
│ Wake Word       │ Popularity   │ Difficulty  │ Recommended For │
├─────────────────┼──────────────┼─────────────┼─────────────────┤
│ Hey Mycroft     │ ★★★★★        │ Easy        │ Default choice  │
│ Hey Jarvis      │ ★★★★☆        │ Easy        │ Pop culture     │
│ Christopher     │ ★★☆☆☆        │ Medium      │ Unique name     │
│ Hey Ezra        │ ★★☆☆☆        │ Medium      │ Alternative     │
└─────────────────┴──────────────┴─────────────┴─────────────────┘

Recommendations:
  - Start with: Hey Mycroft (most training data)
  - For media: Hey Jarvis (Plex/entertainment)
  - For uniqueness: Christopher or Hey Ezra

Multiple wake words:
  - Use different wake words for different contexts
  - Example: "Hey Mycroft" for commands, "Hey Jarvis" for media
  - Server can run 2-3 models simultaneously

EOF
    printf '%s' "$report"
}

# ----- Main -----
# Script entry point: parse options, verify prerequisites, then either show
# an overview (no model selected) or download and optionally test models.
# Globals:   SPECIFIC_MODEL, TEST_ALL (read after parse_args), MODELS (read,
#            keys only), MODELS_DIR (read)
# Arguments: the script's command-line arguments
# Outputs:   status messages, interactive prompts and final instructions
# Returns:   0 on success; 1 if any download failed in --test-all mode.
#            Exits 1 on missing dependencies, an uncreatable models directory
#            or a failed --model download; exits 0 after the overview.
main() {
    local model_name
    local model_count=0
    local reply=""
    local -a failed_models=()

    print_status info "Mycroft Precise Pre-trained Model Downloader"
    echo ""

    # Parse arguments
    parse_args "$@"

    # Check dependencies
    check_dependencies || exit 1

    # Create directory
    create_models_directory || exit 1

    # Show comparison
    if [[ -z "$SPECIFIC_MODEL" && "$TEST_ALL" != "true" ]]; then
        compare_models
        echo ""
        print_status info "Use --model <name> to download a specific model"
        print_status info "Use --test-all to download all models"
        echo ""
        list_downloaded_models
        exit 0
    fi

    # Download models
    if [[ -n "$SPECIFIC_MODEL" ]]; then
        # Download specific model
        download_model "$SPECIFIC_MODEL" || exit 1

        # Offer to test
        echo ""
        read -r -n 1 -p "Test this model now? (y/N): " reply
        echo
        if [[ $reply =~ ^[Yy]$ ]]; then
            test_model "$SPECIFIC_MODEL"
        fi

    elif [[ "$TEST_ALL" == "true" ]]; then
        # Download all models, remembering the ones that failed so the summary
        # does not claim success for them.
        for model_name in "${!MODELS[@]}"; do
            download_model "$model_name" || failed_models+=("$model_name")
            echo ""
        done

        echo ""
        if [[ ${#failed_models[@]} -eq 0 ]]; then
            print_status success "All models downloaded"
        else
            print_status warning "Failed to download: ${failed_models[*]}"
        fi
        echo ""

        # Offer to test each
        read -r -n 1 -p "Test each model? (y/N): " reply
        echo
        if [[ $reply =~ ^[Yy]$ ]]; then
            for model_name in "${!MODELS[@]}"; do
                if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
                    echo ""
                    test_model "$model_name"
                fi
            done
        fi
    fi

    # List results
    echo ""
    list_downloaded_models

    # Count the known models that are present. Only these end up in the
    # multi-wake config, so unrelated *.net files must not trigger it.
    for model_name in "${!MODELS[@]}"; do
        if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
            model_count=$((model_count + 1))
        fi
    done

    # Create multi-wake config if multiple models
    if [[ $model_count -gt 1 ]]; then
        echo ""
        create_multi_wake_config
    fi

    # Final instructions
    echo ""
    if [[ ${#failed_models[@]} -eq 0 ]]; then
        print_status success "Setup complete!"
    else
        print_status warning "Setup finished, but ${#failed_models[@]} download(s) failed: ${failed_models[*]}"
    fi
    echo ""
    print_status info "Next steps:"
    print_status info "1. Test a model: precise-listen $MODELS_DIR/hey-mycroft.net"
    print_status info "2. Use in server: python voice_server.py --enable-precise --precise-model $MODELS_DIR/hey-mycroft.net"
    print_status info "3. Fine-tune: precise-train -e 30 custom.net . --from-checkpoint $MODELS_DIR/hey-mycroft.net"

    if [[ $model_count -gt 1 ]]; then
        echo ""
        print_status info "For multiple wake words:"
        print_status info "  $MODELS_DIR/multi-wake-config.sh"
    fi

    # Propagate partial failure to the caller instead of always returning 0.
    if [[ ${#failed_models[@]} -gt 0 ]]; then
        return 1
    fi
    return 0
}

# Entry point: run main, forwarding every command-line argument unchanged.
# This is the last command, so main's return status becomes the script's.
main "$@"