minerva/scripts/download_pretrained_models.sh
pyr0ball 173f7f37d4 feat: import mycroft-precise work as Minerva foundation
Ports prior voice assistant research and prototypes from devl/Devops
into the Minerva repo. Includes:

- docs/: architecture, wake word guides, ESP32-S3 spec, hardware buying guide
- scripts/: voice_server.py, voice_server_enhanced.py, setup scripts
- hardware/maixduino/: edge device scripts with WiFi credentials scrubbed
  (replaced hardcoded password with secrets.py pattern)
- config/.env.example: server config template
- .gitignore: excludes .env, secrets.py, model blobs, ELF firmware
- CLAUDE.md: Minerva product context and connection to cf-voice roadmap
2026-04-06 22:21:12 -07:00

409 lines
11 KiB
Bash
Executable file

#!/usr/bin/env bash
#
# Path: download_pretrained_models.sh
#
# Purpose and usage:
# Downloads and sets up pre-trained Mycroft Precise wake word models
# - Downloads Hey Mycroft, Hey Jarvis, and other available models
# - Tests each model with microphone
# - Configures voice server to use them
#
# Requirements:
# - Mycroft Precise installed (run setup_precise.sh first)
# - Internet connection for downloads
# - Microphone for testing
#
# Usage:
# ./download_pretrained_models.sh [--test-all] [--model MODEL_NAME] [-v|--verbose] [-h|--help]
#
# Author: PRbL Library
# Created: $(date +"%Y-%m-%d")
# ----- PRbL Color and output functions -----
# ANSI color escape sequences, stored as literal backslash sequences.
# They are only turned into real escapes by `echo -e` at the point of use,
# so they must not be printed with a plain `echo`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
PURPLE='\033[0;35m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color
# Print a level-tagged, colorized message to stderr.
#
# Globals:
#   RED, GREEN, YELLOW, BLUE, PURPLE, NC (read) - color escape sequences
#   VERBOSE (read) - "debug" messages are printed only when this is "true"
# Arguments:
#   $1   - level: info | success | warning | error | debug; any other value
#          is dropped and the remaining arguments are printed without a tag
#   $2.. - message words, joined with spaces
# Outputs:
#   The formatted message on stderr; nothing is written to stdout.
# Returns:
#   0 always, including when a debug message is suppressed.
print_status() {
  local level="${1:-}"
  shift || true

  case "$level" in
    "info") echo -e "${BLUE}[INFO]${NC} $*" >&2 ;;
    "success") echo -e "${GREEN}[SUCCESS]${NC} $*" >&2 ;;
    "warning") echo -e "${YELLOW}[WARNING]${NC} $*" >&2 ;;
    "error") echo -e "${RED}[ERROR]${NC} $*" >&2 ;;
    "debug")
      # Use an explicit `if` rather than `[[ ... ]] && echo`: the short-circuit
      # form made the whole function return 1 whenever debug output was off.
      if [[ "${VERBOSE:-false}" == "true" ]]; then
        echo -e "${PURPLE}[DEBUG]${NC} $*" >&2
      fi
      ;;
    *) echo -e "$*" >&2 ;;
  esac
  return 0
}
# ----- Configuration -----
# Directory the model archives are extracted into and where the
# <name>.net files are looked up.
MODELS_DIR="$HOME/precise-models/pretrained"
# Set to "true" by --test-all: download every model listed in MODELS.
TEST_ALL=false
# Set by --model NAME: the single model to download (empty = none requested).
SPECIFIC_MODEL=""
# Set to "true" by -v/--verbose: enables "debug" level messages.
VERBOSE=false
# Available pre-trained models
# Associative array (needs a bash with `declare -A`): model name -> URL of
# the .tar.gz archive to download for that model.
declare -A MODELS=(
["hey-mycroft"]="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-mycroft.tar.gz"
["hey-jarvis"]="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-jarvis.tar.gz"
["christopher"]="https://github.com/MycroftAI/precise-data/raw/models-dev/christopher.tar.gz"
["hey-ezra"]="https://github.com/MycroftAI/precise-data/raw/models-dev/hey-ezra.tar.gz"
)
# ----- Dependency checking -----
# Report whether a command name resolves in the current shell.
# Arguments: $1 - command name to look up
# Returns:   0 if `command -v` finds it, 1 otherwise; prints nothing.
command_exists() {
  local tool="$1"
  if command -v "$tool" > /dev/null 2>&1; then
    return 0
  fi
  return 1
}
# Verify that the external tools this script calls are installed.
# Checks wget and precise-listen via command_exists and reports every
# missing tool in a single error message.
# Returns: 0 when both are present, 1 when at least one is missing.
check_dependencies() {
  local -a absent=()

  command_exists wget || absent+=("wget")
  command_exists precise-listen \
    || absent+=("precise-listen (run setup_precise.sh first)")

  if (( ${#absent[@]} == 0 )); then
    return 0
  fi

  print_status error "Missing dependencies: ${absent[*]}"
  return 1
}
# ----- Parse arguments -----
# Parse command-line options into the script's global settings.
#
# Globals:
#   TEST_ALL (written)       - "true" when --test-all is given
#   SPECIFIC_MODEL (written) - value following --model
#   VERBOSE (written)        - "true" when -v/--verbose is given
# Arguments:
#   $@ - the script's command-line arguments
# Exits:
#   0 after printing help for -h/--help;
#   1 on an unknown option or when --model has no value.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --test-all)
        TEST_ALL=true
        shift
        ;;
      --model)
        # A trailing "--model" used to hang the script: `shift 2` with only
        # one argument left fails and shifts nothing, so the loop never ended.
        if [[ $# -lt 2 || -z "$2" ]]; then
          print_status error "Option --model requires a model name"
          exit 1
        fi
        SPECIFIC_MODEL="$2"
        shift 2
        ;;
      -v|--verbose)
        VERBOSE=true
        shift
        ;;
      -h|--help)
        # Unquoted delimiter on purpose: $(basename "$0") must expand.
        cat << EOF
Usage: $(basename "$0") [OPTIONS]
Download and test pre-trained Mycroft Precise wake word models
Options:
--test-all Download and test all available models
--model NAME Download and test specific model
-v, --verbose Enable verbose output
-h, --help Show this help message
Available models:
hey-mycroft Original Mycroft wake word (most data)
hey-jarvis Popular alternative
christopher Alternative wake word
hey-ezra Another option
Examples:
$(basename "$0") --model hey-mycroft
$(basename "$0") --test-all
EOF
        exit 0
        ;;
      *)
        print_status error "Unknown option: $1"
        exit 1
        ;;
    esac
  done
}
# ----- Functions -----
# Ensure the models directory exists, creating parent directories as needed.
# Globals: MODELS_DIR (read)
# Returns: 0 when the directory exists afterwards, 1 if mkdir fails.
create_models_directory() {
  print_status info "Creating models directory: $MODELS_DIR"

  if mkdir -p "$MODELS_DIR"; then
    return 0
  fi

  print_status error "Failed to create directory"
  return 1
}
# Download one pre-trained model archive and extract it into MODELS_DIR.
#
# Globals:
#   MODELS (read)     - model name -> archive URL
#   MODELS_DIR (read) - extraction target; <name>.net is expected here
# Arguments:
#   $1 - model name (must be a key of MODELS)
# Returns:
#   0 if <name>.net already exists or was downloaded and extracted;
#   1 for an unknown/empty name, or if the temp file, download,
#     extraction or post-extraction check fails.
download_model() {
  local model_name="${1:-}"
  local model_url=""
  local temp_file

  # Only index the associative array with a non-empty key; an empty
  # subscript is rejected by bash instead of simply yielding no value.
  if [[ -n "$model_name" ]]; then
    model_url="${MODELS[$model_name]:-}"
  fi
  if [[ -z "$model_url" ]]; then
    print_status error "Unknown model: $model_name"
    return 1
  fi

  # Check if already downloaded
  if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
    print_status info "Model already exists: $model_name"
    return 0
  fi

  print_status info "Downloading $model_name..."

  # mktemp creates the file atomically under an unpredictable name; the
  # previous "/tmp/<name>-<pid>.tar.gz" path could be guessed in advance.
  temp_file="$(mktemp "${TMPDIR:-/tmp}/${model_name}.XXXXXX")" || {
    print_status error "Failed to create temporary file for $model_name"
    return 1
  }

  wget -q --show-progress -O "$temp_file" "$model_url" || {
    print_status error "Failed to download $model_name"
    rm -f "$temp_file"
    return 1
  }

  # Extract
  print_status info "Extracting $model_name..."
  tar xzf "$temp_file" -C "$MODELS_DIR" || {
    print_status error "Failed to extract $model_name"
    rm -f "$temp_file"
    return 1
  }
  rm -f "$temp_file"

  # Verify extraction: the archive must have produced <name>.net
  if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
    print_status success "Downloaded: $model_name"
    return 0
  fi

  print_status error "Extraction failed for $model_name"
  return 1
}
# Run an interactive listening test against one downloaded model.
#
# Prints short instructions, waits for Enter, activates the "precise"
# conda environment when conda is available, then runs precise-listen
# on the model file.
#
# Globals:   MODELS_DIR (read), CYAN, NC (read)
# Arguments: $1 - model name; the file tested is MODELS_DIR/<name>.net
# Returns:   0 if precise-listen exits successfully;
#            1 if the model file is missing or precise-listen fails.
test_model() {
  local model_name="$1"
  local net_path="$MODELS_DIR/${model_name}.net"

  if ! [[ -f "$net_path" ]]; then
    print_status error "Model file not found: $net_path"
    return 1
  fi

  print_status info "Testing model: $model_name"
  echo ""
  echo -e "${CYAN}Instructions:${NC}"
  printf '%s\n' \
    " - Speak the wake word: '$model_name'" \
    " - You should see '!' when detected" \
    " - Press Ctrl+C to stop testing" \
    ""
  read -p "Press Enter to start test..."

  # Activate conda environment if needed; activation failure is tolerated.
  if command_exists conda; then
    eval "$(conda shell.bash hook)"
    conda activate precise 2>/dev/null || true
  fi

  if precise-listen "$net_path"; then
    return 0
  fi

  print_status warning "Test interrupted or failed"
  return 1
}
# Generate MODELS_DIR/multi-wake-config.sh, a launcher script that starts
# voice_server.py with every model from MODELS whose .net file is present.
#
# Each present model contributes an entry "<name>:<path>.net:0.5," to the
# generated MODELS string; the generated script strips the trailing comma.
#
# Globals:
#   MODELS (read)     - model names to look for
#   MODELS_DIR (read) - where the .net files and the generated script live
# Returns:
#   0 once the script is written and made executable;
#   1 if it cannot be written or chmod fails (previously such failures
#     were ignored and success was reported anyway).
create_multi_wake_config() {
  local config_file="$MODELS_DIR/multi-wake-config.sh"
  # Keep the loop variable local so it cannot clobber a caller's variable.
  local model_name

  print_status info "Creating multi-wake-word configuration..."

  # One redirection for the whole group: the file is opened once, and a
  # failure to open it is detected instead of being silently repeated.
  {
    # Quoted delimiter: everything below is written literally and is only
    # expanded when the generated script runs.
    cat << 'EOF'
#!/bin/bash
# Multi-wake-word configuration
# Generated by download_pretrained_models.sh
# Start voice server with multiple wake words
cd ~/voice-assistant
# List of wake word models
MODELS=""
EOF
    # Add each downloaded model to config
    for model_name in "${!MODELS[@]}"; do
      if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
        echo "# Found: $model_name"
        # \${MODELS} stays literal (the generated script's own variable);
        # the model name and path are expanded now.
        echo "MODELS=\"\${MODELS}${model_name}:$MODELS_DIR/${model_name}.net:0.5,\""
      fi
    done
    cat << 'EOF'
# Remove trailing comma
MODELS="${MODELS%,}"
# Activate environment
eval "$(conda shell.bash hook)"
conda activate precise
# Start server
python voice_server.py \
--enable-precise \
--precise-models "$MODELS" \
--ha-token "$HA_TOKEN"
EOF
  } > "$config_file" || {
    print_status error "Failed to write: $config_file"
    return 1
  }

  chmod +x "$config_file" || {
    print_status error "Failed to make executable: $config_file"
    return 1
  }

  print_status success "Created: $config_file"
  echo ""
  print_status info "To use multiple wake words, run:"
  print_status info " $config_file"
  return 0
}
# Print one line per model in MODELS, showing the size of its .net file
# (as reported by `du -h`) or "(not downloaded)", followed by a total.
#
# Globals:
#   MODELS (read)     - model names to report on
#   MODELS_DIR (read) - directory searched for <name>.net
#   GREEN, YELLOW, NC (read)
# Outputs:
#   Per-model lines on stdout; header and total via print_status.
# Returns:
#   0 always.
list_downloaded_models() {
  # Declared local so the loop does not overwrite a caller's model_name.
  local model_name size
  local count=0

  print_status info "Downloaded models in $MODELS_DIR:"
  echo ""

  for model_name in "${!MODELS[@]}"; do
    if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
      size="$(du -h "$MODELS_DIR/${model_name}.net" | cut -f1)"
      echo -e " ${GREEN}${NC} ${model_name}.net (${size})"
      # Plain assignment: ((count++)) evaluates to status 1 when count is 0.
      count=$((count + 1))
    else
      echo -e " ${YELLOW}${NC} ${model_name}.net (not downloaded)"
    fi
  done

  echo ""
  print_status success "Total downloaded: $count"
  return 0
}
# Print a static comparison table of the available wake words, followed by
# recommendations and notes on running several wake words at once.
# Outputs: a blank line plus the table and notes on stdout.
compare_models() {
  print_status info "Model comparison:"
  printf '\n'
  # Quoted delimiter: the text is emitted verbatim, with no expansion.
  cat << 'TABLE'
┌─────────────────┬──────────────┬─────────────┬─────────────────┐
│ Wake Word │ Popularity │ Difficulty │ Recommended For │
├─────────────────┼──────────────┼─────────────┼─────────────────┤
│ Hey Mycroft │ ★★★★★ │ Easy │ Default choice │
│ Hey Jarvis │ ★★★★☆ │ Easy │ Pop culture │
│ Christopher │ ★★☆☆☆ │ Medium │ Unique name │
│ Hey Ezra │ ★★☆☆☆ │ Medium │ Alternative │
└─────────────────┴──────────────┴─────────────┴─────────────────┘
Recommendations:
- Start with: Hey Mycroft (most training data)
- For media: Hey Jarvis (Plex/entertainment)
- For uniqueness: Christopher or Hey Ezra
Multiple wake words:
- Use different wake words for different contexts
- Example: "Hey Mycroft" for commands, "Hey Jarvis" for media
- Server can run 2-3 models simultaneously
TABLE
}
# ----- Main -----
# Script entry point: parse options, check prerequisites, then either show
# the model overview (no options) or download and optionally test models.
#
# Globals:
#   SPECIFIC_MODEL, TEST_ALL (read, set by parse_args)
#   MODELS, MODELS_DIR (read)
# Arguments:
#   $@ - command-line arguments, forwarded to parse_args
# Exits:
#   1 if dependencies are missing, the models directory cannot be created,
#     or the single requested model fails to download;
#   0 after the overview when neither --model nor --test-all was given.
#   In --test-all mode individual download failures are reported but do not
#   abort the run (best-effort), so the remaining steps still execute.
main() {
  local model_name model_count
  local -a failed=()

  print_status info "Mycroft Precise Pre-trained Model Downloader"
  echo ""

  # Parse arguments
  parse_args "$@"
  # Check dependencies
  check_dependencies || exit 1
  # Create directory
  create_models_directory || exit 1

  # Show comparison when no download was requested
  if [[ -z "$SPECIFIC_MODEL" && "$TEST_ALL" != "true" ]]; then
    compare_models
    echo ""
    print_status info "Use --model <name> to download a specific model"
    print_status info "Use --test-all to download all models"
    echo ""
    list_downloaded_models
    exit 0
  fi

  # Download models
  if [[ -n "$SPECIFIC_MODEL" ]]; then
    # Download specific model
    download_model "$SPECIFIC_MODEL" || exit 1
    # Offer to test
    echo ""
    read -p "Test this model now? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      test_model "$SPECIFIC_MODEL"
    fi
  elif [[ "$TEST_ALL" == "true" ]]; then
    # Download all models, remembering which ones failed so the summary
    # below is truthful instead of always claiming success.
    for model_name in "${!MODELS[@]}"; do
      download_model "$model_name" || failed+=("$model_name")
      echo ""
    done

    echo ""
    if (( ${#failed[@]} == 0 )); then
      print_status success "All models downloaded"
    else
      print_status warning "Failed to download ${#failed[@]} of ${#MODELS[@]} models: ${failed[*]}"
    fi
    echo ""

    # Offer to test each model that is actually present
    read -p "Test each model? (y/N): " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
      for model_name in "${!MODELS[@]}"; do
        if [[ -f "$MODELS_DIR/${model_name}.net" ]]; then
          echo ""
          test_model "$model_name"
        fi
      done
    fi
  fi

  # List results
  echo ""
  list_downloaded_models

  # Create multi-wake config if multiple models are present.
  # Declaration and assignment are separate so `local` does not mask the
  # pipeline's exit status.
  model_count="$(find "$MODELS_DIR" -name "*.net" | wc -l)"
  if (( model_count > 1 )); then
    echo ""
    create_multi_wake_config
  fi

  # Final instructions
  echo ""
  print_status success "Setup complete!"
  echo ""
  print_status info "Next steps:"
  print_status info "1. Test a model: precise-listen $MODELS_DIR/hey-mycroft.net"
  print_status info "2. Use in server: python voice_server.py --enable-precise --precise-model $MODELS_DIR/hey-mycroft.net"
  print_status info "3. Fine-tune: precise-train -e 30 custom.net . --from-checkpoint $MODELS_DIR/hey-mycroft.net"
  if (( model_count > 1 )); then
    echo ""
    print_status info "For multiple wake words:"
    print_status info " $MODELS_DIR/multi-wake-config.sh"
  fi
}
# Run main
# Forward the script's command-line arguments to main unchanged; "$@" keeps
# each argument as a separate word even if it contains spaces.
main "$@"