avocet/manage.sh

#!/usr/bin/env bash
# manage.sh — Avocet label tool manager
# Usage: ./manage.sh <command> [args]
set -euo pipefail

# ANSI colour escape sequences, kept as literal backslash text and expanded
# later by `echo -e`. NC resets all attributes.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Print an informational message on stdout, prefixed with a blue "[avocet]" tag.
# All arguments are joined with spaces; backslash escapes in them are expanded.
info() {
    local tag="${BLUE}[avocet]${NC}"
    echo -e "${tag} $*"
}
# Print a success message on stdout, prefixed with a green "[avocet]" tag.
# All arguments are joined with spaces; backslash escapes in them are expanded.
success() {
    local tag="${GREEN}[avocet]${NC}"
    echo -e "${tag} $*"
}
# Print a warning on stdout, prefixed with a yellow "[avocet]" tag.
# All arguments are joined with spaces; backslash escapes in them are expanded.
warn() {
    local tag="${YELLOW}[avocet]${NC}"
    echo -e "${tag} $*"
}
# Print a message on stderr, prefixed with a red "[avocet]" tag, then terminate
# the current shell with exit status 1. Backslash escapes in the message
# (e.g. "\n") are expanded.
error() {
    local tag="${RED}[avocet]${NC}"
    echo -e "${tag} $*" >&2
    exit 1
}

# Resolve the directory that holds this script and make it the working
# directory, so the relative paths below resolve against it.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$SCRIPT_DIR"

# Label tool runtime state: default port, log location, PID/port state files.
DEFAULT_PORT=8503
LOG_DIR=log
LOG_FILE="$LOG_DIR/label_tool.log"
PID_FILE=.avocet.pid
PORT_FILE=.avocet.port

# Conda installation root (overridable through the environment) and the two
# environments used: one for the UI, one for the benchmark.
: "${CONDA_BASE:=/devl/miniconda3}"
ENV_UI=job-seeker
ENV_BM=job-seeker-classifiers

# Executables resolved directly inside those environments.
STREAMLIT="$CONDA_BASE/envs/$ENV_UI/bin/streamlit"
PYTHON_UI="$CONDA_BASE/envs/$ENV_UI/bin/python"
PYTHON_BM="$CONDA_BASE/envs/$ENV_BM/bin/python"

# ── Port helpers ──────────────────────────────────────────────────────────────

# Report whether something is listening on a TCP port.
# Arguments: $1 - port number
# Returns:   0 if the port is in use, non-zero otherwise
_port_in_use() {
    local port=$1
    # Try lsof first (macOS + most Linux); fall back to ss (systemd Linux)
    if command -v lsof &>/dev/null; then
        lsof -iTCP:"$port" -sTCP:LISTEN -t &>/dev/null
    elif command -v ss &>/dev/null; then
        # Match inside a single awk that reads ALL of its input. The previous
        # `awk | grep -q` form let grep exit on the first hit, so an upstream
        # stage could die with SIGPIPE and, under `set -o pipefail`, make the
        # whole pipeline fail — reporting a busy port as free.
        # Column 4 of `ss -tlnH` is the local "address:port".
        ss -tlnH 2>/dev/null | awk -v suffix=":${port}" '
            substr($4, length($4) - length(suffix) + 1) == suffix { found = 1 }
            END { exit !found }'
    else
        # Last resort: attempt a connection
        (echo "" >/dev/tcp/127.0.0.1/"$port") 2>/dev/null
    fi
}

# Find the first free TCP port at or above a starting port.
# Arguments: $1 - starting port (optional; defaults to DEFAULT_PORT)
# Outputs:   the chosen port, and nothing else, on stdout; progress
#            warnings go to stderr
# Returns:   0 on success, 1 if every port up to 65535 is occupied
_find_free_port() {
    local start=${1:-$DEFAULT_PORT}
    local port=$start
    while _port_in_use "$port"; do
        if (( port >= 65535 )); then
            warn "No free port found between ${start} and 65535." >&2
            return 1
        fi
        # Callers capture stdout (port=$(_find_free_port …)), so the warning
        # must go to stderr or its text would end up inside the port value.
        warn "Port ${port} is in use — trying $((port + 1))…" >&2
        port=$((port + 1))
    done
    echo "$port"
}

# ── PID helpers ───────────────────────────────────────────────────────────────

# Print the PID of the live avocet process recorded in PID_FILE, or an empty
# line when none is running. Stale or invalid state (PID file present but not
# naming a live process) is cleaned up by removing PID_FILE and PORT_FILE.
_running_pid() {
    local pid
    if [[ -f "$PID_FILE" ]]; then
        pid=$(<"$PID_FILE")
        # Accept only a positive integer. `kill -0 0` succeeds because PID 0
        # addresses the caller's own process group, so an unvalidated "0"
        # would look alive and a later `kill` would hit the whole group.
        if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
            echo "$pid"
            return 0
        fi
        rm -f "$PID_FILE" "$PORT_FILE"
    fi
    echo ""
}

# Print the port recorded in PORT_FILE. When the file does not exist (or
# reading it fails) print DEFAULT_PORT instead.
_running_port() {
    if ! { [[ -f "$PORT_FILE" ]] && cat "$PORT_FILE"; }; then
        echo "$DEFAULT_PORT"
    fi
}

# ── Usage ─────────────────────────────────────────────────────────────────────

# Print the help screen on stdout.
# Lines that embed colour variables are emitted through %b so their "\033"
# escapes are expanded; plain lines are emitted through %s untouched.
usage() {
    printf '%s\n' ""
    printf '%b\n' \
        "  ${BLUE}Avocet — Email Classifier Training Tool${NC}" \
        "  ${YELLOW}Scrape → Store → Process${NC}"
    printf '%s\n' \
        "" \
        "  Usage: ./manage.sh <command> [args]" \
        "" \
        "  Label tool:"
    printf '%b\n' \
        "    ${GREEN}start${NC}                    Start label tool UI (port collision-safe)" \
        "    ${GREEN}stop${NC}                     Stop label tool UI" \
        "    ${GREEN}restart${NC}                  Restart label tool UI" \
        "    ${GREEN}status${NC}                   Show running state and port" \
        "    ${GREEN}logs${NC}                     Tail label tool log output" \
        "    ${GREEN}open${NC}                     Open label tool in browser"
    printf '%s\n' \
        "" \
        "  Benchmark:"
    printf '%b\n' \
        "    ${GREEN}benchmark [args]${NC}         Run benchmark_classifier.py (args passed through)" \
        "    ${GREEN}list-models${NC}              Shortcut: --list-models" \
        "    ${GREEN}score [args]${NC}             Shortcut: --score [args]" \
        "    ${GREEN}compare [args]${NC}           Shortcut: --compare [args]"
    printf '%s\n' \
        "" \
        "  Dev:"
    printf '%b\n' \
        "    ${GREEN}test${NC}                     Run pytest suite"
    printf '%s\n' \
        "" \
        "  Port defaults to ${DEFAULT_PORT}; auto-increments if occupied." \
        "  Conda envs: UI=${ENV_UI}  Benchmark=${ENV_BM}" \
        "" \
        "  Examples:" \
        "    ./manage.sh start" \
        "    ./manage.sh benchmark --list-models" \
        "    ./manage.sh score --include-slow" \
        "    ./manage.sh compare --limit 30" \
        ""
}

# ── Commands ──────────────────────────────────────────────────────────────────

# Each sub-command lives in its own _cmd_* function and the dispatcher at the
# bottom calls it directly. Shortcuts (restart, list-models, score, compare)
# used to re-exec "$0"; after the earlier `cd "$SCRIPT_DIR"` a relative "$0"
# (e.g. invoked as `avocet/manage.sh` or `bash manage.sh`) no longer resolves,
# so they now call the target function in-process instead.

# Stop the process with PID $1 and clear the PID/port state files.
# Sends SIGTERM, polls for up to 5 s (10 × 0.5 s), then falls back to SIGKILL.
_stop_pid() {
    local pid=$1
    local tries
    # The process may exit between the caller's liveness check and this
    # signal; under `set -e` an unguarded failure would abort the script.
    kill "$pid" 2>/dev/null || true
    for ((tries = 0; tries < 10; tries++)); do
        kill -0 "$pid" 2>/dev/null || break
        sleep 0.5
    done
    if kill -0 "$pid" 2>/dev/null; then
        warn "Process did not exit cleanly; sending SIGKILL…"
        kill -9 "$pid" 2>/dev/null || true
    fi
    rm -f "$PID_FILE" "$PORT_FILE"
}

# start: launch the Streamlit label tool in the background on a free port,
# record its PID and port, and confirm it is still alive one second later.
_cmd_start() {
    local pid port
    pid=$(_running_pid)
    if [[ -n "$pid" ]]; then
        port=$(_running_port)
        warn "Already running (PID ${pid}) on port ${port} → http://localhost:${port}"
        return 0
    fi

    if [[ ! -x "$STREAMLIT" ]]; then
        error "Streamlit not found at ${STREAMLIT}\nActivate env: conda run -n ${ENV_UI} ..."
    fi

    port=$(_find_free_port "$DEFAULT_PORT")
    mkdir -p "$LOG_DIR"

    info "Starting label tool on port ${port}…"
    nohup "$STREAMLIT" run app/label_tool.py \
        --server.port "$port" \
        --server.headless true \
        --server.fileWatcherType none \
        >"$LOG_FILE" 2>&1 &

    pid=$!
    echo "$pid"  > "$PID_FILE"
    echo "$port" > "$PORT_FILE"

    # Wait briefly and confirm the process survived
    sleep 1
    if kill -0 "$pid" 2>/dev/null; then
        success "Avocet label tool started → http://localhost:${port}  (PID ${pid})"
        success "Logs: ${LOG_FILE}"
    else
        rm -f "$PID_FILE" "$PORT_FILE"
        error "Process died immediately. Check ${LOG_FILE} for details."
    fi
}

# stop: terminate the running label tool, if any.
_cmd_stop() {
    local pid
    pid=$(_running_pid)
    if [[ -z "$pid" ]]; then
        warn "Not running."
        return 0
    fi
    info "Stopping label tool (PID ${pid})…"
    _stop_pid "$pid"
    success "Stopped."
}

# restart: stop the running label tool (if any), then start a new one.
_cmd_restart() {
    local pid
    pid=$(_running_pid)
    if [[ -n "$pid" ]]; then
        info "Stopping existing process (PID ${pid})…"
        _stop_pid "$pid"
    fi
    _cmd_start
}

# status: report whether the label tool is running and on which port.
_cmd_status() {
    local pid port
    pid=$(_running_pid)
    if [[ -n "$pid" ]]; then
        port=$(_running_port)
        success "Running  — PID ${pid}  port ${port}  → http://localhost:${port}"
    else
        warn "Not running."
    fi
}

# logs: follow the label tool log file until interrupted.
_cmd_logs() {
    if [[ ! -f "$LOG_FILE" ]]; then
        warn "No log file found at ${LOG_FILE}. Has the tool been started?"
        return 0
    fi
    info "Tailing ${LOG_FILE} (Ctrl-C to stop)"
    tail -f "$LOG_FILE"
}

# open: open the label tool URL in a browser (xdg-open, then open), or print
# the URL when neither opener is available.
_cmd_open() {
    local pid port url
    # Check liveness first: _running_pid removes stale state files, so the
    # port read afterwards cannot come from a dead instance.
    pid=$(_running_pid)
    port=$(_running_port)
    if [[ -z "$pid" ]]; then
        warn "Label tool does not appear to be running. Start with: ./manage.sh start"
    fi
    url="http://localhost:${port}"
    info "Opening ${url}"
    if command -v xdg-open &>/dev/null; then
        xdg-open "$url"
    elif command -v open &>/dev/null; then
        open "$url"
    else
        echo "$url"
    fi
}

# test [args]: run pytest from the UI env on tests/; extra args are passed through.
_cmd_test() {
    local pytest_bin="${CONDA_BASE}/envs/${ENV_UI}/bin/pytest"
    info "Running test suite…"
    if [[ ! -x "$pytest_bin" ]]; then
        error "pytest not found in ${ENV_UI} env at ${pytest_bin}"
    fi
    "$pytest_bin" tests/ -v "$@"
}

# benchmark [args]: run scripts/benchmark_classifier.py with the benchmark
# env's Python; all args are passed through.
_cmd_benchmark() {
    info "Running benchmark (${ENV_BM})…"
    if [[ ! -x "$PYTHON_BM" ]]; then
        error "Python not found in ${ENV_BM} env at ${PYTHON_BM}\n" \
              "Create it with: conda env create -f environment.yml"
    fi
    "$PYTHON_BM" scripts/benchmark_classifier.py "$@"
}

# Dispatcher: the first CLI argument selects the command ("help" when absent);
# the remaining arguments are forwarded where the command accepts them.
CMD="${1:-help}"
shift || true

case "$CMD" in
    start)          _cmd_start ;;
    stop)           _cmd_stop ;;
    restart)        _cmd_restart ;;
    status)         _cmd_status ;;
    logs)           _cmd_logs ;;
    open)           _cmd_open ;;
    test)           _cmd_test "$@" ;;
    benchmark)      _cmd_benchmark "$@" ;;
    list-models)    _cmd_benchmark --list-models ;;
    score)          _cmd_benchmark --score "$@" ;;
    compare)        _cmd_benchmark --compare "$@" ;;
    help|--help|-h) usage ;;
    *)              error "Unknown command: ${CMD}. Run './manage.sh help' for usage." ;;
esac