feat: add manage.sh with port collision detection

start/stop/restart/status/logs/open for the label tool UI; benchmark/list-models/score/compare shortcuts for the harness. Port scan: checks lsof → ss → /dev/tcp fallback, increments from 8503 until a free port is found. PID tracked in .avocet.pid, assigned port persisted in .avocet.port.
2026-02-27 14:10:24 -08:00 · 2026-02-27 14:10:24 -08:00 · bd4911fe58
commit bd4911fe58
parent d68754d432
1 changed files with 262 additions and 0 deletions
--- a/manage.sh
+++ b/manage.sh
@@ -0,0 +1,262 @@
+#!/usr/bin/env bash
+# manage.sh — Avocet label tool manager
+# Usage: ./manage.sh <command> [args]
+set -euo pipefail
+
+RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m'; BLUE='\033[0;34m'; NC='\033[0m'
+info()    { echo -e "${BLUE}[avocet]${NC} $*"; }
+success() { echo -e "${GREEN}[avocet]${NC} $*"; }
+warn()    { echo -e "${YELLOW}[avocet]${NC} $*"; }
+error()   { echo -e "${RED}[avocet]${NC} $*" >&2; exit 1; }
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+PID_FILE=".avocet.pid"
+PORT_FILE=".avocet.port"
+LOG_DIR="log"
+LOG_FILE="${LOG_DIR}/label_tool.log"
+DEFAULT_PORT=8503
+
+CONDA_BASE="${CONDA_BASE:-/devl/miniconda3}"
+ENV_UI="job-seeker"
+ENV_BM="job-seeker-classifiers"
+STREAMLIT="${CONDA_BASE}/envs/${ENV_UI}/bin/streamlit"
+PYTHON_BM="${CONDA_BASE}/envs/${ENV_BM}/bin/python"
+PYTHON_UI="${CONDA_BASE}/envs/${ENV_UI}/bin/python"
+
+# ── Port helpers ──────────────────────────────────────────────────────────────
+
+_port_in_use() {
+    # Succeeds (status 0) when something appears to be listening on TCP port $1.
+    # Exactly one probe runs — the first available of lsof, ss, or bash's
+    # /dev/tcp pseudo-device — and the function's status is that probe's status.
+    local candidate=$1
+
+    # Preferred probe: lsof (macOS + most Linux).
+    if command -v lsof &>/dev/null; then
+        lsof -iTCP:"$candidate" -sTCP:LISTEN -t &>/dev/null
+        return
+    fi
+
+    # Second choice: ss (systemd Linux); match the port at the end of the
+    # local-address column.
+    if command -v ss &>/dev/null; then
+        ss -tlnH 2>/dev/null | awk '{print $4}' | grep -q ":${candidate}$"
+        return
+    fi
+
+    # Last resort: attempt a connection to the loopback address.
+    (echo "" >/dev/tcp/127.0.0.1/"$candidate") 2>/dev/null
+}
+
+_find_free_port() {
+    # Print the first free TCP port at or above $1 (default: DEFAULT_PORT).
+    #
+    # stdout carries ONLY the port number, because callers capture it with
+    # $(...); the "in use" progress messages are therefore sent to stderr.
+    # If the scan reaches 65535 (the highest TCP port) without success, the
+    # function aborts through error(); inside $(...) that ends the subshell
+    # with status 1, which the calling assignment then propagates.
+    local port=${1:-$DEFAULT_PORT}
+    while _port_in_use "$port"; do
+        if (( port >= 65535 )); then
+            error "No free port found between ${1:-$DEFAULT_PORT} and 65535."
+        fi
+        warn "Port ${port} is in use — trying $((port + 1))…" >&2
+        # Plain assignment rather than ((port++)): the arithmetic command
+        # yields a non-zero status when the old value is 0, which can be
+        # fatal under `set -e`.
+        port=$((port + 1))
+    done
+    echo "$port"
+}
+
+# ── PID helpers ───────────────────────────────────────────────────────────────
+
+_running_pid() {
+    # Print the PID stored in PID_FILE when that process is alive; print an
+    # empty line otherwise.
+    #
+    # A PID file is treated as stale — and removed together with PORT_FILE —
+    # when it cannot be read, does not hold a positive integer, or names a
+    # process that `kill -0` cannot signal.
+    local pid=""
+    if [[ -f "$PID_FILE" ]]; then
+        pid=$(<"$PID_FILE") || pid=""
+        # Accept positive integers only: callers hand this value to `kill`,
+        # where 0 or a negative number addresses a whole process group
+        # instead of one process.
+        if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
+            echo "$pid"
+            return 0
+        fi
+        rm -f "$PID_FILE" "$PORT_FILE"
+    fi
+    echo ""
+}
+
+_running_port() {
+    [[ -f "$PORT_FILE" ]] && cat "$PORT_FILE" || echo "$DEFAULT_PORT"
+}
+
+# ── Usage ─────────────────────────────────────────────────────────────────────
+
+usage() {
+    # Print the command reference to stdout.
+    # Lines that colour a command name use `echo -e` so the ANSI variables
+    # (BLUE, YELLOW, GREEN, NC) are interpreted; column alignment is done with
+    # literal spaces inside the strings. DEFAULT_PORT, ENV_UI and ENV_BM are
+    # interpolated so the text shows the values this script is configured with.
+    echo ""
+    echo -e "  ${BLUE}Avocet — Email Classifier Training Tool${NC}"
+    echo -e "  ${YELLOW}Scrape → Store → Process${NC}"
+    echo ""
+    echo "  Usage: ./manage.sh <command> [args]"
+    echo ""
+    # Commands that manage the background label tool process.
+    echo "  Label tool:"
+    echo -e "    ${GREEN}start${NC}                    Start label tool UI (port collision-safe)"
+    echo -e "    ${GREEN}stop${NC}                     Stop label tool UI"
+    echo -e "    ${GREEN}restart${NC}                  Restart label tool UI"
+    echo -e "    ${GREEN}status${NC}                   Show running state and port"
+    echo -e "    ${GREEN}logs${NC}                     Tail label tool log output"
+    echo -e "    ${GREEN}open${NC}                     Open label tool in browser"
+    echo ""
+    # Commands that forward to scripts/benchmark_classifier.py.
+    echo "  Benchmark:"
+    echo -e "    ${GREEN}benchmark [args]${NC}         Run benchmark_classifier.py (args passed through)"
+    echo -e "    ${GREEN}list-models${NC}              Shortcut: --list-models"
+    echo -e "    ${GREEN}score [args]${NC}             Shortcut: --score [args]"
+    echo -e "    ${GREEN}compare [args]${NC}           Shortcut: --compare [args]"
+    echo ""
+    echo "  Dev:"
+    echo -e "    ${GREEN}test${NC}                     Run pytest suite"
+    echo ""
+    echo "  Port defaults to ${DEFAULT_PORT}; auto-increments if occupied."
+    echo "  Conda envs: UI=${ENV_UI}  Benchmark=${ENV_BM}"
+    echo ""
+    echo "  Examples:"
+    echo "    ./manage.sh start"
+    echo "    ./manage.sh benchmark --list-models"
+    echo "    ./manage.sh score --include-slow"
+    echo "    ./manage.sh compare --limit 30"
+    echo ""
+}
+
+# ── Commands ──────────────────────────────────────────────────────────────────
+
+CMD="${1:-help}"
+shift || true
+
+case "$CMD" in
+
+    start)
+        pid=$(_running_pid)
+        if [[ -n "$pid" ]]; then
+            port=$(_running_port)
+            warn "Already running (PID ${pid}) on port ${port} → http://localhost:${port}"
+            exit 0
+        fi
+
+        if [[ ! -x "$STREAMLIT" ]]; then
+            error "Streamlit not found at ${STREAMLIT}\nActivate env: conda run -n ${ENV_UI} ..."
+        fi
+
+        port=$(_find_free_port "$DEFAULT_PORT")
+        mkdir -p "$LOG_DIR"
+
+        info "Starting label tool on port ${port}…"
+        nohup "$STREAMLIT" run app/label_tool.py \
+            --server.port "$port" \
+            --server.headless true \
+            --server.fileWatcherType none \
+            >"$LOG_FILE" 2>&1 &
+
+        pid=$!
+        echo "$pid"  > "$PID_FILE"
+        echo "$port" > "$PORT_FILE"
+
+        # Wait briefly and confirm the process survived
+        sleep 1
+        if kill -0 "$pid" 2>/dev/null; then
+            success "Avocet label tool started → http://localhost:${port}  (PID ${pid})"
+            success "Logs: ${LOG_FILE}"
+        else
+            rm -f "$PID_FILE" "$PORT_FILE"
+            error "Process died immediately. Check ${LOG_FILE} for details."
+        fi
+        ;;
+
+    stop)
+        pid=$(_running_pid)
+        if [[ -z "$pid" ]]; then
+            warn "Not running."
+            exit 0
+        fi
+        info "Stopping label tool (PID ${pid})…"
+        kill "$pid"
+        # Wait up to 5 s for clean exit
+        for _ in $(seq 1 10); do
+            kill -0 "$pid" 2>/dev/null || break
+            sleep 0.5
+        done
+        if kill -0 "$pid" 2>/dev/null; then
+            warn "Process did not exit cleanly; sending SIGKILL…"
+            kill -9 "$pid" 2>/dev/null || true
+        fi
+        rm -f "$PID_FILE" "$PORT_FILE"
+        success "Stopped."
+        ;;
+
+    restart)
+        pid=$(_running_pid)
+        if [[ -n "$pid" ]]; then
+            info "Stopping existing process (PID ${pid})…"
+            kill "$pid"
+            for _ in $(seq 1 10); do
+                kill -0 "$pid" 2>/dev/null || break
+                sleep 0.5
+            done
+            kill -0 "$pid" 2>/dev/null && kill -9 "$pid" 2>/dev/null || true
+            rm -f "$PID_FILE" "$PORT_FILE"
+        fi
+        exec "$0" start
+        ;;
+
+    status)
+        pid=$(_running_pid)
+        if [[ -n "$pid" ]]; then
+            port=$(_running_port)
+            success "Running  — PID ${pid}  port ${port}  → http://localhost:${port}"
+        else
+            warn "Not running."
+        fi
+        ;;
+
+    logs)
+        if [[ ! -f "$LOG_FILE" ]]; then
+            warn "No log file found at ${LOG_FILE}. Has the tool been started?"
+            exit 0
+        fi
+        info "Tailing ${LOG_FILE} (Ctrl-C to stop)"
+        tail -f "$LOG_FILE"
+        ;;
+
+    open)
+        port=$(_running_port)
+        pid=$(_running_pid)
+        [[ -z "$pid" ]] && warn "Label tool does not appear to be running. Start with: ./manage.sh start"
+        URL="http://localhost:${port}"
+        info "Opening ${URL}"
+        if command -v xdg-open &>/dev/null; then
+            xdg-open "$URL"
+        elif command -v open &>/dev/null; then
+            open "$URL"
+        else
+            echo "$URL"
+        fi
+        ;;
+
+    test)
+        info "Running test suite…"
+        PYTEST="${CONDA_BASE}/envs/${ENV_UI}/bin/pytest"
+        if [[ ! -x "$PYTEST" ]]; then
+            error "pytest not found in ${ENV_UI} env at ${PYTEST}"
+        fi
+        "$PYTEST" tests/ -v "$@"
+        ;;
+
+    benchmark)
+        info "Running benchmark (${ENV_BM})…"
+        if [[ ! -x "$PYTHON_BM" ]]; then
+            error "Python not found in ${ENV_BM} env at ${PYTHON_BM}\n" \
+                  "Create it with: conda env create -f environment.yml"
+        fi
+        "$PYTHON_BM" scripts/benchmark_classifier.py "$@"
+        ;;
+
+    list-models)
+        exec "$0" benchmark --list-models
+        ;;
+
+    score)
+        exec "$0" benchmark --score "$@"
+        ;;
+
+    compare)
+        exec "$0" benchmark --compare "$@"
+        ;;
+
+    help|--help|-h)
+        usage
+        ;;
+
+    *)
+        error "Unknown command: ${CMD}. Run './manage.sh help' for usage."
+        ;;
+
+esac