avocet/manage.sh

262 lines
8.5 KiB
Bash
Executable file

#!/usr/bin/env bash
# manage.sh — Avocet label tool manager
# Usage: ./manage.sh <command> [args]

# Strict mode: abort on a failing command, on use of an unset variable, and
# when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail
# ANSI colour codes, stored as literal backslash sequences and expanded only
# when a message is printed.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Print "<colour>[avocet]<reset> <message>" followed by a newline.
#   $1  - colour escape sequence for the tag
#   $2… - message words (joined with a space)
# %b expands backslash escapes in the same way `echo -e` does.
_avocet_log() {
  local color=$1
  shift
  printf '%b\n' "${color}[avocet]${NC} $*"
}

# Informational, success and warning messages all go to stdout.
info() { _avocet_log "$BLUE" "$@"; }
success() { _avocet_log "$GREEN" "$@"; }
warn() { _avocet_log "$YELLOW" "$@"; }

# Print the message to stderr, then terminate the script with status 1.
error() {
  _avocet_log "$RED" "$@" >&2
  exit 1
}
# Resolve the directory that contains this script and make it the working
# directory, so the relative paths below do not depend on where the caller is.
SCRIPT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
cd "$SCRIPT_DIR"

# Runtime state files and log location (relative to SCRIPT_DIR).
PID_FILE='.avocet.pid'
PORT_FILE='.avocet.port'
LOG_DIR='log'
LOG_FILE="$LOG_DIR/label_tool.log"

# First port tried when starting the label tool.
DEFAULT_PORT=8503

# Conda installation root; a non-empty CONDA_BASE from the environment wins.
: "${CONDA_BASE:=/devl/miniconda3}"

# Names of the conda environments used for the UI and for benchmarking.
ENV_UI='job-seeker'
ENV_BM='job-seeker-classifiers'

# Executables inside those environments.
STREAMLIT="$CONDA_BASE/envs/$ENV_UI/bin/streamlit"
PYTHON_BM="$CONDA_BASE/envs/$ENV_BM/bin/python"
PYTHON_UI="$CONDA_BASE/envs/$ENV_UI/bin/python"
# ── Port helpers ──────────────────────────────────────────────────────────────
# Report whether something is listening on a TCP port.
#   $1 - port number
# Returns 0 when the port is in use, non-zero otherwise.
# Probes, in order of preference: lsof, ss, then a /dev/tcp connection attempt.
_port_in_use() {
  local port=$1
  if command -v lsof &>/dev/null; then
    # -t prints only PIDs; lsof exits non-zero when nothing matches.
    lsof -iTCP:"$port" -sTCP:LISTEN -t &>/dev/null
  elif command -v ss &>/dev/null; then
    # Column 4 of `ss -tlnH` is the local address:port. The match is done
    # inside awk, which reads all of its input: a `| grep -q` stage would exit
    # on the first hit, the upstream writer could then die of SIGPIPE, and
    # under `set -o pipefail` a busy port would be reported as free.
    ss -tlnH 2>/dev/null | awk -v suffix=":${port}" '
      substr($4, length($4) - length(suffix) + 1) == suffix { found = 1 }
      END { exit !found }'
  else
    # Last resort: the port counts as in use if a local connection succeeds.
    (echo "" >/dev/tcp/127.0.0.1/"$port") 2>/dev/null
  fi
}
# Find the first free TCP port at or above a starting port.
#   $1 - port to try first (defaults to DEFAULT_PORT)
# Outputs: the chosen port, and nothing else, on stdout.
_find_free_port() {
  local port=${1:-$DEFAULT_PORT}
  while _port_in_use "$port"; do
    # Callers capture stdout with $(...), so the progress message must go to
    # stderr or it would become part of the returned "port".
    warn "Port ${port} is in use — trying $((port + 1))" >&2
    # Plain assignment: unlike ((port++)), it never yields a non-zero status.
    port=$((port + 1))
  done
  echo "$port"
}
# ── PID helpers ───────────────────────────────────────────────────────────────
# Print the PID recorded in PID_FILE if that process is alive; print an empty
# line otherwise. When the file is stale or corrupt, PID_FILE and PORT_FILE
# are removed.
_running_pid() {
  local pid=""
  if [[ -f "$PID_FILE" ]]; then
    pid=$(<"$PID_FILE") || pid=""
    # Accept only a positive integer. `kill -0 0` addresses the caller's own
    # process group and would succeed, so an unvalidated "0" (or a negative
    # value) would be reported as a running instance and later be signalled.
    if [[ "$pid" =~ ^[1-9][0-9]*$ ]] && kill -0 "$pid" 2>/dev/null; then
      echo "$pid"
      return 0
    fi
    rm -f "$PID_FILE" "$PORT_FILE"
  fi
  echo ""
}
# Print the port recorded in PORT_FILE, or DEFAULT_PORT when the file is
# missing, unreadable, empty, or does not contain a plain number.
_running_port() {
  local port=""
  if [[ -f "$PORT_FILE" ]]; then
    port=$(cat "$PORT_FILE" 2>/dev/null) || port=""
  fi
  if [[ "$port" =~ ^[0-9]+$ ]]; then
    echo "$port"
  else
    echo "$DEFAULT_PORT"
  fi
}
# ── Usage ─────────────────────────────────────────────────────────────────────
# Print the command overview to stdout.
# Lines that embed colour variables are printed with %b so the stored \033
# sequences are expanded; all other lines are printed literally with %s.
usage() {
  printf '%s\n' ""
  printf '%b\n' \
    " ${BLUE}Avocet — Email Classifier Training Tool${NC}" \
    " ${YELLOW}Scrape → Store → Process${NC}"
  printf '%s\n' \
    "" \
    " Usage: ./manage.sh <command> [args]" \
    "" \
    " Label tool:"
  printf '%b\n' \
    " ${GREEN}start${NC} Start label tool UI (port collision-safe)" \
    " ${GREEN}stop${NC} Stop label tool UI" \
    " ${GREEN}restart${NC} Restart label tool UI" \
    " ${GREEN}status${NC} Show running state and port" \
    " ${GREEN}logs${NC} Tail label tool log output" \
    " ${GREEN}open${NC} Open label tool in browser"
  printf '%s\n' \
    "" \
    " Benchmark:"
  printf '%b\n' \
    " ${GREEN}benchmark [args]${NC} Run benchmark_classifier.py (args passed through)" \
    " ${GREEN}list-models${NC} Shortcut: --list-models" \
    " ${GREEN}score [args]${NC} Shortcut: --score [args]" \
    " ${GREEN}compare [args]${NC} Shortcut: --compare [args]"
  printf '%s\n' \
    "" \
    " Dev:"
  printf '%b\n' \
    " ${GREEN}test${NC} Run pytest suite"
  printf '%s\n' \
    "" \
    " Port defaults to ${DEFAULT_PORT}; auto-increments if occupied." \
    " Conda envs: UI=${ENV_UI} Benchmark=${ENV_BM}" \
    "" \
    " Examples:" \
    " ./manage.sh start" \
    " ./manage.sh benchmark --list-models" \
    " ./manage.sh score --include-slow" \
    " ./manage.sh compare --limit 30" \
    ""
}
# ── Commands ──────────────────────────────────────────────────────────────────
# The first argument selects the command (default: help); the remaining
# arguments are passed through to the commands that accept them.
CMD="${1:-help}"
shift || true # nothing to shift when no command was given

# Stop the process with PID $1: send SIGTERM, wait up to 5 s for it to exit,
# escalate to SIGKILL if it is still alive, then remove the PID/port files.
_terminate() {
  local pid=$1
  # The process may exit between the caller's liveness check and this signal;
  # an unguarded kill would abort under `set -e` and leave stale state files.
  kill "$pid" 2>/dev/null || true
  for _ in {1..10}; do
    kill -0 "$pid" 2>/dev/null || break
    sleep 0.5
  done
  if kill -0 "$pid" 2>/dev/null; then
    warn "Process did not exit cleanly; sending SIGKILL…"
    kill -9 "$pid" 2>/dev/null || true
  fi
  rm -f "$PID_FILE" "$PORT_FILE"
}

# Launch the Streamlit label tool in the background on a free port and record
# its PID and port. Does nothing (beyond a warning) if it is already running.
_cmd_start() {
  local pid port
  pid=$(_running_pid)
  if [[ -n "$pid" ]]; then
    port=$(_running_port)
    warn "Already running (PID ${pid}) on port ${port} → http://localhost:${port}"
    return 0
  fi
  if [[ ! -x "$STREAMLIT" ]]; then
    error "Streamlit not found at ${STREAMLIT}\nActivate env: conda run -n ${ENV_UI} ..."
  fi
  port=$(_find_free_port "$DEFAULT_PORT")
  mkdir -p "$LOG_DIR"
  info "Starting label tool on port ${port}"
  nohup "$STREAMLIT" run app/label_tool.py \
    --server.port "$port" \
    --server.headless true \
    --server.fileWatcherType none \
    >"$LOG_FILE" 2>&1 &
  pid=$!
  echo "$pid" >"$PID_FILE"
  echo "$port" >"$PORT_FILE"
  # Wait briefly and confirm the process survived
  sleep 1
  if kill -0 "$pid" 2>/dev/null; then
    success "Avocet label tool started → http://localhost:${port} (PID ${pid})"
    success "Logs: ${LOG_FILE}"
  else
    rm -f "$PID_FILE" "$PORT_FILE"
    error "Process died immediately. Check ${LOG_FILE} for details."
  fi
}

# Run scripts/benchmark_classifier.py with the benchmark environment's Python;
# all arguments are passed through unchanged.
_cmd_benchmark() {
  info "Running benchmark (${ENV_BM})…"
  if [[ ! -x "$PYTHON_BM" ]]; then
    error "Python not found in ${ENV_BM} env at ${PYTHON_BM}\n" \
      "Create it with: conda env create -f environment.yml"
  fi
  "$PYTHON_BM" scripts/benchmark_classifier.py "$@"
}

# Commands that build on others call the helpers above directly. Re-executing
# "$0" is unreliable here: the working directory has already been changed to
# SCRIPT_DIR, so a relative "$0" given from another directory no longer
# resolves.
case "$CMD" in
  start)
    _cmd_start
    ;;
  stop)
    pid=$(_running_pid)
    if [[ -z "$pid" ]]; then
      warn "Not running."
      exit 0
    fi
    info "Stopping label tool (PID ${pid})…"
    _terminate "$pid"
    success "Stopped."
    ;;
  restart)
    pid=$(_running_pid)
    if [[ -n "$pid" ]]; then
      info "Stopping existing process (PID ${pid})…"
      _terminate "$pid"
    fi
    _cmd_start
    ;;
  status)
    pid=$(_running_pid)
    if [[ -n "$pid" ]]; then
      port=$(_running_port)
      success "Running — PID ${pid} port ${port} → http://localhost:${port}"
    else
      warn "Not running."
    fi
    ;;
  logs)
    if [[ ! -f "$LOG_FILE" ]]; then
      warn "No log file found at ${LOG_FILE}. Has the tool been started?"
      exit 0
    fi
    info "Tailing ${LOG_FILE} (Ctrl-C to stop)"
    tail -f "$LOG_FILE"
    ;;
  open)
    # Check liveness before reading the port: _running_pid deletes a stale
    # port file, so the port of a dead process is never picked up.
    pid=$(_running_pid)
    port=$(_running_port)
    if [[ -z "$pid" ]]; then
      warn "Label tool does not appear to be running. Start with: ./manage.sh start"
    fi
    URL="http://localhost:${port}"
    info "Opening ${URL}"
    if command -v xdg-open &>/dev/null; then
      xdg-open "$URL"
    elif command -v open &>/dev/null; then
      open "$URL"
    else
      # No known opener available: print the URL instead.
      echo "$URL"
    fi
    ;;
  test)
    info "Running test suite…"
    PYTEST="${CONDA_BASE}/envs/${ENV_UI}/bin/pytest"
    if [[ ! -x "$PYTEST" ]]; then
      error "pytest not found in ${ENV_UI} env at ${PYTEST}"
    fi
    "$PYTEST" tests/ -v "$@"
    ;;
  benchmark)
    _cmd_benchmark "$@"
    ;;
  list-models)
    _cmd_benchmark --list-models
    ;;
  score)
    _cmd_benchmark --score "$@"
    ;;
  compare)
    _cmd_benchmark --compare "$@"
    ;;
  help | --help | -h)
    usage
    ;;
  *)
    error "Unknown command: ${CMD}. Run './manage.sh help' for usage."
    ;;
esac