diff --git a/app/pages/2_Settings.py b/app/pages/2_Settings.py
index 0ff379a..1bc383f 100644
--- a/app/pages/2_Settings.py
+++ b/app/pages/2_Settings.py
@@ -1026,9 +1026,10 @@ with tab_finetune:
     if ft_step == 1:
         st.markdown("**Step 1: Upload Cover Letters**")
+        st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
         uploaded = st.file_uploader(
-            "Upload cover letters (PDF, DOCX, or TXT)",
-            type=["pdf", "docx", "txt"],
+            "Upload cover letters (.md or .txt)",
+            type=["md", "txt"],
             accept_multiple_files=True,
         )
 
         if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
@@ -1040,18 +1041,45 @@ with tab_finetune:
             st.rerun()
 
     elif ft_step == 2:
-        st.markdown("**Step 2: Preview Training Pairs**")
-        st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
+        st.markdown("**Step 2: Extract Training Pairs**")
+
+        import json as _json
+        import sqlite3 as _sqlite3
+        from scripts.db import DEFAULT_DB as _FT_DB
+        jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
+
+        # Show task status
+        _ft_conn = _sqlite3.connect(_FT_DB)
+        _ft_conn.row_factory = _sqlite3.Row
+        _ft_task = _ft_conn.execute(
+            "SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
+        ).fetchone()
+        _ft_conn.close()
+
+        if _ft_task:
+            _ft_status = _ft_task["status"]
+            if _ft_status == "completed":
+                st.success(f"✅ {_ft_task['error'] or 'Extraction complete'}")
+            elif _ft_status in ("running", "queued"):
+                st.info(f"⏳ {_ft_status.capitalize()}… refresh to check progress.")
+            elif _ft_status == "failed":
+                st.error(f"Extraction failed: {_ft_task['error']}")
+
+        if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
+            from scripts.task_runner import submit_task as _ft_submit
+            _ft_submit(_FT_DB, "prepare_training", 0)
+            st.info("Extracting in the background — refresh in a moment.")
+            st.rerun()
 
         if jsonl_path.exists():
-            import json as _json
             pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
-            st.caption(f"{len(pairs)} training pairs extracted.")
+            st.caption(f"{len(pairs)} training pairs ready.")
             for i, p in enumerate(pairs[:3]):
                 with st.expander(f"Pair {i+1}"):
-                    st.text(p.get("input", "")[:300])
+                    st.text(p.get("output", p.get("input", ""))[:300])
         else:
-            st.warning("No training pairs found. Run `prepare_training_data.py` first.")
+            st.caption("No training pairs yet — click Extract above.")
+
         col_back, col_next = st.columns([1, 4])
         if col_back.button("← Back", key="ft_back2"):
             st.session_state.ft_step = 1
@@ -1061,13 +1089,45 @@ with tab_finetune:
             st.rerun()
 
     elif ft_step == 3:
-        st.markdown("**Step 3: Train**")
-        st.slider("Epochs", 3, 20, 10, key="ft_epochs")
-        if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
-            st.info("Fine-tune queued as a background task. Check back in 30–60 minutes.")
-        if st.button("← Back", key="ft_back3"):
+        st.markdown("**Step 3: Fine-Tune**")
+
+        _ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower()
+                            if _profile else "cover")
+        _ft_model_name = f"{_ft_profile_name}-cover-writer"
+
+        st.info(
+            "Run the command below from your terminal. Training takes 30–90 min on GPU "
+            "and registers the model automatically when complete."
+        )
+        st.code("make finetune PROFILE=single-gpu", language="bash")
+        st.caption(
+            f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
+            "Cover letter generation will use it automatically."
+        )
+
+        st.markdown("**Model status:**")
+        try:
+            import os as _os
+            import requests as _ft_req
+            _ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
+            _tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
+            if _tags.status_code == 200:
+                _model_names = [m["name"] for m in _tags.json().get("models", [])]
+                if any(_ft_model_name in m for m in _model_names):
+                    st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
+                else:
+                    st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
+            else:
+                st.caption("Ollama returned an unexpected response.")
+        except Exception:
+            st.caption("Could not reach Ollama — ensure services are running with `make start`.")
+
+        col_back, col_refresh = st.columns([1, 3])
+        if col_back.button("← Back", key="ft_back3"):
             st.session_state.ft_step = 2
             st.rerun()
+        if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
+            st.rerun()
 
 # ── Developer tab ─────────────────────────────────────────────────────────────
 if _show_dev_tab:
diff --git a/scripts/prepare_training_data.py b/scripts/prepare_training_data.py
index e0bc046..8a47d86 100644
--- a/scripts/prepare_training_data.py
+++ b/scripts/prepare_training_data.py
@@ -81,6 +81,16 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
         if p not in seen:
             seen.add(p)
             all_paths.append(p)
+
+    # Also scan web-uploaded files (Settings → Fine-tune → Upload)
+    uploads_dir = letters_dir / "training_data" / "uploads"
+    if uploads_dir.exists():
+        for pattern in ("*.md", "*.txt"):
+            for p in uploads_dir.glob(pattern):
+                if p not in seen:
+                    seen.add(p)
+                    all_paths.append(p)
+
     for path in sorted(all_paths):
         text = path.read_text(encoding="utf-8", errors="ignore").strip()
         if not text or len(text) < 100:
diff --git a/scripts/task_runner.py b/scripts/task_runner.py
index 41e87c6..9d02bbe 100644
--- a/scripts/task_runner.py
+++ b/scripts/task_runner.py
@@ -243,6 +243,17 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
             )
             return
 
+    elif task_type == "prepare_training":
+        from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
+        records = build_records()
+        write_jsonl(records, DEFAULT_OUTPUT)
+        n = len(records)
+        update_task_status(
+            db_path, task_id, "completed",
+            error=f"{n} training pair{'s' if n != 1 else ''} extracted",
+        )
+        return
+
     else:
         raise ValueError(f"Unknown task_type: {task_type!r}")
 
diff --git a/setup.sh b/setup.sh
index 99ab27a..9316355 100755
--- a/setup.sh
+++ b/setup.sh
@@ -168,6 +168,27 @@ check_compose() {
     fi
 }
 
+# ── Docker daemon health check ──────────────────────────────────────────────────
+check_docker_running() {
+    if docker info &>/dev/null; then
+        success "Docker daemon is running."
+        return
+    fi
+    warn "Docker daemon is not responding."
+    if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
+        info "Starting Docker service…"
+        $SUDO systemctl start docker 2>/dev/null || true
+        sleep 2
+        if docker info &>/dev/null; then
+            success "Docker daemon started."
+        else
+            warn "Docker failed to start. Run: sudo systemctl start docker"
+        fi
+    elif [[ "$OS" == "Darwin" ]]; then
+        warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
+    fi
+}
+
 # ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
 install_nvidia_toolkit() {
     [[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support
@@ -175,8 +196,8 @@
         info "No NVIDIA GPU detected — skipping Container Toolkit."
         return
     fi
-    if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
-        success "NVIDIA Container Toolkit already working."
+    if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null; then
+        success "NVIDIA Container Toolkit already configured."
         return
     fi
     info "NVIDIA GPU detected. Installing Container Toolkit…"
@@ -283,6 +304,7 @@ main() {
     # Podman takes precedence if already installed; otherwise install Docker
     if ! check_podman; then
         install_docker
+        check_docker_running
         check_compose
         install_nvidia_toolkit
     fi