feat: wire fine-tune UI end-to-end + harden setup.sh
- setup.sh: replace docker-image-based NVIDIA test with nvidia-ctk validate
(faster, no 100MB pull, no daemon required); add check_docker_running()
to auto-start the Docker service on Linux or warn on macOS
- prepare_training_data.py: also scan training_data/uploads/*.{md,txt}
so web-uploaded letters are included in training data
- task_runner.py: add prepare_training task type (calls build_records +
write_jsonl inline; reports pair count in task result)
- Settings fine-tune tab: Step 1 accepts .md/.txt uploads; Step 2 Extract
button submits prepare_training background task + shows status; Step 3
shows make finetune command + live Ollama model status poller
This commit is contained in:
parent
740b0ea45a
commit
bcde4c960e
4 changed files with 118 additions and 15 deletions
|
|
@@ -1026,9 +1026,10 @@ with tab_finetune:
|
|||
|
||||
if ft_step == 1:
|
||||
st.markdown("**Step 1: Upload Cover Letters**")
|
||||
st.caption("Accepted formats: `.md` or `.txt`. Convert PDFs to text before uploading.")
|
||||
uploaded = st.file_uploader(
|
||||
"Upload cover letters (PDF, DOCX, or TXT)",
|
||||
type=["pdf", "docx", "txt"],
|
||||
"Upload cover letters (.md or .txt)",
|
||||
type=["md", "txt"],
|
||||
accept_multiple_files=True,
|
||||
)
|
||||
if uploaded and st.button("Extract Training Pairs →", type="primary", key="ft_extract"):
|
||||
|
|
@@ -1040,18 +1041,45 @@ with tab_finetune:
|
|||
st.rerun()
|
||||
|
||||
elif ft_step == 2:
|
||||
st.markdown("**Step 2: Preview Training Pairs**")
|
||||
st.info("Run `python scripts/prepare_training_data.py` to extract pairs, then return here.")
|
||||
st.markdown("**Step 2: Extract Training Pairs**")
|
||||
import json as _json
|
||||
import sqlite3 as _sqlite3
|
||||
from scripts.db import DEFAULT_DB as _FT_DB
|
||||
|
||||
jsonl_path = _profile.docs_dir / "training_data" / "cover_letters.jsonl"
|
||||
|
||||
# Show task status
|
||||
_ft_conn = _sqlite3.connect(_FT_DB)
|
||||
_ft_conn.row_factory = _sqlite3.Row
|
||||
_ft_task = _ft_conn.execute(
|
||||
"SELECT * FROM background_tasks WHERE task_type='prepare_training' ORDER BY id DESC LIMIT 1"
|
||||
).fetchone()
|
||||
_ft_conn.close()
|
||||
|
||||
if _ft_task:
|
||||
_ft_status = _ft_task["status"]
|
||||
if _ft_status == "completed":
|
||||
st.success(f"✅ {_ft_task['error'] or 'Extraction complete'}")
|
||||
elif _ft_status in ("running", "queued"):
|
||||
st.info(f"⏳ {_ft_status.capitalize()}… refresh to check progress.")
|
||||
elif _ft_status == "failed":
|
||||
st.error(f"Extraction failed: {_ft_task['error']}")
|
||||
|
||||
if st.button("⚙️ Extract Training Pairs", type="primary", key="ft_extract2"):
|
||||
from scripts.task_runner import submit_task as _ft_submit
|
||||
_ft_submit(_FT_DB, "prepare_training", 0)
|
||||
st.info("Extracting in the background — refresh in a moment.")
|
||||
st.rerun()
|
||||
|
||||
if jsonl_path.exists():
|
||||
import json as _json
|
||||
pairs = [_json.loads(l) for l in jsonl_path.read_text().splitlines() if l.strip()]
|
||||
st.caption(f"{len(pairs)} training pairs extracted.")
|
||||
st.caption(f"{len(pairs)} training pairs ready.")
|
||||
for i, p in enumerate(pairs[:3]):
|
||||
with st.expander(f"Pair {i+1}"):
|
||||
st.text(p.get("input", "")[:300])
|
||||
st.text(p.get("output", p.get("input", ""))[:300])
|
||||
else:
|
||||
st.warning("No training pairs found. Run `prepare_training_data.py` first.")
|
||||
st.caption("No training pairs yet — click Extract above.")
|
||||
|
||||
col_back, col_next = st.columns([1, 4])
|
||||
if col_back.button("← Back", key="ft_back2"):
|
||||
st.session_state.ft_step = 1
|
||||
|
|
@@ -1061,13 +1089,45 @@ with tab_finetune:
|
|||
st.rerun()
|
||||
|
||||
elif ft_step == 3:
|
||||
st.markdown("**Step 3: Train**")
|
||||
st.slider("Epochs", 3, 20, 10, key="ft_epochs")
|
||||
if st.button("🚀 Start Fine-Tune", type="primary", key="ft_start"):
|
||||
st.info("Fine-tune queued as a background task. Check back in 30–60 minutes.")
|
||||
if st.button("← Back", key="ft_back3"):
|
||||
st.markdown("**Step 3: Fine-Tune**")
|
||||
|
||||
_ft_profile_name = ((_profile.name.split() or ["cover"])[0].lower()
|
||||
if _profile else "cover")
|
||||
_ft_model_name = f"{_ft_profile_name}-cover-writer"
|
||||
|
||||
st.info(
|
||||
"Run the command below from your terminal. Training takes 30–90 min on GPU "
|
||||
"and registers the model automatically when complete."
|
||||
)
|
||||
st.code("make finetune PROFILE=single-gpu", language="bash")
|
||||
st.caption(
|
||||
f"Your model will appear as **{_ft_model_name}:latest** in Ollama. "
|
||||
"Cover letter generation will use it automatically."
|
||||
)
|
||||
|
||||
st.markdown("**Model status:**")
|
||||
try:
|
||||
import os as _os
|
||||
import requests as _ft_req
|
||||
_ollama_url = _os.environ.get("OLLAMA_URL", "http://localhost:11434")
|
||||
_tags = _ft_req.get(f"{_ollama_url}/api/tags", timeout=3)
|
||||
if _tags.status_code == 200:
|
||||
_model_names = [m["name"] for m in _tags.json().get("models", [])]
|
||||
if any(_ft_model_name in m for m in _model_names):
|
||||
st.success(f"✅ `{_ft_model_name}:latest` is ready in Ollama!")
|
||||
else:
|
||||
st.warning(f"⏳ `{_ft_model_name}:latest` not registered yet.")
|
||||
else:
|
||||
st.caption("Ollama returned an unexpected response.")
|
||||
except Exception:
|
||||
st.caption("Could not reach Ollama — ensure services are running with `make start`.")
|
||||
|
||||
col_back, col_refresh = st.columns([1, 3])
|
||||
if col_back.button("← Back", key="ft_back3"):
|
||||
st.session_state.ft_step = 2
|
||||
st.rerun()
|
||||
if col_refresh.button("🔄 Check model status", key="ft_refresh3"):
|
||||
st.rerun()
|
||||
|
||||
# ── Developer tab ─────────────────────────────────────────────────────────────
|
||||
if _show_dev_tab:
|
||||
|
|
|
|||
|
|
@@ -81,6 +81,16 @@ def build_records(letters_dir: Path = LETTERS_DIR) -> list[dict]:
|
|||
if p not in seen:
|
||||
seen.add(p)
|
||||
all_paths.append(p)
|
||||
|
||||
# Also scan web-uploaded files (Settings → Fine-tune → Upload)
|
||||
uploads_dir = letters_dir / "training_data" / "uploads"
|
||||
if uploads_dir.exists():
|
||||
for glob in ("*.md", "*.txt"):
|
||||
for p in uploads_dir.glob(glob):
|
||||
if p not in seen:
|
||||
seen.add(p)
|
||||
all_paths.append(p)
|
||||
|
||||
for path in sorted(all_paths):
|
||||
text = path.read_text(encoding="utf-8", errors="ignore").strip()
|
||||
if not text or len(text) < 100:
|
||||
|
|
|
|||
|
|
@@ -243,6 +243,17 @@ def _run_task(db_path: Path, task_id: int, task_type: str, job_id: int,
|
|||
)
|
||||
return
|
||||
|
||||
elif task_type == "prepare_training":
|
||||
from scripts.prepare_training_data import build_records, write_jsonl, DEFAULT_OUTPUT
|
||||
records = build_records()
|
||||
write_jsonl(records, DEFAULT_OUTPUT)
|
||||
n = len(records)
|
||||
update_task_status(
|
||||
db_path, task_id, "completed",
|
||||
error=f"{n} training pair{'s' if n != 1 else ''} extracted",
|
||||
)
|
||||
return
|
||||
|
||||
else:
|
||||
raise ValueError(f"Unknown task_type: {task_type!r}")
|
||||
|
||||
|
|
|
|||
26
setup.sh
26
setup.sh
|
|
@@ -168,6 +168,27 @@ check_compose() {
|
|||
fi
|
||||
}
|
||||
|
||||
# ── Docker daemon health check ──────────────────────────────────────────────────
# Verify the Docker daemon is reachable; on systemd Linux, try to start it.
# Uses file-level helpers success/warn/info and globals $OS (uname) and $SUDO.
check_docker_running() {
    # NOTE: `&>/dev/null` already redirects both stdout and stderr; the
    # original's trailing `2>&1` was redundant and has been dropped.
    if docker info &>/dev/null; then
        success "Docker daemon is running."
        return
    fi
    warn "Docker daemon is not responding."
    if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
        info "Starting Docker service…"
        # Best-effort: never abort the setup script if the start fails.
        $SUDO systemctl start docker 2>/dev/null || true
        sleep 2
        if docker info &>/dev/null; then
            success "Docker daemon started."
        else
            warn "Docker failed to start. Run: sudo systemctl start docker"
        fi
    elif [[ "$OS" == "Darwin" ]]; then
        warn "Docker Desktop is not running. Start it, wait for the whale icon, then run 'make start'."
    else
        # Non-systemd Linux or other OS: we cannot auto-start the daemon,
        # so tell the user instead of falling through silently.
        warn "Start the Docker daemon manually, then re-run this script."
    fi
}
|
||||
|
||||
# ── NVIDIA Container Toolkit ───────────────────────────────────────────────────
|
||||
install_nvidia_toolkit() {
|
||||
[[ "$OS" != "Linux" ]] && return # macOS has no NVIDIA support
|
||||
|
|
@@ -175,8 +196,8 @@ install_nvidia_toolkit() {
|
|||
info "No NVIDIA GPU detected — skipping Container Toolkit."
|
||||
return
|
||||
fi
|
||||
if docker run --rm --gpus all nvidia/cuda:12.0-base-ubuntu22.04 nvidia-smi &>/dev/null 2>&1; then
|
||||
success "NVIDIA Container Toolkit already working."
|
||||
if cmd_exists nvidia-ctk && nvidia-ctk runtime validate --runtime=docker &>/dev/null 2>&1; then
|
||||
success "NVIDIA Container Toolkit already configured."
|
||||
return
|
||||
fi
|
||||
info "NVIDIA GPU detected. Installing Container Toolkit…"
|
||||
|
|
@@ -283,6 +304,7 @@ main() {
|
|||
# Podman takes precedence if already installed; otherwise install Docker
|
||||
if ! check_podman; then
|
||||
install_docker
|
||||
check_docker_running
|
||||
check_compose
|
||||
install_nvidia_toolkit
|
||||
fi
|
||||
|
|
|
|||
Loading…
Reference in a new issue