fix: thread searxng URL through research functions via _SEARXNG_URL constant
- Add module-level _SEARXNG_URL derived from UserProfile.searxng_url (or default localhost:8888)
- Update all _searxng_running() call sites to pass _SEARXNG_URL explicitly
- Replace hardcoded "http://localhost:8888/" in _scrape_company() with _SEARXNG_URL + "/"
- Replace hardcoded "http://localhost:8888/search" in _run_search_query() with f"{_SEARXNG_URL}/search"
- Guard _profile.name.split() against empty string in finetune_local.py OLLAMA_NAME
This commit is contained in:
parent
af41d14241
commit
f28d91d4d7
2 changed files with 10 additions and 7 deletions
|
|
@ -45,6 +45,9 @@ for _scraper_candidate in [
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
||||||
|
_SEARXNG_URL: str = _profile.searxng_url if _profile else "http://localhost:8888"
|
||||||
|
|
||||||
|
|
||||||
def _searxng_running(searxng_url: str = "http://localhost:8888") -> bool:
|
def _searxng_running(searxng_url: str = "http://localhost:8888") -> bool:
|
||||||
"""Quick check whether SearXNG is reachable."""
|
"""Quick check whether SearXNG is reachable."""
|
||||||
try:
|
try:
|
||||||
|
|
@ -76,10 +79,10 @@ def _scrape_company(company: str) -> dict:
|
||||||
timeout=20,
|
timeout=20,
|
||||||
input_file=None,
|
input_file=None,
|
||||||
output_file="/dev/null",
|
output_file="/dev/null",
|
||||||
searxng_url="http://localhost:8888/",
|
searxng_url=_SEARXNG_URL + "/",
|
||||||
)
|
)
|
||||||
# Override the singleton Config URL
|
# Override the singleton Config URL
|
||||||
_ScraperConfig.SEARXNG_URL = "http://localhost:8888/"
|
_ScraperConfig.SEARXNG_URL = _SEARXNG_URL + "/"
|
||||||
|
|
||||||
scraper = EnhancedCompanyScraper(mock_args)
|
scraper = EnhancedCompanyScraper(mock_args)
|
||||||
scraper.companies = [company]
|
scraper.companies = [company]
|
||||||
|
|
@ -121,7 +124,7 @@ def _run_search_query(query: str, results: dict, key: str) -> None:
|
||||||
seen: set[str] = set()
|
seen: set[str] = set()
|
||||||
try:
|
try:
|
||||||
resp = requests.get(
|
resp = requests.get(
|
||||||
"http://localhost:8888/search",
|
f"{_SEARXNG_URL}/search",
|
||||||
params={"q": query, "format": "json", "language": "en-US"},
|
params={"q": query, "format": "json", "language": "en-US"},
|
||||||
timeout=12,
|
timeout=12,
|
||||||
)
|
)
|
||||||
|
|
@ -317,7 +320,7 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict
|
||||||
live_data: dict = {}
|
live_data: dict = {}
|
||||||
scrape_note = ""
|
scrape_note = ""
|
||||||
_stage("Checking for live company data…")
|
_stage("Checking for live company data…")
|
||||||
if use_scraper and _SCRAPER_AVAILABLE and _searxng_running():
|
if use_scraper and _SCRAPER_AVAILABLE and _searxng_running(_SEARXNG_URL):
|
||||||
_stage("Scraping CEO & HQ data…")
|
_stage("Scraping CEO & HQ data…")
|
||||||
try:
|
try:
|
||||||
live_data = _scrape_company(company)
|
live_data = _scrape_company(company)
|
||||||
|
|
@ -340,7 +343,7 @@ def research_company(job: dict, use_scraper: bool = True, on_stage=None) -> dict
|
||||||
# ── Phase 1b: parallel search queries ────────────────────────────────────
|
# ── Phase 1b: parallel search queries ────────────────────────────────────
|
||||||
search_data: dict[str, str] = {}
|
search_data: dict[str, str] = {}
|
||||||
_stage("Running web searches…")
|
_stage("Running web searches…")
|
||||||
if use_scraper and _searxng_running():
|
if use_scraper and _searxng_running(_SEARXNG_URL):
|
||||||
_stage("Running web searches (news, funding, tech, culture)…")
|
_stage("Running web searches (news, funding, tech, culture)…")
|
||||||
try:
|
try:
|
||||||
ceo_name = (live_data.get("ceo") or "") if live_data else ""
|
ceo_name = (live_data.get("ceo") or "") if live_data else ""
|
||||||
|
|
@ -469,7 +472,7 @@ if __name__ == "__main__":
|
||||||
job = dict(row)
|
job = dict(row)
|
||||||
print(f"Researching: {job['title']} @ {job['company']} …\n")
|
print(f"Researching: {job['title']} @ {job['company']} …\n")
|
||||||
if _SCRAPER_AVAILABLE and not args.no_scrape:
|
if _SCRAPER_AVAILABLE and not args.no_scrape:
|
||||||
print(f"SearXNG available: {_searxng_running()}")
|
print(f"SearXNG available: {_searxng_running(_SEARXNG_URL)}")
|
||||||
|
|
||||||
result = research_company(job, use_scraper=not args.no_scrape)
|
result = research_company(job, use_scraper=not args.no_scrape)
|
||||||
save_research(DEFAULT_DB, job_id=args.job_id, **result)
|
save_research(DEFAULT_DB, job_id=args.job_id, **result)
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ _docs = _profile.docs_dir if _profile else Path.home() / "Documents" / "JobSearc
|
||||||
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
LETTERS_JSONL = _docs / "training_data" / "cover_letters.jsonl"
|
||||||
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
OUTPUT_DIR = _docs / "training_data" / "finetune_output"
|
||||||
GGUF_DIR = _docs / "training_data" / "gguf"
|
GGUF_DIR = _docs / "training_data" / "gguf"
|
||||||
OLLAMA_NAME = f"{_profile.name.split()[0].lower()}-cover-writer" if _profile else "cover-writer"
|
OLLAMA_NAME = f"{(_profile.name.split() or ['cover'])[0].lower()}-cover-writer" if _profile else "cover-writer"
|
||||||
|
|
||||||
SYSTEM_PROMPT = (
|
SYSTEM_PROMPT = (
|
||||||
f"You are {_profile.name}'s personal cover letter writer. "
|
f"You are {_profile.name}'s personal cover letter writer. "
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue