From 4e1748ca6291ead4d88f4e0539f823bf682f9158 Mon Sep 17 00:00:00 2001
From: pyr0ball <pyroballpcs@gmail.com>
Date: Wed, 25 Feb 2026 16:03:10 -0800
Subject: [PATCH] fix: repair beta installer path for Docker-first deployment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- llm.yaml + example: replace localhost URLs with Docker service names
  (ollama:11434, vllm:8000, vision:8002); replace personal model names
  (alex-cover-writer, llama3.1:8b) with llama3.2:3b
- user.yaml.example: update service hosts to Docker names (ollama, vllm,
  searxng) and searxng port from 8888 (host-mapped) to 8080 (internal)
- wizard step 5: fix hardcoded localhost defaults — wizard runs inside
  Docker, so service name defaults are required for connection tests to pass
- scrapers/companyScraper.py: bundle scraper so Dockerfile COPY succeeds
- setup.sh: remove host Ollama install (conflicts with Docker Ollama on
  port 11434); Docker entrypoint handles model download automatically
- README + setup.sh banner: add Circuit Forge mission statement
---
 .gitignore                 |    5 +
 README.md                  |    2 +
 app/pages/0_Setup.py       |    6 +-
 config/llm.yaml            |   12 +-
 config/llm.yaml.example    |   10 +-
 config/user.yaml.example   |    8 +-
 scrapers/companyScraper.py | 1026 ++++++++++++++++++++++++++++++++++++
 setup.sh                   |   88 +---
 8 files changed, 1059 insertions(+), 98 deletions(-)
 create mode 100755 scrapers/companyScraper.py

diff --git a/.gitignore b/.gitignore
index aae1f7d..b574311 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,3 +22,8 @@ config/user.yaml
 config/.backup-*
 config/integrations/*.yaml
 !config/integrations/*.yaml.example
+
+# companyScraper runtime artifacts
+scrapers/.cache/
+scrapers/.debug/
+scrapers/raw_scrapes/
diff --git a/README.md b/README.md
index e07f1b7..434a36a 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 
 **AI-powered job search pipeline — by [Circuit Forge LLC](https://circuitforge.io)**
 
+> *"Don't be evil, for real and forever."*
+
 Automates the full job search lifecycle: discovery → matching → cover letters → applications → interview prep.
 Privacy-first, local-first. Your data never leaves your machine.
 
diff --git a/app/pages/0_Setup.py b/app/pages/0_Setup.py
index 59e6d11..637c468 100644
--- a/app/pages/0_Setup.py
+++ b/app/pages/0_Setup.py
@@ -403,9 +403,9 @@ elif step == 5:
         st.caption("Change only if services run on non-default ports or remote hosts.")
         svc = dict(saved_yaml.get("services", {}))
         for svc_name, default_host, default_port in [
-            ("ollama",  "localhost", 11434),
-            ("vllm",    "localhost", 8000),
-            ("searxng", "localhost", 8888),
+            ("ollama",  "ollama",   11434),  # Docker service name
+            ("vllm",    "vllm",     8000),   # Docker service name
+            ("searxng", "searxng",  8080),   # Docker internal port (host-mapped: 8888)
         ]:
             c1, c2 = st.columns([3, 1])
             svc[f"{svc_name}_host"] = c1.text_input(
diff --git a/config/llm.yaml b/config/llm.yaml
index 34860df..015e789 100644
--- a/config/llm.yaml
+++ b/config/llm.yaml
@@ -21,26 +21,26 @@ backends:
     type: openai_compat
   ollama:
     api_key: ollama
-    base_url: http://localhost:11434/v1
+    base_url: http://ollama:11434/v1
     enabled: true
-    model: alex-cover-writer:latest
+    model: llama3.2:3b
     supports_images: false
     type: openai_compat
   ollama_research:
     api_key: ollama
-    base_url: http://localhost:11434/v1
+    base_url: http://ollama:11434/v1
     enabled: true
-    model: llama3.1:8b
+    model: llama3.2:3b
     supports_images: false
     type: openai_compat
   vision_service:
-    base_url: http://localhost:8002
+    base_url: http://vision:8002
     enabled: true
     supports_images: true
     type: vision_service
   vllm:
     api_key: ''
-    base_url: http://localhost:8000/v1
+    base_url: http://vllm:8000/v1
     enabled: true
     model: __auto__
     supports_images: false
diff --git a/config/llm.yaml.example b/config/llm.yaml.example
index e5a58e5..5b006ef 100644
--- a/config/llm.yaml.example
+++ b/config/llm.yaml.example
@@ -21,21 +21,21 @@ backends:
     supports_images: false
   ollama:
     api_key: ollama
-    base_url: http://localhost:11434/v1
+    base_url: http://ollama:11434/v1    # Docker service name; use localhost:11434 outside Docker
     enabled: true
-    model: alex-cover-writer:latest
+    model: llama3.2:3b
     type: openai_compat
     supports_images: false
   ollama_research:
     api_key: ollama
-    base_url: http://localhost:11434/v1
+    base_url: http://ollama:11434/v1    # Docker service name; use localhost:11434 outside Docker
     enabled: true
-    model: llama3.1:8b
+    model: llama3.2:3b
     type: openai_compat
     supports_images: false
   vllm:
     api_key: ''
-    base_url: http://localhost:8000/v1
+    base_url: http://vllm:8000/v1      # Docker service name; use localhost:8000 outside Docker
     enabled: true
     model: __auto__
     type: openai_compat
diff --git a/config/user.yaml.example b/config/user.yaml.example
index d088a27..22c8ecb 100644
--- a/config/user.yaml.example
+++ b/config/user.yaml.example
@@ -44,15 +44,15 @@ inference_profile: "remote"  # remote | cpu | single-gpu | dual-gpu
 
 services:
   streamlit_port: 8501
-  ollama_host: localhost
+  ollama_host: ollama        # Docker service name; use "localhost" if running outside Docker
   ollama_port: 11434
   ollama_ssl: false
   ollama_ssl_verify: true
-  vllm_host: localhost
+  vllm_host: vllm            # Docker service name; use "localhost" if running outside Docker
   vllm_port: 8000
   vllm_ssl: false
   vllm_ssl_verify: true
-  searxng_host: localhost
-  searxng_port: 8888
+  searxng_host: searxng      # Docker service name; use "localhost" if running outside Docker
+  searxng_port: 8080         # internal Docker port; use 8888 for host-mapped access
   searxng_ssl: false
   searxng_ssl_verify: true
diff --git a/scrapers/companyScraper.py b/scrapers/companyScraper.py
new file mode 100755
index 0000000..1a01d83
--- /dev/null
+++ b/scrapers/companyScraper.py
@@ -0,0 +1,1026 @@
+#!/usr/bin/env python3
+"""
+Enhanced Company Information Scraper with SearXNG Integration
+----------------------------
+A Python script to collect various company information including executives, 
+contact details, and addresses using SearXNG as the search backend.
+
+Enhanced features:
+- Search for staff by specific titles
+- Collect contact information (phone, email, social media)
+- Multiple output modes (minimal, targeted, comprehensive)
+- Configurable data collection targets
+"""
+
+import argparse
+import csv
+import json
+import os
+import random
+import re
+import sys
+import time
+from datetime import datetime
+from urllib.parse import quote_plus, urlencode
+
+try:
+    import requests
+    from bs4 import BeautifulSoup
+    from fake_useragent import UserAgent
+except ImportError:
+    print("Required packages not found. Please install them with:")
+    print("pip install requests beautifulsoup4 fake-useragent")
+    sys.exit(1)
+
+# Configuration
+class Config:
+    VERSION = "2.0.0"
+    DEFAULT_TIMEOUT = 20
+    CACHE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".cache")
+    DEBUG_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), ".debug")
+    RAW_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "raw_scrapes")
+    
+    # SearXNG configuration
+    SEARXNG_URL = "http://localhost:8888/"
+    
+    # Search engines to use with SearXNG
+    SEARCH_ENGINES = [
+        "google",
+        "duckduckgo",
+        "bing"
+    ]
+    
+    # Search delay ranges (min, max) in seconds
+    DELAY_BETWEEN_SEARCHES = (1, 3)  # Can be lower with SearXNG
+    DELAY_BETWEEN_COMPANIES = (2, 5)  # Can be lower with SearXNG
+    DELAY_BEFORE_SEARCH = (0.5, 1.5)  # Can be lower with SearXNG
+    
+    # Retry configuration
+    MAX_RETRIES = 3
+    RETRY_DELAY = (2, 5)  # Can be lower with SearXNG
+    
+    # Available search types
+    SEARCH_TYPES = {
+        "ceo": "CEO information",
+        "hq": "Headquarters address",
+        "phone": "Phone numbers",
+        "email": "Email addresses",
+        "social": "Social media profiles",
+        "staff": "Staff members by title",
+        "contact": "General contact information",
+        "mailing": "Mailing address"
+    }
+    
+    # Minimal mode search types
+    MINIMAL_SEARCH_TYPES = ["ceo", "hq"]
+    
+    # Default comprehensive search types (everything)
+    COMPREHENSIVE_SEARCH_TYPES = list(SEARCH_TYPES.keys())
+
+class EnhancedCompanyScraper:
+    def __init__(self, args):
+        self.args = args
+        self.companies = []
+        self.results = []
+        self.session = requests.Session()
+        
+        # Determine which search types to use based on mode
+        self.search_types = self.determine_search_types()
+        
+        self.setup_directories()
+        
+        # Check if SearXNG is running
+        if not self.check_searxng():
+            print(f"Error: SearXNG not available at {Config.SEARXNG_URL}")
+            print("Please make sure SearXNG is running before using this script.")
+            print("You can start it with: docker-compose up -d")
+            sys.exit(1)
+        
+        # Use fake-useragent to rotate user agents
+        try:
+            self.ua = UserAgent()
+        except:
+            # Fallback if fake-useragent fails
+            self.ua = None
+            print("Warning: fake-useragent failed to initialize. Using default user agent.")
+    
+    def determine_search_types(self):
+        """Determine which search types to use based on mode and args"""
+        search_types = []
+        
+        # Start with default search types
+        if self.args.mode == "minimal":
+            search_types = Config.MINIMAL_SEARCH_TYPES.copy()
+        elif self.args.mode == "comprehensive":
+            search_types = Config.COMPREHENSIVE_SEARCH_TYPES.copy()
+        elif self.args.mode == "targeted":
+            # For targeted mode, use only what was specified
+            if self.args.target_staff:
+                search_types.append("staff")
+            else:
+                # If no staff title specified, default to CEO
+                search_types.append("ceo")
+                
+            # Add any explicitly requested types
+            if self.args.include_contact:
+                search_types.extend(["phone", "email"])
+            if self.args.include_address:
+                search_types.extend(["hq", "mailing"])
+            if self.args.include_social:
+                search_types.append("social")
+                
+            # If nothing explicitly included, add headquarters
+            if len(search_types) == 1:  # Only staff/ceo
+                search_types.append("hq")
+        
+        # Override with explicit includes/excludes
+        if self.args.include_types:
+            for type_name in self.args.include_types.split(','):
+                type_name = type_name.strip()
+                if type_name in Config.SEARCH_TYPES and type_name not in search_types:
+                    search_types.append(type_name)
+        
+        if self.args.exclude_types:
+            for type_name in self.args.exclude_types.split(','):
+                type_name = type_name.strip()
+                if type_name in search_types:
+                    search_types.remove(type_name)
+        
+        # Log selected search types
+        if self.args.verbose:
+            print(f"Selected search types: {', '.join(search_types)}")
+        
+        return search_types
+    
+    def check_searxng(self):
+        """Check if SearXNG is running and available"""
+        if self.args.dry_run:
+            return True
+            
+        try:
+            response = requests.get(Config.SEARXNG_URL, timeout=5)
+            return response.status_code == 200
+        except:
+            return False
+    
+    def setup_directories(self):
+        """Create necessary directories for caching and debugging"""
+        # Create cache directories for all search types
+        if self.args.use_cache:
+            for search_type in Config.SEARCH_TYPES.keys():
+                os.makedirs(os.path.join(Config.CACHE_DIR, search_type), exist_ok=True)
+        
+        if self.args.debug:
+            os.makedirs(Config.DEBUG_DIR, exist_ok=True)
+            os.makedirs(os.path.join(Config.DEBUG_DIR, "extraction"), exist_ok=True)
+            os.makedirs(os.path.join(Config.DEBUG_DIR, "patterns"), exist_ok=True)
+        
+        if self.args.save_raw:
+            for search_type in Config.SEARCH_TYPES.keys():
+                os.makedirs(os.path.join(Config.RAW_DIR, search_type), exist_ok=True)
+    
+    def load_companies(self):
+        """Load companies from file or stdin"""
+        if self.args.input_file:
+            try:
+                with open(self.args.input_file, 'r') as f:
+                    for line in f:
+                        company = line.strip()
+                        if company:
+                            self.companies.append(company)
+            except Exception as e:
+                print(f"Error loading companies from file: {e}")
+                sys.exit(1)
+        else:
+            print("Enter company names (one per line), press Ctrl+D when finished:")
+            for line in sys.stdin:
+                company = line.strip()
+                if company:
+                    self.companies.append(company)
+        
+        if not self.companies:
+            print("No companies provided!")
+            sys.exit(1)
+        
+        print(f"Loaded {len(self.companies)} companies")
+    
+    def get_random_user_agent(self):
+        """Get a random user agent"""
+        if self.ua:
+            return self.ua.random
+        return "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
+    
+    def get_searxng_url(self, query, search_type, engine):
+        """Get SearXNG search URL for the given engine and search type"""
+        query = quote_plus(query)
+        
+        search_terms = ""
+        if search_type == "ceo":
+            search_terms = "CEO who is the chief executive"
+        elif search_type == "hq":
+            search_terms = "headquarters address location where is"
+        elif search_type == "phone":
+            search_terms = "phone number contact"
+        elif search_type == "email":
+            search_terms = "email address contact"
+        elif search_type == "social":
+            search_terms = "social media profiles twitter linkedin facebook"
+        elif search_type == "contact":
+            search_terms = "contact information phone email"
+        elif search_type == "mailing":
+            search_terms = "mailing address postal"
+        elif search_type == "staff":
+            # For staff, include the target title in the search
+            staff_title = self.args.target_staff or "executive team"
+            search_terms = f"{staff_title} who is"
+        
+        # Build the full query
+        full_query = f"{query} {search_terms}"
+        
+        # Prepare parameters for SearXNG
+        params = {
+            'q': full_query,
+            'engines': engine,
+            'format': 'html',
+            'language': 'en-US'
+        }
+        
+        # Build the URL
+        url = f"{Config.SEARXNG_URL.rstrip('/')}/?{urlencode(params)}"
+        return url
+    
+    def search_company(self, company, search_type):
+        """Search for company information with specific search type"""
+        clean_company = re.sub(r'[^a-zA-Z0-9_-]', '+', company)
+        cache_file = os.path.join(Config.CACHE_DIR, search_type, f"{clean_company}.html")
+        
+        # Check cache first if enabled
+        if self.args.use_cache and os.path.exists(cache_file):
+            self.debug_log(f"Using cached data for {search_type} search", company, "extraction")
+            with open(cache_file, 'r', encoding='utf-8') as f:
+                return f.read()
+        
+        # Try each search engine until one succeeds
+        for retry in range(Config.MAX_RETRIES):
+            for engine in Config.SEARCH_ENGINES:
+                if self.args.verbose:
+                    print(f"Searching for {company} {search_type} using SearXNG with {engine} (attempt {retry+1})")
+                
+                # Random delay before search
+                delay = random.uniform(*Config.DELAY_BEFORE_SEARCH)
+                if self.args.verbose:
+                    print(f"Waiting {delay:.2f} seconds before search...")
+                time.sleep(delay)
+                
+                # Get the search URL
+                url = self.get_searxng_url(company, search_type, engine)
+                
+                if self.args.dry_run:
+                    self.debug_log(f"Would search: {url}", company, "extraction")
+                    return "<dry-run-placeholder></dry-run-placeholder>"
+                
+                # Prepare headers with random user agent
+                headers = {
+                    "User-Agent": self.get_random_user_agent(),
+                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+                    "Accept-Language": "en-US,en;q=0.5",
+                    "Accept-Encoding": "gzip, deflate, br",
+                    "Connection": "keep-alive",
+                    "Upgrade-Insecure-Requests": "1"
+                }
+                
+                try:
+                    response = self.session.get(
+                        url, 
+                        headers=headers, 
+                        timeout=self.args.timeout
+                    )
+                    
+                    # Check if the response is valid
+                    if response.status_code != 200:
+                        if self.args.verbose:
+                            print(f"Got status code {response.status_code} from SearXNG with {engine}")
+                        continue
+                    
+                    # Get the HTML content
+                    html_content = response.text
+                    
+                    # Save raw HTML if requested
+                    if self.args.save_raw:
+                        raw_file = os.path.join(Config.RAW_DIR, search_type, f"{clean_company}_{engine}.html")
+                        with open(raw_file, 'w', encoding='utf-8') as f:
+                            f.write(html_content)
+                    
+                    # Save to cache if enabled
+                    if self.args.use_cache:
+                        with open(cache_file, 'w', encoding='utf-8') as f:
+                            f.write(html_content)
+                    
+                    return html_content
+                    
+                except Exception as e:
+                    if self.args.verbose:
+                        print(f"Error searching with SearXNG/{engine}: {e}")
+                    continue
+            
+            # If we've tried all engines and none worked, wait before retry
+            if retry < Config.MAX_RETRIES - 1:
+                retry_delay = random.uniform(*Config.RETRY_DELAY)
+                if self.args.verbose:
+                    print(f"All search engines failed. Waiting {retry_delay:.2f} seconds before retry...")
+                time.sleep(retry_delay)
+        
+        # If all retries failed
+        print(f"Warning: All search attempts failed for {company} {search_type}")
+        return "<search-failed></search-failed>"
+    
+    def extract_ceo(self, html_content, company):
+        """Extract CEO name from search results"""
+        if self.args.dry_run:
+            return f"CEO of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        self.debug_log(f"Attempting to extract CEO for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        # Method 1: Look for structured data
+        try:
+            # Extract all text-containing elements
+            text_elements = soup.find_all(['p', 'span', 'div', 'li'])
+            
+            # Create a list of text snippets for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text and len(text) > 10:  # Ignore very short snippets
+                    snippets.append(text)
+            
+            # Define CEO pattern matches
+            ceo_patterns = [
+                r"CEO\s+(is|of)\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)",
+                r"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)\s+is\s+(?:the\s+)?(?:current\s+)?(?:CEO|Chief Executive Officer)",
+                r"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)\s+has been\s+(?:the\s+)?(?:CEO|Chief Executive Officer)",
+                r"led by\s+(?:CEO|Chief Executive Officer)\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)",
+                r"led by\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?),\s+(?:the\s+)?(?:CEO|Chief Executive Officer)",
+                r"(?:CEO|Chief Executive Officer)[,]?\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)",
+                r"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)\s+serves as\s+(?:the\s+)?(?:CEO|Chief Executive Officer)",
+                r"current\s+(?:CEO|Chief Executive Officer)\s+(?:is\s+)?([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)"
+            ]
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in ceo_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        # Determine which group contains the CEO name based on pattern
+                        if pattern.startswith(r"CEO"):
+                            ceo = match.group(2)
+                        else:
+                            ceo = match.group(1)
+                        
+                        if ceo:
+                            self.debug_log(f"Extracted CEO from snippet: {ceo}", company, "extraction")
+                            return ceo
+            
+            # If no patterns matched, look for CEO-related content more broadly
+            ceo_related_texts = []
+            for snippet in snippets:
+                if "ceo" in snippet.lower() or "chief executive" in snippet.lower():
+                    ceo_related_texts.append(snippet)
+            
+            if ceo_related_texts:
+                # Look for a name pattern in the CEO-related content
+                name_pattern = r"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)"
+                for text in ceo_related_texts:
+                    match = re.search(name_pattern, text)
+                    if match:
+                        ceo = match.group(1)
+                        self.debug_log(f"Extracted CEO from related text: {ceo}", company, "extraction")
+                        return ceo
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting CEO: {e}", company, "extraction")
+        
+        # If all extraction methods fail, return placeholder
+        self.debug_log("Failed to extract CEO", company, "extraction")
+        return "Not found"
+    
+    def extract_staff_by_title(self, html_content, company):
+        """Extract staff member by title from search results"""
+        if self.args.dry_run:
+            return f"Staff member ({self.args.target_staff}) of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        target_title = self.args.target_staff
+        if not target_title:
+            return "No title specified"
+            
+        self.debug_log(f"Attempting to extract {target_title} for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        try:
+            # Extract all text-containing elements
+            text_elements = soup.find_all(['p', 'span', 'div', 'li'])
+            
+            # Create a list of text snippets for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text and len(text) > 10:  # Ignore very short snippets
+                    snippets.append(text)
+            
+            # Create patterns for the specified title
+            # Normalize the title for pattern matching
+            normalized_title = target_title.lower().replace(' ', '\\s+')
+            
+            # Define staff pattern matches
+            staff_patterns = [
+                rf"{normalized_title}\s+(is|of)\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)",
+                rf"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)\s+is\s+(?:the\s+)?(?:current\s+)?(?:{normalized_title})",
+                rf"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)\s+has been\s+(?:the\s+)?(?:{normalized_title})",
+                rf"led by\s+(?:{normalized_title})\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)",
+                rf"led by\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?),\s+(?:the\s+)?(?:{normalized_title})",
+                rf"(?:{normalized_title})[,]?\s+([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)",
+                rf"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)\s+serves as\s+(?:the\s+)?(?:{normalized_title})",
+                rf"current\s+(?:{normalized_title})\s+(?:is\s+)?([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)"
+            ]
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in staff_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        # Extract the name based on the pattern
+                        if len(match.groups()) > 1 and pattern.startswith(rf"{normalized_title}"):
+                            staff_name = match.group(2)
+                        else:
+                            staff_name = match.group(1)
+                        
+                        if staff_name:
+                            self.debug_log(f"Extracted {target_title} from snippet: {staff_name}", company, "extraction")
+                            return staff_name
+            
+            # If no patterns matched, look for title-related content more broadly
+            title_related_texts = []
+            for snippet in snippets:
+                if target_title.lower() in snippet.lower():
+                    title_related_texts.append(snippet)
+            
+            if title_related_texts:
+                # Look for a name pattern in the title-related content
+                name_pattern = r"([A-Z][a-z]+\s+[A-Z][a-z]+(?:[ -][A-Z][a-z]+)?)"
+                for text in title_related_texts:
+                    match = re.search(name_pattern, text)
+                    if match:
+                        staff_name = match.group(1)
+                        self.debug_log(f"Extracted {target_title} from related text: {staff_name}", company, "extraction")
+                        return staff_name
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting {target_title}: {e}", company, "extraction")
+        
+        # If all extraction methods fail, return placeholder
+        self.debug_log(f"Failed to extract {target_title}", company, "extraction")
+        return "Not found"
+    
+    def extract_address(self, html_content, company):
+        """Extract headquarters address from search results"""
+        if self.args.dry_run:
+            return f"Address of {company} HQ (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        self.debug_log(f"Attempting to extract headquarters address for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        try:
+            # Extract all text-containing elements
+            text_elements = soup.find_all(['p', 'span', 'div', 'li'])
+            
+            # Create a list of text snippets for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text and len(text) > 10:  # Ignore very short snippets
+                    snippets.append(text)
+            
+            # Define address pattern matches
+            address_patterns = [
+                r"located at\s+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+[,\s]+[A-Za-z]+\s+[0-9-]+)",
+                r"located at\s+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+)",
+                r"located in\s+([A-Za-z\s]+(?:,|\s+in\s+)[A-Za-z\s]+)",
+                r"headquarters\s+(?:is|are)\s+(?:in|at)\s+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+)",
+                r"headquarters\s+(?:is|are)\s+(?:in|at)\s+([A-Za-z\s]+(?:,|\s+in\s+)[A-Za-z\s]+)",
+                r"headquartered\s+(?:in|at)\s+([A-Za-z\s]+(?:,|\s+in\s+)[A-Za-z\s]+)",
+                r"based\s+(?:in|at)\s+([A-Za-z\s]+(?:,|\s+in\s+)[A-Za-z\s]+)",
+                r"address\s+(?:is|of|:)\s+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+)"
+            ]
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in address_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        address = match.group(1).strip()
+                        if address:
+                            self.debug_log(f"Extracted address from snippet: {address}", company, "extraction")
+                            return address
+            
+            # If no patterns matched, look for address-related content more broadly
+            location_related_texts = []
+            for snippet in snippets:
+                if any(term in snippet.lower() for term in ["headquarters", "located", "address", "based in"]):
+                    location_related_texts.append(snippet)
+            
+            if location_related_texts:
+                # Look for an address pattern in the location-related content
+                address_pattern = r"([0-9]+\s+[A-Za-z\s]+(?:Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+)"
+                for text in location_related_texts:
+                    match = re.search(address_pattern, text, re.IGNORECASE)
+                    if match:
+                        address = match.group(1)
+                        self.debug_log(f"Extracted address from related text: {address}", company, "extraction")
+                        return address
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting address: {e}", company, "extraction")
+        
+        # If all extraction methods fail, return placeholder
+        self.debug_log("Failed to extract headquarters address", company, "extraction")
+        return "Not found"
+    
+    def extract_mailing_address(self, html_content, company):
+        """Extract mailing address from search results"""
+        if self.args.dry_run:
+            return f"Mailing address of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        self.debug_log(f"Attempting to extract mailing address for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        try:
+            # Extract all text-containing elements
+            text_elements = soup.find_all(['p', 'span', 'div', 'li'])
+            
+            # Create a list of text snippets for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text and len(text) > 10:  # Ignore very short snippets
+                    snippets.append(text)
+            
+            # Define mailing address pattern matches
+            mailing_patterns = [
+                r"mailing address[:\s]+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+[,\s]+[A-Za-z]+\s+[0-9-]+)",
+                r"postal address[:\s]+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+[,\s]+[A-Za-z]+\s+[0-9-]+)",
+                r"mail to[:\s]+([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+[,\s]+[A-Za-z]+\s+[0-9-]+)",
+                r"P\.?O\.?\s+Box\s+([0-9]+)[,\s]+([A-Za-z\s]+[,\s]+[A-Za-z]+\s+[0-9-]+)",
+                r"([0-9]+\s+[A-Za-z\s]+(Road|Street|Avenue|Ave|St|Blvd|Boulevard|Pkwy|Parkway)[,\s]+[A-Za-z\s]+[,\s]+[A-Za-z]+\s+[0-9-]+)"
+            ]
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in mailing_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        if pattern.startswith(r"P\.?O\.?"):
+                            # Handle PO Box format
+                            po_box = f"PO Box {match.group(1)}"
+                            location = match.group(2).strip()
+                            address = f"{po_box}, {location}"
+                        else:
+                            address = match.group(1).strip()
+                        
+                        if address:
+                            self.debug_log(f"Extracted mailing address from snippet: {address}", company, "extraction")
+                            return address
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting mailing address: {e}", company, "extraction")
+        
+        # If all extraction methods fail, return placeholder
+        self.debug_log("Failed to extract mailing address", company, "extraction")
+        return "Not found"
+    
+    def extract_phone(self, html_content, company):
+        """Extract phone number from search results"""
+        if self.args.dry_run:
+            return f"Phone number of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        self.debug_log(f"Attempting to extract phone number for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        try:
+            # Extract all text-containing elements
+            text_elements = soup.find_all(['p', 'span', 'div', 'li'])
+            
+            # Create a list of text snippets for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text:
+                    snippets.append(text)
+            
+            # Define phone pattern matches
+            phone_patterns = [
+                r"phone[:\s]+(\+?[0-9][\s\-\.\(\)0-9]{8,20})",
+                r"call[:\s]+(\+?[0-9][\s\-\.\(\)0-9]{8,20})",
+                r"telephone[:\s]+(\+?[0-9][\s\-\.\(\)0-9]{8,20})",
+                r"tel[:\s]+(\+?[0-9][\s\-\.\(\)0-9]{8,20})",
+                r"contact[:\s]+(\+?[0-9][\s\-\.\(\)0-9]{8,20})",
+                r"(?<![0-9])(\(?[0-9]{3}\)?[\-\.\s]?[0-9]{3}[\-\.\s]?[0-9]{4})(?![0-9])",   # US format
+                r"(?<![0-9])(\+[0-9]{1,3}[\s\-\.]?[0-9]{1,4}[\s\-\.]?[0-9]{1,4}[\s\-\.]?[0-9]{1,9})(?![0-9])"  # International format
+            ]
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in phone_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        phone = match.group(1).strip()
+                        if phone:
+                            self.debug_log(f"Extracted phone from snippet: {phone}", company, "extraction")
+                            return phone
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting phone: {e}", company, "extraction")
+        
+        # If all extraction methods fail, return placeholder
+        self.debug_log("Failed to extract phone number", company, "extraction")
+        return "Not found"
+    
+    def extract_email(self, html_content, company):
+        """Extract email address from search results"""
+        if self.args.dry_run:
+            return f"Email of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        self.debug_log(f"Attempting to extract email for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        try:
+            # Extract all text-containing elements
+            text_elements = soup.find_all(['p', 'span', 'div', 'li', 'a'])
+            
+            # Create a list of text snippets for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text:
+                    snippets.append(text)
+                # Also check for href attributes in <a> tags
+                if element.name == 'a' and element.has_attr('href'):
+                    href = element['href']
+                    if href.startswith('mailto:'):
+                        snippets.append(href)
+            
+            # Define email pattern matches
+            email_patterns = [
+                r"email[:\s]+([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})",
+                r"e-mail[:\s]+([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})",
+                r"mailto:([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})",
+                r"([a-zA-Z0-9._%+\-]+@[a-zA-Z0-9.\-]+\.[a-zA-Z]{2,})"  # Generic email pattern
+            ]
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in email_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        email = match.group(1).strip().lower()
+                        if email:
+                            # Basic validation to avoid false positives
+                            if '.' in email.split('@')[1] and '@' in email:
+                                self.debug_log(f"Extracted email from snippet: {email}", company, "extraction")
+                                return email
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting email: {e}", company, "extraction")
+        
+        # If all extraction methods fail, return placeholder
+        self.debug_log("Failed to extract email", company, "extraction")
+        return "Not found"
+    
+    def extract_social(self, html_content, company):
+        """Extract social media profiles from search results"""
+        if self.args.dry_run:
+            return f"Social media of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        self.debug_log(f"Attempting to extract social media profiles for {company}", company, "extraction")
+        
+        # Parse HTML with Beautiful Soup
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        try:
+            # Extract all text-containing elements and links
+            text_elements = soup.find_all(['p', 'span', 'div', 'li'])
+            link_elements = soup.find_all('a')
+            
+            # Create a list of text snippets and href values for pattern matching
+            snippets = []
+            for element in text_elements:
+                text = element.get_text(strip=True)
+                if text:
+                    snippets.append(text)
+            
+            for link in link_elements:
+                if link.has_attr('href'):
+                    snippets.append(link['href'])
+            
+            # Define social media pattern matches
+            social_patterns = [
+                r"(?:https?://)?(?:www\.)?twitter\.com/([A-Za-z0-9_]+)",
+                r"(?:https?://)?(?:www\.)?linkedin\.com/(?:company|in)/([A-Za-z0-9_\-]+)",
+                r"(?:https?://)?(?:www\.)?facebook\.com/([A-Za-z0-9\.\-]+)",
+                r"(?:https?://)?(?:www\.)?instagram\.com/([A-Za-z0-9_\.]+)",
+                r"(?:https?://)?(?:www\.)?youtube\.com/(?:channel|user)/([A-Za-z0-9_\-]+)"
+            ]
+            
+            social_results = []
+            
+            # Try each pattern on the snippets
+            for snippet in snippets:
+                for pattern in social_patterns:
+                    self.debug_log(f"Checking snippet with pattern: {pattern}", company, "patterns")
+                    
+                    match = re.search(pattern, snippet, re.IGNORECASE)
+                    if match:
+                        handle = match.group(1).strip()
+                        platform = pattern.split(r'\.')[1].split(r'\.')[0]  # Extract platform name from pattern
+                        
+                        if handle:
+                            social_entry = f"{platform}: {handle}"
+                            if social_entry not in social_results:
+                                social_results.append(social_entry)
+                                self.debug_log(f"Extracted social media: {social_entry}", company, "extraction")
+            
+            if social_results:
+                return "; ".join(social_results)
+        
+        except Exception as e:
+            self.debug_log(f"Error extracting social media: {e}", company, "extraction")
+        
+        # If no social media profiles found, return placeholder
+        self.debug_log("Failed to extract social media profiles", company, "extraction")
+        return "Not found"
+    
+    def extract_contact(self, html_content, company):
+        """Extract general contact information from search results"""
+        if self.args.dry_run:
+            return f"Contact info of {company} (dry run)"
+        
+        if "<search-failed></search-failed>" in html_content:
+            return "Not found"
+        
+        # This is a combined extraction function that looks for multiple 
+        # types of contact information in one search result
+        contact_parts = {}
+        
+        # Use the specialized extraction methods
+        contact_parts["phone"] = self.extract_phone(html_content, company)
+        contact_parts["email"] = self.extract_email(html_content, company)
+        
+        # Combine the results
+        contact_info = []
+        for key, value in contact_parts.items():
+            if value != "Not found":
+                contact_info.append(f"{key}: {value}")
+        
+        if contact_info:
+            return "; ".join(contact_info)
+        
+        return "Not found"
+    
+    def debug_log(self, message, company, log_type):
+        """Log debug information if debug mode is enabled"""
+        if self.args.debug:
+            clean_company = re.sub(r'[^a-zA-Z0-9_-]', '_', company)
+            log_file = os.path.join(Config.DEBUG_DIR, log_type, f"{clean_company}.log")
+            
+            with open(log_file, 'a', encoding='utf-8') as f:
+                timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                f.write(f"[{timestamp}] {message}\n")
+            
+            if self.args.verbose:
+                print(f"DEBUG: {message}")
+        elif self.args.verbose:
+            print(f"INFO: {message}")
+    
+    def process_companies(self):
+        """Process the list of companies and create CSV output"""
+        total = len(self.companies)
+        
+        # Process each company
+        for i, company in enumerate(self.companies):
+            progress = int((i + 1) * 100 / total)
+            print(f"Processing {i+1} of {total} ({progress}%): {company}")
+            
+            if not company:
+                continue
+            
+            # Initialize result dictionary for this company
+            company_result = {
+                "company": company
+            }
+            
+            # Process each selected search type
+            for search_type in self.search_types:
+                search_html = self.search_company(company, search_type)
+                
+                # Add a delay between searches
+                if not self.args.dry_run and search_type != self.search_types[-1]:
+                    delay = random.uniform(*Config.DELAY_BETWEEN_SEARCHES)
+                    if self.args.verbose:
+                        print(f"Waiting {delay:.2f} seconds between searches...")
+                    time.sleep(delay)
+                
+                # Extract information based on search type
+                if search_type == "ceo":
+                    company_result["ceo"] = self.extract_ceo(search_html, company)
+                elif search_type == "hq":
+                    company_result["headquarters"] = self.extract_address(search_html, company)
+                elif search_type == "phone":
+                    company_result["phone"] = self.extract_phone(search_html, company)
+                elif search_type == "email":
+                    company_result["email"] = self.extract_email(search_html, company)
+                elif search_type == "social":
+                    company_result["social_media"] = self.extract_social(search_html, company)
+                elif search_type == "contact":
+                    company_result["contact_info"] = self.extract_contact(search_html, company)
+                elif search_type == "mailing":
+                    company_result["mailing_address"] = self.extract_mailing_address(search_html, company)
+                elif search_type == "staff":
+                    staff_title = self.args.target_staff or "CEO"
+                    company_result[f"{staff_title.lower().replace(' ', '_')}"] = self.extract_staff_by_title(search_html, company)
+            
+            # Add result to list
+            self.results.append(company_result)
+            
+            # Add a delay between companies
+            if not self.args.dry_run and i < total - 1:
+                delay = random.uniform(*Config.DELAY_BETWEEN_COMPANIES)
+                if self.args.verbose:
+                    print(f"Waiting {delay:.2f} seconds before next company...")
+                time.sleep(delay)
+        
+        print(f"Completed processing {total} companies.")
+    
+    def save_results(self):
+        """Save results to CSV file"""
+        try:
+            # Determine all fields across all results
+            all_fields = set()
+            for result in self.results:
+                all_fields.update(result.keys())
+            
+            # Ensure 'company' is the first field
+            field_list = sorted(list(all_fields))
+            if 'company' in field_list:
+                field_list.remove('company')
+                field_list = ['company'] + field_list
+            
+            with open(self.args.output_file, 'w', newline='', encoding='utf-8') as f:
+                writer = csv.writer(f)
+                writer.writerow(field_list)
+                
+                for result in self.results:
+                    row = []
+                    for field in field_list:
+                        row.append(result.get(field, ""))
+                    writer.writerow(row)
+            
+            print(f"Results saved to {self.args.output_file}")
+        except Exception as e:
+            print(f"Error saving results: {e}")
+    
+    def run(self):
+        """Main execution method"""
+        print(f"Enhanced Company Information Scraper v{Config.VERSION}")
+        self.load_companies()
+        
+        if self.args.verbose:
+            print(f"Using SearXNG at: {Config.SEARXNG_URL}")
+            print(f"Mode: {self.args.mode}")
+            if self.args.target_staff:
+                print(f"Target staff title: {self.args.target_staff}")
+            print(f"Debug mode: {self.args.debug}")
+            print(f"Cache: {'enabled' if self.args.use_cache else 'disabled'}")
+            print(f"Saving raw HTML: {self.args.save_raw}")
+        
+        self.process_companies()
+        self.save_results()
+        
+        if self.args.save_raw:
+            print(f"Raw HTML search results saved to {Config.RAW_DIR}/")
+
+def parse_args():
+    """Parse command line arguments"""
+    parser = argparse.ArgumentParser(description='Enhanced Company Information Scraper with SearXNG')
+    parser.add_argument('-i', '--input', dest='input_file',
+                        help='Input file with company names (one per line)')
+    parser.add_argument('-o', '--output', dest='output_file',
+                        default=f"company_data_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.csv",
+                        help='Output CSV file (default: company_data_<timestamp>.csv)')
+    
+    # Scraping mode options
+    mode_group = parser.add_argument_group('Scraping Mode')
+    mode_group.add_argument('-m', '--mode', choices=['minimal', 'targeted', 'comprehensive'],
+                        default='minimal',
+                        help='Scraping mode: minimal (CEO, HQ only), targeted (specific data), comprehensive (all data)')
+    mode_group.add_argument('-T', '--target-staff', dest='target_staff',
+                        help='Target specific staff title (e.g., "CTO", "CFO", "Marketing Director")')
+    
+    # Include/exclude data types
+    data_group = parser.add_argument_group('Data Selection')
+    data_group.add_argument('--include-types', dest='include_types',
+                        help='Comma-separated list of data types to include (ceo,hq,phone,email,social,contact,mailing,staff)')
+    data_group.add_argument('--exclude-types', dest='exclude_types',
+                        help='Comma-separated list of data types to exclude')
+    data_group.add_argument('--include-contact', dest='include_contact', action='store_true',
+                        help='Include contact information (phone, email) in targeted mode')
+    data_group.add_argument('--include-address', dest='include_address', action='store_true',
+                        help='Include address information (HQ, mailing) in targeted mode')
+    data_group.add_argument('--include-social', dest='include_social', action='store_true',
+                        help='Include social media information in targeted mode')
+    
+    # Cache and performance options
+    cache_group = parser.add_argument_group('Cache and Performance')
+    cache_group.add_argument('-c', '--no-cache', dest='use_cache',
+                        action='store_false', default=True,
+                        help='Disable caching of search results')
+    cache_group.add_argument('-t', '--timeout', dest='timeout',
+                        type=int, default=Config.DEFAULT_TIMEOUT,
+                        help=f'Set request timeout in seconds (default: {Config.DEFAULT_TIMEOUT})')
+    
+    # Debug and logging options
+    debug_group = parser.add_argument_group('Debug and Logging')
+    debug_group.add_argument('-D', '--dry-run', dest='dry_run',
+                        action='store_true', default=False,
+                        help='Show what would be done without executing searches')
+    debug_group.add_argument('-d', '--debug', dest='debug',
+                        action='store_true', default=False,
+                        help='Enable debug mode (saves extraction details)')
+    debug_group.add_argument('-r', '--raw', dest='save_raw',
+                        action='store_true', default=False,
+                        help='Save raw HTML from searches for inspection')
+    debug_group.add_argument('-v', '--verbose', dest='verbose',
+                        action='store_true', default=False,
+                        help='Show verbose output during processing')
+    
+    # SearXNG configuration
+    searx_group = parser.add_argument_group('SearXNG Configuration')
+    searx_group.add_argument('-s', '--searxng-url', dest='searxng_url',
+                        default=Config.SEARXNG_URL,
+                        help=f'SearXNG instance URL (default: {Config.SEARXNG_URL})')
+    
+    args = parser.parse_args()
+    
+    # Override the SearXNG URL if provided
+    if args.searxng_url != Config.SEARXNG_URL:
+        Config.SEARXNG_URL = args.searxng_url
+    
+    return args
+
+if __name__ == "__main__":
+    args = parse_args()
+    scraper = EnhancedCompanyScraper(args)
+    scraper.run()
\ No newline at end of file
diff --git a/setup.sh b/setup.sh
index 17b2be2..377dafb 100755
--- a/setup.sh
+++ b/setup.sh
@@ -204,81 +204,9 @@ install_nvidia_toolkit() {
     success "NVIDIA Container Toolkit installed."
 }
 
-# ── Ollama ─────────────────────────────────────────────────────────────────────
-install_ollama() {
-    # ── Install ───────────────────────────────────────────────────────────────
-    if cmd_exists ollama; then
-        success "Ollama already installed: $(ollama --version 2>/dev/null)"
-    else
-        info "Installing Ollama…"
-        case "$OS" in
-            Linux)
-                curl -fsSL https://ollama.com/install.sh | sh ;;
-            Darwin)
-                if cmd_exists brew; then
-                    brew install ollama
-                else
-                    warn "Homebrew not found — skipping Ollama. Install from: https://ollama.com/download"
-                    return
-                fi ;;
-        esac
-        success "Ollama installed."
-    fi
-
-    # ── Start service ─────────────────────────────────────────────────────────
-    if [[ "$OS" == "Linux" ]] && command -v systemctl &>/dev/null; then
-        $SUDO systemctl enable ollama 2>/dev/null || true
-        if ! systemctl is-active --quiet ollama 2>/dev/null; then
-            info "Starting Ollama service…"
-            $SUDO systemctl start ollama 2>/dev/null || true
-        fi
-        info "Waiting for Ollama to be ready…"
-        local i=0
-        until ollama list &>/dev/null 2>&1; do
-            sleep 1; i=$((i+1))
-            if [[ $i -ge 30 ]]; then
-                warn "Ollama service timed out. Run: sudo systemctl start ollama"
-                return
-            fi
-        done
-        success "Ollama service running."
-    elif [[ "$OS" == "Darwin" ]]; then
-        if ! ollama list &>/dev/null 2>&1; then
-            info "Starting Ollama…"
-            brew services start ollama 2>/dev/null \
-                || { ollama serve &>/tmp/ollama.log &; }
-            local i=0
-            until ollama list &>/dev/null 2>&1; do
-                sleep 1; i=$((i+1))
-                if [[ $i -ge 15 ]]; then
-                    warn "Ollama did not start. Run: ollama serve"
-                    return
-                fi
-            done
-        fi
-        success "Ollama service running."
-    fi
-
-    # ── Pull default model ────────────────────────────────────────────────────
-    local script_dir model
-    script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-    model="$(grep -E '^OLLAMA_DEFAULT_MODEL=' "${script_dir}/.env" 2>/dev/null \
-             | cut -d= -f2 | tr -d '[:space:]')"
-    [[ -z "$model" ]] && model="llama3.2:3b"
-
-    if ollama show "${model}" &>/dev/null 2>&1; then
-        success "Default model already present: ${model}"
-    else
-        info "Pulling default model: ${model} (this may take several minutes)…"
-        if ollama pull "${model}"; then
-            success "Default model ready: ${model}"
-        else
-            warn "Model pull failed — run manually: ollama pull ${model}"
-        fi
-    fi
-}
-
 # ── Environment setup ──────────────────────────────────────────────────────────
+# Note: Ollama runs as a Docker container — the compose.yml ollama service
+# handles model download automatically on first start (see docker/ollama/entrypoint.sh).
 setup_env() {
     SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
     if [[ ! -f "$SCRIPT_DIR/.env" ]]; then
@@ -292,10 +220,11 @@ setup_env() {
 # ── Main ───────────────────────────────────────────────────────────────────────
 main() {
     echo ""
-    echo -e "${BLUE}╔══════════════════════════════════════════╗${NC}"
-    echo -e "${BLUE}║   Peregrine — Dependency Installer       ║${NC}"
-    echo -e "${BLUE}║   by Circuit Forge LLC                   ║${NC}"
-    echo -e "${BLUE}╚══════════════════════════════════════════╝${NC}"
+    echo -e "${BLUE}╔══════════════════════════════════════════════════════╗${NC}"
+    echo -e "${BLUE}║   Peregrine — Dependency Installer                   ║${NC}"
+    echo -e "${BLUE}║   by Circuit Forge LLC                               ║${NC}"
+    echo -e "${BLUE}║   \"Don't be evil, for real and forever.\"             ║${NC}"
+    echo -e "${BLUE}╚══════════════════════════════════════════════════════╝${NC}"
     echo ""
 
     install_git
@@ -305,8 +234,7 @@ main() {
         check_compose
         install_nvidia_toolkit
     fi
-    setup_env       # creates .env before install_ollama reads OLLAMA_DEFAULT_MODEL
-    install_ollama
+    setup_env
 
     echo ""
     success "All dependencies installed."