diff --git a/config/skills_suggestions.yaml b/config/skills_suggestions.yaml new file mode 100644 index 0000000..6b93f75 --- /dev/null +++ b/config/skills_suggestions.yaml @@ -0,0 +1,193 @@ +# skills_suggestions.yaml — Bundled tag suggestions for the Skills & Keywords UI. +# Shown as searchable options in the multiselect. Users can add custom tags beyond these. +# Future: community aggregate (paid tier) will supplement this list from anonymised installs. + +skills: + # ── Customer Success & Account Management ── + - Customer Success + - Technical Account Management + - Account Management + - Customer Onboarding + - Renewal Management + - Churn Prevention + - Expansion Revenue + - Executive Relationship Management + - Escalation Management + - QBR Facilitation + - Customer Advocacy + - Voice of the Customer + - Customer Health Scoring + - Success Planning + - Customer Education + - Implementation Management + # ── Revenue & Operations ── + - Revenue Operations + - Sales Operations + - Pipeline Management + - Forecasting + - Contract Negotiation + - Upsell & Cross-sell + - ARR / MRR Management + - NRR Optimization + - Quota Attainment + # ── Leadership & Management ── + - Team Leadership + - People Management + - Cross-functional Collaboration + - Change Management + - Stakeholder Management + - Executive Presentation + - Strategic Planning + - OKR Setting + - Hiring & Recruiting + - Coaching & Mentoring + - Performance Management + # ── Project & Program Management ── + - Project Management + - Program Management + - Agile / Scrum + - Kanban + - Risk Management + - Resource Planning + - Process Improvement + - SOP Development + # ── Technical Skills ── + - SQL + - Python + - Data Analysis + - Tableau + - Looker + - Power BI + - Excel / Google Sheets + - REST APIs + - Salesforce + - HubSpot + - Gainsight + - Totango + - ChurnZero + - Zendesk + - Intercom + - Jira + - Confluence + - Notion + - Slack + - Zoom + # ── Communications & Writing ── + - Executive 
Communication + - Technical Writing + - Proposal Writing + - Presentation Skills + - Public Speaking + - Stakeholder Communication + # ── Compliance & Security ── + - Compliance + - Risk Assessment + - SOC 2 + - ISO 27001 + - GDPR + - Security Awareness + - Vendor Management + +domains: + # ── Software & Tech ── + - B2B SaaS + - Enterprise Software + - Cloud Infrastructure + - Developer Tools + - Cybersecurity + - Data & Analytics + - AI / ML Platform + - FinTech + - InsurTech + - LegalTech + - HR Tech + - MarTech + - AdTech + - DevOps / Platform Engineering + - Open Source + # ── Industry Verticals ── + - Healthcare / HealthTech + - Education / EdTech + - Non-profit / Social Impact + - Government / GovTech + - E-commerce / Retail + - Manufacturing + - Financial Services + - Media & Entertainment + - Music Industry + - Logistics & Supply Chain + - Real Estate / PropTech + - Energy / CleanTech + - Hospitality & Travel + # ── Market Segments ── + - Enterprise + - Mid-Market + - SMB / SME + - Startup + - Fortune 500 + - Public Sector + - International / Global + # ── Business Models ── + - Subscription / SaaS + - Marketplace + - Usage-based Pricing + - Professional Services + - Self-serve / PLG + +keywords: + # ── CS Metrics & Outcomes ── + - NPS + - CSAT + - CES + - Churn Rate + - Net Revenue Retention + - Gross Revenue Retention + - Logo Retention + - Time-to-Value + - Product Adoption + - Feature Utilisation + - Health Score + - Customer Lifetime Value + # ── Sales & Growth ── + - ARR + - MRR + - GRR + - NRR + - Expansion ARR + - Pipeline Coverage + - Win Rate + - Average Contract Value + - Land & Expand + - Multi-threading + # ── Process & Delivery ── + - Onboarding + - Implementation + - Knowledge Transfer + - Escalation + - SLA + - Root Cause Analysis + - Post-mortem + - Runbook + - Playbook Development + - Feedback Loop + - Product Roadmap Input + # ── Team & Culture ── + - Cross-functional + - Distributed Team + - Remote-first + - High-growth + - Fast-paced + 
"""
skills_utils.py — Content filter and suggestion loader for the skills tagging system.

load_suggestions(category) → list[str]    bundled suggestions for a category
filter_tag(tag) → str | None              cleaned tag, or None if rejected
"""
from __future__ import annotations

import re
from pathlib import Path

_SUGGESTIONS_FILE = Path(__file__).parent.parent / "config" / "skills_suggestions.yaml"

# ── Content filter ─────────────────────────────────────────────────────────────
# Tags must be short, human-readable skill/domain labels. No URLs, no abuse.

_MIN_TAG_LEN = 2
_MAX_TAG_LEN = 60

_BLOCKED = {
    # profanity placeholder — extend as needed
    "fuck", "shit", "ass", "bitch", "cunt", "dick", "bastard", "damn",
}

# One whole-word pattern per blocked term, compiled once at import time instead
# of on every filter_tag() call. Matched against the lower-cased tag.
_BLOCKED_PATTERNS = tuple(
    re.compile(rf"\b{re.escape(term)}\b") for term in sorted(_BLOCKED)
)

_URL_RE = re.compile(r"https?://|www\.|\.com\b|\.net\b|\.org\b", re.I)

# Dotted technology names (".NET", "ASP.NET", "ADO.NET", "VB.NET") end in ".net"
# and would otherwise be rejected by the bare-TLD alternative in _URL_RE. The
# look-behind keeps real host names such as "example.net" or "foo-asp.net" from
# being excused.
_DOTTED_TECH_RE = re.compile(r"(?<![\w.\-])(?:asp|ado|vb)?\.net\b", re.I)

_ALLOWED_CHARS = re.compile(r"^[\w\s\-\.\+\#\/\&\(\)]+$", re.UNICODE)

# Five or more consecutive occurrences of the same character (e.g. 'aaaaa').
_REPEAT_RUN_RE = re.compile(r"(.)\1{4,}")


def filter_tag(raw: str) -> str | None:
    """Return a cleaned tag string, or None if the tag should be rejected.

    The input is stripped and internal whitespace runs are collapsed to a
    single space before any check is applied; the returned tag keeps its
    original letter case.

    Rejection criteria:
      - Not a string
      - Blank after stripping
      - Too short (< 2 chars) or too long (> 60 chars)
      - Contains a URL pattern (dotted technology names such as '.NET' or
        'ASP.NET' are not treated as URLs)
      - Contains disallowed characters
      - Matches a blocked term (case-insensitive, whole-word)
      - Repeated character run (e.g. 'aaaaa')
    """
    if not isinstance(raw, str):
        return None

    tag = " ".join(raw.split())  # strip + normalise whitespace in one pass
    if not _MIN_TAG_LEN <= len(tag) <= _MAX_TAG_LEN:
        return None

    # Mask known dotted technology names first so that only genuine URL
    # fragments reach the URL check.
    if _URL_RE.search(_DOTTED_TECH_RE.sub("", tag)):
        return None
    if not _ALLOWED_CHARS.match(tag):
        return None

    lower = tag.lower()
    if any(pattern.search(lower) for pattern in _BLOCKED_PATTERNS):
        return None
    if _REPEAT_RUN_RE.search(lower):
        return None
    return tag


# ── Suggestion loader ──────────────────────────────────────────────────────────

def load_suggestions(category: str) -> list[str]:
    """Return the bundled suggestion list for a category ('skills'|'domains'|'keywords').

    Returns an empty list if the suggestions file is missing or unreadable,
    if PyYAML is not installed, if the document is not a mapping, or if the
    category is absent or is not a list. Entries that are not strings are
    skipped so the result is always a list of str.
    """
    try:
        # Imported lazily: a missing PyYAML degrades to "no suggestions".
        import yaml

        # Explicit UTF-8: the bundled file contains non-ASCII characters, so
        # relying on the platform default encoding can fail to decode it.
        text = _SUGGESTIONS_FILE.read_text(encoding="utf-8")
        data = yaml.safe_load(text)
    except Exception:
        # Deliberate best-effort: any import, I/O, decoding or parse failure
        # yields an empty suggestion list rather than an exception.
        return []

    if not isinstance(data, dict):
        return []
    entries = data.get(category)
    if not isinstance(entries, list):
        # Guards against a scalar value being exploded into characters.
        return []
    return [entry for entry in entries if isinstance(entry, str)]