feat(discovery): add Lemmy community search, fix dead request, add platform field

- Add app/services/lemmy/discovery.py: searches 5 major Lemmy instances, deduplicates by actor_id (the canonical ActivityPub URL), skips NSFW communities, and uses the community@instance naming convention of the existing Lemmy client
- Update POST /subs/discover: accepts a platforms[] param (defaults to both), fans out to Reddit + Lemmy search, then merges and sorts by subscribers
- Add platform field to all discovery result dicts (Reddit and Lemmy)
- Fix: remove dead _get() call left in search_subs() during an earlier refactor
- Frontend: show a platform badge on each discovery row, use the correct hyperlink format for Lemmy (https://{instance}/c/{community}), and pass r.platform to upsertRules on import so Lemmy subs land in the lemmy platform slot
2026-06-13 22:23:31 -07:00 · 2026-06-13 22:23:31 -07:00 · 35c6e5f7bc
commit 35c6e5f7bc
parent f39f36e258
5 changed files with 143 additions and 22 deletions
--- a/app/api/endpoints/subs.py
+++ b/app/api/endpoints/subs.py
@ -14,6 +14,7 @@ router = APIRouter(prefix="/subs", tags=["subs"])
 class DiscoverBody(BaseModel):
+    # Request body consumed by discover_subs.
+    # keyword: search term forwarded to each platform's search function.
     keyword: str
+    # limit: forwarded as `limit` to each platform's search function.
     limit: int = 15
+    # platforms: which platforms to search; discover_subs checks for
+    # "reddit" and "lemmy", and an empty list falls back to both.
+    platforms: list[str] = ["reddit", "lemmy"]


 def _in_thread(fn):
@ -70,18 +71,35 @@ async def discover_subs(body: DiscoverBody):
    from app.services.reddit.discovery import search_and_analyze

    def _run(store: Store):
-        # Collect already-tracked sub names so the UI can flag them
-        existing = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
-        try:
-            from app.services.reddit.client import RedditClient
-            cookies = RedditClient().cookies
-        except Exception:
-            cookies = None
-        return search_and_analyze(
-            keyword=body.keyword,
-            limit=body.limit,
-            cookies=cookies,
-            known_subs=existing,
-        )
+        platforms = set(body.platforms or ["reddit", "lemmy"])
+        results: list[dict] = []
+
+        if "reddit" in platforms:
+            from app.services.reddit.discovery import search_and_analyze
+            existing_reddit = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
+            try:
+                from app.services.reddit.client import RedditClient
+                cookies = RedditClient().cookies
+            except Exception:
+                cookies = None
+            results.extend(search_and_analyze(
+                keyword=body.keyword,
+                limit=body.limit,
+                cookies=cookies,
+                known_subs=existing_reddit,
+            ))
+
+        if "lemmy" in platforms:
+            from app.services.lemmy.discovery import search_lemmy
+            existing_lemmy = {r["sub"].lower() for r in store.list_sub_rules("lemmy")}
+            results.extend(search_lemmy(
+                keyword=body.keyword,
+                limit=body.limit,
+                known_subs=existing_lemmy,
+            ))
+
+        # Merge and sort by subscribers descending
+        results.sort(key=lambda x: x.get("subscribers", 0), reverse=True)
+        return results

    return await asyncio.to_thread(_in_thread, _run)
--- a/app/services/lemmy/discovery.py
+++ b/app/services/lemmy/discovery.py
@ -0,0 +1,100 @@
+"""
+Lemmy community discovery.
+
+Searches major Lemmy instances for communities matching a keyword and
+returns candidates for user review. Nothing is stored until the user
+imports via PUT /subs/{community@instance}.
+"""
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+import httpx
+
+from app.services.lemmy.client import KNOWN_INSTANCES, _DEFAULT_USER_AGENT
+
+logger = logging.getLogger(__name__)
+
+# Instances to fan search across (ordered by size)
+_SEARCH_INSTANCES = ["lemmy.world", "lemmy.ml", "beehaw.org", "sh.itjust.works", "reddthat.com"]
+
+_TIMEOUT = 10.0
+
+
+def search_lemmy(
+    keyword: str,
+    limit: int = 15,
+    known_subs: set[str] | None = None,
+) -> list[dict[str, Any]]:
+    """
+    Search multiple Lemmy instances for communities matching keyword.
+
+    Every instance in ``_SEARCH_INSTANCES`` is queried in turn. Results are
+    deduplicated by actor_id (canonical AP URL) so federated communities
+    don't appear twice, NSFW communities are skipped, and the merged list
+    is sorted by subscriber count (descending) and cut to ``limit`` items.
+
+    Args:
+        keyword: Search term sent as the ``q`` query parameter.
+        limit: Maximum number of candidates to return; values <= 0 yield [].
+        known_subs: Already-tracked ``community@instance`` keys. Candidates
+            are lower-cased before the membership test, so the keys are
+            expected to be lower-case. Only used to set ``already_tracked``.
+
+    Returns:
+        A list of candidate dicts (``sub``, ``title``, ``subscribers``,
+        ``description``, ...), each tagged with ``"platform": "lemmy"``.
+
+    An instance that fails (network error, non-200 status, malformed
+    payload) is logged and skipped; the error is not raised to the caller.
+    """
+    # Function-scope import keeps this block self-contained.
+    from urllib.parse import urlsplit
+
+    tracked = known_subs or ()
+    seen_actor_ids: set[str] = set()
+    results: list[dict[str, Any]] = []
+
+    for instance in _SEARCH_INSTANCES:
+        try:
+            r = httpx.get(
+                f"https://{instance}/api/v3/search",
+                params={
+                    "q": keyword,
+                    "type_": "Communities",
+                    "limit": 20,
+                    "sort": "TopAll",
+                },
+                headers={"User-Agent": _DEFAULT_USER_AGENT},
+                timeout=_TIMEOUT,
+                follow_redirects=True,
+            )
+            if r.status_code != 200:
+                logger.debug("Lemmy search on %s returned %d", instance, r.status_code)
+                continue
+
+            # `or []` / `or {}` also cover explicit JSON nulls, which
+            # `.get(key, default)` would pass through as None.
+            communities = r.json().get("communities") or []
+
+            for cv in communities:
+                comm = cv.get("community") or {}
+                counts = cv.get("counts") or {}
+
+                actor_id: str = comm.get("actor_id") or ""
+                if not actor_id or actor_id in seen_actor_ids:
+                    continue
+                seen_actor_ids.add(actor_id)
+
+                if comm.get("nsfw"):
+                    continue
+
+                name = comm.get("name") or ""
+                if not name:
+                    # Without a name there is no usable community@instance key.
+                    continue
+
+                # Derive home instance from actor_id (e.g. https://lemmy.world/c/selfhosted).
+                # Fall back to the queried instance when the URL has no host part.
+                try:
+                    home_instance = urlsplit(actor_id).netloc or instance
+                except ValueError:
+                    home_instance = instance
+
+                sub_key = f"{name}@{home_instance}"
+                # A null/missing count becomes 0 so the final sort never compares None.
+                subscribers = counts.get("subscribers") or 0
+                description = (comm.get("description") or "").strip()
+
+                results.append({
+                    "sub": sub_key,
+                    "title": comm.get("title") or name,
+                    "subscribers": subscribers,
+                    "description": description[:280],
+                    "promo_allowed": None,  # Lemmy has no standardized promo rules
+                    "flair_required": False,
+                    "available_flairs": [],
+                    "rule_warning": False,
+                    "notes": None,
+                    "already_tracked": sub_key.lower() in tracked,
+                    "platform": "lemmy",
+                })
+
+        except Exception:
+            # Best-effort fan-out: one bad instance must not fail the whole search.
+            logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)
+
+    results.sort(key=lambda x: x["subscribers"], reverse=True)
+    # Clamp so a negative limit returns nothing instead of slicing from the tail.
+    return results[:max(limit, 0)]
--- a/app/services/reddit/discovery.py
+++ b/app/services/reddit/discovery.py
@ -136,6 +136,7 @@ def analyze_sub(
            "rule_warning": False,
            "notes": notes,
            "already_tracked": (sub.lower() in known_subs) if known_subs is not None else False,
+            "platform": "reddit",
        }
    except Exception:
        logger.exception("Error analyzing r/%s", sub)
@ -154,11 +155,6 @@ def search_subs(
    Returns a list of analysis dicts sorted by subscriber count (desc).
    """
    try:
-        search_r = _get(
-            f"{_BASE}/subreddits/search.json",
-            cookies=cookies,
-        )
-        # httpx doesn't support params kwarg above since we're using _get; rebuild
        r = httpx.get(
            f"{_BASE}/subreddits/search.json",
            params={"q": keyword, "limit": min(limit, 50), "sort": "relevance"},
@ -201,6 +197,7 @@ def search_subs(
            "rule_warning": False,
            "notes": None,
            "already_tracked": (sub_name.lower() in known_subs) if known_subs is not None else False,
+            "platform": "reddit",
        })

    # Sort by subscribers descending
--- a/frontend/src/components/SubRulesView.vue
+++ b/frontend/src/components/SubRulesView.vue
@ -144,8 +144,13 @@
          >
            <div class="discover-row-main">
              <div class="discover-row-name">
-                <a :href="`https://reddit.com/r/${r.sub}`" target="_blank" class="sub-link">r/{{ r.sub }}</a>
+                <a
+                  :href="r.platform === 'lemmy' ? `https://${r.sub.split('@')[1]}/c/${r.sub.split('@')[0]}` : `https://reddit.com/r/${r.sub}`"
+                  target="_blank"
+                  class="sub-link"
+                >{{ r.platform === 'lemmy' ? r.sub : `r/${r.sub}` }}</a>
                <span class="sub-size">{{ formatSubs(r.subscribers) }}</span>
+                <span class="badge badge-muted">{{ r.platform }}</span>
                <span v-if="r.already_tracked" class="badge badge-muted">tracked</span>
              </div>
              <div v-if="r.description" class="discover-row-desc">{{ r.description }}</div>
@ -282,7 +287,7 @@ async function importSub(r: SubDiscoveryResult) {
    notes: r.notes,
  }
  try {
-    const saved = await api.subs.upsertRules(r.sub, payload, 'reddit')
+    const saved = await api.subs.upsertRules(r.sub, payload, r.platform)
    const idx = rules.value.findIndex(x => x.sub === r.sub)
    if (idx !== -1) {
      rules.value = [...rules.value.slice(0, idx), saved, ...rules.value.slice(idx + 1)]
--- a/frontend/src/services/api.ts
+++ b/frontend/src/services/api.ts
@ -109,6 +109,7 @@ export interface SubDiscoveryResult {
  rule_warning: boolean
  notes: string | null
  already_tracked: boolean
+  platform: string
 }

 export interface SubRulesUpsert {
@ -286,8 +287,8 @@ export const api = {
    upsertRules: (sub: string, data: SubRulesUpsert, platform = 'reddit') =>
      http.put<SubRules>(`/subs/${sub}`, data, { params: { platform } }).then(r => r.data),

-    discover: (keyword: string, limit = 15) =>
-      http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit }).then(r => r.data),
+    discover: (keyword: string, limit = 15, platforms = ['reddit', 'lemmy']) =>
+      http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit, platforms }).then(r => r.data),
  },

  posts: {