feat(discovery): add Lemmy community search, fix dead request, add platform field
- Add app/services/lemmy/discovery.py: searches 5 major Lemmy instances, deduplicates by actor_id (the canonical ActivityPub URL), skips NSFW communities, and uses the community@instance naming convention to match the existing Lemmy client
- Update POST /subs/discover: accepts a platforms[] param (defaults to both), fans out to Reddit + Lemmy search, then merges and sorts results by subscribers
- Add a platform field to all discovery result dicts (Reddit and Lemmy)
- Fix: remove the dead _get() call left in search_subs() during an earlier refactor
- Frontend: show a platform badge on each discovery row, use the correct hyperlink format for Lemmy (https://{instance}/c/{community}), and pass r.platform to upsertRules on import so Lemmy subs land in the lemmy platform slot
This commit is contained in:
parent
f39f36e258
commit
35c6e5f7bc
5 changed files with 143 additions and 22 deletions
|
|
@ -14,6 +14,7 @@ router = APIRouter(prefix="/subs", tags=["subs"])
|
|||
class DiscoverBody(BaseModel):
    # Request payload for POST /subs/discover.

    # Search term handed unchanged to every selected platform's search.
    keyword: str

    # Forwarded as `limit` to each platform search (not a cap on the
    # merged list).
    limit: int = 15

    # Platforms to query. The handler falls back to both platforms when
    # this is empty.
    platforms: list[str] = ["reddit", "lemmy"]
|
||||
|
||||
|
||||
def _in_thread(fn):
|
||||
|
|
@ -70,18 +71,35 @@ async def discover_subs(body: DiscoverBody):
|
|||
from app.services.reddit.discovery import search_and_analyze
|
||||
|
||||
def _run(store: Store):
|
||||
# Collect already-tracked sub names so the UI can flag them
|
||||
existing = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
|
||||
try:
|
||||
from app.services.reddit.client import RedditClient
|
||||
cookies = RedditClient().cookies
|
||||
except Exception:
|
||||
cookies = None
|
||||
return search_and_analyze(
|
||||
keyword=body.keyword,
|
||||
limit=body.limit,
|
||||
cookies=cookies,
|
||||
known_subs=existing,
|
||||
)
|
||||
platforms = set(body.platforms or ["reddit", "lemmy"])
|
||||
results: list[dict] = []
|
||||
|
||||
if "reddit" in platforms:
|
||||
from app.services.reddit.discovery import search_and_analyze
|
||||
existing_reddit = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
|
||||
try:
|
||||
from app.services.reddit.client import RedditClient
|
||||
cookies = RedditClient().cookies
|
||||
except Exception:
|
||||
cookies = None
|
||||
results.extend(search_and_analyze(
|
||||
keyword=body.keyword,
|
||||
limit=body.limit,
|
||||
cookies=cookies,
|
||||
known_subs=existing_reddit,
|
||||
))
|
||||
|
||||
if "lemmy" in platforms:
|
||||
from app.services.lemmy.discovery import search_lemmy
|
||||
existing_lemmy = {r["sub"].lower() for r in store.list_sub_rules("lemmy")}
|
||||
results.extend(search_lemmy(
|
||||
keyword=body.keyword,
|
||||
limit=body.limit,
|
||||
known_subs=existing_lemmy,
|
||||
))
|
||||
|
||||
# Merge and sort by subscribers descending
|
||||
results.sort(key=lambda x: x.get("subscribers", 0), reverse=True)
|
||||
return results
|
||||
|
||||
return await asyncio.to_thread(_in_thread, _run)
|
||||
|
|
|
|||
100
app/services/lemmy/discovery.py
Normal file
100
app/services/lemmy/discovery.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""
|
||||
Lemmy community discovery.
|
||||
|
||||
Searches major Lemmy instances for communities matching a keyword and
|
||||
returns candidates for user review. Nothing is stored until the user
|
||||
imports via PUT /subs/{community@instance}.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from app.services.lemmy.client import KNOWN_INSTANCES, _DEFAULT_USER_AGENT
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Instances to fan search across (ordered by size)
|
||||
_SEARCH_INSTANCES = ["lemmy.world", "lemmy.ml", "beehaw.org", "sh.itjust.works", "reddthat.com"]
|
||||
|
||||
_TIMEOUT = 10.0
|
||||
|
||||
|
||||
def search_lemmy(
|
||||
keyword: str,
|
||||
limit: int = 15,
|
||||
known_subs: set[str] | None = None,
|
||||
) -> list[dict[str, Any]]:
|
||||
"""
|
||||
Search multiple Lemmy instances for communities matching keyword.
|
||||
|
||||
Deduplicates by actor_id (canonical AP URL) so federated communities
|
||||
don't appear twice. Returns candidates sorted by subscriber count.
|
||||
"""
|
||||
seen_actor_ids: set[str] = set()
|
||||
results: list[dict] = []
|
||||
|
||||
for instance in _SEARCH_INSTANCES:
|
||||
try:
|
||||
r = httpx.get(
|
||||
f"https://{instance}/api/v3/search",
|
||||
params={
|
||||
"q": keyword,
|
||||
"type_": "Communities",
|
||||
"limit": 20,
|
||||
"sort": "TopAll",
|
||||
},
|
||||
headers={"User-Agent": _DEFAULT_USER_AGENT},
|
||||
timeout=_TIMEOUT,
|
||||
follow_redirects=True,
|
||||
)
|
||||
if r.status_code != 200:
|
||||
logger.debug("Lemmy search on %s returned %d", instance, r.status_code)
|
||||
continue
|
||||
|
||||
communities = r.json().get("communities", [])
|
||||
|
||||
for cv in communities:
|
||||
comm = cv.get("community", {})
|
||||
counts = cv.get("counts", {})
|
||||
|
||||
actor_id: str = comm.get("actor_id", "")
|
||||
if not actor_id or actor_id in seen_actor_ids:
|
||||
continue
|
||||
seen_actor_ids.add(actor_id)
|
||||
|
||||
# Derive home instance from actor_id (e.g. https://lemmy.world/c/selfhosted)
|
||||
try:
|
||||
home_instance = actor_id.split("/")[2]
|
||||
except IndexError:
|
||||
home_instance = instance
|
||||
|
||||
if comm.get("nsfw"):
|
||||
continue
|
||||
|
||||
name = comm.get("name", "")
|
||||
sub_key = f"{name}@{home_instance}"
|
||||
subscribers = counts.get("subscribers", 0)
|
||||
description = (comm.get("description") or "").strip()
|
||||
|
||||
results.append({
|
||||
"sub": sub_key,
|
||||
"title": comm.get("title") or name,
|
||||
"subscribers": subscribers,
|
||||
"description": description[:280],
|
||||
"promo_allowed": None, # Lemmy has no standardized promo rules
|
||||
"flair_required": False,
|
||||
"available_flairs": [],
|
||||
"rule_warning": False,
|
||||
"notes": None,
|
||||
"already_tracked": (sub_key.lower() in known_subs) if known_subs else False,
|
||||
"platform": "lemmy",
|
||||
})
|
||||
|
||||
except Exception:
|
||||
logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)
|
||||
|
||||
results.sort(key=lambda x: x["subscribers"], reverse=True)
|
||||
return results[:limit]
|
||||
|
|
@ -136,6 +136,7 @@ def analyze_sub(
|
|||
"rule_warning": False,
|
||||
"notes": notes,
|
||||
"already_tracked": (sub.lower() in known_subs) if known_subs is not None else False,
|
||||
"platform": "reddit",
|
||||
}
|
||||
except Exception:
|
||||
logger.exception("Error analyzing r/%s", sub)
|
||||
|
|
@ -154,11 +155,6 @@ def search_subs(
|
|||
Returns a list of analysis dicts sorted by subscriber count (desc).
|
||||
"""
|
||||
try:
|
||||
search_r = _get(
|
||||
f"{_BASE}/subreddits/search.json",
|
||||
cookies=cookies,
|
||||
)
|
||||
# httpx doesn't support params kwarg above since we're using _get; rebuild
|
||||
r = httpx.get(
|
||||
f"{_BASE}/subreddits/search.json",
|
||||
params={"q": keyword, "limit": min(limit, 50), "sort": "relevance"},
|
||||
|
|
@ -201,6 +197,7 @@ def search_subs(
|
|||
"rule_warning": False,
|
||||
"notes": None,
|
||||
"already_tracked": (sub_name.lower() in known_subs) if known_subs is not None else False,
|
||||
"platform": "reddit",
|
||||
})
|
||||
|
||||
# Sort by subscribers descending
|
||||
|
|
|
|||
|
|
@ -144,8 +144,13 @@
|
|||
>
|
||||
<div class="discover-row-main">
|
||||
<div class="discover-row-name">
|
||||
<a :href="`https://reddit.com/r/${r.sub}`" target="_blank" class="sub-link">r/{{ r.sub }}</a>
|
||||
<a
|
||||
:href="r.platform === 'lemmy' ? `https://${r.sub.split('@')[1]}/c/${r.sub.split('@')[0]}` : `https://reddit.com/r/${r.sub}`"
|
||||
target="_blank"
|
||||
class="sub-link"
|
||||
>{{ r.platform === 'lemmy' ? r.sub : `r/${r.sub}` }}</a>
|
||||
<span class="sub-size">{{ formatSubs(r.subscribers) }}</span>
|
||||
<span class="badge badge-muted">{{ r.platform }}</span>
|
||||
<span v-if="r.already_tracked" class="badge badge-muted">tracked</span>
|
||||
</div>
|
||||
<div v-if="r.description" class="discover-row-desc">{{ r.description }}</div>
|
||||
|
|
@ -282,7 +287,7 @@ async function importSub(r: SubDiscoveryResult) {
|
|||
notes: r.notes,
|
||||
}
|
||||
try {
|
||||
const saved = await api.subs.upsertRules(r.sub, payload, 'reddit')
|
||||
const saved = await api.subs.upsertRules(r.sub, payload, r.platform)
|
||||
const idx = rules.value.findIndex(x => x.sub === r.sub)
|
||||
if (idx !== -1) {
|
||||
rules.value = [...rules.value.slice(0, idx), saved, ...rules.value.slice(idx + 1)]
|
||||
|
|
|
|||
|
|
@ -109,6 +109,7 @@ export interface SubDiscoveryResult {
|
|||
rule_warning: boolean
|
||||
notes: string | null
|
||||
already_tracked: boolean
|
||||
platform: string
|
||||
}
|
||||
|
||||
export interface SubRulesUpsert {
|
||||
|
|
@ -286,8 +287,8 @@ export const api = {
|
|||
upsertRules: (sub: string, data: SubRulesUpsert, platform = 'reddit') =>
|
||||
http.put<SubRules>(`/subs/${sub}`, data, { params: { platform } }).then(r => r.data),
|
||||
|
||||
discover: (keyword: string, limit = 15) =>
|
||||
http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit }).then(r => r.data),
|
||||
discover: (keyword: string, limit = 15, platforms = ['reddit', 'lemmy']) =>
|
||||
http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit, platforms }).then(r => r.data),
|
||||
},
|
||||
|
||||
posts: {
|
||||
|
|
|
|||
Loading…
Reference in a new issue