feat(discovery): add Lemmy community search, fix dead request, add platform field
- Add app/services/lemmy/discovery.py: searches 5 major Lemmy instances, deduplicates by actor_id (the canonical ActivityPub URL), skips NSFW communities, and uses the community@instance naming convention, matching the existing Lemmy client.
- Update POST /subs/discover: accepts a platforms[] param (defaults to both), fans out to Reddit + Lemmy search, then merges the results and sorts them by subscribers.
- Add a platform field to all discovery result dicts (Reddit and Lemmy).
- Fix: remove the dead _get() call left in search_subs() during an earlier refactor.
- Frontend: show a platform badge on each discovery row, use the correct hyperlink format for Lemmy (https://{instance}/c/{community}), and pass r.platform to upsertRules on import so Lemmy subs land in the lemmy platform slot.
This commit is contained in:
parent
f39f36e258
commit
35c6e5f7bc
5 changed files with 143 additions and 22 deletions
|
|
@ -14,6 +14,7 @@ router = APIRouter(prefix="/subs", tags=["subs"])
|
||||||
class DiscoverBody(BaseModel):
|
class DiscoverBody(BaseModel):
|
||||||
keyword: str
|
keyword: str
|
||||||
limit: int = 15
|
limit: int = 15
|
||||||
|
platforms: list[str] = ["reddit", "lemmy"]
|
||||||
|
|
||||||
|
|
||||||
def _in_thread(fn):
|
def _in_thread(fn):
|
||||||
|
|
@ -70,18 +71,35 @@ async def discover_subs(body: DiscoverBody):
|
||||||
from app.services.reddit.discovery import search_and_analyze
|
from app.services.reddit.discovery import search_and_analyze
|
||||||
|
|
||||||
def _run(store: Store):
|
def _run(store: Store):
|
||||||
# Collect already-tracked sub names so the UI can flag them
|
platforms = set(body.platforms or ["reddit", "lemmy"])
|
||||||
existing = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
|
results: list[dict] = []
|
||||||
try:
|
|
||||||
from app.services.reddit.client import RedditClient
|
if "reddit" in platforms:
|
||||||
cookies = RedditClient().cookies
|
from app.services.reddit.discovery import search_and_analyze
|
||||||
except Exception:
|
existing_reddit = {r["sub"].lower() for r in store.list_sub_rules("reddit")}
|
||||||
cookies = None
|
try:
|
||||||
return search_and_analyze(
|
from app.services.reddit.client import RedditClient
|
||||||
keyword=body.keyword,
|
cookies = RedditClient().cookies
|
||||||
limit=body.limit,
|
except Exception:
|
||||||
cookies=cookies,
|
cookies = None
|
||||||
known_subs=existing,
|
results.extend(search_and_analyze(
|
||||||
)
|
keyword=body.keyword,
|
||||||
|
limit=body.limit,
|
||||||
|
cookies=cookies,
|
||||||
|
known_subs=existing_reddit,
|
||||||
|
))
|
||||||
|
|
||||||
|
if "lemmy" in platforms:
|
||||||
|
from app.services.lemmy.discovery import search_lemmy
|
||||||
|
existing_lemmy = {r["sub"].lower() for r in store.list_sub_rules("lemmy")}
|
||||||
|
results.extend(search_lemmy(
|
||||||
|
keyword=body.keyword,
|
||||||
|
limit=body.limit,
|
||||||
|
known_subs=existing_lemmy,
|
||||||
|
))
|
||||||
|
|
||||||
|
# Merge and sort by subscribers descending
|
||||||
|
results.sort(key=lambda x: x.get("subscribers", 0), reverse=True)
|
||||||
|
return results
|
||||||
|
|
||||||
return await asyncio.to_thread(_in_thread, _run)
|
return await asyncio.to_thread(_in_thread, _run)
|
||||||
|
|
|
||||||
100
app/services/lemmy/discovery.py
Normal file
100
app/services/lemmy/discovery.py
Normal file
|
|
@ -0,0 +1,100 @@
|
||||||
|
"""
|
||||||
|
Lemmy community discovery.
|
||||||
|
|
||||||
|
Searches major Lemmy instances for communities matching a keyword and
|
||||||
|
returns candidates for user review. Nothing is stored until the user
|
||||||
|
imports via PUT /subs/{community@instance}.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.services.lemmy.client import KNOWN_INSTANCES, _DEFAULT_USER_AGENT
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Instances to fan search across (ordered by size)
|
||||||
|
_SEARCH_INSTANCES = ["lemmy.world", "lemmy.ml", "beehaw.org", "sh.itjust.works", "reddthat.com"]
|
||||||
|
|
||||||
|
_TIMEOUT = 10.0
|
||||||
|
|
||||||
|
|
||||||
|
def search_lemmy(
|
||||||
|
keyword: str,
|
||||||
|
limit: int = 15,
|
||||||
|
known_subs: set[str] | None = None,
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Search multiple Lemmy instances for communities matching keyword.
|
||||||
|
|
||||||
|
Deduplicates by actor_id (canonical AP URL) so federated communities
|
||||||
|
don't appear twice. Returns candidates sorted by subscriber count.
|
||||||
|
"""
|
||||||
|
seen_actor_ids: set[str] = set()
|
||||||
|
results: list[dict] = []
|
||||||
|
|
||||||
|
for instance in _SEARCH_INSTANCES:
|
||||||
|
try:
|
||||||
|
r = httpx.get(
|
||||||
|
f"https://{instance}/api/v3/search",
|
||||||
|
params={
|
||||||
|
"q": keyword,
|
||||||
|
"type_": "Communities",
|
||||||
|
"limit": 20,
|
||||||
|
"sort": "TopAll",
|
||||||
|
},
|
||||||
|
headers={"User-Agent": _DEFAULT_USER_AGENT},
|
||||||
|
timeout=_TIMEOUT,
|
||||||
|
follow_redirects=True,
|
||||||
|
)
|
||||||
|
if r.status_code != 200:
|
||||||
|
logger.debug("Lemmy search on %s returned %d", instance, r.status_code)
|
||||||
|
continue
|
||||||
|
|
||||||
|
communities = r.json().get("communities", [])
|
||||||
|
|
||||||
|
for cv in communities:
|
||||||
|
comm = cv.get("community", {})
|
||||||
|
counts = cv.get("counts", {})
|
||||||
|
|
||||||
|
actor_id: str = comm.get("actor_id", "")
|
||||||
|
if not actor_id or actor_id in seen_actor_ids:
|
||||||
|
continue
|
||||||
|
seen_actor_ids.add(actor_id)
|
||||||
|
|
||||||
|
# Derive home instance from actor_id (e.g. https://lemmy.world/c/selfhosted)
|
||||||
|
try:
|
||||||
|
home_instance = actor_id.split("/")[2]
|
||||||
|
except IndexError:
|
||||||
|
home_instance = instance
|
||||||
|
|
||||||
|
if comm.get("nsfw"):
|
||||||
|
continue
|
||||||
|
|
||||||
|
name = comm.get("name", "")
|
||||||
|
sub_key = f"{name}@{home_instance}"
|
||||||
|
subscribers = counts.get("subscribers", 0)
|
||||||
|
description = (comm.get("description") or "").strip()
|
||||||
|
|
||||||
|
results.append({
|
||||||
|
"sub": sub_key,
|
||||||
|
"title": comm.get("title") or name,
|
||||||
|
"subscribers": subscribers,
|
||||||
|
"description": description[:280],
|
||||||
|
"promo_allowed": None, # Lemmy has no standardized promo rules
|
||||||
|
"flair_required": False,
|
||||||
|
"available_flairs": [],
|
||||||
|
"rule_warning": False,
|
||||||
|
"notes": None,
|
||||||
|
"already_tracked": (sub_key.lower() in known_subs) if known_subs else False,
|
||||||
|
"platform": "lemmy",
|
||||||
|
})
|
||||||
|
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Error searching Lemmy instance %s", instance, exc_info=True)
|
||||||
|
|
||||||
|
results.sort(key=lambda x: x["subscribers"], reverse=True)
|
||||||
|
return results[:limit]
|
||||||
|
|
@ -136,6 +136,7 @@ def analyze_sub(
|
||||||
"rule_warning": False,
|
"rule_warning": False,
|
||||||
"notes": notes,
|
"notes": notes,
|
||||||
"already_tracked": (sub.lower() in known_subs) if known_subs is not None else False,
|
"already_tracked": (sub.lower() in known_subs) if known_subs is not None else False,
|
||||||
|
"platform": "reddit",
|
||||||
}
|
}
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.exception("Error analyzing r/%s", sub)
|
logger.exception("Error analyzing r/%s", sub)
|
||||||
|
|
@ -154,11 +155,6 @@ def search_subs(
|
||||||
Returns a list of analysis dicts sorted by subscriber count (desc).
|
Returns a list of analysis dicts sorted by subscriber count (desc).
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
search_r = _get(
|
|
||||||
f"{_BASE}/subreddits/search.json",
|
|
||||||
cookies=cookies,
|
|
||||||
)
|
|
||||||
# httpx doesn't support params kwarg above since we're using _get; rebuild
|
|
||||||
r = httpx.get(
|
r = httpx.get(
|
||||||
f"{_BASE}/subreddits/search.json",
|
f"{_BASE}/subreddits/search.json",
|
||||||
params={"q": keyword, "limit": min(limit, 50), "sort": "relevance"},
|
params={"q": keyword, "limit": min(limit, 50), "sort": "relevance"},
|
||||||
|
|
@ -201,6 +197,7 @@ def search_subs(
|
||||||
"rule_warning": False,
|
"rule_warning": False,
|
||||||
"notes": None,
|
"notes": None,
|
||||||
"already_tracked": (sub_name.lower() in known_subs) if known_subs is not None else False,
|
"already_tracked": (sub_name.lower() in known_subs) if known_subs is not None else False,
|
||||||
|
"platform": "reddit",
|
||||||
})
|
})
|
||||||
|
|
||||||
# Sort by subscribers descending
|
# Sort by subscribers descending
|
||||||
|
|
|
||||||
|
|
@ -144,8 +144,13 @@
|
||||||
>
|
>
|
||||||
<div class="discover-row-main">
|
<div class="discover-row-main">
|
||||||
<div class="discover-row-name">
|
<div class="discover-row-name">
|
||||||
<a :href="`https://reddit.com/r/${r.sub}`" target="_blank" class="sub-link">r/{{ r.sub }}</a>
|
<a
|
||||||
|
:href="r.platform === 'lemmy' ? `https://${r.sub.split('@')[1]}/c/${r.sub.split('@')[0]}` : `https://reddit.com/r/${r.sub}`"
|
||||||
|
target="_blank"
|
||||||
|
class="sub-link"
|
||||||
|
>{{ r.platform === 'lemmy' ? r.sub : `r/${r.sub}` }}</a>
|
||||||
<span class="sub-size">{{ formatSubs(r.subscribers) }}</span>
|
<span class="sub-size">{{ formatSubs(r.subscribers) }}</span>
|
||||||
|
<span class="badge badge-muted">{{ r.platform }}</span>
|
||||||
<span v-if="r.already_tracked" class="badge badge-muted">tracked</span>
|
<span v-if="r.already_tracked" class="badge badge-muted">tracked</span>
|
||||||
</div>
|
</div>
|
||||||
<div v-if="r.description" class="discover-row-desc">{{ r.description }}</div>
|
<div v-if="r.description" class="discover-row-desc">{{ r.description }}</div>
|
||||||
|
|
@ -282,7 +287,7 @@ async function importSub(r: SubDiscoveryResult) {
|
||||||
notes: r.notes,
|
notes: r.notes,
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
const saved = await api.subs.upsertRules(r.sub, payload, 'reddit')
|
const saved = await api.subs.upsertRules(r.sub, payload, r.platform)
|
||||||
const idx = rules.value.findIndex(x => x.sub === r.sub)
|
const idx = rules.value.findIndex(x => x.sub === r.sub)
|
||||||
if (idx !== -1) {
|
if (idx !== -1) {
|
||||||
rules.value = [...rules.value.slice(0, idx), saved, ...rules.value.slice(idx + 1)]
|
rules.value = [...rules.value.slice(0, idx), saved, ...rules.value.slice(idx + 1)]
|
||||||
|
|
|
||||||
|
|
@ -109,6 +109,7 @@ export interface SubDiscoveryResult {
|
||||||
rule_warning: boolean
|
rule_warning: boolean
|
||||||
notes: string | null
|
notes: string | null
|
||||||
already_tracked: boolean
|
already_tracked: boolean
|
||||||
|
platform: string
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface SubRulesUpsert {
|
export interface SubRulesUpsert {
|
||||||
|
|
@ -286,8 +287,8 @@ export const api = {
|
||||||
upsertRules: (sub: string, data: SubRulesUpsert, platform = 'reddit') =>
|
upsertRules: (sub: string, data: SubRulesUpsert, platform = 'reddit') =>
|
||||||
http.put<SubRules>(`/subs/${sub}`, data, { params: { platform } }).then(r => r.data),
|
http.put<SubRules>(`/subs/${sub}`, data, { params: { platform } }).then(r => r.data),
|
||||||
|
|
||||||
discover: (keyword: string, limit = 15) =>
|
discover: (keyword: string, limit = 15, platforms = ['reddit', 'lemmy']) =>
|
||||||
http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit }).then(r => r.data),
|
http.post<SubDiscoveryResult[]>('/subs/discover', { keyword, limit, platforms }).then(r => r.data),
|
||||||
},
|
},
|
||||||
|
|
||||||
posts: {
|
posts: {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue