linnet/app/services/session_store.py
pyr0ball 1bc47b8e0f fix: mobile tab timeout — idle session reaper + wake lock + visibility reconnect
Backend:
- Session.last_subscriber_left_at: monotonic timestamp set when last SSE subscriber
  leaves, cleared when a new one arrives
- Session.subscriber_count(): replaces len(_subscribers) access from outside the model
- session_store._reaper_loop(): kills sessions with no subscribers for >SESSION_IDLE_TTL_S
  (default 90s); runs every TTL/2 seconds via asyncio.create_task at startup
- session_store._reaper_loop_once(): single-cycle variant for deterministic tests
- app/main.py lifespan: starts reaper on startup, cancels it cleanly on shutdown
- config.py: SESSION_IDLE_TTL_S setting (90s default, overridable per-env)

Frontend:
- useWakeLock.ts: Screen Wake Lock API wrapper; acquires on connect, releases on
  disconnect; degrades silently when unsupported (battery saver, iOS Safari)
- useToneStream.ts: visibilitychange handler — on hidden: closes EventSource without
  ending backend session (grace window stays open); on visible: GET /session/{id}
  liveness check, reconnects SSE + re-acquires wake lock if alive, sets expired=true
  and calls store.reset() if reaped
- ComposeBar.vue: surfaces expired state with calm 'Session timed out' notice
  (not an error — expected behaviour on long screen-off)

Tests:
- test_reaper.py: 7 tests covering subscriber idle tracking, reaper eligibility
  (kills idle, spares active subscriber, spares within-TTL)
2026-04-11 09:42:42 -07:00

356 lines
12 KiB
Python

# app/services/session_store.py — session lifecycle and classifier management
from __future__ import annotations
import asyncio
import logging
import time
from app.config import settings
from app.models.session import Session
from app.models.tone_event import ToneEvent
from app.services.annotator import annotate
logger = logging.getLogger(__name__)
# Module-level singleton store — one per process
_sessions: dict[str, Session] = {}  # session_id -> live Session
_tasks: dict[str, asyncio.Task] = {}  # session_id -> classifier background task
_reaper_task: asyncio.Task | None = None  # idle-session reaper; None while not started
# Audio accumulation buffer per session.
# We accumulate _CHUNK_MS PCM chunks until we have _CLASSIFY_WINDOW_MS of audio,
# then fire a single /classify call. The window is sized for wav2vec2 (see below);
# shorter windows give the emotion model too little context.
_CLASSIFY_WINDOW_MS = 1000 # ms of audio per classify call; wav2vec2 needs ≥1s context
_CHUNK_MS = 100 # AudioWorklet sends 1600 samples @ 16kHz = 100ms
_CHUNKS_PER_WINDOW = _CLASSIFY_WINDOW_MS // _CHUNK_MS # 10 chunks
_audio_buffers: dict[str, list[bytes]] = {}  # session_id -> raw PCM chunks awaiting a full window
async def create_session(elcor: bool = False) -> Session:
    """Register a new session and launch its classifier background task.

    When CF_ORCH_URL is configured, a managed cf-voice instance is requested
    before the classifier starts; if that allocation fails the session falls
    back to the in-process mock.
    """
    session = Session(elcor=elcor)
    sid = session.session_id
    # Register before the (potentially slow) allocation await.
    _sessions[sid] = session

    await _allocate_voice(session)

    _tasks[sid] = asyncio.create_task(
        _run_classifier(session),
        name=f"classifier-{sid}",
    )
    session.state = "running"

    voice_label = session.cf_voice_url or "in-process"
    logger.info("Session %s started (voice=%s)", sid, voice_label)
    return session
def get_session(session_id: str) -> Session | None:
    """Look up a registered session by id; returns None when the id is unknown."""
    try:
        return _sessions[session_id]
    except KeyError:
        return None
def active_session_count() -> int:
    """Count the registered sessions whose state is "running"."""
    running = [s for s in _sessions.values() if s.state == "running"]
    return len(running)
async def end_session(session_id: str) -> bool:
    """Tear down a session and forget it.

    Marks the session stopped, cancels its classifier task if still running,
    drops its pending audio buffer and releases its cf-voice allocation.

    Returns True when the session existed, False when the id was unknown.
    """
    try:
        session = _sessions.pop(session_id)
    except KeyError:
        return False

    session.state = "stopped"

    classifier_task = _tasks.pop(session_id, None)
    if classifier_task is not None and not classifier_task.done():
        classifier_task.cancel()

    _audio_buffers.pop(session_id, None)
    await _release_voice(session)

    logger.info("Session %s ended", session_id)
    return True
async def _allocate_voice(session: Session) -> None:
    """Point *session* at a cf-voice instance, if one is configured.

    Resolution order:
      1. ``settings.cf_voice_url`` — static override, used as-is; cf-orch is
         not contacted.
      2. ``settings.cf_orch_url`` — POST an allocation request to cf-orch and
         record the returned URL and allocation id on the session.
      3. Neither set — no-op.

    Allocation errors (network failure, non-2xx status, malformed response)
    are logged and swallowed. In that case the session is left untouched, so
    the caller falls back to the in-process classifier.
    """
    # Static override takes precedence — skip cf-orch allocation
    if settings.cf_voice_url:
        session.cf_voice_url = settings.cf_voice_url
        return
    if not settings.cf_orch_url:
        return

    import httpx

    url = settings.cf_orch_url.rstrip("/") + "/api/services/cf-voice/allocate"
    try:
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(url, json={"caller": "linnet", "ttl_s": 3600.0})
            resp.raise_for_status()
            data = resp.json()
        # Read BOTH fields before mutating the session. A response missing
        # "allocation_id" must not leave cf_voice_url set with nothing to
        # release later.
        voice_url = data["url"]
        allocation_id = data["allocation_id"]
    except Exception as exc:
        logger.warning(
            "cf-orch allocation failed for session %s: %s — falling back to in-process",
            session.session_id, exc,
        )
        return

    session.cf_voice_url = voice_url
    session.cf_voice_allocation_id = allocation_id
    logger.info(
        "cf-orch allocated cf-voice for session %s: %s (alloc=%s)",
        session.session_id, voice_url, allocation_id,
    )
async def _release_voice(session: Session) -> None:
    """Release a cf-voice allocation back to cf-orch.

    No-op when the session has no allocation id (static or in-process voice)
    or when ``settings.cf_orch_url`` is unset.

    Failures are logged as warnings and never raised, so session teardown
    always completes.
    """
    if not session.cf_voice_allocation_id or not settings.cf_orch_url:
        return

    import httpx

    url = (
        settings.cf_orch_url.rstrip("/")
        + f"/api/services/cf-voice/allocations/{session.cf_voice_allocation_id}"
    )
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.delete(url)
            # Without this, a 4xx/5xx from cf-orch would be logged as a
            # successful release.
            resp.raise_for_status()
    except Exception as exc:
        logger.warning(
            "cf-orch release failed for session %s: %s", session.session_id, exc
        )
        return

    logger.info("Released cf-voice allocation for session %s", session.session_id)
async def _run_classifier(session: Session) -> None:
    """
    Background task entry point: pick the classifier mode for *session* and run it.

    - Sidecar mode (``session.cf_voice_url`` is set, via cf-orch allocation or
      CF_VOICE_URL): the task only holds the session open; audio reaches
      cf-voice through forward_audio_chunk().
    - In-process mode (no sidecar URL): runs ContextClassifier.from_env()
      directly. Used for local dev and mock mode when CF_ORCH_URL is unset or
      allocation failed.
    """
    runner = (
        _run_classifier_sidecar if session.cf_voice_url else _run_classifier_inprocess
    )
    await runner(session)
async def _run_classifier_inprocess(session: Session) -> None:
    """In-process mode: consume ContextClassifier.stream() and broadcast annotated frames.

    Runs until the stream ends, the session is marked "stopped", or the task
    is cancelled. On exit the classifier is stopped and the session is marked
    "stopped".
    """
    from cf_voice.context import ContextClassifier

    classifier = ContextClassifier.from_env()
    try:
        async for frame in classifier.stream():
            if session.state == "stopped":
                break
            tone = annotate(frame, session_id=session.session_id, elcor=session.elcor)
            if tone is None:
                # annotate() produced nothing to emit for this frame
                continue
            session.broadcast(tone)
    except asyncio.CancelledError:
        # Cancellation is treated as a normal shutdown; cleanup happens below.
        pass
    finally:
        await classifier.stop()
        session.state = "stopped"
        logger.info("Classifier stopped for session %s", session.session_id)
async def _run_classifier_sidecar(session: Session) -> None:
    """
    Sidecar mode: idle until the session stops running.

    No frames are generated here — audio arrives from the browser WebSocket
    and is posted to cf-voice /classify by forward_audio_chunk(). This task
    only holds the session open, polling its state once per second, and marks
    it "stopped" on exit.
    """
    try:
        while True:
            if session.state != "running":
                break
            await asyncio.sleep(1.0)
    except asyncio.CancelledError:
        # Cancellation is treated as a normal shutdown; cleanup happens below.
        pass
    finally:
        session.state = "stopped"
        logger.info("Sidecar session ended for session %s", session.session_id)
async def forward_audio_chunk(
    session: Session,
    audio_b64: str,
    timestamp: float,
) -> None:
    """
    Accumulate PCM chunks into _CLASSIFY_WINDOW_MS (1 s) windows, then forward
    each full window to cf-voice /classify and broadcast the returned events.

    The AudioWorklet sends 100ms chunks; we buffer _CHUNKS_PER_WINDOW (10) of
    them before firing a classify call. This gives wav2vec2 a full second of
    context and keeps the classify rate at 1/s instead of 10/s.

    No-op when no cf-voice sidecar is allocated (in-process path handles its
    own input).

    Args:
        session: Session the audio belongs to; events are broadcast on it.
        audio_b64: Base64-encoded PCM chunk.
        timestamp: Timestamp of this chunk. The value sent to cf-voice is the
            timestamp of the chunk that completes the window.

    Sidecar call failures and malformed events in the response are logged as
    warnings and skipped; they do not raise.
    """
    voice_url = session.cf_voice_url
    if not voice_url:
        return

    import base64 as _b64
    import httpx
    from app.models.speaker_event import SpeakerEvent
    from cf_voice.models import VoiceFrame

    # Decode incoming chunk and append to per-session buffer
    raw = _b64.b64decode(audio_b64)
    buf = _audio_buffers.setdefault(session.session_id, [])
    buf.append(raw)
    if len(buf) < _CHUNKS_PER_WINDOW:
        return  # not enough audio yet — wait for more chunks

    # Flush: concatenate window, reset buffer. The buffer is cleared before
    # the await below, so chunks arriving during the HTTP call start a fresh
    # window.
    window_bytes = b"".join(buf)
    buf.clear()
    window_b64 = _b64.b64encode(window_bytes).decode()

    url = voice_url.rstrip("/") + "/classify"
    payload = {
        "audio_chunk": window_b64,
        "timestamp": timestamp,
        "elcor": session.elcor,
        "session_id": session.session_id,
    }
    try:
        async with httpx.AsyncClient(timeout=5.0) as client:
            resp = await client.post(url, json=payload)
            resp.raise_for_status()
            data = resp.json()
    except Exception as exc:
        logger.warning("cf-voice sidecar call failed for session %s: %s", session.session_id, exc)
        return

    from app.models.queue_event import QueueEvent
    from app.models.transcript_event import TranscriptEvent

    # Tolerate a non-object body or an explicit `"events": null`.
    events = (data.get("events") or []) if isinstance(data, dict) else []

    for ev in events:
        try:
            etype = ev.get("event_type")
            if etype == "tone":
                frame = VoiceFrame(
                    label=ev["label"],
                    confidence=ev["confidence"],
                    speaker_id=ev.get("speaker_id", ""),
                    shift_magnitude=ev.get("shift_magnitude", 0.0),
                    timestamp=ev["timestamp"],
                )
                # annotate() may return None, meaning nothing to broadcast
                outgoing = annotate(frame, session_id=session.session_id, elcor=session.elcor)
            elif etype == "speaker":
                outgoing = SpeakerEvent(
                    session_id=session.session_id,
                    label=ev["label"],
                    confidence=ev.get("confidence", 1.0),
                    timestamp=ev["timestamp"],
                )
            elif etype == "transcript":
                outgoing = TranscriptEvent(
                    session_id=session.session_id,
                    text=ev["label"],
                    speaker_id=ev.get("speaker_id", "speaker_a"),
                    timestamp=ev["timestamp"],
                )
            elif etype in ("queue", "environ"):
                outgoing = QueueEvent(
                    session_id=session.session_id,
                    event_type=etype,
                    label=ev["label"],
                    confidence=ev.get("confidence", 1.0),
                    timestamp=ev["timestamp"],
                )
            else:
                # Unknown event types are ignored
                outgoing = None
        except (AttributeError, KeyError, TypeError, ValueError) as exc:
            # One bad event must not discard the rest of the batch. Log only
            # the exception — the payload may contain transcript text.
            logger.warning(
                "Skipping malformed cf-voice event for session %s: %r",
                session.session_id, exc,
            )
            continue

        if outgoing is not None:
            session.broadcast(outgoing)
# ── Idle session reaper ───────────────────────────────────────────────────────
async def _reaper_loop() -> None:
    """
    Periodically kill sessions with no active SSE subscribers.

    Each pass is delegated to _reaper_loop_once(), which reaps a session when:
    - Its state is "running" (it has not been explicitly ended)
    - It currently has zero subscribers
    - Its last SSE subscriber disconnected more than SESSION_IDLE_TTL_S seconds ago

    This covers the common mobile pattern: screen locks → browser suspends tab →
    EventSource closes → SSE subscriber count drops to zero. If the user doesn't
    return within the TTL window, the session is cleaned up automatically.

    The check interval is half the TTL with a floor of 15 seconds, so a session
    may outlive its TTL by up to one interval before it is reaped.

    Runs until cancelled. A failing pass is logged and the loop continues, so
    one bad teardown cannot permanently disable reaping.
    """
    ttl = settings.session_idle_ttl_s
    interval = max(15, ttl // 2)
    logger.info("Session reaper started (TTL=%ds, check every %ds)", ttl, interval)
    while True:
        await asyncio.sleep(interval)
        try:
            await _reaper_loop_once()
        except asyncio.CancelledError:
            # Shutdown requested via stop_reaper(); let it propagate.
            raise
        except Exception:
            logger.exception("Session reaper pass failed; retrying in %ds", interval)
async def _reaper_loop_once() -> None:
    """Run one reaper pass without sleeping — used by tests.

    Ends every session that is "running", has zero subscribers, and whose last
    subscriber left more than ``settings.session_idle_ttl_s`` seconds ago.
    """
    ttl = settings.session_idle_ttl_s
    now = time.monotonic()

    def _is_idle(session: Session) -> bool:
        if session.state != "running":
            return False
        if session.subscriber_count() != 0:
            return False
        left_at = session.last_subscriber_left_at
        if left_at is None:
            # last_subscriber_left_at unset — nothing to measure idleness from
            return False
        return (now - left_at) > ttl

    # Select first, end afterwards: end_session() mutates _sessions and awaits.
    expired = [sid for sid, session in list(_sessions.items()) if _is_idle(session)]

    for sid in expired:
        logger.info("Reaping idle session %s (no subscribers for >%ds)", sid, ttl)
        await end_session(sid)
def start_reaper() -> None:
    """Launch the idle-session reaper task unless one is already running.

    Called from app lifespan.
    """
    global _reaper_task
    already_running = _reaper_task is not None and not _reaper_task.done()
    if already_running:
        return
    _reaper_task = asyncio.create_task(_reaper_loop(), name="session-reaper")
async def stop_reaper() -> None:
    """Cancel the reaper task and wait for it to finish.

    Called from app lifespan shutdown. The module-level task handle is cleared
    afterwards.
    """
    global _reaper_task
    if _reaper_task is not None and not _reaper_task.done():
        _reaper_task.cancel()
        try:
            await _reaper_task
        except asyncio.CancelledError:
            # Expected: this is the cancellation requested just above.
            pass
    _reaper_task = None