Two-phase streaming architecture:
Phase 1 (sync thread): IngredientClassifier builds element profiles +
gap list from SQLite — thread-safe, no async context needed
Phase 2 (async): LLMRecipeGenerator.stream_generate() yields tokens via the
cf-orch warm vLLM instance (existing /stream-token path), or via AsyncOpenAI
against Ollama if the coordinator is unavailable
Backend (app/services/recipe/llm_recipe.py):
- stream_generate() async generator; _try_alloc_for_stream() sync helper
- _stream_openai_compat() static method handles __auto__ model resolution
- LLMRecipeGenerator(None) is safe for streaming (store not used)
Endpoint (app/api/endpoints/recipes.py):
- ?stream=true on POST /recipes/suggest returns StreamingResponse
- The "X-Accel-Buffering: no" response header disables nginx proxy buffering for the stream, so no nginx.conf edits are needed
Frontend (api.ts, recipes.ts, RecipesView.vue):
- suggestRecipeStream() uses fetch + ReadableStream (POST; EventSource
only supports GET)
- streamSuggest() action in recipes store builds request internally
- RecipesView.streamRecipe() silently falls back to native SSE when
cf-orch token fetch fails rather than surfacing an error
67 lines
2.8 KiB
Text
67 lines
2.8 KiB
Text
# Client address forwarded to the API as X-Real-IP.
# Use the X-Real-IP header set by Caddy when it is present; otherwise fall back
# to the TCP peer address ($remote_addr), which is the real client when nginx is
# reached directly on the LAN without Caddy in the path.
# `map` is only valid at http level — the same level this file's top-level
# `server` block is included at.
map $http_x_real_ip $kiwi_client_ip {
    default $http_x_real_ip;
    ""      $remote_addr;
}

server {
    listen 80;
    server_name _;

    root /usr/share/nginx/html;
    index index.html;

    # Proxy API requests to the FastAPI container via Docker bridge network.
    # ^~ stops regex locations (the static-asset rule at the bottom of this
    # block) from capturing API URLs that happen to end in .png/.jpg/.js etc.
    location ^~ /api/ {
        proxy_pass http://api:8512;
        proxy_set_header Host $http_host;
        # Prefer X-Real-IP set by Caddy (real client address); fall back to $remote_addr
        # when accessed directly on LAN without Caddy in the path (see map above).
        proxy_set_header X-Real-IP $kiwi_client_ip;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        # Passed through from Caddy; nginx omits the header entirely when the
        # incoming value is empty (direct access).
        proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
        # Forward the session header injected by Caddy from cf_session cookie.
        proxy_set_header X-CF-Session $http_x_cf_session;
        # Allow image uploads (barcode/receipt photos from phone cameras).
        client_max_body_size 20m;
        # LLM inference (recipe suggestions, expiry fallback) can take 60-120s.
        # Default proxy_read_timeout is 60s which causes 504s on full recipe generation.
        proxy_read_timeout 180s;
        proxy_send_timeout 180s;
    }

    # Direct-port LAN access (localhost:8515): when VITE_API_BASE='/kiwi', the frontend
    # builds API calls as /kiwi/api/v1/... — proxy these to the API container.
    # Through Caddy the /kiwi prefix is stripped before reaching nginx, so this block
    # is only active for direct-port access without Caddy in the path.
    # Longer prefix (/kiwi/api/ = 10 chars) beats ^~/kiwi/ (6 chars) per nginx rules.
    # ^~ here keeps the static-asset regex location from overriding the proxy
    # for API paths with a static-looking extension.
    location ^~ /kiwi/api/ {
        # Strip the /kiwi prefix; `break` keeps processing in this location so
        # the rewritten /api/... URI is what proxy_pass sends upstream.
        rewrite ^/kiwi(/api/.*)$ $1 break;
        proxy_pass http://api:8512;
        proxy_set_header Host $http_host;
        # Same Caddy-or-$remote_addr resolution as the /api/ location.
        proxy_set_header X-Real-IP $kiwi_client_ip;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $http_x_forwarded_proto;
        proxy_set_header X-CF-Session $http_x_cf_session;
        client_max_body_size 20m;
        proxy_read_timeout 180s;
        proxy_send_timeout 180s;
    }

    # When accessed directly (localhost:8515) instead of via Caddy (/kiwi path-strip),
    # Vite's /kiwi base URL means assets are requested at /kiwi/assets/... but stored
    # at /assets/... in nginx's root. Alias /kiwi/ → root so direct port access works.
    # ^~ prevents regex locations from overriding this prefix match for /kiwi/ paths.
    location ^~ /kiwi/ {
        alias /usr/share/nginx/html/;
        try_files $uri $uri/ /index.html;
    }

    # The SPA entry point must never be cached, so clients always see the
    # index.html that is currently on disk.
    location = /index.html {
        add_header Cache-Control "no-cache, no-store, must-revalidate";
        try_files $uri /index.html;
    }

    # SPA fallback: unknown paths are answered with index.html.
    location / {
        try_files $uri $uri/ /index.html;
    }

    # Static assets: one-year expiry, marked immutable.
    location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff2?)$ {
        expires 1y;
        add_header Cache-Control "public, immutable";
    }
}