"""Tests for app/context/chunker.py.""" import pytest from app.context.chunker import ( detect_type, extract_facts_from_yaml, chunk_text, process_upload, UnsupportedDocType, FileTooLarge, ) def test_detect_type_yaml(): assert detect_type("compose.yml", b"") == "yaml" assert detect_type("docker-compose.yaml", b"") == "yaml" def test_detect_type_json(): assert detect_type("config.json", b"") == "json" def test_detect_type_markdown(): assert detect_type("runbook.md", b"") == "markdown" def test_detect_type_text(): assert detect_type("notes.txt", b"") == "text" def test_detect_type_unsupported(): with pytest.raises(UnsupportedDocType, match=".pdf"): detect_type("report.pdf", b"") def test_extract_facts_from_yaml_docker_compose(): yaml_text = """ services: plex: image: plexinc/pms-docker ports: - "32400:32400" sonarr: image: linuxserver/sonarr ports: - "8989:8989" """ facts = extract_facts_from_yaml(yaml_text) keys = [f.key for f in facts] assert "plex" in keys assert "sonarr" in keys plex_fact = next(f for f in facts if f.key == "plex") assert "port:32400:32400" in plex_fact.value or "port:" in plex_fact.value assert plex_fact.category == "service" def test_extract_facts_from_yaml_non_compose(): yaml_text = "foo: bar\nbaz: 42\n" facts = extract_facts_from_yaml(yaml_text) assert facts == [] def test_extract_facts_from_yaml_invalid(): facts = extract_facts_from_yaml("{{{{not yaml") assert facts == [] def test_chunk_text_basic(): words = ["word"] * 600 text = " ".join(words) chunks = chunk_text(text, chunk_size=300, overlap=50) assert len(chunks) >= 2 for c in chunks: assert c.strip() def test_chunk_text_short(): chunks = chunk_text("short text", chunk_size=300, overlap=50) assert len(chunks) == 1 assert chunks[0] == "short text" def test_chunk_text_empty(): assert chunk_text("") == [] def test_process_upload_yaml_extracts_facts(): yaml_bytes = b""" services: nginx: image: nginx:latest ports: - "80:80" """ doc_type, facts, chunks = process_upload("docker-compose.yml", yaml_bytes) assert doc_type == "yaml" assert any(f.key == "nginx" for f in facts) assert len(chunks) >= 1 def test_process_upload_markdown_no_facts(): md = b"# Plex Troubleshooting\n\nRestart the service with systemctl restart plex.\n" doc_type, facts, chunks = process_upload("plex.md", md) assert doc_type == "markdown" assert facts == [] assert len(chunks) >= 1 def test_process_upload_too_large(): big = b"x" * (6 * 1024 * 1024) with pytest.raises(FileTooLarge): process_upload("big.txt", big) def test_process_upload_unsupported_type(): with pytest.raises(UnsupportedDocType): process_upload("report.pdf", b"data")