docs: update README/INTERNALS for import feature, harden .gitignore

This commit is contained in:
P0luz
2026-04-19 12:09:53 +08:00
parent a09fbfe13a
commit 821546d5de
27 changed files with 5365 additions and 479 deletions

111
tests/test_llm_quality.py Normal file
View File

@@ -0,0 +1,111 @@
# ============================================================
# Test 2: LLM Quality Baseline — needs GEMINI_API_KEY
# 测试 2LLM 质量基准 —— 需要 GEMINI_API_KEY
#
# Verifies LLM auto-tagging returns reasonable results:
# - domain is a non-empty list of strings
# - valence ∈ [0, 1]
# - arousal ∈ [0, 1]
# - tags is a list
# - suggested_name is a string
# - domain matches content semantics (loose check)
# ============================================================
import os
import pytest
# Skip all tests if no API key
pytestmark = pytest.mark.skipif(
not os.environ.get("OMBRE_API_KEY"),
reason="OMBRE_API_KEY not set — skipping LLM quality tests"
)
@pytest.fixture
def dehydrator(test_config):
from dehydrator import Dehydrator
return Dehydrator(test_config)
# Test cases: (content, expected_domains_superset, valence_range)
LLM_CASES = [
(
"今天学了 Python 的 asyncio终于搞懂了 event loop心情不错",
{"学习", "编程", "技术", "数字", "Python"},
(0.5, 1.0), # positive
),
(
"被导师骂了一顿,论文写得太差了,很沮丧",
{"学习", "学业", "心理", "工作"},
(0.0, 0.4), # negative
),
(
"和朋友去爬了一座山,山顶的风景超美,累但值得",
{"生活", "旅行", "社交", "运动", "健康"},
(0.6, 1.0), # positive
),
(
"在阳台上看日落,什么都没想,很平静",
{"生活", "心理", "自省"},
(0.4, 0.8), # calm positive
),
(
"I built a FastAPI app with Docker and deployed it on Render",
{"编程", "技术", "学习", "数字", "工作"},
(0.5, 1.0), # positive
),
]
class TestLLMQuality:
"""Verify LLM auto-tagging produces reasonable outputs."""
@pytest.mark.asyncio
@pytest.mark.parametrize("content,expected_domains,valence_range", LLM_CASES)
async def test_analyze_structure(self, dehydrator, content, expected_domains, valence_range):
"""Check that analyze() returns valid structure and reasonable values."""
result = await dehydrator.analyze(content)
# Structure checks
assert isinstance(result, dict)
assert "domain" in result
assert "valence" in result
assert "arousal" in result
assert "tags" in result
# Domain is non-empty list of strings
assert isinstance(result["domain"], list)
assert len(result["domain"]) >= 1
assert all(isinstance(d, str) for d in result["domain"])
# Valence and arousal in range
assert 0.0 <= result["valence"] <= 1.0, f"valence {result['valence']} out of range"
assert 0.0 <= result["arousal"] <= 1.0, f"arousal {result['arousal']} out of range"
# Valence roughly matches expected range (with tolerance)
lo, hi = valence_range
assert lo - 0.15 <= result["valence"] <= hi + 0.15, \
f"valence {result['valence']} not in expected range ({lo}, {hi}) for: {content[:30]}..."
# Tags is a list
assert isinstance(result["tags"], list)
@pytest.mark.asyncio
async def test_analyze_domain_semantic_match(self, dehydrator):
"""Check that domain has at least some semantic relevance."""
result = await dehydrator.analyze("我家的橘猫小橘今天又偷吃了桌上的鱼")
domains = set(result["domain"])
# Should contain something life/pet related
life_related = {"生活", "宠物", "家庭", "日常", "动物"}
assert domains & life_related, f"Expected life-related domain, got {domains}"
@pytest.mark.asyncio
async def test_analyze_empty_content(self, dehydrator):
"""Empty content should raise or return defaults gracefully."""
try:
result = await dehydrator.analyze("")
# If it doesn't raise, should still return valid structure
assert isinstance(result, dict)
assert 0.0 <= result["valence"] <= 1.0
except Exception:
pass # Raising is also acceptable