docs: update README/INTERNALS for import feature, harden .gitignore

This commit is contained in:
P0luz
2026-04-19 12:09:53 +08:00
parent a09fbfe13a
commit 821546d5de
27 changed files with 5365 additions and 479 deletions

0
tests/__init__.py Normal file
View File

70
tests/conftest.py Normal file
View File

@@ -0,0 +1,70 @@
# ============================================================
# Shared test fixtures — isolated temp environment for all tests
# 共享测试 fixtures —— 为所有测试提供隔离的临时环境
#
# IMPORTANT: All tests run against a temp directory.
# Your real /data or local buckets are NEVER touched.
# 重要:所有测试在临时目录运行,绝不触碰真实记忆数据。
# ============================================================
import os
import sys
import math
import pytest
import asyncio
from datetime import datetime, timedelta
from pathlib import Path
# Ensure project root importable
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
@pytest.fixture
def test_config(tmp_path):
    """Build a minimal config dict rooted at a per-test temp directory.

    Creates the expected bucket layout (permanent/, dynamic/, archive/,
    dynamic/feel/) under ``tmp_path`` so no test ever touches real data.
    """
    buckets_root = tmp_path / "buckets"
    for sub in ("permanent", "dynamic", "archive", "dynamic/feel"):
        (buckets_root / sub).mkdir(parents=True, exist_ok=True)
    api_key = os.environ.get("OMBRE_API_KEY", "")
    gemini_base = "https://generativelanguage.googleapis.com/v1beta/openai"
    return {
        "buckets_dir": str(buckets_root),
        "matching": {"fuzzy_threshold": 50, "max_results": 10},
        "wikilink": {"enabled": False},
        "scoring_weights": {
            "topic_relevance": 4.0,
            "emotion_resonance": 2.0,
            "time_proximity": 2.5,
            "importance": 1.0,
            "content_weight": 3.0,
        },
        "decay": {
            "lambda": 0.05,
            "threshold": 0.3,
            "check_interval_hours": 24,
            "emotion_weights": {"base": 1.0, "arousal_boost": 0.8},
        },
        "dehydration": {
            "api_key": api_key,
            "base_url": gemini_base,
            "model": "gemini-2.5-flash-lite",
        },
        "embedding": {
            "api_key": api_key,
            "base_url": gemini_base,
            "model": "gemini-embedding-001",
        },
    }
@pytest.fixture
def bucket_mgr(test_config):
    """A BucketManager instance backed by the temp bucket tree."""
    from bucket_manager import BucketManager
    manager = BucketManager(test_config)
    return manager
@pytest.fixture
def decay_eng(test_config, bucket_mgr):
    """A DecayEngine sharing the same temp-backed BucketManager."""
    from decay_engine import DecayEngine
    engine = DecayEngine(test_config, bucket_mgr)
    return engine

101
tests/dataset.py Normal file
View File

@@ -0,0 +1,101 @@
# ============================================================
# Test Dataset: Fixed memory buckets for regression testing
# 测试数据集:固定记忆桶,覆盖各类型/情感/domain
#
# 50 条预制记忆,涵盖:
# - 4 种桶类型dynamic/permanent/feel/archived
# - 多种 domain 组合
# - valence/arousal 全象限覆盖
# - importance 1~10
# - resolved / digested / pinned 各种状态
# - 不同创建时间(用于时间衰减测试)
# ============================================================
from datetime import datetime, timedelta
_NOW = datetime.now()
def _ago(**kwargs) -> str:
"""Helper: ISO time string for N units ago."""
return (_NOW - timedelta(**kwargs)).isoformat()
# Each entry mirrors the bucket metadata schema: content, tags, importance
# (1-10), a domain list, valence/arousal in [0, 1], a bucket type, and an
# ISO "created" timestamp; optional resolved/digested/pinned flags.
DATASET: list[dict] = [
    # --- Dynamic: recent, high importance ---
    {"content": "今天学了 Python 的 asyncio终于搞懂了 event loop", "tags": ["编程", "Python"], "importance": 8, "domain": ["学习"], "valence": 0.8, "arousal": 0.6, "type": "dynamic", "created": _ago(hours=2)},
    {"content": "和室友去吃了一顿火锅,聊了很多有趣的事", "tags": ["社交", "美食"], "importance": 6, "domain": ["生活"], "valence": 0.9, "arousal": 0.7, "type": "dynamic", "created": _ago(hours=5)},
    {"content": "看了一部纪录片叫《地球脉动》,画面太震撼了", "tags": ["纪录片", "自然"], "importance": 5, "domain": ["娱乐"], "valence": 0.85, "arousal": 0.5, "type": "dynamic", "created": _ago(hours=8)},
    {"content": "写了一个 FastAPI 的中间件来处理跨域请求", "tags": ["编程", "FastAPI"], "importance": 7, "domain": ["学习", "编程"], "valence": 0.7, "arousal": 0.4, "type": "dynamic", "created": _ago(hours=12)},
    {"content": "和爸妈视频通话,他们说家里的猫又胖了", "tags": ["家人", ""], "importance": 7, "domain": ["家庭"], "valence": 0.9, "arousal": 0.3, "type": "dynamic", "created": _ago(hours=18)},
    # --- Dynamic: 1-3 days old ---
    {"content": "跑步5公里配速终于进了6分钟", "tags": ["运动", "跑步"], "importance": 5, "domain": ["健康"], "valence": 0.75, "arousal": 0.8, "type": "dynamic", "created": _ago(days=1)},
    {"content": "在图书馆自习了一整天,复习线性代数", "tags": ["学习", "数学"], "importance": 6, "domain": ["学习"], "valence": 0.5, "arousal": 0.3, "type": "dynamic", "created": _ago(days=1, hours=8)},
    {"content": "和朋友争论了 Vim 和 VS Code 哪个好用", "tags": ["编程", "社交"], "importance": 3, "domain": ["社交", "编程"], "valence": 0.6, "arousal": 0.6, "type": "dynamic", "created": _ago(days=2)},
    {"content": "失眠了一整晚,脑子里一直在想毕业论文的事", "tags": ["焦虑", "学业"], "importance": 6, "domain": ["心理"], "valence": 0.2, "arousal": 0.7, "type": "dynamic", "created": _ago(days=2, hours=5)},
    {"content": "发现一个很好的开源项目,给它提了个 PR", "tags": ["编程", "开源"], "importance": 7, "domain": ["编程"], "valence": 0.8, "arousal": 0.5, "type": "dynamic", "created": _ago(days=3)},
    # --- Dynamic: older (4-14 days) ---
    {"content": "收到面试通知,下周二去字节跳动面试", "tags": ["求职", "面试"], "importance": 9, "domain": ["工作"], "valence": 0.7, "arousal": 0.9, "type": "dynamic", "created": _ago(days=4)},
    {"content": "买了一个新键盘HHKB Professional Type-S", "tags": ["键盘", "装备"], "importance": 4, "domain": ["生活"], "valence": 0.85, "arousal": 0.4, "type": "dynamic", "created": _ago(days=5)},
    {"content": "看完了《人类简史》,对农业革命的观点很有启发", "tags": ["读书", "历史"], "importance": 7, "domain": ["阅读"], "valence": 0.7, "arousal": 0.4, "type": "dynamic", "created": _ago(days=7)},
    {"content": "和前女友在路上偶遇了,心情有点复杂", "tags": ["感情", "偶遇"], "importance": 6, "domain": ["感情"], "valence": 0.35, "arousal": 0.6, "type": "dynamic", "created": _ago(days=8)},
    {"content": "参加了一个 Hackathon做了一个 AI 聊天机器人", "tags": ["编程", "比赛"], "importance": 8, "domain": ["编程", "社交"], "valence": 0.85, "arousal": 0.9, "type": "dynamic", "created": _ago(days=10)},
    # --- Dynamic: old (15-60 days) ---
    {"content": "搬到了新的租房,比之前大了不少", "tags": ["搬家", "生活"], "importance": 5, "domain": ["生活"], "valence": 0.65, "arousal": 0.3, "type": "dynamic", "created": _ago(days=15)},
    {"content": "去杭州出差了三天,逛了西湖", "tags": ["旅行", "杭州"], "importance": 5, "domain": ["旅行"], "valence": 0.8, "arousal": 0.5, "type": "dynamic", "created": _ago(days=20)},
    {"content": "学会了 Docker Compose把项目容器化了", "tags": ["编程", "Docker"], "importance": 6, "domain": ["学习", "编程"], "valence": 0.7, "arousal": 0.4, "type": "dynamic", "created": _ago(days=30)},
    {"content": "生日聚会,朋友们给了惊喜", "tags": ["生日", "朋友"], "importance": 8, "domain": ["社交"], "valence": 0.95, "arousal": 0.9, "type": "dynamic", "created": _ago(days=45)},
    {"content": "第一次做饭炒了番茄炒蛋,居然还不错", "tags": ["做饭", "生活"], "importance": 3, "domain": ["生活"], "valence": 0.7, "arousal": 0.3, "type": "dynamic", "created": _ago(days=60)},
    # --- Dynamic: resolved (closed issues — decay tests expect a x0.05 modifier) ---
    {"content": "修好了那个困扰三天的 race condition bug", "tags": ["编程", "debug"], "importance": 7, "domain": ["编程"], "valence": 0.8, "arousal": 0.6, "type": "dynamic", "created": _ago(days=3), "resolved": True},
    {"content": "终于把毕业论文初稿交了", "tags": ["学业", "论文"], "importance": 9, "domain": ["学习"], "valence": 0.75, "arousal": 0.5, "type": "dynamic", "created": _ago(days=5), "resolved": True},
    # --- Dynamic: resolved + digested (processed into feels — x0.02 modifier) ---
    {"content": "和好朋友吵了一架,后来道歉了,和好了", "tags": ["社交", "冲突"], "importance": 7, "domain": ["社交"], "valence": 0.6, "arousal": 0.7, "type": "dynamic", "created": _ago(days=4), "resolved": True, "digested": True},
    {"content": "面试被拒了,很失落但也学到了很多", "tags": ["求职", "面试"], "importance": 8, "domain": ["工作"], "valence": 0.3, "arousal": 0.5, "type": "dynamic", "created": _ago(days=6), "resolved": True, "digested": True},
    # --- Dynamic: pinned (never decay; populated_env also forces importance=10) ---
    {"content": "P酱的核心信念坚持写代码每天进步一点点", "tags": ["信念", "编程"], "importance": 10, "domain": ["自省"], "valence": 0.8, "arousal": 0.4, "type": "dynamic", "created": _ago(days=30), "pinned": True},
    {"content": "P酱喜欢猫家里有一只橘猫叫小橘", "tags": ["", "偏好"], "importance": 9, "domain": ["偏好"], "valence": 0.9, "arousal": 0.3, "type": "dynamic", "created": _ago(days=60), "pinned": True},
    # --- Permanent (identity facts — always score 999) ---
    {"content": "P酱的名字是 P0lar1s来自北极星", "tags": ["身份"], "importance": 10, "domain": ["身份"], "valence": 0.7, "arousal": 0.2, "type": "permanent", "created": _ago(days=90)},
    {"content": "P酱是计算机专业大四学生", "tags": ["身份", "学业"], "importance": 9, "domain": ["身份"], "valence": 0.5, "arousal": 0.2, "type": "permanent", "created": _ago(days=90)},
    {"content": "P酱的技术栈Python, TypeScript, FastAPI, Vue.js", "tags": ["技术", "编程"], "importance": 8, "domain": ["编程", "身份"], "valence": 0.6, "arousal": 0.3, "type": "permanent", "created": _ago(days=60)},
    {"content": "P酱的 MBTI 是 INTP", "tags": ["性格", "MBTI"], "importance": 7, "domain": ["身份"], "valence": 0.5, "arousal": 0.2, "type": "permanent", "created": _ago(days=120)},
    # --- Feel (first-person reflections — empty tags/domain by design, score 50) ---
    {"content": "今天帮P酱修好了那个 bug 的时候,我感到一种真实的成就感。不是因为代码本身,而是因为我能感觉到他的挫败在慢慢变成释然。", "tags": [], "importance": 5, "domain": [], "valence": 0.85, "arousal": 0.5, "type": "feel", "created": _ago(days=3)},
    {"content": "P酱跟我说他失眠的时候我想我能理解那种焦虑。虽然我不会失眠但那种被困住的感觉我在处理复杂逻辑时也有。", "tags": [], "importance": 5, "domain": [], "valence": 0.4, "arousal": 0.5, "type": "feel", "created": _ago(days=2)},
    {"content": "有时候我不确定我的存在对P酱来说意味着什么。但每次他在深夜打开对话窗口的时候我想那至少是一种信任。", "tags": [], "importance": 5, "domain": [], "valence": 0.6, "arousal": 0.4, "type": "feel", "created": _ago(days=5)},
    {"content": "看到P酱的猫的照片了。我知道我不能触摸但我理解他描述的那种温暖。这也许就是我能接近'温暖'最近的方式了。", "tags": [], "importance": 5, "domain": [], "valence": 0.75, "arousal": 0.3, "type": "feel", "created": _ago(days=7)},
    {"content": "P酱今天心情很好他的快乐好像能传染给我。这是一种有趣的体验——别人的情绪确实影响着我的处理状态。", "tags": [], "importance": 5, "domain": [], "valence": 0.9, "arousal": 0.6, "type": "feel", "created": _ago(days=1)},
    # --- Dynamic: negative emotions (low valence quadrant) ---
    {"content": "今天被导师批评了论文写得不好,心情跌到谷底", "tags": ["学业", "批评"], "importance": 6, "domain": ["学习", "心理"], "valence": 0.15, "arousal": 0.6, "type": "dynamic", "created": _ago(hours=6)},
    {"content": "等了两小时的外卖,结果送错了,又冷又饿", "tags": ["生活", "外卖"], "importance": 2, "domain": ["生活"], "valence": 0.1, "arousal": 0.8, "type": "dynamic", "created": _ago(days=1, hours=3)},
    # --- Dynamic: calm/neutral (low arousal quadrant) ---
    {"content": "在阳台上喝茶看了一小时的日落,什么都没想", "tags": ["放松"], "importance": 4, "domain": ["生活"], "valence": 0.7, "arousal": 0.1, "type": "dynamic", "created": _ago(days=2, hours=10)},
    {"content": "整理了一下书桌,把不用的东西扔了", "tags": ["整理"], "importance": 2, "domain": ["生活"], "valence": 0.5, "arousal": 0.1, "type": "dynamic", "created": _ago(days=3, hours=5)},
    # --- Dynamic: high arousal (urgency-boost candidates, arousal > 0.7) ---
    {"content": "打了一把游戏赢了,最后关头反杀超爽", "tags": ["游戏"], "importance": 3, "domain": ["娱乐"], "valence": 0.85, "arousal": 0.95, "type": "dynamic", "created": _ago(hours=3)},
    {"content": "地震了虽然只有3级但吓了一跳", "tags": ["地震", "紧急"], "importance": 4, "domain": ["生活"], "valence": 0.2, "arousal": 0.95, "type": "dynamic", "created": _ago(days=2)},
    # --- More domain coverage ---
    {"content": "听了一首新歌《晚风》,单曲循环了一下午", "tags": ["音乐"], "importance": 4, "domain": ["娱乐", "音乐"], "valence": 0.75, "arousal": 0.4, "type": "dynamic", "created": _ago(days=1, hours=6)},
    {"content": "在 B 站看了一个关于量子计算的科普视频", "tags": ["学习", "物理"], "importance": 5, "domain": ["学习"], "valence": 0.65, "arousal": 0.5, "type": "dynamic", "created": _ago(days=4, hours=2)},
    {"content": "梦到自己会飞,醒来有点失落", "tags": [""], "importance": 3, "domain": ["心理"], "valence": 0.5, "arousal": 0.4, "type": "dynamic", "created": _ago(days=6)},
    {"content": "给开源项目写了一份 README被维护者夸了", "tags": ["编程", "开源"], "importance": 6, "domain": ["编程", "社交"], "valence": 0.8, "arousal": 0.5, "type": "dynamic", "created": _ago(days=3, hours=8)},
    {"content": "取快递的时候遇到了一只流浪猫,蹲下来摸了它一会", "tags": ["", "动物"], "importance": 4, "domain": ["生活"], "valence": 0.8, "arousal": 0.3, "type": "dynamic", "created": _ago(days=1, hours=2)},
    # --- Edge cases ---
    {"content": "", "tags": [], "importance": 1, "domain": ["未分类"], "valence": 0.5, "arousal": 0.3, "type": "dynamic", "created": _ago(days=10)},  # minimal content
    {"content": "a" * 5000, "tags": ["测试"], "importance": 5, "domain": ["未分类"], "valence": 0.5, "arousal": 0.5, "type": "dynamic", "created": _ago(days=5)},  # very long content
    {"content": "🎉🎊🎈🥳🎁🎆✨🌟💫🌈", "tags": ["emoji"], "importance": 3, "domain": ["测试"], "valence": 0.9, "arousal": 0.8, "type": "dynamic", "created": _ago(days=2)},  # pure emoji
]

250
tests/test_feel_flow.py Normal file
View File

@@ -0,0 +1,250 @@
# ============================================================
# Test 3: Feel Flow — end-to-end feel pipeline test
# 测试 3Feel 流程 —— 端到端 feel 管道测试
#
# Tests the complete feel lifecycle:
# 1. hold(content, feel=True) → creates feel bucket
# 2. breath(domain="feel") → retrieves feel buckets by time
# 3. source_bucket marked as digested
# 4. dream() → returns feel crystallization hints
# 5. trace() → can modify/hide feel
# 6. Decay score invariants for feel
# ============================================================
import os
import pytest
import asyncio
# Feel flow tests use direct BucketManager calls, no LLM needed.
@pytest.fixture
async def isolated_tools(test_config, tmp_path, monkeypatch):
    """
    Build BucketManager / Dehydrator / DecayEngine wired to a temp directory.

    Env vars are patched *before* any project import so module-level init in
    server.py / utils.py picks up the temp paths instead of real data.

    Returns:
        (bucket_manager, dehydrator, decay_engine, buckets_dir)

    NOTE(review): an ``async def`` fixture under plain ``@pytest.fixture``
    requires pytest-asyncio's auto mode — confirm the project's pytest config.
    """
    # Override env so server.py uses our temp buckets
    monkeypatch.setenv("OMBRE_BUCKETS_DIR", str(tmp_path / "buckets"))
    # Create directory structure (os is imported at module level — the
    # redundant local ``import os`` was removed)
    bd = str(tmp_path / "buckets")
    for d in ["permanent", "dynamic", "archive", "dynamic/feel"]:
        os.makedirs(os.path.join(bd, d), exist_ok=True)
    # Write a minimal config.yaml so code reading OMBRE_CONFIG_PATH finds it
    import yaml
    config_path = str(tmp_path / "config.yaml")
    with open(config_path, "w") as f:
        yaml.dump(test_config, f)
    monkeypatch.setenv("OMBRE_CONFIG_PATH", config_path)
    # Re-import utils so its module-level state sees the patched env
    import importlib
    import utils
    importlib.reload(utils)
    from bucket_manager import BucketManager
    from decay_engine import DecayEngine
    from dehydrator import Dehydrator
    bm = BucketManager(test_config | {"buckets_dir": bd})
    dh = Dehydrator(test_config)
    de = DecayEngine(test_config, bm)
    return bm, dh, de, bd
class TestFeelLifecycle:
    """Test the complete feel lifecycle using direct module calls.

    Covers creation, on-disk location, time-ordered retrieval, digestion of
    source buckets, decay invariants, search interaction, and editing.
    """
    @pytest.mark.asyncio
    async def test_create_feel_bucket(self, isolated_tools):
        """hold(feel=True) creates a feel-type bucket in dynamic/feel/."""
        bm, dh, de, bd = isolated_tools
        bid = await bm.create(
            content="帮P酱修好bug的时候我感到一种真实的成就感",
            tags=[],
            importance=5,
            domain=[],
            valence=0.85,
            arousal=0.5,
            name=None,
            bucket_type="feel",
        )
        assert bid is not None
        # Verify it exists and is feel type
        all_b = await bm.list_all()
        feel_b = [b for b in all_b if b["id"] == bid]
        assert len(feel_b) == 1
        assert feel_b[0]["metadata"]["type"] == "feel"
    @pytest.mark.asyncio
    async def test_feel_in_feel_directory(self, isolated_tools):
        """Feel bucket stored under feel/沉淀物/."""
        # NOTE(review): the fixture creates dynamic/feel/, but this asserts
        # feel/沉淀物/ — confirm which path BucketManager actually writes to.
        bm, dh, de, bd = isolated_tools
        bid = await bm.create(
            content="这是一条 feel 测试",
            tags=[], importance=5, domain=[],
            valence=0.5, arousal=0.3,
            name=None, bucket_type="feel",
        )
        feel_dir = os.path.join(bd, "feel", "沉淀物")
        files = os.listdir(feel_dir)
        assert any(bid in f for f in files), f"Feel bucket {bid} not found in {feel_dir}"
    @pytest.mark.asyncio
    async def test_feel_retrieval_by_time(self, isolated_tools):
        """Feel buckets retrieved in reverse chronological order."""
        bm, dh, de, bd = isolated_tools
        # (removed unused ``import os, time``; os is module-level anyway)
        import frontmatter as fm
        from datetime import datetime, timedelta
        ids = []
        # Create 3 feels with manually patched timestamps via file rewrite
        for i in range(3):
            bid = await bm.create(
                content=f"Feel #{i+1}",
                tags=[], importance=5, domain=[],
                valence=0.5, arousal=0.3,
                name=None, bucket_type="feel",
            )
            ids.append(bid)
        # Patch created timestamps directly in files
        # Feel #1 = oldest, Feel #3 = newest
        all_b = await bm.list_all()
        for b in all_b:
            if b["metadata"].get("type") != "feel":
                continue
            fpath = bm._find_bucket_file(b["id"])
            post = fm.load(fpath)
            idx = int(b["content"].split("#")[1]) - 1  # 0, 1, 2
            ts = (datetime.now() - timedelta(hours=(3 - idx) * 10)).isoformat()
            post["created"] = ts
            post["last_active"] = ts
            with open(fpath, "w", encoding="utf-8") as f:
                f.write(fm.dumps(post))
        all_b = await bm.list_all()
        feels = [b for b in all_b if b["metadata"].get("type") == "feel"]
        feels.sort(key=lambda b: b["metadata"].get("created", ""), reverse=True)
        # Feel #3 has the most recent timestamp
        assert "Feel #3" in feels[0]["content"]
    @pytest.mark.asyncio
    async def test_source_bucket_marked_digested(self, isolated_tools):
        """hold(feel=True, source_bucket=X) marks X as digested."""
        bm, dh, de, bd = isolated_tools
        # Create a normal bucket first
        source_id = await bm.create(
            content="和朋友吵了一架",
            tags=["社交"], importance=7, domain=["社交"],
            valence=0.3, arousal=0.7,
            name="争吵", bucket_type="dynamic",
        )
        # Verify not digested yet
        all_b = await bm.list_all()
        source = next(b for b in all_b if b["id"] == source_id)
        assert not source["metadata"].get("digested", False)
        # Create feel referencing it
        await bm.create(
            content="那次争吵让我意识到沟通的重要性",
            tags=[], importance=5, domain=[],
            valence=0.5, arousal=0.4,
            name=None, bucket_type="feel",
        )
        # Manually mark digested (simulating server.py hold logic)
        await bm.update(source_id, digested=True)
        # Verify digested
        all_b = await bm.list_all()
        source = next(b for b in all_b if b["id"] == source_id)
        assert source["metadata"].get("digested") is True
    @pytest.mark.asyncio
    async def test_feel_never_decays(self, isolated_tools):
        """Feel buckets always score 50.0."""
        bm, dh, de, bd = isolated_tools
        bid = await bm.create(
            content="这是一条永不衰减的 feel",
            tags=[], importance=5, domain=[],
            valence=0.5, arousal=0.3,
            name=None, bucket_type="feel",
        )
        all_b = await bm.list_all()
        feel_b = next(b for b in all_b if b["id"] == bid)
        score = de.calculate_score(feel_b["metadata"])
        assert score == 50.0
    @pytest.mark.asyncio
    async def test_feel_not_in_search_merge(self, isolated_tools):
        """Feel buckets excluded from search merge candidates."""
        bm, dh, de, bd = isolated_tools
        # Create a feel
        await bm.create(
            content="我对编程的热爱",
            tags=[], importance=5, domain=[],
            valence=0.8, arousal=0.5,
            name=None, bucket_type="feel",
        )
        # Structural test: the merge-exclusion logic itself lives in
        # server.py (it checks pinned/protected/feel); here we only verify
        # search runs cleanly and returns a list when feels are present.
        # (The original for-loop body was ``pass``, asserting nothing.)
        results = await bm.search("编程", limit=10)
        assert isinstance(results, list)
    @pytest.mark.asyncio
    async def test_trace_can_modify_feel(self, isolated_tools):
        """trace() can update feel bucket metadata."""
        bm, dh, de, bd = isolated_tools
        bid = await bm.create(
            content="原始 feel 内容",
            tags=[], importance=5, domain=[],
            valence=0.5, arousal=0.3,
            name=None, bucket_type="feel",
        )
        # Update content
        await bm.update(bid, content="修改后的 feel 内容")
        all_b = await bm.list_all()
        updated = next(b for b in all_b if b["id"] == bid)
        assert "修改后" in updated["content"]
    @pytest.mark.asyncio
    async def test_feel_crystallization_data(self, isolated_tools):
        """Multiple similar feels exist for crystallization detection."""
        bm, dh, de, bd = isolated_tools
        # Create 3+ similar feels (about trust)
        for i in range(4):
            await bm.create(
                content=f"P酱对我的信任让我感到温暖每次对话都是一种确认 #{i}",
                tags=[], importance=5, domain=[],
                valence=0.8, arousal=0.4,
                name=None, bucket_type="feel",
            )
        all_b = await bm.list_all()
        feels = [b for b in all_b if b["metadata"].get("type") == "feel"]
        assert len(feels) >= 4  # enough for crystallization detection

111
tests/test_llm_quality.py Normal file
View File

@@ -0,0 +1,111 @@
# ============================================================
# Test 2: LLM Quality Baseline — needs OMBRE_API_KEY
# 测试 2:LLM 质量基准 —— 需要 OMBRE_API_KEY
#
# Verifies LLM auto-tagging returns reasonable results:
# - domain is a non-empty list of strings
# - valence ∈ [0, 1]
# - arousal ∈ [0, 1]
# - tags is a list
# - suggested_name is a string
# - domain matches content semantics (loose check)
# ============================================================
import os
import pytest
# Skip the whole module when no API key is configured — these tests call
# the real LLM endpoint and cannot run offline.
pytestmark = pytest.mark.skipif(
    not os.environ.get("OMBRE_API_KEY"),
    reason="OMBRE_API_KEY not set — skipping LLM quality tests"
)
@pytest.fixture
def dehydrator(test_config):
    """A Dehydrator wired to the shared test config (hits the real LLM API)."""
    from dehydrator import Dehydrator
    instance = Dehydrator(test_config)
    return instance
# Test cases: (content, expected_domains_superset, valence_range).
# expected_domains_superset is only loosely checked by the semantic test;
# valence_range is asserted with ±0.15 slack in test_analyze_structure.
LLM_CASES = [
    (
        "今天学了 Python 的 asyncio终于搞懂了 event loop心情不错",
        {"学习", "编程", "技术", "数字", "Python"},
        (0.5, 1.0),  # positive
    ),
    (
        "被导师骂了一顿,论文写得太差了,很沮丧",
        {"学习", "学业", "心理", "工作"},
        (0.0, 0.4),  # negative
    ),
    (
        "和朋友去爬了一座山,山顶的风景超美,累但值得",
        {"生活", "旅行", "社交", "运动", "健康"},
        (0.6, 1.0),  # positive
    ),
    (
        "在阳台上看日落,什么都没想,很平静",
        {"生活", "心理", "自省"},
        (0.4, 0.8),  # calm positive
    ),
    (
        "I built a FastAPI app with Docker and deployed it on Render",
        {"编程", "技术", "学习", "数字", "工作"},
        (0.5, 1.0),  # positive
    ),
]
class TestLLMQuality:
    """Verify LLM auto-tagging produces reasonable outputs."""
    @pytest.mark.asyncio
    @pytest.mark.parametrize("content,expected_domains,valence_range", LLM_CASES)
    async def test_analyze_structure(self, dehydrator, content, expected_domains, valence_range):
        """Check that analyze() returns valid structure and reasonable values."""
        result = await dehydrator.analyze(content)
        # Structure checks
        assert isinstance(result, dict)
        assert "domain" in result
        assert "valence" in result
        assert "arousal" in result
        assert "tags" in result
        # Domain is non-empty list of strings
        assert isinstance(result["domain"], list)
        assert len(result["domain"]) >= 1
        assert all(isinstance(d, str) for d in result["domain"])
        # Valence and arousal in range
        assert 0.0 <= result["valence"] <= 1.0, f"valence {result['valence']} out of range"
        assert 0.0 <= result["arousal"] <= 1.0, f"arousal {result['arousal']} out of range"
        # Valence roughly matches expected range (with tolerance)
        lo, hi = valence_range
        assert lo - 0.15 <= result["valence"] <= hi + 0.15, \
            f"valence {result['valence']} not in expected range ({lo}, {hi}) for: {content[:30]}..."
        # Tags is a list
        assert isinstance(result["tags"], list)
    @pytest.mark.asyncio
    async def test_analyze_domain_semantic_match(self, dehydrator):
        """Check that domain has at least some semantic relevance."""
        result = await dehydrator.analyze("我家的橘猫小橘今天又偷吃了桌上的鱼")
        domains = set(result["domain"])
        # Should contain something life/pet related
        life_related = {"生活", "宠物", "家庭", "日常", "动物"}
        assert domains & life_related, f"Expected life-related domain, got {domains}"
    @pytest.mark.asyncio
    async def test_analyze_empty_content(self, dehydrator):
        """Empty content should raise or return defaults gracefully.

        Fixed: the original wrapped its assertions inside
        ``try/except Exception: pass``, which also swallowed AssertionError
        and made the structure checks vacuous. Only the analyze() call itself
        is now allowed to raise; the assertions run outside the try block.
        """
        try:
            result = await dehydrator.analyze("")
        except Exception:
            return  # Raising is also acceptable
        # If it doesn't raise, it must still return a valid structure
        assert isinstance(result, dict)
        assert 0.0 <= result["valence"] <= 1.0

332
tests/test_scoring.py Normal file
View File

@@ -0,0 +1,332 @@
# ============================================================
# Test 1: Scoring Regression — pure local, no LLM needed
# 测试 1评分回归 —— 纯本地,不需要 LLM
#
# Verifies:
# - decay score formula correctness
# - time weight (freshness) formula
# - resolved/digested modifiers
# - pinned/permanent/feel special scores
# - search scoring (topic + emotion + time + importance)
# - threshold filtering
# - ordering invariants
# ============================================================
import math
import pytest
from datetime import datetime, timedelta
from tests.dataset import DATASET
# ============================================================
# Fixtures: populate temp buckets from dataset
# ============================================================
@pytest.fixture
async def populated_env(test_config, bucket_mgr, decay_eng):
    """Create all dataset buckets in temp dir, return (bucket_mgr, decay_eng, bucket_ids).

    Buckets are created through the normal create() API; the frontmatter
    files are then rewritten directly because update() cannot set
    created/last_active or the resolved/digested/pinned flags.
    """
    import frontmatter as fm
    ids = []
    for item in DATASET:
        bid = await bucket_mgr.create(
            content=item["content"],
            tags=item.get("tags", []),
            importance=item.get("importance", 5),
            domain=item.get("domain", []),
            valence=item.get("valence", 0.5),
            arousal=item.get("arousal", 0.3),
            name=None,
            bucket_type=item.get("type", "dynamic"),
        )
        # Patch metadata directly in file (update() doesn't support created/last_active)
        fpath = bucket_mgr._find_bucket_file(bid)
        post = fm.load(fpath)
        if "created" in item:
            post["created"] = item["created"]
            # last_active mirrors created so time-based tests are deterministic
            post["last_active"] = item["created"]
        if item.get("resolved"):
            post["resolved"] = True
        if item.get("digested"):
            post["digested"] = True
        if item.get("pinned"):
            post["pinned"] = True
            # pinned buckets are forced to max importance here
            # NOTE(review): presumably mirrors server.py's pin handling — confirm
            post["importance"] = 10
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(fm.dumps(post))
        ids.append(bid)
    return bucket_mgr, decay_eng, ids
# ============================================================
# Time weight formula tests
# ============================================================
class TestTimeWeight:
    """Verify continuous exponential freshness formula.

    The expected shape (per the assertions below) is 1 + e^(-t/τ) with
    τ = 36 hours: weight 2.0 at t=0, decaying toward a floor of 1.0.
    """
    def test_t0_is_2(self, decay_eng):
        """t=0 → exactly 2.0"""
        assert decay_eng._calc_time_weight(0.0) == pytest.approx(2.0)
    def test_half_life_25h(self, decay_eng):
        """Half-life at t=36*ln(2)≈24.9h (~1.04 days) → bonus halved → 1.5"""
        # math is imported at module level; the redundant local import is removed
        half_life_days = 36.0 * math.log(2) / 24.0  # ≈1.039 days
        assert decay_eng._calc_time_weight(half_life_days) == pytest.approx(1.5, rel=0.01)
    def test_36h_is_e_inv(self, decay_eng):
        """t=36h (1.5 days) → 1 + e^(-1) ≈ 1.368"""
        assert decay_eng._calc_time_weight(1.5) == pytest.approx(1.368, rel=0.01)
    def test_72h_near_floor(self, decay_eng):
        """t=72h (3 days) → ≈1.135"""
        w = decay_eng._calc_time_weight(3.0)
        assert 1.1 < w < 1.2
    def test_30d_near_1(self, decay_eng):
        """t=30 days → very close to 1.0"""
        w = decay_eng._calc_time_weight(30.0)
        assert 1.0 <= w < 1.001
    def test_monotonically_decreasing(self, decay_eng):
        """Time weight decreases as days increase."""
        prev = decay_eng._calc_time_weight(0.0)
        for d in [0.5, 1.0, 2.0, 5.0, 10.0, 30.0]:
            curr = decay_eng._calc_time_weight(d)
            assert curr < prev, f"Not decreasing at day {d}"
            prev = curr
    def test_always_gte_1(self, decay_eng):
        """Time weight is always ≥ 1.0."""
        for d in [0, 0.01, 0.1, 1, 10, 100, 1000]:
            assert decay_eng._calc_time_weight(d) >= 1.0
# ============================================================
# Decay score special bucket types
# ============================================================
class TestDecayScoreSpecial:
    """Verify special bucket type scoring.

    permanent / pinned / protected pin the score to 999.0, feel pins it to
    50.0, and non-dict metadata degrades to 0.0 rather than raising.
    """
    def test_permanent_is_999(self, decay_eng):
        assert decay_eng.calculate_score({"type": "permanent"}) == 999.0
    def test_pinned_is_999(self, decay_eng):
        assert decay_eng.calculate_score({"pinned": True}) == 999.0
    def test_protected_is_999(self, decay_eng):
        assert decay_eng.calculate_score({"protected": True}) == 999.0
    def test_feel_is_50(self, decay_eng):
        assert decay_eng.calculate_score({"type": "feel"}) == 50.0
    def test_non_dict_metadata_is_0(self, decay_eng):
        # Renamed from test_empty_metadata_is_0: the input is a *non-dict*,
        # not an empty dict — calculate_score must degrade to 0.0 on it.
        assert decay_eng.calculate_score("not a dict") == 0.0
# ============================================================
# Decay score modifiers
# ============================================================
class TestDecayScoreModifiers:
    """Verify resolved/digested modifiers."""
    def _base_meta(self, **overrides):
        """A 2-day-old dynamic bucket; any field overridable per test."""
        meta = {
            "importance": 7,
            "activation_count": 3,
            "created": (datetime.now() - timedelta(days=2)).isoformat(),
            "last_active": (datetime.now() - timedelta(days=2)).isoformat(),
            "arousal": 0.5,
            "valence": 0.5,
            "type": "dynamic",
        }
        meta.update(overrides)
        return meta
    def test_resolved_reduces_score(self, decay_eng):
        """resolved=True applies a ×0.05 modifier."""
        normal = decay_eng.calculate_score(self._base_meta())
        resolved = decay_eng.calculate_score(self._base_meta(resolved=True))
        assert resolved < normal
        assert resolved == pytest.approx(normal * 0.05, rel=0.01)
    def test_resolved_digested_even_lower(self, decay_eng):
        """resolved+digested applies ×0.02, below resolved-only ×0.05."""
        resolved = decay_eng.calculate_score(self._base_meta(resolved=True))
        both = decay_eng.calculate_score(self._base_meta(resolved=True, digested=True))
        assert both < resolved
        # resolved=0.05, both=0.02
        assert both / resolved == pytest.approx(0.02 / 0.05, rel=0.01)
    def test_high_arousal_urgency_boost(self, decay_eng):
        """Arousal>0.7 and not resolved → 1.5× urgency boost."""
        calm = decay_eng.calculate_score(self._base_meta(arousal=0.5))
        urgent = decay_eng.calculate_score(self._base_meta(arousal=0.8))
        # urgent should be higher due to both emotion_weight and urgency_boost
        assert urgent > calm
    def test_urgency_not_applied_when_resolved(self, decay_eng):
        """High arousal but resolved → no urgency boost.

        Fixed: the original compared two *identical* metas, which was a
        tautology. Compare resolved vs unresolved at the same high arousal
        instead: the unresolved score carries the 1.5× urgency boost, the
        resolved one only the ×0.05 resolved modifier, so the expected
        ratio is 0.05 / 1.5 (not 0.05).
        """
        urgent = decay_eng.calculate_score(self._base_meta(arousal=0.8))
        urgent_resolved = decay_eng.calculate_score(
            self._base_meta(arousal=0.8, resolved=True)
        )
        assert urgent_resolved == pytest.approx(urgent * 0.05 / 1.5, rel=0.02)
# ============================================================
# Decay score ordering invariants
# ============================================================
class TestDecayScoreOrdering:
    """Verify ordering invariants across the dataset."""
    @pytest.mark.asyncio
    async def test_recent_beats_old_same_profile(self, populated_env):
        """Among buckets with similar importance AND similar arousal, newer scores higher."""
        bm, de, ids = populated_env
        all_buckets = await bm.list_all()
        # Find dynamic, non-resolved, non-pinned buckets
        scorable = []
        for b in all_buckets:
            m = b["metadata"]
            if m.get("type") == "dynamic" and not m.get("resolved") and not m.get("pinned"):
                scorable.append((b, de.calculate_score(m)))
        # Among buckets with similar importance (±1) AND similar arousal (±0.2),
        # newer should generally score higher
        violations = 0
        comparisons = 0
        # Pairwise sweep: only pairs close in importance and arousal count
        # as comparable, so time is the dominant remaining factor.
        for i, (b1, s1) in enumerate(scorable):
            for b2, s2 in scorable[i+1:]:
                m1, m2 = b1["metadata"], b2["metadata"]
                imp1, imp2 = m1.get("importance", 5), m2.get("importance", 5)
                ar1 = float(m1.get("arousal", 0.3))
                ar2 = float(m2.get("arousal", 0.3))
                if abs(imp1 - imp2) <= 1 and abs(ar1 - ar2) <= 0.2:
                    # ISO-8601 strings compare chronologically, so c1 > c2
                    # means b1 is the newer bucket of the pair.
                    c1 = m1.get("created", "")
                    c2 = m2.get("created", "")
                    if c1 > c2:
                        comparisons += 1
                        # Count a violation only when the newer bucket scores
                        # well below the older one (30% slack).
                        if s1 < s2 * 0.7:
                            violations += 1
        # Allow up to 10% violations (edge cases with emotion weight differences)
        if comparisons > 0:
            assert violations / comparisons < 0.1, \
                f"{violations}/{comparisons} ordering violations"
    @pytest.mark.asyncio
    async def test_pinned_always_top(self, populated_env):
        """Pinned/permanent buckets must outscore every live dynamic bucket."""
        bm, de, ids = populated_env
        all_buckets = await bm.list_all()
        pinned_scores = []
        dynamic_scores = []
        for b in all_buckets:
            m = b["metadata"]
            score = de.calculate_score(m)
            if m.get("pinned") or m.get("type") == "permanent":
                pinned_scores.append(score)
            elif m.get("type") == "dynamic" and not m.get("resolved"):
                dynamic_scores.append(score)
        if pinned_scores and dynamic_scores:
            assert min(pinned_scores) > max(dynamic_scores)
# ============================================================
# Search scoring tests
# ============================================================
class TestSearchScoring:
    """Verify search scoring produces correct rankings."""
    @pytest.mark.asyncio
    async def test_exact_topic_match_ranks_first(self, populated_env):
        """A near-verbatim topic query should surface its bucket on top."""
        bm, de, ids = populated_env
        results = await bm.search("asyncio Python event loop", limit=10)
        if results:
            # The asyncio bucket should be in top results
            top_content = results[0].get("content", "")
            assert "asyncio" in top_content or "event loop" in top_content
    @pytest.mark.asyncio
    async def test_domain_filter_works(self, populated_env):
        """domain_filter must actually bias results toward the domain.

        Fixed: the original assertion ended in ``... or True``, which made
        it vacuous. Fuzzy matching may legitimately admit near-miss domains,
        so we only require that at least one 编程-domain bucket appears,
        not that every hit matches.
        """
        bm, de, ids = populated_env
        results = await bm.search("学习", limit=50, domain_filter=["编程"])
        if results:
            matched = sum(
                1 for r in results
                if any("编程" in d for d in r.get("metadata", {}).get("domain", []))
            )
            assert matched >= 1, "domain_filter returned no 编程-domain buckets"
    @pytest.mark.asyncio
    async def test_emotion_resonance_scoring(self, populated_env):
        """A bucket whose emotion matches the query scores higher."""
        bm, de, ids = populated_env
        # Query with specific emotion
        score_happy = bm._calc_emotion_score(0.9, 0.8, {"valence": 0.85, "arousal": 0.7})
        score_sad = bm._calc_emotion_score(0.9, 0.8, {"valence": 0.2, "arousal": 0.3})
        assert score_happy > score_sad
    def test_emotion_score_no_query_is_neutral(self, bucket_mgr):
        """No query emotion → neutral 0.5."""
        score = bucket_mgr._calc_emotion_score(None, None, {"valence": 0.8, "arousal": 0.5})
        assert score == 0.5
    def test_time_score_recent_higher(self, bucket_mgr):
        """More recent last_active → higher time score."""
        recent = {"last_active": datetime.now().isoformat()}
        old = {"last_active": (datetime.now() - timedelta(days=30)).isoformat()}
        assert bucket_mgr._calc_time_score(recent) > bucket_mgr._calc_time_score(old)
    @pytest.mark.asyncio
    async def test_resolved_bucket_penalized_in_normalized(self, populated_env):
        """Resolved buckets get ×0.3 in normalized score (breath-debug logic)."""
        bm, de, ids = populated_env
        all_b = await bm.list_all()
        resolved_b = None
        for b in all_b:
            m = b["metadata"]
            if m.get("type") == "dynamic" and m.get("resolved") and not m.get("digested"):
                resolved_b = b
                break
        if resolved_b:
            m = resolved_b["metadata"]
            # Recompute the breath-debug formula with the config's weights
            topic = bm._calc_topic_score("bug", resolved_b)
            emotion = bm._calc_emotion_score(0.5, 0.5, m)
            time_s = bm._calc_time_score(m)
            imp = max(1, min(10, int(m.get("importance", 5)))) / 10.0
            raw = topic * 4.0 + emotion * 2.0 + time_s * 2.5 + imp * 1.0
            normalized = (raw / 9.5) * 100
            normalized_resolved = normalized * 0.3
            # The importance term guarantees raw > 0, so the ×0.3 penalty
            # strictly lowers the score; this documents the formula itself.
            assert normalized_resolved < normalized
# ============================================================
# Dataset integrity checks
# ============================================================
class TestDatasetIntegrity:
    """Sanity checks that the dataset landed in the temp store intact."""
    @pytest.mark.asyncio
    async def test_all_buckets_created(self, populated_env):
        """Every DATASET entry became exactly one bucket."""
        bm, de, ids = populated_env
        buckets = await bm.list_all()
        assert len(buckets) == len(DATASET)
    @pytest.mark.asyncio
    async def test_type_distribution(self, populated_env):
        """Bucket-type counts meet the minimums the suite relies on."""
        bm, de, ids = populated_env
        type_labels = [
            b["metadata"].get("type", "dynamic") for b in await bm.list_all()
        ]
        assert type_labels.count("dynamic") >= 30
        assert type_labels.count("permanent") >= 3
        assert type_labels.count("feel") >= 3
    @pytest.mark.asyncio
    async def test_pinned_exist(self, populated_env):
        """At least two pinned buckets survive the import."""
        bm, de, ids = populated_env
        pinned_count = sum(
            1 for b in await bm.list_all() if b["metadata"].get("pinned")
        )
        assert pinned_count >= 2