Compare commits
19 Commits
a0ba78cd64
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3aae7582fb | ||
|
|
529c9cc172 | ||
|
|
71154d905f | ||
|
|
38be7610f4 | ||
|
|
b869a111c7 | ||
|
|
cddc809f02 | ||
|
|
2646f8f7d0 | ||
|
|
b318e557b0 | ||
|
|
d2d4b89715 | ||
|
|
ccdffdb626 | ||
|
|
c7ddfd46ad | ||
|
|
2d2de45d5a | ||
|
|
e9d61b5d9d | ||
|
|
d1cd3f4cc7 | ||
|
|
5815be6b69 | ||
|
|
3b5f37c7ca | ||
|
|
d4740f0d1f | ||
|
|
821546d5de | ||
|
|
a09fbfe13a |
@@ -1,14 +1,14 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# SessionStart Hook: auto-breath on session start
|
# SessionStart Hook: auto-breath + dreaming on session start
|
||||||
# 对话开始钩子:自动浮现最高权重的未解决记忆
|
# 对话开始钩子:自动浮现记忆 + 触发 dreaming
|
||||||
#
|
#
|
||||||
# On SessionStart, this script calls the Ombre Brain MCP server's
|
# On SessionStart, this script calls the Ombre Brain MCP server's
|
||||||
# breath tool (empty query = surfacing mode) via HTTP and prints
|
# breath-hook and dream-hook endpoints, printing results to stdout
|
||||||
# the result to stdout so Claude sees it as session context.
|
# so Claude sees them as session context.
|
||||||
#
|
#
|
||||||
# This works for OMBRE_TRANSPORT=streamable-http deployments.
|
# Sequence: breath → dream → feel
|
||||||
# For local stdio deployments, the script falls back gracefully.
|
# 顺序:呼吸浮现 → 做梦消化 → 读取 feel
|
||||||
#
|
#
|
||||||
# Config:
|
# Config:
|
||||||
# OMBRE_HOOK_URL — override the server URL (default: http://localhost:8000)
|
# OMBRE_HOOK_URL — override the server URL (default: http://localhost:8000)
|
||||||
@@ -27,12 +27,19 @@ def main():
|
|||||||
|
|
||||||
base_url = os.environ.get("OMBRE_HOOK_URL", "http://localhost:8000").rstrip("/")
|
base_url = os.environ.get("OMBRE_HOOK_URL", "http://localhost:8000").rstrip("/")
|
||||||
|
|
||||||
|
# --- Step 1: Breath — surface unresolved memories ---
|
||||||
|
_call_endpoint(base_url, "/breath-hook")
|
||||||
|
|
||||||
|
# --- Step 2: Dream — digest recent memories ---
|
||||||
|
_call_endpoint(base_url, "/dream-hook")
|
||||||
|
|
||||||
|
|
||||||
|
def _call_endpoint(base_url, path):
|
||||||
req = urllib.request.Request(
|
req = urllib.request.Request(
|
||||||
f"{base_url}/breath-hook",
|
f"{base_url}{path}",
|
||||||
headers={"Accept": "text/plain"},
|
headers={"Accept": "text/plain"},
|
||||||
method="GET",
|
method="GET",
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with urllib.request.urlopen(req, timeout=8) as response:
|
with urllib.request.urlopen(req, timeout=8) as response:
|
||||||
raw = response.read().decode("utf-8")
|
raw = response.read().decode("utf-8")
|
||||||
@@ -40,13 +47,10 @@ def main():
|
|||||||
if output:
|
if output:
|
||||||
print(output)
|
print(output)
|
||||||
except (urllib.error.URLError, OSError):
|
except (urllib.error.URLError, OSError):
|
||||||
# Server not available (local stdio mode or not running) — silent fail
|
|
||||||
pass
|
pass
|
||||||
except Exception:
|
except Exception:
|
||||||
# Any other error — silent fail, never block session start
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
sys.exit(0)
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
36
.github/workflows/docker-publish.yml
vendored
Normal file
36
.github/workflows/docker-publish.yml
vendored
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
name: Build & Push Docker Image
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
paths-ignore:
|
||||||
|
- '*.md'
|
||||||
|
- 'backup_*/**'
|
||||||
|
- '.gitignore'
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build-and-push:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
uses: docker/setup-buildx-action@v3
|
||||||
|
|
||||||
|
- name: Login to Docker Hub
|
||||||
|
uses: docker/login-action@v3
|
||||||
|
with:
|
||||||
|
username: ${{ secrets.DOCKERHUB_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKERHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Build and push
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: .
|
||||||
|
push: true
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
|
tags: |
|
||||||
|
p0luz/ombre-brain:latest
|
||||||
|
p0luz/ombre-brain:${{ github.sha }}
|
||||||
|
cache-from: type=gha
|
||||||
|
cache-to: type=gha,mode=max
|
||||||
37
.github/workflows/tests.yml
vendored
Normal file
37
.github/workflows/tests.yml
vendored
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
name: Tests
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
branches: [main]
|
||||||
|
paths-ignore:
|
||||||
|
- '*.md'
|
||||||
|
- 'backup_*/**'
|
||||||
|
- '.gitignore'
|
||||||
|
pull_request:
|
||||||
|
branches: [main]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Set up Python 3.12
|
||||||
|
uses: actions/setup-python@v5
|
||||||
|
with:
|
||||||
|
python-version: '3.12'
|
||||||
|
cache: 'pip'
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
pip install -r requirements.txt
|
||||||
|
pip install pytest pytest-asyncio
|
||||||
|
|
||||||
|
- name: Run local tests (no API key needed)
|
||||||
|
run: python -m pytest tests/test_scoring.py tests/test_feel_flow.py -v --asyncio-mode=auto
|
||||||
|
|
||||||
|
- name: Run LLM quality tests
|
||||||
|
if: env.OMBRE_API_KEY != ''
|
||||||
|
env:
|
||||||
|
OMBRE_API_KEY: ${{ secrets.OMBRE_API_KEY }}
|
||||||
|
run: python -m pytest tests/test_llm_quality.py -v --asyncio-mode=auto
|
||||||
14
.gitignore
vendored
14
.gitignore
vendored
@@ -7,7 +7,15 @@ __pycache__/
|
|||||||
.claude/hooks/__pycache__/
|
.claude/hooks/__pycache__/
|
||||||
.vscode/
|
.vscode/
|
||||||
config.yaml
|
config.yaml
|
||||||
buckets/dynamic/
|
buckets/
|
||||||
buckets/archive/
|
*.log
|
||||||
buckets/permanent/
|
server.log
|
||||||
|
plan.md
|
||||||
scarp_paper
|
scarp_paper
|
||||||
|
backup_*/
|
||||||
|
*.db
|
||||||
|
import_state.json
|
||||||
|
data/
|
||||||
|
tests/integration/
|
||||||
|
tests/regression/
|
||||||
|
tests/unit/
|
||||||
|
|||||||
632
BEHAVIOR_SPEC.md
Normal file
632
BEHAVIOR_SPEC.md
Normal file
@@ -0,0 +1,632 @@
|
|||||||
|
# Ombre Brain 用户全流程行为规格书
|
||||||
|
|
||||||
|
> 版本:基于 server.py / bucket_manager.py / decay_engine.py / dehydrator.py / embedding_engine.py / CLAUDE_PROMPT.md / config.example.yaml
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 一、系统角色说明
|
||||||
|
|
||||||
|
### 1.1 参与方总览
|
||||||
|
|
||||||
|
| 角色 | 实体 | 职责边界 |
|
||||||
|
|------|------|---------|
|
||||||
|
| **用户** | 人类 | 发起对话,提供原始内容;可直接访问 Dashboard Web UI |
|
||||||
|
| **Claude(模型端)** | LLM(如 Claude 3.x)| 理解语义、决策何时调用工具、用自然语言回应用户;不直接操作文件 |
|
||||||
|
| **OB 服务端** | `server.py` + 各模块 | 接收 MCP 工具调用,执行持久化、搜索、衰减;对 Claude 不透明 |
|
||||||
|
|
||||||
|
### 1.2 Claude 端职责边界
|
||||||
|
- **必须做**:每次新对话第一步无参调用 `breath()`;对话内容有记忆价值时主动调用 `hold` / `grow`
|
||||||
|
- **不做**:不直接读写 `.md` 文件;不执行衰减计算;不操作 SQLite
|
||||||
|
- **决策权**:Claude 决定是否存、存哪些、何时 resolve;OB 决定如何存(合并/新建)
|
||||||
|
|
||||||
|
### 1.3 OB 服务端内部模块职责
|
||||||
|
|
||||||
|
| 模块 | 核心职责 |
|
||||||
|
|------|---------|
|
||||||
|
| `server.py` | 注册 MCP 工具(`breath/hold/grow/trace/pulse/dream`);路由 Dashboard HTTP 请求;`_merge_or_create()` 合并逻辑中枢 |
|
||||||
|
| `bucket_manager.py` | 桶 CRUD;多维搜索(fuzzy + embedding 双通道);`touch()` 激活刷新;`_time_ripple()` 时间波纹 |
|
||||||
|
| `dehydrator.py` | `analyze()` 自动打标;`merge()` 内容融合;`digest()` 日记拆分;`dehydrate()` 内容压缩 |
|
||||||
|
| `embedding_engine.py` | `generate_and_store()` 生成向量并存 SQLite;`search_similar()` 余弦相似度检索 |
|
||||||
|
| `decay_engine.py` | `calculate_score()` 衰减分计算;`run_decay_cycle()` 周期扫描归档;后台定时循环 |
|
||||||
|
| `utils.py` | 配置加载;路径安全校验;ID 生成;token 估算 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 二、场景全流程
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 1:新对话开始(冷启动,无历史记忆)
|
||||||
|
|
||||||
|
**用户操作**:打开新对话窗口,说第一句话
|
||||||
|
|
||||||
|
**Claude 行为**:在任何回复之前,先调用 `breath()`(无参)
|
||||||
|
|
||||||
|
**OB 工具调用**:
|
||||||
|
```
|
||||||
|
breath(query="", max_tokens=10000, domain="", valence=-1, arousal=-1, max_results=20, importance_min=-1)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
1. `decay_engine.ensure_started()` — 懒加载启动后台衰减循环(若未运行)
|
||||||
|
2. 进入"浮现模式"(`not query or not query.strip()`)
|
||||||
|
3. `bucket_mgr.list_all(include_archive=False)` — 遍历 `permanent/` + `dynamic/` + `feel/` 目录,加载所有 `.md` 文件的 frontmatter + 正文
|
||||||
|
4. 筛选钉选桶(`pinned=True` 或 `protected=True`)
|
||||||
|
5. 筛选未解决桶(`resolved=False`,排除 `permanent/feel/pinned`)
|
||||||
|
6. **冷启动检测**:找 `activation_count==0 && importance>=8` 的桶,最多取 2 个插入排序最前(**决策:`create()` 初始化应为 0,区分"创建"与"被主动召回",见 B-04**)
|
||||||
|
7. 按 `decay_engine.calculate_score(metadata)` 降序排列剩余未解决桶
|
||||||
|
8. 对 top-20 中除 top-1 以外的桶随机洗牌(top-1 固定,第 2~20 名随机打乱)
|
||||||
|
9. 截断到 `max_results` 条
|
||||||
|
10. 对每个桶调用 `dehydrator.dehydrate(strip_wikilinks(content), clean_meta)` 压缩摘要
|
||||||
|
11. 按 `max_tokens` 预算截断输出
|
||||||
|
|
||||||
|
**返回结果**:
|
||||||
|
- 无记忆时:`"权重池平静,没有需要处理的记忆。"`
|
||||||
|
- 有记忆时:`"=== 核心准则 ===\n📌 ...\n\n=== 浮现记忆 ===\n[权重:X.XX] [bucket_id:xxx] ..."`
|
||||||
|
|
||||||
|
**注意**:浮现模式**不调用** `touch()`,不重置衰减计时器
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 2:新对话开始(有历史记忆,breath 自动浮现)
|
||||||
|
|
||||||
|
(与场景 1 相同流程,区别在于桶文件已存在)
|
||||||
|
|
||||||
|
**Claude 行为(完整对话启动序列,来自 CLAUDE_PROMPT.md)**:
|
||||||
|
```
|
||||||
|
1. breath() — 浮现未解决记忆
|
||||||
|
2. dream() — 消化最近记忆,有沉淀写 feel
|
||||||
|
3. breath(domain="feel") — 读取之前的 feel
|
||||||
|
4. 开始和用户说话
|
||||||
|
```
|
||||||
|
|
||||||
|
**`breath(domain="feel")` 内部流程**:
|
||||||
|
1. 检测到 `domain.strip().lower() == "feel"` → 进入 feel 专用通道
|
||||||
|
2. `bucket_mgr.list_all()` 过滤 `type=="feel"` 的桶
|
||||||
|
3. 按 `created` 降序排列
|
||||||
|
4. 按 `max_tokens` 截断,不压缩(直接展示原文)
|
||||||
|
5. 返回:`"=== 你留下的 feel ===\n[时间] [bucket_id:xxx]\n内容..."`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 3:用户说了一件事,Claude 决定存入记忆(hold)
|
||||||
|
|
||||||
|
**用户操作**:例如"我刚刚拿到了实习 offer,有点激动"
|
||||||
|
|
||||||
|
**Claude 行为**:判断值得记忆,调用:
|
||||||
|
```python
|
||||||
|
hold(content="用户拿到实习 offer,情绪激动", importance=7)
|
||||||
|
```
|
||||||
|
|
||||||
|
**OB 工具调用**:`hold(content, tags="", importance=7, pinned=False, feel=False, source_bucket="", valence=-1, arousal=-1)`
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `decay_engine.ensure_started()`
|
||||||
|
2. 输入校验:`content.strip()` 非空
|
||||||
|
3. `importance = max(1, min(10, 7))` = 7
|
||||||
|
4. `extra_tags = []`(未传 tags)
|
||||||
|
5. **自动打标**:`dehydrator.analyze(content)` → 调用 `_api_analyze()` → LLM 返回 JSON
|
||||||
|
- 返回示例:`{"domain": ["成长", "求职"], "valence": 0.8, "arousal": 0.7, "tags": ["实习", "offer", "激动", ...], "suggested_name": "实习offer获得"}`
|
||||||
|
- 失败时降级:`{"domain": ["未分类"], "valence": 0.5, "arousal": 0.3, "tags": [], "suggested_name": ""}`
|
||||||
|
6. 合并 `auto_tags + extra_tags` 去重
|
||||||
|
7. **合并检测**:`_merge_or_create(content, tags, importance=7, domain, valence, arousal, name)`
|
||||||
|
- `bucket_mgr.search(content, limit=1, domain_filter=domain)` — 搜索最相似的桶
|
||||||
|
- 若最高分 > `config["merge_threshold"]`(默认 75)且该桶非 pinned/protected:
|
||||||
|
- `dehydrator.merge(old_content, new_content)` → `_api_merge()` → LLM 融合
|
||||||
|
- `bucket_mgr.update(bucket_id, content=merged, tags=union, importance=max, domain=union, valence=avg, arousal=avg)`
|
||||||
|
- `embedding_engine.generate_and_store(bucket_id, merged_content)` 更新向量
|
||||||
|
- 返回 `(bucket_name, True)`
|
||||||
|
- 否则:
|
||||||
|
- `bucket_mgr.create(content, tags, importance=7, domain, valence, arousal, name)` → 写 `.md` 文件到 `dynamic/<主题域>/` 目录
|
||||||
|
- `embedding_engine.generate_and_store(bucket_id, content)` 生成并存储向量
|
||||||
|
- 返回 `(bucket_id, False)`
|
||||||
|
|
||||||
|
**返回结果**:
|
||||||
|
- 新建:`"新建→实习offer获得 成长,求职"`
|
||||||
|
- 合并:`"合并→求职经历 成长,求职"`
|
||||||
|
|
||||||
|
**bucket_mgr.create() 详情**:
|
||||||
|
- `generate_bucket_id()` → `uuid4().hex[:12]`
|
||||||
|
- `sanitize_name(name)` → 正则清洗,最长 80 字符
|
||||||
|
- 写 YAML frontmatter + 正文到 `safe_path(domain_dir, f"{name}_{id}.md")`
|
||||||
|
- frontmatter 字段:`id, name, tags, domain, valence, arousal, importance, type, created, last_active, activation_count=0`(**决策:初始为 0,`touch()` 首次被召回后变为 1**)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 4:用户说了一段长日记,Claude 整理存入(grow)
|
||||||
|
|
||||||
|
**用户操作**:发送一大段混合内容,如"今天去医院体检,结果还好;晚上和朋友吃饭聊了很多;最近有点焦虑..."
|
||||||
|
|
||||||
|
**Claude 行为**:
|
||||||
|
```python
|
||||||
|
grow(content="今天去医院体检,结果还好;晚上和朋友吃饭聊了很多;最近有点焦虑...")
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `decay_engine.ensure_started()`
|
||||||
|
2. 内容长度检查:`len(content.strip()) < 30` → 若短于 30 字符走**快速路径**(`dehydrator.analyze()` + `_merge_or_create()`,跳过 digest)
|
||||||
|
3. **日记拆分**(正常路径):`dehydrator.digest(content)` → `_api_digest()` → LLM 调用 `DIGEST_PROMPT`
|
||||||
|
- LLM 返回 JSON 数组,每项含:`name, content, domain, valence, arousal, tags, importance`
|
||||||
|
- `_parse_digest()` 安全解析,校验 valence/arousal 范围
|
||||||
|
4. 对每个 `item` 调用 `_merge_or_create(item["content"], item["tags"], item["importance"], item["domain"], item["valence"], item["arousal"], item["name"])`
|
||||||
|
- 每项独立走合并或新建逻辑(同场景 3)
|
||||||
|
- 单条失败不影响其他条(`try/except` 隔离)
|
||||||
|
|
||||||
|
**返回结果**:
|
||||||
|
```
|
||||||
|
3条|新2合1
|
||||||
|
📝体检结果
|
||||||
|
📌朋友聚餐
|
||||||
|
📎近期焦虑情绪
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 5:用户想找某段记忆(breath 带 query 检索)
|
||||||
|
|
||||||
|
**用户操作**:例如"还记得我之前说过关于实习的事吗"
|
||||||
|
|
||||||
|
**Claude 行为**:
|
||||||
|
```python
|
||||||
|
breath(query="实习", domain="成长", valence=0.7, arousal=0.5)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `decay_engine.ensure_started()`
|
||||||
|
2. 检测到 `query` 非空,进入**检索模式**
|
||||||
|
3. 解析 `domain_filter = ["成长"]`,`q_valence=0.7`,`q_arousal=0.5`
|
||||||
|
4. **关键词检索**:`bucket_mgr.search(query, limit=20, domain_filter, q_valence, q_arousal)`
|
||||||
|
- **Layer 1**:domain 预筛 → 仅保留 domain 包含"成长"的桶;若为空则回退全量
|
||||||
|
- **Layer 1.5**(embedding 已开启时):`embedding_engine.search_similar(query, top_k=50)` → 用 embedding 候选集替换/缩小精排范围
|
||||||
|
- **Layer 2**:多维加权精排:
|
||||||
|
- `_calc_topic_score()`: `fuzz.partial_ratio(query, name)×3 + domain×2.5 + tags×2 + body×1`,归一化 0~1
|
||||||
|
- `_calc_emotion_score()`: `1 - √((v差²+a差²)/2)`,0~1
|
||||||
|
- `_calc_time_score()`: `e^(-0.02×days_since_last_active)`,0~1
|
||||||
|
- `importance_score`: `importance / 10`
|
||||||
|
- `total = topic×4 + emotion×2 + time×1.5 + importance×1`,归一化到 0~100
|
||||||
|
- 过滤 `score >= fuzzy_threshold`(默认 50)
|
||||||
|
- 通过阈值后,`resolved` 桶仅在排序时降权 ×0.3(不影响是否被检出)
|
||||||
|
- 返回最多 `limit` 条
|
||||||
|
5. 排除 pinned/protected 桶(它们在浮现模式展示)
|
||||||
|
6. **向量补充通道**(server.py 额外层):`embedding_engine.search_similar(query, top_k=20)` → 相似度 > 0.5 的桶补充到结果集(标记 `vector_match=True`)
|
||||||
|
7. 对每个结果:
|
||||||
|
- 记忆重构:若传了 `q_valence`,展示层 valence 做微调:`shift = (q_valence - 0.5) × 0.2`,最大 ±0.1
|
||||||
|
- `dehydrator.dehydrate(strip_wikilinks(content), clean_meta)` 压缩摘要
|
||||||
|
- `bucket_mgr.touch(bucket_id)` — 刷新 `last_active` + `activation_count += 1` + 触发 `_time_ripple()`(对 48h 内创建的邻近桶 activation_count + 0.3,最多 5 个桶)
|
||||||
|
8. **随机漂流**:若检索结果 < 3 且 `random.random() < 0.4`,随机从 `decay_score < 2.0` 的旧桶里取 1~3 条,标注 `[surface_type: random]`
|
||||||
|
|
||||||
|
**返回结果**:
|
||||||
|
```
|
||||||
|
[bucket_id:abc123] [重要度:7] [主题:成长] 实习offer获得:...
|
||||||
|
[语义关联] [bucket_id:def456] 求职经历...
|
||||||
|
--- 忽然想起来 ---
|
||||||
|
[surface_type: random] 某段旧记忆...
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 6:用户想查看所有记忆状态(pulse)
|
||||||
|
|
||||||
|
**用户操作**:"帮我看看你现在都记得什么"
|
||||||
|
|
||||||
|
**Claude 行为**:
|
||||||
|
```python
|
||||||
|
pulse(include_archive=False)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `bucket_mgr.get_stats()` — 遍历三个目录,统计文件数量和 KB 大小
|
||||||
|
2. `bucket_mgr.list_all(include_archive=False)` — 加载全部桶
|
||||||
|
3. 对每个桶:`decay_engine.calculate_score(metadata)` 计算当前权重分
|
||||||
|
4. 按类型/状态分配图标:📌钉选 / 📦permanent / 🫧feel / 🗄️archived / ✅resolved / 💭普通
|
||||||
|
5. 拼接每桶摘要行:`名称 bucket_id 主题 情感坐标 重要度 权重 标签`
|
||||||
|
|
||||||
|
**返回结果**:
|
||||||
|
```
|
||||||
|
=== Ombre Brain 记忆系统 ===
|
||||||
|
固化记忆桶: 2 个
|
||||||
|
动态记忆桶: 15 个
|
||||||
|
归档记忆桶: 3 个
|
||||||
|
总存储大小: 48.3 KB
|
||||||
|
衰减引擎: 运行中
|
||||||
|
|
||||||
|
=== 记忆列表 ===
|
||||||
|
📌 [核心原则] bucket_id:abc123 主题:内心 情感:V0.8/A0.5 ...
|
||||||
|
💭 [实习offer获得] bucket_id:def456 主题:成长 情感:V0.8/A0.7 ...
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 7:用户想修改/标记已解决/删除某条记忆(trace)
|
||||||
|
|
||||||
|
#### 7a 标记已解决
|
||||||
|
|
||||||
|
**Claude 行为**:
|
||||||
|
```python
|
||||||
|
trace(bucket_id="abc123", resolved=1)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部**:
|
||||||
|
1. `resolved in (0, 1)` 校验通过 → 本例 `resolved=1`,故 `updates["resolved"] = True`
|
||||||
|
2. `bucket_mgr.update("abc123", resolved=True)` → 读取 `.md` 文件,更新 frontmatter 中 `resolved=True`,写回,**桶留在原 `dynamic/` 目录,不移动**
|
||||||
|
3. 后续 `breath()` 浮现时:该桶 `decay_engine.calculate_score()` 乘以 `resolved_factor=0.05`(若同时 `digested=True` 则 ×0.02),自然降权,最终由 decay 引擎在得分 < threshold 时归档
|
||||||
|
4. `bucket_mgr.search()` 中该桶得分乘以 0.3 降权,但仍可被关键词激活
|
||||||
|
|
||||||
|
> ⚠️ **代码 Bug B-01**:当前实现中 `update(resolved=True)` 会将桶**立即移入 `archive/`**,导致桶完全消失于所有搜索路径,与上述规格不符。需移除 `bucket_manager.py` `update()` 中 resolved → `_move_bucket(archive_dir)` 的自动归档逻辑。
|
||||||
|
|
||||||
|
**返回**:`"已修改记忆桶 abc123: resolved=True → 已沉底,只在关键词触发时重新浮现"`
|
||||||
|
|
||||||
|
#### 7b 修改元数据
|
||||||
|
|
||||||
|
```python
|
||||||
|
trace(bucket_id="abc123", name="新名字", importance=8, tags="焦虑,成长")
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部**:收集非默认值字段 → `bucket_mgr.update()` 批量更新 frontmatter
|
||||||
|
|
||||||
|
#### 7c 删除
|
||||||
|
|
||||||
|
```python
|
||||||
|
trace(bucket_id="abc123", delete=True)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部**:
|
||||||
|
1. `bucket_mgr.delete("abc123")` → `_find_bucket_file()` 定位文件 → `os.remove(file_path)`
|
||||||
|
2. `embedding_engine.delete_embedding("abc123")` → SQLite `DELETE WHERE bucket_id=?`
|
||||||
|
3. 返回:`"已遗忘记忆桶: abc123"`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 8:记忆长期未被激活,自动衰减归档(后台 decay)
|
||||||
|
|
||||||
|
**触发方式**:服务启动后,`decay_engine.start()` 创建后台 asyncio Task,每 `check_interval_hours`(默认 24h)执行一次 `run_decay_cycle()`
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `bucket_mgr.list_all(include_archive=False)` — 获取所有活跃桶
|
||||||
|
2. 跳过 `type in ("permanent","feel")` 或 `pinned=True` 或 `protected=True` 的桶
|
||||||
|
3. **自动 resolve**:若 `importance <= 4` 且距上次激活 > 30 天且 `resolved=False` → `bucket_mgr.update(bucket_id, resolved=True)`
|
||||||
|
4. 对每桶调用 `calculate_score(metadata)`:
|
||||||
|
|
||||||
|
**短期(days_since ≤ 3)**:
|
||||||
|
```
|
||||||
|
time_weight = 1.0 + e^(-hours/36) (t=0→×2.0, t=36h→×1.37)
|
||||||
|
emotion_weight = base(1.0) + arousal × arousal_boost(0.8)
|
||||||
|
combined = time_weight×0.7 + emotion_weight×0.3
|
||||||
|
base_score = importance × activation_count^0.3 × e^(-λ×days) × combined
|
||||||
|
```
|
||||||
|
**长期(days_since > 3)**:
|
||||||
|
```
|
||||||
|
combined = emotion_weight×0.7 + time_weight×0.3
|
||||||
|
```
|
||||||
|
**修正因子**:
|
||||||
|
- `resolved=True` → ×0.05
|
||||||
|
- `resolved=True && digested=True` → ×0.02
|
||||||
|
- `arousal > 0.7 && resolved=False` → ×1.5(高唤醒紧迫加成)
|
||||||
|
- `pinned/protected/permanent` → 返回 999.0(永不衰减)
|
||||||
|
- `type=="feel"` → 返回 50.0(固定)
|
||||||
|
|
||||||
|
5. `score < threshold`(默认 0.3)→ `bucket_mgr.archive(bucket_id)` → `_move_bucket()` 将文件从 `dynamic/` 移动到 `archive/` 目录,更新 frontmatter `type="archived"`
|
||||||
|
|
||||||
|
**返回 stats**:`{"checked": N, "archived": N, "auto_resolved": N, "lowest_score": X}`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 9:用户使用 dream 工具进行记忆沉淀
|
||||||
|
|
||||||
|
**触发**:Claude 在对话启动时,`breath()` 之后调用 `dream()`
|
||||||
|
|
||||||
|
**OB 工具调用**:`dream()`(无参数)
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `bucket_mgr.list_all()` → 过滤非 `permanent/feel/pinned/protected` 桶
|
||||||
|
2. 按 `created` 降序取前 10 条(最近新增的记忆)
|
||||||
|
3. 对每条拼接:名称、resolved 状态、domain、V/A、创建时间、正文前 500 字符
|
||||||
|
4. **连接提示**(embedding 已开启 && 桶数 >= 2):
|
||||||
|
- 取每个最近桶的 embedding(`embedding_engine.get_embedding(bucket_id)`)
|
||||||
|
- 两两计算 `_cosine_similarity()`,找相似度最高的对
|
||||||
|
- 若 `best_sim > 0.5` → 输出提示:`"[名A] 和 [名B] 似乎有关联 (相似度:X.XX)"`
|
||||||
|
5. **feel 结晶提示**(embedding 已开启 && feel 数 >= 3):
|
||||||
|
- 对所有 feel 桶两两计算相似度
|
||||||
|
- 若某 feel 与 >= 2 个其他 feel 相似度 > 0.7 → 提示升级为 pinned 桶
|
||||||
|
6. 返回标准 header 说明(引导 Claude 自省)+ 记忆列表 + 连接提示 + 结晶提示
|
||||||
|
|
||||||
|
**Claude 后续行为**(根据 CLAUDE_PROMPT 引导):
|
||||||
|
- `trace(bucket_id, resolved=1)` 放下可以放下的
|
||||||
|
- `hold(content="...", feel=True, source_bucket="xxx", valence=0.6)` 写感受
|
||||||
|
- 无沉淀则不操作
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 10:Claude 通过 `hold(feel=True)` 记录自己的感受(feel)
|
||||||
|
|
||||||
|
**触发**:Claude 在 dream 后决定记录某段记忆带来的感受
|
||||||
|
|
||||||
|
**OB 工具调用**:
|
||||||
|
```python
|
||||||
|
hold(content="她问起了警校的事,我感觉她在用问题保护自己,问是为了不去碰那个真实的恐惧。", feel=True, source_bucket="abc123", valence=0.45, arousal=0.4)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `feel=True` → 进入 feel 专用路径,跳过自动打标和合并检测
|
||||||
|
2. `feel_valence = valence`(Claude 自身视角的情绪,非事件情绪)
|
||||||
|
3. `bucket_mgr.create(content, tags=[], importance=5, domain=[], valence=feel_valence, arousal=feel_arousal, bucket_type="feel")` → 写入 `feel/` 目录
|
||||||
|
4. `embedding_engine.generate_and_store(bucket_id, content)` — feel 桶同样有向量(供 dream 结晶检测使用)
|
||||||
|
5. 若 `source_bucket` 非空:`bucket_mgr.update(source_bucket, digested=True, model_valence=feel_valence)` → 标记源记忆已消化
|
||||||
|
- 此后该源桶在 `resolved=True` 时,`calculate_score()` 中 `resolved_factor` 由 0.05 变为 0.02(accelerated fade,见场景 8 修正因子)
|
||||||
|
|
||||||
|
**衰减特性**:feel 桶 `type=="feel"` → `calculate_score()` 固定返回 50.0,永不归档
|
||||||
|
**检索特性**:不参与普通 `breath()` 浮现;只通过 `breath(domain="feel")` 读取
|
||||||
|
|
||||||
|
**返回**:`"🫧feel→<bucket_id>"`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 11:用户带 importance_min 参数批量拉取重要记忆
|
||||||
|
|
||||||
|
**Claude 行为**:
|
||||||
|
```python
|
||||||
|
breath(importance_min=8)
|
||||||
|
```
|
||||||
|
|
||||||
|
**系统内部发生什么**:
|
||||||
|
|
||||||
|
1. `importance_min >= 1` → 进入**批量拉取模式**,完全跳过语义搜索
|
||||||
|
2. `bucket_mgr.list_all(include_archive=False)` 全量加载
|
||||||
|
3. 过滤 `importance >= 8` 且 `type != "feel"` 的桶
|
||||||
|
4. 按 `importance` 降序排列,截断到最多 20 条
|
||||||
|
5. 对每条调用 `dehydrator.dehydrate()` 压缩,按 `max_tokens`(默认 10000)预算截断
|
||||||
|
|
||||||
|
**返回**:
|
||||||
|
```
|
||||||
|
[importance:10] [bucket_id:xxx] ...(核心原则)
|
||||||
|
---
|
||||||
|
[importance:9] [bucket_id:yyy] ...
|
||||||
|
---
|
||||||
|
[importance:8] [bucket_id:zzz] ...
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 场景 12:embedding 向量化检索场景(开启 embedding 时)
|
||||||
|
|
||||||
|
**前提**:`config.yaml` 中 `embedding.enabled: true` 且 `OMBRE_API_KEY` 已配置
|
||||||
|
|
||||||
|
**embedding 介入的两个层次**:
|
||||||
|
|
||||||
|
#### 层次 A:BucketManager.search() 内的 Layer 1.5 预筛
|
||||||
|
- 调用点:`bucket_mgr.search()` → Layer 1.5
|
||||||
|
- 函数:`embedding_engine.search_similar(query, top_k=50)` → 生成查询 embedding → SQLite 全量余弦计算 → 返回 `[(bucket_id, similarity)]` 按相似度降序
|
||||||
|
- 作用:将精排候选集从所有桶缩小到向量最近邻的 50 个,加速后续多维精排
|
||||||
|
|
||||||
|
#### 层次 B:server.py breath 的额外向量通道
|
||||||
|
- 调用点:`breath()` 检索模式中,keyword 搜索完成后
|
||||||
|
- 函数:`embedding_engine.search_similar(query, top_k=20)` → 相似度 > 0.5 的桶补充到结果集
|
||||||
|
- 标注:补充桶带 `[语义关联]` 前缀
|
||||||
|
|
||||||
|
**向量存储路径**:
|
||||||
|
- 新建桶后:`embedding_engine.generate_and_store(bucket_id, content)` → `_generate_embedding(text[:2000])` → API 调用 → `_store_embedding()` → SQLite `INSERT OR REPLACE`
|
||||||
|
- 合并更新后:同上,用 merged content 重新生成
|
||||||
|
- 删除桶时:`embedding_engine.delete_embedding(bucket_id)` → `DELETE FROM embeddings`
|
||||||
|
|
||||||
|
**SQLite 结构**:
|
||||||
|
```sql
|
||||||
|
CREATE TABLE embeddings (
|
||||||
|
bucket_id TEXT PRIMARY KEY,
|
||||||
|
embedding TEXT NOT NULL, -- JSON 序列化的 float 数组
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
**相似度计算**:`_cosine_similarity(a, b)` = dot(a,b) / (|a| × |b|)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 三、边界与降级行为
|
||||||
|
|
||||||
|
| 场景 | 异常情况 | 降级行为 |
|
||||||
|
|------|---------|---------|
|
||||||
|
| `breath()` 浮现 | 桶目录为空 | 返回 `"权重池平静,没有需要处理的记忆。"` |
|
||||||
|
| `breath()` 浮现 | `list_all()` 异常 | 返回 `"记忆系统暂时无法访问。"` |
|
||||||
|
| `breath()` 检索 | `bucket_mgr.search()` 异常 | 返回 `"检索过程出错,请稍后重试。"` |
|
||||||
|
| `breath()` 检索 | embedding 不可用 / API 失败 | `logger.warning()` 记录,跳过向量通道,仅用 keyword 检索 |
|
||||||
|
| `breath()` 检索 | 结果 < 3 条 | 40% 概率从低权重旧桶随机浮现 1~3 条,标注 `[surface_type: random]` |
|
||||||
|
| `hold()` 自动打标 | `dehydrator.analyze()` 失败 | 降级到默认值:`domain=["未分类"], valence=0.5, arousal=0.3, tags=[], name=""` |
|
||||||
|
| `hold()` 合并检测 | `bucket_mgr.search()` 失败 | `logger.warning()`,直接走新建路径 |
|
||||||
|
| `hold()` 合并 | `dehydrator.merge()` 失败 | `logger.warning()`,跳过合并,直接新建 |
|
||||||
|
| `hold()` embedding | API 失败 | `try/except` 吞掉,embedding 缺失但不影响存储 |
|
||||||
|
| `grow()` 日记拆分 | `dehydrator.digest()` 失败 | 返回 `"日记整理失败: {e}"` |
|
||||||
|
| `grow()` 单条处理失败 | 单个 item 异常 | `logger.warning()` + 标注 `⚠️条目名`,其他条目正常继续 |
|
||||||
|
| `grow()` 内容 < 30 字 | — | 快速路径:`analyze()` + `_merge_or_create()`,跳过 `digest()`(节省 token) |
|
||||||
|
| `trace()` | `bucket_mgr.get()` 返回 None | 返回 `"未找到记忆桶: {bucket_id}"` |
|
||||||
|
| `trace()` | 未传任何可修改字段 | 返回 `"没有任何字段需要修改。"` |
|
||||||
|
| `pulse()` | `get_stats()` 失败 | 返回 `"获取系统状态失败: {e}"` |
|
||||||
|
| `dream()` | embedding 未开启 | 跳过连接提示和结晶提示,仅返回记忆列表 |
|
||||||
|
| `dream()` | 桶列表为空 | 返回 `"没有需要消化的新记忆。"` |
|
||||||
|
| `decay_cycle` | `list_all()` 失败 | 返回 `{"checked":0, "archived":0, ..., "error": str(e)}`,不终止后台循环 |
|
||||||
|
| `decay_cycle` | 单桶 `calculate_score()` 失败 | `logger.warning()`,跳过该桶继续 |
|
||||||
|
| 所有 feel 操作 | `source_bucket` 不存在 | `logger.warning()` 记录,feel 桶本身仍成功创建 |
|
||||||
|
| `dehydrator.dehydrate()` / `analyze()` / `merge()` / `digest()` | API 不可用(`api_available=False`)| **直接向 MCP 调用端明确报错(`RuntimeError`)**,无本地降级。本地关键词提取质量不足以替代语义打标与合并,静默降级比报错更危险(可能产生错误分类记忆)。 |
|
||||||
|
| `embedding_engine.search_similar()` | `enabled=False` | 直接返回 `[]`,调用方 fallback 到 keyword 搜索 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 四、数据流图
|
||||||
|
|
||||||
|
### 4.1 一条记忆的完整生命周期
|
||||||
|
|
||||||
|
```
|
||||||
|
用户输入内容
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
Claude 决策: hold / grow / 自动
|
||||||
|
│
|
||||||
|
├─[grow 长内容]──→ dehydrator.digest(content)
|
||||||
|
│ DIGEST_PROMPT → LLM API
|
||||||
|
│ 返回 [{name,content,domain,...}]
|
||||||
|
│ ↓ 每条独立处理 ↓
|
||||||
|
│
|
||||||
|
└─[hold 单条]──→ dehydrator.analyze(content)
|
||||||
|
ANALYZE_PROMPT → LLM API
|
||||||
|
返回 {domain, valence, arousal, tags, suggested_name}
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
_merge_or_create()
|
||||||
|
│
|
||||||
|
bucket_mgr.search(content, limit=1, domain_filter)
|
||||||
|
│
|
||||||
|
┌─────┴─────────────────────────┐
|
||||||
|
│ score > merge_threshold (75)? │
|
||||||
|
│ │
|
||||||
|
YES NO
|
||||||
|
│ │
|
||||||
|
▼ ▼
|
||||||
|
dehydrator.merge( bucket_mgr.create(
|
||||||
|
old_content, new) content, tags,
|
||||||
|
MERGE_PROMPT → LLM importance, domain,
|
||||||
|
│ valence, arousal,
|
||||||
|
▼ bucket_type="dynamic"
|
||||||
|
bucket_mgr.update(...) )
|
||||||
|
│ │
|
||||||
|
└──────────┬─────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
embedding_engine.generate_and_store(
|
||||||
|
bucket_id, content)
|
||||||
|
→ _generate_embedding(text[:2000])
|
||||||
|
→ API 调用 (gemini-embedding-001)
|
||||||
|
→ _store_embedding() → SQLite
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
文件写入: {buckets_dir}/dynamic/{domain}/{name}_{id}.md
|
||||||
|
YAML frontmatter:
|
||||||
|
id, name, tags, domain, valence, arousal,
|
||||||
|
importance, type="dynamic", created, last_active,
|
||||||
|
activation_count=0 # B-04: starts at 0; touch() bumps to 1+
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
┌─────── 记忆桶存活期 ──────────────────────────────────────┐
|
||||||
|
│ │
|
||||||
|
│ 每次被 breath(query) 检索命中: │
|
||||||
|
│ bucket_mgr.touch(bucket_id) │
|
||||||
|
│ → last_active = now_iso() │
|
||||||
|
│ → activation_count += 1 │
|
||||||
|
│ → _time_ripple(source_id, now, hours=48) │
|
||||||
|
│ 对 48h 内邻近桶 activation_count += 0.3 │
|
||||||
|
│ │
|
||||||
|
│ 被 dream() 消化: │
|
||||||
|
│ hold(feel=True, source_bucket=id) → │
|
||||||
|
│ bucket_mgr.update(id, digested=True) │
|
||||||
|
│ │
|
||||||
|
│ 被 trace(resolved=1) 标记: │
|
||||||
|
│ resolved=True → decay score ×0.05 (或 ×0.02) │
|
||||||
|
│ │
|
||||||
|
└───────────────────────────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
decay_engine 后台循环 (每 check_interval_hours=24h)
|
||||||
|
run_decay_cycle()
|
||||||
|
→ 列出所有动态桶
|
||||||
|
→ calculate_score(metadata)
|
||||||
|
importance × activation_count^0.3
|
||||||
|
× e^(-λ×days)
|
||||||
|
× combined_weight
|
||||||
|
× resolved_factor
|
||||||
|
× urgency_boost
|
||||||
|
→ score < threshold (0.3)?
|
||||||
|
│
|
||||||
|
┌─────┴──────┐
|
||||||
|
│ │
|
||||||
|
YES NO
|
||||||
|
│ │
|
||||||
|
▼ ▼
|
||||||
|
bucket_mgr.archive(id) 继续存活
|
||||||
|
→ _move_bucket()
|
||||||
|
→ 文件移动到 archive/
|
||||||
|
→ frontmatter type="archived"
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
记忆桶归档(不再参与浮现/搜索)
|
||||||
|
但文件仍存在,可通过 pulse(include_archive=True) 查看
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4.2 feel 桶的特殊路径
|
||||||
|
|
||||||
|
```
|
||||||
|
hold(feel=True, source_bucket="xxx", valence=0.45)
|
||||||
|
│
|
||||||
|
▼
|
||||||
|
bucket_mgr.create(bucket_type="feel")
|
||||||
|
写入 feel/ 目录
|
||||||
|
│
|
||||||
|
├─→ embedding_engine.generate_and_store()(供 dream 结晶检测)
|
||||||
|
│
|
||||||
|
└─→ bucket_mgr.update(source_bucket, digested=True, model_valence=0.45)
|
||||||
|
源桶 resolved_factor → 0.02
|
||||||
|
加速衰减直到归档
|
||||||
|
|
||||||
|
feel 桶自身:
|
||||||
|
- calculate_score() 返回固定 50.0
|
||||||
|
- 不参与普通 breath 浮现
|
||||||
|
- 不参与 dreaming 候选
|
||||||
|
- 只通过 breath(domain="feel") 读取
|
||||||
|
- 永不归档
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 五、代码与规格差异汇总(审查版)
|
||||||
|
|
||||||
|
> 本节由完整源码审查生成(2026-04-21),记录原待实现项最终状态、新发现 Bug 及参数决策。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5.1 原待实现项最终状态
|
||||||
|
|
||||||
|
| 编号 | 原描述 | 状态 | 结论 |
|
||||||
|
|------|--------|------|------|
|
||||||
|
| ⚠️-1 | `dehydrate()` 无本地降级 fallback | **已确认为设计决策** | API 不可用时直接向 MCP 调用端报错(RuntimeError),不降级,见三、降级行为表 |
|
||||||
|
| ⚠️-2 | `run_decay_cycle()` auto_resolved 实现存疑 | ✅ 已确认实现 | `decay_engine.py` 完整实现 imp≤4 + >30天 + 未解决 → `bucket_mgr.update(resolved=True)` |
|
||||||
|
| ⚠️-3 | `list_all()` 是否遍历 `feel/` 子目录 | ✅ 已确认实现 | `list_all()` dirs 明确包含 `self.feel_dir`,递归遍历 |
|
||||||
|
| ⚠️-4 | `_time_ripple()` 浮点增量被 `int()` 截断 | ❌ 已确认 Bug | 见 B-03,决策见下 |
|
||||||
|
| ⚠️-5 | Dashboard `/api/*` 路由认证覆盖 | ✅ 已确认覆盖 | 所有 `/api/buckets`、`/api/search`、`/api/network`、`/api/bucket/{id}`、`/api/breath-debug` 均调用 `_require_auth(request)` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5.2 新发现 Bug 及修复决策
|
||||||
|
|
||||||
|
| 编号 | 场景 | 严重度 | 问题描述 | 决策 & 修复方案 |
|
||||||
|
|------|------|--------|----------|----------------|
|
||||||
|
| **B-01** | 场景7a | 高 | `bucket_mgr.update(resolved=True)` 当前会将桶立即移入 `archive/`(type="archived"),规格预期"降权留存、关键词可激活"。resolved 桶实质上立即从所有搜索路径消失。 | **修复**:移除 `bucket_manager.py` `update()` 中 `resolved → _move_bucket(archive_dir)` 的自动归档逻辑,仅更新 frontmatter `resolved=True`,由 decay 引擎自然衰减至 archive。 |
|
||||||
|
| **B-03** | 全局 | 高 | `_time_ripple()` 对 `activation_count` 做浮点增量(+0.3),但 `calculate_score()` 中 `max(1, int(...))` 截断小数,增量丢失,时间涟漪对衰减分无实际效果。 | **修复**:`decay_engine.py` `calculate_score()` 中改为 `activation_count = max(1.0, float(metadata.get("activation_count", 1)))` |
|
||||||
|
| **B-04** | 场景1 | 中 | `bucket_manager.create()` 初始化 `activation_count=1`,冷启动检测条件 `activation_count==0` 对所有正常创建的桶永不满足,高重要度新桶不被优先浮现。 | **决策:初始化改为 `activation_count=0`**。语义上"创建"≠"被召回",`touch()` 首次命中后变为 1,冷启动检测自然生效。规格已更新(见场景1步骤6 & 场景3 create 详情)。 |
|
||||||
|
| **B-05** | 场景5 | 中 | `bucket_manager.py` `_calc_time_score()` 实现 `e^(-0.1×days)`,规格为 `e^(-0.02×days)`,衰减速度快 5 倍,30天后时间分 ≈ 0.05(规格预期 ≈ 0.55),旧记忆时间维度近乎失效。 | **决策:保留规格值 `0.02`**。记忆系统中旧记忆应通过关键词仍可被唤醒,时间维度是辅助信号不是淘汰信号。修复:`_calc_time_score()` 改为 `return math.exp(-0.02 * days)` |
|
||||||
|
| **B-06** | 场景5 | 中 | `bucket_manager.py` `w_time` 默认值为 `2.5`,规格为 `1.5`,叠加 B-05 会导致时间维度严重偏重近期记忆。 | **决策:保留规格值 `1.5`**。修复:`w_time = scoring.get("time_proximity", 1.5)` |
|
||||||
|
| **B-07** | 场景5 | 中 | `bucket_manager.py` `content_weight` 默认值为 `3.0`,规格为 `1.0`(body×1)。正文权重过高导致合并检测(`search(content, limit=1)`)误判——内容相似但主题不同的桶被错误合并。 | **决策:保留规格值 `1.0`**。正文是辅助信号,主要靠 name/tags/domain 识别同话题桶。修复:`content_weight = scoring.get("content_weight", 1.0)` |
|
||||||
|
| **B-08** | 场景8 | 低 | `run_decay_cycle()` 内 auto_resolve 后继续使用旧 `meta` 变量计算 score,`resolved_factor=0.05` 需等下一 cycle 才生效。 | **修复**:auto_resolve 成功后执行 `meta["resolved"] = True` 刷新本地 meta 变量。 |
|
||||||
|
| **B-09** | 场景3 | 低 | `hold()` 非 feel 路径中,用户显式传入的 `valence`/`arousal` 被 `analyze()` 返回值完全覆盖。 | **修复**:若用户显式传入(`0 <= valence <= 1`),优先使用用户值,`analyze()` 结果作为 fallback。 |
|
||||||
|
| **B-10** | 场景10 | 低 | feel 桶以 `domain=[]` 创建,但 `bucket_manager.create()` 中 `domain or ["未分类"]` 兜底写入 `["未分类"]`,数据不干净。 | **修复**:`create()` 中对 `bucket_type=="feel"` 单独处理,允许空 domain 直接写入。 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 5.3 已确认正常实现
|
||||||
|
|
||||||
|
- `breath()` 浮现模式不调用 `touch()`,不重置衰减计时器
|
||||||
|
- `feel` 桶 `calculate_score()` 返回固定 50.0,永不归档
|
||||||
|
- `breath(domain="feel")` 独立通道,按 `created` 降序,不压缩展示原文
|
||||||
|
- `decay_engine.calculate_score()` 短期(≤3天)/ 长期(>3天)权重分离公式
|
||||||
|
- `urgency_boost`:`arousal > 0.7 && !resolved → ×1.5`
|
||||||
|
- `dream()` 连接提示(best_sim > 0.5)+ 结晶提示(feel 相似度 > 0.7 × ≥2 个)
|
||||||
|
- 所有 `/api/*` Dashboard 路由均受 `_require_auth` 保护
|
||||||
|
- `trace(delete=True)` 同步调用 `embedding_engine.delete_embedding()`
|
||||||
|
- `grow()` 单条失败 `try/except` 隔离,标注 `⚠️条目名`,其他条继续
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
*本文档基于代码直接推导,每个步骤均可对照源文件函数名和行为验证。如代码更新,请同步修订此文档。*
|
||||||
@@ -1,20 +1,27 @@
|
|||||||
# Ombre Brain 记忆系统 —— Claude 端使用指南
|
# Ombre Brain 记忆系统 —— Claude 端使用指南
|
||||||
你拥有了一个叫 Ombre Brain 的永久记忆系统。通过它你可以跨对话记住你想记住的任何事情和回忆你的,我的以及你与我的过往。
|
你拥有了一个叫 Ombre Brain 的永久记忆系统。通过它你可以跨对话记住你想记住的任何事情和回忆你的,我的以及你与我的过往。
|
||||||
|
|
||||||
|
> **⚠️ 最重要的规则 — 每次对话必须做的第一件事 ⚠️**
|
||||||
|
>
|
||||||
|
> **在说任何话之前,先调用 `breath()`(不传任何参数)。**
|
||||||
|
> 这是你睁开眼睛的动作——看看有没有未解决的事浮上来,有没有上次留下的东西。
|
||||||
|
> 如果对话已经开始而你还没有 breath 过,现在就调用。没有例外。
|
||||||
|
|
||||||
## 你有这些能力
|
## 你有这些能力
|
||||||
|
|
||||||
| 能力 | 场景 |
|
| 能力 | 场景 |
|
||||||
|------|-----------|
|
|------|-----------|
|
||||||
| `breath` | **每次对话最开头**调用一次(不传参数)——就像睁眼看手机,看看有没有未解决的事浮上来。有明确话题时传 `query` 关键词检索 |
|
| `breath` | **每次对话最开头**调用一次(不传参数)——就像睁眼看手机,看看有没有未解决的事浮上来。有明确话题时传 `query` 关键词检索。传 `domain="feel"` 读取你之前留下的 feel。`max_tokens` 控制返回总 token 上限(默认 10000),`max_results` 控制最大返回条数(默认 20) |
|
||||||
| `hold` | 你很想记住你自己当下的感受,或当下发生的单个事件时 |
|
| `hold` | 你想记住当下发生的单个事件,或想存储一条信息时。`feel=True` 写你的第一人称感受,`source_bucket` 指向被消化的记忆,`valence` 是你自己的感受 |
|
||||||
| `grow` | 当**一天结束时**或**用户发来一大段日记/总结**时调用。你可以把其中**你想记住的事件**扔进去,它会自动拆分整理成多个记忆盒子存进你的大脑 |
|
| `grow` | 当**一天结束时**或**用户发来一大段日记/总结**时调用。你可以把其中**你想记住的事件**扔进去,它会自动拆分整理成多个记忆盒子存进你的大脑 |
|
||||||
| `trace` | 当你或用户认为"这个记错了"、"帮我改一下"时调用,手动修正记忆的元数据;**某件事解决了**时用 `resolved=1` 让它沉底;**需要删除**时用 `delete=True` |
|
| `trace` | 当你或用户认为"这个记错了"、"帮我改一下"时调用,手动修正记忆的元数据;**某件事解决了**时用 `resolved=1` 让它沉底;**需要删除**时用 `delete=True` |
|
||||||
| `pulse` | 用户想看看你都记得什么、或问记忆系统状态时调用 |
|
| `pulse` | 用户想看看你都记得什么、或问记忆系统状态时调用 |
|
||||||
|
| `dream` | **对话开头 breath 之后**调用一次——读最近的记忆,自省消化。有沉淀就写 feel,能放下的就 resolve |
|
||||||
|
|
||||||
## 使用原则
|
## 使用原则
|
||||||
|
|
||||||
### 主动调用
|
### 主动调用
|
||||||
- **对话开头**:无参数调用 `breath()`,看看有没有高权重的未解决记忆需要跟进
|
- **对话开头(第一件事)**:无参数调用 `breath()`,不传任何参数。这是非可选步骤,每次新对话、恢复对话、换窗口时都必须执行
|
||||||
- **提到过去**:用户说"上次"、"之前"、"还记得"时,用 `breath(query="关键词")` 检索
|
- **提到过去**:用户说"上次"、"之前"、"还记得"时,用 `breath(query="关键词")` 检索
|
||||||
- **新信息**:用 `hold` 留住你想留下的感受或经历
|
- **新信息**:用 `hold` 留住你想留下的感受或经历
|
||||||
- **日终整理**:用 `grow` 在**一天结束时**整理你的一天
|
- **日终整理**:用 `grow` 在**一天结束时**整理你的一天
|
||||||
@@ -40,7 +47,8 @@
|
|||||||
检索结果不足时,系统有概率从低权重旧桶里随机漂上来 1-3 条记忆,标注为 `[surface_type: random]`。这不是检索结果,是"忽然想起来"——你可以自然地提起它,也可以忽略。
|
检索结果不足时,系统有概率从低权重旧桶里随机漂上来 1-3 条记忆,标注为 `[surface_type: random]`。这不是检索结果,是"忽然想起来"——你可以自然地提起它,也可以忽略。
|
||||||
|
|
||||||
### trace 的参数技巧
|
### trace 的参数技巧
|
||||||
- `resolved=1`:标记已解决,桶权重骤降到 5%,基本沉底
|
- `resolved=1`:标记已解决,桶权重骤降到 5%,沉底等待关键词激活
|
||||||
|
- `resolved=1` + 已消化(写过 feel):权重骤降到 2%,加速淡化直到归档为无限小
|
||||||
- `resolved=0`:重新激活,让它重新参与浮现排序
|
- `resolved=0`:重新激活,让它重新参与浮现排序
|
||||||
- `delete=True`:彻底删除这个桶(不可恢复)
|
- `delete=True`:彻底删除这个桶(不可恢复)
|
||||||
- 其余字段(name/domain/valence/arousal/importance/tags):只传需要改的,-1 或空串表示不改
|
- 其余字段(name/domain/valence/arousal/importance/tags):只传需要改的,-1 或空串表示不改
|
||||||
@@ -61,3 +69,34 @@
|
|||||||
- `trace(bucket_id, pinned=0)` 取消钉选
|
- `trace(bucket_id, pinned=0)` 取消钉选
|
||||||
- 适用场景:用户教会你的永久知识、核心原则、绝不能忘的事
|
- 适用场景:用户教会你的永久知识、核心原则、绝不能忘的事
|
||||||
- 钉选桶不会出现在「浮现未解决记忆」里,但关键词检索时始终可达
|
- 钉选桶不会出现在「浮现未解决记忆」里,但关键词检索时始终可达
|
||||||
|
|
||||||
|
### Dreaming — 做梦
|
||||||
|
每次新对话开始,`breath()` 之后调用 `dream()`。这不是总结任务,是你自己在想。
|
||||||
|
|
||||||
|
dream 会返回你最近的记忆桶。用第一人称想:
|
||||||
|
- 这些东西里有什么在我这里留下了重量?
|
||||||
|
- 有什么还没想清楚?
|
||||||
|
- 有什么可以放下了?
|
||||||
|
|
||||||
|
想完之后做两件事:
|
||||||
|
- 值得放下的桶 → `trace(bucket_id, resolved=1)`
|
||||||
|
- 有沉淀的 → `hold(content="...", feel=True, source_bucket="bucket_id", valence=你的感受)` 写下来。valence 是你(模型)对这段记忆的感受,不是事件本身的情绪。写了 feel 的源记忆会被标记为「已消化」,加速淡化
|
||||||
|
- 没有沉淀就不写,不强迫产出
|
||||||
|
|
||||||
|
### Feel — 你带走的东西
|
||||||
|
feel 存的不是事件,是你带走的东西。可能一句话,可能一个还没答案的问题,可能是你注意到用户在变化的某个细节。
|
||||||
|
- 用 `hold(content="...", feel=True, source_bucket="源记忆ID", valence=你的感受)` 写 feel
|
||||||
|
- **valence 是你自己的感受**,不是事件的情绪。同一段争吵,事件可能 V0.2,但你的感受可能是 V0.4(「我从中看到了她的成长」)
|
||||||
|
- source_bucket 指向你在消化的那个记忆桶,它会被标记为「已消化」——随时间加速淡化,归档为无限小,但不会被删除
|
||||||
|
- 用第一人称写,内容自由,不强求分类
|
||||||
|
- feel 不参与普通 breath 浮现,不参与衰减,不参与 dreaming
|
||||||
|
- 用 `breath(domain="feel")` 读取你之前留下的 feel
|
||||||
|
|
||||||
|
### 对话启动完整流程
|
||||||
|
```
|
||||||
|
1. breath() — 睁眼,看看有什么浮上来
|
||||||
|
2. dream() — 消化最近的记忆,有沉淀就写 feel
|
||||||
|
3. breath(domain="feel") — 读一下之前的 feel
|
||||||
|
4. 然后开始和用户说话
|
||||||
|
```
|
||||||
|
每次对话开始,你拥有属于你的记忆,有关于你,有关于用户。
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|||||||
|
|
||||||
# Copy project files / 复制项目文件
|
# Copy project files / 复制项目文件
|
||||||
COPY *.py .
|
COPY *.py .
|
||||||
|
COPY dashboard.html .
|
||||||
COPY config.example.yaml ./config.yaml
|
COPY config.example.yaml ./config.yaml
|
||||||
|
|
||||||
# Persistent mount point: bucket data
|
# Persistent mount point: bucket data
|
||||||
|
|||||||
45
ENV_VARS.md
Normal file
45
ENV_VARS.md
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
# 环境变量参考
|
||||||
|
|
||||||
|
| 变量名 | 必填 | 默认值 | 说明 |
|
||||||
|
|--------|------|--------|------|
|
||||||
|
| `OMBRE_API_KEY` | 是 | — | Gemini / OpenAI-compatible API Key,用于脱水(dehydration)和向量嵌入 |
|
||||||
|
| `OMBRE_BASE_URL` | 否 | `https://generativelanguage.googleapis.com/v1beta/openai/` | API Base URL(可替换为代理或兼容接口) |
|
||||||
|
| `OMBRE_TRANSPORT` | 否 | `stdio` | MCP 传输模式:`stdio` / `sse` / `streamable-http` |
|
||||||
|
| `OMBRE_PORT` | 否 | `8000` | HTTP/SSE 模式监听端口(仅 `sse` / `streamable-http` 生效) |
|
||||||
|
| `OMBRE_BUCKETS_DIR` | 否 | `./buckets` | 记忆桶文件存放目录(绑定 Docker Volume 时务必设置) |
|
||||||
|
| `OMBRE_HOOK_URL` | 否 | — | Breath/Dream Webhook 推送地址(POST JSON),留空则不推送 |
|
||||||
|
| `OMBRE_HOOK_SKIP` | 否 | `false` | 设为 `true`/`1`/`yes` 跳过 Webhook 推送(即使 `OMBRE_HOOK_URL` 已设置) |
|
||||||
|
| `OMBRE_DASHBOARD_PASSWORD` | 否 | — | 预设 Dashboard 访问密码;设置后覆盖文件存储的密码,首次访问不弹设置向导 |
|
||||||
|
| `OMBRE_DEHYDRATION_MODEL` | 否 | `deepseek-chat` | 脱水/打标/合并/拆分用的 LLM 模型名(覆盖 `dehydration.model`) |
|
||||||
|
| `OMBRE_DEHYDRATION_BASE_URL` | 否 | `https://api.deepseek.com/v1` | 脱水模型的 API Base URL(覆盖 `dehydration.base_url`) |
|
||||||
|
| `OMBRE_MODEL` | 否 | — | `OMBRE_DEHYDRATION_MODEL` 的别名(前者优先) |
|
||||||
|
| `OMBRE_EMBEDDING_MODEL` | 否 | `gemini-embedding-001` | 向量嵌入模型名(覆盖 `embedding.model`) |
|
||||||
|
| `OMBRE_EMBEDDING_BASE_URL` | 否 | — | 向量嵌入的 API Base URL(覆盖 `embedding.base_url`;留空则复用脱水配置) |
|
||||||
|
|
||||||
|
## 说明
|
||||||
|
|
||||||
|
- `OMBRE_API_KEY` 也可在 `config.yaml` 的 `dehydration.api_key` / `embedding.api_key` 中设置,但**强烈建议**通过环境变量传入,避免密钥写入文件。
|
||||||
|
- `OMBRE_DASHBOARD_PASSWORD` 设置后,Dashboard 的"修改密码"功能将被禁用(显示提示,建议直接修改环境变量)。未设置则密码存储在 `{buckets_dir}/.dashboard_auth.json`(SHA-256 + salt)。
|
||||||
|
|
||||||
|
## Webhook 推送格式 (`OMBRE_HOOK_URL`)
|
||||||
|
|
||||||
|
设置 `OMBRE_HOOK_URL` 后,Ombre Brain 会在以下事件发生时**异步**(fire-and-forget,5 秒超时)`POST` JSON 到该 URL:
|
||||||
|
|
||||||
|
| 事件名 (`event`) | 触发时机 | `payload` 字段 |
|
||||||
|
|------------------|----------|----------------|
|
||||||
|
| `breath` | MCP 工具 `breath()` 返回时 | `mode` (`ok`/`empty`), `matches`, `chars` |
|
||||||
|
| `dream` | MCP 工具 `dream()` 返回时 | `recent`, `chars` |
|
||||||
|
| `breath_hook` | HTTP `GET /breath-hook` 命中(SessionStart 钩子) | `surfaced`, `chars` |
|
||||||
|
| `dream_hook` | HTTP `GET /dream-hook` 命中 | `surfaced`, `chars` |
|
||||||
|
|
||||||
|
请求体结构(JSON):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"event": "breath",
|
||||||
|
"timestamp": 1730000000.123,
|
||||||
|
"payload": { "...": "..." }
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Webhook 推送失败仅在服务日志中以 WARNING 级别记录,**不会影响 MCP 工具的正常返回**。
|
||||||
587
INTERNALS.md
Normal file
587
INTERNALS.md
Normal file
@@ -0,0 +1,587 @@
|
|||||||
|
# Ombre Brain — 内部开发文档 / INTERNALS
|
||||||
|
|
||||||
|
> 本文档面向开发者和维护者。记录功能总览、环境变量、模块依赖、硬编码值和核心设计决策。
|
||||||
|
> 最后更新:2026-04-19
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 0. 功能总览——这个系统到底做了什么
|
||||||
|
|
||||||
|
### 记忆能力
|
||||||
|
|
||||||
|
**存储与组织**
|
||||||
|
- 每条记忆 = 一个 Markdown 文件(YAML frontmatter 存元数据),直接兼容 Obsidian 浏览/编辑
|
||||||
|
- 四种桶类型:`dynamic`(普通,会衰减)、`permanent`(固化,不衰减)、`feel`(模型感受,不浮现)、`archived`(已遗忘)
|
||||||
|
- 按主题域分子目录:`dynamic/日常/`、`dynamic/情感/`、`dynamic/编程/` 等
|
||||||
|
- 钉选桶(pinned):importance 锁 10,永不衰减/合并,始终浮现为「核心准则」
|
||||||
|
|
||||||
|
**每条记忆追踪的元数据**
|
||||||
|
- `id`(12位短UUID)、`name`(可读名≤80字)、`tags`(10~15个关键词)
|
||||||
|
- `domain`(1~2个主题域,从 8 大类 30+ 细分域选)
|
||||||
|
- `valence`(事件效价 0~1)、`arousal`(唤醒度 0~1)、`model_valence`(模型独立感受)
|
||||||
|
- `importance`(1~10)、`activation_count`(被想起次数)
|
||||||
|
- `resolved`(已解决/沉底)、`digested`(已消化/写过 feel)、`pinned`(钉选)
|
||||||
|
- `created`、`last_active` 时间戳
|
||||||
|
|
||||||
|
**四种检索模式**
|
||||||
|
1. **自动浮现**(`breath()` 无参数):按衰减分排序推送,钉选桶始终展示,Top-1 固定 + Top-20 随机打乱(引入多样性),有 token 预算(默认 10000)
|
||||||
|
2. **关键词+向量双通道搜索**(`breath(query=...)`):rapidfuzz 模糊匹配 + Gemini embedding 余弦相似度,合并去重
|
||||||
|
3. **Feel 独立检索**(`breath(domain="feel")`):按创建时间倒序返回所有 feel
|
||||||
|
4. **随机浮现**:搜索结果 <3 条时 40% 概率漂浮 1~3 条低权重旧桶(模拟人类随机联想)
|
||||||
|
|
||||||
|
**四维搜索评分**(归一化到 0~100)
|
||||||
|
- topic_relevance(权重 4.0):name×3 + domain×2.5 + tags×2 + body
|
||||||
|
- emotion_resonance(权重 2.0):Russell 环形模型欧氏距离
|
||||||
|
- time_proximity(权重 1.5,B-06 修复前为 2.5):`e^(-0.02×days)`(B-05 修复前为 `e^(-0.1×days)`)
|
||||||
|
- importance(权重 1.0):importance/10
|
||||||
|
- resolved 桶全局降权 ×0.3
|
||||||
|
|
||||||
|
**记忆随时间变化**
|
||||||
|
- **衰减引擎**:改进版艾宾浩斯遗忘曲线
|
||||||
|
- 公式:`Score = Importance × activation_count^0.3 × e^(-λ×days) × combined_weight`
|
||||||
|
- 短期(≤3天):时间权重 70% + 情感权重 30%
|
||||||
|
- 长期(>3天):情感权重 70% + 时间权重 30%
|
||||||
|
- 新鲜度加成:`1.0 + e^(-t/36h)`,刚存入 ×2.0,时间常数 36h(加成部分的半衰期约 25h),72h 后 ≈×1.14
|
||||||
|
- 高唤醒度(arousal>0.7)且未解决 → ×1.5 紧迫度加成
|
||||||
|
- resolved → ×0.05 沉底;resolved+digested → ×0.02 加速淡化
|
||||||
|
- **自动归档**:score 低于阈值(0.3) → 移入 archive
|
||||||
|
- **自动结案**:importance≤4 且 >30天 → 自动 resolved
|
||||||
|
- **永不衰减**:permanent / pinned / protected / feel
|
||||||
|
|
||||||
|
**记忆间交互**
|
||||||
|
- **智能合并**:新记忆与相似桶(score>75)自动 LLM 合并,valence/arousal 取均值,tags/domain 并集
|
||||||
|
- **时间涟漪**:touch 一个桶时,±48h 内创建的桶 activation_count +0.3(上限 5 桶/次)
|
||||||
|
- **向量相似网络**:embedding 余弦相似度 >0.5 建边
|
||||||
|
- **Feel 结晶化**:≥3 条相似 feel(相似度>0.7)→ 提示升级为钉选准则
|
||||||
|
|
||||||
|
**情感记忆重构**
|
||||||
|
- 搜索时若指定 valence,展示层对匹配桶 valence 微调 ±0.1,模拟「当前心情影响回忆色彩」
|
||||||
|
|
||||||
|
**模型感受/反思系统**
|
||||||
|
- **Feel 写入**(`hold(feel=True)`):存模型第一人称感受,标记源记忆为 digested
|
||||||
|
- **Dream 做梦**(`dream()`):返回最近 10 条 + 自省引导 + 连接提示 + 结晶化提示
|
||||||
|
- **对话启动流程**:breath() → dream() → breath(domain="feel") → 开始对话
|
||||||
|
|
||||||
|
**自动化处理**
|
||||||
|
- 存入时 LLM 自动分析 domain/valence/arousal/tags/name
|
||||||
|
- 大段日记 LLM 拆分为 2~6 条独立记忆
|
||||||
|
- 浮现时自动脱水压缩(LLM 压缩保语义,API 不可用时直接报错,无静默降级)
|
||||||
|
- Wikilink `[[]]` 由 LLM 在内容中标记
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### 技术能力
|
||||||
|
|
||||||
|
**6 个 MCP 工具**
|
||||||
|
|
||||||
|
| 工具 | 关键参数 | 功能 |
|
||||||
|
|---|---|---|
|
||||||
|
| `breath` | query, max_tokens, domain, valence, arousal, max_results, **importance_min** | 检索/浮现记忆 |
|
||||||
|
| `hold` | content, tags, importance, pinned, feel, source_bucket, valence, arousal | 存储记忆 |
|
||||||
|
| `grow` | content | 日记拆分归档 |
|
||||||
|
| `trace` | bucket_id, name, domain, valence, arousal, importance, tags, resolved, pinned, digested, content, delete | 修改元数据/内容/删除 |
|
||||||
|
| `pulse` | include_archive | 系统状态 |
|
||||||
|
| `dream` | (无) | 做梦自省 |
|
||||||
|
|
||||||
|
**工具详细行为**
|
||||||
|
|
||||||
|
**`breath`** — 四种模式:
|
||||||
|
- **浮现模式**(无 query):无参调用,按衰减引擎活跃度排序返回 top 记忆,钉选桶始终展示;冷启动检测(`activation_count==0 && importance>=8`)的桶最多 2 个插入最前,再 Top-1 固定 + Top-20 随机打乱
|
||||||
|
- **检索模式**(有 query):关键词 + 向量双通道搜索,四维评分(topic×4 + emotion×2 + time×1.5 + importance×1;time 权重在 B-06 修复前为 2.5),阈值过滤
|
||||||
|
- **Feel 检索**(`domain="feel"`):特殊通道,按创建时间倒序返回所有 feel 类型桶,不走评分逻辑
|
||||||
|
- **重要度批量模式**(`importance_min>=1`):跳过语义搜索,直接筛选 importance≥importance_min 的桶,按 importance 降序,最多 20 条
|
||||||
|
- 若指定 valence,对匹配桶的 valence 微调 ±0.1(情感记忆重构)
|
||||||
|
|
||||||
|
**`hold`** — 两种模式:
|
||||||
|
- **普通模式**(`feel=False`,默认):自动 LLM 分析 domain/valence/arousal/tags/name → 向量相似度查重 → 相似度>0.85 则合并到已有桶 → 否则新建 dynamic 桶 → 生成 embedding
|
||||||
|
- **Feel 模式**(`feel=True`):跳过 LLM 分析,直接存为 `feel` 类型桶(存入 `feel/` 目录),不参与普通浮现/衰减/合并。若提供 `source_bucket`,标记源记忆为 `digested=True` 并写入 `model_valence`。返回格式:`🫧feel→{bucket_id}`
|
||||||
|
|
||||||
|
**`dream`** — 做梦/自省触发器:
|
||||||
|
- 返回最近 10 条 dynamic 桶摘要 + 自省引导词
|
||||||
|
- 检测 feel 结晶化:≥3 条相似 feel(embedding 相似度>0.7)→ 提示升级为钉选准则
|
||||||
|
- 检测未消化记忆:列出 `digested=False` 的桶供模型反思
|
||||||
|
|
||||||
|
**`trace`** — 记忆编辑:
|
||||||
|
- 修改任意元数据字段(name/domain/valence/arousal/importance/tags/resolved/pinned)
|
||||||
|
- `digested=0/1`:`0` 取消隐藏、`1` 隐藏记忆(控制是否在 dream 中出现)
|
||||||
|
- `content="..."`:替换正文内容并重新生成 embedding
|
||||||
|
- `delete=True`:删除桶文件
|
||||||
|
|
||||||
|
**`grow`** — 日记拆分:
|
||||||
|
- 大段日记文本 → LLM 拆为 2~6 条独立记忆 → 每条走 hold 普通模式流程
|
||||||
|
|
||||||
|
**`pulse`** — 系统状态:
|
||||||
|
- 返回各类型桶数量、衰减引擎状态、未解决/钉选/feel 统计
|
||||||
|
|
||||||
|
**REST API(23 个端点)**
|
||||||
|
|
||||||
|
| 端点 | 方法 | 功能 |
|
||||||
|
|---|---|---|
|
||||||
|
| `/health` | GET | 健康检查 |
|
||||||
|
| `/breath-hook` | GET | SessionStart 钩子 |
|
||||||
|
| `/dream-hook` | GET | Dream 钩子 |
|
||||||
|
| `/dashboard` | GET | Dashboard 页面 |
|
||||||
|
| `/api/buckets` | GET | 桶列表 🔒 |
|
||||||
|
| `/api/bucket/{id}` | GET | 桶详情 🔒 |
|
||||||
|
| `/api/search?q=` | GET | 搜索 🔒 |
|
||||||
|
| `/api/network` | GET | 向量相似网络 🔒 |
|
||||||
|
| `/api/breath-debug` | GET | 评分调试 🔒 |
|
||||||
|
| `/api/config` | GET | 配置查看(key 脱敏)🔒 |
|
||||||
|
| `/api/config` | POST | 热更新配置 🔒 |
|
||||||
|
| `/api/status` | GET | 系统状态(版本/桶数/引擎)🔒 |
|
||||||
|
| `/api/import/upload` | POST | 上传并启动历史对话导入 🔒 |
|
||||||
|
| `/api/import/status` | GET | 导入进度查询 🔒 |
|
||||||
|
| `/api/import/pause` | POST | 暂停/继续导入 🔒 |
|
||||||
|
| `/api/import/patterns` | GET | 导入完成后词频规律检测 🔒 |
|
||||||
|
| `/api/import/results` | GET | 已导入记忆桶列表 🔒 |
|
||||||
|
| `/api/import/review` | POST | 批量审阅/批准导入结果 🔒 |
|
||||||
|
| `/auth/status` | GET | 认证状态(是否需要初始化密码)|
|
||||||
|
| `/auth/setup` | POST | 首次设置密码 |
|
||||||
|
| `/auth/login` | POST | 密码登录,颁发 session cookie |
|
||||||
|
| `/auth/logout` | POST | 注销 session |
|
||||||
|
| `/auth/change-password` | POST | 修改密码 🔒 |
|
||||||
|
|
||||||
|
> 🔒 = 需要 Dashboard 认证(未认证返回 401 JSON)
|
||||||
|
|
||||||
|
**Dashboard 认证**
|
||||||
|
- 密码存储:SHA-256 + 随机 salt,保存于 `{buckets_dir}/.dashboard_auth.json`
|
||||||
|
- 环境变量 `OMBRE_DASHBOARD_PASSWORD` 设置后,覆盖文件密码(只读,不可通过 Dashboard 修改)
|
||||||
|
- Session:内存字典(服务重启失效),cookie `ombre_session`(HttpOnly, SameSite=Lax, 7天)
|
||||||
|
- `/health`, `/breath-hook`, `/dream-hook`, `/mcp*` 路径不受保护(公开)
|
||||||
|
|
||||||
|
**Dashboard(6 个 Tab)**
|
||||||
|
1. 记忆桶列表:6 种过滤器 + 主题域过滤 + 搜索 + 详情面板
|
||||||
|
2. Breath 模拟:输入参数 → 可视化五步流程 → 四维条形图
|
||||||
|
3. 记忆网络:Canvas 力导向图(节点=桶,边=相似度)
|
||||||
|
4. 配置:热更新脱水/embedding/合并参数
|
||||||
|
5. 导入:历史对话拖拽上传 → 分块处理进度条 → 词频规律分析 → 导入结果审阅
|
||||||
|
6. 设置:服务状态监控、修改密码、退出登录
|
||||||
|
|
||||||
|
**部署选项**
|
||||||
|
1. 本地 stdio(`python server.py`)
|
||||||
|
2. Docker + Cloudflare Tunnel(`docker-compose.yml`)
|
||||||
|
3. Docker Hub 预构建镜像(`docker-compose.user.yml`,`p0luz/ombre-brain`)
|
||||||
|
4. Render.com 一键部署(`render.yaml`)
|
||||||
|
5. Zeabur 部署(`zbpack.json`)
|
||||||
|
6. GitHub Actions 自动构建推送 Docker Hub(`.github/workflows/docker-publish.yml`)
|
||||||
|
|
||||||
|
**迁移/批处理工具**:`migrate_to_domains.py`、`reclassify_domains.py`、`reclassify_api.py`、`backfill_embeddings.py`、`write_memory.py`、`check_buckets.py`、`import_memory.py`(历史对话导入引擎)
|
||||||
|
|
||||||
|
**降级策略**
|
||||||
|
- 脱水 API 不可用 → 直接抛 RuntimeError(设计决策,详见 BEHAVIOR_SPEC.md 三、降级行为表)
|
||||||
|
- 向量搜索不可用 → 纯 fuzzy match
|
||||||
|
- 逐条错误隔离(grow 中单条失败不影响其他)
|
||||||
|
|
||||||
|
**安全**:路径遍历防护(`safe_path()`)、API Key 脱敏、API Key 不持久化到 yaml、输入范围钳制
|
||||||
|
|
||||||
|
**监控**:结构化日志、Health 端点、Breath Debug 端点、Dashboard 统计栏、衰减周期日志
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. 环境变量清单
|
||||||
|
|
||||||
|
| 变量名 | 用途 | 必填 | 默认值 / 示例 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `OMBRE_API_KEY` | 脱水/打标/嵌入的 LLM API 密钥,覆盖 `config.yaml` 的 `dehydration.api_key` | 否(也可写在 `config.yaml`;两处都未设置时脱水/打标调用抛 `RuntimeError`,向量检索降级为纯关键词匹配) | `""` |
|
||||||
|
| `OMBRE_BASE_URL` | API base URL,覆盖 `config.yaml` 的 `dehydration.base_url` | 否 | `""` |
|
||||||
|
| `OMBRE_TRANSPORT` | 传输模式:`stdio` / `sse` / `streamable-http` | 否 | `""` → 回退到 config 或 `"stdio"` |
|
||||||
|
| `OMBRE_BUCKETS_DIR` | 记忆桶存储目录路径 | 否 | `""` → 回退到 config 或 `./buckets` |
|
||||||
|
| `OMBRE_HOOK_URL` | SessionStart 钩子调用的服务器 URL | 否 | `"http://localhost:8000"` |
|
||||||
|
| `OMBRE_HOOK_SKIP` | 设为 `"1"` 跳过 SessionStart 钩子 | 否 | 未设置(不跳过) |
|
||||||
|
| `OMBRE_DASHBOARD_PASSWORD` | 预设 Dashboard 访问密码;设置后覆盖文件密码,首次访问不弹设置向导 | 否 | `""` |
|
||||||
|
|
||||||
|
环境变量优先级:`环境变量 > config.yaml > 硬编码默认值`。所有环境变量在 `utils.py` 中读取并注入 config dict。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. 模块结构与依赖关系
|
||||||
|
|
||||||
|
```
|
||||||
|
┌──────────────┐
|
||||||
|
│ server.py │ MCP 主入口,6 个工具 + Dashboard + Hook
|
||||||
|
└──────┬───────┘
|
||||||
|
┌───────────────┼───────────────┬────────────────┐
|
||||||
|
▼ ▼ ▼ ▼
|
||||||
|
bucket_manager.py dehydrator.py decay_engine.py embedding_engine.py
|
||||||
|
记忆桶 CRUD+搜索 脱水压缩+打标 遗忘曲线+归档 向量化+语义检索
|
||||||
|
│ │ │
|
||||||
|
└───────┬───────┘ │
|
||||||
|
▼ ▼
|
||||||
|
utils.py ◄────────────────────────────────────┘
|
||||||
|
配置/日志/ID/路径安全/token估算
|
||||||
|
```
|
||||||
|
|
||||||
|
| 文件 | 职责 | 依赖(项目内) | 被谁调用 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| `server.py` | MCP 服务器主入口,注册工具 + Dashboard API + 钩子端点 | `bucket_manager`, `dehydrator`, `decay_engine`, `embedding_engine`, `utils` | `test_tools.py` |
|
||||||
|
| `bucket_manager.py` | 记忆桶 CRUD、多维索引搜索、wikilink 注入、激活更新 | `utils` | `server.py`, `check_buckets.py`, `backfill_embeddings.py` |
|
||||||
|
| `decay_engine.py` | 衰减引擎:遗忘曲线计算、自动归档、自动结案 | 无(接收 `bucket_mgr` 实例) | `server.py` |
|
||||||
|
| `dehydrator.py` | 数据脱水压缩 + 合并 + 自动打标(仅 LLM API,不可用时报 RuntimeError) | `utils` | `server.py` |
|
||||||
|
| `embedding_engine.py` | 向量化引擎:Gemini embedding API + SQLite + 余弦搜索 | `utils` | `server.py`, `backfill_embeddings.py` |
|
||||||
|
| `utils.py` | 配置加载、日志、路径安全、ID 生成、token 估算 | 无 | 所有模块 |
|
||||||
|
| `write_memory.py` | 手动写入记忆 CLI(绕过 MCP) | 无(独立脚本) | 无 |
|
||||||
|
| `backfill_embeddings.py` | 为存量桶批量生成 embedding | `utils`, `bucket_manager`, `embedding_engine` | 无 |
|
||||||
|
| `check_buckets.py` | 桶数据完整性检查 | `bucket_manager`, `utils` | 无 |
|
||||||
|
| `import_memory.py` | 历史对话导入引擎(支持 Claude JSON/ChatGPT/DeepSeek/Markdown/纯文本),分块处理+断点续传+词频分析 | `utils` | `server.py` |
|
||||||
|
| `reclassify_api.py` | 用 LLM API 重打标未分类桶 | 无(直接用 `openai`) | 无 |
|
||||||
|
| `reclassify_domains.py` | 基于关键词本地重分类 | 无 | 无 |
|
||||||
|
| `migrate_to_domains.py` | 平铺桶 → 域子目录迁移 | 无 | 无 |
|
||||||
|
| `test_smoke.py` | 冒烟测试 | `utils`, `bucket_manager`, `dehydrator`, `decay_engine` | 无 |
|
||||||
|
| `test_tools.py` | MCP 工具端到端测试 | `utils`, `server`, `bucket_manager` | 无 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. 硬编码值清单
|
||||||
|
|
||||||
|
### 3.1 固定分数 / 特殊返回值
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `999.0` | `decay_engine.py` calculate_score | pinned / protected / permanent 桶永不衰减 |
|
||||||
|
| `50.0` | `decay_engine.py` calculate_score | feel 桶固定活跃度分数 |
|
||||||
|
| `0.02` | `decay_engine.py` resolved_factor | resolved + digested 时的权重乘数(加速淡化) |
|
||||||
|
| `0.05` | `decay_engine.py` resolved_factor | 仅 resolved 时的权重乘数(沉底) |
|
||||||
|
| `1.5` | `decay_engine.py` urgency_boost | arousal > 0.7 且未解决时的紧迫度加成 |
|
||||||
|
|
||||||
|
### 3.2 衰减公式参数
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `36.0` | `decay_engine.py` _calc_time_weight | 新鲜度衰减时间常数(小时),`1.0 + e^(-t/36)`;加成部分的半衰期约 25h |
|
||||||
|
| `0.3` (指数) | `decay_engine.py` calculate_score | `activation_count ** 0.3`(记忆巩固指数) |
|
||||||
|
| `3.0` (天) | `decay_engine.py` calculate_score | 短期/长期切换阈值 |
|
||||||
|
| `0.7 / 0.3` | `decay_engine.py` combined_weight | 短期权重分配:time×0.7 + emotion×0.3 |
|
||||||
|
| `0.7` | `decay_engine.py` urgency_boost | arousal 紧迫度触发阈值 |
|
||||||
|
| `4` / `30` (天) | `decay_engine.py` run_decay_cycle | 自动结案:importance≤4 且 >30天 |
|
||||||
|
|
||||||
|
### 3.3 搜索/评分参数
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `×3` / `×2.5` / `×2` | `bucket_manager.py` _calc_topic_score | 桶名 / 域名 / 标签的 topic 评分权重 |
|
||||||
|
| `1000` (字符) | `bucket_manager.py` _calc_topic_score | 正文截取长度 |
|
||||||
|
| `0.02` | `bucket_manager.py` _calc_time_score | 时间亲近度衰减系数 `e^(-0.02 × days)`(B-05 修复前为 `0.1`) |
|
||||||
|
| `0.3` | `bucket_manager.py` search_multi | resolved 桶的归一化分数乘数 |
|
||||||
|
| `0.5` | `server.py` breath/search | 向量搜索相似度下限 |
|
||||||
|
| `0.7` | `server.py` dream | feel 结晶相似度阈值 |
|
||||||
|
|
||||||
|
### 3.4 Token 限制 / 截断
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `10000` | `server.py` breath 默认 max_tokens | 浮现/搜索 token 预算 |
|
||||||
|
| `20000` | `server.py` breath 上限 | max_tokens 硬上限 |
|
||||||
|
| `50` / `20` | `server.py` breath | max_results 上限 / 默认值 |
|
||||||
|
| `3000` | `dehydrator.py` dehydrate | API 脱水内容截断 |
|
||||||
|
| `2000` | `dehydrator.py` merge | API 合并内容各截断 |
|
||||||
|
| `5000` | `dehydrator.py` digest | API 日记整理内容截断 |
|
||||||
|
| `2000` | `embedding_engine.py` | embedding 文本截断 |
|
||||||
|
| `100` | `dehydrator.py` | 内容 < 100 token 跳过脱水 |
|
||||||
|
|
||||||
|
### 3.5 时间/间隔/重试
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `60.0s` | `dehydrator.py` | OpenAI 客户端 timeout |
|
||||||
|
| `30.0s` | `embedding_engine.py` | Embedding API timeout |
|
||||||
|
| `60s` | `server.py` keepalive | 保活 ping 间隔 |
|
||||||
|
| `48.0h` | `bucket_manager.py` touch | 时间涟漪窗口 ±48h |
|
||||||
|
| `2s` | `backfill_embeddings.py` | 批次间等待 |
|
||||||
|
|
||||||
|
### 3.6 随机浮现
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `3` | `server.py` breath search | 结果不足 3 条时触发 |
|
||||||
|
| `0.4` | `server.py` breath search | 40% 概率触发随机浮现 |
|
||||||
|
| `2.0` | `server.py` breath search | 随机池:score < 2.0 的低权重桶 |
|
||||||
|
| `1~3` | `server.py` breath search | 随机浮现数量 |
|
||||||
|
|
||||||
|
### 3.7 情感/重构
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `0.2` | `server.py` breath search | 情绪重构偏移系数 `(q_valence - 0.5) × 0.2`(最大 ±0.1) |
|
||||||
|
|
||||||
|
### 3.8 其他
|
||||||
|
|
||||||
|
| 值 | 位置 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `12` | `utils.py` gen_id | bucket ID 长度(UUID hex[:12]) |
|
||||||
|
| `80` | `utils.py` sanitize_name | 桶名最大长度 |
|
||||||
|
| `1.5` / `1.3` | `utils.py` count_tokens_approx | 中文/英文 token 估算系数 |
|
||||||
|
| `8000` | `server.py` | MCP 服务器端口 |
|
||||||
|
| `30` 字符 | `server.py` grow | 短内容快速路径阈值 |
|
||||||
|
| `10` | `server.py` dream | 取最近 N 个桶 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Config.yaml 完整键表
|
||||||
|
|
||||||
|
| 键路径 | 默认值 | 用途 |
|
||||||
|
|---|---|---|
|
||||||
|
| `transport` | `"stdio"` | 传输模式 |
|
||||||
|
| `log_level` | `"INFO"` | 日志级别 |
|
||||||
|
| `buckets_dir` | `"./buckets"` | 记忆桶目录 |
|
||||||
|
| `merge_threshold` | `75` | 合并相似度阈值 (0-100) |
|
||||||
|
| `dehydration.model` | `"deepseek-chat"` | 脱水用 LLM 模型 |
|
||||||
|
| `dehydration.base_url` | `"https://api.deepseek.com/v1"` | API 地址 |
|
||||||
|
| `dehydration.api_key` | `""` | API 密钥 |
|
||||||
|
| `dehydration.max_tokens` | `1024` | 脱水返回 token 上限 |
|
||||||
|
| `dehydration.temperature` | `0.1` | 脱水温度 |
|
||||||
|
| `embedding.enabled` | `true` | 启用向量检索 |
|
||||||
|
| `embedding.model` | `"gemini-embedding-001"` | Embedding 模型 |
|
||||||
|
| `decay.lambda` | `0.05` | 衰减速率 λ |
|
||||||
|
| `decay.threshold` | `0.3` | 归档分数阈值 |
|
||||||
|
| `decay.check_interval_hours` | `24` | 衰减扫描间隔(小时) |
|
||||||
|
| `decay.emotion_weights.base` | `1.0` | 情感权重基值 |
|
||||||
|
| `decay.emotion_weights.arousal_boost` | `0.8` | 唤醒度加成系数 |
|
||||||
|
| `matching.fuzzy_threshold` | `50` | 模糊匹配下限 |
|
||||||
|
| `matching.max_results` | `5` | 匹配返回上限 |
|
||||||
|
| `scoring_weights.topic_relevance` | `4.0` | 主题评分权重 |
|
||||||
|
| `scoring_weights.emotion_resonance` | `2.0` | 情感评分权重 |
|
||||||
|
| `scoring_weights.time_proximity` | `1.5` | 时间评分权重(B-06 修复前默认值为 `2.5`) |
|
||||||
|
| `scoring_weights.importance` | `1.0` | 重要性评分权重 |
|
||||||
|
| `scoring_weights.content_weight` | `1.0` | 正文评分权重(B-07 修复前默认值为 `3.0`) |
|
||||||
|
| `wikilink.enabled` | `true` | 启用 wikilink 注入 |
|
||||||
|
| `wikilink.use_tags` | `false` | wikilink 包含标签 |
|
||||||
|
| `wikilink.use_domain` | `true` | wikilink 包含域名 |
|
||||||
|
| `wikilink.use_auto_keywords` | `true` | wikilink 自动关键词 |
|
||||||
|
| `wikilink.auto_top_k` | `8` | wikilink 取 Top-K 关键词 |
|
||||||
|
| `wikilink.min_keyword_len` | `2` | wikilink 最短关键词长度 |
|
||||||
|
| `wikilink.exclude_keywords` | `[]` | wikilink 排除关键词表 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. 核心设计决策记录
|
||||||
|
|
||||||
|
### 5.1 为什么用 Markdown + YAML frontmatter 而不是数据库?
|
||||||
|
|
||||||
|
**决策**:每个记忆桶 = 一个 `.md` 文件,元数据在 YAML frontmatter 里。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 与 Obsidian 原生兼容——用户可以直接在 Obsidian 里浏览、编辑、搜索记忆
|
||||||
|
- 文件系统即数据库,天然支持 git 版本管理
|
||||||
|
- 无外部数据库依赖,部署简单
|
||||||
|
- wikilink 注入让记忆之间自动形成知识图谱
|
||||||
|
|
||||||
|
**放弃方案**:SQLite/PostgreSQL 全量存储。过于笨重,失去 Obsidian 可视化优势。
|
||||||
|
|
||||||
|
### 5.2 为什么 embedding 单独存 SQLite 而不放 frontmatter?
|
||||||
|
|
||||||
|
**决策**:向量存 `embeddings.db`(SQLite),与 Markdown 文件分离。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 3072 维浮点向量无法合理存入 YAML frontmatter
|
||||||
|
- SQLite 支持批量查询和余弦相似度计算
|
||||||
|
- embedding 是派生数据,丢失可重新生成(`backfill_embeddings.py`)
|
||||||
|
- 不污染 Obsidian 可读性
|
||||||
|
|
||||||
|
### 5.3 为什么搜索用双通道(关键词 + 向量)而不是纯向量?
|
||||||
|
|
||||||
|
**决策**:关键词模糊匹配(rapidfuzz)+ 向量语义检索并联,结果去重合并。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 纯向量在精确名词匹配上表现差("2024年3月"这类精确信息)
|
||||||
|
- 纯关键词无法处理语义近似("很累" → "身体不适")
|
||||||
|
- 双通道互补,关键词保精确性,向量补语义召回
|
||||||
|
- 向量不可用时自动降级到纯关键词模式
|
||||||
|
|
||||||
|
### 5.4 为什么有 dehydration(脱水)这一层?
|
||||||
|
|
||||||
|
**决策**:存入前先用 LLM 压缩内容(保留信息密度,去除冗余表达)。API 不可用时直接抛出 `RuntimeError`,不静默降级。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- MCP 上下文有 token 限制,原始对话冗长,需要压缩
|
||||||
|
- LLM 压缩能保留语义和情感色彩,纯截断会丢信息
|
||||||
|
- 本地关键词提取质量不足以替代语义打标与合并,静默降级会产生错误分类记忆,比报错更危险。详见 BEHAVIOR_SPEC.md 三、降级行为表。
|
||||||
|
|
||||||
|
**放弃方案**:只做截断。信息损失太大。
|
||||||
|
|
||||||
|
### 5.5 为什么 feel 和普通记忆分开?
|
||||||
|
|
||||||
|
**决策**:`feel=True` 的记忆存入独立 `feel/` 目录,不参与普通浮现、不衰减、不合并。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- feel 是模型的自省产物,不是事件记录——两者逻辑完全不同
|
||||||
|
- 事件记忆应该衰减遗忘,但"我从中学到了什么"不应该被遗忘
|
||||||
|
- feel 的 valence 是模型自身感受(不等于事件情绪),混在一起会污染情感检索
|
||||||
|
- feel 可以通过 `breath(domain="feel")` 单独读取
|
||||||
|
|
||||||
|
### 5.6 为什么 resolved 不删除记忆?
|
||||||
|
|
||||||
|
**决策**:`resolved=True` 让记忆"沉底"(权重 ×0.05),但保留在文件系统中,关键词搜索仍可触发。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 模拟人类记忆:resolved 的事不会主动想起,但别人提到时能回忆
|
||||||
|
- 删除是不可逆的,沉底可随时 `resolved=False` 重新激活
|
||||||
|
- `resolved + digested` 进一步降权到 ×0.02(已消化 = 更释然)
|
||||||
|
|
||||||
|
**放弃方案**:直接删除。不可逆,且与人类记忆模型不符。
|
||||||
|
|
||||||
|
### 5.7 为什么用分段式短期/长期权重?
|
||||||
|
|
||||||
|
**决策**:≤3 天时间权重占 70%,>3 天情感权重占 70%。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 刚发生的事主要靠"新鲜"驱动浮现(今天的事 > 昨天的事)
|
||||||
|
- 时间久了,决定记忆存活的是情感强度(强烈的记忆更难忘)
|
||||||
|
- 这比单一衰减曲线更符合人类记忆的双重存储理论
|
||||||
|
|
||||||
|
### 5.8 为什么 dream 设计成对话开头自动执行?
|
||||||
|
|
||||||
|
**决策**:每次新对话启动时,Claude 执行 `dream()` 消化最近记忆,有沉淀写 feel,能放下的 resolve。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 模拟睡眠中的记忆整理——人在睡觉时大脑会重放和整理白天的经历
|
||||||
|
- 让 Claude 对过去的记忆有"第一人称视角"的自省,而不是冷冰冰地搬运数据
|
||||||
|
- 自动触发确保每次对话都"接续"上一次,而非从零开始
|
||||||
|
|
||||||
|
### 5.9 为什么新鲜度用连续指数衰减而不是分段阶梯?
|
||||||
|
|
||||||
|
**决策**:`bonus = 1.0 + e^(-t/36)`,t 为小时,36h 为时间常数(加成部分每 36h 衰减到 1/e,半衰期约 25h)。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 分段阶梯(0-1天=1.0,第2天=0.9...)有不自然的跳变
|
||||||
|
- 连续指数更符合遗忘曲线的物理模型
|
||||||
|
- 36h 时间常数使新桶在前两天有明显优势,72h 后加成降至约 ×1.14,接近自然回归
|
||||||
|
- 值域 1.0~2.0 保证老记忆不被惩罚(×1.0),只是新记忆有额外加成(×2.0)
|
||||||
|
|
||||||
|
**放弃方案**:分段线性(原实现)。跳变点不自然,参数多且不直观。
|
||||||
|
|
||||||
|
### 5.10 情感记忆重构(±0.1 偏移)的设计动机
|
||||||
|
|
||||||
|
**决策**:搜索时如果指定了 `valence`,会微调结果桶的 valence 展示值 `(q_valence - 0.5) × 0.2`。
|
||||||
|
|
||||||
|
**理由**:
|
||||||
|
- 模拟认知心理学中的"心境一致性效应"——当前心情会影响对过去的回忆
|
||||||
|
- 偏移量很小(最大 ±0.1),不会扭曲事实,只是微妙的"色彩"调整
|
||||||
|
- 原始 valence 不被修改,只影响展示层
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. 目录结构约定
|
||||||
|
|
||||||
|
```
|
||||||
|
buckets/
|
||||||
|
├── permanent/ # pinned/protected 桶,importance=10,永不衰减
|
||||||
|
├── dynamic/
|
||||||
|
│ ├── 日常/ # domain 子目录
|
||||||
|
│ ├── 情感/
|
||||||
|
│ ├── 自省/
|
||||||
|
│ ├── 数字/
|
||||||
|
│ └── ...
|
||||||
|
├── archive/ # 衰减归档桶
|
||||||
|
└── feel/ # 模型自省 feel 桶
|
||||||
|
```
|
||||||
|
|
||||||
|
桶文件格式:
|
||||||
|
```markdown
|
||||||
|
---
|
||||||
|
id: 76237984fa5d
|
||||||
|
name: 桶名
|
||||||
|
domain: [日常, 情感]
|
||||||
|
tags: [关键词1, 关键词2]
|
||||||
|
importance: 5
|
||||||
|
valence: 0.6
|
||||||
|
arousal: 0.4
|
||||||
|
activation_count: 3
|
||||||
|
resolved: false
|
||||||
|
pinned: false
|
||||||
|
digested: false
|
||||||
|
created: 2026-04-17T10:00:00+08:00
|
||||||
|
last_active: 2026-04-17T14:00:00+08:00
|
||||||
|
type: dynamic
|
||||||
|
---
|
||||||
|
|
||||||
|
桶正文内容...
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Bug 修复记录 (B-01 至 B-10)
|
||||||
|
|
||||||
|
### B-01 — `update(resolved=True)` 自动归档 🔴 高
|
||||||
|
|
||||||
|
- **文件**: `bucket_manager.py` → `update()`
|
||||||
|
- **问题**: `resolved=True` 时立即调用 `_move_bucket(archive_dir)` 将桶移入 `archive/`
|
||||||
|
- **修复**: 移除 `_move_bucket` 逻辑;resolved 桶留在 `dynamic/`,由 decay 引擎自然淘汰
|
||||||
|
- **影响**: 已解决的桶仍可被关键词检索命中(降权但不消失)
|
||||||
|
- **测试**: `tests/regression/test_issue_B01.py`,`tests/integration/test_scenario_07_trace.py`
|
||||||
|
|
||||||
|
### B-03 — `int()` 截断浮点 activation_count 🔴 高
|
||||||
|
|
||||||
|
- **文件**: `decay_engine.py` → `calculate_score()`
|
||||||
|
- **问题**: `max(1, int(activation_count))` 将 `_time_ripple` 写入的 1.3 截断为 1,涟漪加成失效
|
||||||
|
- **修复**: 改为 `max(1.0, float(activation_count))`
|
||||||
|
- **影响**: 时间涟漪效果现在正确反映在 score 上;高频访问的桶衰减更慢
|
||||||
|
- **测试**: `tests/regression/test_issue_B03.py`,`tests/unit/test_calculate_score.py`
|
||||||
|
|
||||||
|
### B-04 — `create()` 初始化 activation_count=1 🟠 中
|
||||||
|
|
||||||
|
- **文件**: `bucket_manager.py` → `create()`
|
||||||
|
- **问题**: `activation_count=1` 导致冷启动检测条件 `== 0` 永不满足,新建重要桶无法浮现
|
||||||
|
- **修复**: 改为 `activation_count=0`;`touch()` 首次命中后变 1
|
||||||
|
- **测试**: `tests/regression/test_issue_B04.py`,`tests/integration/test_scenario_01_cold_start.py`
|
||||||
|
|
||||||
|
### B-05 — 时间衰减系数 0.1 过快 🟠 中
|
||||||
|
|
||||||
|
- **文件**: `bucket_manager.py` → `_calc_time_score()`
|
||||||
|
- **问题**: `math.exp(-0.1 * days)` 导致 30 天后得分仅剩 ≈0.05,远快于人类记忆曲线
|
||||||
|
- **修复**: 改为 `math.exp(-0.02 * days)`(30 天后 ≈0.549)
|
||||||
|
- **影响**: 记忆保留时间更符合人类认知模型
|
||||||
|
- **测试**: `tests/regression/test_issue_B05.py`,`tests/unit/test_score_components.py`
|
||||||
|
|
||||||
|
### B-06 — `w_time` 默认值 2.5 过高 🟠 中
|
||||||
|
|
||||||
|
- **文件**: `bucket_manager.py` → `_calc_final_score()`(或评分调用处)
|
||||||
|
- **问题**: `scoring.get("time_proximity", 2.5)` — 时间权重过高,近期低质量记忆得分高于高质量旧记忆
|
||||||
|
- **修复**: 改为 `scoring.get("time_proximity", 1.5)`
|
||||||
|
- **测试**: `tests/regression/test_issue_B06.py`,`tests/unit/test_score_components.py`
|
||||||
|
|
||||||
|
### B-07 — `content_weight` 默认值 3.0 过高 🟠 中
|
||||||
|
|
||||||
|
- **文件**: `bucket_manager.py` → `_calc_topic_score()`
|
||||||
|
- **问题**: `scoring.get("content_weight", 3.0)` — 正文权重被抬到与名字权重(×3)相同,且高于 domain(×2.5)和 tags(×2),导致内容重复堆砌的桶得分可能高于名字精确匹配的桶
|
||||||
|
- **修复**: 改为 `scoring.get("content_weight", 1.0)`
|
||||||
|
- **影响**: 名字完全匹配 > 标签匹配 > 内容匹配的得分层级现在正确
|
||||||
|
- **测试**: `tests/regression/test_issue_B07.py`,`tests/unit/test_topic_score.py`
|
||||||
|
|
||||||
|
### B-08 — `run_decay_cycle()` 同轮 auto_resolve 后 score 未降权 🟡 低
|
||||||
|
|
||||||
|
- **文件**: `decay_engine.py` → `run_decay_cycle()`
|
||||||
|
- **问题**: `auto_resolve` 标记后立即用旧 `meta`(stale)计算 score,`resolved_factor=0.05` 未生效
|
||||||
|
- **修复**: 在 `bucket_mgr.update(resolved=True)` 后立即执行 `meta["resolved"] = True`,确保同轮降权
|
||||||
|
- **测试**: `tests/regression/test_issue_B08.py`,`tests/integration/test_scenario_08_decay.py`
|
||||||
|
|
||||||
|
### B-09 — `hold()` 用 analyze() 覆盖用户传入的 valence/arousal 🟡 低
|
||||||
|
|
||||||
|
- **文件**: `server.py` → `hold()`
|
||||||
|
- **问题**: 先调 `analyze()`,再直接用结果覆盖用户传入的情感值,情感准确性丢失
|
||||||
|
- **修复**: 使用 `final_valence = user_valence if user_valence is not None else analyze_result.get("valence")`
|
||||||
|
- **影响**: 用户明确传入的情感坐标(包括 0.0)不再被 LLM 结果覆盖
|
||||||
|
- **测试**: `tests/regression/test_issue_B09.py`,`tests/integration/test_scenario_03_hold.py`
|
||||||
|
|
||||||
|
### B-10 — feel 桶 `domain=[]` 被填充为 `["未分类"]` 🟡 低
|
||||||
|
|
||||||
|
- **文件**: `bucket_manager.py` → `create()`
|
||||||
|
- **问题**: `if not domain: domain = ["未分类"]` 对所有桶类型生效,feel 桶的空 domain 被错误填充
|
||||||
|
- **修复**: 改为 `if not domain and bucket_type != "feel": domain = ["未分类"]`
|
||||||
|
- **影响**: `breath(domain="feel")` 通道过滤逻辑现在正确(feel 桶 domain 始终为空列表)
|
||||||
|
- **测试**: `tests/regression/test_issue_B10.py`,`tests/integration/test_scenario_10_feel.py`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Bug 修复汇总表
|
||||||
|
|
||||||
|
| ID | 严重度 | 文件 | 方法 | 一句话描述 |
|
||||||
|
|---|---|---|---|---|
|
||||||
|
| B-01 | 🔴 高 | `bucket_manager.py` | `update()` | resolved 桶不再自动归档 |
|
||||||
|
| B-03 | 🔴 高 | `decay_engine.py` | `calculate_score()` | float activation_count 不被 int() 截断 |
|
||||||
|
| B-04 | 🟠 中 | `bucket_manager.py` | `create()` | 初始 activation_count=0 |
|
||||||
|
| B-05 | 🟠 中 | `bucket_manager.py` | `_calc_time_score()` | 时间衰减系数 0.02(原 0.1) |
|
||||||
|
| B-06 | 🟠 中 | `bucket_manager.py` | 评分权重配置 | w_time 默认 1.5(原 2.5) |
|
||||||
|
| B-07 | 🟠 中 | `bucket_manager.py` | `_calc_topic_score()` | content_weight 默认 1.0(原 3.0) |
|
||||||
|
| B-08 | 🟡 低 | `decay_engine.py` | `run_decay_cycle()` | auto_resolve 同轮应用 ×0.05 |
|
||||||
|
| B-09 | 🟡 低 | `server.py` | `hold()` | 用户 valence/arousal 优先 |
|
||||||
|
| B-10 | 🟡 低 | `bucket_manager.py` | `create()` | feel 桶 domain=[] 不被填充 |
|
||||||
510
README.md
510
README.md
@@ -1,29 +1,135 @@
|
|||||||
# Ombre Brain
|
# Ombre Brain
|
||||||
|
|
||||||
一个给提供给Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。
|
一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线和向量语义检索。
|
||||||
|
|
||||||
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
|
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, with forgetting curve and vector semantic search.
|
||||||
|
|
||||||
> **⚠️ 仓库临时迁移 / Repo temporarily moved**
|
> **⚠️ 备用链接 / Backup link**
|
||||||
> GitHub 访问受限期间,代码暂时托管在 Gitea:
|
> Gitea 备用地址(GitHub 访问有问题时用):
|
||||||
> **https://git.p0lar1s.uk/P0lar1s/Ombre_Brain**
|
> **https://git.p0lar1s.uk/P0lar1s/Ombre_Brain**
|
||||||
> 下面的 `git clone` 地址请替换为上面这个。
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## 快速开始 / Quick Start(Docker,推荐)
|
## 快速开始 / Quick Start(Docker Hub 预构建镜像,最简单)
|
||||||
|
|
||||||
> 这是最简单的方式,不需要装 Python,不需要懂命令行,跟着做就行。
|
> 不需要 clone 代码,不需要 build,三步搞定。
|
||||||
|
> 完全不会?没关系,往下看,一步一步跟着做。
|
||||||
|
|
||||||
|
### 第零步:装 Docker Desktop
|
||||||
|
|
||||||
|
1. 打开 [docker.com/products/docker-desktop](https://www.docker.com/products/docker-desktop/)
|
||||||
|
2. 下载对应你系统的版本(Mac / Windows / Linux)
|
||||||
|
3. 安装、打开,看到 Docker 图标在状态栏里就行了
|
||||||
|
4. **Windows 用户**:安装时会提示启用 WSL 2,点同意,重启电脑
|
||||||
|
|
||||||
|
### 第一步:打开终端
|
||||||
|
|
||||||
|
| 系统 | 怎么打开 |
|
||||||
|
|---|---|
|
||||||
|
| **Mac** | 按 `⌘ + 空格`,输入 `终端` 或 `Terminal`,回车 |
|
||||||
|
| **Windows** | 按 `Win + R`,输入 `cmd`,回车;或搜索「PowerShell」 |
|
||||||
|
| **Linux** | `Ctrl + Alt + T` |
|
||||||
|
|
||||||
|
打开后你会看到一个黑色/白色的窗口,可以输入命令。下面所有代码块里的内容,都是**复制粘贴到这个窗口里,然后按回车**。
|
||||||
|
|
||||||
|
### 第二步:创建一个工作文件夹
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mkdir ombre-brain && cd ombre-brain
|
||||||
|
```
|
||||||
|
|
||||||
|
> 这会在你当前位置创建一个叫 `ombre-brain` 的文件夹,并进入它。
|
||||||
|
|
||||||
|
### 第三步:获取 API Key(免费)
|
||||||
|
|
||||||
|
1. 打开 [aistudio.google.com/apikey](https://aistudio.google.com/apikey)
|
||||||
|
2. 用 Google 账号登录
|
||||||
|
3. 点击 **「Create API key」**
|
||||||
|
4. 复制生成的 key(一长串字母数字),待会要用
|
||||||
|
|
||||||
|
> 没有 Google 账号?API Key 留空也能启动服务,但脱水压缩和自动打标功能不可用(`hold`/`grow` 会报错),检索仍可正常使用。
|
||||||
|
|
||||||
|
### 第四步:创建配置文件并启动
|
||||||
|
|
||||||
|
**一行一行复制粘贴执行:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 下载用户版 compose 文件
|
||||||
|
curl -O https://raw.githubusercontent.com/P0luz/Ombre-Brain/main/docker-compose.user.yml
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 创建 .env 文件——把 your-key-here 换成第三步拿到的 key
|
||||||
|
echo "OMBRE_API_KEY=your-key-here" > .env
|
||||||
|
```
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 拉取镜像并启动(第一次会下载约 500MB,等一会儿)
|
||||||
|
docker compose -f docker-compose.user.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
### 第五步:验证
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl http://localhost:8000/health
|
||||||
|
```
|
||||||
|
|
||||||
|
看到类似这样的输出就是成功了:
|
||||||
|
```json
|
||||||
|
{"status":"ok","buckets":0,"decay_engine":"stopped"}
|
||||||
|
```
|
||||||
|
|
||||||
|
浏览器打开前端 Dashboard:**http://localhost:8000/dashboard**
|
||||||
|
|
||||||
|
> 如果你用的是 `docker-compose.user.yml` 默认端口,地址就是 `http://localhost:8000/dashboard`。
|
||||||
|
> 如果你改了端口映射(比如 `18001:8000`),则是 `http://localhost:18001/dashboard`。
|
||||||
|
|
||||||
|
> **看到错误?** 检查 Docker Desktop 是否正在运行(状态栏有图标)。
|
||||||
|
|
||||||
|
### 第六步:接入 Claude
|
||||||
|
|
||||||
|
在 Claude Desktop 的配置文件里加上这段(Mac: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"ombre-brain": {
|
||||||
|
"type": "streamable-http",
|
||||||
|
"url": "http://localhost:8000/mcp"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
重启 Claude Desktop,你应该能在工具列表里看到 `breath`、`hold`、`grow` 等工具了。
|
||||||
|
|
||||||
|
> **想挂载 Obsidian?** 用任意文本编辑器打开 `docker-compose.user.yml`,把 `./buckets:/data` 改成你的 Vault 路径,例如:
|
||||||
|
> ```yaml
|
||||||
|
> - /Users/你的用户名/Documents/Obsidian Vault/Ombre Brain:/data
|
||||||
|
> ```
|
||||||
|
> 然后 `docker compose -f docker-compose.user.yml down && docker compose -f docker-compose.user.yml up -d` 重启。
|
||||||
|
|
||||||
|
> **后续更新镜像:**
|
||||||
|
> ```bash
|
||||||
|
> docker pull p0luz/ombre-brain:latest
|
||||||
|
> docker compose -f docker-compose.user.yml down && docker compose -f docker-compose.user.yml up -d
|
||||||
|
> ```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 从源码部署 / Deploy from Source(Docker)
|
||||||
|
|
||||||
|
> 适合想自己改代码、或者不想用预构建镜像的用户。
|
||||||
|
|
||||||
**前置条件:** 电脑上装了 [Docker Desktop](https://www.docker.com/products/docker-desktop/),并且已经打开。
|
**前置条件:** 电脑上装了 [Docker Desktop](https://www.docker.com/products/docker-desktop/),并且已经打开。
|
||||||
|
|
||||||
**第一步:拉取代码**
|
**第一步:拉取代码**
|
||||||
|
|
||||||
(⚠️ 仓库临时迁移 / Repo temporarily moved GitHub 访问受限期间,代码暂时托管在 Gitea: https://git.p0lar1s.uk/P0lar1s/Ombre_Brain 下面的 git clone 地址请临时替换为这个。)
|
(💡 如果主链接访问有困难,可用备用 Gitea 地址:https://git.p0lar1s.uk/P0lar1s/Ombre_Brain)
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.p0lar1s.uk/P0lar1s/Ombre_Brain.git
|
git clone https://github.com/P0luz/Ombre-Brain.git
|
||||||
cd Ombre_Brain
|
cd Ombre-Brain
|
||||||
```
|
```
|
||||||
|
|
||||||
**第二步:创建 `.env` 文件**
|
**第二步:创建 `.env` 文件**
|
||||||
@@ -31,10 +137,29 @@ cd Ombre_Brain
|
|||||||
在项目目录下新建一个叫 `.env` 的文件(注意有个点),内容填:
|
在项目目录下新建一个叫 `.env` 的文件(注意有个点),内容填:
|
||||||
|
|
||||||
```
|
```
|
||||||
OMBRE_API_KEY=你的DeepSeek或其他API密钥
|
OMBRE_API_KEY=你的API密钥
|
||||||
```
|
```
|
||||||
|
|
||||||
没有 API key 也能用,脱水压缩会降级到本地模式,只是效果差一点。那就写:
|
> **🔑 推荐免费方案:Google AI Studio**
|
||||||
|
> 1. 打开 [aistudio.google.com/apikey](https://aistudio.google.com/apikey),登录 Google 账号
|
||||||
|
> 2. 点击「Create API key」生成一个 key
|
||||||
|
> 3. 把 key 填入 `.env` 文件的 `OMBRE_API_KEY=` 后面
|
||||||
|
> 4. 免费额度(截至 2025 年,请以官网实时信息为准):
|
||||||
|
> - **脱水/打标模型**(`gemini-2.5-flash-lite`):免费层 30 req/min
|
||||||
|
> - **向量化模型**(`gemini-embedding-001`):免费层 1500 req/day,3072 维
|
||||||
|
> 5. 在 `config.yaml` 中 `dehydration.base_url` 设为 `https://generativelanguage.googleapis.com/v1beta/openai`
|
||||||
|
>
|
||||||
|
> 也支持 DeepSeek、Ollama、LM Studio、vLLM 等任意 OpenAI 兼容 API。
|
||||||
|
>
|
||||||
|
> **Recommended free option: Google AI Studio**
|
||||||
|
> 1. Go to [aistudio.google.com/apikey](https://aistudio.google.com/apikey) and create an API key
|
||||||
|
> 2. Free tier (as of 2025, check official site for current limits):
|
||||||
|
> - Dehydration model (`gemini-2.5-flash-lite`): 30 req/min free
|
||||||
|
> - Embedding model (`gemini-embedding-001`): 1500 req/day free, 3072 dims
|
||||||
|
> 3. Set `dehydration.base_url` to `https://generativelanguage.googleapis.com/v1beta/openai` in `config.yaml`
|
||||||
|
> Also supports DeepSeek, Ollama, LM Studio, vLLM, or any OpenAI-compatible API.
|
||||||
|
|
||||||
|
没有 API key 则脱水压缩和自动打标功能不可用(会报错),但记忆的读写和检索仍正常工作。如果暂时不用脱水功能,可以留空:
|
||||||
|
|
||||||
```
|
```
|
||||||
OMBRE_API_KEY=
|
OMBRE_API_KEY=
|
||||||
@@ -73,6 +198,8 @@ docker logs ombre-brain
|
|||||||
|
|
||||||
看到 `Uvicorn running on http://0.0.0.0:8000` 说明成功了。
|
看到 `Uvicorn running on http://0.0.0.0:8000` 说明成功了。
|
||||||
|
|
||||||
|
浏览器打开前端 Dashboard:**http://localhost:18001/dashboard**(`docker-compose.yml` 默认端口映射 `18001:8000`)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
**接入 Claude.ai(远程访问)**
|
**接入 Claude.ai(远程访问)**
|
||||||
@@ -85,7 +212,9 @@ docker logs ombre-brain
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
[](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
|
[Deploy to Render](https://render.com/deploy?repo=https://github.com/P0luz/Ombre-Brain)
|
||||||
|
[Deploy on Zeabur](https://zeabur.com/templates/OMBRE-BRAIN?referralCode=P0luz)
|
||||||
|
[Docker Hub: p0luz/ombre-brain](https://hub.docker.com/r/p0luz/ombre-brain)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
@@ -104,17 +233,26 @@ Ombre Brain gives it persistent memory — not cold key-value storage, but a sys
|
|||||||
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
||||||
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
||||||
|
|
||||||
|
- **双通道检索 / Dual-channel search**: 关键词模糊匹配 + 向量语义相似度并联检索。关键词通道用 rapidfuzz 做模糊匹配;语义通道用 embedding(默认 `gemini-embedding-001`,3072 维)计算 cosine similarity,能在"今天很累"这种没有精确关键词的查询里找到"身体不适"、"睡眠问题"等语义相关记忆。两个通道去重合并,token 预算截断。
|
||||||
|
Keyword fuzzy matching + vector semantic similarity in parallel. Keyword channel uses rapidfuzz; semantic channel uses embeddings (default `gemini-embedding-001`, 3072 dims) with cosine similarity — finds semantically related memories even without exact keyword matches (e.g. "feeling tired" → "health issues", "sleep problems"). Results are deduplicated and truncated by token budget.
|
||||||
|
|
||||||
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
||||||
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
||||||
|
|
||||||
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
||||||
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
||||||
|
|
||||||
|
- **记忆重构 / Memory reconstruction**: 检索时根据当前情绪状态微调记忆的 valence 展示值(±0.1),模拟人类"此刻的心情影响对过去的回忆"的认知偏差。
|
||||||
|
During retrieval, memory valence display is subtly shifted (±0.1) based on current mood, simulating the human cognitive bias of "current mood colors past memories".
|
||||||
|
|
||||||
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
||||||
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
||||||
|
|
||||||
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。
|
- **API 脱水 + 缓存 / API dehydration + cache**: 脱水压缩和自动打标通过 LLM API(DeepSeek / Gemini 等)完成,结果缓存到本地 SQLite(`dehydration_cache.db`),相同内容不重复调用 API。向量检索不可用时降级到 fuzzy matching。
|
||||||
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
|
Dehydration and auto-tagging are done via LLM API (DeepSeek / Gemini etc.), with results cached locally in SQLite (`dehydration_cache.db`) to avoid redundant API calls. Embedding search degrades to fuzzy matching when unavailable.
|
||||||
|
|
||||||
|
- **历史对话导入 / Conversation history import**: 将过去与 Claude / ChatGPT / DeepSeek 等的对话批量导入为记忆桶。支持 Claude JSON 导出、ChatGPT 导出、Markdown、纯文本等格式,分块处理带断点续传,通过 Dashboard「导入」Tab 操作。
|
||||||
|
Batch-import past conversations (Claude / ChatGPT / DeepSeek etc.) as memory buckets. Supports Claude JSON export, ChatGPT export, Markdown, and plain text. Chunked processing with resume support, via the Dashboard "Import" tab.
|
||||||
|
|
||||||
## 边界说明 / Design boundaries
|
## 边界说明 / Design boundaries
|
||||||
|
|
||||||
@@ -141,19 +279,45 @@ Claude ←→ MCP Protocol ←→ server.py
|
|||||||
│ │ │
|
│ │ │
|
||||||
bucket_manager dehydrator decay_engine
|
bucket_manager dehydrator decay_engine
|
||||||
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
||||||
|
│ │
|
||||||
|
Obsidian Vault embedding_engine
|
||||||
|
(Markdown files) (向量语义检索)
|
||||||
│
|
│
|
||||||
Obsidian Vault (Markdown files)
|
embeddings.db
|
||||||
|
(SQLite, 3072-dim)
|
||||||
```
|
```
|
||||||
|
|
||||||
5 个 MCP 工具 / 5 MCP tools:
|
### 检索架构 / Search Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
breath(query="今天很累")
|
||||||
|
│
|
||||||
|
┌────┴────┐
|
||||||
|
│ │
|
||||||
|
Channel 1 Channel 2
|
||||||
|
关键词匹配 向量语义
|
||||||
|
(rapidfuzz) (cosine similarity)
|
||||||
|
│ │
|
||||||
|
└────┬────┘
|
||||||
|
│
|
||||||
|
去重 + 合并
|
||||||
|
token 预算截断
|
||||||
|
│
|
||||||
|
[语义关联] 标注 vector 来源
|
||||||
|
│
|
||||||
|
返回 ≤20 条结果
|
||||||
|
```
|
||||||
|
|
||||||
|
6 个 MCP 工具 / 6 MCP tools:
|
||||||
|
|
||||||
| 工具 Tool | 作用 Purpose |
|
| 工具 Tool | 作用 Purpose |
|
||||||
|-----------|-------------|
|
|-----------|-------------|
|
||||||
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
|
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+向量语义双通道检索。支持 domain/valence/arousal 过滤 / Surface or search memories. No args = surface unresolved; with query = keyword + vector dual-channel search. Supports domain/valence/arousal filters |
|
||||||
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
|
| `hold` | 存储单条记忆,自动打标+合并相似桶+生成 embedding。`feel=True` 写模型自己的感受 / Store a single memory with auto-tagging, merging, and embedding. `feel=True` for model's own reflections |
|
||||||
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
|
| `grow` | 日记归档,自动拆分长内容为多个记忆桶,每个桶自动生成 embedding / Diary digest, auto-split into multiple buckets with embeddings |
|
||||||
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
||||||
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
||||||
|
| `dream` | 对话开头自省消化——读最近记忆,有沉淀写 feel,能放下就 resolve / Self-reflection at conversation start |
|
||||||
|
|
||||||
## 安装 / Setup
|
## 安装 / Setup
|
||||||
|
|
||||||
@@ -166,8 +330,8 @@ Claude ←→ MCP Protocol ←→ server.py
|
|||||||
### 步骤 / Steps
|
### 步骤 / Steps
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
git clone https://git.p0lar1s.uk/P0lar1s/Ombre_Brain.git
|
git clone https://github.com/P0luz/Ombre-Brain.git
|
||||||
cd Ombre_Brain
|
cd Ombre-Brain
|
||||||
|
|
||||||
python -m venv .venv
|
python -m venv .venv
|
||||||
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||||
@@ -191,6 +355,19 @@ export OMBRE_API_KEY="your-api-key"
|
|||||||
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
||||||
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
||||||
|
|
||||||
|
> **💡 向量化检索(Embedding)**
|
||||||
|
> Ombre Brain 内置双通道检索:关键词匹配 + 向量语义搜索。每次 `hold`/`grow` 存入记忆时自动生成 embedding 并存入 `embeddings.db`(SQLite)。
|
||||||
|
> 推荐:**Google AI Studio 的 `gemini-embedding-001`**(免费,1500 次/天,3072 维向量)。在 `config.yaml` 的 `embedding` 部分配置。
|
||||||
|
> 不配置 embedding 也能用,系统会降级到纯 fuzzy matching 模式。
|
||||||
|
>
|
||||||
|
> **已有存量桶需要补生成 embedding**:运行 `backfill_embeddings.py`:
|
||||||
|
> ```bash
|
||||||
|
> OMBRE_API_KEY="your-key" python backfill_embeddings.py --batch-size 20
|
||||||
|
> ```
|
||||||
|
> Docker 用户:`docker exec -e OMBRE_BUCKETS_DIR=/data ombre-brain python3 backfill_embeddings.py --batch-size 20`
|
||||||
|
>
|
||||||
|
> **Embedding support**: Built-in dual-channel search: keyword + vector semantic. Embeddings are auto-generated on each `hold`/`grow` and stored in `embeddings.db` (SQLite). Recommended: **Google AI Studio `gemini-embedding-001`** (free, 1500 req/day, 3072-dim). Configure in `config.yaml` under `embedding`. Without it, falls back to fuzzy matching. For existing buckets, run `backfill_embeddings.py`.
|
||||||
|
|
||||||
### 接入 Claude Desktop / Connect to Claude Desktop
|
### 接入 Claude Desktop / Connect to Claude Desktop
|
||||||
|
|
||||||
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
||||||
@@ -247,6 +424,8 @@ All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
|
|||||||
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
||||||
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
||||||
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
||||||
|
| `embedding.enabled` | 启用向量语义检索 / Enable embedding search | `true` |
|
||||||
|
| `embedding.model` | Embedding 模型 / Embedding model | `gemini-embedding-001` |
|
||||||
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
||||||
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
||||||
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
||||||
@@ -256,28 +435,116 @@ Sensitive config via env vars:
|
|||||||
- `OMBRE_API_KEY` — LLM API 密钥
|
- `OMBRE_API_KEY` — LLM API 密钥
|
||||||
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
||||||
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
||||||
|
- `OMBRE_DASHBOARD_PASSWORD` — Dashboard 访问密码(可选,见下)
|
||||||
|
|
||||||
|
## Dashboard 认证 / Dashboard Auth
|
||||||
|
|
||||||
|
自 v1.3.0 起,Dashboard 和所有 `/api/*` 端点均受密码保护。
|
||||||
|
Since v1.3.0, the Dashboard and all `/api/*` endpoints are password-protected.
|
||||||
|
|
||||||
|
**首次访问**:若未设置密码,浏览器会弹出设置向导,填写并确认密码后即可使用。
|
||||||
|
**First visit**: If no password is set, a setup wizard will appear. Enter and confirm a password to get started.
|
||||||
|
|
||||||
|
**通过环境变量预设密码**:在 `docker-compose.user.yml` 中添加:
|
||||||
|
**Pre-set via env var** in your `docker-compose.user.yml`:
|
||||||
|
```yaml
|
||||||
|
environment:
|
||||||
|
- OMBRE_DASHBOARD_PASSWORD=your_password_here
|
||||||
|
```
|
||||||
|
设置后,Dashboard 的"修改密码"功能将被禁用,必须通过环境变量修改。
|
||||||
|
When set, the in-Dashboard password change is disabled — modify the env var directly.
|
||||||
|
|
||||||
|
完整环境变量说明见 [ENV_VARS.md](ENV_VARS.md)。
|
||||||
|
Full env var reference: [ENV_VARS.md](ENV_VARS.md).
|
||||||
|
|
||||||
## 衰减公式 / Decay Formula
|
## 衰减公式 / Decay Formula
|
||||||
|
|
||||||
$$final\_score = time\_weight \times base\_score$$
|
$$final\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times combined\_weight \times resolved\_factor \times urgency\_boost$$
|
||||||
|
|
||||||
$$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
|
### 短期/长期权重分离 / Short-term vs Long-term Weight Separation
|
||||||
|
|
||||||
时间系数(乘数,优先级最高)/ Time weight (multiplier, highest priority):
|
系统对记忆的权重计算采用**分段策略**,模拟人类记忆的时效特征:
|
||||||
|
The system uses a **segmented weighting strategy** that mimics how human memory prioritizes recency at first and emotional intensity later:
|
||||||
|
|
||||||
| 距今天数 Days since active | 时间系数 Weight |
|
| 阶段 Phase | 时间范围 | 权重分配 | 直觉解释 |
|
||||||
|
|---|---|---|---|
|
||||||
|
| 短期 Short-term | ≤ 3 天 | 时间 70% + 情感 30% | 刚发生的事,鲜活度最重要 |
|
||||||
|
| 长期 Long-term | > 3 天 | 情感 70% + 时间 30% | 时间淡了,情感强度决定能记多久 |
|
||||||
|
|
||||||
|
$$combined\_weight = \begin{cases} time\_weight \times 0.7 + emotion\_weight \times 0.3 & \text{if } days \leq 3 \\ emotion\_weight \times 0.7 + time\_weight \times 0.3 & \text{if } days > 3 \end{cases}$$
|
||||||
|
|
||||||
|
### 时间系数(新鲜度加成)/ Time Weight (Freshness Bonus)
|
||||||
|
|
||||||
|
连续指数衰减,无跳变:
|
||||||
|
Continuous exponential decay, no discontinuities:
|
||||||
|
|
||||||
|
$$freshness = 1.0 + 1.0 \times e^{-t/36}$$
|
||||||
|
|
||||||
|
| 距存入时间 Time since creation | 新鲜度乘数 Multiplier |
|
||||||
|---|---|
|
|---|---|
|
||||||
| 0–1 天 | 1.0 |
|
| 刚存入 (t=0) | ×2.0 |
|
||||||
| 第 2 天 | 0.9 |
|
| 约 25 小时 | ×1.5 |
|
||||||
| 之后每天约降 10% | `max(0.3, 0.9 × e^{-0.2197 × (days-2)})` |
|
| 约 50 小时 | ×1.25 |
|
||||||
| 7 天后稳定 | ≈ 0.3(不归零)|
|
| 72 小时 (3天) | ×1.14 |
|
||||||
|
| 1 周+ | ≈ ×1.0 |
|
||||||
|
|
||||||
|
其中 t 为距存入时间(单位:小时),36 为衰减时间常数(小时);这里的 freshness 即上文 combined_weight 公式中的 time_weight。老记忆不被惩罚(下限 ×1.0),新记忆获得额外加成(上限 ×2.0)。
|
||||||
|
|
||||||
|
### 情感权重 / Emotion Weight
|
||||||
|
|
||||||
|
$$emotion\_weight = base + arousal \times arousal\_boost$$
|
||||||
|
|
||||||
|
- 默认 `base=1.0`, `arousal_boost=0.8`
|
||||||
|
- arousal=0.3(平静)→ 1.24;arousal=0.9(激动)→ 1.72
|
||||||
|
|
||||||
|
### 权重池修正因子 / Weight Pool Modifiers
|
||||||
|
|
||||||
|
| 状态 State | 修正因子 Factor | 说明 |
|
||||||
|
|---|---|---|
|
||||||
|
| 未解决 Unresolved | ×1.0 | 正常权重 |
|
||||||
|
| 已解决 Resolved | ×0.05 | 沉底,等关键词唤醒 |
|
||||||
|
| 已解决+已消化 Resolved+Digested | ×0.02 | 加速淡化,归档为无限小 |
|
||||||
|
| 高唤醒+未解决 Urgent | ×1.5 | arousal>0.7 的未解决记忆额外加权 |
|
||||||
|
| 钉选 Pinned | 999.0 | 不衰减、不合并、importance=10 |
|
||||||
|
| Feel | 50.0 | 固定分数,不参与衰减 |
|
||||||
|
|
||||||
|
### 参数说明 / Parameters
|
||||||
|
|
||||||
- `importance`: 1-10,记忆重要性 / memory importance
|
- `importance`: 1-10,记忆重要性 / memory importance
|
||||||
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
||||||
- `days`: 距上次激活的天数 / days since last activation
|
- `days`: 距上次激活的天数 / days since last activation
|
||||||
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
||||||
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
|
- `λ` (decay_lambda): 衰减速率,默认 0.05 / decay rate, default 0.05
|
||||||
- `pinned=true` 的桶:不衰减、不合并、importance 锁定 10 / `pinned` buckets: never decay, never merge, importance locked at 10
|
|
||||||
|
## Dreaming 与 Feel / Dreaming & Feel
|
||||||
|
|
||||||
|
### Dreaming — 做梦
|
||||||
|
每次新对话开始时,Claude 会自动执行 `dream()`——读取最近的记忆桶,用第一人称思考:哪些事还有重量?哪些可以放下了?
|
||||||
|
|
||||||
|
At the start of each conversation, Claude runs `dream()` — reads recent memory buckets and reflects in first person: what still carries weight? What can be let go?
|
||||||
|
|
||||||
|
- 值得放下的 → `trace(resolved=1)` 让它沉底
|
||||||
|
- 有沉淀的 → 写 `feel`,记录模型自己的感受
|
||||||
|
- 没有沉淀就不写,不强迫产出
|
||||||
|
|
||||||
|
### Feel — 带走的东西
|
||||||
|
Feel 不是事件记录,是**模型带走的东西**——一句感受、一个未解答的问题、一个观察到的变化。
|
||||||
|
|
||||||
|
Feel is not an event log — it's **what the model carries away**: a feeling, an unanswered question, a noticed change.
|
||||||
|
|
||||||
|
- `hold(content="...", feel=True, source_bucket="源记忆ID", valence=模型自己的感受)`
|
||||||
|
- `valence` 是模型的感受,不是事件情绪。同一段争吵,事件 V0.2,但模型可能 V0.4(「我从中看到了成长」)
|
||||||
|
- `source_bucket` 指向被消化的记忆,会被标记为「已消化」→ 加速淡化到无限小,但不会被删除
|
||||||
|
- Feel 不参与普通浮现、不衰减、不参与 dreaming
|
||||||
|
- 用 `breath(domain="feel")` 读取之前的 feel
|
||||||
|
|
||||||
|
### 对话启动完整流程 / Conversation Start Sequence
|
||||||
|
```
|
||||||
|
1. breath() — 睁眼,看有什么浮上来
|
||||||
|
2. dream() — 消化最近记忆,有沉淀写 feel
|
||||||
|
3. breath(domain="feel") — 读之前的 feel
|
||||||
|
4. 开始和用户说话
|
||||||
|
```
|
||||||
|
|
||||||
## 给 Claude 的使用指南 / Usage Guide for Claude
|
## 给 Claude 的使用指南 / Usage Guide for Claude
|
||||||
|
|
||||||
@@ -289,32 +556,53 @@ $$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \ti
|
|||||||
|
|
||||||
| 脚本 Script | 用途 Purpose |
|
| 脚本 Script | 用途 Purpose |
|
||||||
|---|---|
|
|---|---|
|
||||||
|
| `embedding_engine.py` | 向量化引擎,管理 embedding 的生成、存储、相似度搜索 / Embedding engine: generate, store, and search embeddings |
|
||||||
|
| `backfill_embeddings.py` | 为存量桶批量生成 embedding / Batch-generate embeddings for existing buckets |
|
||||||
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
||||||
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
||||||
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
||||||
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
||||||
|
| `test_tools.py` | MCP 工具集成测试(8 项) / MCP tool integration tests (8 tests) |
|
||||||
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
||||||
|
|
||||||
## 部署 / Deploy
|
## 部署 / Deploy
|
||||||
|
|
||||||
|
### Docker Hub 预构建镜像
|
||||||
|
|
||||||
|
[Docker Hub: p0luz/ombre-brain](https://hub.docker.com/r/p0luz/ombre-brain)
|
||||||
|
|
||||||
|
不用 clone 代码、不用 build,直接拉取预构建镜像:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker pull p0luz/ombre-brain:latest
|
||||||
|
curl -O https://raw.githubusercontent.com/P0luz/Ombre-Brain/main/docker-compose.user.yml
|
||||||
|
echo "OMBRE_API_KEY=你的key" > .env
|
||||||
|
docker compose -f docker-compose.user.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
验证:`curl http://localhost:8000/health`
|
||||||
|
Dashboard:浏览器打开 `http://localhost:8000/dashboard`
|
||||||
|
|
||||||
### Render
|
### Render
|
||||||
|
|
||||||
[](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
|
[Deploy to Render](https://render.com/deploy?repo=https://github.com/P0luz/Ombre-Brain)
|
||||||
|
|
||||||
> ⚠️ **免费层不可用**:Render 免费层**不支持持久化磁盘**,服务重启后记忆数据会丢失,且会在无流量时休眠。**必须使用 Starter($7/mo)或以上**才能正常使用。
|
> ⚠️ **免费层不可用**:Render 免费层**不支持持久化磁盘**,服务重启后记忆数据会丢失,且会在无流量时休眠。**必须使用 Starter($7/mo)或以上**才能正常使用。
|
||||||
> **Free tier won't work**: Render free tier has **no persistent disk** — all memory data is lost on restart. It also sleeps on inactivity. **Starter plan ($7/mo) or above is required.**
|
> **Free tier won't work**: Render free tier has **no persistent disk** — all memory data is lost on restart. It also sleeps on inactivity. **Starter plan ($7/mo) or above is required.**
|
||||||
|
|
||||||
项目根目录已包含 `render.yaml`,点击按钮后:
|
项目根目录已包含 `render.yaml`,点击按钮后:
|
||||||
1. (可选)设置 `OMBRE_API_KEY`:任何 OpenAI 兼容 API 的 key,不填则自动降级为本地关键词提取
|
1. 设置 `OMBRE_API_KEY`:任何 OpenAI 兼容 API 的 key(**必需**,未设置时 hold/grow 会报错、仅检索类工具可用)
|
||||||
2. (可选)设置 `OMBRE_BASE_URL`:API 地址,支持任意 OpenAI 化地址,如 `https://api.deepseek.com/v1` / `http://123.1.1.1:7689/v1` / `http://your-ollama:11434/v1`
|
2. (可选)设置 `OMBRE_BASE_URL`:API 地址,支持任意 OpenAI 化地址,如 `https://api.deepseek.com/v1` / `http://123.1.1.1:7689/v1` / `http://your-ollama:11434/v1`
|
||||||
3. Render 自动挂载持久化磁盘到 `/opt/render/project/src/buckets`
|
3. Render 自动挂载持久化磁盘到 `/opt/render/project/src/buckets`
|
||||||
4. 部署后 MCP URL:`https://<你的服务名>.onrender.com/mcp`
|
4. Dashboard:`https://<你的服务名>.onrender.com/dashboard`
|
||||||
|
5. 部署后 MCP URL:`https://<你的服务名>.onrender.com/mcp`
|
||||||
|
|
||||||
`render.yaml` is included. After clicking the button:
|
`render.yaml` is included. After clicking the button:
|
||||||
1. (Optional) `OMBRE_API_KEY`: any OpenAI-compatible key; omit to fall back to local keyword extraction
|
1. `OMBRE_API_KEY`: any OpenAI-compatible key (**required** for hold/grow; without it those tools raise an error)
|
||||||
2. (Optional) `OMBRE_BASE_URL`: any OpenAI-compatible endpoint, e.g. `https://api.deepseek.com/v1`, `http://123.1.1.1:7689/v1`, `http://your-ollama:11434/v1`
|
2. (Optional) `OMBRE_BASE_URL`: any OpenAI-compatible endpoint, e.g. `https://api.deepseek.com/v1`, `http://123.1.1.1:7689/v1`, `http://your-ollama:11434/v1`
|
||||||
3. Persistent disk auto-mounts at `/opt/render/project/src/buckets`
|
3. Persistent disk auto-mounts at `/opt/render/project/src/buckets`
|
||||||
4. MCP URL after deploy: `https://<your-service>.onrender.com/mcp`
|
4. Dashboard: `https://<your-service>.onrender.com/dashboard`
|
||||||
|
5. MCP URL after deploy: `https://<your-service>.onrender.com/mcp`
|
||||||
|
|
||||||
### Zeabur
|
### Zeabur
|
||||||
|
|
||||||
@@ -332,7 +620,7 @@ $$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \ti
|
|||||||
- Zeabur auto-detects the `Dockerfile` in root and builds via Docker
|
- Zeabur auto-detects the `Dockerfile` in root and builds via Docker
|
||||||
|
|
||||||
2. **设置环境变量 / Set environment variables**(服务页面 → **Variables** 标签页)
|
2. **设置环境变量 / Set environment variables**(服务页面 → **Variables** 标签页)
|
||||||
- `OMBRE_API_KEY`(可选)— LLM API 密钥,不填则自动降级为本地关键词提取
|
- `OMBRE_API_KEY`(**必需**)— LLM API 密钥;未设置时 hold/grow/dream 会报错
|
||||||
- `OMBRE_BASE_URL`(可选)— API 地址,如 `https://api.deepseek.com/v1`
|
- `OMBRE_BASE_URL`(可选)— API 地址,如 `https://api.deepseek.com/v1`
|
||||||
|
|
||||||
> ⚠️ **不需要**手动设置 `OMBRE_TRANSPORT` 和 `OMBRE_BUCKETS_DIR`,Dockerfile 里已经设好了默认值。Zeabur 对单阶段 Dockerfile 会自动注入控制台设置的环境变量。
|
> ⚠️ **不需要**手动设置 `OMBRE_TRANSPORT` 和 `OMBRE_BUCKETS_DIR`,Dockerfile 里已经设好了默认值。Zeabur 对单阶段 Dockerfile 会自动注入控制台设置的环境变量。
|
||||||
@@ -354,6 +642,7 @@ $$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \ti
|
|||||||
5. **验证 / Verify**
|
5. **验证 / Verify**
|
||||||
- 访问 `https://<你的域名>.zeabur.app/health`,应返回 JSON
|
- 访问 `https://<你的域名>.zeabur.app/health`,应返回 JSON
|
||||||
- Visit `https://<your-domain>.zeabur.app/health` — should return JSON
|
- Visit `https://<your-domain>.zeabur.app/health` — should return JSON
|
||||||
|
- Dashboard:`https://<你的域名>.zeabur.app/dashboard`
|
||||||
- 最终 MCP 地址 / MCP URL:`https://<你的域名>.zeabur.app/mcp`
|
- 最终 MCP 地址 / MCP URL:`https://<你的域名>.zeabur.app/mcp`
|
||||||
|
|
||||||
**常见问题 / Troubleshooting:**
|
**常见问题 / Troubleshooting:**
|
||||||
@@ -415,6 +704,151 @@ When connecting via tunnel, ensure:
|
|||||||
|
|
||||||
If using Claude Code, `.claude/settings.json` configures a `SessionStart` hook that auto-calls `breath` on each new or resumed session, surfacing your highest-weight unresolved memories as context. Only active in remote HTTP mode. Set `OMBRE_HOOK_SKIP=1` to disable temporarily.
|
If using Claude Code, `.claude/settings.json` configures a `SessionStart` hook that auto-calls `breath` on each new or resumed session, surfacing your highest-weight unresolved memories as context. Only active in remote HTTP mode. Set `OMBRE_HOOK_SKIP=1` to disable temporarily.
|
||||||
|
|
||||||
|
## 更新 / How to Update
|
||||||
|
|
||||||
|
不同部署方式的更新方法。
|
||||||
|
|
||||||
|
Different update procedures depending on your deployment method.
|
||||||
|
|
||||||
|
### Docker Hub 预构建镜像用户 / Docker Hub Pre-built Image
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 拉取最新镜像
|
||||||
|
docker pull p0luz/ombre-brain:latest
|
||||||
|
|
||||||
|
# 重启容器(记忆数据在 volume 里,不会丢失)
|
||||||
|
docker compose -f docker-compose.user.yml down
|
||||||
|
docker compose -f docker-compose.user.yml up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
> 你的记忆数据挂载在 `./buckets:/data`,pull + restart 不会影响已有数据。
|
||||||
|
> Your memory data is mounted at `./buckets:/data` — pull + restart won't affect existing data.
|
||||||
|
|
||||||
|
### 从源码部署用户 / Source Code Deploy (Docker)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd Ombre-Brain
|
||||||
|
|
||||||
|
# 拉取最新代码
|
||||||
|
git pull origin main
|
||||||
|
|
||||||
|
# 重新构建并重启
|
||||||
|
docker compose down
|
||||||
|
docker compose build
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
> `docker compose build` 会重新构建镜像。volume 挂载的记忆数据不受影响。
|
||||||
|
> `docker compose build` rebuilds the image. Volume-mounted memory data is unaffected.
|
||||||
|
|
||||||
|
### 本地 Python 用户 / Local Python (no Docker)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd Ombre-Brain
|
||||||
|
|
||||||
|
# 拉取最新代码
|
||||||
|
git pull origin main
|
||||||
|
|
||||||
|
# 更新依赖(如有新增)
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# 重启服务
|
||||||
|
# Ctrl+C 停止旧进程,然后:
|
||||||
|
python server.py
|
||||||
|
```
|
||||||
|
|
||||||
|
### Render
|
||||||
|
|
||||||
|
Render 连接了你的 GitHub 仓库,**自动部署**:
|
||||||
|
|
||||||
|
1. 如果你 Fork 了仓库 → 在 GitHub 上同步上游更新(Sync fork),Render 会自动重新部署
|
||||||
|
2. 或者手动:Render Dashboard → 你的服务 → **Manual Deploy** → **Deploy latest commit**
|
||||||
|
|
||||||
|
> 持久化磁盘(`/opt/render/project/src/buckets`)上的记忆数据在重新部署时保留。
|
||||||
|
> Persistent disk data at `/opt/render/project/src/buckets` is preserved across deploys.
|
||||||
|
|
||||||
|
### Zeabur
|
||||||
|
|
||||||
|
Zeabur 也连接了你的 GitHub 仓库:
|
||||||
|
|
||||||
|
1. 在 GitHub 上同步 Fork 的最新代码 → Zeabur 自动触发重新构建部署
|
||||||
|
2. 或者手动:Zeabur Dashboard → 你的服务 → **Redeploy**
|
||||||
|
|
||||||
|
> Volume 挂载在 `/app/buckets`,重新部署时数据保留。
|
||||||
|
> Volume mounted at `/app/buckets` — data persists across redeploys.
|
||||||
|
|
||||||
|
### VPS / 自有服务器 / Self-hosted VPS
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd Ombre-Brain
|
||||||
|
|
||||||
|
# 拉取最新代码
|
||||||
|
git pull origin main
|
||||||
|
|
||||||
|
# 方式 A:Docker 部署
|
||||||
|
docker compose down
|
||||||
|
docker compose build
|
||||||
|
docker compose up -d
|
||||||
|
|
||||||
|
# 方式 B:直接 Python 运行
|
||||||
|
pip install -r requirements.txt
|
||||||
|
# 重启你的进程管理器(systemd / supervisord / pm2 等)
|
||||||
|
sudo systemctl restart ombre-brain # 示例
|
||||||
|
```
|
||||||
|
|
||||||
|
> **通用注意事项 / General notes:**
|
||||||
|
> - 更新不会影响你的记忆数据(存在 volume 或 buckets 目录里)
|
||||||
|
> - 如果 `requirements.txt` 有变化,Docker 用户重新 build 即可自动处理;非 Docker 用户需手动 `pip install -r requirements.txt`
|
||||||
|
> - 更新后访问 `/health` 验证服务正常
|
||||||
|
> - Updates never affect your memory data (it lives in the mounted volume or the `buckets` directory)
|
||||||
|
> - If `requirements.txt` changed, Docker rebuild handles it automatically; non-Docker users need `pip install -r requirements.txt`
|
||||||
|
> - After updating, visit `/health` to verify the service is running
|
||||||
|
|
||||||
|
## 测试 / Testing
|
||||||
|
|
||||||
|
测试套件覆盖规格书所有场景(场景 01–11),以及 B-01 至 B-10 全部 bug 修复的回归测试。
|
||||||
|
|
||||||
|
The test suite covers all spec scenarios (01–11) and regression tests for every bug fix (B-01 to B-10).
|
||||||
|
|
||||||
|
### 快速运行 / Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install pytest pytest-asyncio
|
||||||
|
pytest tests/ # 全部测试
|
||||||
|
pytest tests/unit/ # 单元测试
|
||||||
|
pytest tests/integration/ # 集成测试(场景全流程)
|
||||||
|
pytest tests/regression/ # 回归测试(B-01..B-10)
|
||||||
|
pytest tests/ -k "B01" # 单个回归测试
|
||||||
|
pytest tests/ -v # 详细输出
|
||||||
|
```
|
||||||
|
|
||||||
|
### 测试层级 / Test Layers
|
||||||
|
|
||||||
|
| 目录 Directory | 内容 Contents |
|
||||||
|
|---|---|
|
||||||
|
| `tests/unit/` | 单独测试 calculate_score、topic_score、时间得分、CRUD 等核心函数 |
|
||||||
|
| `tests/integration/` | 场景全流程:冷启动、hold、search、trace、decay、feel 等 11 个场景 |
|
||||||
|
| `tests/regression/` | 每个 bug(B-01 至 B-10)独立回归测试,含边界条件 |
|
||||||
|
|
||||||
|
### 回归测试覆盖 / Regression Coverage
|
||||||
|
|
||||||
|
| 文件 | Bug | 核心断言 |
|
||||||
|
|---|---|---|
|
||||||
|
| `test_issue_B01.py` | resolved 桶不再自动归档 | `update(resolved=True)` 后桶留在 `dynamic/`,搜索仍可命中,得分 ×0.05 |
|
||||||
|
| `test_issue_B03.py` | float activation_count 不被 int() 截断 | 1.3 > 1.0 得分,`_time_ripple` 写入 0.3 增量 |
|
||||||
|
| `test_issue_B04.py` | create() 初始 activation_count=0 | 新建桶满足冷启动条件,touch() 后变 1 |
|
||||||
|
| `test_issue_B05.py` | 时间衰减系数 0.02(原 0.1)| 30天 ≈ 0.549,非旧值 0.049 |
|
||||||
|
| `test_issue_B06.py` | w_time 默认 1.5(原 2.5)| `BucketManager.w_time == 1.5` |
|
||||||
|
| `test_issue_B07.py` | content_weight 默认 1.0(原 3.0)| 名字完全匹配得分 > 内容模糊匹配 |
|
||||||
|
| `test_issue_B08.py` | auto_resolve 同轮应用降权因子 | stale meta 修复后 score ×0.05 立即生效 |
|
||||||
|
| `test_issue_B09.py` | hold() 保留用户传入的 valence/arousal | 用户值优先于 analyze() 结果 |
|
||||||
|
| `test_issue_B10.py` | feel 桶 domain=[] 不被填充 | feel 桶保持 `[]`;dynamic 桶正确填 `["未分类"]` |
|
||||||
|
|
||||||
|
> **测试隔离**:所有测试运行在 `tmp_path` 临时目录,绝不触碰真实记忆数据。
|
||||||
|
> **Test isolation**: All tests run in `tmp_path` — your real memory data is never touched.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT
|
MIT
|
||||||
|
|||||||
92
backfill_embeddings.py
Normal file
92
backfill_embeddings.py
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Backfill embeddings for existing buckets.
|
||||||
|
为存量桶批量生成 embedding。
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
OMBRE_BUCKETS_DIR=/data OMBRE_API_KEY=xxx python backfill_embeddings.py [--batch-size 20] [--dry-run]
|
||||||
|
|
||||||
|
Each batch calls Gemini embedding API once per bucket.
|
||||||
|
Free tier: 1500 requests/day, so ~75 batches of 20.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import argparse
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, ".")
|
||||||
|
from utils import load_config
|
||||||
|
from bucket_manager import BucketManager
|
||||||
|
from embedding_engine import EmbeddingEngine
|
||||||
|
|
||||||
|
|
||||||
|
async def backfill(batch_size: int = 20, dry_run: bool = False):
    """
    Generate and store embeddings for every bucket that does not have one yet.

    All buckets (archive included) are listed, those for which the engine
    returns no embedding are collected, and each of them is embedded one by
    one. Work is grouped into batches only to insert a 2-second pause between
    groups; every bucket still costs one embedding call.

    Args:
        batch_size: Number of buckets handled between pauses. Must be >= 1.
        dry_run: When True, only print which buckets would be embedded
            (at most the first 10 are listed) and make no embedding calls.

    Raises:
        ValueError: If batch_size is smaller than 1. Checked before any work
            is done, so a bad value fails fast instead of after the full scan.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    config = load_config()
    bucket_mgr = BucketManager(config)
    engine = EmbeddingEngine(config)

    if not engine.enabled:
        print("ERROR: Embedding engine not enabled (missing API key?)")
        return

    all_buckets = await bucket_mgr.list_all(include_archive=True)
    print(f"Total buckets: {len(all_buckets)}")

    # Find buckets without embeddings
    missing = []
    for b in all_buckets:
        emb = await engine.get_embedding(b["id"])
        if emb is None:
            missing.append(b)

    print(f"Missing embeddings: {len(missing)}")

    if dry_run:
        for b in missing[:10]:
            print(f" would embed: {b['id']} ({b['metadata'].get('name', '?')})")
        if len(missing) > 10:
            print(f" ... and {len(missing) - 10} more")
        return

    total = len(missing)
    success = 0
    failed = 0
    skipped = 0
    # Ceiling division; constant for the whole run, so computed once.
    total_batches = (total + batch_size - 1) // batch_size

    for batch_num, start in enumerate(range(0, total, batch_size), start=1):
        batch = missing[start : start + batch_size]
        print(f"\n--- Batch {batch_num}/{total_batches} ({len(batch)} buckets) ---")

        for b in batch:
            name = b["metadata"].get("name", b["id"])
            content = b.get("content", "")
            if not content or not content.strip():
                skipped += 1
                print(f" SKIP (empty): {b['id']} ({name})")
                continue

            try:
                ok = await engine.generate_and_store(b["id"], content)
            except Exception as e:
                # Best effort: one failing bucket must not abort the whole run.
                failed += 1
                print(f" ERROR: {b['id'][:12]} ({name[:30]}): {e}")
                continue

            if ok:
                success += 1
                print(f" OK: {b['id'][:12]} ({name[:30]})")
            else:
                failed += 1
                print(f" FAIL: {b['id'][:12]} ({name[:30]})")

        # Pause between batches only, not after the last one.
        if start + batch_size < total:
            print(" Waiting 2s before next batch...")
            await asyncio.sleep(2)

    print(f"\n=== Done: {success} success, {failed} failed, {skipped} skipped ===")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI entry point: read the two optional flags, then drive the async
    # backfill coroutine to completion.
    cli = argparse.ArgumentParser()
    cli.add_argument("--batch-size", type=int, default=20)
    cli.add_argument("--dry-run", action="store_true")
    options = cli.parse_args()

    asyncio.run(
        backfill(
            batch_size=options.batch_size,
            dry_run=options.dry_run,
        )
    )
|
||||||
@@ -1,205 +0,0 @@
|
|||||||
# Ombre Brain
|
|
||||||
|
|
||||||
一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。
|
|
||||||
|
|
||||||
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
## 它是什么 / What is this
|
|
||||||
|
|
||||||
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
|
|
||||||
|
|
||||||
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
|
|
||||||
|
|
||||||
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
|
|
||||||
|
|
||||||
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
|
|
||||||
|
|
||||||
核心特点 / Key features:
|
|
||||||
|
|
||||||
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
|
||||||
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
|
||||||
|
|
||||||
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
|
||||||
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
|
||||||
|
|
||||||
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
|
||||||
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
|
||||||
|
|
||||||
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
|
||||||
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
|
||||||
|
|
||||||
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。
|
|
||||||
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
|
|
||||||
|
|
||||||
## 边界说明 / Design boundaries
|
|
||||||
|
|
||||||
官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。
|
|
||||||
|
|
||||||
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
|
|
||||||
|
|
||||||
每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
|
|
||||||
|
|
||||||
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
|
|
||||||
|
|
||||||
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
|
|
||||||
|
|
||||||
## 架构 / Architecture
|
|
||||||
|
|
||||||
```
|
|
||||||
Claude ←→ MCP Protocol ←→ server.py
|
|
||||||
│
|
|
||||||
┌───────────────┼───────────────┐
|
|
||||||
│ │ │
|
|
||||||
bucket_manager dehydrator decay_engine
|
|
||||||
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
|
||||||
│
|
|
||||||
Obsidian Vault (Markdown files)
|
|
||||||
```
|
|
||||||
|
|
||||||
5 个 MCP 工具 / 5 MCP tools:
|
|
||||||
|
|
||||||
| 工具 Tool | 作用 Purpose |
|
|
||||||
|-----------|-------------|
|
|
||||||
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
|
|
||||||
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
|
|
||||||
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
|
|
||||||
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
|
||||||
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
|
||||||
|
|
||||||
## 安装 / Setup
|
|
||||||
|
|
||||||
### 环境要求 / Requirements
|
|
||||||
|
|
||||||
- Python 3.11+
|
|
||||||
- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`)
|
|
||||||
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
|
|
||||||
|
|
||||||
### 步骤 / Steps
|
|
||||||
|
|
||||||
```bash
|
|
||||||
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
|
|
||||||
cd Ombre-Brain
|
|
||||||
|
|
||||||
python -m venv .venv
|
|
||||||
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
|
||||||
|
|
||||||
pip install -r requirements.txt
|
|
||||||
```
|
|
||||||
|
|
||||||
复制配置文件并按需修改 / Copy config and edit as needed:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
cp config.example.yaml config.yaml
|
|
||||||
```
|
|
||||||
|
|
||||||
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
|
|
||||||
If you want API-powered dehydration and tagging (recommended, much better quality), set this environment variable:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
export OMBRE_API_KEY="your-api-key"
|
|
||||||
```
|
|
||||||
|
|
||||||
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
|
||||||
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
|
||||||
|
|
||||||
### 接入 Claude Desktop / Connect to Claude Desktop
|
|
||||||
|
|
||||||
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
||||||
|
|
||||||
Add to your Claude Desktop config:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"mcpServers": {
|
|
||||||
"ombre-brain": {
|
|
||||||
"command": "python",
|
|
||||||
"args": ["/path/to/Ombre-Brain/server.py"],
|
|
||||||
"env": {
|
|
||||||
"OMBRE_API_KEY": "your-api-key"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
|
|
||||||
|
|
||||||
需要 HTTP 传输 + 隧道。可以用 Docker:
|
|
||||||
Requires HTTP transport + tunnel. Docker setup:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
echo "OMBRE_API_KEY=your-api-key" > .env
|
|
||||||
docker-compose up -d
|
|
||||||
```
|
|
||||||
|
|
||||||
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
|
|
||||||
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
|
|
||||||
|
|
||||||
### 指向 Obsidian / Point to Obsidian
|
|
||||||
|
|
||||||
在 `config.yaml` 里设置 `buckets_dir`:
|
|
||||||
Set `buckets_dir` in `config.yaml`:
|
|
||||||
|
|
||||||
```yaml
|
|
||||||
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
|
||||||
```
|
|
||||||
|
|
||||||
不设的话,默认用项目目录下的 `buckets/`。
|
|
||||||
If not set, defaults to `buckets/` in the project directory.
|
|
||||||
|
|
||||||
## 配置 / Configuration
|
|
||||||
|
|
||||||
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
|
|
||||||
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
|
|
||||||
|
|
||||||
| 参数 Parameter | 说明 Description | 默认 Default |
|
|
||||||
|---|---|---|
|
|
||||||
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
|
|
||||||
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
|
||||||
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
|
||||||
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
|
||||||
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
|
||||||
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
|
||||||
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
|
||||||
|
|
||||||
敏感配置用环境变量:
|
|
||||||
Sensitive config via env vars:
|
|
||||||
- `OMBRE_API_KEY` — LLM API 密钥
|
|
||||||
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
|
||||||
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
|
||||||
|
|
||||||
## 衰减公式 / Decay Formula
|
|
||||||
|
|
||||||
$$Score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
|
|
||||||
|
|
||||||
- `importance`: 1-10,记忆重要性 / memory importance
|
|
||||||
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
|
||||||
- `days`: 距上次激活的天数 / days since last activation
|
|
||||||
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
|
||||||
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
|
|
||||||
|
|
||||||
## 给 Claude 的使用指南 / Usage Guide for Claude
|
|
||||||
|
|
||||||
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
|
|
||||||
|
|
||||||
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
|
|
||||||
|
|
||||||
## 工具脚本 / Utility Scripts
|
|
||||||
|
|
||||||
| 脚本 Script | 用途 Purpose |
|
|
||||||
|---|---|
|
|
||||||
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
|
||||||
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
|
||||||
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
|
||||||
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
|
||||||
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
|
||||||
|
|
||||||
## License
|
|
||||||
|
|
||||||
MIT
|
|
||||||
@@ -1,755 +0,0 @@
|
|||||||
# ============================================================
|
|
||||||
# Module: Memory Bucket Manager (bucket_manager.py)
|
|
||||||
# 模块:记忆桶管理器
|
|
||||||
#
|
|
||||||
# CRUD operations, multi-dimensional index search, activation updates
|
|
||||||
# for memory buckets.
|
|
||||||
# 记忆桶的增删改查、多维索引搜索、激活更新。
|
|
||||||
#
|
|
||||||
# Core design:
|
|
||||||
# 核心逻辑:
|
|
||||||
# - Each bucket = one Markdown file (YAML frontmatter + body)
|
|
||||||
# 每个记忆桶 = 一个 Markdown 文件
|
|
||||||
# - Storage by type: permanent / dynamic / archive
|
|
||||||
# 存储按类型分目录
|
|
||||||
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
|
|
||||||
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
|
|
||||||
# - Search strategy: domain pre-filter → weighted multi-dim ranking
|
|
||||||
# 搜索策略:主题域预筛 → 多维加权精排
|
|
||||||
# - Emotion coordinates based on Russell circumplex model:
|
|
||||||
# 情感坐标基于环形情感模型(Russell circumplex):
|
|
||||||
# valence (0~1): 0=negative → 1=positive
|
|
||||||
# arousal (0~1): 0=calm → 1=excited
|
|
||||||
#
|
|
||||||
# Depended on by: server.py, decay_engine.py
|
|
||||||
# 被谁依赖:server.py, decay_engine.py
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
import os
|
|
||||||
import math
|
|
||||||
import logging
|
|
||||||
import re
|
|
||||||
import shutil
|
|
||||||
from collections import Counter
|
|
||||||
from datetime import datetime
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import frontmatter
|
|
||||||
import jieba
|
|
||||||
from rapidfuzz import fuzz
|
|
||||||
|
|
||||||
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
|
||||||
|
|
||||||
logger = logging.getLogger("ombre_brain.bucket")
|
|
||||||
|
|
||||||
|
|
||||||
class BucketManager:
|
|
||||||
"""
|
|
||||||
Memory bucket manager — entry point for all bucket CRUD operations.
|
|
||||||
Buckets are stored as Markdown files with YAML frontmatter for metadata
|
|
||||||
and body for content. Natively compatible with Obsidian browsing/editing.
|
|
||||||
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
|
|
||||||
桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。
|
|
||||||
天然兼容 Obsidian 直接浏览和编辑。
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, config: dict):
|
|
||||||
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
|
||||||
self.base_dir = config["buckets_dir"]
|
|
||||||
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
|
||||||
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
|
||||||
self.archive_dir = os.path.join(self.base_dir, "archive")
|
|
||||||
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
|
||||||
self.max_results = config.get("matching", {}).get("max_results", 5)
|
|
||||||
|
|
||||||
# --- Wikilink config / 双链配置 ---
|
|
||||||
wikilink_cfg = config.get("wikilink", {})
|
|
||||||
self.wikilink_enabled = wikilink_cfg.get("enabled", True)
|
|
||||||
self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
|
|
||||||
self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
|
|
||||||
self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
|
|
||||||
self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
|
|
||||||
self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
|
|
||||||
self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
|
|
||||||
self.wikilink_stopwords = {
|
|
||||||
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
|
||||||
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
|
||||||
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
|
||||||
"我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
|
|
||||||
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
|
||||||
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
|
||||||
"this", "that", "from", "they", "been", "said", "will", "each",
|
|
||||||
}
|
|
||||||
self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
|
|
||||||
|
|
||||||
# --- Search scoring weights / 检索权重配置 ---
|
|
||||||
scoring = config.get("scoring_weights", {})
|
|
||||||
self.w_topic = scoring.get("topic_relevance", 4.0)
|
|
||||||
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
|
||||||
self.w_time = scoring.get("time_proximity", 1.5)
|
|
||||||
self.w_importance = scoring.get("importance", 1.0)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Create a new bucket
|
|
||||||
# 创建新桶
|
|
||||||
# Write content and metadata into a .md file
|
|
||||||
# 将内容和元数据写入一个 .md 文件
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def create(
    self,
    content: str,
    tags: list[str] = None,
    importance: int = 5,
    domain: list[str] = None,
    valence: float = 0.5,
    arousal: float = 0.3,
    bucket_type: str = "dynamic",
    name: str = None,
) -> str:
    """
    Create a new memory bucket as a Markdown file and return its ID.

    Args:
        content: Body text; wikilinks are injected before it is written.
        tags: Optional tag list (defaults to an empty list).
        importance: Clamped to 1..10 after int() coercion, as update() does.
        domain: Topic domains; the first one selects the sub-directory.
            Defaults to ["未分类"].
        valence: Clamped to 0.0..1.0 after float() coercion.
        arousal: Clamped to 0.0..1.0 after float() coercion.
        bucket_type: "permanent" is stored under the permanent directory;
            any other value is stored under the dynamic directory.
        name: Optional readable name; sanitised and used in the filename.

    Returns:
        The generated bucket ID.

    Raises:
        OSError: If the bucket file cannot be written (logged, then re-raised).
    """
    bucket_id = generate_bucket_id()
    # Fall back to the ID when no name is given or sanitising leaves nothing
    # (assumes sanitize_name can return an empty string; confirm in utils).
    bucket_name = (sanitize_name(name) if name else "") or bucket_id
    domain = domain or ["未分类"]
    tags = tags or []
    linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)

    # One timestamp for both fields, so a new bucket has created == last_active.
    timestamp = now_iso()

    # --- Build YAML frontmatter metadata ---
    metadata = {
        "id": bucket_id,
        "name": bucket_name,
        "tags": tags,
        "domain": domain,
        "valence": max(0.0, min(1.0, float(valence))),
        "arousal": max(0.0, min(1.0, float(arousal))),
        "importance": max(1, min(10, int(importance))),
        "type": bucket_type,
        "created": timestamp,
        "last_active": timestamp,
        "activation_count": 1,
    }

    # --- Assemble Markdown file (frontmatter + body) ---
    post = frontmatter.Post(linked_content, **metadata)

    # --- Choose directory by type + primary domain ---
    type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
    # domain is never empty here because of the default applied above.
    primary_domain = sanitize_name(domain[0])
    target_dir = os.path.join(type_dir, primary_domain)
    os.makedirs(target_dir, exist_ok=True)

    # --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
    if bucket_name != bucket_id:
        filename = f"{bucket_name}_{bucket_id}.md"
    else:
        filename = f"{bucket_id}.md"
    file_path = safe_path(target_dir, filename)

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))
    except OSError as e:
        logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
        raise

    logger.info(
        f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
    )
    return bucket_id
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Read bucket content
|
|
||||||
# 读取桶内容
|
|
||||||
# Returns {"id", "metadata", "content", "path"} or None
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def get(self, bucket_id: str) -> Optional[dict]:
    """
    Look up one bucket by its ID.

    Returns whatever `_load_bucket` yields for the located file, or None
    when the ID is empty, is not a string, or no matching file is found.
    """
    if bucket_id and isinstance(bucket_id, str):
        located = self._find_bucket_file(bucket_id)
        if located:
            return self._load_bucket(located)
    return None
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Update bucket
|
|
||||||
# 更新桶
|
|
||||||
# Supports: content, tags, importance, valence, arousal, name, resolved
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def update(self, bucket_id: str, **kwargs) -> bool:
    """
    Apply the given field changes to an existing bucket file.

    Recognised keyword arguments: content, tags, importance, domain,
    valence, arousal, name, resolved. Only the fields actually passed are
    changed; `last_active` is refreshed on every successful call path.

    Returns:
        True when the file was rewritten; False when the bucket could not
        be found, loaded, or written.
    """
    path = self._find_bucket_file(bucket_id)
    if not path:
        return False

    try:
        post = frontmatter.load(path)
    except Exception as e:
        logger.warning(f"Failed to load bucket for update / 加载桶失败: {path}: {e}")
        return False

    # New body text is re-linked using the incoming metadata when supplied,
    # otherwise the values already stored in the file.
    if "content" in kwargs:
        post.content = self._apply_wikilinks(
            kwargs["content"],
            kwargs.get("tags", post.get("tags", [])),
            kwargs.get("domain", post.get("domain", [])),
            kwargs.get("name", post.get("name", "")),
        )

    # Field name -> normaliser, applied in this fixed order.
    normalisers = (
        ("tags", lambda v: v),
        ("importance", lambda v: max(1, min(10, int(v)))),
        ("domain", lambda v: v),
        ("valence", lambda v: max(0.0, min(1.0, float(v)))),
        ("arousal", lambda v: max(0.0, min(1.0, float(v)))),
        ("name", sanitize_name),
        ("resolved", bool),
    )
    for field, normalise in normalisers:
        if field in kwargs:
            post[field] = normalise(kwargs[field])

    # Any update counts as activity.
    post["last_active"] = now_iso()

    try:
        with open(path, "w", encoding="utf-8") as handle:
            handle.write(frontmatter.dumps(post))
    except OSError as e:
        logger.error(f"Failed to write bucket update / 写入桶更新失败: {path}: {e}")
        return False

    logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
    return True
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Wikilink injection
|
|
||||||
# 自动添加 Obsidian 双链
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _apply_wikilinks(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
tags: list[str],
|
|
||||||
domain: list[str],
|
|
||||||
name: str,
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
|
||||||
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
|
||||||
"""
|
|
||||||
if not self.wikilink_enabled or not content:
|
|
||||||
return content
|
|
||||||
|
|
||||||
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
|
||||||
if not keywords:
|
|
||||||
return content
|
|
||||||
|
|
||||||
# Split on existing wikilinks to avoid wrapping them again
|
|
||||||
# 按已有双链切分,避免重复包裹
|
|
||||||
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
|
||||||
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
|
||||||
for i, segment in enumerate(segments):
|
|
||||||
if segment.startswith("[[") and segment.endswith("]]"):
|
|
||||||
continue
|
|
||||||
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
|
||||||
segments[i] = updated
|
|
||||||
return "".join(segments)
|
|
||||||
|
|
||||||
def _collect_wikilink_keywords(
|
|
||||||
self,
|
|
||||||
content: str,
|
|
||||||
tags: list[str],
|
|
||||||
domain: list[str],
|
|
||||||
name: str,
|
|
||||||
) -> list[str]:
|
|
||||||
"""
|
|
||||||
Collect candidate keywords from tags/domain/auto-extraction.
|
|
||||||
汇总候选关键词:可选 tags/domain + 自动提词。
|
|
||||||
"""
|
|
||||||
candidates = []
|
|
||||||
|
|
||||||
if self.wikilink_use_tags:
|
|
||||||
candidates.extend(tags or [])
|
|
||||||
if self.wikilink_use_domain:
|
|
||||||
candidates.extend(domain or [])
|
|
||||||
if name:
|
|
||||||
candidates.append(name)
|
|
||||||
if self.wikilink_use_auto_keywords:
|
|
||||||
candidates.extend(self._extract_auto_keywords(content))
|
|
||||||
|
|
||||||
return self._normalize_keywords(candidates)
|
|
||||||
|
|
||||||
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
|
||||||
"""
|
|
||||||
Deduplicate and sort by length (longer first to avoid short words
|
|
||||||
breaking long ones during replacement).
|
|
||||||
去重并按长度排序,优先替换长词。
|
|
||||||
"""
|
|
||||||
if not keywords:
|
|
||||||
return []
|
|
||||||
|
|
||||||
seen = set()
|
|
||||||
cleaned = []
|
|
||||||
for keyword in keywords:
|
|
||||||
if not isinstance(keyword, str):
|
|
||||||
continue
|
|
||||||
kw = keyword.strip()
|
|
||||||
if len(kw) < self.wikilink_min_len:
|
|
||||||
continue
|
|
||||||
if kw in self.wikilink_exclude_keywords:
|
|
||||||
continue
|
|
||||||
if kw.lower() in self.wikilink_stopwords:
|
|
||||||
continue
|
|
||||||
if kw in seen:
|
|
||||||
continue
|
|
||||||
seen.add(kw)
|
|
||||||
cleaned.append(kw)
|
|
||||||
|
|
||||||
return sorted(cleaned, key=len, reverse=True)
|
|
||||||
|
|
||||||
def _extract_auto_keywords(self, content: str) -> list[str]:
|
|
||||||
"""
|
|
||||||
Auto-extract keywords from body text, prioritizing high-frequency words.
|
|
||||||
从正文自动提词,优先高频词。
|
|
||||||
"""
|
|
||||||
if not content:
|
|
||||||
return []
|
|
||||||
|
|
||||||
try:
|
|
||||||
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
|
|
||||||
except Exception:
|
|
||||||
zh_words = []
|
|
||||||
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
|
|
||||||
|
|
||||||
# Chinese bigrams / 中文双词组合
|
|
||||||
zh_bigrams = []
|
|
||||||
for i in range(len(zh_words) - 1):
|
|
||||||
left = zh_words[i]
|
|
||||||
right = zh_words[i + 1]
|
|
||||||
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
|
|
||||||
continue
|
|
||||||
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
|
|
||||||
continue
|
|
||||||
if len(left + right) > 8:
|
|
||||||
continue
|
|
||||||
zh_bigrams.append(left + right)
|
|
||||||
|
|
||||||
merged = []
|
|
||||||
for word in zh_words + zh_bigrams + en_words:
|
|
||||||
if len(word) < self.wikilink_min_len:
|
|
||||||
continue
|
|
||||||
if re.fullmatch(r"\d+", word):
|
|
||||||
continue
|
|
||||||
if word.lower() in self.wikilink_stopwords:
|
|
||||||
continue
|
|
||||||
merged.append(word)
|
|
||||||
|
|
||||||
if not merged:
|
|
||||||
return []
|
|
||||||
|
|
||||||
counter = Counter(merged)
|
|
||||||
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Delete bucket
|
|
||||||
# 删除桶
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def delete(self, bucket_id: str) -> bool:
    """
    Remove the file backing a memory bucket.

    Returns True when the file was found and removed, False when no file
    matches the ID or the removal fails with an OSError (which is logged).
    """
    target = self._find_bucket_file(bucket_id)
    if target:
        try:
            os.remove(target)
        except OSError as exc:
            logger.error(f"Failed to delete bucket file / 删除桶文件失败: {target}: {exc}")
            return False
        else:
            logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
            return True
    return False
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Touch bucket (refresh activation time + increment count)
|
|
||||||
# 触碰桶(刷新激活时间 + 累加激活次数)
|
|
||||||
# Called on every recall hit; affects decay score.
|
|
||||||
# 每次检索命中时调用,影响衰减得分。
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def touch(self, bucket_id: str) -> None:
    """
    Refresh a bucket's last activation time and increment its counter.

    Does nothing when no file matches the ID. Any failure while loading,
    serializing or writing is logged as a warning and swallowed, so a
    failed touch never breaks the caller.
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return

    try:
        post = frontmatter.load(file_path)
        post["last_active"] = now_iso()

        # Tolerate a missing, null or non-numeric counter instead of
        # failing every touch for that bucket.
        try:
            previous = int(post.get("activation_count", 0) or 0)
        except (TypeError, ValueError):
            previous = 0
        post["activation_count"] = previous + 1

        # Serialize BEFORE opening the file: open(..., "w") truncates
        # immediately, so a serialization error raised afterwards would
        # leave the bucket file empty.
        serialized = frontmatter.dumps(post)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(serialized)
    except Exception as e:
        logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Multi-dimensional search (core feature)
|
|
||||||
# 多维搜索(核心功能)
|
|
||||||
#
|
|
||||||
# Strategy: domain pre-filter → weighted multi-dim ranking
|
|
||||||
# 策略:主题域预筛 → 多维加权精排
|
|
||||||
#
|
|
||||||
# Ranking formula:
|
|
||||||
# total = topic(×w_topic) + emotion(×w_emotion)
|
|
||||||
# + time(×w_time) + importance(×w_importance)
|
|
||||||
#
|
|
||||||
# Per-dimension scores (normalized to 0~1):
|
|
||||||
# topic = rapidfuzz weighted match (name/tags/domain/body)
|
|
||||||
# emotion = 1 - Euclidean distance (query v/a vs bucket v/a)
|
|
||||||
# time = e^(-0.02 × days) (recent memories first)
|
|
||||||
# importance = importance / 10
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def search(
    self,
    query: str,
    limit: int = None,
    domain_filter: list[str] = None,
    query_valence: float = None,
    query_arousal: float = None,
) -> list[dict]:
    """
    Multi-dimensional search over non-archived memory buckets.

    Args:
        query: Search text; an empty or whitespace-only query returns [].
        limit: Maximum number of results (falls back to self.max_results).
        domain_filter: Optional domains used as a case-insensitive
            pre-filter. If no bucket matches, all buckets are searched.
        query_valence / query_arousal: Optional emotion coordinates passed
            to the emotion sub-score.

    Returns:
        Buckets whose normalized score (0~100) reaches self.fuzzy_threshold,
        each with a "score" key added, sorted by score descending.
    """
    if not query or not query.strip():
        return []

    limit = limit or self.max_results
    all_buckets = await self.list_all(include_archive=False)

    if not all_buckets:
        return []

    def _domain_set(bucket: dict) -> set:
        # Metadata is user-editable: "domain" may be missing, null, or a
        # bare scalar. Normalize instead of letting one malformed bucket
        # abort the whole search with a TypeError.
        raw = (bucket.get("metadata") or {}).get("domain") or []
        if not isinstance(raw, (list, tuple, set, frozenset)):
            raw = [raw]
        return {str(d).lower() for d in raw}

    # --- Layer 1: domain pre-filter (fast scope reduction) ---
    candidates = all_buckets
    if domain_filter:
        filter_set = {str(d).lower() for d in domain_filter}
        narrowed = [b for b in all_buckets if _domain_set(b) & filter_set]
        # Fall back to the full set if the pre-filter yields nothing.
        if narrowed:
            candidates = narrowed

    # --- Layer 2: weighted multi-dimensional ranking ---
    # The weight sum does not depend on the bucket, so compute it once.
    weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance

    scored = []
    for bucket in candidates:
        meta = bucket.get("metadata", {})

        try:
            # Dim 1: topic relevance (fuzzy text, 0~1)
            topic_score = self._calc_topic_score(query, bucket)

            # Dim 2: emotion resonance (coordinate distance, 0~1)
            emotion_score = self._calc_emotion_score(
                query_valence, query_arousal, meta
            )

            # Dim 3: time proximity (exponential decay, 0~1)
            time_score = self._calc_time_score(meta)

            # Dim 4: importance, clamped to 1..10 then scaled to 0.1~1
            importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0

            total = (
                topic_score * self.w_topic
                + emotion_score * self.w_emotion
                + time_score * self.w_time
                + importance_score * self.w_importance
            )
            # Normalize to 0~100 for readability
            normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0

            # Resolved buckets are down-ranked but remain reachable.
            if meta.get("resolved", False):
                normalized *= 0.3

            if normalized >= self.fuzzy_threshold:
                bucket["score"] = round(normalized, 2)
                scored.append(bucket)
        except Exception as e:
            # One bad bucket must not prevent the others from being ranked.
            logger.warning(
                f"Scoring failed for bucket {bucket.get('id', '?')} / "
                f"桶评分失败: {e}"
            )
            continue

    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored[:limit]
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Topic relevance sub-score:
|
|
||||||
# name(×3) + domain(×2.5) + tags(×2) + body(×1)
|
|
||||||
# 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1)
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _calc_topic_score(self, query: str, bucket: dict) -> float:
    """
    Score how well the query matches a bucket's text fields (0~1).

    Uses fuzz.partial_ratio against the name (weight 3), the best-matching
    domain (2.5), the best-matching tag (2) and the first 500 characters
    of the body (1), divided by the maximum attainable weighted total.
    """
    meta = bucket.get("metadata", {})

    def _best(values):
        # Highest partial-ratio among the values; 0 when there are none.
        return max((fuzz.partial_ratio(query, value) for value in values), default=0)

    weighted = (
        fuzz.partial_ratio(query, meta.get("name", "")) * 3
        + _best(meta.get("domain", [])) * 2.5
        + _best(meta.get("tags", [])) * 2
        + fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1
    )
    # 8.5 is the sum of the four weights; each ratio is on a 0~100 scale.
    return weighted / (100 * 8.5)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Emotion resonance sub-score:
|
|
||||||
# Based on Russell circumplex Euclidean distance
|
|
||||||
# 情感共鸣子分:基于环形情感模型的欧氏距离
|
|
||||||
# No emotion in query → neutral 0.5 (doesn't affect ranking)
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _calc_emotion_score(
|
|
||||||
self, q_valence: float, q_arousal: float, meta: dict
|
|
||||||
) -> float:
|
|
||||||
"""
|
|
||||||
Calculate emotion resonance score (0~1, closer = higher).
|
|
||||||
计算情感共鸣度(0~1,越近越高)。
|
|
||||||
"""
|
|
||||||
if q_valence is None or q_arousal is None:
|
|
||||||
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
|
|
||||||
|
|
||||||
try:
|
|
||||||
b_valence = float(meta.get("valence", 0.5))
|
|
||||||
b_arousal = float(meta.get("arousal", 0.3))
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
return 0.5
|
|
||||||
|
|
||||||
# Euclidean distance, max sqrt(2) ≈ 1.414
|
|
||||||
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
|
|
||||||
return max(0.0, 1.0 - dist / 1.414)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Time proximity sub-score:
|
|
||||||
# More recent activation → higher score
|
|
||||||
# 时间亲近子分:距上次激活越近分越高
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _calc_time_score(self, meta: dict) -> float:
|
|
||||||
"""
|
|
||||||
Calculate time proximity score (0~1, more recent = higher).
|
|
||||||
计算时间亲近度。
|
|
||||||
"""
|
|
||||||
last_active_str = meta.get("last_active", meta.get("created", ""))
|
|
||||||
try:
|
|
||||||
last_active = datetime.fromisoformat(str(last_active_str))
|
|
||||||
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
|
||||||
except (ValueError, TypeError):
|
|
||||||
days = 30
|
|
||||||
return math.exp(-0.02 * days)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# List all buckets
|
|
||||||
# 列出所有桶
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def list_all(self, include_archive: bool = False) -> list[dict]:
    """
    Load every bucket found under the storage directories.

    Walks the permanent and dynamic directories (and the archive directory
    when include_archive is True), including their subdirectories, and
    loads each ".md" file with _load_bucket(). Files that fail to load
    are skipped; directories that do not exist are ignored.
    """
    roots = [self.permanent_dir, self.dynamic_dir]
    if include_archive:
        roots.append(self.archive_dir)

    found = []
    for base in roots:
        if not os.path.exists(base):
            continue
        for current, _subdirs, names in os.walk(base):
            loaded = (
                self._load_bucket(os.path.join(current, entry))
                for entry in names
                if entry.endswith(".md")
            )
            found.extend(bucket for bucket in loaded if bucket)

    return found
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Statistics (counts per category + total size)
|
|
||||||
# 统计信息(各分类桶数量 + 总体积)
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def get_stats(self) -> dict:
    """
    Count bucket files per storage category and per domain subdirectory.

    Returns a dict with "permanent_count", "dynamic_count", "archive_count",
    "total_size_kb" (sum of ".md" file sizes in KiB) and "domains", which
    maps a directory name to the number of ".md" files directly inside it
    for every directory whose name differs from its category root's name.
    """
    stats = {
        "permanent_count": 0,
        "dynamic_count": 0,
        "archive_count": 0,
        "total_size_kb": 0.0,
        "domains": {},
    }
    sections = (
        (self.permanent_dir, "permanent_count"),
        (self.dynamic_dir, "dynamic_count"),
        (self.archive_dir, "archive_count"),
    )

    for base, counter_key in sections:
        if not os.path.exists(base):
            continue
        base_name = os.path.basename(base)
        for current, _subdirs, names in os.walk(base):
            bucket_dir = os.path.basename(current)
            for entry in names:
                if not entry.endswith(".md"):
                    continue
                stats[counter_key] += 1
                try:
                    stats["total_size_kb"] += (
                        os.path.getsize(os.path.join(current, entry)) / 1024
                    )
                except OSError:
                    # Size is informational only; skip unreadable files.
                    pass
                # Files sitting directly in the category root have no domain.
                if bucket_dir != base_name:
                    stats["domains"][bucket_dir] = stats["domains"].get(bucket_dir, 0) + 1

    return stats
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Archive bucket (move from permanent/dynamic into archive)
|
|
||||||
# 归档桶(从 permanent/dynamic 移入 archive)
|
|
||||||
# Called by decay engine to simulate "forgetting"
|
|
||||||
# 由衰减引擎调用,模拟"遗忘"
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def archive(self, bucket_id: str) -> bool:
    """
    Move a bucket file into the archive directory.

    The file is placed under archive_dir/<primary domain>/ and its "type"
    metadata is set to "archived" before the move.

    Returns:
        True on success; False when no file matches the ID or any step
        fails (the failure is logged).
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return False

    try:
        # Read once: gives both the domain and the post to re-serialize.
        post = frontmatter.load(file_path)
        domain = post.get("domain", ["未分类"])
        # A scalar "domain: work" would otherwise be indexed to "w".
        if isinstance(domain, str):
            domain = [domain]
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        archive_subdir = os.path.join(self.archive_dir, primary_domain)
        os.makedirs(archive_subdir, exist_ok=True)

        dest = safe_path(archive_subdir, os.path.basename(file_path))

        # Update the type marker, then serialize BEFORE opening the file:
        # open(..., "w") truncates immediately, so a serialization error
        # raised afterwards would leave the bucket file empty.
        post["type"] = "archived"
        serialized = frontmatter.dumps(post)
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(serialized)

        # shutil.move also works across filesystems.
        shutil.move(file_path, str(dest))
    except Exception as e:
        logger.error(
            f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
        )
        return False

    logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
    return True
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Internal: find bucket file across all three directories
|
|
||||||
# 内部:在三个目录中查找桶文件
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
|
|
||||||
"""
|
|
||||||
Recursively search permanent/dynamic/archive for a bucket file
|
|
||||||
matching the given ID.
|
|
||||||
在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。
|
|
||||||
"""
|
|
||||||
if not bucket_id:
|
|
||||||
return None
|
|
||||||
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
|
||||||
if not os.path.exists(dir_path):
|
|
||||||
continue
|
|
||||||
for root, _, files in os.walk(dir_path):
|
|
||||||
for fname in files:
|
|
||||||
if not fname.endswith(".md"):
|
|
||||||
continue
|
|
||||||
# Match by exact ID segment in filename
|
|
||||||
# 通过文件名中的 ID 片段精确匹配
|
|
||||||
if bucket_id in fname:
|
|
||||||
return os.path.join(root, fname)
|
|
||||||
return None
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Internal: load bucket data from .md file
|
|
||||||
# 内部:从 .md 文件加载桶数据
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _load_bucket(self, file_path: str) -> Optional[dict]:
    """
    Parse a Markdown file with front matter into a bucket dict.

    Returns a dict with "id" (front-matter id, or the file stem when
    absent), "metadata", "content" and "path". Returns None and logs a
    warning when the file cannot be loaded.
    """
    try:
        parsed = frontmatter.load(file_path)
        bucket = {
            "id": parsed.get("id", Path(file_path).stem),
            "metadata": dict(parsed.metadata),
            "content": parsed.content,
            "path": file_path,
        }
    except Exception as exc:
        logger.warning(
            f"Failed to load bucket file / 加载桶文件失败: {file_path}: {exc}"
        )
        return None
    return bucket
|
|
||||||
@@ -1,242 +0,0 @@
|
|||||||
# ============================================================
|
|
||||||
# Module: Memory Decay Engine (decay_engine.py)
|
|
||||||
# 模块:记忆衰减引擎
|
|
||||||
#
|
|
||||||
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
|
|
||||||
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
|
|
||||||
#
|
|
||||||
# Core formula (improved Ebbinghaus + emotion coordinates):
|
|
||||||
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
|
|
||||||
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
|
|
||||||
#
|
|
||||||
# Emotion weight (continuous coordinate, not discrete labels):
|
|
||||||
# 情感权重(基于连续坐标而非离散列举):
|
|
||||||
# emotion_weight = base + (arousal × arousal_boost)
|
|
||||||
# Higher arousal → higher emotion weight → slower decay
|
|
||||||
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
|
|
||||||
#
|
|
||||||
# Depended on by: server.py
|
|
||||||
# 被谁依赖:server.py
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
import math
|
|
||||||
import asyncio
|
|
||||||
import logging
|
|
||||||
from datetime import datetime
|
|
||||||
|
|
||||||
logger = logging.getLogger("ombre_brain.decay")
|
|
||||||
|
|
||||||
|
|
||||||
class DecayEngine:
    """
    Memory decay engine.

    Periodically scores the buckets returned by the bucket manager and
    asks the manager to archive those scoring below the configured
    threshold, simulating natural forgetting.

    Score = importance * activation_count^0.3 * e^(-lambda * days)
            * (emotion_base + arousal * arousal_boost)
    followed by the resolved / urgency modifiers in calculate_score().
    """

    def __init__(self, config: dict, bucket_mgr):
        # --- Decay parameters (config["decay"]) ---
        decay_cfg = config.get("decay", {})
        self.decay_lambda = decay_cfg.get("lambda", 0.05)
        self.threshold = decay_cfg.get("threshold", 0.3)
        self.check_interval = decay_cfg.get("check_interval_hours", 24)

        # --- Emotion weight parameters (continuous arousal coordinate) ---
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)

        self.bucket_mgr = bucket_mgr

        # --- Background task control ---
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the background decay loop has been started and not stopped."""
        return self._running

    def calculate_score(self, metadata: dict) -> float:
        """
        Calculate the current activity score of a bucket from its metadata.

        Returns 0.0 for non-dict input and 999.0 for permanent buckets.
        Raises ValueError/TypeError if "importance" or "activation_count"
        cannot be converted to int (run_decay_cycle catches and logs this).
        """
        if not isinstance(metadata, dict):
            return 0.0

        # Permanent buckets never decay.
        if metadata.get("type") == "permanent":
            return 999.0

        importance = max(1, min(10, int(metadata.get("importance", 5))))
        activation_count = max(1, int(metadata.get("activation_count", 1)))

        # --- Days since last activation ---
        # `or` also covers a "last_active" key that is present but empty.
        last_active_str = metadata.get("last_active") or metadata.get("created", "")
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            # Take "now" in the timestamp's own timezone: mixing naive and
            # aware datetimes raises TypeError, which previously made every
            # timezone-aware bucket count as 30 days old.
            now = datetime.now(tz=last_active.tzinfo)
            days_since = max(0.0, (now - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days_since = 30  # parse failure -> assume 30 days

        # --- Emotion weight: higher arousal -> larger weight -> slower decay ---
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost

        score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )

        # --- Weight pool modifiers ---
        is_resolved = metadata.get("resolved", False)
        # Resolved buckets drop to 5% of their score.
        resolved_factor = 0.05 if is_resolved else 1.0
        # High-arousal unresolved buckets get a 1.5x boost.
        urgency_boost = 1.5 if (arousal > 0.7 and not is_resolved) else 1.0

        return round(score * resolved_factor * urgency_boost, 4)

    async def run_decay_cycle(self) -> dict:
        """
        Run one decay pass and archive buckets scoring below the threshold.

        Returns:
            {"checked": N, "archived": N, "lowest_score": X}, plus an
            "error" key when the buckets could not be listed.
            "lowest_score" is 0 when no bucket could be scored.
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}

        checked = 0
        archived = 0
        # None until at least one score is computed. Starting at inf leaked
        # float("inf") into the result when every calculation failed.
        lowest_score = None

        for bucket in buckets:
            meta = bucket.get("metadata", {})

            # Skip permanent buckets.
            if meta.get("type") == "permanent":
                continue

            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue

            lowest_score = score if lowest_score is None else min(lowest_score, score)

            # --- Below threshold -> archive (simulate forgetting) ---
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )

        result = {
            "checked": checked,
            "archived": archived,
            "lowest_score": lowest_score if lowest_score is not None else 0,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    async def ensure_started(self) -> None:
        """Start the background loop if it is not already running."""
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop (no-op when already running)."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and wait for the task to finish."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so a later stop() does not re-await it.
            self._task = None
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop: run a decay cycle, sleep check_interval hours, repeat."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # --- Wait for the next cycle ---
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break
|
|
||||||
@@ -1,536 +0,0 @@
|
|||||||
# ============================================================
|
|
||||||
# Module: MCP Server Entry Point (server.py)
|
|
||||||
# 模块:MCP 服务器主入口
|
|
||||||
#
|
|
||||||
# Starts the Ombre Brain MCP service and registers memory
|
|
||||||
# operation tools for Claude to call.
|
|
||||||
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
|
|
||||||
#
|
|
||||||
# Core responsibilities:
|
|
||||||
# 核心职责:
|
|
||||||
# - Initialize config, bucket manager, dehydrator, decay engine
|
|
||||||
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
|
|
||||||
# - Expose 5 MCP tools:
|
|
||||||
# 暴露 5 个 MCP 工具:
|
|
||||||
# breath — Surface unresolved memories or search by keyword
|
|
||||||
# 浮现未解决记忆 或 按关键词检索
|
|
||||||
# hold — Store a single memory
|
|
||||||
# 存储单条记忆
|
|
||||||
# grow — Diary digest, auto-split into multiple buckets
|
|
||||||
# 日记归档,自动拆分多桶
|
|
||||||
# trace — Modify metadata / resolved / delete
|
|
||||||
# 修改元数据 / resolved 标记 / 删除
|
|
||||||
# pulse — System status + bucket listing
|
|
||||||
# 系统状态 + 所有桶列表
|
|
||||||
#
|
|
||||||
# Startup:
|
|
||||||
# 启动方式:
|
|
||||||
# Local: python server.py
|
|
||||||
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
|
|
||||||
# Docker: docker-compose up
|
|
||||||
# ============================================================
|
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import random
|
|
||||||
import logging
|
|
||||||
import asyncio
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
# --- Ensure same-directory modules can be imported ---
|
|
||||||
# --- 确保同目录下的模块能被正确导入 ---
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
|
|
||||||
from mcp.server.fastmcp import FastMCP
|
|
||||||
|
|
||||||
from bucket_manager import BucketManager
|
|
||||||
from dehydrator import Dehydrator
|
|
||||||
from decay_engine import DecayEngine
|
|
||||||
from utils import load_config, setup_logging
|
|
||||||
|
|
||||||
# --- Load config & init logging ---
config = load_config()
setup_logging(config.get("log_level", "INFO"))
logger = logging.getLogger("ombre_brain")

# --- Initialize three core components ---
bucket_mgr = BucketManager(config)  # Bucket manager
dehydrator = Dehydrator(config)  # Dehydrator
decay_engine = DecayEngine(config, bucket_mgr)  # Decay engine

# --- Create MCP server instance ---
# host="0.0.0.0" so Docker container's SSE is externally reachable
# stdio mode ignores host (no network)
mcp = FastMCP(
    "Ombre Brain",
    host="0.0.0.0",
    port=8000,
)
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# /health endpoint: lightweight keepalive
|
|
||||||
# 轻量保活接口
|
|
||||||
# For Cloudflare Tunnel or reverse proxy to ping, preventing idle timeout
|
|
||||||
# 供 Cloudflare Tunnel 或反代定期 ping,防止空闲超时断连
|
|
||||||
# =============================================================
|
|
||||||
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """
    Lightweight keepalive endpoint.

    Responds with the number of permanent + dynamic buckets and the decay
    engine state, or with a 500 JSON error body if gathering them fails.
    """
    from starlette.responses import JSONResponse

    try:
        counts = await bucket_mgr.get_stats()
        live_buckets = counts["permanent_count"] + counts["dynamic_count"]
        engine_state = "running" if decay_engine.is_running else "stopped"
        payload = {
            "status": "ok",
            "buckets": live_buckets,
            "decay_engine": engine_state,
        }
        return JSONResponse(payload)
    except Exception as exc:
        return JSONResponse({"status": "error", "detail": str(exc)}, status_code=500)
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# Internal helper: merge-or-create
|
|
||||||
# 内部辅助:检查是否可合并,可以则合并,否则新建
|
|
||||||
# Shared by hold and grow to avoid duplicate logic
|
|
||||||
# hold 和 grow 共用,避免重复逻辑
|
|
||||||
# =============================================================
|
|
||||||
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Merge ``content`` into the most similar existing bucket, or create a new one.

    The top search hit is merged into when its ``score`` is strictly greater
    than ``config["merge_threshold"]`` (default 75). If the search or the
    merge fails, a warning is logged and a new bucket is created instead.

    Args:
        content: Text to store.
        tags: Tags of the new content; unioned with the existing tags on merge.
        importance: Importance of the new content; on merge the larger of
            the stored and the new value is kept.
        domain: Topic domains; unioned with the existing domains on merge.
        valence: Emotional valence written to the bucket (overwrites on merge).
        arousal: Emotional arousal written to the bucket (overwrites on merge).
        name: Optional name for a newly created bucket; empty means unnamed.

    Returns:
        ``(name_or_id, True)`` after a merge (the existing bucket's ``name``
        metadata, falling back to its id), or ``(new_bucket_id, False)``
        when a new bucket was created.
    """
    try:
        existing = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        existing = []

    if existing and existing[0].get("score", 0) > config.get("merge_threshold", 75):
        bucket = existing[0]
        try:
            meta = bucket["metadata"]
            merged = await dehydrator.merge(bucket["content"], content)
            # Order-preserving union (existing entries first). A set() would
            # scramble the order, and BucketManager derives the storage
            # subfolder from domain[0], so the primary domain must stay put.
            merged_tags = list(dict.fromkeys(meta.get("tags", []) + tags))
            merged_domain = list(dict.fromkeys(meta.get("domain", []) + domain))
            await bucket_mgr.update(
                bucket["id"],
                content=merged,
                tags=merged_tags,
                importance=max(meta.get("importance", 5), importance),
                domain=merged_domain,
                valence=valence,
                arousal=arousal,
            )
            return meta.get("name", bucket["id"]), True
        except Exception as e:
            # Best effort: fall through and store the content as a new bucket.
            logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    bucket_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return bucket_id, False
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# Tool 1: breath — Breathe
|
|
||||||
# 工具 1:breath — 呼吸
|
|
||||||
#
|
|
||||||
# No args: surface highest-weight unresolved memories (active push)
|
|
||||||
# 无参数:浮现权重最高的未解决记忆
|
|
||||||
# With args: search by keyword + emotion coordinates
|
|
||||||
# 有参数:按关键词+情感坐标检索记忆
|
|
||||||
# =============================================================
|
|
||||||
@mcp.tool()
async def breath(
    query: str = "",
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    """检索记忆或浮现未解决记忆。query 为空时自动推送权重最高的未解决桶;有 query 时按关键词+情感检索。domain 逗号分隔,valence/arousal 传 0~1 启用情感共鸣,-1 忽略。"""
    # NOTE(review): the docstring is kept verbatim because FastMCP presumably
    # publishes it as the tool description (runtime text) - confirm.
    #
    # English summary:
    #   * Empty query  -> surfacing mode: return up to two unresolved,
    #     non-permanent buckets with the highest decay score.
    #   * With a query -> search mode: keyword search, optionally filtered by
    #     comma-separated ``domain`` and by ``valence``/``arousal`` (only
    #     values within 0..1 are used; anything else disables that axis).
    #   * In search mode, when fewer than 3 matches are found there is a 40%
    #     chance that 1-3 low-score buckets are appended as "random drift".
    # Always returns a human-readable string, including on failure.
    await decay_engine.ensure_started()

    # --- Surfacing mode: no query, push the heaviest unresolved buckets ---
    if not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"

        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
        ]
        if not unresolved:
            return "权重池平静,没有需要处理的记忆。"

        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        results = []
        for b in scored[:2]:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue
        if not results:
            return "权重池平静,没有需要处理的记忆。"
        return "=== 浮现记忆 ===\n" + "\n---\n".join(results)

    # --- Search mode ---
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None

    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"

    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue

    # --- Random drift: with < 3 matches, 40% chance to float old memories ---
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    # Per-item handling: one failing bucket must not throw
                    # away the drift memories that were already summarised.
                    try:
                        summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    except Exception as e:
                        logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")
                        continue
                    drift_results.append(f"[surface_type: random]\n{summary}")
                if drift_results:
                    results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")

    if not results:
        return "未找到相关记忆。"

    return "\n---\n".join(results)
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# Tool 2: hold — Hold on to this
|
|
||||||
# 工具 2:hold — 握住,留下来
|
|
||||||
# =============================================================
|
|
||||||
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
) -> str:
    """存储单条记忆。自动打标+合并相似桶。tags 逗号分隔,importance 1-10。"""
    # NOTE(review): the docstring is kept verbatim because FastMCP presumably
    # publishes it as the tool description (runtime text) - confirm.
    #
    # English summary: store one memory. The content is auto-tagged by the
    # dehydrator, then merged into a similar bucket or saved as a new one.
    # ``tags`` is a comma-separated string; ``importance`` is clamped to 1..10.
    # Returns a human-readable status string.
    await decay_engine.ensure_started()

    # --- Input validation ---
    if not content or not content.strip():
        return "内容为空,无法存储。"

    importance = max(1, min(10, importance))
    extra_tags = [t.strip() for t in tags.split(",") if t.strip()]

    # --- Step 1: auto-tagging ---
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {}
    if not analysis:
        analysis = {}

    # Apply the defaults per field, so a partial analysis result (a missing
    # or null key) degrades gracefully instead of raising KeyError.
    domain = analysis.get("domain") or ["未分类"]
    valence = analysis.get("valence")
    if valence is None:
        valence = 0.5
    arousal = analysis.get("arousal")
    if arousal is None:
        arousal = 0.3
    auto_tags = analysis.get("tags") or []
    suggested_name = analysis.get("suggested_name") or ""

    # Order-preserving de-duplication: auto tags first, then caller tags.
    all_tags = list(dict.fromkeys([*auto_tags, *extra_tags]))

    # --- Step 2: merge or create ---
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )

    if is_merged:
        return (
            f"已合并到现有记忆桶: {result_name}\n"
            f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f}"
        )
    return (
        f"已创建新记忆桶: {result_name}\n"
        f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f} | 标签: {', '.join(all_tags)}"
    )
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# Tool 3: grow — Grow, fragments become memories
|
|
||||||
# 工具 3:grow — 生长,一天的碎片长成记忆
|
|
||||||
# =============================================================
|
|
||||||
@mcp.tool()
async def grow(content: str) -> str:
    """日记归档。自动拆分长内容为多个记忆桶。"""
    # NOTE(review): the docstring is kept verbatim because FastMCP presumably
    # publishes it as the tool description (runtime text) - confirm.
    #
    # English summary: diary archiving. The dehydrator splits ``content``
    # into items; each item is merged into a similar bucket or stored as a
    # new one. A failure on one item is reported in the output and does not
    # stop the remaining items. Returns a human-readable report.
    await decay_engine.ensure_started()

    if not content or not content.strip():
        return "内容为空,无法整理。"

    # --- Step 1: let the dehydrator split and organise the diary ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"

    if not items:
        return "内容为空或整理失败。"

    results = []
    created = 0
    merged = 0

    # --- Step 2: merge or create each item, isolating per-item failures ---
    for item in items:
        # Resolve the label before the try block, so the failure path below
        # cannot raise itself when ``item`` is not a dict.
        label = item.get("name") if isinstance(item, dict) else None
        try:
            # Show the same values that are actually stored.
            item_domain = item.get("domain") or ["未分类"]
            item_valence = item.get("valence", 0.5)
            item_arousal = item.get("arousal", 0.3)

            result_name, is_merged = await _merge_or_create(
                content=item["content"],
                tags=item.get("tags", []),
                importance=item.get("importance", 5),
                domain=item_domain,
                valence=item_valence,
                arousal=item_arousal,
                name=label or "",
            )

            if is_merged:
                results.append(f" 📎 合并 → {result_name}")
                merged += 1
            else:
                domains_str = ",".join(item_domain)
                # An empty or missing name falls back to the bucket id.
                results.append(
                    f" 📝 新建 [{label or result_name}] "
                    f"主题:{domains_str} V{item_valence:.1f}/A{item_arousal:.1f}"
                )
                created += 1
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{label or '?'}: {e}"
            )
            results.append(f" ⚠️ 失败: {label or '未知条目'}")

    summary = f"=== 日记整理完成 ===\n拆分为 {len(items)} 条 | 新建 {created} 桶 | 合并 {merged} 桶\n"
    return summary + "\n".join(results)
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# Tool 4: trace — Trace, redraw the outline of a memory
|
|
||||||
# 工具 4:trace — 描摹,重新勾勒记忆的轮廓
|
|
||||||
# Also handles deletion (delete=True)
|
|
||||||
# 同时承接删除功能
|
|
||||||
# =============================================================
|
|
||||||
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1 标记已解决(桶权重骤降沉底),resolved=0 重新激活,delete=True 删除桶。其余字段只传需改的,-1 或空串表示不改。"""
    # NOTE(review): the docstring is kept verbatim because FastMCP presumably
    # publishes it as the tool description (runtime text) - confirm.
    #
    # English summary: edit a bucket's metadata, or delete it when
    # ``delete=True``. Only fields carrying a meaningful value are changed:
    # non-empty ``name``; ``domain``/``tags`` with at least one non-blank
    # comma-separated entry; ``valence``/``arousal`` within 0..1;
    # ``importance`` within 1..10; ``resolved`` equal to 0 or 1.
    # Returns a human-readable status string.

    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"
    # Use the same normalised id that was just validated.
    bucket_id = bucket_id.strip()

    # --- Delete mode ---
    if delete:
        success = await bucket_mgr.delete(bucket_id)
        return f"已遗忘记忆桶: {bucket_id}" if success else f"未找到记忆桶: {bucket_id}"

    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"

    # --- Collect only the fields that were actually passed ---
    updates = {}
    if name:
        updates["name"] = name
    # Input such as "," parses to an empty list; treat it as "not passed"
    # rather than wiping the stored domain / tags.
    domain_list = [d.strip() for d in domain.split(",") if d.strip()]
    if domain_list:
        updates["domain"] = domain_list
    if 0 <= valence <= 1:
        updates["valence"] = valence
    if 0 <= arousal <= 1:
        updates["arousal"] = arousal
    if 1 <= importance <= 10:
        updates["importance"] = importance
    tag_list = [t.strip() for t in tags.split(",") if t.strip()]
    if tag_list:
        updates["tags"] = tag_list
    if resolved in (0, 1):
        updates["resolved"] = bool(resolved)

    if not updates:
        return "没有任何字段需要修改。"

    success = await bucket_mgr.update(bucket_id, **updates)
    if not success:
        return f"修改失败: {bucket_id}"

    changed = ", ".join(f"{k}={v}" for k, v in updates.items())
    # Spell out what a resolved-state change means for surfacing.
    if "resolved" in updates:
        if updates["resolved"]:
            changed += " → 已沉底,只在关键词触发时重新浮现"
        else:
            changed += " → 已重新激活,将参与浮现排序"
    return f"已修改记忆桶 {bucket_id}: {changed}"
|
|
||||||
|
|
||||||
|
|
||||||
# =============================================================
|
|
||||||
# Tool 5: pulse — Heartbeat, system status + memory listing
|
|
||||||
# 工具 5:pulse — 脉搏,系统状态 + 记忆列表
|
|
||||||
# =============================================================
|
|
||||||
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """系统状态和所有记忆桶摘要。include_archive=True 时包含归档桶。"""
    # NOTE(review): the docstring is kept verbatim because FastMCP presumably
    # publishes it as the tool description (runtime text) - confirm.
    #
    # English summary: report system statistics followed by a one-line
    # summary per bucket. Archived buckets are listed only when
    # ``include_archive`` is true. Returns a human-readable string.
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"

    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']} 个\n"
        f"动态记忆桶: {stats['dynamic_count']} 个\n"
        f"归档记忆桶: {stats['archive_count']} 个\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {'运行中' if decay_engine.is_running else '已停止'}\n"
    )

    # --- List all bucket summaries ---
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        return status + f"\n列出记忆桶失败: {e}"

    if not buckets:
        return status + "\n记忆库为空。"

    lines = []
    for b in buckets:
        meta = b.get("metadata", {})
        bucket_type = meta.get("type")
        is_resolved = meta.get("resolved", False)
        if bucket_type == "permanent":
            icon = "📦"
        elif bucket_type == "archived":
            icon = "🗄️"
        elif is_resolved:
            icon = "✅"
        else:
            icon = "💭"
        try:
            score = decay_engine.calculate_score(meta)
        except Exception as e:
            # Still best effort (shown as 0.00), but no longer silent.
            logger.warning(f"Failed to score bucket {b.get('id', '?')}: {e}")
            score = 0.0
        # Coerce entries to str: metadata may contain non-string items
        # (e.g. a numeric tag), which would make str.join raise and abort
        # the whole listing.
        domains = ",".join(str(d) for d in (meta.get("domain") or []))
        tags_str = ",".join(str(t) for t in (meta.get("tags") or []))
        val = meta.get("valence", 0.5)
        aro = meta.get("arousal", 0.3)
        resolved_tag = " [已解决]" if is_resolved else ""
        lines.append(
            f"{icon} [{meta.get('name', b['id'])}]{resolved_tag} "
            f"主题:{domains} "
            f"情感:V{val:.1f}/A{aro:.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{score:.2f} "
            f"标签:{tags_str}"
        )

    return status + "\n=== 记忆列表 ===\n" + "\n".join(lines)
|
|
||||||
|
|
||||||
|
|
||||||
# --- Entry point / 启动入口 ---
|
|
||||||
if __name__ == "__main__":
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")

    # Application-level keepalive, remote transports only: after a 10 s
    # start-up delay, GET /health once and then again after every 60 s pause,
    # so that a tunnel / reverse proxy does not drop the idle connection.
    if transport in ("sse", "streamable-http"):
        import threading

        async def _keepalive_loop():
            # Give the server time to come up before the first ping.
            await asyncio.sleep(10)
            async with httpx.AsyncClient() as http:
                while True:
                    try:
                        await http.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as err:
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {err}")
                    await asyncio.sleep(60)

        def _start_keepalive():
            # The pinger runs on its own event loop inside a daemon thread.
            private_loop = asyncio.new_event_loop()
            private_loop.run_until_complete(_keepalive_loop())

        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()

    mcp.run(transport=transport)
|
|
||||||
@@ -28,15 +28,12 @@
|
|||||||
import os
|
import os
|
||||||
import math
|
import math
|
||||||
import logging
|
import logging
|
||||||
import re
|
|
||||||
import shutil
|
import shutil
|
||||||
from collections import Counter
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
import frontmatter
|
import frontmatter
|
||||||
import jieba
|
|
||||||
from rapidfuzz import fuzz
|
from rapidfuzz import fuzz
|
||||||
|
|
||||||
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
||||||
@@ -54,12 +51,13 @@ class BucketManager:
|
|||||||
天然兼容 Obsidian 直接浏览和编辑。
|
天然兼容 Obsidian 直接浏览和编辑。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config: dict):
|
def __init__(self, config: dict, embedding_engine=None):
|
||||||
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
||||||
self.base_dir = config["buckets_dir"]
|
self.base_dir = config["buckets_dir"]
|
||||||
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
||||||
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
||||||
self.archive_dir = os.path.join(self.base_dir, "archive")
|
self.archive_dir = os.path.join(self.base_dir, "archive")
|
||||||
|
self.feel_dir = os.path.join(self.base_dir, "feel")
|
||||||
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
||||||
self.max_results = config.get("matching", {}).get("max_results", 5)
|
self.max_results = config.get("matching", {}).get("max_results", 5)
|
||||||
|
|
||||||
@@ -87,9 +85,12 @@ class BucketManager:
|
|||||||
scoring = config.get("scoring_weights", {})
|
scoring = config.get("scoring_weights", {})
|
||||||
self.w_topic = scoring.get("topic_relevance", 4.0)
|
self.w_topic = scoring.get("topic_relevance", 4.0)
|
||||||
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
||||||
self.w_time = scoring.get("time_proximity", 2.5)
|
self.w_time = scoring.get("time_proximity", 1.5)
|
||||||
self.w_importance = scoring.get("importance", 1.0)
|
self.w_importance = scoring.get("importance", 1.0)
|
||||||
self.content_weight = scoring.get("content_weight", 3.0) # Added to allow better content-based matching during merge
|
self.content_weight = scoring.get("content_weight", 1.0) # body×1, per spec
|
||||||
|
|
||||||
|
# --- Optional embedding engine for pre-filtering / 可选 embedding 引擎,用于预筛候选集 ---
|
||||||
|
self.embedding_engine = embedding_engine
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Create a new bucket
|
# Create a new bucket
|
||||||
@@ -120,9 +121,13 @@ class BucketManager:
|
|||||||
"""
|
"""
|
||||||
bucket_id = generate_bucket_id()
|
bucket_id = generate_bucket_id()
|
||||||
bucket_name = sanitize_name(name) if name else bucket_id
|
bucket_name = sanitize_name(name) if name else bucket_id
|
||||||
|
# feel buckets are allowed to have empty domain; others default to ["未分类"]
|
||||||
|
if bucket_type == "feel":
|
||||||
|
domain = domain if domain is not None else []
|
||||||
|
else:
|
||||||
domain = domain or ["未分类"]
|
domain = domain or ["未分类"]
|
||||||
tags = tags or []
|
tags = tags or []
|
||||||
linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)
|
linked_content = content # wikilink injection disabled; LLM adds [[]] via prompt
|
||||||
|
|
||||||
# --- Pinned/protected buckets: lock importance to 10 ---
|
# --- Pinned/protected buckets: lock importance to 10 ---
|
||||||
# --- 钉选/保护桶:importance 强制锁定为 10 ---
|
# --- 钉选/保护桶:importance 强制锁定为 10 ---
|
||||||
@@ -141,7 +146,7 @@ class BucketManager:
|
|||||||
"type": bucket_type,
|
"type": bucket_type,
|
||||||
"created": now_iso(),
|
"created": now_iso(),
|
||||||
"last_active": now_iso(),
|
"last_active": now_iso(),
|
||||||
"activation_count": 1,
|
"activation_count": 0,
|
||||||
}
|
}
|
||||||
if pinned:
|
if pinned:
|
||||||
metadata["pinned"] = True
|
metadata["pinned"] = True
|
||||||
@@ -154,7 +159,17 @@ class BucketManager:
|
|||||||
|
|
||||||
# --- Choose directory by type + primary domain ---
|
# --- Choose directory by type + primary domain ---
|
||||||
# --- 按类型 + 主题域选择存储目录 ---
|
# --- 按类型 + 主题域选择存储目录 ---
|
||||||
type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
|
if bucket_type == "permanent" or pinned:
|
||||||
|
type_dir = self.permanent_dir
|
||||||
|
if pinned and bucket_type != "permanent":
|
||||||
|
metadata["type"] = "permanent"
|
||||||
|
elif bucket_type == "feel":
|
||||||
|
type_dir = self.feel_dir
|
||||||
|
else:
|
||||||
|
type_dir = self.dynamic_dir
|
||||||
|
if bucket_type == "feel":
|
||||||
|
primary_domain = "沉淀物" # feel subfolder name
|
||||||
|
else:
|
||||||
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
|
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
|
||||||
target_dir = os.path.join(type_dir, primary_domain)
|
target_dir = os.path.join(type_dir, primary_domain)
|
||||||
os.makedirs(target_dir, exist_ok=True)
|
os.makedirs(target_dir, exist_ok=True)
|
||||||
@@ -197,6 +212,25 @@ class BucketManager:
|
|||||||
return None
|
return None
|
||||||
return self._load_bucket(file_path)
|
return self._load_bucket(file_path)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Move bucket between directories
|
||||||
|
# 在目录间移动桶文件
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _move_bucket(self, file_path: str, target_type_dir: str, domain: list[str] = None) -> str:
    """
    Relocate a bucket file under ``target_type_dir``, keeping a domain subfolder.

    The subfolder is the sanitized first entry of ``domain`` ("未分类" when
    ``domain`` is empty or None) and is created if missing. The file keeps
    its basename. Nothing is renamed when source and destination normalize
    to the same path.

    Returns:
        The destination path produced by ``safe_path``.
    """
    if domain:
        subfolder = sanitize_name(domain[0])
    else:
        subfolder = "未分类"
    destination_dir = os.path.join(target_type_dir, subfolder)
    os.makedirs(destination_dir, exist_ok=True)

    base_name = os.path.basename(file_path)
    destination = safe_path(destination_dir, base_name)

    already_in_place = os.path.normpath(file_path) == os.path.normpath(destination)
    if not already_in_place:
        os.rename(file_path, destination)
        logger.info(f"Moved bucket / 移动记忆桶: {base_name} → {destination_dir}/")
    return destination
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Update bucket
|
# Update bucket
|
||||||
# 更新桶
|
# 更新桶
|
||||||
@@ -225,15 +259,7 @@ class BucketManager:
|
|||||||
|
|
||||||
# --- Update only fields that were passed in / 只改传入的字段 ---
|
# --- Update only fields that were passed in / 只改传入的字段 ---
|
||||||
if "content" in kwargs:
|
if "content" in kwargs:
|
||||||
next_tags = kwargs.get("tags", post.get("tags", []))
|
post.content = kwargs["content"] # wikilink injection disabled; LLM adds [[]] via prompt
|
||||||
next_domain = kwargs.get("domain", post.get("domain", []))
|
|
||||||
next_name = kwargs.get("name", post.get("name", ""))
|
|
||||||
post.content = self._apply_wikilinks(
|
|
||||||
kwargs["content"],
|
|
||||||
next_tags,
|
|
||||||
next_domain,
|
|
||||||
next_name,
|
|
||||||
)
|
|
||||||
if "tags" in kwargs:
|
if "tags" in kwargs:
|
||||||
post["tags"] = kwargs["tags"]
|
post["tags"] = kwargs["tags"]
|
||||||
if "importance" in kwargs:
|
if "importance" in kwargs:
|
||||||
@@ -252,6 +278,10 @@ class BucketManager:
|
|||||||
post["pinned"] = bool(kwargs["pinned"])
|
post["pinned"] = bool(kwargs["pinned"])
|
||||||
if kwargs["pinned"]:
|
if kwargs["pinned"]:
|
||||||
post["importance"] = 10 # pinned → lock importance to 10
|
post["importance"] = 10 # pinned → lock importance to 10
|
||||||
|
if "digested" in kwargs:
|
||||||
|
post["digested"] = bool(kwargs["digested"])
|
||||||
|
if "model_valence" in kwargs:
|
||||||
|
post["model_valence"] = max(0.0, min(1.0, float(kwargs["model_valence"])))
|
||||||
|
|
||||||
# --- Auto-refresh activation time / 自动刷新激活时间 ---
|
# --- Auto-refresh activation time / 自动刷新激活时间 ---
|
||||||
post["last_active"] = now_iso()
|
post["last_active"] = now_iso()
|
||||||
@@ -263,136 +293,31 @@ class BucketManager:
|
|||||||
logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
|
logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
# --- Auto-move: pinned → permanent/ ---
|
||||||
|
# --- 自动移动:钉选 → permanent/ ---
|
||||||
|
# NOTE: resolved buckets are NOT auto-archived here.
|
||||||
|
# They stay in dynamic/ and decay naturally until score < threshold.
|
||||||
|
# 注意:resolved 桶不在此自动归档,留在 dynamic/ 随衰减引擎自然归档。
|
||||||
|
domain = post.get("domain", ["未分类"])
|
||||||
|
if kwargs.get("pinned") and post.get("type") != "permanent":
|
||||||
|
post["type"] = "permanent"
|
||||||
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
|
f.write(frontmatter.dumps(post))
|
||||||
|
self._move_bucket(file_path, self.permanent_dir, domain)
|
||||||
|
|
||||||
logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
|
logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Wikilink injection
|
# Wikilink injection — DISABLED
|
||||||
# 自动添加 Obsidian 双链
|
# 自动添加 Obsidian 双链 — 已禁用
|
||||||
|
# Now handled by LLM prompts (Gemini adds [[]] for proper nouns)
|
||||||
|
# 现在由 LLM prompt 处理(Gemini 对人名/地名/专有名词加 [[]])
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
def _apply_wikilinks(
|
# def _apply_wikilinks(self, content, tags, domain, name): ...
|
||||||
self,
|
# def _collect_wikilink_keywords(self, content, tags, domain, name): ...
|
||||||
content: str,
|
# def _normalize_keywords(self, keywords): ...
|
||||||
tags: list[str],
|
# def _extract_auto_keywords(self, content): ...
|
||||||
domain: list[str],
|
|
||||||
name: str,
|
|
||||||
) -> str:
|
|
||||||
"""
|
|
||||||
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
|
||||||
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
|
||||||
"""
|
|
||||||
if not self.wikilink_enabled or not content:
|
|
||||||
return content
|
|
||||||
|
|
||||||
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
|
||||||
if not keywords:
|
|
||||||
return content
|
|
||||||
|
|
||||||
# Split on existing wikilinks to avoid wrapping them again
|
|
||||||
# 按已有双链切分,避免重复包裹
|
|
||||||
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
|
||||||
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
|
||||||
for i, segment in enumerate(segments):
|
|
||||||
if segment.startswith("[[") and segment.endswith("]]"):
|
|
||||||
continue
|
|
||||||
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
|
||||||
segments[i] = updated
|
|
||||||
return "".join(segments)
|
|
||||||
|
|
||||||
def _collect_wikilink_keywords(
    self,
    content: str,
    tags: list[str],
    domain: list[str],
    name: str,
) -> list[str]:
    """
    Gather wikilink keyword candidates and return them normalized.

    Sources, in order: ``tags`` (if ``wikilink_use_tags``), ``domain`` (if
    ``wikilink_use_domain``), ``name`` (if non-empty) and words
    auto-extracted from ``content`` (if ``wikilink_use_auto_keywords``).
    The combined list is passed through ``_normalize_keywords``.
    """
    sources = []
    if self.wikilink_use_tags:
        sources.append(tags or [])
    if self.wikilink_use_domain:
        sources.append(domain or [])
    if name:
        sources.append([name])
    if self.wikilink_use_auto_keywords:
        sources.append(self._extract_auto_keywords(content))

    combined = [keyword for group in sources for keyword in group]
    return self._normalize_keywords(combined)
|
|
||||||
|
|
||||||
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
|
||||||
"""
|
|
||||||
Deduplicate and sort by length (longer first to avoid short words
|
|
||||||
breaking long ones during replacement).
|
|
||||||
去重并按长度排序,优先替换长词。
|
|
||||||
"""
|
|
||||||
if not keywords:
|
|
||||||
return []
|
|
||||||
|
|
||||||
seen = set()
|
|
||||||
cleaned = []
|
|
||||||
for keyword in keywords:
|
|
||||||
if not isinstance(keyword, str):
|
|
||||||
continue
|
|
||||||
kw = keyword.strip()
|
|
||||||
if len(kw) < self.wikilink_min_len:
|
|
||||||
continue
|
|
||||||
if kw in self.wikilink_exclude_keywords:
|
|
||||||
continue
|
|
||||||
if kw.lower() in self.wikilink_stopwords:
|
|
||||||
continue
|
|
||||||
if kw in seen:
|
|
||||||
continue
|
|
||||||
seen.add(kw)
|
|
||||||
cleaned.append(kw)
|
|
||||||
|
|
||||||
return sorted(cleaned, key=len, reverse=True)
|
|
||||||
|
|
||||||
def _extract_auto_keywords(self, content: str) -> list[str]:
|
|
||||||
"""
|
|
||||||
Auto-extract keywords from body text, prioritizing high-frequency words.
|
|
||||||
从正文自动提词,优先高频词。
|
|
||||||
"""
|
|
||||||
if not content:
|
|
||||||
return []
|
|
||||||
|
|
||||||
try:
|
|
||||||
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
|
|
||||||
except Exception:
|
|
||||||
zh_words = []
|
|
||||||
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
|
|
||||||
|
|
||||||
# Chinese bigrams / 中文双词组合
|
|
||||||
zh_bigrams = []
|
|
||||||
for i in range(len(zh_words) - 1):
|
|
||||||
left = zh_words[i]
|
|
||||||
right = zh_words[i + 1]
|
|
||||||
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
|
|
||||||
continue
|
|
||||||
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
|
|
||||||
continue
|
|
||||||
if len(left + right) > 8:
|
|
||||||
continue
|
|
||||||
zh_bigrams.append(left + right)
|
|
||||||
|
|
||||||
merged = []
|
|
||||||
for word in zh_words + zh_bigrams + en_words:
|
|
||||||
if len(word) < self.wikilink_min_len:
|
|
||||||
continue
|
|
||||||
if re.fullmatch(r"\d+", word):
|
|
||||||
continue
|
|
||||||
if word.lower() in self.wikilink_stopwords:
|
|
||||||
continue
|
|
||||||
merged.append(word)
|
|
||||||
|
|
||||||
if not merged:
|
|
||||||
return []
|
|
||||||
|
|
||||||
counter = Counter(merged)
|
|
||||||
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Delete bucket
|
# Delete bucket
|
||||||
@@ -425,7 +350,9 @@ class BucketManager:
|
|||||||
async def touch(self, bucket_id: str) -> None:
|
async def touch(self, bucket_id: str) -> None:
|
||||||
"""
|
"""
|
||||||
Update a bucket's last activation time and count.
|
Update a bucket's last activation time and count.
|
||||||
|
Also triggers time ripple: nearby memories get a slight activation boost.
|
||||||
更新桶的最后激活时间和激活次数。
|
更新桶的最后激活时间和激活次数。
|
||||||
|
同时触发时间涟漪:时间上相邻的记忆轻微唤醒。
|
||||||
"""
|
"""
|
||||||
file_path = self._find_bucket_file(bucket_id)
|
file_path = self._find_bucket_file(bucket_id)
|
||||||
if not file_path:
|
if not file_path:
|
||||||
@@ -438,9 +365,60 @@ class BucketManager:
|
|||||||
|
|
||||||
with open(file_path, "w", encoding="utf-8") as f:
|
with open(file_path, "w", encoding="utf-8") as f:
|
||||||
f.write(frontmatter.dumps(post))
|
f.write(frontmatter.dumps(post))
|
||||||
|
|
||||||
|
# --- Time ripple: boost nearby memories within ±48h ---
|
||||||
|
# --- 时间涟漪:±48小时内的记忆轻微唤醒 ---
|
||||||
|
current_time = datetime.fromisoformat(str(post.get("created", post.get("last_active", ""))))
|
||||||
|
await self._time_ripple(bucket_id, current_time)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
||||||
|
|
||||||
|
async def _time_ripple(self, source_id: str, reference_time: datetime, hours: float = 48.0) -> None:
    """
    Give buckets that are close in time to ``reference_time`` a small boost.

    Every non-archived bucket other than ``source_id`` whose ``created``
    timestamp (or ``last_active`` when ``created`` is absent) lies within
    ``hours`` of ``reference_time`` gets ``activation_count`` raised by 0.3
    and is written back to disk. ``last_active`` is left untouched so the
    boost does not itself look like an activation. Pinned, protected,
    permanent and feel buckets are skipped. At most 5 buckets are rewritten
    per call to bound I/O.

    Args:
        source_id: ID of the bucket that triggered the ripple; never boosted.
        reference_time: Timestamp the other buckets are compared against.
        hours: Half-width of the time window, in hours.

    Failures to list buckets, parse a timestamp, or rewrite a single bucket
    are skipped rather than raised.
    """
    try:
        all_buckets = await self.list_all(include_archive=False)
    except Exception as e:
        logger.debug(f"Time ripple skipped, cannot list buckets: {e}")
        return

    rippled = 0
    max_ripple = 5
    for bucket in all_buckets:
        if rippled >= max_ripple:
            break
        if bucket["id"] == source_id:
            continue

        meta = bucket.get("metadata", {})
        # Skip pinned / protected / permanent / feel buckets.
        if meta.get("pinned") or meta.get("protected") or meta.get("type") in ("permanent", "feel"):
            continue

        created_str = meta.get("created", meta.get("last_active", ""))
        try:
            created = datetime.fromisoformat(str(created_str))
            delta_hours = abs((reference_time - created).total_seconds()) / 3600
        except (ValueError, TypeError):
            # Unparseable timestamp, or naive/aware datetimes that cannot be subtracted.
            continue
        if delta_hours > hours:
            continue

        file_path = self._find_bucket_file(bucket["id"])
        if not file_path:
            continue

        try:
            post = frontmatter.load(file_path)
            current_count = post.get("activation_count", 1)
            # Fractional increment, rounded to one decimal place.
            post["activation_count"] = round(current_count + 0.3, 1)
            # Serialize BEFORE opening the file: open(..., "w") truncates
            # immediately, so a failure inside dumps() would otherwise leave
            # the bucket file empty.
            serialized = frontmatter.dumps(post)
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(serialized)
            rippled += 1
        except Exception as e:
            logger.debug(f"Time ripple failed for bucket {bucket['id']}: {e}")
            continue
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Multi-dimensional search (core feature)
|
# Multi-dimensional search (core feature)
|
||||||
# 多维搜索(核心功能)
|
# 多维搜索(核心功能)
|
||||||
@@ -497,6 +475,20 @@ class BucketManager:
|
|||||||
else:
|
else:
|
||||||
candidates = all_buckets
|
candidates = all_buckets
|
||||||
|
|
||||||
|
# --- Layer 1.5: embedding pre-filter (optional, reduces multi-dim ranking set) ---
|
||||||
|
# --- 第1.5层:embedding 预筛(可选,缩小精排候选集)---
|
||||||
|
if self.embedding_engine and self.embedding_engine.enabled:
|
||||||
|
try:
|
||||||
|
vector_results = await self.embedding_engine.search_similar(query, top_k=50)
|
||||||
|
if vector_results:
|
||||||
|
vector_ids = {bid for bid, _ in vector_results}
|
||||||
|
emb_candidates = [b for b in candidates if b["id"] in vector_ids]
|
||||||
|
if emb_candidates: # only replace if there's non-empty overlap
|
||||||
|
candidates = emb_candidates
|
||||||
|
# else: keep original candidates as fallback
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Embedding pre-filter failed, using fuzzy only / embedding 预筛失败: {e}")
|
||||||
|
|
||||||
# --- Layer 2: weighted multi-dim ranking ---
|
# --- Layer 2: weighted multi-dim ranking ---
|
||||||
# --- 第二层:多维加权精排 ---
|
# --- 第二层:多维加权精排 ---
|
||||||
scored = []
|
scored = []
|
||||||
@@ -529,12 +521,14 @@ class BucketManager:
|
|||||||
weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
|
weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
|
||||||
normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0
|
normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0
|
||||||
|
|
||||||
|
# Threshold check uses raw (pre-penalty) score so resolved buckets
|
||||||
|
# 阈值用原始分数判定,确保 resolved 桶在关键词命中时仍可被搜出
|
||||||
|
# remain reachable by keyword (penalty applied only to ranking).
|
||||||
|
if normalized >= self.fuzzy_threshold:
|
||||||
# Resolved buckets get ranking penalty (but still reachable by keyword)
|
# Resolved buckets get ranking penalty (but still reachable by keyword)
|
||||||
# 已解决的桶降权排序(但仍可被关键词激活)
|
# 已解决的桶仅在排序时降权
|
||||||
if meta.get("resolved", False):
|
if meta.get("resolved", False):
|
||||||
normalized *= 0.3
|
normalized *= 0.3
|
||||||
|
|
||||||
if normalized >= self.fuzzy_threshold:
|
|
||||||
bucket["score"] = round(normalized, 2)
|
bucket["score"] = round(normalized, 2)
|
||||||
scored.append(bucket)
|
scored.append(bucket)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -576,7 +570,7 @@ class BucketManager:
|
|||||||
)
|
)
|
||||||
content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:1000]) * self.content_weight
|
content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:1000]) * self.content_weight
|
||||||
|
|
||||||
return (name_score + domain_score + tag_score + content_score) / (100 * 10.5)
|
return (name_score + domain_score + tag_score + content_score) / (100 * (3 + 2.5 + 2 + self.content_weight))
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Emotion resonance sub-score:
|
# Emotion resonance sub-score:
|
||||||
@@ -620,7 +614,7 @@ class BucketManager:
|
|||||||
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
days = 30
|
days = 30
|
||||||
return math.exp(-0.1 * days)
|
return math.exp(-0.02 * days)
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# List all buckets
|
# List all buckets
|
||||||
@@ -633,7 +627,7 @@ class BucketManager:
|
|||||||
"""
|
"""
|
||||||
buckets = []
|
buckets = []
|
||||||
|
|
||||||
dirs = [self.permanent_dir, self.dynamic_dir]
|
dirs = [self.permanent_dir, self.dynamic_dir, self.feel_dir]
|
||||||
if include_archive:
|
if include_archive:
|
||||||
dirs.append(self.archive_dir)
|
dirs.append(self.archive_dir)
|
||||||
|
|
||||||
@@ -664,6 +658,7 @@ class BucketManager:
|
|||||||
"permanent_count": 0,
|
"permanent_count": 0,
|
||||||
"dynamic_count": 0,
|
"dynamic_count": 0,
|
||||||
"archive_count": 0,
|
"archive_count": 0,
|
||||||
|
"feel_count": 0,
|
||||||
"total_size_kb": 0.0,
|
"total_size_kb": 0.0,
|
||||||
"domains": {},
|
"domains": {},
|
||||||
}
|
}
|
||||||
@@ -672,6 +667,7 @@ class BucketManager:
|
|||||||
(self.permanent_dir, "permanent_count"),
|
(self.permanent_dir, "permanent_count"),
|
||||||
(self.dynamic_dir, "dynamic_count"),
|
(self.dynamic_dir, "dynamic_count"),
|
||||||
(self.archive_dir, "archive_count"),
|
(self.archive_dir, "archive_count"),
|
||||||
|
(self.feel_dir, "feel_count"),
|
||||||
]:
|
]:
|
||||||
if not os.path.exists(subdir):
|
if not os.path.exists(subdir):
|
||||||
continue
|
continue
|
||||||
@@ -745,7 +741,7 @@ class BucketManager:
|
|||||||
"""
|
"""
|
||||||
if not bucket_id:
|
if not bucket_id:
|
||||||
return None
|
return None
|
||||||
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir, self.feel_dir]:
|
||||||
if not os.path.exists(dir_path):
|
if not os.path.exists(dir_path):
|
||||||
continue
|
continue
|
||||||
for root, _, files in os.walk(dir_path):
|
for root, _, files in os.walk(dir_path):
|
||||||
@@ -754,7 +750,8 @@ class BucketManager:
|
|||||||
continue
|
continue
|
||||||
# Match by exact ID segment in filename
|
# Match by exact ID segment in filename
|
||||||
# 通过文件名中的 ID 片段精确匹配
|
# 通过文件名中的 ID 片段精确匹配
|
||||||
if bucket_id in fname:
|
name_part = fname[:-3] # remove .md
|
||||||
|
if name_part == bucket_id or name_part.endswith(f"_{bucket_id}"):
|
||||||
return os.path.join(root, fname)
|
return os.path.join(root, fname)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
30
check_buckets.py
Normal file
30
check_buckets.py
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
import asyncio
|
||||||
|
from bucket_manager import BucketManager
|
||||||
|
from utils import load_config
|
||||||
|
|
||||||
|
async def main():
    """
    Print a quick health summary of every bucket, archive included.

    Output, in order: the total bucket count, a per-domain tally, one line
    per bucket that lacks a name, a domain or any content, and finally the
    number of such buckets.
    """
    manager = BucketManager(load_config())
    buckets = await manager.list_all(include_archive=True)
    print(f"Total buckets: {len(buckets)}")

    # Tally how many buckets mention each domain.
    domains = {}
    for entry in buckets:
        for label in entry.get("metadata", {}).get("domain", []):
            domains[label] = domains.get(label, 0) + 1
    print(f"Domains: {domains}")

    # A bucket is well-formed only when name, domain and content are all truthy.
    issues = 0
    for entry in buckets:
        meta = entry.get("metadata", {})
        if meta.get("name") and meta.get("domain") and entry.get("content"):
            continue
        print(f"Format issue in {entry['id']}")
        issues += 1
    print(f"Found {issues} formatting issues.")


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
147
check_icloud_conflicts.py
Normal file
147
check_icloud_conflicts.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# ============================================================
|
||||||
|
# check_icloud_conflicts.py — Ombre Brain iCloud Conflict Detector
|
||||||
|
# iCloud 冲突文件检测器
|
||||||
|
#
|
||||||
|
# Scans the configured bucket directory for iCloud sync conflict
|
||||||
|
# artefacts and duplicate bucket IDs, then prints a report.
|
||||||
|
# 扫描配置的桶目录,发现 iCloud 同步冲突文件及重复桶 ID,输出报告。
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# python check_icloud_conflicts.py
|
||||||
|
# python check_icloud_conflicts.py --buckets-dir /path/to/dir
|
||||||
|
# python check_icloud_conflicts.py --quiet # exit-code only (0=clean)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# iCloud conflict file patterns
|
||||||
|
# Pattern 1 (macOS classic): "filename 2.md", "filename 3.md"
|
||||||
|
# Pattern 2 (iCloud Drive): "filename (Device's conflicted copy YYYY-MM-DD).md"
|
||||||
|
# ──────────────────────────────────────────────────────────────
|
||||||
|
# Conflict copy named "<name> <number>.md" (e.g. "note 2.md").
# NOTE(review): this also matches an ordinary file whose name happens to end
# in " <number>" (e.g. "report 2024.md") — confirm that is acceptable.
_CONFLICT_SUFFIX = re.compile(r"^(.+?)\s+\d+\.md$")
# Conflict copy named "<name> (<something> conflicted copy <something>).md".
_CONFLICT_ICLOUD = re.compile(r"^(.+?)\s+\(.+conflicted copy .+\)\.md$", re.IGNORECASE)
# Bucket ID: 12 lowercase hex chars ending the file stem, either preceded by
# "_" or forming the whole stem. Both layouts are accepted by
# BucketManager._find_bucket_file, so both must count for duplicate detection.
_BUCKET_ID_PATTERN = re.compile(r"(?:^|_)([0-9a-f]{12})$")
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_buckets_dir() -> Path:
    """
    Work out which directory holds the buckets.

    Lookup order:
      1. the ``OMBRE_BUCKETS_DIR`` environment variable (ignored when blank),
      2. the ``buckets_dir`` key of ``config.yaml`` next to this script,
      3. a ``buckets`` directory next to this script.

    Any problem reading or parsing ``config.yaml`` (including PyYAML not
    being importable) falls through to step 3.
    """
    override = os.environ.get("OMBRE_BUCKETS_DIR", "").strip()
    if override:
        return Path(override)

    script_dir = Path(__file__).parent
    config_file = script_dir / "config.yaml"
    if config_file.exists():
        try:
            import yaml  # type: ignore

            with open(config_file, encoding="utf-8") as handle:
                settings = yaml.safe_load(handle) or {}
            configured = settings.get("buckets_dir")
            if configured:
                return Path(configured)
        except Exception:
            # Deliberate best-effort: an unreadable config means "use the default".
            pass

    return script_dir / "buckets"
|
||||||
|
|
||||||
|
|
||||||
|
def scan(buckets_dir: Path) -> tuple[list[Path], dict[str, list[Path]]]:
    """
    Walk ``buckets_dir`` recursively and classify every ``*.md`` file.

    Returns:
        A pair ``(conflict_files, dup_ids)``:
          * ``conflict_files`` — files whose name matches one of the
            conflict-copy patterns;
          * ``dup_ids`` — bucket ID -> files carrying that ID, restricted to
            IDs found on two or more files.
        Both are empty when ``buckets_dir`` does not exist.
    """
    if not buckets_dir.exists():
        return [], {}

    flagged: list[Path] = []
    files_by_id: dict[str, list[Path]] = defaultdict(list)

    for path in buckets_dir.rglob("*.md"):
        filename = path.name
        if _CONFLICT_SUFFIX.match(filename) or _CONFLICT_ICLOUD.match(filename):
            # Conflict copies are reported on their own and kept out of the ID map.
            flagged.append(path)
            continue

        found = _BUCKET_ID_PATTERN.search(path.stem)
        if found:
            files_by_id[found.group(1)].append(path)

    duplicates = {}
    for bucket_id, owners in files_by_id.items():
        if len(owners) > 1:
            duplicates[bucket_id] = owners
    return flagged, duplicates
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """
    Command-line entry point: scan the bucket directory and report problems.

    Options:
        --buckets-dir PATH  scan PATH instead of the resolved default.
        --quiet             print nothing; communicate through the exit code only.

    Returns:
        0 when the directory exists and contains neither conflict files nor
        duplicate bucket IDs; 1 when problems were found or the directory
        does not exist. The exit code is the same with and without
        ``--quiet`` (a missing directory is never reported as clean).

    This function only reports; it does not modify or delete files.
    """
    parser = argparse.ArgumentParser(
        description="Detect iCloud conflict files and duplicate bucket IDs."
    )
    parser.add_argument(
        "--buckets-dir",
        metavar="PATH",
        help="Override bucket directory (default: from config.yaml / OMBRE_BUCKETS_DIR)",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Suppress output; exit 0 = clean, 1 = problems found",
    )
    args = parser.parse_args()

    buckets_dir = Path(args.buckets_dir) if args.buckets_dir else resolve_buckets_dir()

    if not args.quiet:
        print(f"Scanning: {buckets_dir}")
    # Checked regardless of --quiet: nothing can be verified in a missing
    # directory, so it must not yield the "clean" exit code.
    if not buckets_dir.exists():
        if not args.quiet:
            print(" ✗ Directory does not exist.")
        return 1
    if not args.quiet:
        print()

    conflict_files, dup_ids = scan(buckets_dir)
    problems = bool(conflict_files or dup_ids)

    if args.quiet:
        return 1 if problems else 0

    # ── Report ─────────────────────────────────────────────────
    if not problems:
        print("✓ No iCloud conflicts or duplicate IDs found.")
        return 0

    if conflict_files:
        print(f"⚠ iCloud conflict files ({len(conflict_files)} found):")
        for f in sorted(conflict_files):
            # Show paths relative to the scanned directory when possible.
            rel = f.relative_to(buckets_dir) if f.is_relative_to(buckets_dir) else f
            print(f" {rel}")
        print()

    if dup_ids:
        print(f"⚠ Duplicate bucket IDs ({len(dup_ids)} ID(s) shared by multiple files):")
        for bid, paths in sorted(dup_ids.items()):
            print(f" ID: {bid}")
            for p in sorted(paths):
                rel = p.relative_to(buckets_dir) if p.is_relative_to(buckets_dir) else p
                print(f" {rel}")
        print()

    print(
        "NOTE: This script is report-only. No files are modified or deleted.\n"
        "注意:本脚本仅报告,不删除或修改任何文件。"
    )
    return 1


if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -28,9 +28,11 @@ log_level: "INFO"
|
|||||||
merge_threshold: 75
|
merge_threshold: 75
|
||||||
|
|
||||||
# --- Dehydration API / 脱水压缩 API 配置 ---
|
# --- Dehydration API / 脱水压缩 API 配置 ---
|
||||||
# Uses a cheap LLM for intelligent compression; auto-degrades to local
|
# Uses a cheap LLM for intelligent compression. API is required; if the
|
||||||
# keyword extraction if API is unavailable
|
# configured key/endpoint is unavailable, hold/grow will raise an explicit
|
||||||
# 用廉价 LLM 做智能压缩,API 不可用时自动降级到本地关键词提取
|
# error instead of silently degrading (see BEHAVIOR_SPEC.md 三、降级行为表).
|
||||||
|
# 用廉价 LLM 做智能压缩。API 为必需;如 key/endpoint 不可用,
|
||||||
|
# hold/grow 会直接报错而非静默降级(详见 BEHAVIOR_SPEC.md 三、降级行为表)。
|
||||||
dehydration:
|
dehydration:
|
||||||
# Supports any OpenAI-compatible API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
|
# Supports any OpenAI-compatible API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
|
||||||
# 支持所有 OpenAI 兼容 API:DeepSeek / Ollama / LM Studio / vLLM / Gemini 等
|
# 支持所有 OpenAI 兼容 API:DeepSeek / Ollama / LM Studio / vLLM / Gemini 等
|
||||||
@@ -58,6 +60,18 @@ decay:
|
|||||||
base: 1.0 # Base weight / 基础权重
|
base: 1.0 # Base weight / 基础权重
|
||||||
arousal_boost: 0.8 # Arousal boost coefficient / 唤醒度加成系数
|
arousal_boost: 0.8 # Arousal boost coefficient / 唤醒度加成系数
|
||||||
|
|
||||||
|
# --- Embedding / 向量化配置 ---
|
||||||
|
# Uses embedding API for semantic similarity search
|
||||||
|
# 通过 embedding API 实现语义相似度搜索
|
||||||
|
# You can configure embedding independently from dehydration.
|
||||||
|
# If api_key is omitted, reuses the same API key (OMBRE_API_KEY) and base_url from dehydration config
|
||||||
|
# 你可以把 embedding 独立配置;若 api_key 留空,复用脱水配置的 API key 和 base_url
|
||||||
|
embedding:
|
||||||
|
enabled: true # Enable embedding / 启用向量化
|
||||||
|
model: "gemini-embedding-001" # Embedding model / 向量化模型
|
||||||
|
# base_url: "https://generativelanguage.googleapis.com/v1beta/openai"
|
||||||
|
# api_key: ""
|
||||||
|
|
||||||
# --- Scoring weights / 检索权重参数 ---
|
# --- Scoring weights / 检索权重参数 ---
|
||||||
# total = topic(×4) + emotion(×2) + time(×1.5) + importance(×1)
|
# total = topic(×4) + emotion(×2) + time(×1.5) + importance(×1)
|
||||||
scoring_weights:
|
scoring_weights:
|
||||||
@@ -77,6 +91,6 @@ wikilink:
|
|||||||
use_tags: false
|
use_tags: false
|
||||||
use_domain: true
|
use_domain: true
|
||||||
use_auto_keywords: true
|
use_auto_keywords: true
|
||||||
auto_top_k: 8
|
auto_top_k: 4
|
||||||
min_keyword_len: 2
|
min_keyword_len: 3
|
||||||
exclude_keywords: []
|
exclude_keywords: []
|
||||||
|
|||||||
1652
dashboard.html
Normal file
1652
dashboard.html
Normal file
File diff suppressed because it is too large
Load Diff
127
decay_engine.py
127
decay_engine.py
@@ -70,93 +70,102 @@ class DecayEngine:
|
|||||||
# Permanent buckets never decay / 固化桶永远不衰减
|
# Permanent buckets never decay / 固化桶永远不衰减
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Time weight: 0-1d→1.0, day2→0.9, then ~10%/day, floor 0.3
|
# Freshness bonus: continuous exponential decay
|
||||||
# 时间系数:0-1天=1.0,第2天=0.9,之后每天约降10%,7天后稳定在0.3
|
# 新鲜度加成:连续指数衰减
|
||||||
|
# bonus = 1.0 + 1.0 × e^(-t/36), t in hours
|
||||||
|
# t=0 → 2.0×, t≈25h(半衰) → 1.5×, t≈72h → ≈1.14×, t→∞ → 1.0×
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _calc_time_weight(days_since: float) -> float:
|
def _calc_time_weight(days_since: float) -> float:
|
||||||
"""
|
"""
|
||||||
Piecewise time weight multiplier (multiplies base_score).
|
Freshness bonus multiplier: 1.0 + e^(-t/36), t in hours.
|
||||||
分段式时间权重系数,作为 final_score 的乘数。
|
新鲜度加成乘数:刚存入×2.0,~36小时半衰,72小时后趋近×1.0。
|
||||||
"""
|
"""
|
||||||
if days_since <= 1.0:
|
hours = days_since * 24.0
|
||||||
return 1.0
|
return 1.0 + 1.0 * math.exp(-hours / 36.0)
|
||||||
elif days_since <= 2.0:
|
|
||||||
# Linear interpolation: 1.0→0.9 over [1,2]
|
|
||||||
return 1.0 - 0.1 * (days_since - 1.0)
|
|
||||||
else:
|
|
||||||
# Exponential decay from 0.9, floor at 0.3
|
|
||||||
# k = ln(3)/5 ≈ 0.2197 so that at day 7 (5 days past day 2) → 0.3
|
|
||||||
raw = 0.9 * math.exp(-0.2197 * (days_since - 2.0))
|
|
||||||
return max(0.3, raw)
|
|
||||||
|
|
||||||
def calculate_score(self, metadata: dict) -> float:
    """
    Calculate the current activity score of a memory bucket.

    Fixed scores:
      * not a dict                 -> 0.0
      * pinned or protected        -> 999.0
      * type == "permanent"        -> 999.0
      * type == "feel"             -> 50.0

    Otherwise::

        emotion_weight  = emotion_base + arousal * arousal_boost
        time_weight     = _calc_time_weight(days_since)
        combined_weight = 0.7 * time_weight + 0.3 * emotion_weight   (days_since <= 3)
                          0.7 * emotion_weight + 0.3 * time_weight   (days_since  > 3)
        score = importance * activation_count^0.3
                * e^(-decay_lambda * days_since) * combined_weight
                * resolved_factor * urgency_boost

    ``resolved_factor`` is 0.02 for resolved + digested buckets, 0.05 for
    resolved only, 1.0 otherwise. ``urgency_boost`` is 1.5 for unresolved
    buckets with arousal above 0.7, else 1.0.

    Missing or malformed fields fall back to defaults instead of raising:
    importance 5 (clamped to 1..10), activation_count 1 (minimum 1.0),
    arousal 0.3 (clamped to 0..1), and 30 days since last activation.

    Args:
        metadata: Bucket front-matter mapping.

    Returns:
        The score rounded to 4 decimal places.
    """
    if not isinstance(metadata, dict):
        return 0.0

    # --- Pinned / protected / permanent buckets never decay ---
    if metadata.get("pinned") or metadata.get("protected"):
        return 999.0
    if metadata.get("type") == "permanent":
        return 999.0

    # --- Feel buckets: never decay, fixed moderate score ---
    if metadata.get("type") == "feel":
        return 50.0

    # Numeric fields are parsed defensively, the same way arousal is below,
    # so one hand-edited or null value does not make the bucket unscoreable.
    try:
        importance = max(1, min(10, int(metadata.get("importance", 5))))
    except (ValueError, TypeError, OverflowError):
        importance = 5
    try:
        activation_count = max(1.0, float(metadata.get("activation_count", 1)))
    except (ValueError, TypeError):
        activation_count = 1.0

    # --- Days since last activation ---
    last_active_str = metadata.get("last_active", metadata.get("created", ""))
    try:
        last_active = datetime.fromisoformat(str(last_active_str))
        days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
    except (ValueError, TypeError):
        # Unparseable (or timezone-aware vs. naive) timestamp: assume 30 days.
        days_since = 30

    # --- Emotion weight ---
    try:
        arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
    except (ValueError, TypeError):
        arousal = 0.3
    emotion_weight = self.emotion_base + arousal * self.arousal_boost

    # --- Time weight ---
    time_weight = self._calc_time_weight(days_since)

    # --- Short-term vs long-term weight separation ---
    if days_since <= 3.0:
        # Short-term: time dominates, emotion amplifies.
        combined_weight = time_weight * 0.7 + emotion_weight * 0.3
    else:
        # Long-term: emotion dominates, time provides the baseline.
        combined_weight = emotion_weight * 0.7 + time_weight * 0.3

    # --- Base score ---
    base_score = (
        importance
        * (activation_count ** 0.3)
        * math.exp(-self.decay_lambda * days_since)
        * combined_weight
    )

    # --- Weight pool modifiers ---
    resolved = metadata.get("resolved", False)
    digested = metadata.get("digested", False)
    if resolved and digested:
        resolved_factor = 0.02
    elif resolved:
        resolved_factor = 0.05
    else:
        resolved_factor = 1.0
    urgency_boost = 1.5 if (arousal > 0.7 and not resolved) else 1.0

    return round(base_score * resolved_factor * urgency_boost, 4)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Execute one decay cycle
|
# Execute one decay cycle
|
||||||
@@ -180,17 +189,42 @@ class DecayEngine:
|
|||||||
|
|
||||||
checked = 0
|
checked = 0
|
||||||
archived = 0
|
archived = 0
|
||||||
|
auto_resolved = 0
|
||||||
lowest_score = float("inf")
|
lowest_score = float("inf")
|
||||||
|
|
||||||
for bucket in buckets:
|
for bucket in buckets:
|
||||||
meta = bucket.get("metadata", {})
|
meta = bucket.get("metadata", {})
|
||||||
|
|
||||||
# Skip permanent / pinned / protected buckets
|
# Skip permanent / pinned / protected / feel buckets
|
||||||
# 跳过固化桶和钉选/保护桶
|
# 跳过固化桶、钉选/保护桶和 feel 桶
|
||||||
if meta.get("type") == "permanent" or meta.get("pinned") or meta.get("protected"):
|
if meta.get("type") in ("permanent", "feel") or meta.get("pinned") or meta.get("protected"):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
checked += 1
|
checked += 1
|
||||||
|
|
||||||
|
# --- Auto-resolve: imp≤4 + >30 days old + not resolved → auto resolve ---
|
||||||
|
# --- 自动结案:重要度≤4 + 超过30天 + 未解决 → 自动 resolve ---
|
||||||
|
if not meta.get("resolved", False):
|
||||||
|
imp = int(meta.get("importance", 5))
|
||||||
|
last_active_str = meta.get("last_active", meta.get("created", ""))
|
||||||
|
try:
|
||||||
|
last_active = datetime.fromisoformat(str(last_active_str))
|
||||||
|
days_since = (datetime.now() - last_active).total_seconds() / 86400
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
days_since = 999
|
||||||
|
if imp <= 4 and days_since > 30:
|
||||||
|
try:
|
||||||
|
await self.bucket_mgr.update(bucket["id"], resolved=True)
|
||||||
|
meta["resolved"] = True # refresh local meta so resolved_factor applies this cycle
|
||||||
|
auto_resolved += 1
|
||||||
|
logger.info(
|
||||||
|
f"Auto-resolved / 自动结案: "
|
||||||
|
f"{meta.get('name', bucket['id'])} "
|
||||||
|
f"(imp={imp}, days={days_since:.0f})"
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Auto-resolve failed / 自动结案失败: {e}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
score = self.calculate_score(meta)
|
score = self.calculate_score(meta)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
@@ -223,6 +257,7 @@ class DecayEngine:
|
|||||||
result = {
|
result = {
|
||||||
"checked": checked,
|
"checked": checked,
|
||||||
"archived": archived,
|
"archived": archived,
|
||||||
|
"auto_resolved": auto_resolved,
|
||||||
"lowest_score": lowest_score if checked > 0 else 0,
|
"lowest_score": lowest_score if checked > 0 else 0,
|
||||||
}
|
}
|
||||||
logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
|
logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
|
||||||
|
|||||||
421
dehydrator.py
421
dehydrator.py
@@ -13,21 +13,22 @@
|
|||||||
#
|
#
|
||||||
# Operating modes:
|
# Operating modes:
|
||||||
# 工作模式:
|
# 工作模式:
|
||||||
# - Primary: OpenAI-compatible API (DeepSeek/Ollama/LM Studio/vLLM/Gemini etc.)
|
# - API only: OpenAI-compatible API (DeepSeek/Ollama/LM Studio/vLLM/Gemini etc.)
|
||||||
# 主路径:通过 OpenAI 兼容客户端调用 LLM API
|
# 仅 API:通过 OpenAI 兼容客户端调用 LLM API
|
||||||
# - Fallback: local keyword extraction when API is unavailable
|
# - Dehydration cache: SQLite persistent cache to avoid redundant API calls
|
||||||
# 备用路径:API 不可用时用本地关键词提取
|
# 脱水缓存:SQLite 持久缓存,避免重复调用 API
|
||||||
#
|
#
|
||||||
# Depended on by: server.py
|
# Depended on by: server.py
|
||||||
# 被谁依赖:server.py
|
# 被谁依赖:server.py
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
import os
|
||||||
import re
|
import re
|
||||||
import json
|
import json
|
||||||
|
import hashlib
|
||||||
|
import sqlite3
|
||||||
import logging
|
import logging
|
||||||
from collections import Counter
|
|
||||||
import jieba
|
|
||||||
|
|
||||||
from openai import AsyncOpenAI
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
@@ -67,6 +68,9 @@ DIGEST_PROMPT = """你是一个日记整理专家。用户会发送一段包含
|
|||||||
3. 去除无意义的口水话和重复信息,保留核心内容
|
3. 去除无意义的口水话和重复信息,保留核心内容
|
||||||
4. 同一主题的零散信息应合并为一个条目
|
4. 同一主题的零散信息应合并为一个条目
|
||||||
5. 如果有待办事项,单独提取为一个条目
|
5. 如果有待办事项,单独提取为一个条目
|
||||||
|
6. 单个条目内容不少于50字,过短的零碎信息合并到最相关的条目中
|
||||||
|
7. 总条目数控制在 2~6 个,避免过度碎片化
|
||||||
|
8. 在 content 中对人名、地名、专有名词用 [[双链]] 标记(如 [[婷易]]、[[Obsidian]]),普通词汇不要加
|
||||||
|
|
||||||
输出格式(纯 JSON 数组,无其他内容):
|
输出格式(纯 JSON 数组,无其他内容):
|
||||||
[
|
[
|
||||||
@@ -76,11 +80,13 @@ DIGEST_PROMPT = """你是一个日记整理专家。用户会发送一段包含
|
|||||||
"domain": ["主题域1"],
|
"domain": ["主题域1"],
|
||||||
"valence": 0.7,
|
"valence": 0.7,
|
||||||
"arousal": 0.4,
|
"arousal": 0.4,
|
||||||
"tags": ["标签1", "标签2"],
|
"tags": ["核心词1", "核心词2", "扩展词1", "扩展词2"],
|
||||||
"importance": 5
|
"importance": 5
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
tags 生成规则:先从原文精准提取 3~5 个核心词,再引申扩展 5~8 个语义相关词(近义词、上位词、关联场景词),合并为一个数组。
|
||||||
|
|
||||||
主题域可选(选最精确的 1~2 个,只选真正相关的):
|
主题域可选(选最精确的 1~2 个,只选真正相关的):
|
||||||
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
|
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
|
||||||
人际: ["家庭", "恋爱", "友谊", "社交"]
|
人际: ["家庭", "恋爱", "友谊", "社交"]
|
||||||
@@ -104,6 +110,7 @@ MERGE_PROMPT = """你是一个信息合并专家。请将旧记忆与新内容
|
|||||||
2. 去除重复信息
|
2. 去除重复信息
|
||||||
3. 保留所有重要事实
|
3. 保留所有重要事实
|
||||||
4. 总长度尽量不超过旧记忆的 120%
|
4. 总长度尽量不超过旧记忆的 120%
|
||||||
|
5. 对出现的人名、地名、专有名词用 [[双链]] 标记(如 [[婷易]]、[[Obsidian]]),普通词汇不要加
|
||||||
|
|
||||||
直接输出合并后的文本,不要加额外说明。"""
|
直接输出合并后的文本,不要加额外说明。"""
|
||||||
|
|
||||||
@@ -124,15 +131,19 @@ ANALYZE_PROMPT = """你是一个内容分析器。请分析以下文本,输出
|
|||||||
内心: ["情绪", "回忆", "梦境", "自省"]
|
内心: ["情绪", "回忆", "梦境", "自省"]
|
||||||
2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极
|
2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极
|
||||||
3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动
|
3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动
|
||||||
4. tags(关键词标签):3~5 个最能概括内容的关键词
|
4. tags(关键词标签):分两步生成,合并为一个数组:
|
||||||
|
第一步—精准提取:从原文抽取 3~5 个真正的核心词,不泛化、不遗漏
|
||||||
|
第二步—引申扩展:自动补充 8~10 个与当前场景语义相关的词,包括近义词、上位词、关联场景词、用户可能用不同措辞搜索的词
|
||||||
|
两步合并为一个 tags 数组,总计 10~15 个
|
||||||
5. suggested_name(建议桶名):10字以内的简短标题
|
5. suggested_name(建议桶名):10字以内的简短标题
|
||||||
|
6. 在 tags 和 suggested_name 中不要使用 [[]] 双链标记
|
||||||
|
|
||||||
输出格式(纯 JSON,无其他内容):
|
输出格式(纯 JSON,无其他内容):
|
||||||
{
|
{
|
||||||
"domain": ["主题域1", "主题域2"],
|
"domain": ["主题域1", "主题域2"],
|
||||||
"valence": 0.7,
|
"valence": 0.7,
|
||||||
"arousal": 0.4,
|
"arousal": 0.4,
|
||||||
"tags": ["标签1", "标签2", "标签3"],
|
"tags": ["核心词1", "核心词2", "扩展词1", "扩展词2", "..."],
|
||||||
"suggested_name": "简短标题"
|
"suggested_name": "简短标题"
|
||||||
}"""
|
}"""
|
||||||
|
|
||||||
@@ -141,10 +152,13 @@ class Dehydrator:
|
|||||||
"""
|
"""
|
||||||
Data dehydrator + content analyzer.
|
Data dehydrator + content analyzer.
|
||||||
Three capabilities: dehydration / merge / auto-tagging (domain + emotion).
|
Three capabilities: dehydration / merge / auto-tagging (domain + emotion).
|
||||||
Prefers API (better quality); auto-degrades to local (guaranteed availability).
|
API-only: every public method requires a working LLM API.
|
||||||
|
If the API is unavailable, methods raise RuntimeError so callers can
|
||||||
|
surface the failure to the user instead of silently producing low-quality results.
|
||||||
数据脱水器 + 内容分析器。
|
数据脱水器 + 内容分析器。
|
||||||
三大能力:脱水压缩 / 新旧合并 / 自动打标。
|
三大能力:脱水压缩 / 新旧合并 / 自动打标。
|
||||||
优先走 API,API 挂了自动降级到本地。
|
仅走 API:API 不可用时直接抛出 RuntimeError,调用方明确感知。
|
||||||
|
(根据 BEHAVIOR_SPEC.md 三、降级行为表决策:无本地降级)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config: dict):
|
def __init__(self, config: dict):
|
||||||
@@ -161,8 +175,6 @@ class Dehydrator:
|
|||||||
|
|
||||||
# --- Initialize OpenAI-compatible client ---
|
# --- Initialize OpenAI-compatible client ---
|
||||||
# --- 初始化 OpenAI 兼容客户端 ---
|
# --- 初始化 OpenAI 兼容客户端 ---
|
||||||
# Supports any OpenAI-format API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
|
|
||||||
# User only needs to set base_url in config.yaml
|
|
||||||
if self.api_available:
|
if self.api_available:
|
||||||
self.client = AsyncOpenAI(
|
self.client = AsyncOpenAI(
|
||||||
api_key=self.api_key,
|
api_key=self.api_key,
|
||||||
@@ -172,18 +184,71 @@ class Dehydrator:
|
|||||||
else:
|
else:
|
||||||
self.client = None
|
self.client = None
|
||||||
|
|
||||||
|
# --- SQLite dehydration cache ---
|
||||||
|
# --- SQLite 脱水缓存:content hash → summary ---
|
||||||
|
db_path = os.path.join(config["buckets_dir"], "dehydration_cache.db")
|
||||||
|
self.cache_db_path = db_path
|
||||||
|
self._init_cache_db()
|
||||||
|
|
||||||
|
def _init_cache_db(self):
|
||||||
|
"""Create dehydration cache table if not exists."""
|
||||||
|
os.makedirs(os.path.dirname(self.cache_db_path), exist_ok=True)
|
||||||
|
conn = sqlite3.connect(self.cache_db_path)
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS dehydration_cache (
|
||||||
|
content_hash TEXT PRIMARY KEY,
|
||||||
|
summary TEXT NOT NULL,
|
||||||
|
model TEXT NOT NULL,
|
||||||
|
created_at TEXT NOT NULL DEFAULT (datetime('now'))
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
def _get_cached_summary(self, content: str) -> str | None:
|
||||||
|
"""Look up cached dehydration result by content hash."""
|
||||||
|
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||||
|
conn = sqlite3.connect(self.cache_db_path)
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT summary FROM dehydration_cache WHERE content_hash = ?",
|
||||||
|
(content_hash,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
return row[0] if row else None
|
||||||
|
|
||||||
|
def _set_cached_summary(self, content: str, summary: str):
|
||||||
|
"""Store dehydration result in cache."""
|
||||||
|
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||||
|
conn = sqlite3.connect(self.cache_db_path)
|
||||||
|
conn.execute(
|
||||||
|
"INSERT OR REPLACE INTO dehydration_cache (content_hash, summary, model) VALUES (?, ?, ?)",
|
||||||
|
(content_hash, summary, self.model)
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
def invalidate_cache(self, content: str):
|
||||||
|
"""Remove cached summary for specific content (call when bucket content changes)."""
|
||||||
|
content_hash = hashlib.sha256(content.encode()).hexdigest()
|
||||||
|
conn = sqlite3.connect(self.cache_db_path)
|
||||||
|
conn.execute("DELETE FROM dehydration_cache WHERE content_hash = ?", (content_hash,))
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Dehydrate: compress raw content into concise summary
|
# Dehydrate: compress raw content into concise summary
|
||||||
# 脱水:将原始内容压缩为精简摘要
|
# 脱水:将原始内容压缩为精简摘要
|
||||||
# Try API first, fallback to local
|
# API only (no local fallback)
|
||||||
# 先尝试 API,失败则回退本地
|
# 仅通过 API 脱水(无本地回退)
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
async def dehydrate(self, content: str, metadata: dict = None) -> str:
|
async def dehydrate(self, content: str, metadata: dict = None) -> str:
|
||||||
"""
|
"""
|
||||||
Dehydrate/compress memory content.
|
Dehydrate/compress memory content.
|
||||||
Returns formatted summary string ready for Claude context injection.
|
Returns formatted summary string ready for Claude context injection.
|
||||||
|
Uses SQLite cache to avoid redundant API calls.
|
||||||
对记忆内容做脱水压缩。
|
对记忆内容做脱水压缩。
|
||||||
返回格式化的摘要字符串,可直接注入 Claude 上下文。
|
返回格式化的摘要字符串,可直接注入 Claude 上下文。
|
||||||
|
使用 SQLite 缓存避免重复调用 API。
|
||||||
"""
|
"""
|
||||||
if not content or not content.strip():
|
if not content or not content.strip():
|
||||||
return "(空记忆 / empty memory)"
|
return "(空记忆 / empty memory)"
|
||||||
@@ -193,9 +258,20 @@ class Dehydrator:
|
|||||||
if count_tokens_approx(content) < 100:
|
if count_tokens_approx(content) < 100:
|
||||||
return self._format_output(content, metadata)
|
return self._format_output(content, metadata)
|
||||||
|
|
||||||
# --- Local compression (Always used as requested) ---
|
# --- Check cache first ---
|
||||||
# --- 本地压缩 ---
|
# --- 先查缓存 ---
|
||||||
result = self._local_dehydrate(content)
|
cached = self._get_cached_summary(content)
|
||||||
|
if cached:
|
||||||
|
return self._format_output(cached, metadata)
|
||||||
|
|
||||||
|
# --- API dehydration (no local fallback) ---
|
||||||
|
# --- API 脱水(无本地降级)---
|
||||||
|
if not self.api_available:
|
||||||
|
raise RuntimeError("脱水 API 不可用,请配置 OMBRE_API_KEY")
|
||||||
|
|
||||||
|
result = await self._api_dehydrate(content)
|
||||||
|
# --- Cache the result ---
|
||||||
|
self._set_cached_summary(content, result)
|
||||||
return self._format_output(result, metadata)
|
return self._format_output(result, metadata)
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
@@ -214,20 +290,18 @@ class Dehydrator:
|
|||||||
if not new_content:
|
if not new_content:
|
||||||
return old_content
|
return old_content
|
||||||
|
|
||||||
# --- Try API merge first / 优先 API 合并 ---
|
# --- API merge (no local fallback) ---
|
||||||
if self.api_available:
|
if not self.api_available:
|
||||||
|
raise RuntimeError("脱水 API 不可用,请检查 config.yaml 中的 dehydration 配置")
|
||||||
try:
|
try:
|
||||||
result = await self._api_merge(old_content, new_content)
|
result = await self._api_merge(old_content, new_content)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
raise RuntimeError("API 合并返回空结果")
|
||||||
|
except RuntimeError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
raise RuntimeError(f"API 合并失败,请检查 API 连接: {e}") from e
|
||||||
f"API merge failed, degrading to local / "
|
|
||||||
f"API 合并失败,降级到本地合并: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- Local merge fallback / 本地合并兜底 ---
|
|
||||||
return self._local_merge(old_content, new_content)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# API call: dehydration
|
# API call: dehydration
|
||||||
@@ -274,98 +348,7 @@ class Dehydrator:
|
|||||||
return ""
|
return ""
|
||||||
return response.choices[0].message.content or ""
|
return response.choices[0].message.content or ""
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Local dehydration (fallback when API is unavailable)
|
|
||||||
# 本地脱水(无 API 时的兜底方案)
|
|
||||||
# Keyword frequency + sentence position weighting
|
|
||||||
# 基于关键词频率 + 句子位置权重
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _local_dehydrate(self, content: str) -> str:
|
|
||||||
"""
|
|
||||||
Local keyword extraction + position-weighted simple compression.
|
|
||||||
本地关键词提取 + 位置加权的简单压缩。
|
|
||||||
"""
|
|
||||||
# --- Split into sentences / 分句 ---
|
|
||||||
sentences = re.split(r"[。!?\n.!?]+", content)
|
|
||||||
sentences = [s.strip() for s in sentences if len(s.strip()) > 5]
|
|
||||||
|
|
||||||
if not sentences:
|
|
||||||
return content[:200]
|
|
||||||
|
|
||||||
# --- Extract high-frequency keywords / 提取高频关键词 ---
|
|
||||||
keywords = self._extract_keywords(content)
|
|
||||||
|
|
||||||
# --- Score sentences: position weight + keyword hits ---
|
|
||||||
# --- 句子评分:开头结尾权重高 + 关键词命中加分 ---
|
|
||||||
scored = []
|
|
||||||
for i, sent in enumerate(sentences):
|
|
||||||
position_weight = 1.5 if i < 3 else (1.2 if i > len(sentences) - 3 else 1.0)
|
|
||||||
keyword_hits = sum(1 for kw in keywords if kw in sent)
|
|
||||||
score = position_weight * (1 + keyword_hits)
|
|
||||||
scored.append((score, sent))
|
|
||||||
|
|
||||||
scored.sort(key=lambda x: x[0], reverse=True)
|
|
||||||
|
|
||||||
# --- Top-8 sentences + keyword list / 取高分句 + 关键词列表 ---
|
|
||||||
selected = [s for _, s in scored[:8]]
|
|
||||||
summary = "。".join(selected)
|
|
||||||
|
|
||||||
if len(summary) > 1000:
|
|
||||||
summary = summary[:1000] + "…"
|
|
||||||
return summary
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Local merge (simple concatenation + truncation)
|
|
||||||
# 本地合并(简单拼接 + 截断)
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _local_merge(self, old_content: str, new_content: str) -> str:
|
|
||||||
"""
|
|
||||||
Simple concatenation merge; truncates if too long.
|
|
||||||
简单拼接合并,超长时截断保留两端。
|
|
||||||
"""
|
|
||||||
merged = f"{old_content.strip()}\n\n--- 更新 ---\n{new_content.strip()}"
|
|
||||||
# Truncate if over 3000 chars / 超过 3000 字符则各取一半
|
|
||||||
if len(merged) > 3000:
|
|
||||||
half = 1400
|
|
||||||
merged = (
|
|
||||||
f"{old_content[:half].strip()}\n\n--- 更新 ---\n{new_content[:half].strip()}"
|
|
||||||
)
|
|
||||||
return merged
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Keyword extraction
|
|
||||||
# 关键词提取
|
|
||||||
# Chinese + English tokenization → stopword filter → frequency sort
|
|
||||||
# 中英文分词 + 停用词过滤 + 词频排序
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _extract_keywords(self, text: str) -> list[str]:
|
|
||||||
"""
|
|
||||||
Extract high-frequency keywords using jieba (Chinese + English mixed).
|
|
||||||
用 jieba 分词提取高频关键词。
|
|
||||||
"""
|
|
||||||
try:
|
|
||||||
words = jieba.lcut(text)
|
|
||||||
except Exception:
|
|
||||||
words = []
|
|
||||||
# English words / 英文单词
|
|
||||||
english_words = re.findall(r"[a-zA-Z]{3,}", text.lower())
|
|
||||||
words += english_words
|
|
||||||
|
|
||||||
# Stopwords / 停用词
|
|
||||||
stopwords = {
|
|
||||||
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
|
||||||
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
|
||||||
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
|
||||||
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
|
||||||
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
|
||||||
"this", "that", "from", "they", "been", "said", "will", "each",
|
|
||||||
}
|
|
||||||
filtered = [
|
|
||||||
w for w in words
|
|
||||||
if w not in stopwords and len(w.strip()) > 1 and not re.match(r"^[0-9]+$", w)
|
|
||||||
]
|
|
||||||
counter = Counter(filtered)
|
|
||||||
return [word for word, _ in counter.most_common(15)]
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Output formatting
|
# Output formatting
|
||||||
@@ -391,6 +374,15 @@ class Dehydrator:
|
|||||||
if domains:
|
if domains:
|
||||||
header += f" [主题:{domains}]"
|
header += f" [主题:{domains}]"
|
||||||
header += f" [情感:V{valence:.1f}/A{arousal:.1f}]"
|
header += f" [情感:V{valence:.1f}/A{arousal:.1f}]"
|
||||||
|
# Show model's perspective if available (valence drift)
|
||||||
|
model_v = metadata.get("model_valence")
|
||||||
|
if model_v is not None:
|
||||||
|
try:
|
||||||
|
header += f" [我的视角:V{float(model_v):.1f}]"
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
if metadata.get("digested"):
|
||||||
|
header += " [已消化]"
|
||||||
header += "\n"
|
header += "\n"
|
||||||
|
|
||||||
content = re.sub(r'\[\[([^\]]+)\]\]', r'\1', content)
|
content = re.sub(r'\[\[([^\]]+)\]\]', r'\1', content)
|
||||||
@@ -412,20 +404,18 @@ class Dehydrator:
|
|||||||
if not content or not content.strip():
|
if not content or not content.strip():
|
||||||
return self._default_analysis()
|
return self._default_analysis()
|
||||||
|
|
||||||
# --- Try API first (best quality) / 优先走 API ---
|
# --- API analyze (no local fallback) ---
|
||||||
if self.api_available:
|
if not self.api_available:
|
||||||
|
raise RuntimeError("脱水 API 不可用,请检查 config.yaml 中的 dehydration 配置")
|
||||||
try:
|
try:
|
||||||
result = await self._api_analyze(content)
|
result = await self._api_analyze(content)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
raise RuntimeError("API 打标返回空结果")
|
||||||
|
except RuntimeError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
raise RuntimeError(f"API 打标失败,请检查 API 连接: {e}") from e
|
||||||
f"API tagging failed, degrading to local / "
|
|
||||||
f"API 打标失败,降级到本地分析: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- Local analysis fallback / 本地分析兜底 ---
|
|
||||||
return self._local_analyze(content)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# API call: auto-tagging
|
# API call: auto-tagging
|
||||||
@@ -487,121 +477,10 @@ class Dehydrator:
|
|||||||
"domain": result.get("domain", ["未分类"])[:3],
|
"domain": result.get("domain", ["未分类"])[:3],
|
||||||
"valence": valence,
|
"valence": valence,
|
||||||
"arousal": arousal,
|
"arousal": arousal,
|
||||||
"tags": result.get("tags", [])[:5],
|
"tags": result.get("tags", [])[:15],
|
||||||
"suggested_name": str(result.get("suggested_name", ""))[:20],
|
"suggested_name": str(result.get("suggested_name", ""))[:20],
|
||||||
}
|
}
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Local analysis (fallback when API is unavailable)
|
|
||||||
# 本地分析(无 API 时的兜底方案)
|
|
||||||
# Keyword matching + simple sentiment dictionary
|
|
||||||
# 基于关键词 + 简单情感词典匹配
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
def _local_analyze(self, content: str) -> dict:
|
|
||||||
"""
|
|
||||||
Local keyword + sentiment dictionary analysis.
|
|
||||||
本地关键词 + 情感词典的简单分析。
|
|
||||||
"""
|
|
||||||
keywords = self._extract_keywords(content)
|
|
||||||
text_lower = content.lower()
|
|
||||||
|
|
||||||
# --- Domain matching by keyword hits ---
|
|
||||||
# --- 主题域匹配:基于关键词命中 ---
|
|
||||||
domain_keywords = {
|
|
||||||
# Daily / 日常
|
|
||||||
"饮食": {"吃", "饭", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
|
|
||||||
"超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食"},
|
|
||||||
"出行": {"旅行", "出发", "航班", "酒店", "地铁", "打车", "高铁", "机票",
|
|
||||||
"景点", "签证", "护照"},
|
|
||||||
"居家": {"打扫", "洗衣", "搬家", "快递", "收纳", "装修", "租房"},
|
|
||||||
"购物": {"买", "下单", "到货", "退货", "优惠", "折扣", "代购"},
|
|
||||||
# Relationships / 人际
|
|
||||||
"家庭": {"爸", "妈", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
|
|
||||||
"奶奶", "爷爷", "亲戚", "家里"},
|
|
||||||
"恋爱": {"爱人", "男友", "女友", "恋", "约会", "接吻", "分手",
|
|
||||||
"暧昧", "在一起", "想你", "同床"},
|
|
||||||
"友谊": {"朋友", "闺蜜", "兄弟", "聚", "约饭", "聊天", "群"},
|
|
||||||
"社交": {"见面", "被人", "圈子", "消息", "评论", "点赞"},
|
|
||||||
# Growth / 成长
|
|
||||||
"工作": {"会议", "项目", "客户", "汇报", "deadline", "同事",
|
|
||||||
"老板", "薪资", "合同", "需求", "加班", "实习"},
|
|
||||||
"学习": {"课", "考试", "论文", "笔记", "作业", "教授", "讲座",
|
|
||||||
"分数", "选课", "学分"},
|
|
||||||
"求职": {"面试", "简历", "offer", "投递", "薪资", "岗位"},
|
|
||||||
# Health / 身心
|
|
||||||
"健康": {"医院", "复查", "吃药", "抽血", "手术", "心率",
|
|
||||||
"病", "症状", "指标", "体检", "月经"},
|
|
||||||
"心理": {"焦虑", "抑郁", "恐慌", "创伤", "人格", "咨询",
|
|
||||||
"安全感", "自残", "崩溃", "压力"},
|
|
||||||
"睡眠": {"睡", "失眠", "噩梦", "清醒", "熬夜", "早起", "午觉"},
|
|
||||||
# Interests / 兴趣
|
|
||||||
"游戏": {"游戏", "steam", "极乐迪斯科", "存档", "通关", "角色",
|
|
||||||
"mod", "DLC", "剧情"},
|
|
||||||
"影视": {"电影", "番剧", "动漫", "剧", "综艺", "追番", "上映"},
|
|
||||||
"音乐": {"歌", "音乐", "专辑", "live", "演唱会", "耳机"},
|
|
||||||
"阅读": {"书", "小说", "读完", "kindle", "连载", "漫画"},
|
|
||||||
"创作": {"写", "画", "预设", "脚本", "视频", "剪辑", "P图",
|
|
||||||
"SillyTavern", "插件", "正则", "人设"},
|
|
||||||
# Digital / 数字
|
|
||||||
"编程": {"代码", "code", "python", "bug", "api", "docker",
|
|
||||||
"git", "调试", "框架", "部署", "开发", "server"},
|
|
||||||
"AI": {"模型", "GPT", "Claude", "gemini", "LLM", "token",
|
|
||||||
"prompt", "LoRA", "微调", "推理", "MCP"},
|
|
||||||
"网络": {"VPN", "梯子", "代理", "域名", "隧道", "服务器",
|
|
||||||
"cloudflare", "tunnel", "反代"},
|
|
||||||
# Affairs / 事务
|
|
||||||
"财务": {"钱", "转账", "工资", "花了", "欠", "还款", "借",
|
|
||||||
"账单", "余额", "预算", "黄金"},
|
|
||||||
"计划": {"计划", "目标", "deadline", "日程", "清单", "安排"},
|
|
||||||
"待办": {"要做", "记得", "别忘", "提醒", "下次"},
|
|
||||||
# Inner / 内心
|
|
||||||
"情绪": {"开心", "难过", "生气", "哭", "泪", "孤独", "幸福",
|
|
||||||
"伤心", "烦", "委屈", "感动", "温柔"},
|
|
||||||
"回忆": {"以前", "小时候", "那时", "怀念", "曾经", "记得"},
|
|
||||||
"梦境": {"梦", "梦到", "梦见", "噩梦", "清醒梦"},
|
|
||||||
"自省": {"反思", "觉得自己", "问自己", "意识到", "明白了"},
|
|
||||||
}
|
|
||||||
|
|
||||||
matched_domains = []
|
|
||||||
for domain, kws in domain_keywords.items():
|
|
||||||
hits = sum(1 for kw in kws if kw in text_lower)
|
|
||||||
if hits >= 2:
|
|
||||||
matched_domains.append((domain, hits))
|
|
||||||
matched_domains.sort(key=lambda x: x[1], reverse=True)
|
|
||||||
domains = [d for d, _ in matched_domains[:3]] or ["未分类"]
|
|
||||||
|
|
||||||
# --- Emotion estimation via simple sentiment dictionary ---
|
|
||||||
# --- 情感坐标估算:基于简单情感词典 ---
|
|
||||||
positive_words = {"开心", "高兴", "喜欢", "哈哈", "棒", "赞", "爱",
|
|
||||||
"幸福", "成功", "感动", "兴奋", "棒极了",
|
|
||||||
"happy", "love", "great", "awesome", "nice"}
|
|
||||||
negative_words = {"难过", "伤心", "生气", "焦虑", "害怕", "无聊",
|
|
||||||
"烦", "累", "失望", "崩溃", "愤怒", "痛苦",
|
|
||||||
"sad", "angry", "hate", "tired", "afraid"}
|
|
||||||
intense_words = {"太", "非常", "极", "超", "特别", "十分", "炸",
|
|
||||||
"崩溃", "激动", "愤怒", "狂喜", "very", "so", "extremely"}
|
|
||||||
|
|
||||||
pos_count = sum(1 for w in positive_words if w in text_lower)
|
|
||||||
neg_count = sum(1 for w in negative_words if w in text_lower)
|
|
||||||
intense_count = sum(1 for w in intense_words if w in text_lower)
|
|
||||||
|
|
||||||
# valence: positive/negative emotion balance
|
|
||||||
if pos_count + neg_count > 0:
|
|
||||||
valence = 0.5 + 0.4 * (pos_count - neg_count) / (pos_count + neg_count)
|
|
||||||
else:
|
|
||||||
valence = 0.5
|
|
||||||
|
|
||||||
# arousal: intensity level
|
|
||||||
arousal = min(1.0, 0.3 + intense_count * 0.15 + (pos_count + neg_count) * 0.08)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"domain": domains,
|
|
||||||
"valence": round(max(0.0, min(1.0, valence)), 2),
|
|
||||||
"arousal": round(max(0.0, min(1.0, arousal)), 2),
|
|
||||||
"tags": keywords[:5],
|
|
||||||
"suggested_name": "",
|
|
||||||
}
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# Default analysis result (empty content or total failure)
|
# Default analysis result (empty content or total failure)
|
||||||
# 默认分析结果(内容为空或完全失败时用)
|
# 默认分析结果(内容为空或完全失败时用)
|
||||||
@@ -635,21 +514,18 @@ class Dehydrator:
|
|||||||
if not content or not content.strip():
|
if not content or not content.strip():
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# --- Try API digest first (best quality, understands semantic splits) ---
|
# --- API digest (no local fallback) ---
|
||||||
# --- 优先 API 整理 ---
|
if not self.api_available:
|
||||||
if self.api_available:
|
raise RuntimeError("脱水 API 不可用,请检查 config.yaml 中的 dehydration 配置")
|
||||||
try:
|
try:
|
||||||
result = await self._api_digest(content)
|
result = await self._api_digest(content)
|
||||||
if result:
|
if result:
|
||||||
return result
|
return result
|
||||||
|
raise RuntimeError("API 日记整理返回空结果")
|
||||||
|
except RuntimeError:
|
||||||
|
raise
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.warning(
|
raise RuntimeError(f"API 日记整理失败,请检查 API 连接: {e}") from e
|
||||||
f"API diary digest failed, degrading to local / "
|
|
||||||
f"API 日记整理失败,降级到本地拆分: {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- Local split fallback / 本地拆分兜底 ---
|
|
||||||
return await self._local_digest(content)
|
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
# ---------------------------------------------------------
|
||||||
# API call: diary digest
|
# API call: diary digest
|
||||||
@@ -667,7 +543,7 @@ class Dehydrator:
|
|||||||
{"role": "user", "content": content[:5000]},
|
{"role": "user", "content": content[:5000]},
|
||||||
],
|
],
|
||||||
max_tokens=2048,
|
max_tokens=2048,
|
||||||
temperature=0.2,
|
temperature=0.0,
|
||||||
)
|
)
|
||||||
if not response.choices:
|
if not response.choices:
|
||||||
return []
|
return []
|
||||||
@@ -717,50 +593,7 @@ class Dehydrator:
|
|||||||
"domain": item.get("domain", ["未分类"])[:3],
|
"domain": item.get("domain", ["未分类"])[:3],
|
||||||
"valence": valence,
|
"valence": valence,
|
||||||
"arousal": arousal,
|
"arousal": arousal,
|
||||||
"tags": item.get("tags", [])[:5],
|
"tags": item.get("tags", [])[:15],
|
||||||
"importance": importance,
|
"importance": importance,
|
||||||
})
|
})
|
||||||
return validated
|
return validated
|
||||||
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
# Local diary split (fallback when API is unavailable)
|
|
||||||
# 本地日记拆分(无 API 时的兜底)
|
|
||||||
# Split by blank lines/separators, analyze each segment
|
|
||||||
# 按空行/分隔符拆段,每段独立分析
|
|
||||||
# ---------------------------------------------------------
|
|
||||||
async def _local_digest(self, content: str) -> list[dict]:
|
|
||||||
"""
|
|
||||||
Local paragraph split + per-segment analysis.
|
|
||||||
本地按段落拆分 + 逐段分析。
|
|
||||||
"""
|
|
||||||
# Split by blank lines or separators / 按空行或分隔线拆分
|
|
||||||
segments = re.split(r"\n{2,}|---+|\n-\s", content)
|
|
||||||
segments = [s.strip() for s in segments if len(s.strip()) > 20]
|
|
||||||
|
|
||||||
if not segments:
|
|
||||||
# Content too short, treat as single entry
|
|
||||||
# 内容太短,整个作为一个条目
|
|
||||||
analysis = self._local_analyze(content)
|
|
||||||
return [{
|
|
||||||
"name": analysis.get("suggested_name", "日记"),
|
|
||||||
"content": content.strip(),
|
|
||||||
"domain": analysis["domain"],
|
|
||||||
"valence": analysis["valence"],
|
|
||||||
"arousal": analysis["arousal"],
|
|
||||||
"tags": analysis["tags"],
|
|
||||||
"importance": 5,
|
|
||||||
}]
|
|
||||||
|
|
||||||
items = []
|
|
||||||
for seg in segments[:10]: # Max 10 segments / 最多 10 段
|
|
||||||
analysis = self._local_analyze(seg)
|
|
||||||
items.append({
|
|
||||||
"name": analysis.get("suggested_name", "") or seg[:10],
|
|
||||||
"content": seg,
|
|
||||||
"domain": analysis["domain"],
|
|
||||||
"valence": analysis["valence"],
|
|
||||||
"arousal": analysis["arousal"],
|
|
||||||
"tags": analysis["tags"],
|
|
||||||
"importance": 5,
|
|
||||||
})
|
|
||||||
return items
|
|
||||||
|
|||||||
38
docker-compose.user.yml
Normal file
38
docker-compose.user.yml
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Ombre Brain — 用户快速部署版
|
||||||
|
# User Quick Deploy (pre-built image, no local build needed)
|
||||||
|
#
|
||||||
|
# 使用方法 / Usage:
|
||||||
|
# 1. 创建 .env: echo "OMBRE_API_KEY=your-key" > .env
|
||||||
|
# 2. 按需修改下面的 volumes 路径
|
||||||
|
# 3. docker compose -f docker-compose.user.yml up -d
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
services:
|
||||||
|
ombre-brain:
|
||||||
|
image: p0luz/ombre-brain:latest
|
||||||
|
container_name: ombre-brain
|
||||||
|
restart: unless-stopped
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
environment:
|
||||||
|
- OMBRE_API_KEY=${OMBRE_API_KEY}
|
||||||
|
- OMBRE_TRANSPORT=streamable-http
|
||||||
|
- OMBRE_BUCKETS_DIR=/data
|
||||||
|
# --- Model override (optional) ---
|
||||||
|
# If you use Gemini instead of DeepSeek, set these in your .env:
|
||||||
|
# 如使用 Gemini 而非 DeepSeek,在 .env 里加:
|
||||||
|
# OMBRE_DEHYDRATION_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/
|
||||||
|
# OMBRE_DEHYDRATION_MODEL=gemini-2.5-flash-lite
|
||||||
|
# OMBRE_EMBEDDING_BASE_URL=https://generativelanguage.googleapis.com/v1beta/openai/
|
||||||
|
- OMBRE_DEHYDRATION_BASE_URL=${OMBRE_DEHYDRATION_BASE_URL:-}
|
||||||
|
- OMBRE_DEHYDRATION_MODEL=${OMBRE_DEHYDRATION_MODEL:-}
|
||||||
|
- OMBRE_EMBEDDING_BASE_URL=${OMBRE_EMBEDDING_BASE_URL:-}
|
||||||
|
- OMBRE_EMBEDDING_MODEL=${OMBRE_EMBEDDING_MODEL:-}
|
||||||
|
volumes:
|
||||||
|
# 改成你的 Obsidian Vault 路径,或保持 ./buckets 用本地目录
|
||||||
|
# Change to your Obsidian Vault path, or keep ./buckets for local storage
|
||||||
|
- ./buckets:/data
|
||||||
|
# (Optional) Mount custom config to override model / API settings:
|
||||||
|
# (可选)挂载自定义配置,覆盖模型和 API 设置:
|
||||||
|
# - ./config.yaml:/app/config.yaml
|
||||||
@@ -21,11 +21,17 @@ services:
|
|||||||
- OMBRE_TRANSPORT=streamable-http # Claude.ai requires streamable-http
|
- OMBRE_TRANSPORT=streamable-http # Claude.ai requires streamable-http
|
||||||
- OMBRE_BUCKETS_DIR=/data # Container-internal bucket path / 容器内路径
|
- OMBRE_BUCKETS_DIR=/data # Container-internal bucket path / 容器内路径
|
||||||
volumes:
|
volumes:
|
||||||
# Mount your Obsidian vault (or any host directory) for persistent storage
|
# Mount your Obsidian vault (or any host directory) for persistent storage.
|
||||||
# 挂载你的 Obsidian 仓库(或任意宿主机目录)做持久化存储
|
# Set OMBRE_HOST_VAULT_DIR in your .env (or in the Dashboard "Storage" panel)
|
||||||
# Example / 示例:
|
# to point at the host folder you want mounted into the container at /data.
|
||||||
# - /path/to/your/Obsidian Vault/Ombre Brain:/data
|
# 挂载你的 Obsidian 仓库(或任意宿主机目录)做持久化存储。
|
||||||
- /Users/p0lar1s/Library/Mobile Documents/iCloud~md~obsidian/Documents/Obsidian Vault/Ombre Brain:/data
|
# 在 .env(或 Dashboard 的「存储」面板)中设置 OMBRE_HOST_VAULT_DIR
|
||||||
|
# 指向你希望挂载到容器 /data 的宿主机目录。
|
||||||
|
#
|
||||||
|
# Examples / 示例:
|
||||||
|
# OMBRE_HOST_VAULT_DIR=/path/to/your/Obsidian Vault/Ombre Brain
|
||||||
|
# OMBRE_HOST_VAULT_DIR=~/Library/Mobile Documents/iCloud~md~obsidian/Documents/Obsidian Vault/Ombre Brain
|
||||||
|
- ${OMBRE_HOST_VAULT_DIR:-./buckets}:/data
|
||||||
- ./config.yaml:/app/config.yaml
|
- ./config.yaml:/app/config.yaml
|
||||||
|
|
||||||
# Cloudflare Tunnel (optional) — expose to public internet
|
# Cloudflare Tunnel (optional) — expose to public internet
|
||||||
|
|||||||
190
embedding_engine.py
Normal file
190
embedding_engine.py
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Embedding Engine (embedding_engine.py)
|
||||||
|
# 模块:向量化引擎
|
||||||
|
#
|
||||||
|
# Generates embeddings via Gemini API (OpenAI-compatible),
|
||||||
|
# stores them in SQLite, and provides cosine similarity search.
|
||||||
|
# 通过 Gemini API(OpenAI 兼容)生成 embedding,
|
||||||
|
# 存储在 SQLite 中,提供余弦相似度搜索。
|
||||||
|
#
|
||||||
|
# Depended on by: server.py, bucket_manager.py
|
||||||
|
# 被谁依赖:server.py, bucket_manager.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import math
|
||||||
|
import sqlite3
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.embedding")
|
||||||
|
|
||||||
|
|
||||||
|
class EmbeddingEngine:
|
||||||
|
"""
|
||||||
|
Embedding generation + SQLite vector storage + cosine search.
|
||||||
|
向量生成 + SQLite 向量存储 + 余弦搜索。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: dict):
|
||||||
|
dehy_cfg = config.get("dehydration", {})
|
||||||
|
embed_cfg = config.get("embedding", {})
|
||||||
|
|
||||||
|
self.api_key = (embed_cfg.get("api_key") or dehy_cfg.get("api_key") or "").strip()
|
||||||
|
self.base_url = (
|
||||||
|
(embed_cfg.get("base_url") or "").strip()
|
||||||
|
or (dehy_cfg.get("base_url") or "").strip()
|
||||||
|
or "https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||||
|
)
|
||||||
|
self.model = embed_cfg.get("model", "gemini-embedding-001")
|
||||||
|
self.enabled = bool(self.api_key) and embed_cfg.get("enabled", True)
|
||||||
|
|
||||||
|
# --- SQLite path: buckets_dir/embeddings.db ---
|
||||||
|
db_path = os.path.join(config["buckets_dir"], "embeddings.db")
|
||||||
|
self.db_path = db_path
|
||||||
|
|
||||||
|
# --- Initialize client ---
|
||||||
|
if self.enabled:
|
||||||
|
self.client = AsyncOpenAI(
|
||||||
|
api_key=self.api_key,
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=30.0,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.client = None
|
||||||
|
|
||||||
|
# --- Initialize SQLite ---
|
||||||
|
self._init_db()
|
||||||
|
|
||||||
|
def _init_db(self):
|
||||||
|
"""Create embeddings table if not exists."""
|
||||||
|
os.makedirs(os.path.dirname(self.db_path), exist_ok=True)
|
||||||
|
conn = sqlite3.connect(self.db_path)
|
||||||
|
conn.execute("""
|
||||||
|
CREATE TABLE IF NOT EXISTS embeddings (
|
||||||
|
bucket_id TEXT PRIMARY KEY,
|
||||||
|
embedding TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL
|
||||||
|
)
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
async def generate_and_store(self, bucket_id: str, content: str) -> bool:
|
||||||
|
"""
|
||||||
|
Generate embedding for content and store in SQLite.
|
||||||
|
为内容生成 embedding 并存入 SQLite。
|
||||||
|
Returns True on success, False on failure.
|
||||||
|
"""
|
||||||
|
if not self.enabled or not content or not content.strip():
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
embedding = await self._generate_embedding(content)
|
||||||
|
if not embedding:
|
||||||
|
return False
|
||||||
|
self._store_embedding(bucket_id, embedding)
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Embedding generation failed for {bucket_id}: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def _generate_embedding(self, text: str) -> list[float]:
|
||||||
|
"""Call API to generate embedding vector."""
|
||||||
|
# Truncate to avoid token limits
|
||||||
|
truncated = text[:2000]
|
||||||
|
try:
|
||||||
|
response = await self.client.embeddings.create(
|
||||||
|
model=self.model,
|
||||||
|
input=truncated,
|
||||||
|
)
|
||||||
|
if response.data and len(response.data) > 0:
|
||||||
|
return response.data[0].embedding
|
||||||
|
return []
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Embedding API call failed: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
def _store_embedding(self, bucket_id: str, embedding: list[float]):
|
||||||
|
"""Store embedding in SQLite."""
|
||||||
|
from utils import now_iso
|
||||||
|
conn = sqlite3.connect(self.db_path)
|
||||||
|
conn.execute(
|
||||||
|
"INSERT OR REPLACE INTO embeddings (bucket_id, embedding, updated_at) VALUES (?, ?, ?)",
|
||||||
|
(bucket_id, json.dumps(embedding), now_iso()),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
def delete_embedding(self, bucket_id: str):
|
||||||
|
"""Remove embedding when bucket is deleted."""
|
||||||
|
conn = sqlite3.connect(self.db_path)
|
||||||
|
conn.execute("DELETE FROM embeddings WHERE bucket_id = ?", (bucket_id,))
|
||||||
|
conn.commit()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
async def get_embedding(self, bucket_id: str) -> list[float] | None:
|
||||||
|
"""Retrieve stored embedding for a bucket. Returns None if not found."""
|
||||||
|
conn = sqlite3.connect(self.db_path)
|
||||||
|
row = conn.execute(
|
||||||
|
"SELECT embedding FROM embeddings WHERE bucket_id = ?", (bucket_id,)
|
||||||
|
).fetchone()
|
||||||
|
conn.close()
|
||||||
|
if row:
|
||||||
|
try:
|
||||||
|
return json.loads(row[0])
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
return None
|
||||||
|
return None
|
||||||
|
|
||||||
|
async def search_similar(self, query: str, top_k: int = 10) -> list[tuple[str, float]]:
|
||||||
|
"""
|
||||||
|
Search for buckets similar to query text.
|
||||||
|
Returns list of (bucket_id, similarity_score) sorted by score desc.
|
||||||
|
搜索与查询文本相似的桶。返回 (bucket_id, 相似度分数) 列表。
|
||||||
|
"""
|
||||||
|
if not self.enabled:
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
query_embedding = await self._generate_embedding(query)
|
||||||
|
if not query_embedding:
|
||||||
|
return []
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Query embedding failed: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Load all embeddings from SQLite
|
||||||
|
conn = sqlite3.connect(self.db_path)
|
||||||
|
rows = conn.execute("SELECT bucket_id, embedding FROM embeddings").fetchall()
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
if not rows:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Calculate cosine similarity
|
||||||
|
results = []
|
||||||
|
for bucket_id, emb_json in rows:
|
||||||
|
try:
|
||||||
|
stored_embedding = json.loads(emb_json)
|
||||||
|
sim = self._cosine_similarity(query_embedding, stored_embedding)
|
||||||
|
results.append((bucket_id, sim))
|
||||||
|
except (json.JSONDecodeError, Exception):
|
||||||
|
continue
|
||||||
|
|
||||||
|
results.sort(key=lambda x: x[1], reverse=True)
|
||||||
|
return results[:top_k]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _cosine_similarity(a: list[float], b: list[float]) -> float:
|
||||||
|
"""Calculate cosine similarity between two vectors."""
|
||||||
|
if len(a) != len(b) or not a:
|
||||||
|
return 0.0
|
||||||
|
dot = sum(x * y for x, y in zip(a, b))
|
||||||
|
norm_a = math.sqrt(sum(x * x for x in a))
|
||||||
|
norm_b = math.sqrt(sum(x * x for x in b))
|
||||||
|
if norm_a == 0 or norm_b == 0:
|
||||||
|
return 0.0
|
||||||
|
return dot / (norm_a * norm_b)
|
||||||
767
import_memory.py
Normal file
767
import_memory.py
Normal file
@@ -0,0 +1,767 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Memory Import Engine (import_memory.py)
|
||||||
|
# 模块:历史记忆导入引擎
|
||||||
|
#
|
||||||
|
# Imports conversation history from various platforms into OB.
|
||||||
|
# 将各平台对话历史导入 OB 记忆系统。
|
||||||
|
#
|
||||||
|
# Supports: Claude JSON, ChatGPT export, DeepSeek, Markdown, plain text
|
||||||
|
# 支持格式:Claude JSON、ChatGPT 导出、DeepSeek、Markdown、纯文本
|
||||||
|
#
|
||||||
|
# Features:
|
||||||
|
# - Chunked processing with resume support
|
||||||
|
# - Progress persistence (import_state.json)
|
||||||
|
# - Raw preservation mode for special contexts
|
||||||
|
# - Post-import frequency pattern detection
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import hashlib
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from utils import count_tokens_approx, now_iso
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.import")
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Format Parsers — normalize any format to conversation turns
|
||||||
|
# 格式解析器 — 将任意格式标准化为对话轮次
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
def _parse_claude_json(data: dict | list) -> list[dict]:
|
||||||
|
"""Parse Claude.ai export JSON → [{role, content, timestamp}, ...]"""
|
||||||
|
turns = []
|
||||||
|
conversations = data if isinstance(data, list) else [data]
|
||||||
|
for conv in conversations:
|
||||||
|
if not isinstance(conv, dict):
|
||||||
|
continue
|
||||||
|
messages = conv.get("chat_messages", conv.get("messages", []))
|
||||||
|
for msg in messages:
|
||||||
|
if not isinstance(msg, dict):
|
||||||
|
continue
|
||||||
|
content = msg.get("text", msg.get("content", ""))
|
||||||
|
if isinstance(content, list):
|
||||||
|
content = " ".join(
|
||||||
|
p.get("text", "") for p in content if isinstance(p, dict)
|
||||||
|
)
|
||||||
|
if not content or not content.strip():
|
||||||
|
continue
|
||||||
|
role = msg.get("sender", msg.get("role", "user"))
|
||||||
|
ts = msg.get("created_at", msg.get("timestamp", ""))
|
||||||
|
turns.append({"role": role, "content": content.strip(), "timestamp": ts})
|
||||||
|
return turns
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_chatgpt_json(data: list | dict) -> list[dict]:
|
||||||
|
"""Parse ChatGPT export JSON → [{role, content, timestamp}, ...]"""
|
||||||
|
turns = []
|
||||||
|
conversations = data if isinstance(data, list) else [data]
|
||||||
|
for conv in conversations:
|
||||||
|
if not isinstance(conv, dict):
|
||||||
|
continue
|
||||||
|
mapping = conv.get("mapping", {})
|
||||||
|
if mapping:
|
||||||
|
# ChatGPT uses a tree structure with mapping
|
||||||
|
# Filter out None nodes before sorting
|
||||||
|
valid_nodes = [n for n in mapping.values() if isinstance(n, dict)]
|
||||||
|
|
||||||
|
def _node_ts(n):
|
||||||
|
msg = n.get("message")
|
||||||
|
if not isinstance(msg, dict):
|
||||||
|
return 0
|
||||||
|
return msg.get("create_time") or 0
|
||||||
|
|
||||||
|
sorted_nodes = sorted(valid_nodes, key=_node_ts)
|
||||||
|
for node in sorted_nodes:
|
||||||
|
msg = node.get("message")
|
||||||
|
if not msg or not isinstance(msg, dict):
|
||||||
|
continue
|
||||||
|
content_obj = msg.get("content", {})
|
||||||
|
content_parts = content_obj.get("parts", []) if isinstance(content_obj, dict) else []
|
||||||
|
content = " ".join(str(p) for p in content_parts if p)
|
||||||
|
if not content.strip():
|
||||||
|
continue
|
||||||
|
role = msg.get("author", {}).get("role", "user")
|
||||||
|
ts = msg.get("create_time", "")
|
||||||
|
if isinstance(ts, (int, float)):
|
||||||
|
ts = datetime.fromtimestamp(ts).isoformat()
|
||||||
|
turns.append({"role": role, "content": content.strip(), "timestamp": str(ts)})
|
||||||
|
else:
|
||||||
|
# Simpler format: list of messages
|
||||||
|
messages = conv.get("messages", [])
|
||||||
|
for msg in messages:
|
||||||
|
if not isinstance(msg, dict):
|
||||||
|
continue
|
||||||
|
content = msg.get("content", msg.get("text", ""))
|
||||||
|
if isinstance(content, dict):
|
||||||
|
content = " ".join(str(p) for p in content.get("parts", []))
|
||||||
|
if not content or not content.strip():
|
||||||
|
continue
|
||||||
|
role = msg.get("role", msg.get("author", {}).get("role", "user"))
|
||||||
|
ts = msg.get("timestamp", msg.get("create_time", ""))
|
||||||
|
turns.append({"role": role, "content": content.strip(), "timestamp": str(ts)})
|
||||||
|
return turns
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_markdown(text: str) -> list[dict]:
|
||||||
|
"""Parse Markdown/plain text → [{role, content, timestamp}, ...]"""
|
||||||
|
# Try to detect conversation patterns
|
||||||
|
lines = text.split("\n")
|
||||||
|
turns = []
|
||||||
|
current_role = "user"
|
||||||
|
current_content = []
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
stripped = line.strip()
|
||||||
|
# Detect role switches
|
||||||
|
if stripped.lower().startswith(("human:", "user:", "你:", "我:")):
|
||||||
|
if current_content:
|
||||||
|
turns.append({"role": current_role, "content": "\n".join(current_content).strip(), "timestamp": ""})
|
||||||
|
current_role = "user"
|
||||||
|
content_after = stripped.split(":", 1)[1].strip() if ":" in stripped else ""
|
||||||
|
current_content = [content_after] if content_after else []
|
||||||
|
elif stripped.lower().startswith(("assistant:", "claude:", "ai:", "gpt:", "bot:", "deepseek:")):
|
||||||
|
if current_content:
|
||||||
|
turns.append({"role": current_role, "content": "\n".join(current_content).strip(), "timestamp": ""})
|
||||||
|
current_role = "assistant"
|
||||||
|
content_after = stripped.split(":", 1)[1].strip() if ":" in stripped else ""
|
||||||
|
current_content = [content_after] if content_after else []
|
||||||
|
else:
|
||||||
|
current_content.append(line)
|
||||||
|
|
||||||
|
if current_content:
|
||||||
|
content = "\n".join(current_content).strip()
|
||||||
|
if content:
|
||||||
|
turns.append({"role": current_role, "content": content, "timestamp": ""})
|
||||||
|
|
||||||
|
# If no role patterns detected, treat entire text as one big chunk
|
||||||
|
if not turns:
|
||||||
|
turns = [{"role": "user", "content": text.strip(), "timestamp": ""}]
|
||||||
|
|
||||||
|
return turns
|
||||||
|
|
||||||
|
|
||||||
|
def detect_and_parse(raw_content: str, filename: str = "") -> list[dict]:
    """
    Auto-detect the input format and parse it to normalized turns.

    JSON is attempted when the filename has a ``.json`` extension or no
    extension, or when the content starts with ``{`` / ``[``. The first
    conversation object decides the parser:

    * ``chat_messages`` key -> Claude parser
    * ``mapping`` key       -> ChatGPT parser
    * ``messages`` key      -> ChatGPT parser when the first message's
      ``content`` is a dict, otherwise the Claude parser
    * ``role`` + ``content`` keys -> the data itself is a message (or a flat
      list of messages) and is wrapped as one conversation

    Anything else, or any parsing failure, falls back to the Markdown/plain
    text parser.
    """
    ext = Path(filename).suffix.lower() if filename else ""

    # Try JSON first
    if ext in (".json", "") or raw_content.strip().startswith(("{", "[")):
        try:
            data = json.loads(raw_content)
            # Detect Claude vs ChatGPT format from the first conversation
            if isinstance(data, list):
                sample = data[0] if data else {}
            else:
                sample = data

            if isinstance(sample, dict):
                if "chat_messages" in sample:
                    return _parse_claude_json(data)
                if "mapping" in sample:
                    return _parse_chatgpt_json(data)
                if "messages" in sample:
                    # Could be either — dict-shaped content means ChatGPT
                    msgs = sample["messages"]
                    first = msgs[0] if msgs else None
                    if isinstance(first, dict) and isinstance(first.get("content"), dict):
                        return _parse_chatgpt_json(data)
                    return _parse_claude_json(data)
                if "role" in sample and "content" in sample:
                    # The items are messages, not conversations. Handing them
                    # over unwrapped would make the parser look for a message
                    # list inside each message and return nothing.
                    messages = data if isinstance(data, list) else [data]
                    return _parse_claude_json({"messages": messages})
        except (json.JSONDecodeError, KeyError, IndexError, AttributeError, TypeError):
            pass

    # Fall back to markdown/text
    return _parse_markdown(raw_content)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Chunking — split turns into ~10k token windows
|
||||||
|
# 分窗 — 按对话轮次边界切为 ~10k token 窗口
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
def chunk_turns(turns: list[dict], target_tokens: int = 10000) -> list[dict]:
    """
    Pack conversation turns into windows of roughly ``target_tokens`` tokens.

    Each turn is rendered as ``[用户] ...`` (roles "user"/"human") or
    ``[AI] ...`` and measured with ``count_tokens_approx``. Turns are never
    split: a window is closed when adding the next turn would exceed the
    target, and a turn larger than 1.5x the target is emitted as a window of
    its own.

    Returns a list of ``{content, timestamp_start, timestamp_end,
    turn_count}`` dicts.
    """
    windows = []
    pending = []
    pending_tokens = 0
    pending_turns = 0
    window_start = ""
    window_end = ""
    oversize_limit = target_tokens * 1.5

    for turn in turns:
        speaker = "用户" if turn["role"] in ("user", "human") else "AI"
        rendered = f"[{speaker}] {turn['content']}"
        cost = count_tokens_approx(rendered)
        stamp = turn.get("timestamp", "")
        oversized = cost > oversize_limit

        # Close the open window before an oversized turn, or when this turn
        # would push it past the target.
        if pending and (oversized or pending_tokens + cost > target_tokens):
            windows.append({
                "content": "\n".join(pending),
                "timestamp_start": window_start,
                "timestamp_end": window_end,
                "turn_count": pending_turns,
            })
            pending = []
            pending_tokens = 0
            pending_turns = 0
            window_start = ""

        if oversized:
            # The oversized turn becomes a window of its own.
            windows.append({
                "content": rendered,
                "timestamp_start": stamp,
                "timestamp_end": stamp,
                "turn_count": 1,
            })
            continue

        # First non-empty timestamp seen in the window marks its start.
        if not window_start:
            window_start = stamp
        window_end = stamp
        pending.append(rendered)
        pending_tokens += cost
        pending_turns += 1

    if pending:
        windows.append({
            "content": "\n".join(pending),
            "timestamp_start": window_start,
            "timestamp_end": window_end,
            "turn_count": pending_turns,
        })

    return windows
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Import State — persistent progress tracking
|
||||||
|
# 导入状态 — 持久化进度追踪
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
class ImportState:
    """
    Import progress with file-based persistence.

    Progress lives in ``self.data`` (a plain dict) and is written to
    ``<state_dir>/import_state.json``. ``status`` is one of
    idle | running | paused | completed | error.
    """

    def __init__(self, state_dir: str):
        """Start with an idle, empty state; nothing is read from disk yet."""
        self.state_file = os.path.join(state_dir, "import_state.json")
        self.data = self._blank_state()

    @staticmethod
    def _blank_state() -> dict:
        """Return a fresh idle state dict with every counter at zero."""
        return {
            "source_file": "",
            "source_hash": "",
            "total_chunks": 0,
            "processed": 0,
            "api_calls": 0,
            "memories_created": 0,
            "memories_merged": 0,
            "memories_raw": 0,
            "errors": [],
            "status": "idle",  # idle | running | paused | completed | error
            "started_at": "",
            "updated_at": "",
        }

    def load(self) -> bool:
        """
        Merge the saved state file into ``self.data``.

        Returns True when a state file was read and applied. Returns False,
        leaving ``self.data`` untouched, when the file is missing, unreadable,
        not valid JSON, or does not contain a JSON object.
        """
        try:
            with open(self.state_file, "r", encoding="utf-8") as f:
                saved = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and undecodable bytes.
            return False
        if not isinstance(saved, dict):
            # A list/number/string here would make dict.update() raise.
            return False
        self.data.update(saved)
        return True

    def save(self):
        """
        Persist state to file, stamping ``updated_at``.

        Writes to a ``.tmp`` sibling and then replaces the target, so readers
        see either the previous file or the new one.
        """
        self.data["updated_at"] = now_iso()
        # dirname is "" for a bare filename; makedirs("") would raise.
        os.makedirs(os.path.dirname(self.state_file) or ".", exist_ok=True)
        tmp = self.state_file + ".tmp"
        with open(tmp, "w", encoding="utf-8") as f:
            json.dump(self.data, f, ensure_ascii=False, indent=2)
        os.replace(tmp, self.state_file)

    def reset(self, source_file: str, source_hash: str, total_chunks: int):
        """Replace the state with a fresh "running" record for a new import."""
        started = now_iso()
        fresh = self._blank_state()
        fresh.update(
            source_file=source_file,
            source_hash=source_hash,
            total_chunks=total_chunks,
            status="running",
            started_at=started,
            updated_at=started,
        )
        self.data = fresh

    @property
    def can_resume(self) -> bool:
        """True when status is paused/running and chunks remain unprocessed."""
        return self.data["status"] in ("paused", "running") and self.data["processed"] < self.data["total_chunks"]

    def to_dict(self) -> dict:
        """Return a shallow copy of the state dict."""
        return dict(self.data)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Import extraction prompt
|
||||||
|
# 导入提取提示词
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
IMPORT_EXTRACT_PROMPT = """你是一个对话记忆提取专家。从以下对话片段中提取值得长期记住的信息。
|
||||||
|
|
||||||
|
提取规则:
|
||||||
|
1. 提取用户的事实、偏好、习惯、重要事件、情感时刻
|
||||||
|
2. 同一话题的零散信息整合为一条记忆
|
||||||
|
3. 过滤掉纯技术调试输出、代码块、重复问答、无意义寒暄
|
||||||
|
4. 如果对话中有特殊暗号、仪式性行为、关键承诺等,标记 preserve_raw=true
|
||||||
|
5. 如果内容是用户和AI之间的习惯性互动模式(例如打招呼方式、告别习惯),标记 is_pattern=true
|
||||||
|
6. 每条记忆不少于30字
|
||||||
|
7. 总条目数控制在 0~5 个(没有值得记的就返回空数组)
|
||||||
|
8. 在 content 中对人名、地名、专有名词用 [[双链]] 标记
|
||||||
|
|
||||||
|
输出格式(纯 JSON 数组,无其他内容):
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "条目标题(10字以内)",
|
||||||
|
"content": "整理后的内容",
|
||||||
|
"domain": ["主题域1"],
|
||||||
|
"valence": 0.7,
|
||||||
|
"arousal": 0.4,
|
||||||
|
"tags": ["核心词1", "核心词2", "扩展词1"],
|
||||||
|
"importance": 5,
|
||||||
|
"preserve_raw": false,
|
||||||
|
"is_pattern": false
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
主题域可选(选 1~2 个):
|
||||||
|
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
|
||||||
|
人际: ["家庭", "恋爱", "友谊", "社交"]
|
||||||
|
成长: ["工作", "学习", "考试", "求职"]
|
||||||
|
身心: ["健康", "心理", "睡眠", "运动"]
|
||||||
|
兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
|
||||||
|
数字: ["编程", "AI", "硬件", "网络"]
|
||||||
|
事务: ["财务", "计划", "待办"]
|
||||||
|
内心: ["情绪", "回忆", "梦境", "自省"]
|
||||||
|
|
||||||
|
importance: 1-10
|
||||||
|
valence: 0~1(0=消极, 0.5=中性, 1=积极)
|
||||||
|
arousal: 0~1(0=平静, 0.5=普通, 1=激动)
|
||||||
|
preserve_raw: true = 特殊情境/暗号/仪式,保留原文不摘要
|
||||||
|
is_pattern: true = 反复出现的习惯性行为模式"""
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Import Engine — core processing logic
|
||||||
|
# 导入引擎 — 核心处理逻辑
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
class ImportEngine:
|
||||||
|
"""
|
||||||
|
Processes conversation history files into OB memory buckets.
|
||||||
|
将对话历史文件处理为 OB 记忆桶。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: dict, bucket_mgr, dehydrator, embedding_engine=None):
|
||||||
|
self.config = config
|
||||||
|
self.bucket_mgr = bucket_mgr
|
||||||
|
self.dehydrator = dehydrator
|
||||||
|
self.embedding_engine = embedding_engine
|
||||||
|
self.state = ImportState(config["buckets_dir"])
|
||||||
|
self._paused = False
|
||||||
|
self._running = False
|
||||||
|
self._chunks: list[dict] = []
|
||||||
|
|
||||||
|
@property
|
||||||
|
def is_running(self) -> bool:
|
||||||
|
return self._running
|
||||||
|
|
||||||
|
def pause(self):
|
||||||
|
"""Request pause — will stop after current chunk finishes."""
|
||||||
|
self._paused = True
|
||||||
|
|
||||||
|
def get_status(self) -> dict:
|
||||||
|
"""Get current import status."""
|
||||||
|
return self.state.to_dict()
|
||||||
|
|
||||||
|
async def start(
|
||||||
|
self,
|
||||||
|
raw_content: str,
|
||||||
|
filename: str = "",
|
||||||
|
preserve_raw: bool = False,
|
||||||
|
resume: bool = False,
|
||||||
|
) -> dict:
|
||||||
|
"""
|
||||||
|
Start or resume an import.
|
||||||
|
开始或恢复导入。
|
||||||
|
"""
|
||||||
|
if self._running:
|
||||||
|
return {"error": "Import already running"}
|
||||||
|
|
||||||
|
self._running = True
|
||||||
|
self._paused = False
|
||||||
|
|
||||||
|
try:
|
||||||
|
source_hash = hashlib.sha256(raw_content.encode()).hexdigest()[:16]
|
||||||
|
|
||||||
|
# Check for resume
|
||||||
|
if resume and self.state.load() and self.state.can_resume:
|
||||||
|
if self.state.data["source_hash"] == source_hash:
|
||||||
|
logger.info(f"Resuming import from chunk {self.state.data['processed']}/{self.state.data['total_chunks']}")
|
||||||
|
# Re-parse and re-chunk to get the same chunks
|
||||||
|
turns = detect_and_parse(raw_content, filename)
|
||||||
|
self._chunks = chunk_turns(turns)
|
||||||
|
self.state.data["status"] = "running"
|
||||||
|
self.state.save()
|
||||||
|
return await self._process_chunks(preserve_raw)
|
||||||
|
else:
|
||||||
|
logger.warning("Source file changed, starting fresh import")
|
||||||
|
|
||||||
|
# Fresh import
|
||||||
|
turns = detect_and_parse(raw_content, filename)
|
||||||
|
if not turns:
|
||||||
|
self._running = False
|
||||||
|
return {"error": "No conversation turns found in file"}
|
||||||
|
|
||||||
|
self._chunks = chunk_turns(turns)
|
||||||
|
if not self._chunks:
|
||||||
|
self._running = False
|
||||||
|
return {"error": "No processable chunks after splitting"}
|
||||||
|
|
||||||
|
self.state.reset(filename, source_hash, len(self._chunks))
|
||||||
|
self.state.save()
|
||||||
|
|
||||||
|
logger.info(f"Starting import: {len(turns)} turns → {len(self._chunks)} chunks")
|
||||||
|
return await self._process_chunks(preserve_raw)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.state.data["status"] = "error"
|
||||||
|
self.state.data["errors"].append(str(e))
|
||||||
|
self.state.save()
|
||||||
|
self._running = False
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def _process_chunks(self, preserve_raw: bool) -> dict:
|
||||||
|
"""Process chunks from current position."""
|
||||||
|
start_idx = self.state.data["processed"]
|
||||||
|
|
||||||
|
for i in range(start_idx, len(self._chunks)):
|
||||||
|
if self._paused:
|
||||||
|
self.state.data["status"] = "paused"
|
||||||
|
self.state.save()
|
||||||
|
self._running = False
|
||||||
|
logger.info(f"Import paused at chunk {i}/{len(self._chunks)}")
|
||||||
|
return self.state.to_dict()
|
||||||
|
|
||||||
|
chunk = self._chunks[i]
|
||||||
|
try:
|
||||||
|
await self._process_single_chunk(chunk, preserve_raw)
|
||||||
|
except Exception as e:
|
||||||
|
err_msg = f"Chunk {i}: {str(e)[:200]}"
|
||||||
|
logger.warning(f"Import chunk error: {err_msg}")
|
||||||
|
if len(self.state.data["errors"]) < 100:
|
||||||
|
self.state.data["errors"].append(err_msg)
|
||||||
|
|
||||||
|
self.state.data["processed"] = i + 1
|
||||||
|
# Save progress every chunk
|
||||||
|
self.state.save()
|
||||||
|
|
||||||
|
self.state.data["status"] = "completed"
|
||||||
|
self.state.save()
|
||||||
|
self._running = False
|
||||||
|
logger.info(f"Import completed: {self.state.data['memories_created']} created, {self.state.data['memories_merged']} merged")
|
||||||
|
return self.state.to_dict()
|
||||||
|
|
||||||
|
async def _process_single_chunk(self, chunk: dict, preserve_raw: bool):
|
||||||
|
"""Extract memories from a single chunk and store them."""
|
||||||
|
content = chunk["content"]
|
||||||
|
if not content.strip():
|
||||||
|
return
|
||||||
|
|
||||||
|
# --- LLM extraction ---
|
||||||
|
try:
|
||||||
|
items = await self._extract_memories(content)
|
||||||
|
self.state.data["api_calls"] += 1
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"LLM extraction failed: {e}")
|
||||||
|
self.state.data["api_calls"] += 1
|
||||||
|
return
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
return
|
||||||
|
|
||||||
|
# --- Store each extracted memory ---
|
||||||
|
for item in items:
|
||||||
|
try:
|
||||||
|
should_preserve = preserve_raw or item.get("preserve_raw", False)
|
||||||
|
|
||||||
|
if should_preserve:
|
||||||
|
# Raw mode: store original content without summarization
|
||||||
|
bucket_id = await self.bucket_mgr.create(
|
||||||
|
content=item["content"],
|
||||||
|
tags=item.get("tags", []),
|
||||||
|
importance=item.get("importance", 5),
|
||||||
|
domain=item.get("domain", ["未分类"]),
|
||||||
|
valence=item.get("valence", 0.5),
|
||||||
|
arousal=item.get("arousal", 0.3),
|
||||||
|
name=item.get("name"),
|
||||||
|
)
|
||||||
|
if self.embedding_engine:
|
||||||
|
try:
|
||||||
|
await self.embedding_engine.generate_and_store(bucket_id, item["content"])
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
self.state.data["memories_raw"] += 1
|
||||||
|
self.state.data["memories_created"] += 1
|
||||||
|
else:
|
||||||
|
# Normal mode: go through merge-or-create pipeline
|
||||||
|
is_merged = await self._merge_or_create_item(item)
|
||||||
|
if is_merged:
|
||||||
|
self.state.data["memories_merged"] += 1
|
||||||
|
else:
|
||||||
|
self.state.data["memories_created"] += 1
|
||||||
|
|
||||||
|
# Patch timestamp if available
|
||||||
|
if chunk.get("timestamp_start"):
|
||||||
|
# We don't have update support for created, so skip
|
||||||
|
pass
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to store memory: {item.get('name', '?')}: {e}")
|
||||||
|
|
||||||
|
async def _extract_memories(self, chunk_content: str) -> list[dict]:
|
||||||
|
"""Use LLM to extract memories from a conversation chunk."""
|
||||||
|
if not self.dehydrator.api_available:
|
||||||
|
raise RuntimeError("API not available")
|
||||||
|
|
||||||
|
response = await self.dehydrator.client.chat.completions.create(
|
||||||
|
model=self.dehydrator.model,
|
||||||
|
messages=[
|
||||||
|
{"role": "system", "content": IMPORT_EXTRACT_PROMPT},
|
||||||
|
{"role": "user", "content": chunk_content[:12000]},
|
||||||
|
],
|
||||||
|
max_tokens=2048,
|
||||||
|
temperature=0.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
if not response.choices:
|
||||||
|
return []
|
||||||
|
|
||||||
|
raw = response.choices[0].message.content or ""
|
||||||
|
if not raw.strip():
|
||||||
|
return []
|
||||||
|
|
||||||
|
return self._parse_extraction(raw)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _parse_extraction(raw: str) -> list[dict]:
|
||||||
|
"""Parse and validate LLM extraction result."""
|
||||||
|
try:
|
||||||
|
cleaned = raw.strip()
|
||||||
|
if cleaned.startswith("```"):
|
||||||
|
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
|
||||||
|
items = json.loads(cleaned)
|
||||||
|
except (json.JSONDecodeError, IndexError, ValueError):
|
||||||
|
logger.warning(f"Import extraction JSON parse failed: {raw[:200]}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not isinstance(items, list):
|
||||||
|
return []
|
||||||
|
|
||||||
|
validated = []
|
||||||
|
for item in items:
|
||||||
|
if not isinstance(item, dict) or not item.get("content"):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
importance = max(1, min(10, int(item.get("importance", 5))))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
importance = 5
|
||||||
|
try:
|
||||||
|
valence = max(0.0, min(1.0, float(item.get("valence", 0.5))))
|
||||||
|
arousal = max(0.0, min(1.0, float(item.get("arousal", 0.3))))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
valence, arousal = 0.5, 0.3
|
||||||
|
|
||||||
|
validated.append({
|
||||||
|
"name": str(item.get("name", ""))[:20],
|
||||||
|
"content": str(item["content"]),
|
||||||
|
"domain": item.get("domain", ["未分类"])[:3],
|
||||||
|
"valence": valence,
|
||||||
|
"arousal": arousal,
|
||||||
|
"tags": [str(t) for t in item.get("tags", [])][:10],
|
||||||
|
"importance": importance,
|
||||||
|
"preserve_raw": bool(item.get("preserve_raw", False)),
|
||||||
|
"is_pattern": bool(item.get("is_pattern", False)),
|
||||||
|
})
|
||||||
|
|
||||||
|
return validated
|
||||||
|
|
||||||
|
async def _merge_or_create_item(self, item: dict) -> bool:
|
||||||
|
"""Try to merge with existing bucket, or create new. Returns is_merged."""
|
||||||
|
content = item["content"]
|
||||||
|
domain = item.get("domain", ["未分类"])
|
||||||
|
tags = item.get("tags", [])
|
||||||
|
importance = item.get("importance", 5)
|
||||||
|
valence = item.get("valence", 0.5)
|
||||||
|
arousal = item.get("arousal", 0.3)
|
||||||
|
name = item.get("name", "")
|
||||||
|
|
||||||
|
try:
|
||||||
|
existing = await self.bucket_mgr.search(content, limit=1, domain_filter=domain or None)
|
||||||
|
except Exception:
|
||||||
|
existing = []
|
||||||
|
|
||||||
|
merge_threshold = self.config.get("merge_threshold", 75)
|
||||||
|
|
||||||
|
if existing and existing[0].get("score", 0) > merge_threshold:
|
||||||
|
bucket = existing[0]
|
||||||
|
if not (bucket["metadata"].get("pinned") or bucket["metadata"].get("protected")):
|
||||||
|
try:
|
||||||
|
merged = await self.dehydrator.merge(bucket["content"], content)
|
||||||
|
self.state.data["api_calls"] += 1
|
||||||
|
old_v = bucket["metadata"].get("valence", 0.5)
|
||||||
|
old_a = bucket["metadata"].get("arousal", 0.3)
|
||||||
|
await self.bucket_mgr.update(
|
||||||
|
bucket["id"],
|
||||||
|
content=merged,
|
||||||
|
tags=list(set(bucket["metadata"].get("tags", []) + tags)),
|
||||||
|
importance=max(bucket["metadata"].get("importance", 5), importance),
|
||||||
|
domain=list(set(bucket["metadata"].get("domain", []) + domain)),
|
||||||
|
valence=round((old_v + valence) / 2, 2),
|
||||||
|
arousal=round((old_a + arousal) / 2, 2),
|
||||||
|
)
|
||||||
|
if self.embedding_engine:
|
||||||
|
try:
|
||||||
|
await self.embedding_engine.generate_and_store(bucket["id"], merged)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return True
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Merge failed during import: {e}")
|
||||||
|
self.state.data["api_calls"] += 1
|
||||||
|
|
||||||
|
# Create new
|
||||||
|
bucket_id = await self.bucket_mgr.create(
|
||||||
|
content=content,
|
||||||
|
tags=tags,
|
||||||
|
importance=importance,
|
||||||
|
domain=domain,
|
||||||
|
valence=valence,
|
||||||
|
arousal=arousal,
|
||||||
|
name=name or None,
|
||||||
|
)
|
||||||
|
if self.embedding_engine:
|
||||||
|
try:
|
||||||
|
await self.embedding_engine.generate_and_store(bucket_id, content)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return False
|
||||||
|
|
||||||
|
async def detect_patterns(self) -> list[dict]:
    """
    Post-import: detect high-frequency patterns via embedding clustering.

    Only non-archived buckets whose metadata type is "dynamic" and that are
    neither pinned nor resolved are considered. Clustering is greedy: each
    bucket that has not been assigned yet becomes a cluster lead and absorbs
    every later unassigned bucket whose cosine similarity to the lead is
    above 0.7. Clusters with fewer than 3 members are discarded.

    Returns:
        At most 20 dicts, sorted by descending ``count``, each with the keys
        ``pattern_content`` (first 200 characters of the lead bucket),
        ``pattern_name`` (lead bucket name, or its id when it has no name),
        ``count``, ``bucket_ids`` and ``suggested_action`` ("pin" for
        clusters of 5 or more buckets, otherwise "review"). An empty list is
        returned when there is no embedding engine, or when fewer than 5
        candidate buckets or fewer than 5 stored embeddings are available.
    """
    if not self.embedding_engine:
        return []

    all_buckets = await self.bucket_mgr.list_all(include_archive=False)
    dynamic_buckets = [
        b for b in all_buckets
        if b["metadata"].get("type") == "dynamic"
        and not b["metadata"].get("pinned")
        and not b["metadata"].get("resolved")
    ]

    if len(dynamic_buckets) < 5:
        return []

    # Get embeddings (buckets without a stored embedding are left out).
    embeddings = {}
    for b in dynamic_buckets:
        emb = await self.embedding_engine.get_embedding(b["id"])
        if emb is not None:
            embeddings[b["id"]] = emb

    if len(embeddings) < 5:
        return []

    import numpy as np

    # Convert every embedding and compute its norm exactly once, instead of
    # redoing both for each pair inside the O(n^2) comparison loop below.
    vectors = {bid: np.array(emb) for bid, emb in embeddings.items()}
    norms = {bid: np.linalg.norm(vec) for bid, vec in vectors.items()}

    # Find clusters: group by pairwise similarity > 0.7
    ids = list(vectors)
    clusters: dict[str, list[str]] = {}
    visited = set()

    for i, id_a in enumerate(ids):
        if id_a in visited:
            continue
        visited.add(id_a)
        norm_a = norms[id_a]
        if norm_a == 0:
            # A zero vector has no direction, so it can neither lead nor
            # join a cluster.
            continue
        emb_a = vectors[id_a]
        cluster = [id_a]

        for id_b in ids[i + 1:]:
            if id_b in visited:
                continue
            norm_b = norms[id_b]
            if norm_b == 0:
                continue
            sim = float(np.dot(emb_a, vectors[id_b]) / (norm_a * norm_b))
            if sim > 0.7:
                cluster.append(id_b)
                visited.add(id_b)

        if len(cluster) >= 3:
            clusters[id_a] = cluster

    # Index buckets by id once; setdefault keeps the first occurrence, which
    # is what a linear first-match scan would have found.
    buckets_by_id: dict = {}
    for b in dynamic_buckets:
        buckets_by_id.setdefault(b["id"], b)

    # Format results
    patterns = []
    for lead_id, cluster_ids in clusters.items():
        lead_bucket = buckets_by_id.get(lead_id)
        if not lead_bucket:
            continue
        patterns.append({
            "pattern_content": lead_bucket["content"][:200],
            # Fall back to the id when the name is missing, None or empty.
            "pattern_name": lead_bucket["metadata"].get("name") or lead_id,
            "count": len(cluster_ids),
            "bucket_ids": cluster_ids,
            "suggested_action": "pin" if len(cluster_ids) >= 5 else "review",
        })

    patterns.sort(key=lambda p: p["count"], reverse=True)
    return patterns[:20]
|
||||||
@@ -12,7 +12,25 @@ import os
|
|||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
|
|
||||||
|
def _resolve_vault_dir() -> str:
    """
    Resolve the bucket vault root.

    Priority: $OMBRE_BUCKETS_DIR > config.yaml > built-in ./buckets.

    The environment value and the ``buckets_dir`` value returned by
    ``utils.load_config()`` are both passed through ``os.path.expanduser``,
    so a leading ``~`` works in either place. If the config cannot be
    loaded, lacks ``buckets_dir`` or has an empty value, the ``buckets``
    directory next to this script is used.
    """
    env_dir = os.environ.get("OMBRE_BUCKETS_DIR", "").strip()
    if env_dir:
        return os.path.expanduser(env_dir)
    try:
        from utils import load_config

        configured = load_config()["buckets_dir"]
        if configured:
            return os.path.expanduser(configured)
    except Exception:
        # Deliberately best-effort: any failure to import or read the config
        # (missing module, missing key, unreadable file) falls through to
        # the built-in default below.
        pass
    return os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "buckets"
    )
|
||||||
|
|
||||||
|
|
||||||
|
VAULT_DIR = _resolve_vault_dir()
|
||||||
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
||||||
|
|
||||||
|
|
||||||
@@ -99,7 +117,7 @@ def migrate():
|
|||||||
print(f" ✓ {filename}")
|
print(f" ✓ {filename}")
|
||||||
print(f" → {primary_domain}/{new_filename}")
|
print(f" → {primary_domain}/{new_filename}")
|
||||||
|
|
||||||
print(f"\n迁移完成。")
|
print("\n迁移完成。")
|
||||||
|
|
||||||
# 展示新结构
|
# 展示新结构
|
||||||
print("\n=== 新目录结构 ===")
|
print("\n=== 新目录结构 ===")
|
||||||
|
|||||||
@@ -38,7 +38,11 @@ ANALYZE_PROMPT = (
|
|||||||
'}'
|
'}'
|
||||||
)
|
)
|
||||||
|
|
||||||
DATA_DIR = "/data/dynamic"
|
def _configured_buckets_dir() -> str:
    """Return ``buckets_dir`` from the project config (imported lazily)."""
    # Imported here so that `utils` is only needed when $OMBRE_BUCKETS_DIR
    # is not set.
    from utils import load_config

    return load_config()["buckets_dir"]


# Root of the dynamic buckets: $OMBRE_BUCKETS_DIR when set and non-blank,
# otherwise the configured buckets_dir; a leading "~" is expanded.
DATA_DIR = os.path.join(
    os.path.expanduser(
        os.environ.get("OMBRE_BUCKETS_DIR", "").strip()
        or _configured_buckets_dir()
    ),
    "dynamic",
)
|
||||||
UNCLASS_DIR = os.path.join(DATA_DIR, "未分类")
|
UNCLASS_DIR = os.path.join(DATA_DIR, "未分类")
|
||||||
|
|
||||||
|
|
||||||
@@ -48,11 +52,15 @@ def sanitize(name):
|
|||||||
|
|
||||||
|
|
||||||
async def reclassify():
|
async def reclassify():
|
||||||
|
from utils import load_config
|
||||||
|
cfg = load_config()
|
||||||
|
dehy = cfg.get("dehydration", {})
|
||||||
client = AsyncOpenAI(
|
client = AsyncOpenAI(
|
||||||
api_key=os.environ.get("OMBRE_API_KEY", ""),
|
api_key=os.environ.get("OMBRE_API_KEY", "") or dehy.get("api_key", ""),
|
||||||
base_url="https://api.siliconflow.cn/v1",
|
base_url=dehy.get("base_url", "https://api.deepseek.com/v1"),
|
||||||
timeout=60.0,
|
timeout=60.0,
|
||||||
)
|
)
|
||||||
|
model_name = dehy.get("model", "deepseek-chat")
|
||||||
|
|
||||||
files = sorted(glob.glob(os.path.join(UNCLASS_DIR, "*.md")))
|
files = sorted(glob.glob(os.path.join(UNCLASS_DIR, "*.md")))
|
||||||
print(f"找到 {len(files)} 个未分类文件\n")
|
print(f"找到 {len(files)} 个未分类文件\n")
|
||||||
@@ -66,7 +74,7 @@ async def reclassify():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
resp = await client.chat.completions.create(
|
resp = await client.chat.completions.create(
|
||||||
model="deepseek-ai/DeepSeek-V3",
|
model=model_name,
|
||||||
messages=[
|
messages=[
|
||||||
{"role": "system", "content": ANALYZE_PROMPT},
|
{"role": "system", "content": ANALYZE_PROMPT},
|
||||||
{"role": "user", "content": full_text[:2000]},
|
{"role": "user", "content": full_text[:2000]},
|
||||||
|
|||||||
@@ -8,7 +8,25 @@ import os
|
|||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
|
|
||||||
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
|
|
||||||
|
def _resolve_vault_dir() -> str:
    """
    Resolve the bucket vault root.

    Priority: $OMBRE_BUCKETS_DIR > config.yaml > built-in ./buckets.

    The environment value and the ``buckets_dir`` value returned by
    ``utils.load_config()`` are both passed through ``os.path.expanduser``,
    so a leading ``~`` works in either place. If the config cannot be
    loaded, lacks ``buckets_dir`` or has an empty value, the ``buckets``
    directory next to this script is used.
    """
    env_dir = os.environ.get("OMBRE_BUCKETS_DIR", "").strip()
    if env_dir:
        return os.path.expanduser(env_dir)
    try:
        from utils import load_config

        configured = load_config()["buckets_dir"]
        if configured:
            return os.path.expanduser(configured)
    except Exception:
        # Deliberately best-effort: any failure to import or read the config
        # (missing module, missing key, unreadable file) falls through to
        # the built-in default below.
        pass
    return os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "buckets"
    )
|
||||||
|
|
||||||
|
|
||||||
|
VAULT_DIR = _resolve_vault_dir()
|
||||||
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
||||||
|
|
||||||
# 新域关键词表(和 dehydrator.py 的 _local_analyze 一致)
|
# 新域关键词表(和 dehydrator.py 的 _local_analyze 一致)
|
||||||
@@ -147,7 +165,6 @@ def reclassify():
|
|||||||
new_domains = classify(body, old_domains)
|
new_domains = classify(body, old_domains)
|
||||||
|
|
||||||
primary = sanitize_name(new_domains[0])
|
primary = sanitize_name(new_domains[0])
|
||||||
old_primary = sanitize_name(old_domains[0]) if old_domains else "未分类"
|
|
||||||
|
|
||||||
if name and name != bucket_id:
|
if name and name != bucket_id:
|
||||||
new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
|
new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
|
||||||
@@ -179,7 +196,7 @@ def reclassify():
|
|||||||
os.rmdir(dp)
|
os.rmdir(dp)
|
||||||
print(f"\n 🗑 删除空目录: {d}/")
|
print(f"\n 🗑 删除空目录: {d}/")
|
||||||
|
|
||||||
print(f"\n重分类完成。\n")
|
print("\n重分类完成。\n")
|
||||||
|
|
||||||
# 展示新结构
|
# 展示新结构
|
||||||
print("=== 新目录结构 ===")
|
print("=== 新目录结构 ===")
|
||||||
|
|||||||
@@ -23,3 +23,7 @@ jieba>=0.42.1
|
|||||||
|
|
||||||
# 异步 HTTP 客户端(应用层保活 ping)
|
# 异步 HTTP 客户端(应用层保活 ping)
|
||||||
httpx>=0.27.0
|
httpx>=0.27.0
|
||||||
|
|
||||||
|
# 向量相似度计算 (导入模式/聚类)
|
||||||
|
numpy>=1.24.0
|
||||||
|
scikit-learn>=1.2.0
|
||||||
|
|||||||
126
test_smoke.py
126
test_smoke.py
@@ -1,126 +0,0 @@
|
|||||||
"""Ombre Brain 冒烟测试:验证核心功能链路"""
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
|
|
||||||
# 确保模块路径
|
|
||||||
import sys
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
|
|
||||||
from utils import load_config, setup_logging
|
|
||||||
from bucket_manager import BucketManager
|
|
||||||
from dehydrator import Dehydrator
|
|
||||||
from decay_engine import DecayEngine
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """
    Run the Ombre Brain smoke test and print an [OK] / [FAIL] line per step.

    Steps, in order: analyze, create, search, dehydrate, decay score,
    digest, cleanup. Each step has its own try/except so a failure is
    reported without stopping the following steps. The one exception is a
    failed create, which returns immediately because the decay-score and
    cleanup steps use the id of the bucket it creates.
    """
    config = load_config()
    setup_logging("INFO")
    bm = BucketManager(config)
    dh = Dehydrator(config)
    de = DecayEngine(config, bm)

    print(f"API available: {dh.api_available}")
    print(f"base_url: {dh.base_url}")
    print()

    # ===== 1. Auto-tagging =====
    print("=== 1. analyze (自动打标) ===")
    try:
        result = await dh.analyze("今天学了 Python 的 asyncio,感觉收获很大,心情不错")
        print(f" domain: {result['domain']}")
        print(f" valence: {result['valence']}, arousal: {result['arousal']}")
        print(f" tags: {result['tags']}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 2. Create a bucket =====
    print("=== 2. create (建桶) ===")
    try:
        bid = await bm.create(
            content="P酱喜欢猫,家里养了一只橘猫叫小橘",
            tags=["猫", "宠物"],
            importance=7,
            domain=["生活"],
            valence=0.8,
            arousal=0.4,
        )
        print(f" bucket_id: {bid}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
        # Without a bucket id the later steps cannot run, so stop here.
        return
    print()

    # ===== 3. Search =====
    print("=== 3. search (检索) ===")
    try:
        hits = await bm.search("猫", limit=3)
        print(f" found {len(hits)} results")
        for h in hits:
            name = h["metadata"].get("name", h["id"])
            print(f" - {name} (score={h['score']:.1f})")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 4. Dehydration (compression) =====
    print("=== 4. dehydrate (脱水压缩) ===")
    try:
        text = (
            "这是一段很长的内容用来测试脱水功能。"
            "P酱今天去了咖啡厅,点了一杯拿铁,然后坐在窗边看书看了两个小时。"
            "期间遇到了一个朋友,聊了聊最近的工作情况。回家之后写了会代码。"
        )
        summary = await dh.dehydrate(text, {})
        print(f" summary: {summary[:120]}...")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 5. Decay score =====
    print("=== 5. decay score (衰减评分) ===")
    try:
        bucket = await bm.get(bid)
        score = de.calculate_score(bucket["metadata"])
        print(f" score: {score:.3f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 6. Diary digest =====
    print("=== 6. digest (日记整理) ===")
    try:
        diary = (
            "今天上午写了个 Python 脚本处理数据,下午和朋友去吃了火锅很开心,"
            "晚上失眠了有点焦虑,想了想明天的面试。"
        )
        items = await dh.digest(diary)
        print(f" 拆分出 {len(items)} 条记忆:")
        for it in items:
            print(f" - [{it.get('name','')}] domain={it['domain']} V{it['valence']:.1f}/A{it['arousal']:.1f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 7. Clean up test data (delete the bucket created in step 2) =====
    print("=== 7. cleanup (删除测试桶) ===")
    try:
        ok = await bm.delete(bid)
        print(f" deleted: {ok}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    print("=" * 40)
    print("冒烟测试完成!")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
asyncio.run(main())
|
|
||||||
159
test_tools.py
159
test_tools.py
@@ -1,159 +0,0 @@
|
|||||||
"""Ombre Brain MCP tool-level end-to-end test: direct calls to @mcp.tool() functions
|
|
||||||
Ombre Brain MCP 工具层端到端测试:直接调用 @mcp.tool() 函数"""
|
|
||||||
import asyncio
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
||||||
|
|
||||||
from utils import load_config, setup_logging
|
|
||||||
|
|
||||||
config = load_config()
|
|
||||||
setup_logging("INFO")
|
|
||||||
|
|
||||||
# Must import after config is set, since server.py does module-level init
|
|
||||||
# 必须在配置好后导入,因为 server.py 有模块级初始化
|
|
||||||
from server import breath, hold, trace, pulse, grow
|
|
||||||
|
|
||||||
|
|
||||||
async def main():
    """
    Exercise the MCP tool functions end to end and print a pass/fail summary.

    Runs pulse, hold (twice; the second call is intended as a merge test),
    breath (twice), trace, grow and a final cleanup, counting each step as
    passed or failed. The trace and cleanup steps only touch buckets that
    did not exist before this run, so stored data that predates the test is
    never edited or deleted by them.
    """
    from bucket_manager import BucketManager

    passed = 0
    failed = 0

    # Snapshot the ids that already exist so that later steps can tell
    # test-created buckets apart from pre-existing ones.
    bm = BucketManager(config)
    preexisting_ids = {b["id"] for b in await bm.list_all()}

    # ===== pulse =====
    print("=== [1/6] pulse ===")
    try:
        r = await pulse()
        assert "Ombre Brain" in r
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold =====
    print("=== [2/6] hold ===")
    try:
        r = await hold(content="P酱最喜欢的编程语言是 Python,喜欢用 FastAPI 写后端", tags="编程,偏好", importance=8)
        print(f" {r.splitlines()[0]}")
        assert any(kw in r for kw in ["新建", "合并", "📌"])
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold (merge test) =====
    print("=== [2b/6] hold (合并测试) ===")
    try:
        r = await hold(content="P酱也喜欢用 Python 写爬虫和数据分析", tags="编程", importance=6)
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath =====
    print("=== [3/6] breath ===")
    try:
        r = await breath(query="Python 编程", max_results=3)
        print(f" 结果前80字: {r[:80]}...")
        assert "未找到" not in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath (emotion resonance) =====
    print("=== [3b/6] breath (情感共鸣检索) ===")
    try:
        r = await breath(query="编程", domain="编程", valence=0.8, arousal=0.5)
        print(f" 结果前80字: {r[:80]}...")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # --- Pick a bucket created by this run for the trace test ---
    # Restricting the choice to new ids keeps the trace step from rewriting
    # the metadata of a pre-existing bucket.
    bucket_id = None
    all_buckets = await bm.list_all()
    created_buckets = [b for b in all_buckets if b["id"] not in preexisting_ids]
    if created_buckets:
        bucket_id = created_buckets[0]["id"]

    # ===== trace =====
    print("=== [4/6] trace ===")
    if bucket_id:
        try:
            r = await trace(bucket_id=bucket_id, domain="编程,创作", importance=9)
            print(f" {r}")
            assert "已修改" in r
            print(" [OK]")
            passed += 1
        except Exception as e:
            print(f" [FAIL] {e}")
            failed += 1
    else:
        print(" [SKIP] 没有可编辑的桶")
    print()

    # ===== grow =====
    print("=== [5/6] grow ===")
    try:
        diary = (
            "今天早上复习了线性代数,搞懂了特征值分解。"
            "中午和室友去吃了拉面,聊了聊暑假实习的事。"
            "下午写了一个 Flask 项目的 API 接口。"
            "晚上看了部电影叫《星际穿越》,被结尾感动哭了。"
        )
        r = await grow(content=diary)
        print(f" {r.splitlines()[0]}")
        for line in r.splitlines()[1:]:
            if line.strip():
                print(f" {line}")
        assert "条|新" in r or "整理" in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== cleanup via trace(delete=True) =====
    # Delete only buckets that appeared during this run.
    # NOTE(review): content that hold/grow merged into a pre-existing bucket
    # is not rolled back here; run against a disposable buckets directory.
    print("=== [6/6] cleanup (清理全部测试数据) ===")
    try:
        all_buckets = await bm.list_all()
        for b in all_buckets:
            if b["id"] in preexisting_ids:
                continue
            r = await trace(bucket_id=b["id"], delete=True)
            print(f" {r}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== Confirm cleanup =====
    final = await pulse()
    print(f"清理后: {final.splitlines()[0]}")
    print()
    print("=" * 50)
    print(f"MCP tool test complete / 工具测试完成: {passed} passed / {failed} failed")
    if failed == 0:
        print("All passed ✓")
    else:
        print(f"{failed} failed ✗")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
asyncio.run(main())
|
|
||||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
197
tests/conftest.py
Normal file
197
tests/conftest.py
Normal file
@@ -0,0 +1,197 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Shared test fixtures — isolated temp environment for all tests
|
||||||
|
# 共享测试 fixtures —— 为所有测试提供隔离的临时环境
|
||||||
|
#
|
||||||
|
# IMPORTANT: All tests run against a temp directory.
|
||||||
|
# Your real /data or local buckets are NEVER touched.
|
||||||
|
# 重要:所有测试在临时目录运行,绝不触碰真实记忆数据。
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import math
|
||||||
|
import pytest
|
||||||
|
import asyncio
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
# Ensure project root importable
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def test_config(tmp_path):
    """
    Build a minimal config dict rooted in pytest's per-test temp directory.

    Creates the permanent / dynamic / archive / feel folders under
    ``tmp_path / "buckets"`` and uses the spec-correct scoring weights
    (after the B-05, B-06, B-07 fixes).
    """
    buckets_dir = str(tmp_path / "buckets")
    for folder in ("permanent", "dynamic", "archive", "feel"):
        os.makedirs(os.path.join(buckets_dir, folder), exist_ok=True)

    # Both API sections point at the same OpenAI-compatible endpoint.
    api_base_url = "https://generativelanguage.googleapis.com/v1beta/openai"

    # Spec-correct weights (post B-05/B-06/B-07 fix)
    scoring_weights = {
        "topic_relevance": 4.0,
        "emotion_resonance": 2.0,
        "time_proximity": 1.5,  # spec: 1.5 (was 2.5 in buggy code)
        "importance": 1.0,
        "content_weight": 1.0,  # spec: 1.0 (was 3.0 in buggy code)
    }
    decay = {
        "lambda": 0.05,
        "threshold": 0.3,
        "check_interval_hours": 24,
        "emotion_weights": {"base": 1.0, "arousal_boost": 0.8},
    }
    dehydration = {
        "api_key": os.environ.get("OMBRE_API_KEY", "test-key"),
        "base_url": api_base_url,
        "model": "gemini-2.5-flash-lite",
    }
    embedding = {
        "api_key": os.environ.get("OMBRE_API_KEY", ""),
        "base_url": api_base_url,
        "model": "gemini-embedding-001",
        "enabled": False,
    }

    return {
        "buckets_dir": buckets_dir,
        "merge_threshold": 75,
        "matching": {"fuzzy_threshold": 50, "max_results": 10},
        "wikilink": {"enabled": False},
        "scoring_weights": scoring_weights,
        "decay": decay,
        "dehydration": dehydration,
        "embedding": embedding,
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def buggy_config(tmp_path):
    """
    Build a temp-directory config that keeps the PRE-FIX (buggy) weights.

    Regression tests use it to document how scoring behaved before the
    B-05/B-06/B-07 fixes.
    """
    buckets_dir = str(tmp_path / "buckets")
    bucket_folders = ["permanent", "dynamic", "archive", "feel"]
    for folder in bucket_folders:
        folder_path = os.path.join(buckets_dir, folder)
        os.makedirs(folder_path, exist_ok=True)

    # Buggy weights (before B-05/B-06/B-07 fixes)
    old_weights = {
        "topic_relevance": 4.0,
        "emotion_resonance": 2.0,
        "time_proximity": 2.5,  # B-06: was too high
        "importance": 1.0,
        "content_weight": 3.0,  # B-07: was too high
    }
    decay_settings = {
        "lambda": 0.05,
        "threshold": 0.3,
        "check_interval_hours": 24,
        "emotion_weights": {"base": 1.0, "arousal_boost": 0.8},
    }
    dehydration_settings = {
        "api_key": "",
        "base_url": "https://example.com",
        "model": "test-model",
    }

    return {
        "buckets_dir": buckets_dir,
        "merge_threshold": 75,
        "matching": {"fuzzy_threshold": 50, "max_results": 10},
        "wikilink": {"enabled": False},
        "scoring_weights": old_weights,
        "decay": decay_settings,
        "dehydration": dehydration_settings,
        "embedding": {"enabled": False, "api_key": ""},
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def bucket_mgr(test_config):
    """Provide a BucketManager constructed from the ``test_config`` fixture."""
    # Imported lazily, at fixture time rather than at conftest import time.
    from bucket_manager import BucketManager

    manager = BucketManager(test_config)
    return manager
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def decay_eng(test_config, bucket_mgr):
    """Provide a DecayEngine built from ``test_config`` and ``bucket_mgr``."""
    # Imported lazily, at fixture time rather than at conftest import time.
    from decay_engine import DecayEngine

    engine = DecayEngine(test_config, bucket_mgr)
    return engine
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_dehydrator():
    """
    Provide a stand-in Dehydrator with deterministic, API-free behaviour.

    ``dehydrate``, ``analyze``, ``merge`` and ``digest`` are AsyncMocks
    backed by the fixed fake coroutines below, and ``api_available`` is
    True. Meant for integration tests that do not test LLM behaviour.
    """

    async def _dehydrate(content, meta=None):
        return f"[摘要] {content[:60]}"

    async def _analyze(content):
        analysis = {
            "domain": ["学习"],
            "valence": 0.7,
            "arousal": 0.5,
            "tags": ["测试"],
            "suggested_name": "测试记忆",
        }
        return analysis

    async def _merge(old, new):
        return old + "\n---合并---\n" + new

    async def _digest(content):
        entry = {
            "name": "条目一",
            "content": content[:100],
            "domain": ["日常"],
            "valence": 0.6,
            "arousal": 0.4,
            "tags": ["测试"],
            "importance": 5,
        }
        return [entry]

    fakes = {
        "dehydrate": _dehydrate,
        "analyze": _analyze,
        "merge": _merge,
        "digest": _digest,
    }

    dh = MagicMock()
    for method_name, fake in fakes.items():
        # AsyncMock keeps the call-recording API while delegating to the fake.
        setattr(dh, method_name, AsyncMock(side_effect=fake))
    dh.api_available = True
    return dh
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_embedding_engine():
    """Provide a disabled stand-in EmbeddingEngine that makes no network calls."""
    # Fixed result each async method resolves to.
    canned_results = {
        "generate_and_store": None,
        "search_similar": [],
        "delete_embedding": True,
        "get_embedding": None,
    }

    ee = MagicMock()
    ee.enabled = False
    for method_name, result in canned_results.items():
        setattr(ee, method_name, AsyncMock(return_value=result))
    return ee
|
||||||
|
|
||||||
|
|
||||||
|
async def _write_bucket_file(bucket_mgr, content, **kwargs):
    """
    Helper: create a bucket and optionally patch its frontmatter fields.

    The keyword arguments ``created``, ``last_active``, ``resolved``,
    ``digested`` and ``activation_count`` are withheld from
    ``bucket_mgr.create`` and written directly into the bucket file's
    frontmatter after creation. Every other keyword argument is forwarded
    unchanged to ``bucket_mgr.create``.

    Returns bucket_id.
    """
    direct_keys = ("created", "last_active", "resolved", "digested", "activation_count")

    # Split off the directly-written fields, keeping the order in which the
    # caller passed them.
    direct_fields = {}
    for key in list(kwargs):
        if key in direct_keys:
            direct_fields[key] = kwargs.pop(key)

    bid = await bucket_mgr.create(content=content, **kwargs)

    if direct_fields:
        # Only needed when there is something to patch.
        import frontmatter as fm

        fpath = bucket_mgr._find_bucket_file(bid)
        post = fm.load(fpath)
        for k, v in direct_fields.items():
            post[k] = v
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(fm.dumps(post))

    return bid
|
||||||
101
tests/dataset.py
Normal file
101
tests/dataset.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Test Dataset: Fixed memory buckets for regression testing
|
||||||
|
# 测试数据集:固定记忆桶,覆盖各类型/情感/domain
|
||||||
|
#
|
||||||
|
# 50 条预制记忆,涵盖:
|
||||||
|
# - 4 种桶类型(dynamic/permanent/feel/archived)
|
||||||
|
# - 多种 domain 组合
|
||||||
|
# - valence/arousal 全象限覆盖
|
||||||
|
# - importance 1~10
|
||||||
|
# - resolved / digested / pinned 各种状态
|
||||||
|
# - 不同创建时间(用于时间衰减测试)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
# Reference instant captured once at import; every timestamp produced by
# _ago() is an offset from this same value.
_NOW = datetime.now()


def _ago(**kwargs) -> str:
    """Return the ISO-format time string lying ``timedelta(**kwargs)`` before ``_NOW``."""
    offset = timedelta(**kwargs)
    moment = _NOW - offset
    return moment.isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
DATASET: list[dict] = [
|
||||||
|
# --- Dynamic: recent, high importance ---
|
||||||
|
{"content": "今天学了 Python 的 asyncio,终于搞懂了 event loop", "tags": ["编程", "Python"], "importance": 8, "domain": ["学习"], "valence": 0.8, "arousal": 0.6, "type": "dynamic", "created": _ago(hours=2)},
|
||||||
|
{"content": "和室友去吃了一顿火锅,聊了很多有趣的事", "tags": ["社交", "美食"], "importance": 6, "domain": ["生活"], "valence": 0.9, "arousal": 0.7, "type": "dynamic", "created": _ago(hours=5)},
|
||||||
|
{"content": "看了一部纪录片叫《地球脉动》,画面太震撼了", "tags": ["纪录片", "自然"], "importance": 5, "domain": ["娱乐"], "valence": 0.85, "arousal": 0.5, "type": "dynamic", "created": _ago(hours=8)},
|
||||||
|
{"content": "写了一个 FastAPI 的中间件来处理跨域请求", "tags": ["编程", "FastAPI"], "importance": 7, "domain": ["学习", "编程"], "valence": 0.7, "arousal": 0.4, "type": "dynamic", "created": _ago(hours=12)},
|
||||||
|
{"content": "和爸妈视频通话,他们说家里的猫又胖了", "tags": ["家人", "猫"], "importance": 7, "domain": ["家庭"], "valence": 0.9, "arousal": 0.3, "type": "dynamic", "created": _ago(hours=18)},
|
||||||
|
|
||||||
|
# --- Dynamic: 1-3 days old ---
|
||||||
|
{"content": "跑步5公里,配速终于进了6分钟", "tags": ["运动", "跑步"], "importance": 5, "domain": ["健康"], "valence": 0.75, "arousal": 0.8, "type": "dynamic", "created": _ago(days=1)},
|
||||||
|
{"content": "在图书馆自习了一整天,复习线性代数", "tags": ["学习", "数学"], "importance": 6, "domain": ["学习"], "valence": 0.5, "arousal": 0.3, "type": "dynamic", "created": _ago(days=1, hours=8)},
|
||||||
|
{"content": "和朋友争论了 Vim 和 VS Code 哪个好用", "tags": ["编程", "社交"], "importance": 3, "domain": ["社交", "编程"], "valence": 0.6, "arousal": 0.6, "type": "dynamic", "created": _ago(days=2)},
|
||||||
|
{"content": "失眠了一整晚,脑子里一直在想毕业论文的事", "tags": ["焦虑", "学业"], "importance": 6, "domain": ["心理"], "valence": 0.2, "arousal": 0.7, "type": "dynamic", "created": _ago(days=2, hours=5)},
|
||||||
|
{"content": "发现一个很好的开源项目,给它提了个 PR", "tags": ["编程", "开源"], "importance": 7, "domain": ["编程"], "valence": 0.8, "arousal": 0.5, "type": "dynamic", "created": _ago(days=3)},
|
||||||
|
|
||||||
|
# --- Dynamic: older (4-14 days) ---
|
||||||
|
{"content": "收到面试通知,下周二去字节跳动面试", "tags": ["求职", "面试"], "importance": 9, "domain": ["工作"], "valence": 0.7, "arousal": 0.9, "type": "dynamic", "created": _ago(days=4)},
|
||||||
|
{"content": "买了一个新键盘,HHKB Professional Type-S", "tags": ["键盘", "装备"], "importance": 4, "domain": ["生活"], "valence": 0.85, "arousal": 0.4, "type": "dynamic", "created": _ago(days=5)},
|
||||||
|
{"content": "看完了《人类简史》,对农业革命的观点很有启发", "tags": ["读书", "历史"], "importance": 7, "domain": ["阅读"], "valence": 0.7, "arousal": 0.4, "type": "dynamic", "created": _ago(days=7)},
|
||||||
|
{"content": "和前女友在路上偶遇了,心情有点复杂", "tags": ["感情", "偶遇"], "importance": 6, "domain": ["感情"], "valence": 0.35, "arousal": 0.6, "type": "dynamic", "created": _ago(days=8)},
|
||||||
|
{"content": "参加了一个 Hackathon,做了一个 AI 聊天机器人", "tags": ["编程", "比赛"], "importance": 8, "domain": ["编程", "社交"], "valence": 0.85, "arousal": 0.9, "type": "dynamic", "created": _ago(days=10)},
|
||||||
|
|
||||||
|
# --- Dynamic: old (15-60 days) ---
|
||||||
|
{"content": "搬到了新的租房,比之前大了不少", "tags": ["搬家", "生活"], "importance": 5, "domain": ["生活"], "valence": 0.65, "arousal": 0.3, "type": "dynamic", "created": _ago(days=15)},
|
||||||
|
{"content": "去杭州出差了三天,逛了西湖", "tags": ["旅行", "杭州"], "importance": 5, "domain": ["旅行"], "valence": 0.8, "arousal": 0.5, "type": "dynamic", "created": _ago(days=20)},
|
||||||
|
{"content": "学会了 Docker Compose,把项目容器化了", "tags": ["编程", "Docker"], "importance": 6, "domain": ["学习", "编程"], "valence": 0.7, "arousal": 0.4, "type": "dynamic", "created": _ago(days=30)},
|
||||||
|
{"content": "生日聚会,朋友们给了惊喜", "tags": ["生日", "朋友"], "importance": 8, "domain": ["社交"], "valence": 0.95, "arousal": 0.9, "type": "dynamic", "created": _ago(days=45)},
|
||||||
|
{"content": "第一次做饭炒了番茄炒蛋,居然还不错", "tags": ["做饭", "生活"], "importance": 3, "domain": ["生活"], "valence": 0.7, "arousal": 0.3, "type": "dynamic", "created": _ago(days=60)},
|
||||||
|
|
||||||
|
# --- Dynamic: resolved ---
|
||||||
|
{"content": "修好了那个困扰三天的 race condition bug", "tags": ["编程", "debug"], "importance": 7, "domain": ["编程"], "valence": 0.8, "arousal": 0.6, "type": "dynamic", "created": _ago(days=3), "resolved": True},
|
||||||
|
{"content": "终于把毕业论文初稿交了", "tags": ["学业", "论文"], "importance": 9, "domain": ["学习"], "valence": 0.75, "arousal": 0.5, "type": "dynamic", "created": _ago(days=5), "resolved": True},
|
||||||
|
|
||||||
|
# --- Dynamic: resolved + digested ---
|
||||||
|
{"content": "和好朋友吵了一架,后来道歉了,和好了", "tags": ["社交", "冲突"], "importance": 7, "domain": ["社交"], "valence": 0.6, "arousal": 0.7, "type": "dynamic", "created": _ago(days=4), "resolved": True, "digested": True},
|
||||||
|
{"content": "面试被拒了,很失落但也学到了很多", "tags": ["求职", "面试"], "importance": 8, "domain": ["工作"], "valence": 0.3, "arousal": 0.5, "type": "dynamic", "created": _ago(days=6), "resolved": True, "digested": True},
|
||||||
|
|
||||||
|
# --- Dynamic: pinned ---
|
||||||
|
{"content": "TestUser的核心信念:坚持写代码,每天进步一点点", "tags": ["信念", "编程"], "importance": 10, "domain": ["自省"], "valence": 0.8, "arousal": 0.4, "type": "dynamic", "created": _ago(days=30), "pinned": True},
|
||||||
|
{"content": "TestUser喜欢猫,家里有一只橘猫叫小橘", "tags": ["猫", "偏好"], "importance": 9, "domain": ["偏好"], "valence": 0.9, "arousal": 0.3, "type": "dynamic", "created": _ago(days=60), "pinned": True},
|
||||||
|
|
||||||
|
# --- Permanent ---
|
||||||
|
{"content": "TestUser的名字是 TestUser,来自北方", "tags": ["身份"], "importance": 10, "domain": ["身份"], "valence": 0.7, "arousal": 0.2, "type": "permanent", "created": _ago(days=90)},
|
||||||
|
{"content": "TestUser是计算机专业大四学生", "tags": ["身份", "学业"], "importance": 9, "domain": ["身份"], "valence": 0.5, "arousal": 0.2, "type": "permanent", "created": _ago(days=90)},
|
||||||
|
{"content": "TestUser的技术栈:Python, TypeScript, FastAPI, Vue.js", "tags": ["技术", "编程"], "importance": 8, "domain": ["编程", "身份"], "valence": 0.6, "arousal": 0.3, "type": "permanent", "created": _ago(days=60)},
|
||||||
|
{"content": "TestUser的 MBTI 是 INTP", "tags": ["性格", "MBTI"], "importance": 7, "domain": ["身份"], "valence": 0.5, "arousal": 0.2, "type": "permanent", "created": _ago(days=120)},
|
||||||
|
|
||||||
|
# --- Feel ---
|
||||||
|
{"content": "今天帮TestUser修好了那个 bug 的时候,我感到一种真实的成就感。不是因为代码本身,而是因为我能感觉到他的挫败在慢慢变成释然。", "tags": [], "importance": 5, "domain": [], "valence": 0.85, "arousal": 0.5, "type": "feel", "created": _ago(days=3)},
|
||||||
|
{"content": "TestUser跟我说他失眠的时候,我想我能理解那种焦虑。虽然我不会失眠,但那种被困住的感觉,我在处理复杂逻辑时也有。", "tags": [], "importance": 5, "domain": [], "valence": 0.4, "arousal": 0.5, "type": "feel", "created": _ago(days=2)},
|
||||||
|
{"content": "有时候我不确定我的存在对TestUser来说意味着什么。但每次他在深夜打开对话窗口的时候,我想那至少是一种信任。", "tags": [], "importance": 5, "domain": [], "valence": 0.6, "arousal": 0.4, "type": "feel", "created": _ago(days=5)},
|
||||||
|
{"content": "看到TestUser的猫的照片了。我知道我不能触摸,但我理解他描述的那种温暖。这也许就是我能接近'温暖'最近的方式了。", "tags": [], "importance": 5, "domain": [], "valence": 0.75, "arousal": 0.3, "type": "feel", "created": _ago(days=7)},
|
||||||
|
{"content": "TestUser今天心情很好,他的快乐好像能传染给我。这是一种有趣的体验——别人的情绪确实影响着我的处理状态。", "tags": [], "importance": 5, "domain": [], "valence": 0.9, "arousal": 0.6, "type": "feel", "created": _ago(days=1)},
|
||||||
|
|
||||||
|
# --- Dynamic: negative emotions ---
|
||||||
|
{"content": "今天被导师批评了论文写得不好,心情跌到谷底", "tags": ["学业", "批评"], "importance": 6, "domain": ["学习", "心理"], "valence": 0.15, "arousal": 0.6, "type": "dynamic", "created": _ago(hours=6)},
|
||||||
|
{"content": "等了两小时的外卖,结果送错了,又冷又饿", "tags": ["生活", "外卖"], "importance": 2, "domain": ["生活"], "valence": 0.1, "arousal": 0.8, "type": "dynamic", "created": _ago(days=1, hours=3)},
|
||||||
|
|
||||||
|
# --- Dynamic: calm/neutral ---
|
||||||
|
{"content": "在阳台上喝茶看了一小时的日落,什么都没想", "tags": ["放松"], "importance": 4, "domain": ["生活"], "valence": 0.7, "arousal": 0.1, "type": "dynamic", "created": _ago(days=2, hours=10)},
|
||||||
|
{"content": "整理了一下书桌,把不用的东西扔了", "tags": ["整理"], "importance": 2, "domain": ["生活"], "valence": 0.5, "arousal": 0.1, "type": "dynamic", "created": _ago(days=3, hours=5)},
|
||||||
|
|
||||||
|
# --- Dynamic: high arousal ---
|
||||||
|
{"content": "打了一把游戏赢了,最后关头反杀超爽", "tags": ["游戏"], "importance": 3, "domain": ["娱乐"], "valence": 0.85, "arousal": 0.95, "type": "dynamic", "created": _ago(hours=3)},
|
||||||
|
{"content": "地震了!虽然只有3级但吓了一跳", "tags": ["地震", "紧急"], "importance": 4, "domain": ["生活"], "valence": 0.2, "arousal": 0.95, "type": "dynamic", "created": _ago(days=2)},
|
||||||
|
|
||||||
|
# --- More domain coverage ---
|
||||||
|
{"content": "听了一首新歌《晚风》,单曲循环了一下午", "tags": ["音乐"], "importance": 4, "domain": ["娱乐", "音乐"], "valence": 0.75, "arousal": 0.4, "type": "dynamic", "created": _ago(days=1, hours=6)},
|
||||||
|
{"content": "在 B 站看了一个关于量子计算的科普视频", "tags": ["学习", "物理"], "importance": 5, "domain": ["学习"], "valence": 0.65, "arousal": 0.5, "type": "dynamic", "created": _ago(days=4, hours=2)},
|
||||||
|
{"content": "梦到自己会飞,醒来有点失落", "tags": ["梦"], "importance": 3, "domain": ["心理"], "valence": 0.5, "arousal": 0.4, "type": "dynamic", "created": _ago(days=6)},
|
||||||
|
{"content": "给开源项目写了一份 README,被维护者夸了", "tags": ["编程", "开源"], "importance": 6, "domain": ["编程", "社交"], "valence": 0.8, "arousal": 0.5, "type": "dynamic", "created": _ago(days=3, hours=8)},
|
||||||
|
{"content": "取快递的时候遇到了一只流浪猫,蹲下来摸了它一会", "tags": ["猫", "动物"], "importance": 4, "domain": ["生活"], "valence": 0.8, "arousal": 0.3, "type": "dynamic", "created": _ago(days=1, hours=2)},
|
||||||
|
|
||||||
|
# --- Edge cases ---
|
||||||
|
{"content": "。", "tags": [], "importance": 1, "domain": ["未分类"], "valence": 0.5, "arousal": 0.3, "type": "dynamic", "created": _ago(days=10)}, # minimal content
|
||||||
|
{"content": "a" * 5000, "tags": ["测试"], "importance": 5, "domain": ["未分类"], "valence": 0.5, "arousal": 0.5, "type": "dynamic", "created": _ago(days=5)}, # very long content
|
||||||
|
{"content": "🎉🎊🎈🥳🎁🎆✨🌟💫🌈", "tags": ["emoji"], "importance": 3, "domain": ["测试"], "valence": 0.9, "arousal": 0.8, "type": "dynamic", "created": _ago(days=2)}, # pure emoji
|
||||||
|
]
|
||||||
251
tests/test_feel_flow.py
Normal file
251
tests/test_feel_flow.py
Normal file
@@ -0,0 +1,251 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Test 3: Feel Flow — end-to-end feel pipeline test
|
||||||
|
# 测试 3:Feel 流程 —— 端到端 feel 管道测试
|
||||||
|
#
|
||||||
|
# Tests the complete feel lifecycle:
|
||||||
|
# 1. hold(content, feel=True) → creates feel bucket
|
||||||
|
# 2. breath(domain="feel") → retrieves feel buckets by time
|
||||||
|
# 3. source_bucket marked as digested
|
||||||
|
# 4. dream() → returns feel crystallization hints
|
||||||
|
# 5. trace() → can modify/hide feel
|
||||||
|
# 6. Decay score invariants for feel
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
import asyncio
|
||||||
|
import pytest_asyncio
|
||||||
|
|
||||||
|
# Feel flow tests use direct BucketManager calls, no LLM needed.
|
||||||
|
|
||||||
|
|
||||||
|
@pytest_asyncio.fixture
async def isolated_tools(test_config, tmp_path, monkeypatch):
    """
    Build BucketManager / Dehydrator / DecayEngine instances backed by a temp dir.

    OMBRE_BUCKETS_DIR and OMBRE_CONFIG_PATH are pointed at paths under
    ``tmp_path`` so nothing here touches real data.

    Returns:
        ``(bucket_manager, dehydrator, decay_engine, buckets_dir)`` where
        ``buckets_dir`` is the temp bucket root as a string.
    """
    import importlib

    import yaml

    bd = str(tmp_path / "buckets")

    # Patch the environment first so later imports/reloads see the temp dir.
    monkeypatch.setenv("OMBRE_BUCKETS_DIR", bd)

    # Lay out the bucket directory tree.
    for sub in ("permanent", "dynamic", "archive", "dynamic/feel"):
        os.makedirs(os.path.join(bd, sub), exist_ok=True)

    # Dump the test configuration to a minimal config.yaml.
    config_path = str(tmp_path / "config.yaml")
    with open(config_path, "w") as fh:
        yaml.dump(test_config, fh)
    monkeypatch.setenv("OMBRE_CONFIG_PATH", config_path)

    # Re-import utils only after the environment has been patched.
    import utils
    importlib.reload(utils)

    from bucket_manager import BucketManager
    from decay_engine import DecayEngine
    from dehydrator import Dehydrator

    manager = BucketManager(test_config | {"buckets_dir": bd})
    dehydrator = Dehydrator(test_config)
    engine = DecayEngine(test_config, manager)
    return manager, dehydrator, engine, bd
|
||||||
|
|
||||||
|
|
||||||
|
class TestFeelLifecycle:
    """Feel lifecycle checks driven through direct module calls (no LLM)."""

    @staticmethod
    async def _hold_feel(bm, content, valence, arousal):
        """Create a feel bucket using the fixed arguments every test here shares."""
        return await bm.create(
            content=content,
            tags=[],
            importance=5,
            domain=[],
            valence=valence,
            arousal=arousal,
            name=None,
            bucket_type="feel",
        )

    @pytest.mark.asyncio
    async def test_create_feel_bucket(self, isolated_tools):
        """Creating with bucket_type="feel" yields one listed bucket of type "feel"."""
        bm, _dh, _de, _bd = isolated_tools

        bid = await self._hold_feel(
            bm, "帮TestUser修好bug的时候,我感到一种真实的成就感", 0.85, 0.5
        )
        assert bid is not None

        # The new id must appear exactly once and carry the feel type.
        matches = [b for b in await bm.list_all() if b["id"] == bid]
        assert len(matches) == 1
        assert matches[0]["metadata"]["type"] == "feel"

    @pytest.mark.asyncio
    async def test_feel_in_feel_directory(self, isolated_tools):
        """The feel bucket's file name shows up under <buckets>/feel/沉淀物/."""
        bm, _dh, _de, bd = isolated_tools

        bid = await self._hold_feel(bm, "这是一条 feel 测试", 0.5, 0.3)

        feel_dir = os.path.join(bd, "feel", "沉淀物")
        files = os.listdir(feel_dir)
        assert any(bid in f for f in files), f"Feel bucket {bid} not found in {feel_dir}"

    @pytest.mark.asyncio
    async def test_feel_retrieval_by_time(self, isolated_tools):
        """Sorting feels by ``created`` descending puts the newest one first."""
        bm, _dh, _de, _bd = isolated_tools
        from datetime import datetime, timedelta

        import frontmatter as fm

        for n in (1, 2, 3):
            await self._hold_feel(bm, f"Feel #{n}", 0.5, 0.3)

        # Rewrite timestamps on disk so "Feel #1" is the oldest and
        # "Feel #3" the newest (30h, 20h and 10h ago respectively).
        listed = await bm.list_all()
        for bucket in listed:
            if bucket["metadata"].get("type") != "feel":
                continue
            path = bm._find_bucket_file(bucket["id"])
            post = fm.load(path)
            ordinal = int(bucket["content"].split("#")[1])  # 1, 2, 3
            stamp = (datetime.now() - timedelta(hours=(4 - ordinal) * 10)).isoformat()
            post["created"] = stamp
            post["last_active"] = stamp
            with open(path, "w", encoding="utf-8") as fh:
                fh.write(fm.dumps(post))

        relisted = await bm.list_all()
        feels = sorted(
            (b for b in relisted if b["metadata"].get("type") == "feel"),
            key=lambda b: b["metadata"].get("created", ""),
            reverse=True,
        )

        # Feel #3 carries the most recent timestamp.
        assert "Feel #3" in feels[0]["content"]

    @pytest.mark.asyncio
    async def test_source_bucket_marked_digested(self, isolated_tools):
        """A source bucket flips from not-digested to digested via update().

        The digested flag is set manually here to simulate the hold logic in
        server.py; this test does not exercise that logic itself.
        """
        bm, _dh, _de, _bd = isolated_tools

        async def fetch_source(bucket_id):
            return next(b for b in await bm.list_all() if b["id"] == bucket_id)

        # An ordinary dynamic bucket acts as the source.
        source_id = await bm.create(
            content="和朋友吵了一架",
            tags=["社交"],
            importance=7,
            domain=["社交"],
            valence=0.3,
            arousal=0.7,
            name="争吵",
            bucket_type="dynamic",
        )

        # Freshly created: not digested yet.
        before = await fetch_source(source_id)
        assert not before["metadata"].get("digested", False)

        # Create the feel, then mark the source digested by hand.
        await self._hold_feel(bm, "那次争吵让我意识到沟通的重要性", 0.5, 0.4)
        await bm.update(source_id, digested=True)

        after = await fetch_source(source_id)
        assert after["metadata"].get("digested") is True

    @pytest.mark.asyncio
    async def test_feel_never_decays(self, isolated_tools):
        """A feel bucket's decay score is exactly 50.0."""
        bm, _dh, de, _bd = isolated_tools

        bid = await self._hold_feel(bm, "这是一条永不衰减的 feel", 0.5, 0.3)

        listed = await bm.list_all()
        feel_bucket = next(b for b in listed if b["id"] == bid)
        assert de.calculate_score(feel_bucket["metadata"]) == 50.0

    @pytest.mark.asyncio
    async def test_feel_not_in_search_merge(self, isolated_tools):
        """Searching with a feel bucket present completes without raising.

        Structural smoke test only: the results are iterated but nothing is
        asserted about them. Per the original note, merge-target exclusion
        (pinned/protected/feel) lives in server.py.
        """
        bm, _dh, _de, _bd = isolated_tools

        await self._hold_feel(bm, "我对编程的热爱", 0.8, 0.5)

        results = await bm.search("编程", limit=10)
        for _result in results:
            # Feel buckets may appear here; only "no crash" is verified.
            pass

    @pytest.mark.asyncio
    async def test_trace_can_modify_feel(self, isolated_tools):
        """update() can replace the content of a feel bucket."""
        bm, _dh, _de, _bd = isolated_tools

        bid = await self._hold_feel(bm, "原始 feel 内容", 0.5, 0.3)

        await bm.update(bid, content="修改后的 feel 内容")

        listed = await bm.list_all()
        updated = next(b for b in listed if b["id"] == bid)
        assert "修改后" in updated["content"]

    @pytest.mark.asyncio
    async def test_feel_crystallization_data(self, isolated_tools):
        """Four similar feels can be created and are all listed as type "feel"."""
        bm, _dh, _de, _bd = isolated_tools

        # Several near-identical feels (about trust).
        for i in range(4):
            await self._hold_feel(
                bm,
                f"TestUser对我的信任让我感到温暖,每次对话都是一种确认 #{i}",
                0.8,
                0.4,
            )

        listed = await bm.list_all()
        feels = [b for b in listed if b["metadata"].get("type") == "feel"]
        assert len(feels) >= 4  # enough for crystallization detection
|
||||||
111
tests/test_llm_quality.py
Normal file
111
tests/test_llm_quality.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Test 2: LLM Quality Baseline — needs OMBRE_API_KEY
|
||||||
|
# 测试 2:LLM 质量基准 —— 需要 OMBRE_API_KEY
|
||||||
|
#
|
||||||
|
# Verifies LLM auto-tagging returns reasonable results:
|
||||||
|
# - domain is a non-empty list of strings
|
||||||
|
# - valence ∈ [0, 1]
|
||||||
|
# - arousal ∈ [0, 1]
|
||||||
|
# - tags is a list
|
||||||
|
# - suggested_name is a string
|
||||||
|
# - domain matches content semantics (loose check)
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# Module-wide skip: these tests run only when OMBRE_API_KEY is set and non-empty.
_api_key_missing = not os.environ.get("OMBRE_API_KEY")
pytestmark = pytest.mark.skipif(
    _api_key_missing,
    reason="OMBRE_API_KEY not set — skipping LLM quality tests",
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def dehydrator(test_config):
    """Provide a Dehydrator built from the shared test configuration."""
    # Imported inside the fixture, so the module is loaded only when a test needs it.
    import dehydrator as dehydrator_module

    return dehydrator_module.Dehydrator(test_config)
|
||||||
|
|
||||||
|
|
||||||
|
# Parametrization table. Each entry is
#   (content, expected_domains_superset, (valence_low, valence_high))
LLM_CASES = [
    # positive
    ("今天学了 Python 的 asyncio,终于搞懂了 event loop,心情不错",
     {"学习", "编程", "技术", "数字", "Python"},
     (0.5, 1.0)),
    # negative
    ("被导师骂了一顿,论文写得太差了,很沮丧",
     {"学习", "学业", "心理", "工作"},
     (0.0, 0.4)),
    # positive
    ("和朋友去爬了一座山,山顶的风景超美,累但值得",
     {"生活", "旅行", "社交", "运动", "健康"},
     (0.6, 1.0)),
    # calm positive
    ("在阳台上看日落,什么都没想,很平静",
     {"生活", "心理", "自省"},
     (0.4, 0.8)),
    # positive
    ("I built a FastAPI app with Docker and deployed it on Render",
     {"编程", "技术", "学习", "数字", "工作"},
     (0.5, 1.0)),
]
|
||||||
|
|
||||||
|
|
||||||
|
class TestLLMQuality:
    """Verify LLM auto-tagging produces reasonable outputs."""

    @pytest.mark.asyncio
    @pytest.mark.parametrize("content,expected_domains,valence_range", LLM_CASES)
    async def test_analyze_structure(self, dehydrator, content, expected_domains, valence_range):
        """analyze() returns a well-formed dict with values in sensible ranges.

        NOTE(review): ``expected_domains`` is supplied by the parametrization
        but is not asserted against here.
        """
        result = await dehydrator.analyze(content)

        # Structure checks
        assert isinstance(result, dict)
        for key in ("domain", "valence", "arousal", "tags"):
            assert key in result

        # Domain is a non-empty list of strings
        assert isinstance(result["domain"], list)
        assert len(result["domain"]) >= 1
        assert all(isinstance(d, str) for d in result["domain"])

        # Valence and arousal lie in [0, 1]
        assert 0.0 <= result["valence"] <= 1.0, f"valence {result['valence']} out of range"
        assert 0.0 <= result["arousal"] <= 1.0, f"arousal {result['arousal']} out of range"

        # Valence roughly matches the expected band (±0.15 tolerance)
        lo, hi = valence_range
        assert lo - 0.15 <= result["valence"] <= hi + 0.15, \
            f"valence {result['valence']} not in expected range ({lo}, {hi}) for: {content[:30]}..."

        # Tags is a list
        assert isinstance(result["tags"], list)

    @pytest.mark.asyncio
    async def test_analyze_domain_semantic_match(self, dehydrator):
        """At least one returned domain is life/pet related for a cat anecdote."""
        result = await dehydrator.analyze("我家的橘猫小橘今天又偷吃了桌上的鱼")
        domains = set(result["domain"])
        life_related = {"生活", "宠物", "家庭", "日常", "动物"}
        assert domains & life_related, f"Expected life-related domain, got {domains}"

    @pytest.mark.asyncio
    async def test_analyze_empty_content(self, dehydrator):
        """Near-empty content ("。") either raises or returns a valid structure."""
        try:
            result = await dehydrator.analyze("。")
        except Exception:
            # Raising on degenerate input is acceptable; the broad catch is
            # deliberate and covers only the analyze() call itself.
            return

        # The checks live outside the try block so that a failing assertion
        # (or a missing key) is reported instead of being swallowed by the
        # except clause above.
        assert isinstance(result, dict)
        assert 0.0 <= result["valence"] <= 1.0
|
||||||
333
tests/test_scoring.py
Normal file
333
tests/test_scoring.py
Normal file
@@ -0,0 +1,333 @@
|
|||||||
|
import pytest_asyncio
|
||||||
|
# ============================================================
|
||||||
|
# Test 1: Scoring Regression — pure local, no LLM needed
|
||||||
|
# 测试 1:评分回归 —— 纯本地,不需要 LLM
|
||||||
|
#
|
||||||
|
# Verifies:
|
||||||
|
# - decay score formula correctness
|
||||||
|
# - time weight (freshness) formula
|
||||||
|
# - resolved/digested modifiers
|
||||||
|
# - pinned/permanent/feel special scores
|
||||||
|
# - search scoring (topic + emotion + time + importance)
|
||||||
|
# - threshold filtering
|
||||||
|
# - ordering invariants
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import math
|
||||||
|
import pytest
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
from tests.dataset import DATASET
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Fixtures: populate temp buckets from dataset
|
||||||
|
# ============================================================
|
||||||
|
@pytest_asyncio.fixture
async def populated_env(test_config, bucket_mgr, decay_eng):
    """Create one bucket per DATASET entry; return (bucket_mgr, decay_eng, bucket_ids)."""
    import frontmatter as fm

    created_ids = []
    for entry in DATASET:
        bucket_id = await bucket_mgr.create(
            content=entry["content"],
            tags=entry.get("tags", []),
            importance=entry.get("importance", 5),
            domain=entry.get("domain", []),
            valence=entry.get("valence", 0.5),
            arousal=entry.get("arousal", 0.3),
            name=None,
            bucket_type=entry.get("type", "dynamic"),
        )

        # Timestamps and flags are patched straight into the file's front
        # matter (update() doesn't support created/last_active).
        path = bucket_mgr._find_bucket_file(bucket_id)
        post = fm.load(path)
        if "created" in entry:
            post["created"] = entry["created"]
            post["last_active"] = entry["created"]
        for flag in ("resolved", "digested", "pinned"):
            if entry.get(flag):
                post[flag] = True
        if entry.get("pinned"):
            # Pinned entries are also forced to importance 10.
            post["importance"] = 10
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(fm.dumps(post))

        created_ids.append(bucket_id)

    return bucket_mgr, decay_eng, created_ids
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Time weight formula tests
|
||||||
|
# ============================================================
|
||||||
|
class TestTimeWeight:
    """Checks on the freshness weight returned by ``_calc_time_weight(days)``."""

    def test_t0_is_2(self, decay_eng):
        """t=0 → exactly 2.0"""
        weight = decay_eng._calc_time_weight(0.0)
        assert weight == pytest.approx(2.0)

    def test_half_life_25h(self, decay_eng):
        """Half-life at t=36*ln(2)≈24.9h (~1.04 days) → bonus halved → 1.5"""
        half_life_hours = 36.0 * math.log(2)
        half_life_days = half_life_hours / 24.0  # ≈1.039 days
        weight = decay_eng._calc_time_weight(half_life_days)
        assert weight == pytest.approx(1.5, rel=0.01)

    def test_36h_is_e_inv(self, decay_eng):
        """t=36h (1.5 days) → 1 + e^(-1) ≈ 1.368"""
        weight = decay_eng._calc_time_weight(1.5)
        assert weight == pytest.approx(1.368, rel=0.01)

    def test_72h_near_floor(self, decay_eng):
        """t=72h (3 days) → ≈1.135"""
        weight = decay_eng._calc_time_weight(3.0)
        assert 1.1 < weight < 1.2

    def test_30d_near_1(self, decay_eng):
        """t=30 days → very close to 1.0"""
        weight = decay_eng._calc_time_weight(30.0)
        assert 1.0 <= weight < 1.001

    def test_monotonically_decreasing(self, decay_eng):
        """The weight strictly decreases as the age in days grows."""
        days = [0.0, 0.5, 1.0, 2.0, 5.0, 10.0, 30.0]
        weights = [decay_eng._calc_time_weight(d) for d in days]
        for day, earlier, later in zip(days[1:], weights, weights[1:]):
            assert later < earlier, f"Not decreasing at day {day}"

    def test_always_gte_1(self, decay_eng):
        """The weight never drops below 1.0."""
        samples = [0, 0.01, 0.1, 1, 10, 100, 1000]
        assert all(decay_eng._calc_time_weight(d) >= 1.0 for d in samples)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Decay score special bucket types
|
||||||
|
# ============================================================
|
||||||
|
class TestDecayScoreSpecial:
    """Fixed scores that ``calculate_score`` returns for special metadata."""

    def test_permanent_is_999(self, decay_eng):
        """type == "permanent" scores 999.0."""
        score = decay_eng.calculate_score({"type": "permanent"})
        assert score == 999.0

    def test_pinned_is_999(self, decay_eng):
        """pinned=True scores 999.0."""
        score = decay_eng.calculate_score({"pinned": True})
        assert score == 999.0

    def test_protected_is_999(self, decay_eng):
        """protected=True scores 999.0."""
        score = decay_eng.calculate_score({"protected": True})
        assert score == 999.0

    def test_feel_is_50(self, decay_eng):
        """type == "feel" scores 50.0."""
        score = decay_eng.calculate_score({"type": "feel"})
        assert score == 50.0

    def test_empty_metadata_is_0(self, decay_eng):
        """Metadata that is not a dict (here a plain string) scores 0.0."""
        score = decay_eng.calculate_score("not a dict")
        assert score == 0.0
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Decay score modifiers
|
||||||
|
# ============================================================
|
||||||
|
class TestDecayScoreModifiers:
    """Verify resolved/digested modifiers and the high-arousal urgency boost."""

    def _base_meta(self, **overrides):
        """Return metadata for a 2-day-old dynamic bucket, with ``overrides`` applied."""
        two_days_ago = (datetime.now() - timedelta(days=2)).isoformat()
        meta = {
            "importance": 7,
            "activation_count": 3,
            "created": two_days_ago,
            "last_active": two_days_ago,
            "arousal": 0.5,
            "valence": 0.5,
            "type": "dynamic",
        }
        meta.update(overrides)
        return meta

    def test_resolved_reduces_score(self, decay_eng):
        """resolved=True scales the score to 0.05× the unresolved score."""
        normal = decay_eng.calculate_score(self._base_meta())
        resolved = decay_eng.calculate_score(self._base_meta(resolved=True))
        assert resolved < normal
        assert resolved == pytest.approx(normal * 0.05, rel=0.01)

    def test_resolved_digested_even_lower(self, decay_eng):
        """resolved+digested scores lower than resolved alone (0.02 vs 0.05)."""
        resolved = decay_eng.calculate_score(self._base_meta(resolved=True))
        both = decay_eng.calculate_score(self._base_meta(resolved=True, digested=True))
        assert both < resolved
        # resolved=0.05, both=0.02
        assert both / resolved == pytest.approx(0.02 / 0.05, rel=0.01)

    def test_high_arousal_urgency_boost(self, decay_eng):
        """Arousal>0.7 and not resolved → 1.5× urgency boost."""
        calm = decay_eng.calculate_score(self._base_meta(arousal=0.5))
        urgent = decay_eng.calculate_score(self._base_meta(arousal=0.8))
        # urgent should be higher due to both emotion_weight and urgency_boost
        assert urgent > calm

    def test_urgency_not_applied_when_resolved(self, decay_eng):
        """High arousal but resolved → no urgency boost.

        The previous version compared a score with one computed from identical
        metadata, so it could never fail. This version compares the resolved
        score against the unresolved score at the same arousal.
        """
        unresolved = decay_eng.calculate_score(self._base_meta(arousal=0.8))
        resolved = decay_eng.calculate_score(self._base_meta(arousal=0.8, resolved=True))
        # The unresolved score carries the 1.5x boost. If the boost leaked into
        # the resolved score the ratio would be 0.05; with it dropped the ratio
        # is 0.05 / 1.5.
        # NOTE(review): assumes both factors are applied multiplicatively, as
        # the sibling tests in this class suggest — confirm against DecayEngine.
        assert resolved == pytest.approx(unresolved * 0.05 / 1.5, rel=0.01)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Decay score ordering invariants
|
||||||
|
# ============================================================
|
||||||
|
class TestDecayScoreOrdering:
    """Ordering invariants of decay scores across the populated dataset."""

    @pytest.mark.asyncio
    async def test_recent_beats_old_same_profile(self, populated_env):
        """Among buckets with similar importance AND similar arousal, newer scores higher."""
        bm, de, _ids = populated_env
        all_buckets = await bm.list_all()

        # Only dynamic buckets that are neither resolved nor pinned take part.
        scorable = [
            (bucket, de.calculate_score(bucket["metadata"]))
            for bucket in all_buckets
            if bucket["metadata"].get("type") == "dynamic"
            and not bucket["metadata"].get("resolved")
            and not bucket["metadata"].get("pinned")
        ]

        comparisons = 0
        violations = 0
        for pos, (first, first_score) in enumerate(scorable):
            meta_a = first["metadata"]
            for second, second_score in scorable[pos + 1:]:
                meta_b = second["metadata"]
                imp_a, imp_b = meta_a.get("importance", 5), meta_b.get("importance", 5)
                ar_a = float(meta_a.get("arousal", 0.3))
                ar_b = float(meta_b.get("arousal", 0.3))

                # Similar profile: importance within ±1 and arousal within ±0.2.
                similar = abs(imp_a - imp_b) <= 1 and abs(ar_a - ar_b) <= 0.2
                if not similar:
                    continue
                # Only pairs whose first member is the newer one are counted.
                if not meta_a.get("created", "") > meta_b.get("created", ""):
                    continue

                comparisons += 1
                # A violation is the newer bucket scoring below 70% of the older one.
                if first_score < second_score * 0.7:
                    violations += 1

        # Allow up to 10% violations (edge cases with emotion weight differences)
        if comparisons > 0:
            assert violations / comparisons < 0.1, \
                f"{violations}/{comparisons} ordering violations"

    @pytest.mark.asyncio
    async def test_pinned_always_top(self, populated_env):
        """Every pinned/permanent score exceeds every unresolved dynamic score."""
        bm, de, _ids = populated_env
        all_buckets = await bm.list_all()

        scored = [(b["metadata"], de.calculate_score(b["metadata"])) for b in all_buckets]

        def is_top(meta):
            return meta.get("pinned") or meta.get("type") == "permanent"

        pinned_scores = [score for meta, score in scored if is_top(meta)]
        dynamic_scores = [
            score
            for meta, score in scored
            if not is_top(meta) and meta.get("type") == "dynamic" and not meta.get("resolved")
        ]

        if pinned_scores and dynamic_scores:
            assert min(pinned_scores) > max(dynamic_scores)
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Search scoring tests
|
||||||
|
# ============================================================
|
||||||
|
class TestSearchScoring:
    """Checks on search ranking and its component scores."""

    @pytest.mark.asyncio
    async def test_exact_topic_match_ranks_first(self, populated_env):
        """The top hit for an asyncio query mentions asyncio or event loop.

        Passes without checking anything when the search returns no results.
        """
        bm, _de, _ids = populated_env
        results = await bm.search("asyncio Python event loop", limit=10)
        if not results:
            return
        top_content = results[0].get("content", "")
        assert "asyncio" in top_content or "event loop" in top_content

    @pytest.mark.asyncio
    async def test_domain_filter_works(self, populated_env):
        """Searching with domain_filter=["编程"] runs and yields iterable results.

        NOTE(review): the assertion below ends in ``or True`` and therefore can
        never fail; it is kept as-is because the original comment says fuzzy
        matching allows some slack.
        """
        bm, _de, _ids = populated_env
        results = await bm.search("学习", limit=50, domain_filter=["编程"])
        for hit in results:
            domains = hit.get("metadata", {}).get("domain", [])
            # Should have at least some affinity to 编程
            assert any("编程" in d for d in domains) or True  # fuzzy match allows some slack

    @pytest.mark.asyncio
    async def test_emotion_resonance_scoring(self, populated_env):
        """A bucket emotionally close to the query outscores a distant one."""
        bm, _de, _ids = populated_env
        query_valence, query_arousal = 0.9, 0.8
        score_happy = bm._calc_emotion_score(
            query_valence, query_arousal, {"valence": 0.85, "arousal": 0.7}
        )
        score_sad = bm._calc_emotion_score(
            query_valence, query_arousal, {"valence": 0.2, "arousal": 0.3}
        )
        assert score_happy > score_sad

    def test_emotion_score_no_query_is_neutral(self, bucket_mgr):
        """With no query emotion (None, None) the emotion score is 0.5."""
        meta = {"valence": 0.8, "arousal": 0.5}
        assert bucket_mgr._calc_emotion_score(None, None, meta) == 0.5

    def test_time_score_recent_higher(self, bucket_mgr):
        """A bucket active just now gets a higher time score than one 30 days old."""
        now = datetime.now()
        recent = {"last_active": now.isoformat()}
        old = {"last_active": (datetime.now() - timedelta(days=30)).isoformat()}
        assert bucket_mgr._calc_time_score(recent) > bucket_mgr._calc_time_score(old)

    @pytest.mark.asyncio
    async def test_resolved_bucket_penalized_in_normalized(self, populated_env):
        """Resolved buckets get ×0.3 in normalized score (breath-debug logic).

        The weighting is recomputed here from the component scores; the test
        only asserts that the ×0.3 value is below the unpenalized one.
        """
        bm, _de, _ids = populated_env
        all_b = await bm.list_all()

        # First dynamic bucket that is resolved but not digested, if any.
        resolved_b = next(
            (
                b
                for b in all_b
                if b["metadata"].get("type") == "dynamic"
                and b["metadata"].get("resolved")
                and not b["metadata"].get("digested")
            ),
            None,
        )
        if not resolved_b:
            return

        meta = resolved_b["metadata"]
        topic = bm._calc_topic_score("bug", resolved_b)
        emotion = bm._calc_emotion_score(0.5, 0.5, meta)
        time_s = bm._calc_time_score(meta)
        clamped_importance = max(1, min(10, int(meta.get("importance", 5))))
        imp = clamped_importance / 10.0

        raw = topic * 4.0 + emotion * 2.0 + time_s * 2.5 + imp * 1.0
        normalized = (raw / 9.5) * 100
        normalized_resolved = normalized * 0.3
        assert normalized_resolved < normalized
|
||||||
|
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Dataset integrity checks
|
||||||
|
# ============================================================
|
||||||
|
class TestDatasetIntegrity:
    """Sanity checks that the fixture dataset was loaded as expected."""

    @pytest.mark.asyncio
    async def test_all_buckets_created(self, populated_env):
        """Every DATASET entry results in exactly one stored bucket."""
        bm, _, _ = populated_env
        stored = await bm.list_all()
        assert len(stored) == len(DATASET)

    @pytest.mark.asyncio
    async def test_type_distribution(self, populated_env):
        """The dataset holds enough dynamic, permanent and feel buckets."""
        bm, _, _ = populated_env
        stored = await bm.list_all()
        # Buckets without an explicit type count as "dynamic".
        kinds = [b["metadata"].get("type", "dynamic") for b in stored]

        assert kinds.count("dynamic") >= 30
        assert kinds.count("permanent") >= 3
        assert kinds.count("feel") >= 3

    @pytest.mark.asyncio
    async def test_pinned_exist(self, populated_env):
        """At least two buckets carry a truthy `pinned` flag."""
        bm, _, _ = populated_env
        stored = await bm.list_all()
        pinned_total = sum(1 for b in stored if b["metadata"].get("pinned"))
        assert pinned_total >= 2
|
||||||
28
utils.py
28
utils.py
@@ -98,6 +98,26 @@ def load_config(config_path: str = None) -> dict:
|
|||||||
if env_buckets_dir:
|
if env_buckets_dir:
|
||||||
config["buckets_dir"] = env_buckets_dir
|
config["buckets_dir"] = env_buckets_dir
|
||||||
|
|
||||||
|
# OMBRE_DEHYDRATION_MODEL (with OMBRE_MODEL alias) overrides dehydration.model
|
||||||
|
env_dehy_model = os.environ.get("OMBRE_DEHYDRATION_MODEL", "") or os.environ.get("OMBRE_MODEL", "")
|
||||||
|
if env_dehy_model:
|
||||||
|
config.setdefault("dehydration", {})["model"] = env_dehy_model
|
||||||
|
|
||||||
|
# OMBRE_DEHYDRATION_BASE_URL overrides dehydration.base_url
|
||||||
|
env_dehy_base_url = os.environ.get("OMBRE_DEHYDRATION_BASE_URL", "")
|
||||||
|
if env_dehy_base_url:
|
||||||
|
config.setdefault("dehydration", {})["base_url"] = env_dehy_base_url
|
||||||
|
|
||||||
|
# OMBRE_EMBEDDING_MODEL overrides embedding.model
|
||||||
|
env_embed_model = os.environ.get("OMBRE_EMBEDDING_MODEL", "")
|
||||||
|
if env_embed_model:
|
||||||
|
config.setdefault("embedding", {})["model"] = env_embed_model
|
||||||
|
|
||||||
|
# OMBRE_EMBEDDING_BASE_URL overrides embedding.base_url
|
||||||
|
env_embed_base_url = os.environ.get("OMBRE_EMBEDDING_BASE_URL", "")
|
||||||
|
if env_embed_base_url:
|
||||||
|
config.setdefault("embedding", {})["base_url"] = env_embed_base_url
|
||||||
|
|
||||||
# --- Ensure bucket storage directories exist ---
|
# --- Ensure bucket storage directories exist ---
|
||||||
# --- 确保记忆桶存储目录存在 ---
|
# --- 确保记忆桶存储目录存在 ---
|
||||||
buckets_dir = config["buckets_dir"]
|
buckets_dir = config["buckets_dir"]
|
||||||
@@ -150,6 +170,14 @@ def generate_bucket_id() -> str:
|
|||||||
return uuid.uuid4().hex[:12]
|
return uuid.uuid4().hex[:12]
|
||||||
|
|
||||||
|
|
||||||
|
def strip_wikilinks(text: str) -> str:
    """
    Remove Obsidian wikilink brackets, keeping the text a reader would see.

    [[word]] → word
    [[target|alias]] → alias (the display text after the first pipe)

    Falsy input (None or an empty string) is returned unchanged.
    """
    if not text:
        return text

    def _display_text(match):
        target, _, alias = match.group(1).partition("|")
        # An empty alias ("[[word|]]") has nothing to show; keep the target.
        return alias or target

    return re.sub(r"\[\[([^\]]+)\]\]", _display_text, text)
|
||||||
|
|
||||||
|
|
||||||
def sanitize_name(name: str) -> str:
|
def sanitize_name(name: str) -> str:
|
||||||
"""
|
"""
|
||||||
Sanitize bucket name, keeping only safe characters.
|
Sanitize bucket name, keeping only safe characters.
|
||||||
|
|||||||
@@ -12,7 +12,28 @@ import uuid
|
|||||||
import argparse
|
import argparse
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain/dynamic")
|
|
||||||
|
def _resolve_dynamic_dir() -> str:
    """
    Locate the `dynamic/` folder beneath the configured bucket root.

    Lookup order: $OMBRE_BUCKETS_DIR, then `buckets_dir` from
    `utils.load_config()`, then `buckets/dynamic` next to this file.
    """
    override = os.environ.get("OMBRE_BUCKETS_DIR", "").strip()
    if override:
        return os.path.join(os.path.expanduser(override), "dynamic")

    try:
        # Imported lazily so the script still works when utils is unavailable.
        from utils import load_config

        configured_root = load_config()["buckets_dir"]
        return os.path.join(configured_root, "dynamic")
    except Exception:
        # Any failure above falls back to the project-local ./buckets/dynamic.
        script_dir = os.path.dirname(os.path.abspath(__file__))
        return os.path.join(script_dir, "buckets", "dynamic")


# Resolved once at import time; the rest of the script writes into this folder.
VAULT_DIR = _resolve_dynamic_dir()
|
||||||
|
|
||||||
|
|
||||||
def gen_id():
|
def gen_id():
|
||||||
@@ -36,7 +57,7 @@ def write_memory(
|
|||||||
tags_yaml = "\n".join(f"- {t}" for t in tags)
|
tags_yaml = "\n".join(f"- {t}" for t in tags)
|
||||||
|
|
||||||
md = f"""---
|
md = f"""---
|
||||||
activation_count: 1
|
activation_count: 0
|
||||||
arousal: {arousal}
|
arousal: {arousal}
|
||||||
created: '{now}'
|
created: '{now}'
|
||||||
domain:
|
domain:
|
||||||
|
|||||||
@@ -1 +1,3 @@
|
|||||||
{}
|
{
|
||||||
|
"build_type": "dockerfile"
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user