init: first commit to Gitea mirror, update README with Docker quick start and new repo URL

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
P0lar1s
2026-04-15 15:44:25 +08:00
commit 0d695f71cb
27 changed files with 6049 additions and 0 deletions

View File

@@ -0,0 +1,70 @@
#!/usr/bin/env python3
# ============================================================
# SessionStart Hook: auto-breath on session start
# 对话开始钩子:自动浮现最高权重的未解决记忆
#
# On SessionStart, this script calls the Ombre Brain MCP server's
# breath tool (empty query = surfacing mode) via HTTP and prints
# the result to stdout so Claude sees it as session context.
#
# This works for OMBRE_TRANSPORT=streamable-http deployments.
# For local stdio deployments, the script falls back gracefully.
#
# Config:
# OMBRE_HOOK_URL — override the server URL (default: http://localhost:8000)
# OMBRE_HOOK_SKIP — set to "1" to disable the hook temporarily
# ============================================================
import json
import os
import sys
import urllib.request
import urllib.error
def main() -> None:
    """Call the Ombre Brain `breath` tool over HTTP and print surfaced memories.

    Runs as a Claude Code SessionStart hook. All failures are silent by
    design so that session startup is never blocked; the script always
    exits with status 0.
    """
    # Honor the temporary kill switch.
    if os.environ.get("OMBRE_HOOK_SKIP") == "1":
        sys.exit(0)

    server = os.environ.get("OMBRE_HOOK_URL", "http://localhost:8000").rstrip("/")

    # JSON-RPC envelope for a tools/call on `breath`; an empty query
    # puts the tool into surfacing mode.
    rpc_body = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "breath",
            "arguments": {"query": "", "max_results": 2},
        },
    }
    request = urllib.request.Request(
        f"{server}/mcp",
        data=json.dumps(rpc_body).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=8) as resp:
            reply = json.loads(resp.read().decode("utf-8"))
        # Gather the text fragments out of the MCP tool result.
        content = reply.get("result", {}).get("content", [])
        pieces = [item.get("text", "") for item in content if item.get("type") == "text"]
        surfaced = "\n".join(pieces).strip()
        if surfaced and surfaced != "权重池平静,没有需要处理的记忆。":
            print(f"[Ombre Brain - 记忆浮现]\n{surfaced}")
    except (urllib.error.URLError, OSError):
        # Server unreachable (local stdio deployment, or not running) — stay quiet.
        pass
    except Exception:
        # Never let any unexpected error block session start.
        pass
    sys.exit(0)


if __name__ == "__main__":
    main()

30
.claude/settings.json Normal file
View File

@@ -0,0 +1,30 @@
{
"hooks": {
"SessionStart": [
{
"matcher": "startup",
"hooks": [
{
"type": "command",
"command": "python \"$CLAUDE_PROJECT_DIR/.claude/hooks/session_breath.py\"",
"shell": "powershell",
"timeout": 12,
"statusMessage": "Ombre Brain 正在浮现记忆..."
}
]
},
{
"matcher": "resume",
"hooks": [
{
"type": "command",
"command": "python \"$CLAUDE_PROJECT_DIR/.claude/hooks/session_breath.py\"",
"shell": "powershell",
"timeout": 12,
"statusMessage": "Ombre Brain 正在浮现记忆..."
}
]
}
]
}
}

13
.gitignore vendored Normal file
View File

@@ -0,0 +1,13 @@
__pycache__/
*.pyc
*.pyo
.env
.venv/
.claude/settings.local.json
.claude/hooks/__pycache__/
.vscode/
config.yaml
buckets/dynamic/
buckets/archive/
buckets/permanent/
scarp_paper

63
CLAUDE_PROMPT.md Normal file
View File

@@ -0,0 +1,63 @@
# Ombre Brain 记忆系统 —— Claude 端使用指南
你拥有了一个叫 Ombre Brain 的永久记忆系统。通过它你可以跨对话记住你想记住的任何事情和回忆你的,我的以及你与我的过往。
## 你有这些能力
| 能力 | 场景 |
|------|-----------|
| `breath` | **每次对话最开头**调用一次(不传参数)——就像睁眼看手机,看看有没有未解决的事浮上来。有明确话题时传 `query` 关键词检索 |
| `hold` | 你很想记住你自己当下的感受,或当下发生的单个事件时 |
| `grow` | 当**一天结束时**或**用户发来一大段日记/总结**时调用。你可以把其中**你想记住的事件**扔进去,它会自动拆分整理成多个记忆盒子存进你的大脑 |
| `trace` | 当你或用户认为"这个记错了"、"帮我改一下"时调用,手动修正记忆的元数据;**某件事解决了**时用 `resolved=1` 让它沉底;**需要删除**时用 `delete=True` |
| `pulse` | 用户想看看你都记得什么、或问记忆系统状态时调用 |
## 使用原则
### 主动调用
- **对话开头**:无参数调用 `breath()`,看看有没有高权重的未解决记忆需要跟进
- **提到过去**:用户说"上次"、"之前"、"还记得"时,用 `breath(query="关键词")` 检索
- **新信息**:用 `hold` 留住你想留下的感受或经历
- **日终整理**:用 `grow` 在**一天结束时**整理你的一天
### 无须调用
- 闲聊水话不需要存("哈哈"、"好的"、"嗯嗯")
- 已经记过的信息不要重复存
- 短期信息不存("帮我查个天气")
### 权重池机制
记忆系统是一个**权重池**,不是分类柜:
- 未解决 + 高情绪强度的桶 → 权重最高,`breath()` 时主动浮现
- 已解决的桶 → 权重骤降,沉底等待关键词激活
-`trace(bucket_id, resolved=1)` 标记某件事已解决,让它沉底
-`trace(bucket_id, resolved=0)` 重新激活一个沉底的记忆
### breath 的参数技巧
- 无参数:浮现模式,推送权重最高的未解决桶(每次对话开头用这个)
- `query`:用关键词而不是整句话,检索更准
- `domain`:如果明确知道话题领域可以传(如 "编程" 或 "恋爱"),缩小搜索范围
- `valence` + `arousal`:如果用户当前情绪明显,可以传情感坐标来触发情感共鸣检索
检索结果不足时,系统有概率从低权重旧桶里随机漂上来 1-3 条记忆,标注为 `[surface_type: random]`。这不是检索结果,是"忽然想起来"——你可以自然地提起它,也可以忽略。
### trace 的参数技巧
- `resolved=1`:标记已解决,桶权重骤降到 5%,基本沉底
- `resolved=0`:重新激活,让它重新参与浮现排序
- `delete=True`:彻底删除这个桶(不可恢复)
- 其余字段(name/domain/valence/arousal/importance/tags)只传需要改的;-1 或空串表示不改
### hold vs grow
- 一句话的事 → `hold`"我喜欢吃饺子"
- 一大段的事 → `grow`"今天发生了一堆事..."
- **需要批量存多条记忆时,用 `grow` 把内容拼成一段发一次,不要多次调用 `hold`**token是稀缺资源——每次工具调用都会消耗token多次 hold 远比 1 次 grow 贵
### 省配额原则
- **一次 grow 胜过多次 hold**:要存多条记忆时,合成一段文本调用一次 grow
- **对话太长时建议用户换窗口**:同一窗口聊越久,每次工具调用的底价越高(因为要重新读完整对话历史)
- **工具返回值很短,无需复述**:收到 `新建→桶名 域名` 后直接跟用户说,无需展开解释
### 核心准则桶pinned
- `hold(content="...", pinned=True)` 创建钉选桶——不衰减、不合并、importance 锁定 10
- `trace(bucket_id, pinned=1)` 把已有桶钉选为核心准则
- `trace(bucket_id, pinned=0)` 取消钉选
- 适用场景:用户教会你的永久知识、核心原则、绝不能忘的事
- 钉选桶不会出现在「浮现未解决记忆」里,但关键词检索时始终可达

33
Dockerfile Normal file
View File

@@ -0,0 +1,33 @@
# ============================================================
# Ombre Brain Docker Build
#
# Build: docker build -t ombre-brain .
# Run:   docker run -e OMBRE_API_KEY=your-key -p 8000:8000 ombre-brain
# ============================================================
FROM python:3.12-slim

WORKDIR /app

# Install dependencies first so this layer stays cached until
# requirements.txt actually changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files. With a wildcard (i.e. multiple sources) the COPY
# destination must be a directory ending with "/" — a bare "." fails on
# the classic builder when more than one .py file matches.
COPY *.py ./
COPY config.example.yaml ./config.yaml

# Persistent mount point for memory bucket data.
VOLUME ["/app/buckets"]

# Containers default to streamable-http (remote access).
ENV OMBRE_TRANSPORT=streamable-http
ENV OMBRE_BUCKETS_DIR=/app/buckets

EXPOSE 8000
CMD ["python", "server.py"]

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2026 P0lar1zzZ
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

418
README.md Normal file
View File

@@ -0,0 +1,418 @@
# Ombre Brain
一个提供给 Claude 使用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标Obsidian 做存储层MCP 接入,带遗忘曲线。
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
> **⚠️ 仓库临时迁移 / Repo temporarily moved**
> GitHub 访问受限期间,代码暂时托管在 Gitea
> **https://git.p0lar1s.uk/P0lar1s/Ombre_Brain**
> 下面的 `git clone` 地址请替换为上面这个。
---
## 快速开始 / Quick StartDocker推荐
> 这是最简单的方式,不需要装 Python不需要懂命令行跟着做就行。
**前置条件:** 电脑上装了 [Docker Desktop](https://www.docker.com/products/docker-desktop/),并且已经打开。
**第一步:拉取代码**
```bash
git clone https://git.p0lar1s.uk/P0lar1s/Ombre_Brain.git
cd Ombre_Brain
```
**第二步:创建 `.env` 文件**
在项目目录下新建一个叫 `.env` 的文件(注意有个点),内容填:
```
OMBRE_API_KEY=你的DeepSeek或其他API密钥
```
没有 API key 也能用,脱水压缩会降级到本地模式,只是效果差一点。那就写:
```
OMBRE_API_KEY=
```
**第三步:配置 `docker-compose.yml`(指向你的 Obsidian Vault**
用文本编辑器打开 `docker-compose.yml`,找到这一行:
```yaml
- ./buckets:/data
```
改成你的 Obsidian Vault 里 `Ombre Brain` 文件夹的路径,例如:
```yaml
- /Users/你的用户名/Documents/Obsidian Vault/Ombre Brain:/data
```
> 不知道路径?在 Obsidian 里右键那个文件夹 → 「在访达中显示」,然后把地址栏的路径复制过来。
> 不想挂载 Obsidian 也行,保持 `./buckets:/data` 不动,数据会存在项目目录的 `buckets/` 文件夹里。
**第四步:启动**
```bash
docker compose up -d
```
等它跑完,看到 `Started` 就好了。
**验证是否正常运行:**
```bash
docker logs ombre-brain
```
看到 `Uvicorn running on http://0.0.0.0:8000` 说明成功了。
---
**接入 Claude.ai远程访问**
需要额外配置 Cloudflare Tunnel把服务暴露到公网。参考下面「接入 Claude.ai (远程)」章节。
**接入 Claude Desktop本地**
不需要 Docker直接用 Python 本地跑。参考下面「安装 / Setup」章节。
---
[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
---
## 它是什么 / What is this
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
核心特点 / Key features:
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence效价和 arousal唤醒度两个连续维度标记。不是"开心/难过"这种离散标签。
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM APIDeepSeek 等API 不可用时自动降级到本地关键词分析——始终可用。
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
## 边界说明 / Design boundaries
官方记忆功能已经在做身份层的事了——你是谁你有什么偏好你们的关系是什么。那一层交给它Ombre Brain不打算造重复的轮子。
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
每次新对话Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
---
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
## 架构 / Architecture
```
Claude ←→ MCP Protocol ←→ server.py
┌───────────────┼───────────────┐
│ │ │
bucket_manager dehydrator decay_engine
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
Obsidian Vault (Markdown files)
```
5 个 MCP 工具 / 5 MCP tools:
| 工具 Tool | 作用 Purpose |
|-----------|-------------|
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
## 安装 / Setup
### 环境要求 / Requirements
- Python 3.11+
- 一个 Obsidian Vault可选不用也行会在项目目录下自建 `buckets/`
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
### 步骤 / Steps
```bash
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
cd Ombre-Brain
python -m venv .venv
source .venv/bin/activate # Windows: .venv\Scripts\activate
pip install -r requirements.txt
```
复制配置文件并按需修改 / Copy config and edit as needed:
```bash
cp config.example.yaml config.yaml
```
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
If you want API-powered dehydration and tagging (recommended, much better quality):
```bash
export OMBRE_API_KEY="your-api-key"
```
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
### 接入 Claude Desktop / Connect to Claude Desktop
在 Claude Desktop 配置文件中添加macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
Add to your Claude Desktop config:
```json
{
"mcpServers": {
"ombre-brain": {
"command": "python",
"args": ["/path/to/Ombre-Brain/server.py"],
"env": {
"OMBRE_API_KEY": "your-api-key"
}
}
}
}
```
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
需要 HTTP 传输 + 隧道。可以用 Docker
Requires HTTP transport + tunnel. Docker setup:
```bash
echo "OMBRE_API_KEY=your-api-key" > .env
docker-compose up -d
```
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
### 指向 Obsidian / Point to Obsidian
`config.yaml` 里设置 `buckets_dir`
Set `buckets_dir` in `config.yaml`:
```yaml
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
```
不设的话,默认用项目目录下的 `buckets/`
If not set, defaults to `buckets/` in the project directory.
## 配置 / Configuration
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
| 参数 Parameter | 说明 Description | 默认 Default |
|---|---|---|
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
敏感配置用环境变量:
Sensitive config via env vars:
- `OMBRE_API_KEY` — LLM API 密钥
- `OMBRE_TRANSPORT` — 覆盖传输方式
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
## 衰减公式 / Decay Formula
$$final\_score = time\_weight \times base\_score$$
$$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
时间系数(乘数,优先级最高)/ Time weight (multiplier, highest priority):
| 距今天数 Days since active | 时间系数 Weight |
|---|---|
| 01 天 | 1.0 |
| 第 2 天 | 0.9 |
| 之后每天约降 10% | `max(0.3, 0.9 × e^{-0.2197 × (days-2)})` |
| 7 天后稳定 | ≈ 0.3(不归零)|
- `importance`: 1-10记忆重要性 / memory importance
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
- `days`: 距上次激活的天数 / days since last activation
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
- `pinned=true` 的桶不衰减、不合并、importance 锁定 10 / `pinned` buckets: never decay, never merge, importance locked at 10
## 给 Claude 的使用指南 / Usage Guide for Claude
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
## 工具脚本 / Utility Scripts
| 脚本 Script | 用途 Purpose |
|---|---|
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
| `test_smoke.py` | 冒烟测试 / Smoke test |
## 部署 / Deploy
### Render
[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
> ⚠️ **免费层不可用**Render 免费层**不支持持久化磁盘**,服务重启后记忆数据会丢失,且会在无流量时休眠。**必须使用 Starter$7/mo或以上**才能正常使用。
> **Free tier won't work**: Render free tier has **no persistent disk** — all memory data is lost on restart. It also sleeps on inactivity. **Starter plan ($7/mo) or above is required.**
项目根目录已包含 `render.yaml`,点击按钮后:
1. (可选)设置 `OMBRE_API_KEY`:任何 OpenAI 兼容 API 的 key不填则自动降级为本地关键词提取
2. (可选)设置 `OMBRE_BASE_URL`API 地址,支持任意 OpenAI 化地址,如 `https://api.deepseek.com/v1` / `http://123.1.1.1:7689/v1` / `http://your-ollama:11434/v1`
3. Render 自动挂载持久化磁盘到 `/opt/render/project/src/buckets`
4. 部署后 MCP URL`https://<你的服务名>.onrender.com/mcp`
`render.yaml` is included. After clicking the button:
1. (Optional) `OMBRE_API_KEY`: any OpenAI-compatible key; omit to fall back to local keyword extraction
2. (Optional) `OMBRE_BASE_URL`: any OpenAI-compatible endpoint, e.g. `https://api.deepseek.com/v1`, `http://123.1.1.1:7689/v1`, `http://your-ollama:11434/v1`
3. Persistent disk auto-mounts at `/opt/render/project/src/buckets`
4. MCP URL after deploy: `https://<your-service>.onrender.com/mcp`
### Zeabur
> 💡 **Zeabur 的定价模式**Zeabur 是「买 VPS + 平台托管」,你先购买一台服务器(最低腾讯云新加坡 $2/mo、火山引擎 $3/moVolume 直接挂在该服务器上,**数据天然持久化,无丢失问题**。另需订阅 Zeabur 管理方案Developer $5/mo总计约 $7-8/mo 起。
> **Zeabur pricing model**: You buy a VPS first (cheapest: Tencent Cloud Singapore ~$2/mo, Volcano Engine ~$3/mo), then add Zeabur's Developer plan ($5/mo) for management. Volumes mount directly on your server — **data is always persistent, no cold-start data loss**. Total ~$7-8/mo minimum.
**步骤 / Steps**
1. **创建项目 / Create project**
- 打开 [zeabur.com](https://zeabur.com) → 购买一台服务器 → **New Project****Deploy from GitHub**
- 先 Fork 本仓库到自己 GitHub 账号,然后在 Zeabur 选择 `你的用户名/Ombre-Brain`
- Zeabur 会自动检测到根目录的 `Dockerfile` 并使用 Docker 方式构建
- Go to [zeabur.com](https://zeabur.com) → buy a server → **New Project****Deploy from GitHub**
- Fork this repo first, then select `your-username/Ombre-Brain` in Zeabur
- Zeabur auto-detects the `Dockerfile` in root and builds via Docker
2. **设置环境变量 / Set environment variables**(服务页面 → **Variables** 标签页)
- `OMBRE_API_KEY`(可选)— LLM API 密钥,不填则自动降级为本地关键词提取
- `OMBRE_BASE_URL`(可选)— API 地址,如 `https://api.deepseek.com/v1`
> ⚠️ **不需要**手动设置 `OMBRE_TRANSPORT` 和 `OMBRE_BUCKETS_DIR`Dockerfile 里已经设好了默认值。Zeabur 对单阶段 Dockerfile 会自动注入控制台设置的环境变量。
> You do **NOT** need to set `OMBRE_TRANSPORT` or `OMBRE_BUCKETS_DIR` — defaults are baked into the Dockerfile. Zeabur auto-injects dashboard env vars for single-stage Dockerfiles.
3. **挂载持久存储 / Mount persistent volume**(服务页面 → **Volumes** 标签页)
- Volume ID`ombre-buckets`(或任意名)
- 挂载路径 / Path**`/app/buckets`**
- ⚠️ 不挂载的话,每次重新部署记忆数据会丢失
- ⚠️ Without this, memory data is lost on every redeploy
4. **配置端口 / Configure port**(服务页面 → **Networking** 标签页)
- Port Name`web`(或任意名)
- Port**`8000`**
- Port Type**`HTTP`**
- 然后点 **Generate Domain** 生成一个 `xxx.zeabur.app` 域名
- Then click **Generate Domain** to get a `xxx.zeabur.app` domain
5. **验证 / Verify**
- 访问 `https://<你的域名>.zeabur.app/health`,应返回 JSON
- Visit `https://<your-domain>.zeabur.app/health` — should return JSON
- 最终 MCP 地址 / MCP URL`https://<你的域名>.zeabur.app/mcp`
**常见问题 / Troubleshooting**
| 现象 Symptom | 原因 Cause | 解决 Fix |
|---|---|---|
| 域名无法访问 / Domain unreachable | 没配端口 / Port not configured | Networking 标签页加 port 8000 (HTTP) |
| 域名无法访问 / Domain unreachable | `OMBRE_TRANSPORT` 未设置,服务以 stdio 模式启动,不监听任何端口 / Service started in stdio mode — no port is listened | **Variables 标签页确认设置 `OMBRE_TRANSPORT=streamable-http`,然后重新部署** |
| 构建失败 / Build failed | Dockerfile 未被识别 / Dockerfile not detected | 确认仓库根目录有 `Dockerfile`(大小写敏感) |
| 服务启动后立刻退出 | `OMBRE_TRANSPORT` 被覆盖为 `stdio` | 检查 Variables 里有没有多余的 `OMBRE_TRANSPORT=stdio`,删掉即可 |
| 重启后记忆丢失 / Data lost on restart | Volume 未挂载 | Volumes 标签页挂载到 `/app/buckets` |
### 使用 Cloudflare Tunnel 或 ngrok 连接 / Connecting via Cloudflare Tunnel or ngrok
> 自 v1.1 起server.py 在 HTTP 模式下已自动添加 CORS 中间件,无需额外配置。
> Since v1.1, server.py automatically enables CORS middleware in HTTP mode — no extra config needed.
使用隧道连接时,确保以下条件满足:
When connecting via tunnel, ensure:
1. **服务器必须运行在 HTTP 模式** / Server must use HTTP transport
```bash
OMBRE_TRANSPORT=streamable-http python server.py
```
或 Docker
```bash
docker-compose up -d
```
2. **在 Claude.ai 网页版添加 MCP 服务器** / Adding to Claude.ai web
- URL 格式 / URL format: `https://<tunnel-subdomain>.trycloudflare.com/mcp`
- 或 ngrok / or ngrok: `https://<xxxx>.ngrok-free.app/mcp`
- 先访问 `/health` 验证连接 / Verify first: `https://<your-tunnel>/health` should return `{"status":"ok",...}`
3. **已知限制 / Known limitations**
- Cloudflare Tunnel 免费版有空闲超时(约 10 分钟),系统内置保活 ping 可缓解但不能完全消除
- Free Cloudflare Tunnel has idle timeout (~10 min); built-in keepalive pings mitigate but can't fully prevent it
- ngrok 免费版有请求速率限制 / ngrok free tier has rate limits
- 如果连接仍失败,检查隧道是否正在运行、服务是否以 `streamable-http` 模式启动
- If connection still fails, verify the tunnel is running and the server started in `streamable-http` mode
| 现象 Symptom | 原因 Cause | 解决 Fix |
|---|---|---|
| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 服务以 stdio 模式运行 / Server in stdio mode | 设置 `OMBRE_TRANSPORT=streamable-http` 后重启 |
| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 旧版 server.py 缺少 CORS 头 / Missing CORS headers | 拉取最新代码CORS 已内置 / Pull latest — CORS is now built-in |
| `/health` 返回 200 但 MCP 连不上 / `/health` 200 but MCP fails | 路径错误 / Wrong path | MCP URL 末尾必须是 `/mcp` 而非 `/` |
| 隧道连接偶尔断开 / Tunnel disconnects intermittently | Cloudflare Tunnel 空闲超时 / Idle timeout | 保活 ping 已内置,若仍断开可缩短隧道超时配置 |
---
### Session Start Hook自动 breath
部署后,如果你使用 Claude Code可以在项目内激活自动浮现 hook
`.claude/settings.json` 已配置好 `SessionStart` hook每次新会话或恢复会话时自动触发 `breath`,把最高权重未解决记忆推入上下文。
**仅在远程 HTTP 模式下有效**`OMBRE_TRANSPORT=streamable-http`)。本地 stdio 模式下 hook 会安静退出,不影响正常使用。
可以通过 `OMBRE_HOOK_URL` 环境变量指定服务器地址(默认 `http://localhost:8000`),或者设置 `OMBRE_HOOK_SKIP=1` 临时禁用。
If using Claude Code, `.claude/settings.json` configures a `SessionStart` hook that auto-calls `breath` on each new or resumed session, surfacing your highest-weight unresolved memories as context. Only active in remote HTTP mode. Set `OMBRE_HOOK_SKIP=1` to disable temporarily.
## License
MIT

View File

@@ -0,0 +1,205 @@
# Ombre Brain
一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标Obsidian 做存储层MCP 接入,带遗忘曲线。
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
---
## 它是什么 / What is this
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
核心特点 / Key features:
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence效价和 arousal唤醒度两个连续维度标记。不是"开心/难过"这种离散标签。
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM APIDeepSeek 等API 不可用时自动降级到本地关键词分析——始终可用。
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
## 边界说明 / Design boundaries
官方记忆功能已经在做身份层的事了——你是谁你有什么偏好你们的关系是什么。那一层交给它Ombre Brain不打算造重复的轮子。
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
每次新对话Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
---
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
## 架构 / Architecture
```
Claude ←→ MCP Protocol ←→ server.py
┌───────────────┼───────────────┐
│ │ │
bucket_manager dehydrator decay_engine
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
Obsidian Vault (Markdown files)
```
5 个 MCP 工具 / 5 MCP tools:
| 工具 Tool | 作用 Purpose |
|-----------|-------------|
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
## 安装 / Setup
### 环境要求 / Requirements
- Python 3.11+
- 一个 Obsidian Vault可选不用也行会在项目目录下自建 `buckets/`
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
### 步骤 / Steps
```bash
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
cd Ombre-Brain
python -m venv .venv
source .venv/bin/activate # Windows: .venv\Scripts\activate
pip install -r requirements.txt
```
复制配置文件并按需修改 / Copy config and edit as needed:
```bash
cp config.example.yaml config.yaml
```
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
If you want API-powered dehydration and tagging (recommended, much better quality):
```bash
export OMBRE_API_KEY="your-api-key"
```
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url``model` 就行。
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
### 接入 Claude Desktop / Connect to Claude Desktop
在 Claude Desktop 配置文件中添加macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`
Add to your Claude Desktop config:
```json
{
"mcpServers": {
"ombre-brain": {
"command": "python",
"args": ["/path/to/Ombre-Brain/server.py"],
"env": {
"OMBRE_API_KEY": "your-api-key"
}
}
}
}
```
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
需要 HTTP 传输 + 隧道。可以用 Docker
Requires HTTP transport + tunnel. Docker setup:
```bash
echo "OMBRE_API_KEY=your-api-key" > .env
docker-compose up -d
```
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
### 指向 Obsidian / Point to Obsidian
`config.yaml` 里设置 `buckets_dir`
Set `buckets_dir` in `config.yaml`:
```yaml
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
```
不设的话,默认用项目目录下的 `buckets/`
If not set, defaults to `buckets/` in the project directory.
## 配置 / Configuration
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
| 参数 Parameter | 说明 Description | 默认 Default |
|---|---|---|
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
敏感配置用环境变量:
Sensitive config via env vars:
- `OMBRE_API_KEY` — LLM API 密钥
- `OMBRE_TRANSPORT` — 覆盖传输方式
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
## 衰减公式 / Decay Formula
$$Score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
- `importance`: 1-10记忆重要性 / memory importance
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
- `days`: 距上次激活的天数 / days since last activation
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
## 给 Claude 的使用指南 / Usage Guide for Claude
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
## 工具脚本 / Utility Scripts
| 脚本 Script | 用途 Purpose |
|---|---|
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
| `test_smoke.py` | 冒烟测试 / Smoke test |
## License
MIT

View File

@@ -0,0 +1,755 @@
# ============================================================
# Module: Memory Bucket Manager (bucket_manager.py)
# 模块:记忆桶管理器
#
# CRUD operations, multi-dimensional index search, activation updates
# for memory buckets.
# 记忆桶的增删改查、多维索引搜索、激活更新。
#
# Core design:
# 核心逻辑:
# - Each bucket = one Markdown file (YAML frontmatter + body)
# 每个记忆桶 = 一个 Markdown 文件
# - Storage by type: permanent / dynamic / archive
# 存储按类型分目录
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
# - Search strategy: domain pre-filter → weighted multi-dim ranking
# 搜索策略:主题域预筛 → 多维加权精排
# - Emotion coordinates based on Russell circumplex model:
# 情感坐标基于环形情感模型Russell circumplex
# valence (0~1): 0=negative → 1=positive
# arousal (0~1): 0=calm → 1=excited
#
# Depended on by: server.py, decay_engine.py
# 被谁依赖server.py, decay_engine.py
# ============================================================
import os
import math
import logging
import re
import shutil
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Optional
import frontmatter
import jieba
from rapidfuzz import fuzz
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
logger = logging.getLogger("ombre_brain.bucket")
class BucketManager:
    """
    Memory bucket manager — entry point for all bucket CRUD operations.
    Buckets are stored as Markdown files with YAML frontmatter for metadata
    and body for content. Natively compatible with Obsidian browsing/editing.
    """
    def __init__(self, config: dict):
        # --- Read storage paths from config ---
        self.base_dir = config["buckets_dir"]
        self.permanent_dir = os.path.join(self.base_dir, "permanent")
        self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
        self.archive_dir = os.path.join(self.base_dir, "archive")
        # Minimum normalized score (0~100 scale, see search()) a bucket must
        # reach to appear in search results.
        self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
        self.max_results = config.get("matching", {}).get("max_results", 5)
        # --- Wikilink config ---
        wikilink_cfg = config.get("wikilink", {})
        self.wikilink_enabled = wikilink_cfg.get("enabled", True)
        self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
        self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
        self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
        self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
        self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
        self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
        # Built-in stopword list (Chinese + English) excluded from auto-keywords.
        self.wikilink_stopwords = {
            "", "", "", "", "", "", "", "", "", "",
            "", "一个", "", "", "", "", "", "", "",
            "", "", "", "没有", "", "", "自己", "", "", "",
            "我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
            "the", "and", "for", "are", "but", "not", "you", "all", "can",
            "had", "her", "was", "one", "our", "out", "has", "have", "with",
            "this", "that", "from", "they", "been", "said", "will", "each",
        }
        self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
        # --- Search scoring weights ---
        scoring = config.get("scoring_weights", {})
        self.w_topic = scoring.get("topic_relevance", 4.0)
        self.w_emotion = scoring.get("emotion_resonance", 2.0)
        self.w_time = scoring.get("time_proximity", 1.5)
        self.w_importance = scoring.get("importance", 1.0)
    # ---------------------------------------------------------
    # Create a new bucket
    # Write content and metadata into a .md file
    # ---------------------------------------------------------
    async def create(
        self,
        content: str,
        tags: Optional[list[str]] = None,
        importance: int = 5,
        domain: Optional[list[str]] = None,
        valence: float = 0.5,
        arousal: float = 0.3,
        bucket_type: str = "dynamic",
        name: Optional[str] = None,
    ) -> str:
        """
        Create a new memory bucket, return the new bucket ID.

        Args:
            content: Markdown body of the memory.
            tags: optional tag list (defaults to empty).
            importance: 1~10, clamped into range.
            domain: topic domains; defaults to ["未分类"] ("uncategorized").
            valence: emotion valence 0~1, clamped.
            arousal: emotion arousal 0~1, clamped.
            bucket_type: "permanent" or "dynamic" — selects the storage dir.
            name: optional human-readable name (sanitized for filesystem use).

        Raises:
            OSError: if the bucket file cannot be written.
        """
        bucket_id = generate_bucket_id()
        bucket_name = sanitize_name(name) if name else bucket_id
        domain = domain or ["未分类"]
        tags = tags or []
        linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)
        # --- Build YAML frontmatter metadata ---
        metadata = {
            "id": bucket_id,
            "name": bucket_name,
            "tags": tags,
            "domain": domain,
            "valence": max(0.0, min(1.0, valence)),
            "arousal": max(0.0, min(1.0, arousal)),
            "importance": max(1, min(10, importance)),
            "type": bucket_type,
            "created": now_iso(),
            "last_active": now_iso(),
            "activation_count": 1,
        }
        # --- Assemble Markdown file (frontmatter + body) ---
        post = frontmatter.Post(linked_content, **metadata)
        # --- Choose directory by type + primary domain ---
        type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        target_dir = os.path.join(type_dir, primary_domain)
        os.makedirs(target_dir, exist_ok=True)
        # --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
        if bucket_name and bucket_name != bucket_id:
            filename = f"{bucket_name}_{bucket_id}.md"
        else:
            filename = f"{bucket_id}.md"
        file_path = safe_path(target_dir, filename)
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
            raise
        logger.info(
            f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
        )
        return bucket_id
    # ---------------------------------------------------------
    # Read bucket content
    # Returns {"id", "metadata", "content", "path"} or None
    # ---------------------------------------------------------
    async def get(self, bucket_id: str) -> Optional[dict]:
        """
        Read a single bucket by ID.

        Returns {"id", "metadata", "content", "path"} or None if the ID is
        falsy/non-string or no matching file exists.
        """
        if not bucket_id or not isinstance(bucket_id, str):
            return None
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return None
        return self._load_bucket(file_path)
    # ---------------------------------------------------------
    # Update bucket
    # Supports: content, tags, importance, valence, arousal, name, resolved
    # ---------------------------------------------------------
    async def update(self, bucket_id: str, **kwargs) -> bool:
        """
        Update bucket content or metadata fields.

        Only fields present in kwargs are changed; `last_active` is always
        refreshed. Returns True on success, False if the bucket is missing
        or the file cannot be read/written.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False
        try:
            post = frontmatter.load(file_path)
        except Exception as e:
            logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
            return False
        # --- Update only fields that were passed in ---
        if "content" in kwargs:
            # Re-run wikilink injection using the (possibly updated)
            # tags/domain/name so new links stay consistent.
            next_tags = kwargs.get("tags", post.get("tags", []))
            next_domain = kwargs.get("domain", post.get("domain", []))
            next_name = kwargs.get("name", post.get("name", ""))
            post.content = self._apply_wikilinks(
                kwargs["content"],
                next_tags,
                next_domain,
                next_name,
            )
        if "tags" in kwargs:
            post["tags"] = kwargs["tags"]
        if "importance" in kwargs:
            post["importance"] = max(1, min(10, int(kwargs["importance"])))
        if "domain" in kwargs:
            post["domain"] = kwargs["domain"]
        if "valence" in kwargs:
            post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
        if "arousal" in kwargs:
            post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
        if "name" in kwargs:
            post["name"] = sanitize_name(kwargs["name"])
        if "resolved" in kwargs:
            post["resolved"] = bool(kwargs["resolved"])
        # --- Auto-refresh activation time ---
        post["last_active"] = now_iso()
        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
            return False
        logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
        return True
    # ---------------------------------------------------------
    # Wikilink injection
    # ---------------------------------------------------------
    def _apply_wikilinks(
        self,
        content: str,
        tags: list[str],
        domain: list[str],
        name: str,
    ) -> str:
        """
        Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
        """
        if not self.wikilink_enabled or not content:
            return content
        keywords = self._collect_wikilink_keywords(content, tags, domain, name)
        if not keywords:
            return content
        # Split on existing wikilinks so they are never wrapped twice
        segments = re.split(r"(\[\[[^\]]+\]\])", content)
        # Keywords arrive longest-first (see _normalize_keywords), so the
        # regex alternation prefers the longest match at each position.
        pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
        for i, segment in enumerate(segments):
            if segment.startswith("[[") and segment.endswith("]]"):
                continue
            updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
            segments[i] = updated
        return "".join(segments)
    def _collect_wikilink_keywords(
        self,
        content: str,
        tags: list[str],
        domain: list[str],
        name: str,
    ) -> list[str]:
        """
        Collect candidate keywords from tags/domain/name/auto-extraction,
        then normalize (dedupe, filter, sort) the combined list.
        """
        candidates = []
        if self.wikilink_use_tags:
            candidates.extend(tags or [])
        if self.wikilink_use_domain:
            candidates.extend(domain or [])
        if name:
            candidates.append(name)
        if self.wikilink_use_auto_keywords:
            candidates.extend(self._extract_auto_keywords(content))
        return self._normalize_keywords(candidates)
    def _normalize_keywords(self, keywords: list[str]) -> list[str]:
        """
        Deduplicate and sort by length (longer first so short words do not
        break up longer ones during replacement).
        """
        if not keywords:
            return []
        seen = set()
        cleaned = []
        for keyword in keywords:
            if not isinstance(keyword, str):
                continue
            kw = keyword.strip()
            if len(kw) < self.wikilink_min_len:
                continue
            if kw in self.wikilink_exclude_keywords:
                continue
            if kw.lower() in self.wikilink_stopwords:
                continue
            if kw in seen:
                continue
            seen.add(kw)
            cleaned.append(kw)
        return sorted(cleaned, key=len, reverse=True)
    def _extract_auto_keywords(self, content: str) -> list[str]:
        """
        Auto-extract keywords from body text, prioritizing high-frequency words.
        Combines jieba-segmented Chinese words, Chinese bigrams, and
        English tokens; returns the top-k most common.
        """
        if not content:
            return []
        try:
            zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
        except Exception:
            # jieba failure is non-fatal — fall back to English-only extraction
            zh_words = []
        en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
        # Chinese bigrams: join adjacent segmented words into one candidate
        zh_bigrams = []
        for i in range(len(zh_words) - 1):
            left = zh_words[i]
            right = zh_words[i + 1]
            if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
                continue
            # Only keep purely-CJK combinations
            if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
                continue
            # Skip overly long combinations (unlikely to be real terms)
            if len(left + right) > 8:
                continue
            zh_bigrams.append(left + right)
        merged = []
        for word in zh_words + zh_bigrams + en_words:
            if len(word) < self.wikilink_min_len:
                continue
            if re.fullmatch(r"\d+", word):
                continue
            if word.lower() in self.wikilink_stopwords:
                continue
            merged.append(word)
        if not merged:
            return []
        counter = Counter(merged)
        return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
    # ---------------------------------------------------------
    # Delete bucket
    # ---------------------------------------------------------
    async def delete(self, bucket_id: str) -> bool:
        """
        Delete a memory bucket file. Returns True on success.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False
        try:
            os.remove(file_path)
        except OSError as e:
            logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
            return False
        logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
        return True
    # ---------------------------------------------------------
    # Touch bucket (refresh activation time + increment count)
    # Called on every recall hit; affects decay score.
    # ---------------------------------------------------------
    async def touch(self, bucket_id: str) -> None:
        """
        Update a bucket's last activation time and activation count.
        Best-effort: failures are logged, never raised.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return
        try:
            post = frontmatter.load(file_path)
            post["last_active"] = now_iso()
            post["activation_count"] = post.get("activation_count", 0) + 1
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except Exception as e:
            logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
    # ---------------------------------------------------------
    # Multi-dimensional search (core feature)
    #
    # Strategy: domain pre-filter → weighted multi-dim ranking
    #
    # Ranking formula:
    #   total = topic(×w_topic) + emotion(×w_emotion)
    #         + time(×w_time) + importance(×w_importance)
    #
    # Per-dimension scores (normalized to 0~1):
    #   topic      = rapidfuzz weighted match (name/tags/domain/body)
    #   emotion    = 1 - Euclidean distance (query v/a vs bucket v/a)
    #   time       = e^(-0.02 × days) (recent memories first)
    #   importance = importance / 10
    # ---------------------------------------------------------
    async def search(
        self,
        query: str,
        limit: Optional[int] = None,
        domain_filter: Optional[list[str]] = None,
        query_valence: Optional[float] = None,
        query_arousal: Optional[float] = None,
    ) -> list[dict]:
        """
        Multi-dimensional indexed search for memory buckets.

        Args:
            query: free-text query; empty/blank returns [].
            limit: max results (defaults to configured max_results).
            domain_filter: pre-filter by domain (None = search all).
            query_valence/query_arousal: emotion coordinates for resonance
                scoring; None disables the emotion dimension (neutral 0.5).

        Returns bucket dicts with an added "score" key (0~100), best first.
        """
        if not query or not query.strip():
            return []
        limit = limit or self.max_results
        all_buckets = await self.list_all(include_archive=False)
        if not all_buckets:
            return []
        # --- Layer 1: domain pre-filter (fast scope reduction) ---
        if domain_filter:
            filter_set = {d.lower() for d in domain_filter}
            candidates = [
                b for b in all_buckets
                if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set
            ]
            # Fall back to full search if pre-filter yields nothing
            if not candidates:
                candidates = all_buckets
        else:
            candidates = all_buckets
        # --- Layer 2: weighted multi-dim ranking ---
        scored = []
        for bucket in candidates:
            meta = bucket.get("metadata", {})
            try:
                # Dim 1: topic relevance (fuzzy text, 0~1)
                topic_score = self._calc_topic_score(query, bucket)
                # Dim 2: emotion resonance (coordinate distance, 0~1)
                emotion_score = self._calc_emotion_score(
                    query_valence, query_arousal, meta
                )
                # Dim 3: time proximity (exponential decay, 0~1)
                time_score = self._calc_time_score(meta)
                # Dim 4: importance (direct normalization)
                importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0
                # --- Weighted sum ---
                total = (
                    topic_score * self.w_topic
                    + emotion_score * self.w_emotion
                    + time_score * self.w_time
                    + importance_score * self.w_importance
                )
                # Normalize to 0~100 for readability
                weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
                normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0
                # Resolved buckets get ranking penalty (but still reachable by keyword)
                if meta.get("resolved", False):
                    normalized *= 0.3
                if normalized >= self.fuzzy_threshold:
                    bucket["score"] = round(normalized, 2)
                    scored.append(bucket)
            except Exception as e:
                logger.warning(
                    f"Scoring failed for bucket {bucket.get('id', '?')} / "
                    f"桶评分失败: {e}"
                )
                continue
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
    # ---------------------------------------------------------
    # Topic relevance sub-score:
    #   name(×3) + domain(×2.5) + tags(×2) + body(×1)
    # ---------------------------------------------------------
    def _calc_topic_score(self, query: str, bucket: dict) -> float:
        """
        Calculate the text-dimension relevance score (0~1).
        """
        meta = bucket.get("metadata", {})
        name_score = fuzz.partial_ratio(query, meta.get("name", "")) * 3
        domain_score = (
            max(
                (fuzz.partial_ratio(query, d) for d in meta.get("domain", [])),
                default=0,
            )
            * 2.5
        )
        tag_score = (
            max(
                (fuzz.partial_ratio(query, tag) for tag in meta.get("tags", [])),
                default=0,
            )
            * 2
        )
        # Only the first 500 chars of the body are compared, to bound cost
        content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1
        # 8.5 = 3 + 2.5 + 2 + 1 (sum of the field weights);
        # ×100 because partial_ratio returns 0~100.
        return (name_score + domain_score + tag_score + content_score) / (100 * 8.5)
    # ---------------------------------------------------------
    # Emotion resonance sub-score:
    # Based on Russell circumplex Euclidean distance.
    # No emotion in query → neutral 0.5 (doesn't affect ranking)
    # ---------------------------------------------------------
    def _calc_emotion_score(
        self, q_valence: Optional[float], q_arousal: Optional[float], meta: dict
    ) -> float:
        """
        Calculate emotion resonance score (0~1, closer coordinates = higher).
        """
        if q_valence is None or q_arousal is None:
            return 0.5  # No emotion coordinates → neutral score
        try:
            b_valence = float(meta.get("valence", 0.5))
            b_arousal = float(meta.get("arousal", 0.3))
        except (ValueError, TypeError):
            return 0.5
        # Euclidean distance in the unit square, max sqrt(2) ≈ 1.414
        dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
        return max(0.0, 1.0 - dist / 1.414)
    # ---------------------------------------------------------
    # Time proximity sub-score:
    # More recent activation → higher score
    # ---------------------------------------------------------
    def _calc_time_score(self, meta: dict) -> float:
        """
        Calculate time proximity score (0~1, more recent = higher).
        """
        last_active_str = meta.get("last_active", meta.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days = 30  # Unparsable timestamp → assume 30 days old
        return math.exp(-0.02 * days)
    # ---------------------------------------------------------
    # List all buckets
    # ---------------------------------------------------------
    async def list_all(self, include_archive: bool = False) -> list[dict]:
        """
        Recursively walk storage directories (including domain subdirs) and
        return all loadable buckets. Unreadable files are skipped.
        """
        buckets = []
        dirs = [self.permanent_dir, self.dynamic_dir]
        if include_archive:
            dirs.append(self.archive_dir)
        for dir_path in dirs:
            if not os.path.exists(dir_path):
                continue
            for root, _, files in os.walk(dir_path):
                for filename in files:
                    if not filename.endswith(".md"):
                        continue
                    file_path = os.path.join(root, filename)
                    bucket = self._load_bucket(file_path)
                    if bucket:
                        buckets.append(bucket)
        return buckets
    # ---------------------------------------------------------
    # Statistics (counts per category + total size)
    # ---------------------------------------------------------
    async def get_stats(self) -> dict:
        """
        Return memory bucket statistics (counts per storage type, per-domain
        counts, and total size in KB), including domain subdirs.
        """
        stats = {
            "permanent_count": 0,
            "dynamic_count": 0,
            "archive_count": 0,
            "total_size_kb": 0.0,
            "domains": {},
        }
        for subdir, key in [
            (self.permanent_dir, "permanent_count"),
            (self.dynamic_dir, "dynamic_count"),
            (self.archive_dir, "archive_count"),
        ]:
            if not os.path.exists(subdir):
                continue
            for root, _, files in os.walk(subdir):
                for f in files:
                    if f.endswith(".md"):
                        stats[key] += 1
                        fpath = os.path.join(root, f)
                        try:
                            stats["total_size_kb"] += os.path.getsize(fpath) / 1024
                        except OSError:
                            pass
                        # Per-domain counts: a bucket's domain is its parent
                        # directory name (skip files directly under the type dir)
                        domain_name = os.path.basename(root)
                        if domain_name != os.path.basename(subdir):
                            stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1
        return stats
    # ---------------------------------------------------------
    # Archive bucket (move from permanent/dynamic into archive)
    # Called by decay engine to simulate "forgetting"
    # ---------------------------------------------------------
    async def archive(self, bucket_id: str) -> bool:
        """
        Move a bucket into the archive directory (preserving the domain
        subdir structure) and mark its type as "archived".
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False
        try:
            # Read once: get domain info and update the type marker
            post = frontmatter.load(file_path)
            domain = post.get("domain", ["未分类"])
            primary_domain = sanitize_name(domain[0]) if domain else "未分类"
            archive_subdir = os.path.join(self.archive_dir, primary_domain)
            os.makedirs(archive_subdir, exist_ok=True)
            dest = safe_path(archive_subdir, os.path.basename(file_path))
            # Update type marker in place, then move the file
            post["type"] = "archived"
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
            # Use shutil.move for cross-filesystem safety
            shutil.move(file_path, str(dest))
        except Exception as e:
            logger.error(
                f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
            )
            return False
        logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
        return True
    # ---------------------------------------------------------
    # Internal: find bucket file across all three directories
    # ---------------------------------------------------------
    def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
        """
        Recursively search permanent/dynamic/archive for a bucket file whose
        filename contains the given ID; returns the first match or None.
        """
        if not bucket_id:
            return None
        for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
            if not os.path.exists(dir_path):
                continue
            for root, _, files in os.walk(dir_path):
                for fname in files:
                    if not fname.endswith(".md"):
                        continue
                    # Substring containment, not exact-segment matching —
                    # assumes no bucket ID is a substring of another's
                    # filename (TODO confirm ID generation guarantees this).
                    if bucket_id in fname:
                        return os.path.join(root, fname)
        return None
    # ---------------------------------------------------------
    # Internal: load bucket data from .md file
    # ---------------------------------------------------------
    def _load_bucket(self, file_path: str) -> Optional[dict]:
        """
        Parse a Markdown file and return structured bucket data, or None on
        parse failure (logged, never raised).
        """
        try:
            post = frontmatter.load(file_path)
            return {
                # Fall back to the file stem when frontmatter lacks an id
                "id": post.get("id", Path(file_path).stem),
                "metadata": dict(post.metadata),
                "content": post.content,
                "path": file_path,
            }
        except Exception as e:
            logger.warning(
                f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
            )
            return None

View File

@@ -0,0 +1,242 @@
# ============================================================
# Module: Memory Decay Engine (decay_engine.py)
# 模块:记忆衰减引擎
#
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
#
# Core formula (improved Ebbinghaus + emotion coordinates):
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
#
# Emotion weight (continuous coordinate, not discrete labels):
# 情感权重(基于连续坐标而非离散列举):
# emotion_weight = base + (arousal × arousal_boost)
# Higher arousal → higher emotion weight → slower decay
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
#
# Depended on by: server.py
# 被谁依赖server.py
# ============================================================
import math
import asyncio
import logging
from datetime import datetime
logger = logging.getLogger("ombre_brain.decay")
class DecayEngine:
    """
    Memory decay engine — periodically scans all dynamic buckets,
    calculates decay scores, and auto-archives low-activity buckets
    to simulate natural forgetting.

    Formula: Score = Importance × (act_count^0.3) × e^(-λ×days)
                     × (base + arousal × boost)
    """

    def __init__(self, config: dict, bucket_mgr):
        # --- Load decay parameters ---
        decay_cfg = config.get("decay", {})
        self.decay_lambda = decay_cfg.get("lambda", 0.05)  # λ in e^(-λ·days)
        self.threshold = decay_cfg.get("threshold", 0.3)  # archive below this
        self.check_interval = decay_cfg.get("check_interval_hours", 24)
        # --- Emotion weight params (continuous arousal coordinate) ---
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)
        self.bucket_mgr = bucket_mgr
        # --- Background task control ---
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the decay engine is running in the background."""
        return self._running

    # ---------------------------------------------------------
    # Core: calculate decay score for a single bucket
    #
    # Higher score = more vivid memory; below threshold → archive.
    # Permanent buckets never decay.
    # ---------------------------------------------------------
    def calculate_score(self, metadata: dict) -> float:
        """
        Calculate the current activity score for a memory bucket.

        Formula:
            Score = Importance × (act_count^0.3) × e^(-λ×days)
                    × (base + arousal×boost) × resolved_factor × urgency_boost

        Args:
            metadata: bucket frontmatter dict; non-dict input scores 0.0.

        Returns:
            Rounded score (4 decimals); 999.0 for permanent buckets.
        """
        if not isinstance(metadata, dict):
            return 0.0
        # --- Permanent buckets never decay ---
        if metadata.get("type") == "permanent":
            return 999.0
        importance = max(1, min(10, int(metadata.get("importance", 5))))
        activation_count = max(1, int(metadata.get("activation_count", 1)))
        # --- Days since last activation ---
        last_active_str = metadata.get("last_active", metadata.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days_since = 30.0  # Parse failure → assume 30 days old
        # --- Emotion weight from the continuous arousal coordinate ---
        # Higher arousal → stronger emotion → higher weight → slower decay
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost
        # --- Apply the decay formula ---
        score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )
        # --- Weight pool modifiers ---
        # Resolved events drop to 5%: sink to the bottom, awaiting
        # keyword reactivation.
        resolved_factor = 0.05 if metadata.get("resolved", False) else 1.0
        # High-arousal unresolved buckets get an urgency boost so they
        # surface with priority.
        urgency_boost = 1.5 if (arousal > 0.7 and not metadata.get("resolved", False)) else 1.0
        return round(score * resolved_factor * urgency_boost, 4)

    # ---------------------------------------------------------
    # Execute one decay cycle:
    # scan all dynamic buckets → score → archive those below threshold
    # ---------------------------------------------------------
    async def run_decay_cycle(self) -> dict:
        """
        Execute one decay cycle: iterate dynamic buckets, archive those
        scoring below threshold.

        Returns:
            Stats dict {"checked": N, "archived": N, "lowest_score": X}
            (plus "error" if listing buckets failed).
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}
        checked = 0
        archived = 0
        lowest_score = float("inf")
        for bucket in buckets:
            meta = bucket.get("metadata", {})
            # Skip permanent buckets — they never decay
            if meta.get("type") == "permanent":
                continue
            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue
            lowest_score = min(lowest_score, score)
            # --- Below threshold → archive (simulate forgetting) ---
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )
        result = {
            "checked": checked,
            "archived": archived,
            # Bug fix: if nothing was checked OR every scoring attempt raised,
            # lowest_score stays +inf (non-JSON-serializable) — report 0 instead.
            "lowest_score": 0 if math.isinf(lowest_score) else lowest_score,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    # ---------------------------------------------------------
    # Background decay task management
    # ---------------------------------------------------------
    async def ensure_started(self) -> None:
        """
        Ensure the decay engine is started (lazy init on first call).
        """
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop (no-op if already running)."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and release the task reference."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # Bug fix: drop the reference to the cancelled task so a later
            # start() begins from a clean state and the task can be GC'd.
            self._task = None
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop: run decay → sleep → repeat until stopped."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # --- Wait for the next cycle ---
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break

View File

@@ -0,0 +1,536 @@
# ============================================================
# Module: MCP Server Entry Point (server.py)
# 模块MCP 服务器主入口
#
# Starts the Ombre Brain MCP service and registers memory
# operation tools for Claude to call.
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
#
# Core responsibilities:
# 核心职责:
# - Initialize config, bucket manager, dehydrator, decay engine
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
# - Expose 5 MCP tools:
# 暴露 5 个 MCP 工具:
# breath — Surface unresolved memories or search by keyword
# 浮现未解决记忆 或 按关键词检索
# hold — Store a single memory
# 存储单条记忆
# grow — Diary digest, auto-split into multiple buckets
# 日记归档,自动拆分多桶
# trace — Modify metadata / resolved / delete
# 修改元数据 / resolved 标记 / 删除
# pulse — System status + bucket listing
# 系统状态 + 所有桶列表
#
# Startup:
# 启动方式:
# Local: python server.py
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
# Docker: docker-compose up
# ============================================================
import os
import sys
import random
import logging
import asyncio
import httpx
# --- Ensure same-directory modules can be imported ---
# --- 确保同目录下的模块能被正确导入 ---
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from mcp.server.fastmcp import FastMCP
from bucket_manager import BucketManager
from dehydrator import Dehydrator
from decay_engine import DecayEngine
from utils import load_config, setup_logging
# --- Load config & init logging ---
config = load_config()
setup_logging(config.get("log_level", "INFO"))
logger = logging.getLogger("ombre_brain")
# --- Initialize the three core components ---
bucket_mgr = BucketManager(config)  # Bucket manager (CRUD + multi-dim search)
dehydrator = Dehydrator(config)  # Dehydrator (summarize/merge bucket content)
decay_engine = DecayEngine(config, bucket_mgr)  # Decay engine (forgetting curve)
# --- Create MCP server instance ---
# host="0.0.0.0" so the Docker container's SSE endpoint is externally
# reachable; stdio mode ignores host (no network).
mcp = FastMCP(
    "Ombre Brain",
    host="0.0.0.0",
    port=8000,
)
# =============================================================
# /health endpoint: lightweight keepalive
# 轻量保活接口
# For Cloudflare Tunnel or reverse proxy to ping, preventing idle timeout
# 供 Cloudflare Tunnel 或反代定期 ping防止空闲超时断连
# =============================================================
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """Lightweight /health endpoint for tunnels and reverse proxies to ping.

    Reports total live bucket count and decay-engine state; any failure
    (stats unavailable, missing keys) yields a 500 with the error detail.
    """
    from starlette.responses import JSONResponse
    try:
        stats = await bucket_mgr.get_stats()
        live_buckets = stats["permanent_count"] + stats["dynamic_count"]
        engine_state = "running" if decay_engine.is_running else "stopped"
        payload = {
            "status": "ok",
            "buckets": live_buckets,
            "decay_engine": engine_state,
        }
        return JSONResponse(payload)
    except Exception as e:
        return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)
# =============================================================
# Internal helper: merge-or-create
# 内部辅助:检查是否可合并,可以则合并,否则新建
# Shared by hold and grow to avoid duplicate logic
# hold 和 grow 共用,避免重复逻辑
# =============================================================
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Check whether a similar bucket exists; merge into it if so, otherwise
    create a new bucket. Shared by the hold and grow tools.

    Returns:
        (bucket name or ID, is_merged) — the existing bucket's name (falling
        back to its ID) when merged, otherwise the freshly created bucket ID.
    """
    # Probe for the single closest existing bucket; a failed probe simply
    # means "no merge candidate".
    try:
        candidates = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        candidates = []
    merge_threshold = config.get("merge_threshold", 75)
    if candidates and candidates[0].get("score", 0) > merge_threshold:
        best = candidates[0]
        best_meta = best["metadata"]
        try:
            # Dehydrator combines old + new content; metadata is unioned
            # (tags/domain) or maxed (importance). Merge failure falls
            # through to creating a brand-new bucket.
            combined = await dehydrator.merge(best["content"], content)
            await bucket_mgr.update(
                best["id"],
                content=combined,
                tags=list(set(best_meta.get("tags", []) + tags)),
                importance=max(best_meta.get("importance", 5), importance),
                domain=list(set(best_meta.get("domain", []) + domain)),
                valence=valence,
                arousal=arousal,
            )
            return best_meta.get("name", best["id"]), True
        except Exception as e:
            logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")
    new_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return new_id, False
# =============================================================
# Tool 1: breath — Breathe
# 工具 1breath — 呼吸
#
# No args: surface highest-weight unresolved memories (active push)
# 无参数:浮现权重最高的未解决记忆
# With args: search by keyword + emotion coordinates
# 有参数:按关键词+情感坐标检索记忆
# =============================================================
# NOTE(review): the Chinese docstring below doubles as the MCP tool
# description sent to the client model, so it is kept verbatim —
# translating it would change the tool's advertised contract.
@mcp.tool()
async def breath(
    query: str = "",
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    """检索记忆或浮现未解决记忆。query 为空时自动推送权重最高的未解决桶;有 query 时按关键词+情感检索。domain 逗号分隔valence/arousal 传 0~1 启用情感共鸣,-1 忽略。"""
    # Make sure the decay engine is running so weight scores are current.
    await decay_engine.ensure_started()
    # --- No args: surfacing mode (weight pool active push) ---
    # --- 无参数:浮现模式(权重池主动推送)---
    if not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"
        # Only unresolved, non-permanent buckets compete for surfacing.
        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
        ]
        if not unresolved:
            return "权重池平静,没有需要处理的记忆。"
        # Rank by current decay score, highest first.
        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        # NOTE(review): surfacing always takes the top 2 and ignores
        # max_results — confirm this asymmetry with search mode is intended.
        top = scored[:2]
        results = []
        for b in top:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                # touch() refreshes last_active/activation_count, which feeds
                # future decay scoring.
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                # One failed summary never hides the others.
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue
        if not results:
            return "权重池平静,没有需要处理的记忆。"
        return "=== 浮现记忆 ===\n" + "\n---\n".join(results)
    # --- With args: search mode / 有参数:检索模式 ---
    # Sentinel -1 means "no emotion coordinate"; only values in [0, 1] are forwarded.
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None
    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"
    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue
    # --- Random surfacing: when search returns < 3, 40% chance to float old memories ---
    # --- 随机浮现:检索结果不足 3 条时40% 概率从低权重旧桶里漂上来 ---
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            # Drift candidates: not already matched, decay score below 2.0.
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                # Surface 1-3 random low-weight buckets (bounded by availability).
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    drift_results.append(f"[surface_type: random]\n{summary}")
                results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            # Random surfacing is purely best-effort decoration.
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")
    if not results:
        return "未找到相关记忆。"
    return "\n---\n".join(results)
# =============================================================
# Tool 2: hold — Hold on to this
# 工具 2hold — 握住,留下来
# =============================================================
# NOTE(review): the Chinese docstring below is the MCP tool description sent
# to the client model; it is kept verbatim on purpose.
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
) -> str:
    """存储单条记忆。自动打标+合并相似桶。tags 逗号分隔importance 1-10。"""
    await decay_engine.ensure_started()
    # --- Input validation / 输入校验 ---
    if not content or not content.strip():
        return "内容为空,无法存储。"
    importance = max(1, min(10, importance))
    extra_tags = [t.strip() for t in tags.split(",") if t.strip()]
    # --- Step 1: auto-tagging / 自动打标 ---
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {}
    # Read every field defensively: even a successful analyze() may omit
    # keys, so each access falls back to the same defaults the error path
    # used to hard-code (previously a partial result raised KeyError here).
    domain = analysis.get("domain") or ["未分类"]
    valence = analysis.get("valence", 0.5)
    arousal = analysis.get("arousal", 0.3)
    auto_tags = analysis.get("tags") or []
    suggested_name = analysis.get("suggested_name", "")
    # Deduplicate while preserving order: auto tags first, then user tags.
    all_tags = list(dict.fromkeys(auto_tags + extra_tags))
    # --- Step 2: merge or create / 合并或新建 ---
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )
    if is_merged:
        return (
            f"已合并到现有记忆桶: {result_name}\n"
            f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f}"
        )
    return (
        f"已创建新记忆桶: {result_name}\n"
        f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f} | 标签: {', '.join(all_tags)}"
    )
# =============================================================
# Tool 3: grow — Grow, fragments become memories
# 工具 3grow — 生长,一天的碎片长成记忆
# =============================================================
@mcp.tool()
async def grow(content: str) -> str:
    """日记归档。自动拆分长内容为多个记忆桶。"""
    await decay_engine.ensure_started()
    if not content or not content.strip():
        return "内容为空,无法整理。"
    # --- Step 1: let the API split the diary into structured items ---
    # --- 让 API 拆分整理 ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"
    if not items:
        return "内容为空或整理失败。"
    # --- Step 2: merge-or-create per item; one failure never aborts the rest ---
    # --- 逐条合并或新建(单条失败不影响其他)---
    report_lines = []
    created = merged = 0
    for entry in items:
        try:
            target, was_merged = await _merge_or_create(
                content=entry["content"],
                tags=entry.get("tags", []),
                importance=entry.get("importance", 5),
                domain=entry.get("domain", ["未分类"]),
                valence=entry.get("valence", 0.5),
                arousal=entry.get("arousal", 0.3),
                name=entry.get("name", ""),
            )
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{entry.get('name', '?')}: {e}"
            )
            report_lines.append(f" ⚠️ 失败: {entry.get('name', '未知条目')}")
            continue
        if was_merged:
            merged += 1
            report_lines.append(f" 📎 合并 → {target}")
        else:
            created += 1
            domains_str = ",".join(entry.get("domain", []))
            report_lines.append(
                f" 📝 新建 [{entry.get('name', target)}] "
                f"主题:{domains_str} V{entry.get('valence', 0.5):.1f}/A{entry.get('arousal', 0.3):.1f}"
            )
    summary = f"=== 日记整理完成 ===\n拆分为 {len(items)} 条 | 新建 {created} 桶 | 合并 {merged}\n"
    return summary + "\n".join(report_lines)
# =============================================================
# Tool 4: trace — Trace, redraw the outline of a memory
# 工具 4trace — 描摹,重新勾勒记忆的轮廓
# Also handles deletion (delete=True)
# 同时承接删除功能
# =============================================================
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1 标记已解决桶权重骤降沉底resolved=0 重新激活delete=True 删除桶。其余字段只传需改的,-1 或空串表示不改。"""
    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"
    # Delete mode short-circuits everything else / 删除模式
    if delete:
        removed = await bucket_mgr.delete(bucket_id)
        if removed:
            return f"已遗忘记忆桶: {bucket_id}"
        return f"未找到记忆桶: {bucket_id}"
    if not await bucket_mgr.get(bucket_id):
        return f"未找到记忆桶: {bucket_id}"
    # Sentinel convention: -1 for numbers and "" for strings means "leave
    # unchanged" — only explicitly provided fields are collected.
    updates = {}
    if name:
        updates["name"] = name
    if domain:
        updates["domain"] = [part.strip() for part in domain.split(",") if part.strip()]
    if 0 <= valence <= 1:
        updates["valence"] = valence
    if 0 <= arousal <= 1:
        updates["arousal"] = arousal
    if 1 <= importance <= 10:
        updates["importance"] = importance
    if tags:
        updates["tags"] = [part.strip() for part in tags.split(",") if part.strip()]
    if resolved in (0, 1):
        updates["resolved"] = bool(resolved)
    if not updates:
        return "没有任何字段需要修改。"
    if not await bucket_mgr.update(bucket_id, **updates):
        return f"修改失败: {bucket_id}"
    changed = ", ".join(f"{field}={value}" for field, value in updates.items())
    # Spell out what flipping the resolved state means for surfacing.
    # 特别提示 resolved 状态变化的语义
    if "resolved" in updates:
        changed += (
            " → 已沉底,只在关键词触发时重新浮现"
            if updates["resolved"]
            else " → 已重新激活,将参与浮现排序"
        )
    return f"已修改记忆桶 {bucket_id}: {changed}"
# =============================================================
# Tool 5: pulse — Heartbeat, system status + memory listing
# 工具 5pulse — 脉搏,系统状态 + 记忆列表
# =============================================================
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """系统状态和所有记忆桶摘要。include_archive=True 时包含归档桶。"""
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"
    engine_state = '运行中' if decay_engine.is_running else '已停止'
    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']}\n"
        f"动态记忆桶: {stats['dynamic_count']}\n"
        f"归档记忆桶: {stats['archive_count']}\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {engine_state}\n"
    )
    # Listing is best-effort: a failure still returns the status header.
    # 列出所有桶摘要
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        return status + f"\n列出记忆桶失败: {e}"
    if not buckets:
        return status + "\n记忆库为空。"
    lines = []
    for entry in buckets:
        meta = entry.get("metadata", {})
        # Icon priority: bucket type first, then resolved state.
        bucket_type = meta.get("type")
        if bucket_type == "permanent":
            icon = "📦"
        elif bucket_type == "archived":
            icon = "🗄️"
        elif meta.get("resolved", False):
            icon = ""
        else:
            icon = "💭"
        try:
            weight = decay_engine.calculate_score(meta)
        except Exception:
            weight = 0.0
        domains = ",".join(meta.get("domain", []))
        val = meta.get("valence", 0.5)
        aro = meta.get("arousal", 0.3)
        resolved_tag = " [已解决]" if meta.get("resolved", False) else ""
        lines.append(
            f"{icon} [{meta.get('name', entry['id'])}]{resolved_tag} "
            f"主题:{domains} "
            f"情感:V{val:.1f}/A{aro:.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{weight:.2f} "
            f"标签:{','.join(meta.get('tags', []))}"
        )
    return status + "\n=== 记忆列表 ===\n" + "\n".join(lines)
# --- Entry point / 启动入口 ---
if __name__ == "__main__":
    # Transport is chosen by config: "stdio" for local pipes, or an HTTP
    # variant for remote deployments.
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")
    # --- Application-level keepalive: remote mode only, ping /health every 60s ---
    # --- 应用层保活:仅远程模式下启动,每 60 秒 ping 一次 /health ---
    # Prevents Cloudflare Tunnel from dropping idle connections
    if transport in ("sse", "streamable-http"):
        async def _keepalive_loop():
            # Periodically hit the local /health endpoint forever; failures
            # are logged but never fatal.
            # NOTE(review): port 8000 is hard-coded here — confirm it always
            # matches the port the server actually binds.
            await asyncio.sleep(10)  # Wait for server to fully start
            async with httpx.AsyncClient() as client:
                while True:
                    try:
                        await client.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as e:
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}")
                    await asyncio.sleep(60)
        import threading
        def _start_keepalive():
            # Run the keepalive coroutine on its own event loop inside a
            # daemon thread, so it neither blocks nor depends on the loop
            # that mcp.run() manages.
            loop = asyncio.new_event_loop()
            loop.run_until_complete(_keepalive_loop())
        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()
    # Blocking call: runs the MCP server on the selected transport.
    mcp.run(transport=transport)

781
bucket_manager.py Normal file
View File

@@ -0,0 +1,781 @@
# ============================================================
# Module: Memory Bucket Manager (bucket_manager.py)
# 模块:记忆桶管理器
#
# CRUD operations, multi-dimensional index search, activation updates
# for memory buckets.
# 记忆桶的增删改查、多维索引搜索、激活更新。
#
# Core design:
# 核心逻辑:
# - Each bucket = one Markdown file (YAML frontmatter + body)
# 每个记忆桶 = 一个 Markdown 文件
# - Storage by type: permanent / dynamic / archive
# 存储按类型分目录
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
# - Search strategy: domain pre-filter → weighted multi-dim ranking
# 搜索策略:主题域预筛 → 多维加权精排
# - Emotion coordinates based on Russell circumplex model:
# 情感坐标基于环形情感模型Russell circumplex
# valence (0~1): 0=negative → 1=positive
# arousal (0~1): 0=calm → 1=excited
#
# Depended on by: server.py, decay_engine.py
# 被谁依赖server.py, decay_engine.py
# ============================================================
import os
import math
import logging
import re
import shutil
from collections import Counter
from datetime import datetime
from pathlib import Path
from typing import Optional
import frontmatter
import jieba
from rapidfuzz import fuzz
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
logger = logging.getLogger("ombre_brain.bucket")
class BucketManager:
"""
Memory bucket manager — entry point for all bucket CRUD operations.
Buckets are stored as Markdown files with YAML frontmatter for metadata
and body for content. Natively compatible with Obsidian browsing/editing.
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
桶以 Markdown 文件存储YAML frontmatter 存元数据,正文存内容。
天然兼容 Obsidian 直接浏览和编辑。
"""
def __init__(self, config: dict):
# --- Read storage paths from config / 从配置中读取存储路径 ---
self.base_dir = config["buckets_dir"]
self.permanent_dir = os.path.join(self.base_dir, "permanent")
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
self.archive_dir = os.path.join(self.base_dir, "archive")
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
self.max_results = config.get("matching", {}).get("max_results", 5)
# --- Wikilink config / 双链配置 ---
wikilink_cfg = config.get("wikilink", {})
self.wikilink_enabled = wikilink_cfg.get("enabled", True)
self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
self.wikilink_stopwords = {
"", "", "", "", "", "", "", "", "", "",
"", "一个", "", "", "", "", "", "", "",
"", "", "", "没有", "", "", "自己", "", "", "",
"我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
"the", "and", "for", "are", "but", "not", "you", "all", "can",
"had", "her", "was", "one", "our", "out", "has", "have", "with",
"this", "that", "from", "they", "been", "said", "will", "each",
}
self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
# --- Search scoring weights / 检索权重配置 ---
scoring = config.get("scoring_weights", {})
self.w_topic = scoring.get("topic_relevance", 4.0)
self.w_emotion = scoring.get("emotion_resonance", 2.0)
self.w_time = scoring.get("time_proximity", 1.5)
self.w_importance = scoring.get("importance", 1.0)
# ---------------------------------------------------------
# Create a new bucket
# 创建新桶
# Write content and metadata into a .md file
# 将内容和元数据写入一个 .md 文件
# ---------------------------------------------------------
async def create(
self,
content: str,
tags: list[str] = None,
importance: int = 5,
domain: list[str] = None,
valence: float = 0.5,
arousal: float = 0.3,
bucket_type: str = "dynamic",
name: str = None,
pinned: bool = False,
protected: bool = False,
) -> str:
"""
Create a new memory bucket, return bucket ID.
创建一个新的记忆桶,返回桶 ID。
pinned/protected=True: bucket won't be merged, decayed, or have importance changed.
Importance is locked to 10 for pinned/protected buckets.
pinned/protected 桶不参与合并与衰减importance 强制锁定为 10。
"""
bucket_id = generate_bucket_id()
bucket_name = sanitize_name(name) if name else bucket_id
domain = domain or ["未分类"]
tags = tags or []
linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)
# --- Pinned/protected buckets: lock importance to 10 ---
# --- 钉选/保护桶importance 强制锁定为 10 ---
if pinned or protected:
importance = 10
# --- Build YAML frontmatter metadata / 构建元数据 ---
metadata = {
"id": bucket_id,
"name": bucket_name,
"tags": tags,
"domain": domain,
"valence": max(0.0, min(1.0, valence)),
"arousal": max(0.0, min(1.0, arousal)),
"importance": max(1, min(10, importance)),
"type": bucket_type,
"created": now_iso(),
"last_active": now_iso(),
"activation_count": 1,
}
if pinned:
metadata["pinned"] = True
if protected:
metadata["protected"] = True
# --- Assemble Markdown file (frontmatter + body) ---
# --- 组装 Markdown 文件 ---
post = frontmatter.Post(linked_content, **metadata)
# --- Choose directory by type + primary domain ---
# --- 按类型 + 主题域选择存储目录 ---
type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
target_dir = os.path.join(type_dir, primary_domain)
os.makedirs(target_dir, exist_ok=True)
# --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
# --- 文件名可读名称_桶ID.md ---
if bucket_name and bucket_name != bucket_id:
filename = f"{bucket_name}_{bucket_id}.md"
else:
filename = f"{bucket_id}.md"
file_path = safe_path(target_dir, filename)
try:
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
except OSError as e:
logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
raise
logger.info(
f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
+ (" [PINNED]" if pinned else "") + (" [PROTECTED]" if protected else "")
)
return bucket_id
# ---------------------------------------------------------
# Read bucket content
# 读取桶内容
# Returns {"id", "metadata", "content", "path"} or None
# ---------------------------------------------------------
async def get(self, bucket_id: str) -> Optional[dict]:
"""
Read a single bucket by ID.
根据 ID 读取单个桶。
"""
if not bucket_id or not isinstance(bucket_id, str):
return None
file_path = self._find_bucket_file(bucket_id)
if not file_path:
return None
return self._load_bucket(file_path)
# ---------------------------------------------------------
# Update bucket
# 更新桶
# Supports: content, tags, importance, valence, arousal, name, resolved
# ---------------------------------------------------------
async def update(self, bucket_id: str, **kwargs) -> bool:
"""
Update bucket content or metadata fields.
更新桶的内容或元数据字段。
"""
file_path = self._find_bucket_file(bucket_id)
if not file_path:
return False
try:
post = frontmatter.load(file_path)
except Exception as e:
logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
return False
# --- Pinned/protected buckets: lock importance to 10, ignore importance changes ---
# --- 钉选/保护桶importance 不可修改,强制保持 10 ---
is_pinned = post.get("pinned", False) or post.get("protected", False)
if is_pinned:
kwargs.pop("importance", None) # silently ignore importance update
# --- Update only fields that were passed in / 只改传入的字段 ---
if "content" in kwargs:
next_tags = kwargs.get("tags", post.get("tags", []))
next_domain = kwargs.get("domain", post.get("domain", []))
next_name = kwargs.get("name", post.get("name", ""))
post.content = self._apply_wikilinks(
kwargs["content"],
next_tags,
next_domain,
next_name,
)
if "tags" in kwargs:
post["tags"] = kwargs["tags"]
if "importance" in kwargs:
post["importance"] = max(1, min(10, int(kwargs["importance"])))
if "domain" in kwargs:
post["domain"] = kwargs["domain"]
if "valence" in kwargs:
post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
if "arousal" in kwargs:
post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
if "name" in kwargs:
post["name"] = sanitize_name(kwargs["name"])
if "resolved" in kwargs:
post["resolved"] = bool(kwargs["resolved"])
if "pinned" in kwargs:
post["pinned"] = bool(kwargs["pinned"])
if kwargs["pinned"]:
post["importance"] = 10 # pinned → lock importance to 10
# --- Auto-refresh activation time / 自动刷新激活时间 ---
post["last_active"] = now_iso()
try:
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
except OSError as e:
logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
return False
logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
return True
# ---------------------------------------------------------
# Wikilink injection
# 自动添加 Obsidian 双链
# ---------------------------------------------------------
def _apply_wikilinks(
self,
content: str,
tags: list[str],
domain: list[str],
name: str,
) -> str:
"""
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
"""
if not self.wikilink_enabled or not content:
return content
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
if not keywords:
return content
# Split on existing wikilinks to avoid wrapping them again
# 按已有双链切分,避免重复包裹
segments = re.split(r"(\[\[[^\]]+\]\])", content)
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
for i, segment in enumerate(segments):
if segment.startswith("[[") and segment.endswith("]]"):
continue
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
segments[i] = updated
return "".join(segments)
def _collect_wikilink_keywords(
self,
content: str,
tags: list[str],
domain: list[str],
name: str,
) -> list[str]:
"""
Collect candidate keywords from tags/domain/auto-extraction.
汇总候选关键词:可选 tags/domain + 自动提词。
"""
candidates = []
if self.wikilink_use_tags:
candidates.extend(tags or [])
if self.wikilink_use_domain:
candidates.extend(domain or [])
if name:
candidates.append(name)
if self.wikilink_use_auto_keywords:
candidates.extend(self._extract_auto_keywords(content))
return self._normalize_keywords(candidates)
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
"""
Deduplicate and sort by length (longer first to avoid short words
breaking long ones during replacement).
去重并按长度排序,优先替换长词。
"""
if not keywords:
return []
seen = set()
cleaned = []
for keyword in keywords:
if not isinstance(keyword, str):
continue
kw = keyword.strip()
if len(kw) < self.wikilink_min_len:
continue
if kw in self.wikilink_exclude_keywords:
continue
if kw.lower() in self.wikilink_stopwords:
continue
if kw in seen:
continue
seen.add(kw)
cleaned.append(kw)
return sorted(cleaned, key=len, reverse=True)
def _extract_auto_keywords(self, content: str) -> list[str]:
"""
Auto-extract keywords from body text, prioritizing high-frequency words.
从正文自动提词,优先高频词。
"""
if not content:
return []
try:
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
except Exception:
zh_words = []
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
# Chinese bigrams / 中文双词组合
zh_bigrams = []
for i in range(len(zh_words) - 1):
left = zh_words[i]
right = zh_words[i + 1]
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
continue
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
continue
if len(left + right) > 8:
continue
zh_bigrams.append(left + right)
merged = []
for word in zh_words + zh_bigrams + en_words:
if len(word) < self.wikilink_min_len:
continue
if re.fullmatch(r"\d+", word):
continue
if word.lower() in self.wikilink_stopwords:
continue
merged.append(word)
if not merged:
return []
counter = Counter(merged)
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
# ---------------------------------------------------------
# Delete bucket
# 删除桶
# ---------------------------------------------------------
async def delete(self, bucket_id: str) -> bool:
"""
Delete a memory bucket file.
删除指定的记忆桶文件。
"""
file_path = self._find_bucket_file(bucket_id)
if not file_path:
return False
try:
os.remove(file_path)
except OSError as e:
logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
return False
logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
return True
# ---------------------------------------------------------
# Touch bucket (refresh activation time + increment count)
# 触碰桶(刷新激活时间 + 累加激活次数)
# Called on every recall hit; affects decay score.
# 每次检索命中时调用,影响衰减得分。
# ---------------------------------------------------------
async def touch(self, bucket_id: str) -> None:
"""
Update a bucket's last activation time and count.
更新桶的最后激活时间和激活次数。
"""
file_path = self._find_bucket_file(bucket_id)
if not file_path:
return
try:
post = frontmatter.load(file_path)
post["last_active"] = now_iso()
post["activation_count"] = post.get("activation_count", 0) + 1
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
except Exception as e:
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
# ---------------------------------------------------------
# Multi-dimensional search (core feature)
# 多维搜索(核心功能)
#
# Strategy: domain pre-filter → weighted multi-dim ranking
# 策略:主题域预筛 → 多维加权精排
#
# Ranking formula:
# total = topic(×w_topic) + emotion(×w_emotion)
# + time(×w_time) + importance(×w_importance)
#
# Per-dimension scores (normalized to 0~1):
# topic = rapidfuzz weighted match (name/tags/domain/body)
# emotion = 1 - Euclidean distance (query v/a vs bucket v/a)
# time = e^(-0.02 × days) (recent memories first)
# importance = importance / 10
# ---------------------------------------------------------
async def search(
self,
query: str,
limit: int = None,
domain_filter: list[str] = None,
query_valence: float = None,
query_arousal: float = None,
) -> list[dict]:
"""
Multi-dimensional indexed search for memory buckets.
多维索引搜索记忆桶。
domain_filter: pre-filter by domain (None = search all)
query_valence/arousal: emotion coordinates for resonance scoring
"""
if not query or not query.strip():
return []
limit = limit or self.max_results
all_buckets = await self.list_all(include_archive=False)
if not all_buckets:
return []
# --- Layer 1: domain pre-filter (fast scope reduction) ---
# --- 第一层:主题域预筛(快速缩小范围)---
if domain_filter:
filter_set = {d.lower() for d in domain_filter}
candidates = [
b for b in all_buckets
if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set
]
# Fall back to full search if pre-filter yields nothing
# 预筛为空则回退全量搜索
if not candidates:
candidates = all_buckets
else:
candidates = all_buckets
# --- Layer 2: weighted multi-dim ranking ---
# --- 第二层:多维加权精排 ---
scored = []
for bucket in candidates:
meta = bucket.get("metadata", {})
try:
# Dim 1: topic relevance (fuzzy text, 0~1)
topic_score = self._calc_topic_score(query, bucket)
# Dim 2: emotion resonance (coordinate distance, 0~1)
emotion_score = self._calc_emotion_score(
query_valence, query_arousal, meta
)
# Dim 3: time proximity (exponential decay, 0~1)
time_score = self._calc_time_score(meta)
# Dim 4: importance (direct normalization)
importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0
# --- Weighted sum / 加权求和 ---
total = (
topic_score * self.w_topic
+ emotion_score * self.w_emotion
+ time_score * self.w_time
+ importance_score * self.w_importance
)
# Normalize to 0~100 for readability
weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0
# Resolved buckets get ranking penalty (but still reachable by keyword)
# 已解决的桶降权排序(但仍可被关键词激活)
if meta.get("resolved", False):
normalized *= 0.3
if normalized >= self.fuzzy_threshold:
bucket["score"] = round(normalized, 2)
scored.append(bucket)
except Exception as e:
logger.warning(
f"Scoring failed for bucket {bucket.get('id', '?')} / "
f"桶评分失败: {e}"
)
continue
scored.sort(key=lambda x: x["score"], reverse=True)
return scored[:limit]
# ---------------------------------------------------------
# Topic relevance sub-score:
# name(×3) + domain(×2.5) + tags(×2) + body(×1)
# 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1)
# ---------------------------------------------------------
def _calc_topic_score(self, query: str, bucket: dict) -> float:
"""
Calculate text dimension relevance score (0~1).
计算文本维度的相关性得分。
"""
meta = bucket.get("metadata", {})
name_score = fuzz.partial_ratio(query, meta.get("name", "")) * 3
domain_score = (
max(
(fuzz.partial_ratio(query, d) for d in meta.get("domain", [])),
default=0,
)
* 2.5
)
tag_score = (
max(
(fuzz.partial_ratio(query, tag) for tag in meta.get("tags", [])),
default=0,
)
* 2
)
content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1
return (name_score + domain_score + tag_score + content_score) / (100 * 8.5)
# ---------------------------------------------------------
# Emotion resonance sub-score:
# Based on Russell circumplex Euclidean distance
# 情感共鸣子分:基于环形情感模型的欧氏距离
# No emotion in query → neutral 0.5 (doesn't affect ranking)
# ---------------------------------------------------------
def _calc_emotion_score(
self, q_valence: float, q_arousal: float, meta: dict
) -> float:
"""
Calculate emotion resonance score (0~1, closer = higher).
计算情感共鸣度0~1越近越高
"""
if q_valence is None or q_arousal is None:
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
try:
b_valence = float(meta.get("valence", 0.5))
b_arousal = float(meta.get("arousal", 0.3))
except (ValueError, TypeError):
return 0.5
# Euclidean distance, max sqrt(2) ≈ 1.414
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
return max(0.0, 1.0 - dist / 1.414)
# ---------------------------------------------------------
# Time proximity sub-score:
# More recent activation → higher score
# 时间亲近子分:距上次激活越近分越高
# ---------------------------------------------------------
def _calc_time_score(self, meta: dict) -> float:
"""
Calculate time proximity score (0~1, more recent = higher).
计算时间亲近度。
"""
last_active_str = meta.get("last_active", meta.get("created", ""))
try:
last_active = datetime.fromisoformat(str(last_active_str))
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
except (ValueError, TypeError):
days = 30
return math.exp(-0.02 * days)
# ---------------------------------------------------------
# List all buckets
# 列出所有桶
# ---------------------------------------------------------
async def list_all(self, include_archive: bool = False) -> list[dict]:
"""
Recursively walk directories (including domain subdirs), list all buckets.
递归遍历目录(含域子目录),列出所有记忆桶。
"""
buckets = []
dirs = [self.permanent_dir, self.dynamic_dir]
if include_archive:
dirs.append(self.archive_dir)
for dir_path in dirs:
if not os.path.exists(dir_path):
continue
for root, _, files in os.walk(dir_path):
for filename in files:
if not filename.endswith(".md"):
continue
file_path = os.path.join(root, filename)
bucket = self._load_bucket(file_path)
if bucket:
buckets.append(bucket)
return buckets
# ---------------------------------------------------------
# Statistics (counts per category + total size)
# 统计信息(各分类桶数量 + 总体积)
# ---------------------------------------------------------
async def get_stats(self) -> dict:
"""
Return memory bucket statistics (including domain subdirs).
返回记忆桶的统计数据。
"""
stats = {
"permanent_count": 0,
"dynamic_count": 0,
"archive_count": 0,
"total_size_kb": 0.0,
"domains": {},
}
for subdir, key in [
(self.permanent_dir, "permanent_count"),
(self.dynamic_dir, "dynamic_count"),
(self.archive_dir, "archive_count"),
]:
if not os.path.exists(subdir):
continue
for root, _, files in os.walk(subdir):
for f in files:
if f.endswith(".md"):
stats[key] += 1
fpath = os.path.join(root, f)
try:
stats["total_size_kb"] += os.path.getsize(fpath) / 1024
except OSError:
pass
# Per-domain counts / 每个域的桶数量
domain_name = os.path.basename(root)
if domain_name != os.path.basename(subdir):
stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1
return stats
# ---------------------------------------------------------
# Archive bucket (move from permanent/dynamic into archive)
# 归档桶(从 permanent/dynamic 移入 archive
# Called by decay engine to simulate "forgetting"
# 由衰减引擎调用,模拟"遗忘"
# ---------------------------------------------------------
async def archive(self, bucket_id: str) -> bool:
"""
Move a bucket into the archive directory (preserving domain subdirs).
将指定桶移入归档目录(保留域子目录结构)。
"""
file_path = self._find_bucket_file(bucket_id)
if not file_path:
return False
try:
# Read once, get domain info and update type / 一次性读取
post = frontmatter.load(file_path)
domain = post.get("domain", ["未分类"])
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
archive_subdir = os.path.join(self.archive_dir, primary_domain)
os.makedirs(archive_subdir, exist_ok=True)
dest = safe_path(archive_subdir, os.path.basename(file_path))
# Update type marker then move file / 更新类型标记后移动文件
post["type"] = "archived"
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
# Use shutil.move for cross-filesystem safety
# 使用 shutil.move 保证跨文件系统安全
shutil.move(file_path, str(dest))
except Exception as e:
logger.error(
f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
)
return False
logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
return True
# ---------------------------------------------------------
# Internal: find bucket file across all three directories
# 内部:在三个目录中查找桶文件
# ---------------------------------------------------------
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
    """
    Locate a bucket's ``.md`` file by recursively searching the permanent,
    dynamic, and archive directories (in that priority order).

    Matching is by substring of the filename — NOTE(review): this can hit a
    different bucket whose name merely contains ``bucket_id``; confirm the
    ID scheme guarantees uniqueness within filenames.

    :param bucket_id: identifier fragment expected inside the filename.
    :return: full path of the first match, or None.
    """
    if not bucket_id:
        return None
    for base_dir in (self.permanent_dir, self.dynamic_dir, self.archive_dir):
        if not os.path.exists(base_dir):
            continue
        for current, _, names in os.walk(base_dir):
            matches = [n for n in names if n.endswith(".md") and bucket_id in n]
            if matches:
                return os.path.join(current, matches[0])
    return None
# ---------------------------------------------------------
# Internal: load bucket data from .md file
# 内部:从 .md 文件加载桶数据
# ---------------------------------------------------------
def _load_bucket(self, file_path: str) -> Optional[dict]:
    """
    Parse a Markdown bucket file into structured data.

    :param file_path: path to the ``.md`` file (front-matter + body).
    :return: dict with ``id`` (front-matter id, falling back to the file
        stem), ``metadata``, ``content``, and ``path``; None on any parse
        or I/O failure (logged as a warning).
    """
    try:
        doc = frontmatter.load(file_path)
        return {
            "id": doc.get("id", Path(file_path).stem),
            "metadata": dict(doc.metadata),
            "content": doc.content,
            "path": file_path,
        }
    except Exception as e:
        logger.warning(
            f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
        )
        return None

82
config.example.yaml Normal file
View File

@@ -0,0 +1,82 @@
# ============================================================
# Ombre Brain Configuration / 配置文件
# Copy this file to config.yaml and modify as needed
# 复制此文件为 config.yaml 后按需修改
# ============================================================
# --- Transport / 传输方式 ---
# stdio: local use (Claude Desktop, direct pipe)
# streamable-http: remote use (HTTP, tunnel/CDN/proxy friendly)
# stdio: 本地使用Claude Desktop直接管道通信
# streamable-http: 远程使用(标准 HTTP对隧道/CDN/代理友好)
transport: "stdio"
# --- Log level / 日志级别 ---
log_level: "INFO"
# --- Bucket storage path / 记忆桶存储路径 ---
# Point this to your Obsidian vault subdirectory, or any local folder
# 指向你的 Obsidian 仓库子目录,或任意本地文件夹
# Leave as-is to use the built-in ./buckets/ directory
# 保持默认则使用内置的 ./buckets/ 目录
# buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
# --- Merge threshold / 桶合并阈值 ---
# When storing a new memory, if similarity with an existing bucket exceeds
# this value (0-100), merge instead of creating a new one
# 存新记忆时如果与已有桶的相似度超过此值0-100则合并而非新建
merge_threshold: 75
# --- Dehydration API / 脱水压缩 API 配置 ---
# Uses a cheap LLM for intelligent compression; auto-degrades to local
# keyword extraction if API is unavailable
# 用廉价 LLM 做智能压缩API 不可用时自动降级到本地关键词提取
dehydration:
# Supports any OpenAI-compatible API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
# 支持所有 OpenAI 兼容 APIDeepSeek / Ollama / LM Studio / vLLM / Gemini 等
model: "deepseek-chat"
base_url: "https://api.deepseek.com/v1"
# Common base_url examples / 常见 base_url 示例:
# DeepSeek: https://api.deepseek.com/v1
# SiliconFlow: https://api.siliconflow.cn/v1
# Ollama: http://localhost:11434/v1
# LM Studio: http://localhost:1234/v1
# vLLM: http://localhost:8000/v1
# Gemini: https://generativelanguage.googleapis.com/v1beta/openai
# api_key: "" # ⚠️ Use env var OMBRE_API_KEY instead / 请使用环境变量 OMBRE_API_KEY
max_tokens: 1024
temperature: 0.1
# --- Decay parameters / 记忆衰减参数 ---
# Simulates Ebbinghaus forgetting curve, auto-archives inactive memories
# 模拟艾宾浩斯遗忘曲线,自动归档不活跃的记忆
decay:
lambda: 0.05 # Decay rate / 衰减速率(越大遗忘越快)
threshold: 0.3 # Archive threshold / 归档阈值
check_interval_hours: 24 # Check interval (hours) / 衰减检查间隔(小时)
emotion_weights:
base: 1.0 # Base weight / 基础权重
arousal_boost: 0.8 # Arousal boost coefficient / 唤醒度加成系数
# --- Scoring weights / 检索权重参数 ---
# total = topic(×4) + emotion(×2) + time(×1.5) + importance(×1)
scoring_weights:
topic_relevance: 4.0
emotion_resonance: 2.0
time_proximity: 1.5
importance: 1.0
# --- Fuzzy matching / 模糊匹配参数 ---
matching:
fuzzy_threshold: 50 # Minimum match score (0-100) / 最低匹配分数
max_results: 5 # Max results per search / 单次搜索最多返回条数
# --- Obsidian wikilinks / Obsidian 双链自动注入 ---
wikilink:
enabled: true
use_tags: false
use_domain: true
use_auto_keywords: true
auto_top_k: 8
min_keyword_len: 2
exclude_keywords: []

279
decay_engine.py Normal file
View File

@@ -0,0 +1,279 @@
# ============================================================
# Module: Memory Decay Engine (decay_engine.py)
# 模块:记忆衰减引擎
#
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
#
# Core formula (improved Ebbinghaus + emotion coordinates):
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
#
# Emotion weight (continuous coordinate, not discrete labels):
# 情感权重(基于连续坐标而非离散列举):
# emotion_weight = base + (arousal × arousal_boost)
# Higher arousal → higher emotion weight → slower decay
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
#
# Depended on by: server.py
# 被谁依赖server.py
# ============================================================
import math
import asyncio
import logging
from datetime import datetime
logger = logging.getLogger("ombre_brain.decay")
class DecayEngine:
    """
    Memory decay engine.

    Periodically scans all dynamic buckets, computes an activity score per
    bucket (improved Ebbinghaus forgetting curve combined with a continuous
    emotion coordinate), and archives buckets whose score falls below the
    configured threshold — simulating natural forgetting.

    Score = time_weight × Importance × activation_count^0.3
            × e^(-λ·days) × (base + arousal·arousal_boost)
    """

    def __init__(self, config: dict, bucket_mgr):
        """
        :param config: application config; reads the ``decay`` section.
        :param bucket_mgr: bucket manager exposing async ``list_all`` and
            ``archive`` used by the decay cycle.
        """
        decay_cfg = config.get("decay", {})
        # Decay rate λ — larger means faster forgetting.
        self.decay_lambda = decay_cfg.get("lambda", 0.05)
        # Buckets scoring below this are archived.
        self.threshold = decay_cfg.get("threshold", 0.3)
        # Hours between background decay cycles.
        self.check_interval = decay_cfg.get("check_interval_hours", 24)
        # Emotion weight parameters (continuous arousal coordinate).
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)
        self.bucket_mgr = bucket_mgr
        # Background task control.
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the decay engine's background loop is active."""
        return self._running

    @staticmethod
    def _calc_time_weight(days_since: float) -> float:
        """
        Piecewise time-weight multiplier applied to the base score.

        0–1 day → 1.0; day 1–2 → linear interpolation 1.0→0.9; afterwards
        exponential decay from 0.9 with k = ln(3)/5 ≈ 0.2197 (so day 7 ≈ 0.3),
        floored at 0.3.
        """
        if days_since <= 1.0:
            return 1.0
        elif days_since <= 2.0:
            # Linear interpolation 1.0 → 0.9 over [1, 2].
            return 1.0 - 0.1 * (days_since - 1.0)
        else:
            raw = 0.9 * math.exp(-0.2197 * (days_since - 2.0))
            return max(0.3, raw)

    def calculate_score(self, metadata: dict) -> float:
        """
        Compute the current activity score for one memory bucket.

        final_score = time_weight × base_score, where
        base_score = Importance × act_count^0.3 × e^(-λ·days)
                     × (base + arousal·boost),
        then scaled by resolved (×0.05) / urgency (×1.5) modifiers and
        rounded to 4 decimal places.

        :param metadata: bucket front-matter dict; non-dict input scores 0.0.
        :return: activity score; 999.0 for pinned/protected/permanent buckets.
        """
        if not isinstance(metadata, dict):
            return 0.0
        # Pinned/protected buckets never decay.
        if metadata.get("pinned") or metadata.get("protected"):
            return 999.0
        # Permanent buckets never decay.
        if metadata.get("type") == "permanent":
            return 999.0
        importance = max(1, min(10, int(metadata.get("importance", 5))))
        activation_count = max(1, int(metadata.get("activation_count", 1)))
        # Days since last activation; "created" is the fallback timestamp.
        last_active_str = metadata.get("last_active", metadata.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days_since = 30  # Unparseable timestamp → assume 30 days old.
        # Emotion weight from the continuous arousal coordinate:
        # higher arousal → higher weight → slower decay.
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost
        # Time weight is the outer multiplier (highest priority).
        time_weight = self._calc_time_weight(days_since)
        base_score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )
        score = time_weight * base_score
        # Resolved events drop to 5% — they sink until reactivated by keyword.
        resolved_factor = 0.05 if metadata.get("resolved", False) else 1.0
        # High-arousal unresolved buckets get an urgency boost to surface first.
        urgency_boost = 1.5 if (arousal > 0.7 and not metadata.get("resolved", False)) else 1.0
        return round(score * resolved_factor * urgency_boost, 4)

    async def run_decay_cycle(self) -> dict:
        """
        Execute one decay cycle: iterate dynamic buckets and archive those
        scoring below the threshold.

        :return: stats dict ``{"checked", "archived", "lowest_score"}``
            (plus ``"error"`` if listing buckets failed).
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}
        checked = 0
        archived = 0
        lowest_score = float("inf")
        for bucket in buckets:
            meta = bucket.get("metadata", {})
            # Permanent / pinned / protected buckets are exempt from decay.
            if meta.get("type") == "permanent" or meta.get("pinned") or meta.get("protected"):
                continue
            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue
            lowest_score = min(lowest_score, score)
            # Below threshold → archive (simulated forgetting).
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )
        result = {
            "checked": checked,
            "archived": archived,
            # FIX: lowest_score stays +inf not only when checked == 0 but also
            # when every calculate_score call raised; report 0 in both cases
            # instead of leaking a non-JSON-serializable infinity.
            "lowest_score": lowest_score if math.isfinite(lowest_score) else 0,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    async def ensure_started(self) -> None:
        """Lazily start the decay engine on first call."""
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop (no-op if already running)."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and release the task handle."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # FIX: drop the cancelled task reference so a later stop()/start()
            # never touches a stale, already-finished task.
            self._task = None
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop body: run one decay cycle, sleep, repeat."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # Wait for the next cycle; cancellation exits the loop.
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break

779
dehydrator.py Normal file
View File

@@ -0,0 +1,779 @@
# ============================================================
# Module: Dehydration & Auto-tagging (dehydrator.py)
# 模块:数据脱水压缩 + 自动打标
#
# Capabilities:
# 能力:
# 1. Dehydrate: compress memory content into high-density summaries (save tokens)
# 脱水:将记忆桶的原始内容压缩为高密度摘要,省 token
# 2. Merge: blend old and new content, keeping bucket size constant
# 合并:揉合新旧内容,控制桶体积恒定
# 3. Analyze: auto-analyze content for domain/emotion/tags
# 打标:自动分析内容,输出主题域/情感坐标/标签
#
# Operating modes:
# 工作模式:
# - Primary: OpenAI-compatible API (DeepSeek/Ollama/LM Studio/vLLM/Gemini etc.)
# 主路径:通过 OpenAI 兼容客户端调用 LLM API
# - Fallback: local keyword extraction when API is unavailable
# 备用路径API 不可用时用本地关键词提取
#
# Depended on by: server.py
# 被谁依赖server.py
# ============================================================
import re
import json
import logging
from collections import Counter
import jieba
from openai import AsyncOpenAI
from utils import count_tokens_approx
logger = logging.getLogger("ombre_brain.dehydrator")
# --- Dehydration prompt: instructs cheap LLM to compress information ---
# --- 脱水提示词:指导廉价 LLM 压缩信息 ---
DEHYDRATE_PROMPT = """你是一个信息压缩专家。请将以下内容脱水为紧凑摘要。
压缩规则:
1. 提取所有核心事实,去除冗余修饰和重复
2. 保留最新的情绪状态和态度
3. 保留所有待办/未完成事项
4. 关键数字、日期、名称必须保留
5. 目标压缩率 > 70%
输出格式(纯 JSON无其他内容
{
"core_facts": ["事实1", "事实2"],
"emotion_state": "当前情绪关键词",
"todos": ["待办1", "待办2"],
"keywords": ["关键词1", "关键词2"],
"summary": "50字以内的核心总结"
}"""
# --- Diary digest prompt: split daily notes into independent memory entries ---
# --- 日记整理提示词:把一大段日常拆分成多个独立记忆条目 ---
DIGEST_PROMPT = """你是一个日记整理专家。用户会发送一段包含今天各种事情的文本(可能很杂乱),请你将其拆分成多个独立的记忆条目。
整理规则:
1. 每个条目应该是一个独立的主题/事件(不要混在一起)
2. 为每个条目自动分析元数据
3. 去除无意义的口水话和重复信息,保留核心内容
4. 同一主题的零散信息应合并为一个条目
5. 如果有待办事项,单独提取为一个条目
输出格式(纯 JSON 数组,无其他内容):
[
{
"name": "条目标题10字以内",
"content": "整理后的内容",
"domain": ["主题域1"],
"valence": 0.7,
"arousal": 0.4,
"tags": ["标签1", "标签2"],
"importance": 5
}
]
主题域可选(选最精确的 1~2 个,只选真正相关的):
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
人际: ["家庭", "恋爱", "友谊", "社交"]
成长: ["工作", "学习", "考试", "求职"]
身心: ["健康", "心理", "睡眠", "运动"]
兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
数字: ["编程", "AI", "硬件", "网络"]
事务: ["财务", "计划", "待办"]
内心: ["情绪", "回忆", "梦境", "自省"]
importance: 1-10根据内容重要程度判断
valence: 0~10=消极, 0.5=中性, 1=积极)
arousal: 0~10=平静, 0.5=普通, 1=激动)"""
# --- Merge prompt: instruct LLM to blend old and new memories ---
# --- 合并提示词:指导 LLM 揉合新旧记忆 ---
MERGE_PROMPT = """你是一个信息合并专家。请将旧记忆与新内容合并为一份统一的简洁记录。
合并规则:
1. 新内容与旧记忆冲突时,以新内容为准
2. 去除重复信息
3. 保留所有重要事实
4. 总长度尽量不超过旧记忆的 120%
直接输出合并后的文本,不要加额外说明。"""
# --- Auto-tagging prompt: analyze content for domain and emotion coords ---
# --- 自动打标提示词:分析内容的主题域和情感坐标 ---
ANALYZE_PROMPT = """你是一个内容分析器。请分析以下文本,输出结构化的元数据。
分析规则:
1. domain主题域选最精确的 1~2 个,只选真正相关的
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
人际: ["家庭", "恋爱", "友谊", "社交"]
成长: ["工作", "学习", "考试", "求职"]
身心: ["健康", "心理", "睡眠", "运动"]
兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
数字: ["编程", "AI", "硬件", "网络"]
事务: ["财务", "计划", "待办"]
内心: ["情绪", "回忆", "梦境", "自省"]
2. valence情感效价0.0~1.00=极度消极 → 0.5=中性 → 1.0=极度积极
3. arousal情感唤醒度0.0~1.00=非常平静 → 0.5=普通 → 1.0=非常激动
4. tags关键词标签3~5 个最能概括内容的关键词
5. suggested_name建议桶名10字以内的简短标题
输出格式(纯 JSON无其他内容
{
"domain": ["主题域1", "主题域2"],
"valence": 0.7,
"arousal": 0.4,
"tags": ["标签1", "标签2", "标签3"],
"suggested_name": "简短标题"
}"""
class Dehydrator:
"""
Data dehydrator + content analyzer.
Three capabilities: dehydration / merge / auto-tagging (domain + emotion).
Prefers API (better quality); auto-degrades to local (guaranteed availability).
数据脱水器 + 内容分析器。
三大能力:脱水压缩 / 新旧合并 / 自动打标。
优先走 APIAPI 挂了自动降级到本地。
"""
def __init__(self, config: dict):
    """
    Read the ``dehydration`` API settings and build the async client.

    Any OpenAI-compatible endpoint works (DeepSeek / Ollama / LM Studio /
    vLLM / Gemini, ...) — the user only sets ``base_url`` in config.yaml.
    When no API key is configured, ``client`` stays None and every caller
    degrades to the local fallback path.
    """
    cfg = config.get("dehydration", {})
    self.api_key = cfg.get("api_key", "")
    self.model = cfg.get("model", "deepseek-chat")
    self.base_url = cfg.get("base_url", "https://api.deepseek.com/v1")
    self.max_tokens = cfg.get("max_tokens", 1024)
    self.temperature = cfg.get("temperature", 0.1)
    # API is usable only if a key is present.
    self.api_available = bool(self.api_key)
    self.client = (
        AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, timeout=60.0)
        if self.api_available
        else None
    )
# ---------------------------------------------------------
# Dehydrate: compress raw content into concise summary
# 脱水:将原始内容压缩为精简摘要
# Try API first, fallback to local
# 先尝试 API失败则回退本地
# ---------------------------------------------------------
async def dehydrate(self, content: str, metadata: dict = None) -> str:
    """
    Compress memory content into a dense summary ready for context
    injection. Short content (< ~100 tokens) is passed through unchanged;
    otherwise the LLM API is tried first and the local keyword-based
    compressor is the fallback.
    """
    if not content or not content.strip():
        return "(空记忆 / empty memory"
    # Already compact — skip compression entirely.
    if count_tokens_approx(content) < 100:
        return self._format_output(content, metadata)
    # Preferred path: API compression (best quality).
    if self.api_available:
        try:
            compressed = await self._api_dehydrate(content)
        except Exception as e:
            logger.warning(
                f"API dehydration failed, degrading to local / "
                f"API 脱水失败,降级到本地压缩: {e}"
            )
        else:
            if compressed:
                return self._format_output(compressed, metadata)
    # Guaranteed-available local fallback.
    return self._format_output(self._local_dehydrate(content), metadata)
# ---------------------------------------------------------
# Merge: blend new content into existing bucket
# 合并:将新内容揉入已有桶,保持体积恒定
# ---------------------------------------------------------
async def merge(self, old_content: str, new_content: str) -> str:
    """
    Blend new content into an existing memory so buckets don't grow without
    bound. Empty sides short-circuit; otherwise API merge is attempted with
    a local concatenation fallback.
    """
    # Trivial cases: one (or both) side empty.
    if not old_content:
        return new_content or ""
    if not new_content:
        return old_content
    # Preferred path: API merge.
    if self.api_available:
        try:
            blended = await self._api_merge(old_content, new_content)
        except Exception as e:
            logger.warning(
                f"API merge failed, degrading to local / "
                f"API 合并失败,降级到本地合并: {e}"
            )
        else:
            if blended:
                return blended
    # Guaranteed-available local fallback.
    return self._local_merge(old_content, new_content)
# ---------------------------------------------------------
# API call: dehydration
# API 调用:脱水压缩
# ---------------------------------------------------------
async def _api_dehydrate(self, content: str) -> str:
    """
    Run one dehydration request against the OpenAI-compatible endpoint.
    Input is capped at 3000 characters; returns "" when the API yields no
    choices or empty content.
    """
    resp = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": DEHYDRATE_PROMPT},
            {"role": "user", "content": content[:3000]},
        ],
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    choices = resp.choices
    return (choices[0].message.content or "") if choices else ""
# ---------------------------------------------------------
# API call: merge
# API 调用:合并
# ---------------------------------------------------------
async def _api_merge(self, old_content: str, new_content: str) -> str:
    """
    Run one merge request against the OpenAI-compatible endpoint.
    Each side is capped at 2000 characters; returns "" when the API yields
    no choices or empty content.
    """
    user_msg = f"旧记忆:\n{old_content[:2000]}\n\n新内容:\n{new_content[:2000]}"
    resp = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": MERGE_PROMPT},
            {"role": "user", "content": user_msg},
        ],
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    choices = resp.choices
    return (choices[0].message.content or "") if choices else ""
# ---------------------------------------------------------
# Local dehydration (fallback when API is unavailable)
# 本地脱水(无 API 时的兜底方案)
# Keyword frequency + sentence position weighting
# 基于关键词频率 + 句子位置权重
# ---------------------------------------------------------
def _local_dehydrate(self, content: str) -> str:
    """
    API-free fallback compressor: sentence split, keyword extraction, then
    position-weighted sentence scoring (openings ×1.5, endings ×1.2) with
    the top 8 sentences concatenated plus a keyword list.
    """
    fragments = re.split(r"[。!?\n.!?]+", content)
    sentences = [frag.strip() for frag in fragments if len(frag.strip()) > 5]
    if not sentences:
        return content[:200]
    keywords = self._extract_keywords(content)
    tail_start = len(sentences) - 3
    ranked = []
    for idx, sentence in enumerate(sentences):
        if idx < 3:
            position_weight = 1.5
        elif idx > tail_start:
            position_weight = 1.2
        else:
            position_weight = 1.0
        hits = sum(kw in sentence for kw in keywords)
        ranked.append((position_weight * (1 + hits), sentence))
    ranked.sort(key=lambda pair: pair[0], reverse=True)
    summary = "".join(sent for _, sent in ranked[:8])
    keyword_str = ", ".join(keywords[:10])
    return f"[摘要] {summary}\n[关键词] {keyword_str}"
# ---------------------------------------------------------
# Local merge (simple concatenation + truncation)
# 本地合并(简单拼接 + 截断)
# ---------------------------------------------------------
def _local_merge(self, old_content: str, new_content: str) -> str:
    """
    API-free fallback merge: concatenate old and new with a separator;
    if the result exceeds 3000 characters, keep the first 1400 characters
    of each side instead.
    """
    separator = "\n\n--- 更新 ---\n"
    combined = old_content.strip() + separator + new_content.strip()
    if len(combined) <= 3000:
        return combined
    half = 1400
    return old_content[:half].strip() + separator + new_content[:half].strip()
# ---------------------------------------------------------
# Keyword extraction
# 关键词提取
# Chinese + English tokenization → stopword filter → frequency sort
# 中英文分词 + 停用词过滤 + 词频排序
# ---------------------------------------------------------
def _extract_keywords(self, text: str) -> list[str]:
    """
    Extract up to 15 high-frequency keywords: jieba tokenization for
    Chinese (empty token list on any failure) plus a 3+-letter English
    word scan, filtered against a small stopword set and pure numbers.
    """
    try:
        tokens = jieba.lcut(text)
    except Exception:
        tokens = []
    # English words of three letters or more, lower-cased.
    tokens = tokens + re.findall(r"[a-zA-Z]{3,}", text.lower())
    # NOTE(review): several single-character Chinese stopwords render as
    # empty strings — likely lost in an encoding/migration step; verify.
    stopwords = {
        "", "", "", "", "", "", "", "", "", "",
        "", "一个", "", "", "", "", "", "", "",
        "", "", "", "没有", "", "", "自己", "", "", "",
        "the", "and", "for", "are", "but", "not", "you", "all", "can",
        "had", "her", "was", "one", "our", "out", "has", "have", "with",
        "this", "that", "from", "they", "been", "said", "will", "each",
    }
    candidates = [
        tok for tok in tokens
        if tok not in stopwords and len(tok.strip()) > 1 and not re.match(r"^[0-9]+$", tok)
    ]
    return [word for word, _ in Counter(candidates).most_common(15)]
# ---------------------------------------------------------
# Output formatting
# 输出格式化
# Wraps dehydrated result with bucket name, tags, emotion coords
# 把脱水结果包装成带桶名、标签、情感坐标的可读文本
# ---------------------------------------------------------
def _format_output(self, content: str, metadata: dict = None) -> str:
    """
    Wrap a dehydrated summary with a one-line header carrying bucket name,
    domains, tags, and emotion coordinates; without metadata the content is
    returned untouched.
    """
    if not (metadata and isinstance(metadata, dict)):
        return content
    name = metadata.get("name", "未命名")
    tags = ", ".join(metadata.get("tags", []))
    domains = ", ".join(metadata.get("domain", []))
    try:
        valence = float(metadata.get("valence", 0.5))
        arousal = float(metadata.get("arousal", 0.3))
    except (ValueError, TypeError):
        valence, arousal = 0.5, 0.3
    pieces = [f"📌 记忆桶: {name}"]
    if domains:
        pieces.append(f" [主题:{domains}]")
    if tags:
        pieces.append(f" [标签:{tags}]")
    pieces.append(f" [情感:V{valence:.1f}/A{arousal:.1f}]")
    return "".join(pieces) + "\n" + content
# ---------------------------------------------------------
# Auto-tagging: analyze content for domain + emotion + tags
# 自动打标:分析内容,输出主题域 + 情感坐标 + 标签
# Called by server.py when storing new memories
# 存新记忆时由 server.py 调用
# ---------------------------------------------------------
async def analyze(self, content: str) -> dict:
    """
    Analyze content and return structured metadata
    ``{"domain", "valence", "arousal", "tags", "suggested_name"}``.
    Empty input yields the neutral default; API tagging is preferred with
    the local keyword/sentiment analyzer as fallback.
    """
    if not content or not content.strip():
        return self._default_analysis()
    if self.api_available:
        try:
            tagged = await self._api_analyze(content)
        except Exception as e:
            logger.warning(
                f"API tagging failed, degrading to local / "
                f"API 打标失败,降级到本地分析: {e}"
            )
        else:
            if tagged:
                return tagged
    return self._local_analyze(content)
# ---------------------------------------------------------
# API call: auto-tagging
# API 调用:自动打标
# ---------------------------------------------------------
async def _api_analyze(self, content: str) -> dict:
    """
    Run one tagging request against the OpenAI-compatible endpoint and
    hand the raw reply to ``_parse_analysis``. Input is capped at 2000
    characters; empty/absent replies yield the neutral default.
    """
    resp = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": ANALYZE_PROMPT},
            {"role": "user", "content": content[:2000]},
        ],
        max_tokens=256,
        temperature=0.1,
    )
    if not resp.choices:
        return self._default_analysis()
    raw = resp.choices[0].message.content or ""
    if not raw.strip():
        return self._default_analysis()
    return self._parse_analysis(raw)
# ---------------------------------------------------------
# Parse API JSON response with safety checks
# 解析 API 返回的 JSON做安全校验
# Ensure valence/arousal in 0~1, domain/tags valid
# ---------------------------------------------------------
def _parse_analysis(self, raw: str) -> dict:
    """
    Parse and validate the LLM tagging JSON.

    Strips an optional markdown code fence, loads the JSON, clamps
    ``valence``/``arousal`` into [0, 1], and normalizes ``domain``/``tags``.
    Any failure falls back to the neutral default.
    """
    try:
        # The model sometimes wraps its JSON in a ``` code fence.
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
        result = json.loads(cleaned)
    except (json.JSONDecodeError, IndexError, ValueError):
        logger.warning(f"API tagging JSON parse failed / JSON 解析失败: {raw[:200]}")
        return self._default_analysis()
    if not isinstance(result, dict):
        return self._default_analysis()
    # Clamp emotion coordinates into the valid [0, 1] range.
    try:
        valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
        arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))
    except (ValueError, TypeError):
        valence, arousal = 0.5, 0.3
    # FIX: the model occasionally returns domain/tags as a plain string;
    # slicing a string silently yields its first characters instead of a
    # list, breaking the documented schema. Normalize both to lists of
    # non-empty strings, and keep the "未分类" default when domain is empty.
    domain = result.get("domain", ["未分类"])
    if isinstance(domain, str):
        domain = [domain]
    elif not isinstance(domain, list):
        domain = ["未分类"]
    domain = [str(d) for d in domain if str(d).strip()][:3] or ["未分类"]
    tags = result.get("tags", [])
    if isinstance(tags, str):
        tags = [tags]
    elif not isinstance(tags, list):
        tags = []
    tags = [str(t) for t in tags if str(t).strip()][:5]
    return {
        "domain": domain,
        "valence": valence,
        "arousal": arousal,
        "tags": tags,
        "suggested_name": str(result.get("suggested_name", ""))[:20],
    }
# ---------------------------------------------------------
# Local analysis (fallback when API is unavailable)
# 本地分析(无 API 时的兜底方案)
# Keyword matching + simple sentiment dictionary
# 基于关键词 + 简单情感词典匹配
# ---------------------------------------------------------
def _local_analyze(self, content: str) -> dict:
    """
    API-free fallback analyzer: keyword-dictionary domain matching plus a
    tiny sentiment lexicon.

    :param content: raw memory text.
    :return: ``{"domain", "valence", "arousal", "tags", "suggested_name"}``
        with ``suggested_name`` always empty (only the API path names buckets).
    """
    keywords = self._extract_keywords(content)
    text_lower = content.lower()
    # Domain matching: a domain is selected when at least two of its
    # keywords appear in the text; top three by hit count are kept.
    # NOTE(review): several single-character keywords render as empty
    # strings ("") — likely lost in an encoding/migration step; an empty
    # string is a substring of any text, so those entries always "hit".
    # Verify against the original source.
    domain_keywords = {
        # Daily / 日常
        "饮食": {"", "", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
               "超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食"},
        "出行": {"旅行", "出发", "航班", "酒店", "地铁", "打车", "高铁", "机票",
               "景点", "签证", "护照"},
        "居家": {"打扫", "洗衣", "搬家", "快递", "收纳", "装修", "租房"},
        "购物": {"", "下单", "到货", "退货", "优惠", "折扣", "代购"},
        # Relationships / 人际
        "家庭": {"", "", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
               "奶奶", "爷爷", "亲戚", "家里"},
        "恋爱": {"爱人", "男友", "女友", "", "约会", "接吻", "分手",
               "暧昧", "在一起", "想你", "同床"},
        "友谊": {"朋友", "闺蜜", "兄弟", "", "约饭", "聊天", ""},
        "社交": {"见面", "被人", "圈子", "消息", "评论", "点赞"},
        # Growth / 成长
        "工作": {"会议", "项目", "客户", "汇报", "deadline", "同事",
               "老板", "薪资", "合同", "需求", "加班", "实习"},
        "学习": {"", "考试", "论文", "笔记", "作业", "教授", "讲座",
               "分数", "选课", "学分"},
        "求职": {"面试", "简历", "offer", "投递", "薪资", "岗位"},
        # Health / 身心
        "健康": {"医院", "复查", "吃药", "抽血", "手术", "心率",
               "", "症状", "指标", "体检", "月经"},
        "心理": {"焦虑", "抑郁", "恐慌", "创伤", "人格", "咨询",
               "安全感", "自残", "崩溃", "压力"},
        "睡眠": {"", "失眠", "噩梦", "清醒", "熬夜", "早起", "午觉"},
        # Interests / 兴趣
        "游戏": {"游戏", "steam", "极乐迪斯科", "存档", "通关", "角色",
               "mod", "DLC", "剧情"},
        "影视": {"电影", "番剧", "动漫", "", "综艺", "追番", "上映"},
        "音乐": {"", "音乐", "专辑", "live", "演唱会", "耳机"},
        "阅读": {"", "小说", "读完", "kindle", "连载", "漫画"},
        "创作": {"", "", "预设", "脚本", "视频", "剪辑", "P图",
               "SillyTavern", "插件", "正则", "人设"},
        # Digital / 数字
        "编程": {"代码", "code", "python", "bug", "api", "docker",
               "git", "调试", "框架", "部署", "开发", "server"},
        "AI": {"模型", "GPT", "Claude", "gemini", "LLM", "token",
              "prompt", "LoRA", "微调", "推理", "MCP"},
        "网络": {"VPN", "梯子", "代理", "域名", "隧道", "服务器",
               "cloudflare", "tunnel", "反代"},
        # Affairs / 事务
        "财务": {"", "转账", "工资", "花了", "", "还款", "",
               "账单", "余额", "预算", "黄金"},
        "计划": {"计划", "目标", "deadline", "日程", "清单", "安排"},
        "待办": {"要做", "记得", "别忘", "提醒", "下次"},
        # Inner / 内心
        "情绪": {"开心", "难过", "生气", "", "", "孤独", "幸福",
               "伤心", "", "委屈", "感动", "温柔"},
        "回忆": {"以前", "小时候", "那时", "怀念", "曾经", "记得"},
        "梦境": {"", "梦到", "梦见", "噩梦", "清醒梦"},
        "自省": {"反思", "觉得自己", "问自己", "意识到", "明白了"},
    }
    matched_domains = []
    for domain, kws in domain_keywords.items():
        hits = sum(1 for kw in kws if kw in text_lower)
        if hits >= 2:  # require at least two hits to claim a domain
            matched_domains.append((domain, hits))
    matched_domains.sort(key=lambda x: x[1], reverse=True)
    domains = [d for d, _ in matched_domains[:3]] or ["未分类"]
    # Emotion estimation via a small sentiment dictionary (same ""-entry
    # caveat as above applies to these sets).
    positive_words = {"开心", "高兴", "喜欢", "哈哈", "", "", "",
                      "幸福", "成功", "感动", "兴奋", "棒极了",
                      "happy", "love", "great", "awesome", "nice"}
    negative_words = {"难过", "伤心", "生气", "焦虑", "害怕", "无聊",
                      "", "", "失望", "崩溃", "愤怒", "痛苦",
                      "sad", "angry", "hate", "tired", "afraid"}
    intense_words = {"", "非常", "", "", "特别", "十分", "",
                     "崩溃", "激动", "愤怒", "狂喜", "very", "so", "extremely"}
    pos_count = sum(1 for w in positive_words if w in text_lower)
    neg_count = sum(1 for w in negative_words if w in text_lower)
    intense_count = sum(1 for w in intense_words if w in text_lower)
    # valence: balance of positive vs negative hits, scaled into 0.1–0.9.
    if pos_count + neg_count > 0:
        valence = 0.5 + 0.4 * (pos_count - neg_count) / (pos_count + neg_count)
    else:
        valence = 0.5
    # arousal: baseline 0.3 plus boosts for intensity markers and any
    # emotional hits at all, capped at 1.0.
    arousal = min(1.0, 0.3 + intense_count * 0.15 + (pos_count + neg_count) * 0.08)
    return {
        "domain": domains,
        "valence": round(max(0.0, min(1.0, valence)), 2),
        "arousal": round(max(0.0, min(1.0, arousal)), 2),
        "tags": keywords[:5],
        "suggested_name": "",
    }
# ---------------------------------------------------------
# Default analysis result (empty content or total failure)
# 默认分析结果(内容为空或完全失败时用)
# ---------------------------------------------------------
def _default_analysis(self) -> dict:
    """
    Neutral fallback metadata used for empty input or when both the API
    and local analysis paths fail.
    """
    return dict(
        domain=["未分类"],
        valence=0.5,
        arousal=0.3,
        tags=[],
        suggested_name="",
    )
# ---------------------------------------------------------
# Diary digest: split daily notes into independent memory entries
# 日记整理:把一大段日常拆分成多个独立记忆条目
# For the "grow" tool — "dump a day's content and it gets organized"
# 给 grow 工具用,"一天结束发一坨内容"靠这个
# ---------------------------------------------------------
async def digest(self, content: str) -> list[dict]:
    """
    Split a large chunk of daily content into independent memory entries,
    each ``{"name", "content", "domain", "valence", "arousal", "tags",
    "importance"}``. Backs the "grow" tool. API digestion is preferred
    (it understands semantic boundaries); local splitting is the fallback.
    """
    if not content or not content.strip():
        return []
    if self.api_available:
        try:
            entries = await self._api_digest(content)
        except Exception as e:
            logger.warning(
                f"API diary digest failed, degrading to local / "
                f"API 日记整理失败,降级到本地拆分: {e}"
            )
        else:
            if entries:
                return entries
    return await self._local_digest(content)
# ---------------------------------------------------------
# API call: diary digest
# API 调用:日记整理
# ---------------------------------------------------------
async def _api_digest(self, content: str) -> list[dict]:
    """
    Ask the LLM API to organize diary content into entries.

    Sends DIGEST_PROMPT as the system message with the (truncated) user
    content, then hands the raw reply to ``_parse_digest``. Returns an
    empty list when the model produced no usable text.
    """
    completion = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": DIGEST_PROMPT},
            {"role": "user", "content": content[:5000]},  # cap prompt size
        ],
        max_tokens=2048,
        temperature=0.2,
    )
    if not completion.choices:
        return []
    reply = completion.choices[0].message.content or ""
    if not reply.strip():
        return []
    return self._parse_digest(reply)
# ---------------------------------------------------------
# Parse diary digest result with safety checks
# 解析日记整理结果,做安全校验
# ---------------------------------------------------------
def _parse_digest(self, raw: str) -> list[dict]:
"""
Parse and validate API diary digest result.
解析并校验 API 返回的日记整理结果。
"""
try:
cleaned = raw.strip()
if cleaned.startswith("```"):
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
items = json.loads(cleaned)
except (json.JSONDecodeError, IndexError, ValueError):
logger.warning(f"Diary digest JSON parse failed / JSON 解析失败: {raw[:200]}")
return []
if not isinstance(items, list):
return []
validated = []
for item in items:
if not isinstance(item, dict) or not item.get("content"):
continue
try:
importance = max(1, min(10, int(item.get("importance", 5))))
except (ValueError, TypeError):
importance = 5
try:
valence = max(0.0, min(1.0, float(item.get("valence", 0.5))))
arousal = max(0.0, min(1.0, float(item.get("arousal", 0.3))))
except (ValueError, TypeError):
valence, arousal = 0.5, 0.3
validated.append({
"name": str(item.get("name", ""))[:20],
"content": str(item.get("content", "")),
"domain": item.get("domain", ["未分类"])[:3],
"valence": valence,
"arousal": arousal,
"tags": item.get("tags", [])[:5],
"importance": importance,
})
return validated
# ---------------------------------------------------------
# Local diary split (fallback when API is unavailable)
# 本地日记拆分(无 API 时的兜底)
# Split by blank lines/separators, analyze each segment
# 按空行/分隔符拆段,每段独立分析
# ---------------------------------------------------------
async def _local_digest(self, content: str) -> list[dict]:
    """
    Local paragraph split + per-segment analysis.
    本地按段落拆分 + 逐段分析。

    Fallback used when the API digest is unavailable: splits the text on
    blank lines / separator lines and runs the local keyword analyzer on
    each segment. Importance is fixed at 5 since no model grading exists.
    """
    # Split by blank lines or separator lines / 按空行或分隔线拆分
    segments = re.split(r"\n{2,}|---+|\n-\s", content)
    segments = [s.strip() for s in segments if len(s.strip()) > 20]
    if not segments:
        # Content too short to split — store it whole as a single entry.
        # 内容太短,整个作为一个条目
        analysis = self._local_analyze(content)
        # Fix: use `or` so an empty suggested_name still falls back to
        # "日记" — the analyzer always returns the key (often as ""), so
        # the old dict-get default never fired.
        return [{
            "name": analysis.get("suggested_name") or "日记",
            "content": content.strip(),
            "domain": analysis["domain"],
            "valence": analysis["valence"],
            "arousal": analysis["arousal"],
            "tags": analysis["tags"],
            "importance": 5,
        }]
    items = []
    for seg in segments[:10]:  # Max 10 segments / 最多 10 段
        analysis = self._local_analyze(seg)
        items.append({
            "name": analysis.get("suggested_name", "") or seg[:10],
            "content": seg,
            "domain": analysis["domain"],
            "valence": analysis["valence"],
            "arousal": analysis["arousal"],
            "tags": analysis["tags"],
            "importance": 5,
        })
    return items

48
docker-compose.yml Normal file
View File

@@ -0,0 +1,48 @@
# ============================================================
# Ombre Brain Docker Compose
# Docker Compose 配置
#
# Usage / 使用:
# 1. Create .env: echo "OMBRE_API_KEY=your-key" > .env
# 创建 .env 文件
# 2. docker-compose up -d
# 3. docker compose logs tunnel (for public URL / 查看公网地址)
# ============================================================
services:
ombre-brain:
build: .
container_name: ombre-brain
restart: unless-stopped
ports:
- "18001:8000" # Local debug port (optional) / 本地调试端口
environment:
- OMBRE_API_KEY=${OMBRE_API_KEY} # From .env file / 从 .env 文件读取
- OMBRE_TRANSPORT=streamable-http # Claude.ai requires streamable-http
- OMBRE_BUCKETS_DIR=/data # Container-internal bucket path / 容器内路径
volumes:
# Mount your Obsidian vault (or any host directory) for persistent storage
# 挂载你的 Obsidian 仓库(或任意宿主机目录)做持久化存储
# Example / 示例:
# - /path/to/your/Obsidian Vault/Ombre Brain:/data
- /Users/p0lar1s/Library/Mobile Documents/iCloud~md~obsidian/Documents/Obsidian Vault/Ombre Brain:/data
- ./config.yaml:/app/config.yaml
# Cloudflare Tunnel (optional) — expose to public internet
# Cloudflare Tunnel可选— 暴露到公网
# Configure your own credentials under ~/.cloudflared/
# 在 ~/.cloudflared/ 下放你自己的凭证
tunnel:
image: cloudflare/cloudflared:latest
container_name: ombre-tunnel
restart: unless-stopped
command: >
tunnel --no-autoupdate --protocol http2
--config /etc/cloudflared/config.yml
--proxy-keepalive-timeout 300s
--proxy-connection-timeout 300s
run
volumes:
- ~/.cloudflared:/etc/cloudflared
depends_on:
- ombre-brain

118
migrate_to_domains.py Normal file
View File

@@ -0,0 +1,118 @@
#!/usr/bin/env python3
"""
迁移脚本:将 dynamic/ 下的平铺记忆桶文件重组为域子目录结构。
旧结构: dynamic/{bucket_id}.md
新结构: dynamic/{primary_domain}/{name}_{bucket_id}.md
纯标准库,无外部依赖。
"""
import os
import re
import shutil
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
def sanitize_name(name: str) -> str:
    """Drop filesystem-unsafe characters, trim to 80 chars, never return empty."""
    safe = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE).strip()[:80]
    if safe:
        return safe
    return "unnamed"
def parse_frontmatter(filepath):
    """Extract id, name and domain from a bucket file's YAML frontmatter.

    Uses plain regexes only (no YAML dependency). Returns None when the
    file has no leading frontmatter block; otherwise a dict in which
    "domain" always exists (default ["未分类"]) while "id" and "name"
    are present only when found.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        text = fh.read()
    if not text.startswith("---"):
        return None
    pieces = text.split("---", 2)
    if len(pieces) < 3:
        return None
    header = pieces[1]
    meta = {}
    # Scalar fields: strip surrounding whitespace and quoting.
    for field in ("id", "name"):
        hit = re.search(rf"^{field}:\s*(.+)$", header, re.MULTILINE)
        if hit:
            meta[field] = hit.group(1).strip().strip("'\"")
    # domain is a YAML list of "- value" lines; default to 未分类.
    hit = re.search(r"^domain:\s*\n((?:\s*-\s*.+\n?)+)", header, re.MULTILINE)
    if hit:
        meta["domain"] = re.findall(r"-\s*(.+)", hit.group(1))
    else:
        meta["domain"] = ["未分类"]
    return meta
def migrate():
    """Reorganize flat bucket files under dynamic/ into per-domain subdirectories.

    Old layout: dynamic/{bucket_id}.md
    New layout: dynamic/{primary_domain}/{name}_{bucket_id}.md
    Prints a per-file report, then the resulting directory tree.
    """
    if not os.path.exists(DYNAMIC_DIR):
        print(f"目录不存在: {DYNAMIC_DIR}")
        return
    # Only handle .md files sitting directly in dynamic/ — files already
    # inside domain subdirectories are assumed migrated and left alone.
    files = [f for f in os.listdir(DYNAMIC_DIR)
             if f.endswith(".md") and os.path.isfile(os.path.join(DYNAMIC_DIR, f))]
    if not files:
        print("没有需要迁移的文件。")
        return
    print(f"发现 {len(files)} 个待迁移文件\n")
    for filename in sorted(files):
        old_path = os.path.join(DYNAMIC_DIR, filename)
        try:
            meta = parse_frontmatter(old_path)
        except Exception as e:
            # NOTE(review): "(unknown)" below looks like an f-string
            # placeholder (probably {filename}) lost in rendering — verify
            # against the upstream source.
            print(f" ✗ 无法解析 (unknown): {e}")
            continue
        if not meta:
            print(f" ✗ 无 frontmatter: (unknown)")
            continue
        # Fall back to the filename stem when frontmatter has no id.
        bucket_id = meta.get("id", filename.replace(".md", ""))
        name = meta.get("name", "")
        domain = meta.get("domain", ["未分类"])
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        # Build the destination: dynamic/{primary_domain}/{name}_{id}.md
        domain_dir = os.path.join(DYNAMIC_DIR, primary_domain)
        os.makedirs(domain_dir, exist_ok=True)
        if name and name != bucket_id:
            new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
        else:
            new_filename = f"{bucket_id}.md"
        new_path = os.path.join(domain_dir, new_filename)
        # Move the bucket into its domain directory.
        shutil.move(old_path, new_path)
        print(f"(unknown)")
        print(f"{primary_domain}/{new_filename}")
    print(f"\n迁移完成。")
    # Show the resulting tree for a quick visual check.
    print("\n=== 新目录结构 ===")
    for root, dirs, files in os.walk(DYNAMIC_DIR):
        level = root.replace(DYNAMIC_DIR, "").count(os.sep)
        indent = " " * level
        folder = os.path.basename(root)
        if level > 0:
            print(f"{indent}📁 {folder}/")
        for f in sorted(files):
            if f.endswith(".md"):
                print(f"{indent} 📄 {f}")
# Script entry point: run the migration when executed directly.
if __name__ == "__main__":
    migrate()

121
reclassify_api.py Normal file
View File

@@ -0,0 +1,121 @@
#!/usr/bin/env python3
"""
用 API 重新打标未分类记忆桶,修正 domain/tags/name移动到正确目录。
用法: docker exec ombre-brain python3 /app/reclassify_api.py
"""
import asyncio
import os
import json
import glob
import re
from openai import AsyncOpenAI
import frontmatter
ANALYZE_PROMPT = (
"你是一个内容分析器。请分析以下文本,输出结构化的元数据。\n\n"
"分析规则:\n"
'1. domain主题域选最精确的 1~2 个,只选真正相关的\n'
' 日常: ["饮食", "穿搭", "出行", "居家", "购物"]\n'
' 人际: ["家庭", "恋爱", "友谊", "社交"]\n'
' 成长: ["工作", "学习", "考试", "求职"]\n'
' 身心: ["健康", "心理", "睡眠", "运动"]\n'
' 兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]\n'
' 数字: ["编程", "AI", "硬件", "网络"]\n'
' 事务: ["财务", "计划", "待办"]\n'
' 内心: ["情绪", "回忆", "梦境", "自省"]\n'
"2. valence情感效价0.0~1.00=极度消极 → 0.5=中性 → 1.0=极度积极\n"
"3. arousal情感唤醒度0.0~1.00=非常平静 → 0.5=普通 → 1.0=非常激动\n"
"4. tags关键词标签3~5 个最能概括内容的关键词\n"
"5. suggested_name建议桶名10字以内的简短标题\n\n"
"输出格式(纯 JSON无其他内容\n"
'{\n'
' "domain": ["主题域1", "主题域2"],\n'
' "valence": 0.7,\n'
' "arousal": 0.4,\n'
' "tags": ["标签1", "标签2", "标签3"],\n'
' "suggested_name": "简短标题"\n'
'}'
)
DATA_DIR = "/data/dynamic"
UNCLASS_DIR = os.path.join(DATA_DIR, "未分类")
def sanitize(name):
    """Remove characters illegal in filenames, cap at 20 chars, default 未命名."""
    cleaned = re.sub(r'[<>:"/\\|?*\n\r]', '', name).strip()
    if not cleaned:
        return "未命名"
    return cleaned[:20]
async def reclassify():
    """Re-tag every bucket under 未分类/ via the LLM API and refile it.

    For each markdown bucket: send name+content to the model, parse the
    JSON metadata reply, write the updated frontmatter back, then move
    the file into the directory of its new primary domain.
    """
    client = AsyncOpenAI(
        api_key=os.environ.get("OMBRE_API_KEY", ""),
        base_url="https://api.siliconflow.cn/v1",
        timeout=60.0,
    )
    files = sorted(glob.glob(os.path.join(UNCLASS_DIR, "*.md")))
    print(f"找到 {len(files)} 个未分类文件\n")
    for fpath in files:
        basename = os.path.basename(fpath)
        post = frontmatter.load(fpath)
        content = post.content.strip()
        name = post.metadata.get("name", "")
        # Give the model both the stored name and the body.
        full_text = f"{name}\n{content}" if name else content
        try:
            resp = await client.chat.completions.create(
                model="deepseek-ai/DeepSeek-V3",
                messages=[
                    {"role": "system", "content": ANALYZE_PROMPT},
                    {"role": "user", "content": full_text[:2000]},  # cap prompt size
                ],
                max_tokens=256,
                temperature=0.1,
            )
            raw = resp.choices[0].message.content.strip()
            if raw.startswith("```"):
                # Strip a Markdown code fence around the JSON reply.
                raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]
            result = json.loads(raw)
        except Exception as e:
            # Best-effort batch job: report and move on to the next file.
            print(f" X API失败 {basename}: {e}")
            continue
        # Clamp / truncate model output before trusting it.
        new_domain = result.get("domain", ["未分类"])[:3]
        new_tags = result.get("tags", [])[:5]
        new_name = sanitize(result.get("suggested_name", "") or name)
        new_valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
        new_arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))
        post.metadata["domain"] = new_domain
        post.metadata["tags"] = new_tags
        post.metadata["valence"] = new_valence
        post.metadata["arousal"] = new_arousal
        if new_name:
            post.metadata["name"] = new_name
        # Write the updated frontmatter back to the same file.
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))
        # Move the file under its new primary-domain directory.
        primary = sanitize(new_domain[0]) if new_domain else "未分类"
        target_dir = os.path.join(DATA_DIR, primary)
        os.makedirs(target_dir, exist_ok=True)
        bid = post.metadata.get("id", "")
        new_filename = f"{new_name}_{bid}.md" if new_name and new_name != bid else basename
        dest = os.path.join(target_dir, new_filename)
        if dest != fpath:
            os.rename(fpath, dest)
        print(f" OK {basename}")
        print(f" -> {primary}/{new_filename}")
        print(f" domain={new_domain} tags={new_tags} V={new_valence} A={new_arousal}")
        print()
# Script entry point: drive the async reclassification loop.
if __name__ == "__main__":
    asyncio.run(reclassify())

198
reclassify_domains.py Normal file
View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python3
"""
重分类脚本:根据新的域列表,重新分析已有桶的 domain 并搬到对应子目录。
纯标准库,读 frontmatter + 正文内容做关键词匹配。
"""
import os
import re
import shutil
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
# 新域关键词表(和 dehydrator.py 的 _local_analyze 一致)
DOMAIN_KEYWORDS = {
"饮食": {"", "", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
"超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食", "麦片"},
"家庭": {"", "", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
"奶奶", "爷爷", "亲戚", "家里", "生日礼", "生活费"},
"恋爱": {"爱人", "男友", "女友", "", "约会", "分手", "暧昧",
"在一起", "想你", "同床", "一辈子", "爱你", "我们是",
"克劳德", "亲密", "接吻", "正缘"},
"友谊": {"朋友", "闺蜜", "兄弟", "", "约饭"},
"社交": {"见面", "圈子", "社区", "创作者", "发帖", "鹤见"},
"工作": {"会议", "项目", "客户", "汇报", "同事", "老板", "薪资",
"领导力", "管理沟通"},
"学习": {"", "考试", "论文", "作业", "教授", "Python实操",
"选课", "学分", "jieba", "分词"},
"健康": {"医院", "复查", "吃药", "抽血", "心率", "心电图",
"", "慢粒", "融合基因", "二尖瓣", "月经", "脚趾甲"},
"心理": {"焦虑", "抑郁", "创伤", "人格", "安全感", "崩溃",
"压力", "自残", "ABC人格", "人格分裂", "恋爱焦虑"},
"睡眠": {"", "失眠", "噩梦", "清醒", "熬夜", "做梦"},
"游戏": {"游戏", "极乐迪斯科", "存档", "通关", "Shivers", "DLC"},
"影视": {"电影", "番剧", "动漫", "", "综艺"},
"阅读": {"", "小说", "读完", "漫画", "李宿芳菲"},
"创作": {"", "预设", "脚本", "SillyTavern", "插件", "正则",
"人设卡", "天气同步", "破甲词"},
"编程": {"代码", "python", "bug", "api", "docker", "git",
"调试", "部署", "开发", "server"},
"AI": {"模型", "Claude", "gemini", "LLM", "token", "prompt",
"LoRA", "MCP", "DeepSeek", "隧道", "Ombre Brain",
"打包盒", "脱水", "记忆系统"},
"网络": {"VPN", "梯子", "代理", "域名", "隧道", "cloudflare",
"tunnel", "反代"},
"财务": {"", "转账", "花了", "", "黄金", "卖掉", "换了",
"生活费", "4276"},
"情绪": {"开心", "难过", "", "", "孤独", "伤心", "",
"委屈", "感动", "温柔", "口罩湿了"},
"回忆": {"以前", "小时候", "那时", "怀念", "曾经", "纹身",
"十三岁", "九岁"},
"自省": {"反思", "觉得自己", "问自己", "自恋", "投射"},
}
def sanitize_name(name):
    """Keep word chars / spaces / CJK / dashes, trim to 80 chars, never empty."""
    kept = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE).strip()
    return kept[:80] if kept else "unnamed"
def parse_md(filepath):
    """Split a bucket file into (frontmatter dict, raw YAML text, body).

    Returns (None, None, full_content) when the file has no leading
    frontmatter block. In the dict, "domain" always exists (default
    ["未分类"]) while "id" and "name" appear only when found.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        raw = fh.read()
    if not raw.startswith("---"):
        return None, None, raw
    chunks = raw.split("---", 2)
    if len(chunks) < 3:
        return None, None, raw
    header, body = chunks[1], chunks[2]
    meta = {}
    # Scalar fields: strip surrounding whitespace and quoting.
    for key in ("id", "name"):
        found = re.search(rf"^{key}:\s*(.+)$", header, re.MULTILINE)
        if found:
            meta[key] = found.group(1).strip().strip("'\"")
    # domain is a YAML list of "- value" lines; default to 未分类.
    found = re.search(r"^domain:\s*\n((?:\s*-\s*.+\n?)+)", header, re.MULTILINE)
    if found:
        meta["domain"] = [d.strip() for d in re.findall(r"-\s*(.+)", found.group(1))]
    else:
        meta["domain"] = ["未分类"]
    return meta, header, body
def classify(body, old_domains):
    """Pick up to two domains whose keyword tables hit the body at least twice.

    Falls back to old_domains when no domain matches well enough.
    """
    haystack = body.lower()
    ranked = [
        (dom, sum(1 for kw in kws if kw.lower() in haystack))
        for dom, kws in DOMAIN_KEYWORDS.items()
    ]
    ranked = [(dom, hits) for dom, hits in ranked if hits >= 2]
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    if not ranked:
        return old_domains  # no confident match — keep the old labels
    return [dom for dom, _ in ranked[:2]]
def update_domain_in_file(filepath, new_domains):
    """Rewrite the frontmatter ``domain:`` list of *filepath* in place.

    Replaces the first ``domain:`` block with one "- value" line per entry
    in *new_domains*, leaving everything else in the file untouched.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()
    domain_yaml = "domain:\n" + "".join(f"- {d}\n" for d in new_domains)
    # Item lines must be "-" followed by whitespace: the old pattern
    # (?:\s*-\s*.+\n?)+ also swallowed a closing "---" fence placed
    # directly after the domain list, corrupting the frontmatter.
    # NOTE: assumes items are written "- value" (as this codebase emits);
    # confirm no space-less "-value" files exist.
    content = re.sub(
        r"domain:\s*\n(?:[ \t]*-[ \t]+.+\n?)+",
        domain_yaml,
        content,
        count=1
    )
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
def reclassify():
    """Re-score every bucket's domains from its body text and refile it.

    Walks all .md buckets under dynamic/ (recursively), recomputes the
    domain list via keyword matching, rewrites the frontmatter when it
    changed, moves the file under its new primary domain, prunes empty
    directories, and prints the resulting tree.
    """
    if not os.path.exists(DYNAMIC_DIR):
        print("目录不存在")
        return
    # Collect every .md bucket file, including those already in subdirs.
    all_files = []
    for root, _, files in os.walk(DYNAMIC_DIR):
        for f in files:
            if f.endswith(".md"):
                all_files.append(os.path.join(root, f))
    if not all_files:
        print("没有文件。")
        return
    print(f"扫描到 {len(all_files)} 个桶文件\n")
    for filepath in sorted(all_files):
        meta, yaml_text, body = parse_md(filepath)
        if not meta:
            print(f" ✗ 无法解析: {os.path.basename(filepath)}")
            continue
        bucket_id = meta.get("id", "unknown")
        name = meta.get("name", bucket_id)
        old_domains = meta.get("domain", ["未分类"])
        # Re-derive domains from the body; keeps old labels on no match.
        new_domains = classify(body, old_domains)
        primary = sanitize_name(new_domains[0])
        # NOTE(review): old_primary is computed but never used below —
        # likely leftover from an earlier report format.
        old_primary = sanitize_name(old_domains[0]) if old_domains else "未分类"
        if name and name != bucket_id:
            new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
        else:
            new_filename = f"{bucket_id}.md"
        new_dir = os.path.join(DYNAMIC_DIR, primary)
        os.makedirs(new_dir, exist_ok=True)
        new_path = os.path.join(new_dir, new_filename)
        changed = (new_domains != old_domains) or (filepath != new_path)
        if changed:
            # Persist the new domain list into the frontmatter first,
            # then move the file into its new domain directory.
            update_domain_in_file(filepath, new_domains)
            if filepath != new_path:
                shutil.move(filepath, new_path)
            # NOTE(review): separator glyphs in the next two messages look
            # lost in rendering — verify against the upstream source.
            print(f"{name}")
            print(f" {','.join(old_domains)}{','.join(new_domains)}")
            print(f"{primary}/{new_filename}")
        else:
            print(f" · {name} (不变)")
    # Remove now-empty domain directories left behind by moves.
    for d in os.listdir(DYNAMIC_DIR):
        dp = os.path.join(DYNAMIC_DIR, d)
        if os.path.isdir(dp) and not os.listdir(dp):
            os.rmdir(dp)
            print(f"\n 🗑 删除空目录: {d}/")
    print(f"\n重分类完成。\n")
    # Show the resulting tree for a quick visual check.
    print("=== 新目录结构 ===")
    for root, dirs, files in os.walk(DYNAMIC_DIR):
        level = root.replace(DYNAMIC_DIR, "").count(os.sep)
        indent = " " * level
        folder = os.path.basename(root)
        if level > 0:
            print(f"{indent}📁 {folder}/")
        for f in sorted(files):
            if f.endswith(".md"):
                print(f"{indent} 📄 {f}")
# Script entry point: run the reclassification when executed directly.
if __name__ == "__main__":
    reclassify()

21
render.yaml Normal file
View File

@@ -0,0 +1,21 @@
services:
- type: web
name: ombre-brain
env: python
region: oregon
plan: free
buildCommand: pip install -r requirements.txt
startCommand: python server.py
envVars:
- key: OMBRE_TRANSPORT
value: streamable-http
- key: OMBRE_API_KEY
sync: false # Set in Render dashboard > Environment (any OpenAI-compatible key)
- key: OMBRE_BASE_URL
sync: false # e.g. https://api.deepseek.com/v1 or https://api.siliconflow.cn/v1
- key: OMBRE_BUCKETS_DIR
value: /opt/render/project/src/buckets
disk:
name: ombre-buckets
mountPath: /opt/render/project/src/buckets
sizeGB: 1

25
requirements.txt Normal file
View File

@@ -0,0 +1,25 @@
# ============================================================
# Ombre Brain Python 依赖
# 安装: pip install -r requirements.txt
# ============================================================
# MCP 协议 SDKClaude 通信核心)
mcp>=1.0.0
# 模糊匹配(记忆桶搜索)
rapidfuzz>=3.0.0
# OpenAI 兼容客户端(支持 DeepSeek/Ollama/LM Studio/vLLM/Gemini 等任意兼容 API
openai>=1.0.0
# YAML 配置解析
pyyaml>=6.0
# Markdown frontmatter 解析(桶文件读写)
python-frontmatter>=1.1.0
# 中文分词
jieba>=0.42.1
# 异步 HTTP 客户端(应用层保活 ping
httpx>=0.27.0

620
server.py Normal file
View File

@@ -0,0 +1,620 @@
# ============================================================
# Module: MCP Server Entry Point (server.py)
# 模块MCP 服务器主入口
#
# Starts the Ombre Brain MCP service and registers memory
# operation tools for Claude to call.
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
#
# Core responsibilities:
# 核心职责:
# - Initialize config, bucket manager, dehydrator, decay engine
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
# - Expose 5 MCP tools:
# 暴露 5 个 MCP 工具:
# breath — Surface unresolved memories or search by keyword
# 浮现未解决记忆 或 按关键词检索
# hold — Store a single memory
# 存储单条记忆
# grow — Diary digest, auto-split into multiple buckets
# 日记归档,自动拆分多桶
# trace — Modify metadata / resolved / delete
# 修改元数据 / resolved 标记 / 删除
# pulse — System status + bucket listing
# 系统状态 + 所有桶列表
#
# Startup:
# 启动方式:
# Local: python server.py
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
# Docker: docker-compose up
# ============================================================
import os
import sys
import random
import logging
import asyncio
import httpx
from typing import Optional
# --- Ensure same-directory modules can be imported ---
# --- 确保同目录下的模块能被正确导入 ---
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from mcp.server.fastmcp import FastMCP
from bucket_manager import BucketManager
from dehydrator import Dehydrator
from decay_engine import DecayEngine
from utils import load_config, setup_logging
# --- Load config & init logging / 加载配置 & 初始化日志 ---
config = load_config()
setup_logging(config.get("log_level", "INFO"))
logger = logging.getLogger("ombre_brain")
# --- Initialize three core components / 初始化三大核心组件 ---
bucket_mgr = BucketManager(config) # Bucket manager / 记忆桶管理器
dehydrator = Dehydrator(config) # Dehydrator / 脱水器
decay_engine = DecayEngine(config, bucket_mgr) # Decay engine / 衰减引擎
# --- Create MCP server instance / 创建 MCP 服务器实例 ---
# host="0.0.0.0" so Docker container's SSE is externally reachable
# stdio mode ignores host (no network)
mcp = FastMCP(
"Ombre Brain",
host="0.0.0.0",
port=8000,
)
# =============================================================
# /health endpoint: lightweight keepalive
# 轻量保活接口
# For Cloudflare Tunnel or reverse proxy to ping, preventing idle timeout
# 供 Cloudflare Tunnel 或反代定期 ping防止空闲超时断连
# =============================================================
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """Lightweight keepalive endpoint for tunnels / reverse proxies.

    Returns bucket totals and decay-engine state, or a 500 payload when
    the bucket manager is unreachable.
    """
    from starlette.responses import JSONResponse
    try:
        stats = await bucket_mgr.get_stats()
        payload = {
            "status": "ok",
            "buckets": stats["permanent_count"] + stats["dynamic_count"],
            "decay_engine": "running" if decay_engine.is_running else "stopped",
        }
        return JSONResponse(payload)
    except Exception as e:
        return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)
# =============================================================
# Internal helper: merge-or-create
# 内部辅助:检查是否可合并,可以则合并,否则新建
# Shared by hold and grow to avoid duplicate logic
# hold 和 grow 共用,避免重复逻辑
# =============================================================
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Merge into a sufficiently similar existing bucket, or create a new one.

    Shared by hold and grow. Returns (bucket name or id, True) after a
    merge, or (new bucket id, False) after a create. Pinned/protected
    buckets are never merged into, and any failure along the merge path
    degrades to creating a fresh bucket.
    """
    try:
        candidates = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        candidates = []
    if candidates and candidates[0].get("score", 0) > config.get("merge_threshold", 75):
        top = candidates[0]
        meta = top["metadata"]
        # Pinned/protected buckets are immutable merge targets.
        if not (meta.get("pinned") or meta.get("protected")):
            try:
                combined = await dehydrator.merge(top["content"], content)
                await bucket_mgr.update(
                    top["id"],
                    content=combined,
                    tags=list(set(meta.get("tags", []) + tags)),
                    importance=max(meta.get("importance", 5), importance),
                    domain=list(set(meta.get("domain", []) + domain)),
                    valence=valence,
                    arousal=arousal,
                )
                return meta.get("name", top["id"]), True
            except Exception as e:
                logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")
    new_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return new_id, False
# =============================================================
# Tool 1: breath — Breathe
# 工具 1breath — 呼吸
#
# No args: surface highest-weight unresolved memories (active push)
# 无参数:浮现权重最高的未解决记忆
# With args: search by keyword + emotion coordinates
# 有参数:按关键词+情感坐标检索记忆
# =============================================================
@mcp.tool()
async def breath(
    query: Optional[str] = None,
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    """检索/浮现记忆。不传query或传空=自动浮现,有query=关键词检索。domain逗号分隔,valence/arousal 0~1(-1忽略)。"""
    # NOTE: the docstring above doubles as the MCP tool description the
    # model sees, so it is intentionally short — do not expand it here.
    # Lazy-start the background decay loop on first tool use.
    await decay_engine.ensure_started()
    # --- No args or empty query: surfacing mode (weight-pool push) ---
    if not query or not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"
        # Pinned/protected buckets always surface as "core principles".
        pinned_buckets = [
            b for b in all_buckets
            if b["metadata"].get("pinned") or b["metadata"].get("protected")
        ]
        pinned_results = []
        for b in pinned_buckets:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                pinned_results.append(f"📌 [核心准则] {summary}")
            except Exception as e:
                # One bad bucket must not block the rest of the surfacing.
                logger.warning(f"Failed to dehydrate pinned bucket / 钉选桶脱水失败: {e}")
                continue
        # Unresolved, non-permanent, non-pinned buckets compete on weight.
        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
            and not b["metadata"].get("pinned", False)
            and not b["metadata"].get("protected", False)
        ]
        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        # Only the top 2 by decay-engine weight are surfaced.
        top = scored[:2]
        dynamic_results = []
        for b in top:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                # touch() records the access so decay weighting updates.
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                dynamic_results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue
        if not pinned_results and not dynamic_results:
            return "权重池平静,没有需要处理的记忆。"
        parts = []
        if pinned_results:
            parts.append("=== 核心准则 ===\n" + "\n---\n".join(pinned_results))
        if dynamic_results:
            parts.append("=== 浮现记忆 ===\n" + "\n---\n".join(dynamic_results))
        return "\n\n".join(parts)
    # --- With a query: keyword search mode ---
    # Empty string / out-of-range emotion coordinates mean "no filter".
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None
    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"
    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue
    # Random surfacing: when search returned < 3 hits, with 40% probability
    # let 1-3 low-weight (< 2.0) old buckets drift up alongside them.
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    drift_results.append(f"[surface_type: random]\n{summary}")
                results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            # Drift is best-effort; never fail the search over it.
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")
    if not results:
        return "未找到相关记忆。"
    return "\n---\n".join(results)
# =============================================================
# Tool 2: hold — Hold on to this
# 工具 2hold — 握住,留下来
# =============================================================
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
    pinned: bool = False,
) -> str:
    """存储单条记忆,自动打标+合并。tags逗号分隔,importance 1-10。pinned=True创建永久钉选桶。"""
    # NOTE: the docstring doubles as the MCP tool description — keep short.
    await decay_engine.ensure_started()
    # Reject empty input up front.
    if not content or not content.strip():
        return "内容为空,无法存储。"
    importance = max(1, min(10, importance))
    manual_tags = [t.strip() for t in tags.split(",") if t.strip()]
    # Step 1: auto-tag via the dehydrator; degrade to neutral defaults.
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {
            "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
            "tags": [], "suggested_name": "",
        }
    domain = analysis["domain"]
    valence = analysis["valence"]
    arousal = analysis["arousal"]
    suggested_name = analysis.get("suggested_name", "")
    # De-dup while preserving order: auto tags first, then manual ones.
    all_tags = list(dict.fromkeys(analysis["tags"] + manual_tags))
    # Pinned buckets skip merging and go straight to the permanent dir.
    if pinned:
        bucket_id = await bucket_mgr.create(
            content=content,
            tags=all_tags,
            importance=10,
            domain=domain,
            valence=valence,
            arousal=arousal,
            name=suggested_name or None,
            bucket_type="permanent",
            pinned=True,
        )
        return f"📌钉选→{bucket_id} {','.join(domain)}"
    # Step 2: merge into a similar bucket or create a new one.
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )
    action = "合并→" if is_merged else "新建→"
    return f"{action}{result_name} {','.join(domain)}"
# =============================================================
# Tool 3: grow — Grow, fragments become memories
# 工具 3grow — 生长,一天的碎片长成记忆
# =============================================================
@mcp.tool()
async def grow(content: str) -> str:
    """日记归档,自动拆分为多桶。短内容(<30字)走快速路径。"""
    # NOTE: the docstring doubles as the MCP tool description — keep short.
    await decay_engine.ensure_started()
    if not content or not content.strip():
        return "内容为空,无法整理。"
    # --- Short-content fast path: skip digest, reuse hold-style logic ---
    # For tiny inputs (like "1"), a digest call would ship the full
    # DIGEST_PROMPT (~800 tokens) to the API for nothing; instead run
    # analyze + merge-or-create directly.
    if len(content.strip()) < 30:
        logger.info(f"grow short-content fast path: {len(content.strip())} chars")
        try:
            analysis = await dehydrator.analyze(content)
        except Exception as e:
            # Degrade to neutral defaults when auto-tagging fails.
            logger.warning(f"Fast-path analyze failed / 快速路径打标失败: {e}")
            analysis = {
                "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
                "tags": [], "suggested_name": "",
            }
        result_name, is_merged = await _merge_or_create(
            content=content.strip(),
            tags=analysis.get("tags", []),
            # analyze() may not return "importance"; guard its type.
            importance=analysis.get("importance", 5) if isinstance(analysis.get("importance"), int) else 5,
            domain=analysis.get("domain", ["未分类"]),
            valence=analysis.get("valence", 0.5),
            arousal=analysis.get("arousal", 0.3),
            name=analysis.get("suggested_name", ""),
        )
        action = "合并" if is_merged else "新建"
        return f"{action}→{result_name} | {','.join(analysis.get('domain', []))} V{analysis.get('valence', 0.5):.1f}/A{analysis.get('arousal', 0.3):.1f}"
    # --- Step 1: let the API split and organize the diary ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"
    if not items:
        return "内容为空或整理失败。"
    results = []
    created = 0
    merged = 0
    # --- Step 2: merge-or-create each entry; one failure never aborts ---
    for item in items:
        try:
            result_name, is_merged = await _merge_or_create(
                content=item["content"],
                tags=item.get("tags", []),
                importance=item.get("importance", 5),
                domain=item.get("domain", ["未分类"]),
                valence=item.get("valence", 0.5),
                arousal=item.get("arousal", 0.3),
                name=item.get("name", ""),
            )
            if is_merged:
                results.append(f"📎{result_name}")
                merged += 1
            else:
                results.append(f"📝{item.get('name', result_name)}")
                created += 1
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{item.get('name', '?')}: {e}"
            )
            results.append(f"⚠️{item.get('name', '?')}")
    return f"共{len(items)}条|新{created}并{merged}\n" + "\n".join(results)
# =============================================================
# Tool 4: trace — Trace, redraw the outline of a memory
# 工具 4trace — 描摹,重新勾勒记忆的轮廓
# Also handles deletion (delete=True)
# 同时承接删除功能
# =============================================================
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    pinned: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1沉底/0激活,pinned=1钉选/0取消,delete=True删除。只传需改的,-1或空=不改。"""
    # NOTE: the docstring doubles as the MCP tool description — keep short.
    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"
    # Deletion short-circuits everything else.
    if delete:
        removed = await bucket_mgr.delete(bucket_id)
        return f"已遗忘记忆桶: {bucket_id}" if removed else f"未找到记忆桶: {bucket_id}"
    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"
    # Sentinel values (-1 / "") mean "leave this field untouched".
    updates = {}
    if name:
        updates["name"] = name
    if domain:
        updates["domain"] = [d.strip() for d in domain.split(",") if d.strip()]
    if 0 <= valence <= 1:
        updates["valence"] = valence
    if 0 <= arousal <= 1:
        updates["arousal"] = arousal
    if 1 <= importance <= 10:
        updates["importance"] = importance
    if tags:
        updates["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
    if resolved in (0, 1):
        updates["resolved"] = bool(resolved)
    if pinned in (0, 1):
        updates["pinned"] = bool(pinned)
    if pinned == 1:
        updates["importance"] = 10  # pinning locks importance at maximum
    if not updates:
        return "没有任何字段需要修改。"
    if not await bucket_mgr.update(bucket_id, **updates):
        return f"修改失败: {bucket_id}"
    changed = ", ".join(f"{k}={v}" for k, v in updates.items())
    # Spell out what a resolved-state flip means for surfacing.
    if "resolved" in updates:
        if updates["resolved"]:
            changed += " → 已沉底,只在关键词触发时重新浮现"
        else:
            changed += " → 已重新激活,将参与浮现排序"
    return f"已修改记忆桶 {bucket_id}: {changed}"
# =============================================================
# Tool 5: pulse — Heartbeat, system status + memory listing
# 工具 5pulse — 脉搏,系统状态 + 记忆列表
# =============================================================
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """系统状态+记忆桶列表。include_archive=True含归档。"""
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"

    engine_state = "运行中" if decay_engine.is_running else "已停止"
    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']}\n"
        f"动态记忆桶: {stats['dynamic_count']}\n"
        f"归档记忆桶: {stats['archive_count']}\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {engine_state}\n"
    )

    # List every bucket summary under the status header.
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        return status + f"\n列出记忆桶失败: {e}"
    if not buckets:
        return status + "\n记忆库为空。"

    def pick_icon(meta: dict) -> str:
        # Priority: pinned/protected > permanent > archived > resolved > default.
        if meta.get("pinned") or meta.get("protected"):
            return "📌"
        if meta.get("type") == "permanent":
            return "📦"
        if meta.get("type") == "archived":
            return "🗄️"
        if meta.get("resolved", False):
            # NOTE(review): resolved icon is an empty string in the original —
            # possibly a lost emoji; preserved as-is.
            return ""
        return "💭"

    rows = []
    for entry in buckets:
        meta = entry.get("metadata", {})
        try:
            weight = decay_engine.calculate_score(meta)
        except Exception:
            weight = 0.0
        marker = " [已解决]" if meta.get("resolved", False) else ""
        rows.append(
            f"{pick_icon(meta)} [{meta.get('name', entry['id'])}]{marker} "
            f"主题:{','.join(meta.get('domain', []))} "
            f"情感:V{meta.get('valence', 0.5):.1f}/A{meta.get('arousal', 0.3):.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{weight:.2f} "
            f"标签:{','.join(meta.get('tags', []))}"
        )
    return status + "\n=== 记忆列表 ===\n" + "\n".join(rows)
# --- Entry point: select transport and start the server ---
if __name__ == "__main__":
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")
    if transport in ("sse", "streamable-http"):
        import threading
        import uvicorn
        from starlette.middleware.cors import CORSMiddleware

        # Application-level keepalive: ping /health every 60s so idle
        # proxies (e.g. Cloudflare Tunnel) do not drop the connection.
        # NOTE(review): assumes `asyncio`, `httpx` and `logger` are imported /
        # defined at the top of this module (not visible here) — confirm.
        async def _keepalive_loop():
            await asyncio.sleep(10)  # Wait for server to fully start
            async with httpx.AsyncClient() as client:
                while True:
                    try:
                        # NOTE(review): port 8000 is hardcoded here and again in
                        # uvicorn.run() below — keep the two in sync.
                        await client.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as e:
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}")
                    await asyncio.sleep(60)

        def _start_keepalive():
            # Run the keepalive coroutine on its own event loop inside a
            # daemon thread, so it never blocks uvicorn's loop or shutdown.
            loop = asyncio.new_event_loop()
            loop.run_until_complete(_keepalive_loop())

        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()

        # CORS middleware so remote clients (Cloudflare Tunnel / ngrok) can
        # reach the HTTP transports from another origin.
        if transport == "streamable-http":
            _app = mcp.streamable_http_app()
        else:
            _app = mcp.sse_app()
        _app.add_middleware(
            CORSMiddleware,
            # NOTE(review): wide-open CORS — acceptable only if auth happens
            # elsewhere (tunnel/gateway); confirm before exposing publicly.
            allow_origins=["*"],
            allow_methods=["*"],
            allow_headers=["*"],
            expose_headers=["*"],
        )
        logger.info("CORS middleware enabled for remote transport / 已启用 CORS 中间件")
        uvicorn.run(_app, host="0.0.0.0", port=8000)
    else:
        # stdio (default): FastMCP drives this process's stdin/stdout directly.
        mcp.run(transport=transport)

126
test_smoke.py Normal file
View File

@@ -0,0 +1,126 @@
"""Ombre Brain 冒烟测试:验证核心功能链路"""
import asyncio
import os
# 确保模块路径
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from utils import load_config, setup_logging
from bucket_manager import BucketManager
from dehydrator import Dehydrator
from decay_engine import DecayEngine
async def main():
    """Smoke-test the core memory pipeline end to end.

    Steps: auto-tagging → bucket creation → search → dehydration →
    decay scoring → diary digestion → cleanup. Each step prints
    [OK]/[FAIL] instead of raising, so one failure does not hide the
    others; only a failed bucket creation aborts, because later steps
    reuse the created bucket id.
    """
    config = load_config()
    setup_logging("INFO")
    bm = BucketManager(config)
    dh = Dehydrator(config)
    de = DecayEngine(config, bm)
    print(f"API available: {dh.api_available}")
    print(f"base_url: {dh.base_url}")
    print()
    # ===== 1. Auto-tagging via the dehydrator's analyze() =====
    print("=== 1. analyze (自动打标) ===")
    try:
        result = await dh.analyze("今天学了 Python 的 asyncio感觉收获很大心情不错")
        print(f" domain: {result['domain']}")
        print(f" valence: {result['valence']}, arousal: {result['arousal']}")
        print(f" tags: {result['tags']}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()
    # ===== 2. Bucket creation =====
    print("=== 2. create (建桶) ===")
    try:
        bid = await bm.create(
            content="P酱喜欢猫家里养了一只橘猫叫小橘",
            tags=["", "宠物"],
            importance=7,
            domain=["生活"],
            valence=0.8,
            arousal=0.4,
        )
        print(f" bucket_id: {bid}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
        # Abort: every later step needs the bucket id created here.
        return
    print()
    # ===== 3. Keyword search =====
    print("=== 3. search (检索) ===")
    try:
        hits = await bm.search("", limit=3)
        print(f" found {len(hits)} results")
        for h in hits:
            name = h["metadata"].get("name", h["id"])
            print(f" - {name} (score={h['score']:.1f})")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()
    # ===== 4. Dehydration (compression of long content) =====
    print("=== 4. dehydrate (脱水压缩) ===")
    try:
        text = (
            "这是一段很长的内容用来测试脱水功能。"
            "P酱今天去了咖啡厅点了一杯拿铁然后坐在窗边看书看了两个小时。"
            "期间遇到了一个朋友,聊了聊最近的工作情况。回家之后写了会代码。"
        )
        summary = await dh.dehydrate(text, {})
        print(f" summary: {summary[:120]}...")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()
    # ===== 5. Decay scoring on the freshly created bucket =====
    print("=== 5. decay score (衰减评分) ===")
    try:
        bucket = await bm.get(bid)
        score = de.calculate_score(bucket["metadata"])
        print(f" score: {score:.3f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()
    # ===== 6. Diary digestion (split one entry into multiple memories) =====
    print("=== 6. digest (日记整理) ===")
    try:
        diary = (
            "今天上午写了个 Python 脚本处理数据,下午和朋友去吃了火锅很开心,"
            "晚上失眠了有点焦虑,想了想明天的面试。"
        )
        items = await dh.digest(diary)
        print(f" 拆分出 {len(items)} 条记忆:")
        for it in items:
            print(f" - [{it.get('name','')}] domain={it['domain']} V{it['valence']:.1f}/A{it['arousal']:.1f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()
    # ===== 7. Cleanup: delete the test bucket created in step 2 =====
    print("=== 7. cleanup (删除测试桶) ===")
    try:
        ok = await bm.delete(bid)
        print(f" deleted: {ok}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()
    print("=" * 40)
    print("冒烟测试完成!")


if __name__ == "__main__":
    asyncio.run(main())

159
test_tools.py Normal file
View File

@@ -0,0 +1,159 @@
"""Ombre Brain MCP tool-level end-to-end test: direct calls to @mcp.tool() functions
Ombre Brain MCP 工具层端到端测试:直接调用 @mcp.tool() 函数"""
import asyncio
import os
import sys
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
from utils import load_config, setup_logging
# Config and logging must be ready before importing server: server.py
# performs module-level initialization that reads both.
config = load_config()
setup_logging("INFO")
from server import breath, hold, trace, pulse, grow
async def main():
    """Run every MCP tool once, end to end, and print a pass/fail tally.

    Order matters: hold() creates buckets that breath()/trace() read, and
    the final step deletes all buckets via trace(delete=True).
    """
    passed = 0
    failed = 0
    # ===== pulse: status report must at least name the system =====
    print("=== [1/6] pulse ===")
    try:
        r = await pulse()
        assert "Ombre Brain" in r
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # ===== hold: create a memory =====
    print("=== [2/6] hold ===")
    try:
        r = await hold(content="P酱最喜欢的编程语言是 Python喜欢用 FastAPI 写后端", tags="编程,偏好", importance=8)
        print(f" {r.splitlines()[0]}")
        assert any(kw in r for kw in ["新建", "合并", "📌"])
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # ===== hold again: similar content should exercise the merge path =====
    print("=== [2b/6] hold (合并测试) ===")
    try:
        r = await hold(content="P酱也喜欢用 Python 写爬虫和数据分析", tags="编程", importance=6)
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # ===== breath: keyword retrieval =====
    print("=== [3/6] breath ===")
    try:
        r = await breath(query="Python 编程", max_results=3)
        print(f" 结果前80字: {r[:80]}...")
        assert "未找到" not in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # ===== breath with emotion-resonance parameters =====
    print("=== [3b/6] breath (情感共鸣检索) ===")
    try:
        r = await breath(query="编程", domain="编程", valence=0.8, arousal=0.5)
        print(f" 结果前80字: {r[:80]}...")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # --- Grab any existing bucket id so trace() has something to edit ---
    bucket_id = None
    from bucket_manager import BucketManager
    bm = BucketManager(config)
    all_buckets = await bm.list_all()
    if all_buckets:
        bucket_id = all_buckets[0]["id"]
    # ===== trace: metadata editing =====
    print("=== [4/6] trace ===")
    if bucket_id:
        try:
            r = await trace(bucket_id=bucket_id, domain="编程,创作", importance=9)
            print(f" {r}")
            assert "已修改" in r
            print(" [OK]")
            passed += 1
        except Exception as e:
            print(f" [FAIL] {e}")
            failed += 1
    else:
        print(" [SKIP] 没有可编辑的桶")
    print()
    # ===== grow: digest a multi-event diary entry =====
    print("=== [5/6] grow ===")
    try:
        diary = (
            "今天早上复习了线性代数,搞懂了特征值分解。"
            "中午和室友去吃了拉面,聊了聊暑假实习的事。"
            "下午写了一个 Flask 项目的 API 接口。"
            "晚上看了部电影叫《星际穿越》,被结尾感动哭了。"
        )
        r = await grow(content=diary)
        print(f" {r.splitlines()[0]}")
        for line in r.splitlines()[1:]:
            if line.strip():
                print(f" {line}")
        assert "条|新" in r or "整理" in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # ===== cleanup: delete every bucket via trace(delete=True) =====
    print("=== [6/6] cleanup (清理全部测试数据) ===")
    try:
        all_buckets = await bm.list_all()
        for b in all_buckets:
            r = await trace(bucket_id=b["id"], delete=True)
            print(f" {r}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()
    # Confirm the store is empty after cleanup.
    final = await pulse()
    print(f"清理后: {final.splitlines()[0]}")
    print()
    print("=" * 50)
    print(f"MCP tool test complete / 工具测试完成: {passed} passed / {failed} failed")
    if failed == 0:
        print("All passed ✓")
    else:
        print(f"{failed} failed ✗")


if __name__ == "__main__":
    asyncio.run(main())

204
utils.py Normal file
View File

@@ -0,0 +1,204 @@
# ============================================================
# Module: Common Utilities (utils.py)
# 模块:通用工具函数
#
# Provides config loading, logging init, path safety, ID generation, etc.
# 提供配置加载、日志初始化、路径安全校验、ID 生成等基础能力
#
# Depended on by: server.py, bucket_manager.py, dehydrator.py, decay_engine.py
# 被谁依赖server.py, bucket_manager.py, dehydrator.py, decay_engine.py
# ============================================================
import os
import re
import uuid
import yaml
import logging
from pathlib import Path
from datetime import datetime
def load_config(config_path: str = None) -> dict:
    """
    Load configuration file.
    加载配置文件。

    Priority: environment variables > config.yaml > built-in defaults.
    优先级:环境变量 > config.yaml > 内置默认值。

    Args:
        config_path: Explicit path to a YAML config file; defaults to
            ``config.yaml`` next to this module.

    Returns:
        A config dict that always contains every default key; file values
        are deep-merged over the defaults and env vars win over both.
        Side effect: the bucket storage subdirectories are created.
    """
    # --- Built-in defaults (fallback so it runs even without config.yaml) ---
    defaults = {
        "transport": "stdio",
        "log_level": "INFO",
        "buckets_dir": os.path.join(os.path.dirname(os.path.abspath(__file__)), "buckets"),
        "merge_threshold": 75,
        "dehydration": {
            "model": "deepseek-chat",
            "base_url": "https://api.deepseek.com/v1",
            "api_key": "",
            "max_tokens": 1024,
            "temperature": 0.1,
        },
        "decay": {
            "lambda": 0.05,
            "threshold": 0.3,
            "check_interval_hours": 24,
            "emotion_weights": {
                "base": 1.0,
                "arousal_boost": 0.8,
            },
        },
        "matching": {
            "fuzzy_threshold": 50,
            "max_results": 5,
        },
    }
    # --- Load user config from YAML file ---
    if config_path is None:
        config_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "config.yaml"
        )
    config = defaults.copy()
    if os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f) or {}
            if isinstance(file_config, dict):
                config = _deep_merge(defaults, file_config)
            else:
                logging.warning(
                    f"Config file is not a valid YAML dict, using defaults / "
                    f"配置文件不是有效的 YAML 字典,使用默认配置: {config_path}"
                )
        # Fix: also catch OSError — an unreadable file (permissions, or a
        # race after the exists() check) previously crashed load_config()
        # instead of falling back to defaults like a malformed file does.
        except (OSError, yaml.YAMLError) as e:
            logging.warning(
                f"Failed to parse config file, using defaults / "
                f"配置文件解析失败,使用默认配置: {e}"
            )
    # --- Environment variable overrides (highest priority) ---
    env_api_key = os.environ.get("OMBRE_API_KEY", "")
    if env_api_key:
        config.setdefault("dehydration", {})["api_key"] = env_api_key
    env_base_url = os.environ.get("OMBRE_BASE_URL", "")
    if env_base_url:
        config.setdefault("dehydration", {})["base_url"] = env_base_url
    env_transport = os.environ.get("OMBRE_TRANSPORT", "")
    if env_transport:
        config["transport"] = env_transport
    env_buckets_dir = os.environ.get("OMBRE_BUCKETS_DIR", "")
    if env_buckets_dir:
        config["buckets_dir"] = env_buckets_dir
    # --- Ensure bucket storage directories exist ---
    buckets_dir = config["buckets_dir"]
    for subdir in ["permanent", "dynamic", "archive"]:
        os.makedirs(os.path.join(buckets_dir, subdir), exist_ok=True)
    return config
def _deep_merge(base: dict, override: dict) -> dict:
"""
Deep-merge two dicts; override values take precedence.
深度合并两个字典override 的值覆盖 base。
"""
result = base.copy()
for key, value in override.items():
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
result[key] = _deep_merge(result[key], value)
else:
result[key] = value
return result
def setup_logging(level: str = "INFO") -> None:
    """
    Initialize the logging system.
    初始化日志系统。

    Unknown level names silently fall back to INFO. Note: in MCP stdio
    mode stdout carries the protocol, so logs must go to stderr —
    StreamHandler's default stream.
    """
    resolved = getattr(logging, level.upper(), None)
    if not isinstance(resolved, int):
        resolved = logging.INFO
    logging.basicConfig(
        level=resolved,
        format="[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[logging.StreamHandler()],  # defaults to stderr
    )
def generate_bucket_id() -> str:
    """
    Generate a unique bucket ID (12-char short UUID for readability).
    生成唯一的记忆桶 ID12 位短 UUID方便人类阅读
    """
    full_hex = uuid.uuid4().hex
    return full_hex[:12]
def sanitize_name(name: str) -> str:
    """
    Sanitize a bucket name down to safe characters only.

    Keeps word characters, whitespace, CJK ideographs and hyphens, strips
    everything else (blocking path-traversal payloads like ../../etc/passwd),
    then trims to 80 characters. Non-strings and empty results collapse
    to "unnamed".
    """
    if not isinstance(name, str):
        return "unnamed"
    kept = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE)
    kept = kept.strip()[:80]
    return kept or "unnamed"
def safe_path(base_dir: str, filename: str) -> Path:
    """
    Construct a safe file path, guaranteed to stay inside base_dir.
    构造安全的文件路径,确保最终路径始终在 base_dir 内部。

    Prevents directory traversal ("../../etc/passwd") and also the
    sibling-prefix escape the old string check allowed: for base "/a/b",
    target "/a/bc" passes ``str(target).startswith(str(base))`` even
    though it is outside base_dir.

    Raises:
        ValueError: if the resolved path escapes base_dir.
    """
    base = Path(base_dir).resolve()
    target = (base / filename).resolve()
    # is_relative_to compares path *components*, not raw string prefixes,
    # so "/a/bc" is correctly rejected for base "/a/b". (Python 3.9+.)
    if not target.is_relative_to(base):
        raise ValueError(
            f"Path safety check failed / 路径安全检查失败: "
            f"{target} is not inside / 不在 {base}"
        )
    return target
def count_tokens_approx(text: str) -> int:
    """
    Roughly estimate the token count of *text*.
    粗略估算 token 数。

    Heuristic: CJK ≈ 1.5 tokens per char, English ≈ 1.3 tokens per word,
    plus a small per-character overhead. Used only to decide whether
    dehydration is needed, so precision is not a goal.
    """
    if not text:
        return 0
    cjk_chars = re.findall(r"[\u4e00-\u9fff]", text)
    latin_words = re.findall(r"[a-zA-Z]+", text)
    estimate = len(cjk_chars) * 1.5 + len(latin_words) * 1.3 + len(text) * 0.05
    return int(estimate)
def now_iso() -> str:
    """
    Return the current local time as an ISO-8601 string (second precision).
    返回当前时间的 ISO 格式字符串。
    """
    stamp = datetime.now()
    return stamp.isoformat(timespec="seconds")

101
write_memory.py Normal file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""
Ombre Brain 手动记忆写入工具
用途:在 Copilot 端直接写入记忆文件,绕过 MCP 和 API 调用
用法:
python3 write_memory.py --name "记忆名" --content "内容" --domain "情感" --tags "标签1,标签2"
或交互模式python3 write_memory.py
"""
import os
import uuid
import argparse
from datetime import datetime
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain/dynamic")
def gen_id():
    """Return a short, unique 12-hex-char memory id."""
    raw = uuid.uuid4().hex
    return raw[:12]
def write_memory(
    name: str,
    content: str,
    domain: list[str],
    tags: list[str],
    importance: int = 7,
    valence: float = 0.5,
    arousal: float = 0.3,
):
    """Write one memory bucket as a Markdown file with YAML frontmatter.

    Bypasses the MCP server and API entirely: the file is dropped straight
    into VAULT_DIR, creating the directory if needed.

    Returns:
        The generated 12-character memory id.
    """
    mid = gen_id()
    now = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
    # YAML frontmatter: list fields render as one "- item" line per element.
    domain_yaml = "\n".join(f"- {d}" for d in domain)
    tags_yaml = "\n".join(f"- {t}" for t in tags)
    # NOTE(review): name/tags/domain are interpolated unquoted into YAML —
    # a value containing ":" or "#" could break parsing; confirm callers
    # pre-sanitize these inputs.
    md = f"""---
activation_count: 1
arousal: {arousal}
created: '{now}'
domain:
{domain_yaml}
id: {mid}
importance: {importance}
last_active: '{now}'
name: {name}
tags:
{tags_yaml}
type: dynamic
valence: {valence}
---
{content}
"""
    path = os.path.join(VAULT_DIR, f"{mid}.md")
    os.makedirs(VAULT_DIR, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(md)
    print(f"✓ 已写入: {path}")
    print(f" ID: {mid} | 名称: {name}")
    return mid
def interactive():
    """Prompt for each memory field on stdin, then write the memory file."""
    print("=== Ombre Brain 手动写入 ===")
    mem_name = input("记忆名称: ").strip()
    body = input("内容: ").strip()
    domains = [part.strip() for part in input("主题域(逗号分隔): ").split(",") if part.strip()]
    tag_list = [part.strip() for part in input("标签(逗号分隔): ").split(",") if part.strip()]
    weight = int(input("重要性(1-10, 默认7): ").strip() or "7")
    pleasantness = float(input("效价(0-1, 默认0.5): ").strip() or "0.5")
    activation = float(input("唤醒(0-1, 默认0.3): ").strip() or "0.3")
    write_memory(mem_name, body, domains, tag_list, weight, pleasantness, activation)
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="手动写入 Ombre Brain 记忆")
    parser.add_argument("--name", help="记忆名称")
    parser.add_argument("--content", help="记忆内容")
    parser.add_argument("--domain", help="主题域,逗号分隔")
    parser.add_argument("--tags", help="标签,逗号分隔")
    parser.add_argument("--importance", type=int, default=7)
    parser.add_argument("--valence", type=float, default=0.5)
    parser.add_argument("--arousal", type=float, default=0.3)
    args = parser.parse_args()
    # CLI mode requires name, content and domain; anything less falls back
    # to the interactive prompt flow. Tags are optional in CLI mode.
    if args.name and args.content and args.domain:
        write_memory(
            name=args.name,
            content=args.content,
            domain=[d.strip() for d in args.domain.split(",")],
            tags=[t.strip() for t in (args.tags or "").split(",") if t.strip()],
            importance=args.importance,
            valence=args.valence,
            arousal=args.arousal,
        )
    else:
        interactive()

1
zbpack.json Normal file
View File

@@ -0,0 +1 @@
{}