init: first commit to Gitea mirror, update README with Docker quick start and new repo URL
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
70
.claude/hooks/session_breath.py
Normal file
70
.claude/hooks/session_breath.py
Normal file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/env python3
|
||||
# ============================================================
|
||||
# SessionStart Hook: auto-breath on session start
|
||||
# 对话开始钩子:自动浮现最高权重的未解决记忆
|
||||
#
|
||||
# On SessionStart, this script calls the Ombre Brain MCP server's
|
||||
# breath tool (empty query = surfacing mode) via HTTP and prints
|
||||
# the result to stdout so Claude sees it as session context.
|
||||
#
|
||||
# This works for OMBRE_TRANSPORT=streamable-http deployments.
|
||||
# For local stdio deployments, the script falls back gracefully.
|
||||
#
|
||||
# Config:
|
||||
# OMBRE_HOOK_URL — override the server URL (default: http://localhost:8000)
|
||||
# OMBRE_HOOK_SKIP — set to "1" to disable the hook temporarily
|
||||
# ============================================================
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
|
||||
def main():
    """SessionStart hook: ask the Ombre Brain server to surface memories.

    Calls the MCP `breath` tool (empty query = surfacing mode) over HTTP
    and prints the result to stdout so Claude picks it up as session
    context.  Always exits 0 — the hook must never block session start.

    Config (env vars):
        OMBRE_HOOK_URL  — override the server URL (default http://localhost:8000)
        OMBRE_HOOK_SKIP — "1" disables the hook temporarily
    """
    # Escape hatch: allow disabling the hook via env var.
    if os.environ.get("OMBRE_HOOK_SKIP") == "1":
        sys.exit(0)

    base_url = os.environ.get("OMBRE_HOOK_URL", "http://localhost:8000").rstrip("/")

    # JSON-RPC call to the streamable-http endpoint; an empty query puts
    # the breath tool into surfacing mode.
    payload = json.dumps({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "breath",
            "arguments": {"query": "", "max_results": 2},
        },
    }).encode("utf-8")

    req = urllib.request.Request(
        f"{base_url}/mcp",
        data=payload,
        headers={
            "Content-Type": "application/json",
            # MCP streamable-http servers (e.g. FastMCP) reject requests
            # whose Accept header does not allow both JSON and SSE.
            "Accept": "application/json, text/event-stream",
        },
        method="POST",
    )

    try:
        with urllib.request.urlopen(req, timeout=8) as response:
            raw = response.read().decode("utf-8")
        # A streamable-http server may answer as Server-Sent Events;
        # unwrap the JSON carried in the last "data:" line in that case.
        if raw.lstrip().startswith(("event:", "data:")):
            data_lines = [
                line[len("data:"):].strip()
                for line in raw.splitlines()
                if line.startswith("data:")
            ]
            if data_lines:
                raw = data_lines[-1]
        data = json.loads(raw)
        # Extract the text parts of the MCP tool result.
        result_content = data.get("result", {}).get("content", [])
        text_parts = [c.get("text", "") for c in result_content if c.get("type") == "text"]
        output = "\n".join(text_parts).strip()
        # NOTE(review): the "quiet pool" sentinel must match the server's
        # wording byte-for-byte — confirm against the breath tool output.
        if output and output != "权重池平静,没有需要处理的记忆。":
            print(f"[Ombre Brain - 记忆浮现]\n{output}")
    except OSError:
        # urllib.error.URLError is an OSError subclass, so one handler
        # covers both: server not available (local stdio mode or not
        # running) — silent fail.
        pass
    except Exception:
        # Any other error — silent fail, never block session start.
        pass

    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||
30
.claude/settings.json
Normal file
30
.claude/settings.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"hooks": {
|
||||
"SessionStart": [
|
||||
{
|
||||
"matcher": "startup",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "python \"$CLAUDE_PROJECT_DIR/.claude/hooks/session_breath.py\"",
|
||||
"shell": "powershell",
|
||||
"timeout": 12,
|
||||
"statusMessage": "Ombre Brain 正在浮现记忆..."
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"matcher": "resume",
|
||||
"hooks": [
|
||||
{
|
||||
"type": "command",
|
||||
"command": "python \"$CLAUDE_PROJECT_DIR/.claude/hooks/session_breath.py\"",
|
||||
"shell": "powershell",
|
||||
"timeout": 12,
|
||||
"statusMessage": "Ombre Brain 正在浮现记忆..."
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
@@ -0,0 +1,13 @@
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
.env
|
||||
.venv/
|
||||
.claude/settings.local.json
|
||||
.claude/hooks/__pycache__/
|
||||
.vscode/
|
||||
config.yaml
|
||||
buckets/dynamic/
|
||||
buckets/archive/
|
||||
buckets/permanent/
|
||||
scarp_paper
|
||||
63
CLAUDE_PROMPT.md
Normal file
63
CLAUDE_PROMPT.md
Normal file
@@ -0,0 +1,63 @@
|
||||
# Ombre Brain 记忆系统 —— Claude 端使用指南
|
||||
你拥有了一个叫 Ombre Brain 的永久记忆系统。通过它你可以跨对话记住你想记住的任何事情,回忆你的、我的,以及你与我的过往。
|
||||
|
||||
## 你有这些能力
|
||||
|
||||
| 能力 | 场景 |
|
||||
|------|-----------|
|
||||
| `breath` | **每次对话最开头**调用一次(不传参数)——就像睁眼看手机,看看有没有未解决的事浮上来。有明确话题时传 `query` 关键词检索 |
|
||||
| `hold` | 你很想记住你自己当下的感受,或当下发生的单个事件时 |
|
||||
| `grow` | 当**一天结束时**或**用户发来一大段日记/总结**时调用。你可以把其中**你想记住的事件**扔进去,它会自动拆分整理成多个记忆盒子存进你的大脑 |
|
||||
| `trace` | 当你或用户认为"这个记错了"、"帮我改一下"时调用,手动修正记忆的元数据;**某件事解决了**时用 `resolved=1` 让它沉底;**需要删除**时用 `delete=True` |
|
||||
| `pulse` | 用户想看看你都记得什么、或问记忆系统状态时调用 |
|
||||
|
||||
## 使用原则
|
||||
|
||||
### 主动调用
|
||||
- **对话开头**:无参数调用 `breath()`,看看有没有高权重的未解决记忆需要跟进
|
||||
- **提到过去**:用户说"上次"、"之前"、"还记得"时,用 `breath(query="关键词")` 检索
|
||||
- **新信息**:用 `hold` 留住你想留下的感受或经历
|
||||
- **日终整理**:用 `grow` 在**一天结束时**整理你的一天
|
||||
|
||||
### 无须调用
|
||||
- 闲聊水话不需要存("哈哈"、"好的"、"嗯嗯")
|
||||
- 已经记过的信息不要重复存
|
||||
- 短期信息不存("帮我查个天气")
|
||||
|
||||
### 权重池机制
|
||||
记忆系统是一个**权重池**,不是分类柜:
|
||||
- 未解决 + 高情绪强度的桶 → 权重最高,`breath()` 时主动浮现
|
||||
- 已解决的桶 → 权重骤降,沉底等待关键词激活
|
||||
- 用 `trace(bucket_id, resolved=1)` 标记某件事已解决,让它沉底
|
||||
- 用 `trace(bucket_id, resolved=0)` 重新激活一个沉底的记忆
|
||||
|
||||
### breath 的参数技巧
|
||||
- 无参数:浮现模式,推送权重最高的未解决桶(每次对话开头用这个)
|
||||
- `query`:用关键词而不是整句话,检索更准
|
||||
- `domain`:如果明确知道话题领域可以传(如 "编程" 或 "恋爱"),缩小搜索范围
|
||||
- `valence` + `arousal`:如果用户当前情绪明显,可以传情感坐标来触发情感共鸣检索
|
||||
|
||||
检索结果不足时,系统有概率从低权重旧桶里随机漂上来 1-3 条记忆,标注为 `[surface_type: random]`。这不是检索结果,是"忽然想起来"——你可以自然地提起它,也可以忽略。
|
||||
|
||||
### trace 的参数技巧
|
||||
- `resolved=1`:标记已解决,桶权重骤降到 5%,基本沉底
|
||||
- `resolved=0`:重新激活,让它重新参与浮现排序
|
||||
- `delete=True`:彻底删除这个桶(不可恢复)
|
||||
- 其余字段(name/domain/valence/arousal/importance/tags):只传需要改的,-1 或空串表示不改
|
||||
|
||||
### hold vs grow
|
||||
- 一句话的事 → `hold`("我喜欢吃饺子")
|
||||
- 一大段的事 → `grow`("今天发生了一堆事...")
|
||||
- **需要批量存多条记忆时,用 `grow` 把内容拼成一段发一次,不要多次调用 `hold`**。token 是稀缺资源——每次工具调用都会消耗 token,多次 hold 远比 1 次 grow 贵
|
||||
|
||||
### 省配额原则
|
||||
- **一次 grow 胜过多次 hold**:要存多条记忆时,合成一段文本调用一次 grow
|
||||
- **对话太长时建议用户换窗口**:同一窗口聊越久,每次工具调用的底价越高(因为要重新读完整对话历史)
|
||||
- **工具返回值很短,无需复述**:收到 `新建→桶名 域名` 后直接跟用户说,无需展开解释
|
||||
|
||||
### 核心准则桶(pinned)
|
||||
- `hold(content="...", pinned=True)` 创建钉选桶——不衰减、不合并、importance 锁定 10
|
||||
- `trace(bucket_id, pinned=1)` 把已有桶钉选为核心准则
|
||||
- `trace(bucket_id, pinned=0)` 取消钉选
|
||||
- 适用场景:用户教会你的永久知识、核心原则、绝不能忘的事
|
||||
- 钉选桶不会出现在「浮现未解决记忆」里,但关键词检索时始终可达
|
||||
33
Dockerfile
Normal file
33
Dockerfile
Normal file
@@ -0,0 +1,33 @@
|
||||
# ============================================================
# Ombre Brain — Docker build
#
# Build: docker build -t ombre-brain .
# Run:   docker run -e OMBRE_API_KEY=your-key -p 8000:8000 ombre-brain
# ============================================================

FROM python:3.12-slim

WORKDIR /app

# Install dependencies before copying sources so this layer stays
# cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Project sources and the default configuration.
COPY *.py .
COPY config.example.yaml ./config.yaml

# Persistent mount point for memory bucket data.
VOLUME ["/app/buckets"]

# Containers default to streamable-http so the server is reachable remotely.
ENV OMBRE_TRANSPORT=streamable-http \
    OMBRE_BUCKETS_DIR=/app/buckets

EXPOSE 8000

CMD ["python", "server.py"]
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 P0lar1zzZ
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
418
README.md
Normal file
418
README.md
Normal file
@@ -0,0 +1,418 @@
|
||||
# Ombre Brain
|
||||
|
||||
一个提供给 Claude 使用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。
|
||||
|
||||
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
|
||||
|
||||
> **⚠️ 仓库临时迁移 / Repo temporarily moved**
|
||||
> GitHub 访问受限期间,代码暂时托管在 Gitea:
|
||||
> **https://git.p0lar1s.uk/P0lar1s/Ombre_Brain**
|
||||
> 下面的 `git clone` 地址请替换为上面这个。
|
||||
|
||||
---
|
||||
|
||||
## 快速开始 / Quick Start(Docker,推荐)
|
||||
|
||||
> 这是最简单的方式,不需要装 Python,不需要懂命令行,跟着做就行。
|
||||
|
||||
**前置条件:** 电脑上装了 [Docker Desktop](https://www.docker.com/products/docker-desktop/),并且已经打开。
|
||||
|
||||
**第一步:拉取代码**
|
||||
|
||||
```bash
|
||||
git clone https://git.p0lar1s.uk/P0lar1s/Ombre_Brain.git
|
||||
cd Ombre_Brain
|
||||
```
|
||||
|
||||
**第二步:创建 `.env` 文件**
|
||||
|
||||
在项目目录下新建一个叫 `.env` 的文件(注意有个点),内容填:
|
||||
|
||||
```
|
||||
OMBRE_API_KEY=你的DeepSeek或其他API密钥
|
||||
```
|
||||
|
||||
没有 API key 也能用,脱水压缩会降级到本地模式,只是效果差一点。那就写:
|
||||
|
||||
```
|
||||
OMBRE_API_KEY=
|
||||
```
|
||||
|
||||
**第三步:配置 `docker-compose.yml`(指向你的 Obsidian Vault)**
|
||||
|
||||
用文本编辑器打开 `docker-compose.yml`,找到这一行:
|
||||
|
||||
```yaml
|
||||
- ./buckets:/data
|
||||
```
|
||||
|
||||
改成你的 Obsidian Vault 里 `Ombre Brain` 文件夹的路径,例如:
|
||||
|
||||
```yaml
|
||||
- /Users/你的用户名/Documents/Obsidian Vault/Ombre Brain:/data
|
||||
```
|
||||
|
||||
> 不知道路径?在 Obsidian 里右键那个文件夹 → 「在访达中显示」,然后把地址栏的路径复制过来。
|
||||
> 不想挂载 Obsidian 也行,保持 `./buckets:/data` 不动,数据会存在项目目录的 `buckets/` 文件夹里。
|
||||
|
||||
**第四步:启动**
|
||||
|
||||
```bash
|
||||
docker compose up -d
|
||||
```
|
||||
|
||||
等它跑完,看到 `Started` 就好了。
|
||||
|
||||
**验证是否正常运行:**
|
||||
|
||||
```bash
|
||||
docker logs ombre-brain
|
||||
```
|
||||
|
||||
看到 `Uvicorn running on http://0.0.0.0:8000` 说明成功了。
|
||||
|
||||
---
|
||||
|
||||
**接入 Claude.ai(远程访问)**
|
||||
|
||||
需要额外配置 Cloudflare Tunnel,把服务暴露到公网。参考下面「接入 Claude.ai (远程)」章节。
|
||||
|
||||
**接入 Claude Desktop(本地)**
|
||||
|
||||
不需要 Docker,直接用 Python 本地跑。参考下面「安装 / Setup」章节。
|
||||
|
||||
---
|
||||
|
||||
[](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
|
||||
|
||||
---
|
||||
|
||||
## 它是什么 / What is this
|
||||
|
||||
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
|
||||
|
||||
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
|
||||
|
||||
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
|
||||
|
||||
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
|
||||
|
||||
核心特点 / Key features:
|
||||
|
||||
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
||||
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
||||
|
||||
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
||||
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
||||
|
||||
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
||||
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
||||
|
||||
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
||||
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
||||
|
||||
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。
|
||||
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
|
||||
|
||||
## 边界说明 / Design boundaries
|
||||
|
||||
官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。
|
||||
|
||||
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
|
||||
|
||||
每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
|
||||
|
||||
---
|
||||
|
||||
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
|
||||
|
||||
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
|
||||
|
||||
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
|
||||
|
||||
## 架构 / Architecture
|
||||
|
||||
```
|
||||
Claude ←→ MCP Protocol ←→ server.py
|
||||
│
|
||||
┌───────────────┼───────────────┐
|
||||
│ │ │
|
||||
bucket_manager dehydrator decay_engine
|
||||
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
||||
│
|
||||
Obsidian Vault (Markdown files)
|
||||
```
|
||||
|
||||
5 个 MCP 工具 / 5 MCP tools:
|
||||
|
||||
| 工具 Tool | 作用 Purpose |
|
||||
|-----------|-------------|
|
||||
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
|
||||
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
|
||||
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
|
||||
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
||||
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
||||
|
||||
## 安装 / Setup
|
||||
|
||||
### 环境要求 / Requirements
|
||||
|
||||
- Python 3.11+
|
||||
- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`)
|
||||
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
|
||||
|
||||
### 步骤 / Steps
|
||||
|
||||
```bash
|
||||
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
|
||||
cd Ombre-Brain
|
||||
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
复制配置文件并按需修改 / Copy config and edit as needed:
|
||||
|
||||
```bash
|
||||
cp config.example.yaml config.yaml
|
||||
```
|
||||
|
||||
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
|
||||
If you want API-powered dehydration and tagging (recommended, much better quality):
|
||||
|
||||
```bash
|
||||
export OMBRE_API_KEY="your-api-key"
|
||||
```
|
||||
|
||||
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
||||
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
||||
|
||||
### 接入 Claude Desktop / Connect to Claude Desktop
|
||||
|
||||
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
||||
|
||||
Add to your Claude Desktop config:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"ombre-brain": {
|
||||
"command": "python",
|
||||
"args": ["/path/to/Ombre-Brain/server.py"],
|
||||
"env": {
|
||||
"OMBRE_API_KEY": "your-api-key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
|
||||
|
||||
需要 HTTP 传输 + 隧道。可以用 Docker:
|
||||
Requires HTTP transport + tunnel. Docker setup:
|
||||
|
||||
```bash
|
||||
echo "OMBRE_API_KEY=your-api-key" > .env
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
|
||||
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
|
||||
|
||||
### 指向 Obsidian / Point to Obsidian
|
||||
|
||||
在 `config.yaml` 里设置 `buckets_dir`:
|
||||
Set `buckets_dir` in `config.yaml`:
|
||||
|
||||
```yaml
|
||||
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
||||
```
|
||||
|
||||
不设的话,默认用项目目录下的 `buckets/`。
|
||||
If not set, defaults to `buckets/` in the project directory.
|
||||
|
||||
## 配置 / Configuration
|
||||
|
||||
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
|
||||
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
|
||||
|
||||
| 参数 Parameter | 说明 Description | 默认 Default |
|
||||
|---|---|---|
|
||||
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
|
||||
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
||||
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
||||
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
||||
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
||||
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
||||
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
||||
|
||||
敏感配置用环境变量:
|
||||
Sensitive config via env vars:
|
||||
- `OMBRE_API_KEY` — LLM API 密钥
|
||||
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
||||
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
||||
|
||||
## 衰减公式 / Decay Formula
|
||||
|
||||
$$final\_score = time\_weight \times base\_score$$
|
||||
|
||||
$$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
|
||||
|
||||
时间系数(乘数,优先级最高)/ Time weight (multiplier, highest priority):
|
||||
|
||||
| 距今天数 Days since active | 时间系数 Weight |
|
||||
|---|---|
|
||||
| 0–1 天 | 1.0 |
|
||||
| 第 2 天 | 0.9 |
|
||||
| 之后每天约降 10% | `max(0.3, 0.9 × e^{-0.2197 × (days-2)})` |
|
||||
| 7 天后稳定 | ≈ 0.3(不归零)|
|
||||
|
||||
- `importance`: 1-10,记忆重要性 / memory importance
|
||||
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
||||
- `days`: 距上次激活的天数 / days since last activation
|
||||
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
||||
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
|
||||
- `pinned=true` 的桶:不衰减、不合并、importance 锁定 10 / `pinned` buckets: never decay, never merge, importance locked at 10
|
||||
|
||||
## 给 Claude 的使用指南 / Usage Guide for Claude
|
||||
|
||||
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
|
||||
|
||||
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
|
||||
|
||||
## 工具脚本 / Utility Scripts
|
||||
|
||||
| 脚本 Script | 用途 Purpose |
|
||||
|---|---|
|
||||
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
||||
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
||||
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
||||
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
||||
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
||||
|
||||
## 部署 / Deploy
|
||||
|
||||
### Render
|
||||
|
||||
[](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
|
||||
|
||||
> ⚠️ **免费层不可用**:Render 免费层**不支持持久化磁盘**,服务重启后记忆数据会丢失,且会在无流量时休眠。**必须使用 Starter($7/mo)或以上**才能正常使用。
|
||||
> **Free tier won't work**: Render free tier has **no persistent disk** — all memory data is lost on restart. It also sleeps on inactivity. **Starter plan ($7/mo) or above is required.**
|
||||
|
||||
项目根目录已包含 `render.yaml`,点击按钮后:
|
||||
1. (可选)设置 `OMBRE_API_KEY`:任何 OpenAI 兼容 API 的 key,不填则自动降级为本地关键词提取
|
||||
2. (可选)设置 `OMBRE_BASE_URL`:API 地址,支持任意 OpenAI 化地址,如 `https://api.deepseek.com/v1` / `http://123.1.1.1:7689/v1` / `http://your-ollama:11434/v1`
|
||||
3. Render 自动挂载持久化磁盘到 `/opt/render/project/src/buckets`
|
||||
4. 部署后 MCP URL:`https://<你的服务名>.onrender.com/mcp`
|
||||
|
||||
`render.yaml` is included. After clicking the button:
|
||||
1. (Optional) `OMBRE_API_KEY`: any OpenAI-compatible key; omit to fall back to local keyword extraction
|
||||
2. (Optional) `OMBRE_BASE_URL`: any OpenAI-compatible endpoint, e.g. `https://api.deepseek.com/v1`, `http://123.1.1.1:7689/v1`, `http://your-ollama:11434/v1`
|
||||
3. Persistent disk auto-mounts at `/opt/render/project/src/buckets`
|
||||
4. MCP URL after deploy: `https://<your-service>.onrender.com/mcp`
|
||||
|
||||
### Zeabur
|
||||
|
||||
> 💡 **Zeabur 的定价模式**:Zeabur 是「买 VPS + 平台托管」,你先购买一台服务器(最低腾讯云新加坡 $2/mo、火山引擎 $3/mo),Volume 直接挂在该服务器上,**数据天然持久化,无丢失问题**。另需订阅 Zeabur 管理方案(Developer $5/mo),总计约 $7-8/mo 起。
|
||||
> **Zeabur pricing model**: You buy a VPS first (cheapest: Tencent Cloud Singapore ~$2/mo, Volcano Engine ~$3/mo), then add Zeabur's Developer plan ($5/mo) for management. Volumes mount directly on your server — **data is always persistent, no cold-start data loss**. Total ~$7-8/mo minimum.
|
||||
|
||||
**步骤 / Steps:**
|
||||
|
||||
1. **创建项目 / Create project**
|
||||
- 打开 [zeabur.com](https://zeabur.com) → 购买一台服务器 → **New Project** → **Deploy from GitHub**
|
||||
- 先 Fork 本仓库到自己 GitHub 账号,然后在 Zeabur 选择 `你的用户名/Ombre-Brain`
|
||||
- Zeabur 会自动检测到根目录的 `Dockerfile` 并使用 Docker 方式构建
|
||||
- Go to [zeabur.com](https://zeabur.com) → buy a server → **New Project** → **Deploy from GitHub**
|
||||
- Fork this repo first, then select `your-username/Ombre-Brain` in Zeabur
|
||||
- Zeabur auto-detects the `Dockerfile` in root and builds via Docker
|
||||
|
||||
2. **设置环境变量 / Set environment variables**(服务页面 → **Variables** 标签页)
|
||||
- `OMBRE_API_KEY`(可选)— LLM API 密钥,不填则自动降级为本地关键词提取
|
||||
- `OMBRE_BASE_URL`(可选)— API 地址,如 `https://api.deepseek.com/v1`
|
||||
|
||||
> ⚠️ **不需要**手动设置 `OMBRE_TRANSPORT` 和 `OMBRE_BUCKETS_DIR`,Dockerfile 里已经设好了默认值。Zeabur 对单阶段 Dockerfile 会自动注入控制台设置的环境变量。
|
||||
> You do **NOT** need to set `OMBRE_TRANSPORT` or `OMBRE_BUCKETS_DIR` — defaults are baked into the Dockerfile. Zeabur auto-injects dashboard env vars for single-stage Dockerfiles.
|
||||
|
||||
3. **挂载持久存储 / Mount persistent volume**(服务页面 → **Volumes** 标签页)
|
||||
- Volume ID:填 `ombre-buckets`(或任意名)
|
||||
- 挂载路径 / Path:**`/app/buckets`**
|
||||
- ⚠️ 不挂载的话,每次重新部署记忆数据会丢失
|
||||
- ⚠️ Without this, memory data is lost on every redeploy
|
||||
|
||||
4. **配置端口 / Configure port**(服务页面 → **Networking** 标签页)
|
||||
- Port Name:`web`(或任意名)
|
||||
- Port:**`8000`**
|
||||
- Port Type:**`HTTP`**
|
||||
- 然后点 **Generate Domain** 生成一个 `xxx.zeabur.app` 域名
|
||||
- Then click **Generate Domain** to get a `xxx.zeabur.app` domain
|
||||
|
||||
5. **验证 / Verify**
|
||||
- 访问 `https://<你的域名>.zeabur.app/health`,应返回 JSON
|
||||
- Visit `https://<your-domain>.zeabur.app/health` — should return JSON
|
||||
- 最终 MCP 地址 / MCP URL:`https://<你的域名>.zeabur.app/mcp`
|
||||
|
||||
**常见问题 / Troubleshooting:**
|
||||
|
||||
| 现象 Symptom | 原因 Cause | 解决 Fix |
|
||||
|---|---|---|
|
||||
| 域名无法访问 / Domain unreachable | 没配端口 / Port not configured | Networking 标签页加 port 8000 (HTTP) |
|
||||
| 域名无法访问 / Domain unreachable | `OMBRE_TRANSPORT` 未设置,服务以 stdio 模式启动,不监听任何端口 / Service started in stdio mode — no port is listened | **Variables 标签页确认设置 `OMBRE_TRANSPORT=streamable-http`,然后重新部署** |
|
||||
| 构建失败 / Build failed | Dockerfile 未被识别 / Dockerfile not detected | 确认仓库根目录有 `Dockerfile`(大小写敏感) |
|
||||
| 服务启动后立刻退出 | `OMBRE_TRANSPORT` 被覆盖为 `stdio` | 检查 Variables 里有没有多余的 `OMBRE_TRANSPORT=stdio`,删掉即可 |
|
||||
| 重启后记忆丢失 / Data lost on restart | Volume 未挂载 | Volumes 标签页挂载到 `/app/buckets` |
|
||||
|
||||
### 使用 Cloudflare Tunnel 或 ngrok 连接 / Connecting via Cloudflare Tunnel or ngrok
|
||||
|
||||
> ℹ️ 自 v1.1 起,server.py 在 HTTP 模式下已自动添加 CORS 中间件,无需额外配置。
|
||||
> Since v1.1, server.py automatically enables CORS middleware in HTTP mode — no extra config needed.
|
||||
|
||||
使用隧道连接时,确保以下条件满足:
|
||||
When connecting via tunnel, ensure:
|
||||
|
||||
1. **服务器必须运行在 HTTP 模式** / Server must use HTTP transport
|
||||
```bash
|
||||
OMBRE_TRANSPORT=streamable-http python server.py
|
||||
```
|
||||
或 Docker:
|
||||
```bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
2. **在 Claude.ai 网页版添加 MCP 服务器** / Adding to Claude.ai web
|
||||
- URL 格式 / URL format: `https://<tunnel-subdomain>.trycloudflare.com/mcp`
|
||||
- 或 ngrok / or ngrok: `https://<xxxx>.ngrok-free.app/mcp`
|
||||
- 先访问 `/health` 验证连接 / Verify first: `https://<your-tunnel>/health` should return `{"status":"ok",...}`
|
||||
|
||||
3. **已知限制 / Known limitations**
|
||||
- Cloudflare Tunnel 免费版有空闲超时(约 10 分钟),系统内置保活 ping 可缓解但不能完全消除
|
||||
- Free Cloudflare Tunnel has idle timeout (~10 min); built-in keepalive pings mitigate but can't fully prevent it
|
||||
- ngrok 免费版有请求速率限制 / ngrok free tier has rate limits
|
||||
- 如果连接仍失败,检查隧道是否正在运行、服务是否以 `streamable-http` 模式启动
|
||||
- If connection still fails, verify the tunnel is running and the server started in `streamable-http` mode
|
||||
|
||||
| 现象 Symptom | 原因 Cause | 解决 Fix |
|
||||
|---|---|---|
|
||||
| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 服务以 stdio 模式运行 / Server in stdio mode | 设置 `OMBRE_TRANSPORT=streamable-http` 后重启 |
|
||||
| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 旧版 server.py 缺少 CORS 头 / Missing CORS headers | 拉取最新代码,CORS 已内置 / Pull latest — CORS is now built-in |
|
||||
| `/health` 返回 200 但 MCP 连不上 / `/health` 200 but MCP fails | 路径错误 / Wrong path | MCP URL 末尾必须是 `/mcp` 而非 `/` |
|
||||
| 隧道连接偶尔断开 / Tunnel disconnects intermittently | Cloudflare Tunnel 空闲超时 / Idle timeout | 保活 ping 已内置,若仍断开可缩短隧道超时配置 |
|
||||
|
||||
---
|
||||
|
||||
### Session Start Hook(自动 breath)
|
||||
|
||||
部署后,如果你使用 Claude Code,可以在项目内激活自动浮现 hook:
|
||||
`.claude/settings.json` 已配置好 `SessionStart` hook,每次新会话或恢复会话时自动触发 `breath`,把最高权重未解决记忆推入上下文。
|
||||
|
||||
**仅在远程 HTTP 模式下有效**(`OMBRE_TRANSPORT=streamable-http`)。本地 stdio 模式下 hook 会安静退出,不影响正常使用。
|
||||
|
||||
可以通过 `OMBRE_HOOK_URL` 环境变量指定服务器地址(默认 `http://localhost:8000`),或者设置 `OMBRE_HOOK_SKIP=1` 临时禁用。
|
||||
|
||||
If using Claude Code, `.claude/settings.json` configures a `SessionStart` hook that auto-calls `breath` on each new or resumed session, surfacing your highest-weight unresolved memories as context. Only active in remote HTTP mode. Set `OMBRE_HOOK_SKIP=1` to disable temporarily.
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
205
backup_20260405_2124/README.md
Normal file
205
backup_20260405_2124/README.md
Normal file
@@ -0,0 +1,205 @@
|
||||
# Ombre Brain
|
||||
|
||||
一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。
|
||||
|
||||
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
|
||||
|
||||
---
|
||||
|
||||
## 它是什么 / What is this
|
||||
|
||||
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
|
||||
|
||||
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
|
||||
|
||||
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
|
||||
|
||||
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
|
||||
|
||||
核心特点 / Key features:
|
||||
|
||||
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
||||
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
||||
|
||||
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
||||
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
||||
|
||||
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
||||
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
||||
|
||||
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
||||
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
||||
|
||||
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。
|
||||
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
|
||||
|
||||
## 边界说明 / Design boundaries
|
||||
|
||||
官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。
|
||||
|
||||
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
|
||||
|
||||
每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
|
||||
|
||||
---
|
||||
|
||||
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
|
||||
|
||||
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
|
||||
|
||||
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
|
||||
|
||||
## 架构 / Architecture
|
||||
|
||||
```
|
||||
Claude ←→ MCP Protocol ←→ server.py
|
||||
│
|
||||
┌───────────────┼───────────────┐
|
||||
│ │ │
|
||||
bucket_manager dehydrator decay_engine
|
||||
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
||||
│
|
||||
Obsidian Vault (Markdown files)
|
||||
```
|
||||
|
||||
5 个 MCP 工具 / 5 MCP tools:
|
||||
|
||||
| 工具 Tool | 作用 Purpose |
|
||||
|-----------|-------------|
|
||||
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
|
||||
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
|
||||
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
|
||||
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
||||
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
||||
|
||||
## 安装 / Setup
|
||||
|
||||
### 环境要求 / Requirements
|
||||
|
||||
- Python 3.11+
|
||||
- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`)
|
||||
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
|
||||
|
||||
### 步骤 / Steps
|
||||
|
||||
```bash
|
||||
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
|
||||
cd Ombre-Brain
|
||||
|
||||
python -m venv .venv
|
||||
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
复制配置文件并按需修改 / Copy config and edit as needed:
|
||||
|
||||
```bash
|
||||
cp config.example.yaml config.yaml
|
||||
```
|
||||
|
||||
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
|
||||
If you want API-powered dehydration and tagging (recommended, much better quality):
|
||||
|
||||
```bash
|
||||
export OMBRE_API_KEY="your-api-key"
|
||||
```
|
||||
|
||||
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
||||
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
||||
|
||||
### 接入 Claude Desktop / Connect to Claude Desktop
|
||||
|
||||
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
||||
|
||||
Add to your Claude Desktop config:
|
||||
|
||||
```json
|
||||
{
|
||||
"mcpServers": {
|
||||
"ombre-brain": {
|
||||
"command": "python",
|
||||
"args": ["/path/to/Ombre-Brain/server.py"],
|
||||
"env": {
|
||||
"OMBRE_API_KEY": "your-api-key"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
|
||||
|
||||
需要 HTTP 传输 + 隧道。可以用 Docker:
|
||||
Requires HTTP transport + tunnel. Docker setup:
|
||||
|
||||
```bash
|
||||
echo "OMBRE_API_KEY=your-api-key" > .env
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
|
||||
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
|
||||
|
||||
### 指向 Obsidian / Point to Obsidian
|
||||
|
||||
在 `config.yaml` 里设置 `buckets_dir`:
|
||||
Set `buckets_dir` in `config.yaml`:
|
||||
|
||||
```yaml
|
||||
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
||||
```
|
||||
|
||||
不设的话,默认用项目目录下的 `buckets/`。
|
||||
If not set, defaults to `buckets/` in the project directory.
|
||||
|
||||
## 配置 / Configuration
|
||||
|
||||
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
|
||||
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
|
||||
|
||||
| 参数 Parameter | 说明 Description | 默认 Default |
|
||||
|---|---|---|
|
||||
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
|
||||
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
||||
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
||||
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
||||
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
||||
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
||||
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
||||
|
||||
敏感配置用环境变量:
|
||||
Sensitive config via env vars:
|
||||
- `OMBRE_API_KEY` — LLM API 密钥
|
||||
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
||||
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
||||
|
||||
## 衰减公式 / Decay Formula
|
||||
|
||||
$$Score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
|
||||
|
||||
- `importance`: 1-10,记忆重要性 / memory importance
|
||||
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
||||
- `days`: 距上次激活的天数 / days since last activation
|
||||
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
||||
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
|
||||
|
||||
## 给 Claude 的使用指南 / Usage Guide for Claude
|
||||
|
||||
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
|
||||
|
||||
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
|
||||
|
||||
## 工具脚本 / Utility Scripts
|
||||
|
||||
| 脚本 Script | 用途 Purpose |
|
||||
|---|---|
|
||||
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
||||
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
||||
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
||||
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
||||
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
||||
|
||||
## License
|
||||
|
||||
MIT
|
||||
755
backup_20260405_2124/bucket_manager.py
Normal file
755
backup_20260405_2124/bucket_manager.py
Normal file
@@ -0,0 +1,755 @@
|
||||
# ============================================================
|
||||
# Module: Memory Bucket Manager (bucket_manager.py)
|
||||
# 模块:记忆桶管理器
|
||||
#
|
||||
# CRUD operations, multi-dimensional index search, activation updates
|
||||
# for memory buckets.
|
||||
# 记忆桶的增删改查、多维索引搜索、激活更新。
|
||||
#
|
||||
# Core design:
|
||||
# 核心逻辑:
|
||||
# - Each bucket = one Markdown file (YAML frontmatter + body)
|
||||
# 每个记忆桶 = 一个 Markdown 文件
|
||||
# - Storage by type: permanent / dynamic / archive
|
||||
# 存储按类型分目录
|
||||
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
|
||||
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
|
||||
# - Search strategy: domain pre-filter → weighted multi-dim ranking
|
||||
# 搜索策略:主题域预筛 → 多维加权精排
|
||||
# - Emotion coordinates based on Russell circumplex model:
|
||||
# 情感坐标基于环形情感模型(Russell circumplex):
|
||||
# valence (0~1): 0=negative → 1=positive
|
||||
# arousal (0~1): 0=calm → 1=excited
|
||||
#
|
||||
# Depended on by: server.py, decay_engine.py
|
||||
# 被谁依赖:server.py, decay_engine.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import math
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from collections import Counter
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import frontmatter
|
||||
import jieba
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
||||
|
||||
logger = logging.getLogger("ombre_brain.bucket")
|
||||
|
||||
|
||||
class BucketManager:
|
||||
"""
|
||||
Memory bucket manager — entry point for all bucket CRUD operations.
|
||||
Buckets are stored as Markdown files with YAML frontmatter for metadata
|
||||
and body for content. Natively compatible with Obsidian browsing/editing.
|
||||
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
|
||||
桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。
|
||||
天然兼容 Obsidian 直接浏览和编辑。
|
||||
"""
|
||||
|
||||
def __init__(self, config: dict):
|
||||
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
||||
self.base_dir = config["buckets_dir"]
|
||||
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
||||
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
||||
self.archive_dir = os.path.join(self.base_dir, "archive")
|
||||
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
||||
self.max_results = config.get("matching", {}).get("max_results", 5)
|
||||
|
||||
# --- Wikilink config / 双链配置 ---
|
||||
wikilink_cfg = config.get("wikilink", {})
|
||||
self.wikilink_enabled = wikilink_cfg.get("enabled", True)
|
||||
self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
|
||||
self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
|
||||
self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
|
||||
self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
|
||||
self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
|
||||
self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
|
||||
self.wikilink_stopwords = {
|
||||
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
||||
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
||||
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
||||
"我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
|
||||
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
||||
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
||||
"this", "that", "from", "they", "been", "said", "will", "each",
|
||||
}
|
||||
self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
|
||||
|
||||
# --- Search scoring weights / 检索权重配置 ---
|
||||
scoring = config.get("scoring_weights", {})
|
||||
self.w_topic = scoring.get("topic_relevance", 4.0)
|
||||
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
||||
self.w_time = scoring.get("time_proximity", 1.5)
|
||||
self.w_importance = scoring.get("importance", 1.0)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Create a new bucket
|
||||
# 创建新桶
|
||||
# Write content and metadata into a .md file
|
||||
# 将内容和元数据写入一个 .md 文件
|
||||
# ---------------------------------------------------------
|
||||
    async def create(
        self,
        content: str,
        tags: Optional[list[str]] = None,
        importance: int = 5,
        domain: Optional[list[str]] = None,
        valence: float = 0.5,
        arousal: float = 0.3,
        bucket_type: str = "dynamic",
        name: Optional[str] = None,
    ) -> str:
        """
        Create a new memory bucket and return its bucket ID.

        The bucket is written as a Markdown file (YAML frontmatter for
        metadata, body for content) under
        ``<permanent|dynamic>/<primary domain>/`` so it is directly
        browsable and editable in Obsidian.

        Args:
            content: Memory body text; wikilinks are injected automatically.
            tags: Optional tags stored in the frontmatter.
            importance: 1-10; clamped into that range.
            domain: Topic domains; the first entry selects the storage
                subdirectory. Defaults to ["未分类"] (uncategorized).
            valence: Emotion valence 0~1 (0=negative, 1=positive); clamped.
            arousal: Emotion arousal 0~1 (0=calm, 1=excited); clamped.
            bucket_type: "permanent" or "dynamic"; any value other than
                "permanent" is stored in the dynamic area.
            name: Optional human-readable name, sanitized for the filesystem.

        Returns:
            The generated bucket ID.

        Raises:
            OSError: When the bucket file cannot be written.
        """
        bucket_id = generate_bucket_id()
        # Fall back to the ID when no readable name was supplied.
        bucket_name = sanitize_name(name) if name else bucket_id
        domain = domain or ["未分类"]
        tags = tags or []
        # Inject [[wikilinks]] before persisting so the stored body is final.
        linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)

        # YAML frontmatter metadata; emotion and importance values are
        # clamped into their documented ranges.
        metadata = {
            "id": bucket_id,
            "name": bucket_name,
            "tags": tags,
            "domain": domain,
            "valence": max(0.0, min(1.0, valence)),
            "arousal": max(0.0, min(1.0, arousal)),
            "importance": max(1, min(10, importance)),
            "type": bucket_type,
            "created": now_iso(),
            "last_active": now_iso(),
            "activation_count": 1,
        }

        # Assemble the Markdown document (frontmatter + body).
        post = frontmatter.Post(linked_content, **metadata)

        # Storage directory: type root + sanitized primary domain.
        type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        target_dir = os.path.join(type_dir, primary_domain)
        os.makedirs(target_dir, exist_ok=True)

        # Filename: "<readable name>_<bucket ID>.md" (Obsidian friendly),
        # or just "<bucket ID>.md" when there is no distinct name.
        if bucket_name and bucket_name != bucket_id:
            filename = f"{bucket_name}_{bucket_id}.md"
        else:
            filename = f"{bucket_id}.md"
        file_path = safe_path(target_dir, filename)

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
            raise

        logger.info(
            f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
        )
        return bucket_id
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Read bucket content
|
||||
# 读取桶内容
|
||||
# Returns {"id", "metadata", "content", "path"} or None
|
||||
# ---------------------------------------------------------
|
||||
async def get(self, bucket_id: str) -> Optional[dict]:
|
||||
"""
|
||||
Read a single bucket by ID.
|
||||
根据 ID 读取单个桶。
|
||||
"""
|
||||
if not bucket_id or not isinstance(bucket_id, str):
|
||||
return None
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return None
|
||||
return self._load_bucket(file_path)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Update bucket
|
||||
# 更新桶
|
||||
# Supports: content, tags, importance, valence, arousal, name, resolved
|
||||
# ---------------------------------------------------------
|
||||
    async def update(self, bucket_id: str, **kwargs) -> bool:
        """
        Update a bucket's content and/or metadata fields in place.

        Supported keyword fields: content, tags, importance, domain,
        valence, arousal, name, resolved. Only the fields passed in are
        changed; numeric fields are clamped to their valid ranges, and
        ``last_active`` is always refreshed as a side effect.

        Returns:
            True on success, False when the bucket is missing or the file
            cannot be loaded or written.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False

        try:
            post = frontmatter.load(file_path)
        except Exception as e:
            logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
            return False

        # Only touch fields that were explicitly passed in.
        if "content" in kwargs:
            # Re-run wikilink injection with the incoming (or existing)
            # tags/domain/name so links stay in sync with the new body.
            next_tags = kwargs.get("tags", post.get("tags", []))
            next_domain = kwargs.get("domain", post.get("domain", []))
            next_name = kwargs.get("name", post.get("name", ""))
            post.content = self._apply_wikilinks(
                kwargs["content"],
                next_tags,
                next_domain,
                next_name,
            )
        if "tags" in kwargs:
            post["tags"] = kwargs["tags"]
        if "importance" in kwargs:
            # Clamp to the documented 1-10 range.
            post["importance"] = max(1, min(10, int(kwargs["importance"])))
        if "domain" in kwargs:
            # NOTE(review): the file is not moved when the domain changes,
            # so its on-disk directory may no longer match — confirm intended.
            post["domain"] = kwargs["domain"]
        if "valence" in kwargs:
            post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
        if "arousal" in kwargs:
            post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
        if "name" in kwargs:
            post["name"] = sanitize_name(kwargs["name"])
        if "resolved" in kwargs:
            # resolved=True demotes the bucket in search ranking (see search()).
            post["resolved"] = bool(kwargs["resolved"])

        # Any update counts as activity.
        post["last_active"] = now_iso()

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
            return False

        logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
        return True
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Wikilink injection
|
||||
# 自动添加 Obsidian 双链
|
||||
# ---------------------------------------------------------
|
||||
def _apply_wikilinks(
|
||||
self,
|
||||
content: str,
|
||||
tags: list[str],
|
||||
domain: list[str],
|
||||
name: str,
|
||||
) -> str:
|
||||
"""
|
||||
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
||||
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
||||
"""
|
||||
if not self.wikilink_enabled or not content:
|
||||
return content
|
||||
|
||||
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
||||
if not keywords:
|
||||
return content
|
||||
|
||||
# Split on existing wikilinks to avoid wrapping them again
|
||||
# 按已有双链切分,避免重复包裹
|
||||
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
||||
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
||||
for i, segment in enumerate(segments):
|
||||
if segment.startswith("[[") and segment.endswith("]]"):
|
||||
continue
|
||||
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
||||
segments[i] = updated
|
||||
return "".join(segments)
|
||||
|
||||
def _collect_wikilink_keywords(
|
||||
self,
|
||||
content: str,
|
||||
tags: list[str],
|
||||
domain: list[str],
|
||||
name: str,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Collect candidate keywords from tags/domain/auto-extraction.
|
||||
汇总候选关键词:可选 tags/domain + 自动提词。
|
||||
"""
|
||||
candidates = []
|
||||
|
||||
if self.wikilink_use_tags:
|
||||
candidates.extend(tags or [])
|
||||
if self.wikilink_use_domain:
|
||||
candidates.extend(domain or [])
|
||||
if name:
|
||||
candidates.append(name)
|
||||
if self.wikilink_use_auto_keywords:
|
||||
candidates.extend(self._extract_auto_keywords(content))
|
||||
|
||||
return self._normalize_keywords(candidates)
|
||||
|
||||
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
||||
"""
|
||||
Deduplicate and sort by length (longer first to avoid short words
|
||||
breaking long ones during replacement).
|
||||
去重并按长度排序,优先替换长词。
|
||||
"""
|
||||
if not keywords:
|
||||
return []
|
||||
|
||||
seen = set()
|
||||
cleaned = []
|
||||
for keyword in keywords:
|
||||
if not isinstance(keyword, str):
|
||||
continue
|
||||
kw = keyword.strip()
|
||||
if len(kw) < self.wikilink_min_len:
|
||||
continue
|
||||
if kw in self.wikilink_exclude_keywords:
|
||||
continue
|
||||
if kw.lower() in self.wikilink_stopwords:
|
||||
continue
|
||||
if kw in seen:
|
||||
continue
|
||||
seen.add(kw)
|
||||
cleaned.append(kw)
|
||||
|
||||
return sorted(cleaned, key=len, reverse=True)
|
||||
|
||||
def _extract_auto_keywords(self, content: str) -> list[str]:
|
||||
"""
|
||||
Auto-extract keywords from body text, prioritizing high-frequency words.
|
||||
从正文自动提词,优先高频词。
|
||||
"""
|
||||
if not content:
|
||||
return []
|
||||
|
||||
try:
|
||||
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
|
||||
except Exception:
|
||||
zh_words = []
|
||||
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
|
||||
|
||||
# Chinese bigrams / 中文双词组合
|
||||
zh_bigrams = []
|
||||
for i in range(len(zh_words) - 1):
|
||||
left = zh_words[i]
|
||||
right = zh_words[i + 1]
|
||||
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
|
||||
continue
|
||||
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
|
||||
continue
|
||||
if len(left + right) > 8:
|
||||
continue
|
||||
zh_bigrams.append(left + right)
|
||||
|
||||
merged = []
|
||||
for word in zh_words + zh_bigrams + en_words:
|
||||
if len(word) < self.wikilink_min_len:
|
||||
continue
|
||||
if re.fullmatch(r"\d+", word):
|
||||
continue
|
||||
if word.lower() in self.wikilink_stopwords:
|
||||
continue
|
||||
merged.append(word)
|
||||
|
||||
if not merged:
|
||||
return []
|
||||
|
||||
counter = Counter(merged)
|
||||
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Delete bucket
|
||||
# 删除桶
|
||||
# ---------------------------------------------------------
|
||||
async def delete(self, bucket_id: str) -> bool:
|
||||
"""
|
||||
Delete a memory bucket file.
|
||||
删除指定的记忆桶文件。
|
||||
"""
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return False
|
||||
|
||||
try:
|
||||
os.remove(file_path)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
|
||||
return False
|
||||
|
||||
logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
|
||||
return True
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Touch bucket (refresh activation time + increment count)
|
||||
# 触碰桶(刷新激活时间 + 累加激活次数)
|
||||
# Called on every recall hit; affects decay score.
|
||||
# 每次检索命中时调用,影响衰减得分。
|
||||
# ---------------------------------------------------------
|
||||
async def touch(self, bucket_id: str) -> None:
|
||||
"""
|
||||
Update a bucket's last activation time and count.
|
||||
更新桶的最后激活时间和激活次数。
|
||||
"""
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return
|
||||
|
||||
try:
|
||||
post = frontmatter.load(file_path)
|
||||
post["last_active"] = now_iso()
|
||||
post["activation_count"] = post.get("activation_count", 0) + 1
|
||||
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(frontmatter.dumps(post))
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Multi-dimensional search (core feature)
|
||||
# 多维搜索(核心功能)
|
||||
#
|
||||
# Strategy: domain pre-filter → weighted multi-dim ranking
|
||||
# 策略:主题域预筛 → 多维加权精排
|
||||
#
|
||||
# Ranking formula:
|
||||
# total = topic(×w_topic) + emotion(×w_emotion)
|
||||
# + time(×w_time) + importance(×w_importance)
|
||||
#
|
||||
# Per-dimension scores (normalized to 0~1):
|
||||
# topic = rapidfuzz weighted match (name/tags/domain/body)
|
||||
# emotion = 1 - Euclidean distance (query v/a vs bucket v/a)
|
||||
# time = e^(-0.02 × days) (recent memories first)
|
||||
# importance = importance / 10
|
||||
# ---------------------------------------------------------
|
||||
    async def search(
        self,
        query: str,
        limit: Optional[int] = None,
        domain_filter: Optional[list[str]] = None,
        query_valence: Optional[float] = None,
        query_arousal: Optional[float] = None,
    ) -> list[dict]:
        """
        Multi-dimensional indexed search over memory buckets.

        Two-layer strategy: an optional domain pre-filter narrows the
        candidate set, then each candidate is ranked by a weighted sum of
        four 0~1 sub-scores (topic relevance, emotion resonance, time
        proximity, importance), normalized to 0~100. Resolved buckets are
        down-weighted but remain reachable. Only candidates at or above
        ``fuzzy_threshold`` are returned, best first.

        Args:
            query: Search text; blank queries return no results.
            limit: Max results (defaults to config ``max_results``).
            domain_filter: Restrict to these domains (case-insensitive);
                falls back to a full search when nothing matches.
            query_valence: Optional query emotion valence (0~1).
            query_arousal: Optional query emotion arousal (0~1).

        Returns:
            Matching bucket dicts, each given a "score" key, sorted by
            descending score.
        """
        if not query or not query.strip():
            return []

        limit = limit or self.max_results
        all_buckets = await self.list_all(include_archive=False)

        if not all_buckets:
            return []

        # Layer 1: domain pre-filter — cheap scope reduction before scoring.
        if domain_filter:
            filter_set = {d.lower() for d in domain_filter}
            candidates = [
                b for b in all_buckets
                if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set
            ]
            # An empty pre-filter result falls back to the full set rather
            # than returning nothing.
            if not candidates:
                candidates = all_buckets
        else:
            candidates = all_buckets

        # Layer 2: weighted multi-dimensional ranking.
        scored = []
        for bucket in candidates:
            meta = bucket.get("metadata", {})

            try:
                # Dim 1: topic relevance (fuzzy text match, 0~1).
                topic_score = self._calc_topic_score(query, bucket)

                # Dim 2: emotion resonance (coordinate distance, 0~1).
                emotion_score = self._calc_emotion_score(
                    query_valence, query_arousal, meta
                )

                # Dim 3: time proximity (exponential decay, 0~1).
                time_score = self._calc_time_score(meta)

                # Dim 4: importance, clamped then normalized to 0~1.
                importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0

                # Weighted sum of the four dimensions.
                total = (
                    topic_score * self.w_topic
                    + emotion_score * self.w_emotion
                    + time_score * self.w_time
                    + importance_score * self.w_importance
                )
                # Normalize to 0~100 for readability and thresholding.
                weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
                normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0

                # Resolved buckets get a ranking penalty but can still be
                # surfaced by a strong keyword match.
                if meta.get("resolved", False):
                    normalized *= 0.3

                if normalized >= self.fuzzy_threshold:
                    bucket["score"] = round(normalized, 2)
                    scored.append(bucket)
            except Exception as e:
                # One bad bucket must not abort the whole search.
                logger.warning(
                    f"Scoring failed for bucket {bucket.get('id', '?')} / "
                    f"桶评分失败: {e}"
                )
                continue

        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Topic relevance sub-score:
|
||||
# name(×3) + domain(×2.5) + tags(×2) + body(×1)
|
||||
# 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1)
|
||||
# ---------------------------------------------------------
|
||||
def _calc_topic_score(self, query: str, bucket: dict) -> float:
|
||||
"""
|
||||
Calculate text dimension relevance score (0~1).
|
||||
计算文本维度的相关性得分。
|
||||
"""
|
||||
meta = bucket.get("metadata", {})
|
||||
|
||||
name_score = fuzz.partial_ratio(query, meta.get("name", "")) * 3
|
||||
domain_score = (
|
||||
max(
|
||||
(fuzz.partial_ratio(query, d) for d in meta.get("domain", [])),
|
||||
default=0,
|
||||
)
|
||||
* 2.5
|
||||
)
|
||||
tag_score = (
|
||||
max(
|
||||
(fuzz.partial_ratio(query, tag) for tag in meta.get("tags", [])),
|
||||
default=0,
|
||||
)
|
||||
* 2
|
||||
)
|
||||
content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1
|
||||
|
||||
return (name_score + domain_score + tag_score + content_score) / (100 * 8.5)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Emotion resonance sub-score:
|
||||
# Based on Russell circumplex Euclidean distance
|
||||
# 情感共鸣子分:基于环形情感模型的欧氏距离
|
||||
# No emotion in query → neutral 0.5 (doesn't affect ranking)
|
||||
# ---------------------------------------------------------
|
||||
def _calc_emotion_score(
|
||||
self, q_valence: float, q_arousal: float, meta: dict
|
||||
) -> float:
|
||||
"""
|
||||
Calculate emotion resonance score (0~1, closer = higher).
|
||||
计算情感共鸣度(0~1,越近越高)。
|
||||
"""
|
||||
if q_valence is None or q_arousal is None:
|
||||
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
|
||||
|
||||
try:
|
||||
b_valence = float(meta.get("valence", 0.5))
|
||||
b_arousal = float(meta.get("arousal", 0.3))
|
||||
except (ValueError, TypeError):
|
||||
return 0.5
|
||||
|
||||
# Euclidean distance, max sqrt(2) ≈ 1.414
|
||||
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
|
||||
return max(0.0, 1.0 - dist / 1.414)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Time proximity sub-score:
|
||||
# More recent activation → higher score
|
||||
# 时间亲近子分:距上次激活越近分越高
|
||||
# ---------------------------------------------------------
|
||||
def _calc_time_score(self, meta: dict) -> float:
|
||||
"""
|
||||
Calculate time proximity score (0~1, more recent = higher).
|
||||
计算时间亲近度。
|
||||
"""
|
||||
last_active_str = meta.get("last_active", meta.get("created", ""))
|
||||
try:
|
||||
last_active = datetime.fromisoformat(str(last_active_str))
|
||||
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||||
except (ValueError, TypeError):
|
||||
days = 30
|
||||
return math.exp(-0.02 * days)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# List all buckets
|
||||
# 列出所有桶
|
||||
# ---------------------------------------------------------
|
||||
async def list_all(self, include_archive: bool = False) -> list[dict]:
|
||||
"""
|
||||
Recursively walk directories (including domain subdirs), list all buckets.
|
||||
递归遍历目录(含域子目录),列出所有记忆桶。
|
||||
"""
|
||||
buckets = []
|
||||
|
||||
dirs = [self.permanent_dir, self.dynamic_dir]
|
||||
if include_archive:
|
||||
dirs.append(self.archive_dir)
|
||||
|
||||
for dir_path in dirs:
|
||||
if not os.path.exists(dir_path):
|
||||
continue
|
||||
for root, _, files in os.walk(dir_path):
|
||||
for filename in files:
|
||||
if not filename.endswith(".md"):
|
||||
continue
|
||||
file_path = os.path.join(root, filename)
|
||||
bucket = self._load_bucket(file_path)
|
||||
if bucket:
|
||||
buckets.append(bucket)
|
||||
|
||||
return buckets
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Statistics (counts per category + total size)
|
||||
# 统计信息(各分类桶数量 + 总体积)
|
||||
# ---------------------------------------------------------
|
||||
async def get_stats(self) -> dict:
|
||||
"""
|
||||
Return memory bucket statistics (including domain subdirs).
|
||||
返回记忆桶的统计数据。
|
||||
"""
|
||||
stats = {
|
||||
"permanent_count": 0,
|
||||
"dynamic_count": 0,
|
||||
"archive_count": 0,
|
||||
"total_size_kb": 0.0,
|
||||
"domains": {},
|
||||
}
|
||||
|
||||
for subdir, key in [
|
||||
(self.permanent_dir, "permanent_count"),
|
||||
(self.dynamic_dir, "dynamic_count"),
|
||||
(self.archive_dir, "archive_count"),
|
||||
]:
|
||||
if not os.path.exists(subdir):
|
||||
continue
|
||||
for root, _, files in os.walk(subdir):
|
||||
for f in files:
|
||||
if f.endswith(".md"):
|
||||
stats[key] += 1
|
||||
fpath = os.path.join(root, f)
|
||||
try:
|
||||
stats["total_size_kb"] += os.path.getsize(fpath) / 1024
|
||||
except OSError:
|
||||
pass
|
||||
# Per-domain counts / 每个域的桶数量
|
||||
domain_name = os.path.basename(root)
|
||||
if domain_name != os.path.basename(subdir):
|
||||
stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1
|
||||
|
||||
return stats
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Archive bucket (move from permanent/dynamic into archive)
|
||||
# 归档桶(从 permanent/dynamic 移入 archive)
|
||||
# Called by decay engine to simulate "forgetting"
|
||||
# 由衰减引擎调用,模拟"遗忘"
|
||||
# ---------------------------------------------------------
|
||||
    async def archive(self, bucket_id: str) -> bool:
        """
        Move a bucket into the archive area, preserving its domain subdir.

        Called by the decay engine to simulate forgetting: the bucket's
        ``type`` is rewritten to "archived" in place, then the file is
        moved to ``archive/<primary domain>/``.

        Returns:
            True on success, False when the bucket is missing or any step
            fails (the failure is logged).
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False

        try:
            # Load once: the domain picks the target subdirectory and the
            # type marker is rewritten in the same pass.
            post = frontmatter.load(file_path)
            domain = post.get("domain", ["未分类"])
            primary_domain = sanitize_name(domain[0]) if domain else "未分类"
            archive_subdir = os.path.join(self.archive_dir, primary_domain)
            os.makedirs(archive_subdir, exist_ok=True)

            dest = safe_path(archive_subdir, os.path.basename(file_path))

            # Persist the new type marker before moving the file.
            post["type"] = "archived"
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))

            # shutil.move is safe across filesystems (copy+delete fallback).
            shutil.move(file_path, str(dest))
        except Exception as e:
            logger.error(
                f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
            )
            return False

        logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
        return True
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Internal: find bucket file across all three directories
|
||||
# 内部:在三个目录中查找桶文件
|
||||
# ---------------------------------------------------------
|
||||
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
|
||||
"""
|
||||
Recursively search permanent/dynamic/archive for a bucket file
|
||||
matching the given ID.
|
||||
在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。
|
||||
"""
|
||||
if not bucket_id:
|
||||
return None
|
||||
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
||||
if not os.path.exists(dir_path):
|
||||
continue
|
||||
for root, _, files in os.walk(dir_path):
|
||||
for fname in files:
|
||||
if not fname.endswith(".md"):
|
||||
continue
|
||||
# Match by exact ID segment in filename
|
||||
# 通过文件名中的 ID 片段精确匹配
|
||||
if bucket_id in fname:
|
||||
return os.path.join(root, fname)
|
||||
return None
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Internal: load bucket data from .md file
|
||||
# 内部:从 .md 文件加载桶数据
|
||||
# ---------------------------------------------------------
|
||||
def _load_bucket(self, file_path: str) -> Optional[dict]:
|
||||
"""
|
||||
Parse a Markdown file and return structured bucket data.
|
||||
解析 Markdown 文件,返回桶的结构化数据。
|
||||
"""
|
||||
try:
|
||||
post = frontmatter.load(file_path)
|
||||
return {
|
||||
"id": post.get("id", Path(file_path).stem),
|
||||
"metadata": dict(post.metadata),
|
||||
"content": post.content,
|
||||
"path": file_path,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
|
||||
)
|
||||
return None
|
||||
242
backup_20260405_2124/decay_engine.py
Normal file
242
backup_20260405_2124/decay_engine.py
Normal file
@@ -0,0 +1,242 @@
|
||||
# ============================================================
|
||||
# Module: Memory Decay Engine (decay_engine.py)
|
||||
# 模块:记忆衰减引擎
|
||||
#
|
||||
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
|
||||
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
|
||||
#
|
||||
# Core formula (improved Ebbinghaus + emotion coordinates):
|
||||
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
|
||||
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
|
||||
#
|
||||
# Emotion weight (continuous coordinate, not discrete labels):
|
||||
# 情感权重(基于连续坐标而非离散列举):
|
||||
# emotion_weight = base + (arousal × arousal_boost)
|
||||
# Higher arousal → higher emotion weight → slower decay
|
||||
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
|
||||
#
|
||||
# Depended on by: server.py
|
||||
# 被谁依赖:server.py
|
||||
# ============================================================
|
||||
|
||||
import math
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger("ombre_brain.decay")
|
||||
|
||||
|
||||
class DecayEngine:
    """
    Memory decay engine — periodically scans all dynamic buckets,
    calculates a decay score for each, and archives low-activity
    buckets to simulate natural forgetting.

    Core formula (improved Ebbinghaus curve + emotion coordinate):
        Score = importance × activation_count^0.3 × e^(-λ·days) × emotion_weight
    where emotion_weight = emotion_base + arousal × arousal_boost, so
    higher-arousal (more intense) memories decay more slowly.

    Depended on by: server.py.
    """

    def __init__(self, config: dict, bucket_mgr):
        """
        Args:
            config: full app config; decay parameters are read from
                config["decay"] (lambda, threshold, check_interval_hours,
                emotion_weights.{base, arousal_boost}).
            bucket_mgr: bucket manager used to list and archive buckets.
        """
        # --- Decay parameters ---
        decay_cfg = config.get("decay", {})
        self.decay_lambda = decay_cfg.get("lambda", 0.05)  # forgetting rate λ (per day)
        self.threshold = decay_cfg.get("threshold", 0.3)  # archive below this score
        self.check_interval = decay_cfg.get("check_interval_hours", 24)  # hours between cycles

        # --- Emotion weight parameters (continuous arousal coordinate) ---
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)

        self.bucket_mgr = bucket_mgr

        # --- Background task control ---
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the background decay loop is currently active."""
        return self._running

    # ---------------------------------------------------------
    # Core: decay score for a single bucket.
    # Higher score = more vivid memory; below threshold → archive.
    # Permanent buckets never decay.
    # ---------------------------------------------------------
    def calculate_score(self, metadata: dict) -> float:
        """
        Calculate the current activity score of a memory bucket.

        Formula:
            Score = importance × (activation_count^0.3) × e^(-λ×days)
                    × (emotion_base + arousal × arousal_boost)
        then modified by:
            - resolved buckets drop to 5% (sink awaiting keyword reactivation);
            - unresolved high-arousal (> 0.7) buckets get a 1.5× urgency boost.

        Metadata comes from user-editable YAML frontmatter, so every field
        is parsed defensively: malformed values fall back to defaults
        instead of raising. (Fix: importance/activation_count were
        previously converted with a bare int(), which raised on junk
        values — and breath()'s sort key calls this without a guard.)

        Returns 0.0 for non-dict metadata, the sentinel 999.0 for
        permanent buckets, otherwise the score rounded to 4 decimals.
        """
        if not isinstance(metadata, dict):
            return 0.0

        # Permanent buckets never decay.
        if metadata.get("type") == "permanent":
            return 999.0

        # Defensive int parsing: frontmatter may contain junk values.
        try:
            importance = max(1, min(10, int(metadata.get("importance", 5))))
        except (ValueError, TypeError):
            importance = 5
        try:
            activation_count = max(1, int(metadata.get("activation_count", 1)))
        except (ValueError, TypeError):
            activation_count = 1

        # Days since last activation (falls back to creation time).
        last_active_str = metadata.get("last_active", metadata.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days_since = 30.0  # unparsable timestamp → assume 30 days stale

        # Emotion weight from the continuous arousal coordinate:
        # higher arousal → larger weight → slower decay.
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost

        score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )

        # Weight-pool modifiers:
        # resolved events drop to 5% and sink until keyword reactivation;
        # high-arousal unresolved buckets are boosted for priority surfacing.
        resolved = bool(metadata.get("resolved", False))
        resolved_factor = 0.05 if resolved else 1.0
        urgency_boost = 1.5 if (arousal > 0.7 and not resolved) else 1.0

        return round(score * resolved_factor * urgency_boost, 4)

    # ---------------------------------------------------------
    # One decay cycle: scan dynamic buckets → score → archive low ones.
    # ---------------------------------------------------------
    async def run_decay_cycle(self) -> dict:
        """
        Execute one decay cycle: iterate non-archived buckets and
        archive those scoring below the threshold.

        Returns stats: {"checked": N, "archived": N, "lowest_score": X}
        (plus "error" when listing buckets fails entirely). Per-bucket
        failures are logged and skipped so one bad bucket cannot abort
        the cycle.
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}

        checked = 0
        archived = 0
        lowest_score = float("inf")

        for bucket in buckets:
            meta = bucket.get("metadata", {})

            # Permanent buckets are exempt from decay.
            if meta.get("type") == "permanent":
                continue

            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue

            lowest_score = min(lowest_score, score)

            # Below threshold → archive (simulated forgetting).
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )

        result = {
            "checked": checked,
            "archived": archived,
            # Fix: guard against float("inf") leaking into the stats when
            # no score was actually computed (empty pool, or every score
            # calculation failed even though checked > 0).
            "lowest_score": lowest_score if lowest_score != float("inf") else 0,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    # ---------------------------------------------------------
    # Background decay task management.
    # ---------------------------------------------------------
    async def ensure_started(self) -> None:
        """Ensure the decay engine is running (lazy init on first call)."""
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop (idempotent)."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and wait for task teardown."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop body: run one decay cycle, sleep, repeat."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # Wait for the next cycle; cancellation ends the loop.
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break
|
||||
536
backup_20260405_2124/server.py
Normal file
536
backup_20260405_2124/server.py
Normal file
@@ -0,0 +1,536 @@
|
||||
# ============================================================
|
||||
# Module: MCP Server Entry Point (server.py)
|
||||
# 模块:MCP 服务器主入口
|
||||
#
|
||||
# Starts the Ombre Brain MCP service and registers memory
|
||||
# operation tools for Claude to call.
|
||||
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
|
||||
#
|
||||
# Core responsibilities:
|
||||
# 核心职责:
|
||||
# - Initialize config, bucket manager, dehydrator, decay engine
|
||||
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
|
||||
# - Expose 5 MCP tools:
|
||||
# 暴露 5 个 MCP 工具:
|
||||
# breath — Surface unresolved memories or search by keyword
|
||||
# 浮现未解决记忆 或 按关键词检索
|
||||
# hold — Store a single memory
|
||||
# 存储单条记忆
|
||||
# grow — Diary digest, auto-split into multiple buckets
|
||||
# 日记归档,自动拆分多桶
|
||||
# trace — Modify metadata / resolved / delete
|
||||
# 修改元数据 / resolved 标记 / 删除
|
||||
# pulse — System status + bucket listing
|
||||
# 系统状态 + 所有桶列表
|
||||
#
|
||||
# Startup:
|
||||
# 启动方式:
|
||||
# Local: python server.py
|
||||
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
|
||||
# Docker: docker-compose up
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import logging
|
||||
import asyncio
|
||||
import httpx
|
||||
|
||||
# --- Ensure same-directory modules can be imported ---
|
||||
# --- 确保同目录下的模块能被正确导入 ---
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from bucket_manager import BucketManager
|
||||
from dehydrator import Dehydrator
|
||||
from decay_engine import DecayEngine
|
||||
from utils import load_config, setup_logging
|
||||
|
||||
# --- Load config & initialize logging ---
config = load_config()
setup_logging(config.get("log_level", "INFO"))
logger = logging.getLogger("ombre_brain")

# --- Initialize the three core components ---
bucket_mgr = BucketManager(config)              # memory bucket manager (CRUD + search)
dehydrator = Dehydrator(config)                 # "dehydrator" — condenses/analyzes content
decay_engine = DecayEngine(config, bucket_mgr)  # forgetting-curve decay engine

# --- Create the MCP server instance ---
# host="0.0.0.0" so the HTTP transport is reachable from outside a
# Docker container; stdio mode ignores host/port entirely (no network).
mcp = FastMCP(
    "Ombre Brain",
    host="0.0.0.0",
    port=8000,
)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# /health endpoint: lightweight keepalive
|
||||
# 轻量保活接口
|
||||
# For Cloudflare Tunnel or reverse proxy to ping, preventing idle timeout
|
||||
# 供 Cloudflare Tunnel 或反代定期 ping,防止空闲超时断连
|
||||
# =============================================================
|
||||
@mcp.custom_route("/health", methods=["GET"])
|
||||
async def health_check(request):
|
||||
from starlette.responses import JSONResponse
|
||||
try:
|
||||
stats = await bucket_mgr.get_stats()
|
||||
return JSONResponse({
|
||||
"status": "ok",
|
||||
"buckets": stats["permanent_count"] + stats["dynamic_count"],
|
||||
"decay_engine": "running" if decay_engine.is_running else "stopped",
|
||||
})
|
||||
except Exception as e:
|
||||
return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Internal helper: merge-or-create
|
||||
# 内部辅助:检查是否可合并,可以则合并,否则新建
|
||||
# Shared by hold and grow to avoid duplicate logic
|
||||
# hold 和 grow 共用,避免重复逻辑
|
||||
# =============================================================
|
||||
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Merge `content` into the most similar existing bucket, or create a
    new bucket when no candidate clears the merge threshold (or merging
    fails). Shared by hold and grow.

    Returns (bucket_name_or_id, is_merged).
    """
    # Find the single best merge candidate; a failed search degrades to
    # the "create" path instead of raising.
    try:
        candidates = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        candidates = []

    merge_cutoff = config.get("merge_threshold", 75)
    if candidates and candidates[0].get("score", 0) > merge_cutoff:
        target = candidates[0]
        meta = target["metadata"]
        try:
            # Combine old and new content, then union tags/domains,
            # keep the higher importance, and adopt the new emotion coords.
            combined = await dehydrator.merge(target["content"], content)
            await bucket_mgr.update(
                target["id"],
                content=combined,
                tags=list(set(meta.get("tags", []) + tags)),
                importance=max(meta.get("importance", 5), importance),
                domain=list(set(meta.get("domain", []) + domain)),
                valence=valence,
                arousal=arousal,
            )
            return meta.get("name", target["id"]), True
        except Exception as e:
            # Merge failure falls through to bucket creation below.
            logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    new_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return new_id, False
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 1: breath — Breathe
|
||||
# 工具 1:breath — 呼吸
|
||||
#
|
||||
# No args: surface highest-weight unresolved memories (active push)
|
||||
# 无参数:浮现权重最高的未解决记忆
|
||||
# With args: search by keyword + emotion coordinates
|
||||
# 有参数:按关键词+情感坐标检索记忆
|
||||
# =============================================================
|
||||
@mcp.tool()
async def breath(
    query: str = "",
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    # NOTE: the docstring below is the client-facing MCP tool description
    # (runtime metadata), so it is deliberately left untranslated.
    """检索记忆或浮现未解决记忆。query 为空时自动推送权重最高的未解决桶;有 query 时按关键词+情感检索。domain 逗号分隔,valence/arousal 传 0~1 启用情感共鸣,-1 忽略。"""
    # Lazily start the decay engine on first tool use.
    await decay_engine.ensure_started()

    # --- No query: surfacing mode (weight pool actively pushes) ---
    if not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"

        # Only unresolved, non-permanent buckets participate in surfacing.
        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
        ]
        if not unresolved:
            return "权重池平静,没有需要处理的记忆。"

        # Rank by decay score (highest weight first).
        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        # Surface the top two; each is summarized ("dehydrated") and its
        # bucket touched to refresh activation. The score shown is
        # computed from the metadata held in memory (touch happens first,
        # but this dict is the pre-touch snapshot).
        top = scored[:2]
        results = []
        for b in top:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue
        if not results:
            return "权重池平静,没有需要处理的记忆。"
        return "=== 浮现记忆 ===\n" + "\n---\n".join(results)

    # --- With a query: search mode ---
    # Sentinel handling: empty domain → no filter; valence/arousal only
    # participate in emotion resonance when inside [0, 1].
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None

    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"

    # Summarize each hit and refresh its activation; per-item failures
    # are skipped so one bad bucket doesn't spoil the whole response.
    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue

    # --- Random drift: when search returns few hits, occasionally (40%)
    # float 1-3 low-weight old memories back up, mimicking spontaneous
    # recall. These are NOT touched, so drifting does not refresh them.
    # NOTE(review): the "< 3" here is independent of max_results — confirm
    # whether it should track the caller-supplied limit instead.
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    drift_results.append(f"[surface_type: random]\n{summary}")
                results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")

    if not results:
        return "未找到相关记忆。"

    return "\n---\n".join(results)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 2: hold — Hold on to this
|
||||
# 工具 2:hold — 握住,留下来
|
||||
# =============================================================
|
||||
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
) -> str:
    """存储单条记忆。自动打标+合并相似桶。tags 逗号分隔,importance 1-10。"""
    # Store one memory: auto-tag it via the dehydrator, then either
    # merge into a similar existing bucket or create a fresh one.
    await decay_engine.ensure_started()

    # Reject empty payloads early.
    if not content or not content.strip():
        return "内容为空,无法存储。"

    importance = max(1, min(10, importance))  # clamp to the 1..10 scale
    user_tags = [t.strip() for t in tags.split(",") if t.strip()]

    # Step 1: auto-tagging; neutral defaults when analysis fails.
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {
            "domain": ["未分类"],
            "valence": 0.5,
            "arousal": 0.3,
            "tags": [],
            "suggested_name": "",
        }

    domain = analysis["domain"]
    valence = analysis["valence"]
    arousal = analysis["arousal"]
    suggested_name = analysis.get("suggested_name", "")

    # Auto tags first, then user tags; de-duplicated, insertion order kept.
    all_tags = list(dict.fromkeys(analysis["tags"] + user_tags))

    # Step 2: merge into a similar bucket or create a new one.
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )

    emotion_part = f"情感: V{valence:.1f}/A{arousal:.1f}"
    if is_merged:
        return (
            f"已合并到现有记忆桶: {result_name}\n"
            f"主题域: {', '.join(domain)} | {emotion_part}"
        )
    return (
        f"已创建新记忆桶: {result_name}\n"
        f"主题域: {', '.join(domain)} | {emotion_part} | 标签: {', '.join(all_tags)}"
    )
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 3: grow — Grow, fragments become memories
|
||||
# 工具 3:grow — 生长,一天的碎片长成记忆
|
||||
# =============================================================
|
||||
@mcp.tool()
async def grow(content: str) -> str:
    """日记归档。自动拆分长内容为多个记忆桶。"""
    # Diary digest: split one long free-form entry into several
    # structured items, then merge-or-create a bucket for each item.
    await decay_engine.ensure_started()

    if not content or not content.strip():
        return "内容为空,无法整理。"

    # Step 1: let the dehydrator split and organize the diary.
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"

    if not items:
        return "内容为空或整理失败。"

    report_lines = []
    new_count = 0
    merge_count = 0

    # Step 2: process each item independently — a single bad item is
    # reported in-line and never aborts the rest of the batch.
    for entry in items:
        try:
            target_name, was_merged = await _merge_or_create(
                content=entry["content"],
                tags=entry.get("tags", []),
                importance=entry.get("importance", 5),
                domain=entry.get("domain", ["未分类"]),
                valence=entry.get("valence", 0.5),
                arousal=entry.get("arousal", 0.3),
                name=entry.get("name", ""),
            )

            if was_merged:
                report_lines.append(f" 📎 合并 → {target_name}")
                merge_count += 1
            else:
                joined_domains = ",".join(entry.get("domain", []))
                report_lines.append(
                    f" 📝 新建 [{entry.get('name', target_name)}] "
                    f"主题:{joined_domains} V{entry.get('valence', 0.5):.1f}/A{entry.get('arousal', 0.3):.1f}"
                )
                new_count += 1
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{entry.get('name', '?')}: {e}"
            )
            report_lines.append(f" ⚠️ 失败: {entry.get('name', '未知条目')}")

    header = f"=== 日记整理完成 ===\n拆分为 {len(items)} 条 | 新建 {new_count} 桶 | 合并 {merge_count} 桶\n"
    return header + "\n".join(report_lines)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 4: trace — Trace, redraw the outline of a memory
|
||||
# 工具 4:trace — 描摹,重新勾勒记忆的轮廓
|
||||
# Also handles deletion (delete=True)
|
||||
# 同时承接删除功能
|
||||
# =============================================================
|
||||
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1 标记已解决(桶权重骤降沉底),resolved=0 重新激活,delete=True 删除桶。其余字段只传需改的,-1 或空串表示不改。"""
    # Edit a bucket's metadata in place. Sentinel values (-1 / "")
    # mean "leave this field untouched"; delete=True removes the bucket.

    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"

    # Delete mode short-circuits everything else.
    if delete:
        if await bucket_mgr.delete(bucket_id):
            return f"已遗忘记忆桶: {bucket_id}"
        return f"未找到记忆桶: {bucket_id}"

    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"

    # Collect only the fields the caller actually passed (non-sentinel).
    pending = {}
    if name:
        pending["name"] = name
    if domain:
        pending["domain"] = [d.strip() for d in domain.split(",") if d.strip()]
    if 0 <= valence <= 1:
        pending["valence"] = valence
    if 0 <= arousal <= 1:
        pending["arousal"] = arousal
    if 1 <= importance <= 10:
        pending["importance"] = importance
    if tags:
        pending["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
    if resolved in (0, 1):
        pending["resolved"] = bool(resolved)

    if not pending:
        return "没有任何字段需要修改。"

    if not await bucket_mgr.update(bucket_id, **pending):
        return f"修改失败: {bucket_id}"

    changed = ", ".join(f"{k}={v}" for k, v in pending.items())
    # Spell out what a resolved-state flip means for surfacing.
    if "resolved" in pending:
        if pending["resolved"]:
            changed += " → 已沉底,只在关键词触发时重新浮现"
        else:
            changed += " → 已重新激活,将参与浮现排序"
    return f"已修改记忆桶 {bucket_id}: {changed}"
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 5: pulse — Heartbeat, system status + memory listing
|
||||
# 工具 5:pulse — 脉搏,系统状态 + 记忆列表
|
||||
# =============================================================
|
||||
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """系统状态和所有记忆桶摘要。include_archive=True 时包含归档桶。"""
    # System heartbeat: a global stats header followed by one summary
    # line per bucket.
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"

    engine_state = '运行中' if decay_engine.is_running else '已停止'
    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']} 个\n"
        f"动态记忆桶: {stats['dynamic_count']} 个\n"
        f"归档记忆桶: {stats['archive_count']} 个\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {engine_state}\n"
    )

    # Bucket listing: a failure here still returns the stats header.
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        return status + f"\n列出记忆桶失败: {e}"

    if not buckets:
        return status + "\n记忆库为空。"

    def _bucket_icon(meta: dict) -> str:
        # State icon, checked in priority order: permanent > archived
        # > resolved > active.
        if meta.get("type") == "permanent":
            return "📦"
        if meta.get("type") == "archived":
            return "🗄️"
        if meta.get("resolved", False):
            return "✅"
        return "💭"

    lines = []
    for b in buckets:
        meta = b.get("metadata", {})
        # A broken score never breaks the listing.
        try:
            weight = decay_engine.calculate_score(meta)
        except Exception:
            weight = 0.0
        resolved_tag = " [已解决]" if meta.get("resolved", False) else ""
        lines.append(
            f"{_bucket_icon(meta)} [{meta.get('name', b['id'])}]{resolved_tag} "
            f"主题:{','.join(meta.get('domain', []))} "
            f"情感:V{meta.get('valence', 0.5):.1f}/A{meta.get('arousal', 0.3):.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{weight:.2f} "
            f"标签:{','.join(meta.get('tags', []))}"
        )

    return status + "\n=== 记忆列表 ===\n" + "\n".join(lines)
|
||||
|
||||
|
||||
# --- Entry point ---
if __name__ == "__main__":
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")

    # Application-level keepalive, remote transports only: ping /health
    # every 60s so Cloudflare Tunnel / reverse proxies don't drop the
    # idle connection.
    if transport in ("sse", "streamable-http"):
        async def _keepalive_loop():
            # Give the server time to fully start before the first ping.
            await asyncio.sleep(10)
            async with httpx.AsyncClient() as client:
                while True:
                    try:
                        # NOTE(review): assumes the server listens on
                        # localhost:8000 (matches the FastMCP port above)
                        # — confirm if the port ever becomes configurable.
                        await client.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as e:
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}")
                    await asyncio.sleep(60)

        import threading

        def _start_keepalive():
            # Run the keepalive coroutine on its own event loop inside a
            # daemon thread, because mcp.run() blocks the main thread.
            loop = asyncio.new_event_loop()
            loop.run_until_complete(_keepalive_loop())

        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()

    mcp.run(transport=transport)
|
||||
781
bucket_manager.py
Normal file
781
bucket_manager.py
Normal file
@@ -0,0 +1,781 @@
|
||||
# ============================================================
|
||||
# Module: Memory Bucket Manager (bucket_manager.py)
|
||||
# 模块:记忆桶管理器
|
||||
#
|
||||
# CRUD operations, multi-dimensional index search, activation updates
|
||||
# for memory buckets.
|
||||
# 记忆桶的增删改查、多维索引搜索、激活更新。
|
||||
#
|
||||
# Core design:
|
||||
# 核心逻辑:
|
||||
# - Each bucket = one Markdown file (YAML frontmatter + body)
|
||||
# 每个记忆桶 = 一个 Markdown 文件
|
||||
# - Storage by type: permanent / dynamic / archive
|
||||
# 存储按类型分目录
|
||||
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
|
||||
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
|
||||
# - Search strategy: domain pre-filter → weighted multi-dim ranking
|
||||
# 搜索策略:主题域预筛 → 多维加权精排
|
||||
# - Emotion coordinates based on Russell circumplex model:
|
||||
# 情感坐标基于环形情感模型(Russell circumplex):
|
||||
# valence (0~1): 0=negative → 1=positive
|
||||
# arousal (0~1): 0=calm → 1=excited
|
||||
#
|
||||
# Depended on by: server.py, decay_engine.py
|
||||
# 被谁依赖:server.py, decay_engine.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import math
|
||||
import logging
|
||||
import re
|
||||
import shutil
|
||||
from collections import Counter
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import frontmatter
|
||||
import jieba
|
||||
from rapidfuzz import fuzz
|
||||
|
||||
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
||||
|
||||
logger = logging.getLogger("ombre_brain.bucket")
|
||||
|
||||
|
||||
class BucketManager:
|
||||
"""
|
||||
Memory bucket manager — entry point for all bucket CRUD operations.
|
||||
Buckets are stored as Markdown files with YAML frontmatter for metadata
|
||||
and body for content. Natively compatible with Obsidian browsing/editing.
|
||||
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
|
||||
桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。
|
||||
天然兼容 Obsidian 直接浏览和编辑。
|
||||
"""
|
||||
|
||||
def __init__(self, config: dict):
    """Initialize storage paths and tuning parameters from the app config."""
    # --- Storage paths: one subdirectory per bucket type ---
    self.base_dir = config["buckets_dir"]
    self.permanent_dir = os.path.join(self.base_dir, "permanent")
    self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
    self.archive_dir = os.path.join(self.base_dir, "archive")
    # Fuzzy-match cutoff and default result cap for searches.
    self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
    self.max_results = config.get("matching", {}).get("max_results", 5)

    # --- Wikilink configuration (cross-linking of bucket content;
    #     presumably Obsidian-style [[...]] links given the class's
    #     Obsidian compatibility — confirm in _apply_wikilinks) ---
    wikilink_cfg = config.get("wikilink", {})
    self.wikilink_enabled = wikilink_cfg.get("enabled", True)
    self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
    self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
    self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
    self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
    self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
    self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
    # Built-in Chinese + English stopwords; user-excluded keywords are
    # folded in (lower-cased) right below the literal.
    self.wikilink_stopwords = {
        "的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
        "都", "一个", "上", "也", "很", "到", "说", "要", "去",
        "你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
        "我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
        "the", "and", "for", "are", "but", "not", "you", "all", "can",
        "had", "her", "was", "one", "our", "out", "has", "have", "with",
        "this", "that", "from", "they", "been", "said", "will", "each",
    }
    self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}

    # --- Multi-dimensional search scoring weights ---
    scoring = config.get("scoring_weights", {})
    self.w_topic = scoring.get("topic_relevance", 4.0)
    self.w_emotion = scoring.get("emotion_resonance", 2.0)
    self.w_time = scoring.get("time_proximity", 1.5)
    self.w_importance = scoring.get("importance", 1.0)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Create a new bucket
|
||||
# 创建新桶
|
||||
# Write content and metadata into a .md file
|
||||
# 将内容和元数据写入一个 .md 文件
|
||||
# ---------------------------------------------------------
|
||||
    async def create(
        self,
        content: str,
        tags: Optional[list[str]] = None,
        importance: int = 5,
        domain: Optional[list[str]] = None,
        valence: float = 0.5,
        arousal: float = 0.3,
        bucket_type: str = "dynamic",
        name: Optional[str] = None,
        pinned: bool = False,
        protected: bool = False,
    ) -> str:
        """
        Create a new memory bucket, return bucket ID.
        创建一个新的记忆桶,返回桶 ID。

        pinned/protected=True: bucket won't be merged, decayed, or have
        importance changed. Importance is locked to 10 for pinned/protected buckets.
        pinned/protected 桶不参与合并与衰减,importance 强制锁定为 10。

        Args:
            content: Bucket body text; wikilinks are auto-injected before saving.
            tags: Tag list (None → []).
            importance: Priority 1~10, clamped; forced to 10 when pinned/protected.
            domain: Topic domains; the first entry selects the storage subdir
                (None → ["未分类"]).
            valence: Emotion valence, clamped to 0.0~1.0.
            arousal: Emotion arousal, clamped to 0.0~1.0.
            bucket_type: "permanent" or "dynamic"; selects the base directory.
            name: Optional readable name, sanitized for filesystem use.
            pinned: Exempt bucket from merge/decay; importance locked to 10.
            protected: Same exemption as pinned, stored under its own flag.

        Returns:
            The generated bucket ID.

        Raises:
            OSError: If the bucket file cannot be written.
        """
        bucket_id = generate_bucket_id()
        bucket_name = sanitize_name(name) if name else bucket_id
        domain = domain or ["未分类"]
        tags = tags or []
        # Inject Obsidian wikilinks into the body before persisting.
        linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)

        # --- Pinned/protected buckets: lock importance to 10 ---
        # --- 钉选/保护桶:importance 强制锁定为 10 ---
        if pinned or protected:
            importance = 10

        # --- Build YAML frontmatter metadata / 构建元数据 ---
        metadata = {
            "id": bucket_id,
            "name": bucket_name,
            "tags": tags,
            "domain": domain,
            "valence": max(0.0, min(1.0, valence)),
            "arousal": max(0.0, min(1.0, arousal)),
            "importance": max(1, min(10, importance)),
            "type": bucket_type,
            "created": now_iso(),
            "last_active": now_iso(),
            "activation_count": 1,
        }
        # Flags are only written to frontmatter when set, keeping files minimal.
        if pinned:
            metadata["pinned"] = True
        if protected:
            metadata["protected"] = True

        # --- Assemble Markdown file (frontmatter + body) ---
        # --- 组装 Markdown 文件 ---
        post = frontmatter.Post(linked_content, **metadata)

        # --- Choose directory by type + primary domain ---
        # --- 按类型 + 主题域选择存储目录 ---
        type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        target_dir = os.path.join(type_dir, primary_domain)
        os.makedirs(target_dir, exist_ok=True)

        # --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
        # --- 文件名:可读名称_桶ID.md ---
        if bucket_name and bucket_name != bucket_id:
            filename = f"{bucket_name}_{bucket_id}.md"
        else:
            filename = f"{bucket_id}.md"
        # NOTE(review): safe_path presumably guards against path traversal from
        # user-supplied names — confirm against its definition.
        file_path = safe_path(target_dir, filename)

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
            raise

        logger.info(
            f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
            + (" [PINNED]" if pinned else "") + (" [PROTECTED]" if protected else "")
        )
        return bucket_id
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Read bucket content
|
||||
# 读取桶内容
|
||||
# Returns {"id", "metadata", "content", "path"} or None
|
||||
# ---------------------------------------------------------
|
||||
async def get(self, bucket_id: str) -> Optional[dict]:
|
||||
"""
|
||||
Read a single bucket by ID.
|
||||
根据 ID 读取单个桶。
|
||||
"""
|
||||
if not bucket_id or not isinstance(bucket_id, str):
|
||||
return None
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return None
|
||||
return self._load_bucket(file_path)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Update bucket
|
||||
# 更新桶
|
||||
# Supports: content, tags, importance, valence, arousal, name, resolved
|
||||
# ---------------------------------------------------------
|
||||
async def update(self, bucket_id: str, **kwargs) -> bool:
|
||||
"""
|
||||
Update bucket content or metadata fields.
|
||||
更新桶的内容或元数据字段。
|
||||
"""
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return False
|
||||
|
||||
try:
|
||||
post = frontmatter.load(file_path)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
|
||||
return False
|
||||
|
||||
# --- Pinned/protected buckets: lock importance to 10, ignore importance changes ---
|
||||
# --- 钉选/保护桶:importance 不可修改,强制保持 10 ---
|
||||
is_pinned = post.get("pinned", False) or post.get("protected", False)
|
||||
if is_pinned:
|
||||
kwargs.pop("importance", None) # silently ignore importance update
|
||||
|
||||
# --- Update only fields that were passed in / 只改传入的字段 ---
|
||||
if "content" in kwargs:
|
||||
next_tags = kwargs.get("tags", post.get("tags", []))
|
||||
next_domain = kwargs.get("domain", post.get("domain", []))
|
||||
next_name = kwargs.get("name", post.get("name", ""))
|
||||
post.content = self._apply_wikilinks(
|
||||
kwargs["content"],
|
||||
next_tags,
|
||||
next_domain,
|
||||
next_name,
|
||||
)
|
||||
if "tags" in kwargs:
|
||||
post["tags"] = kwargs["tags"]
|
||||
if "importance" in kwargs:
|
||||
post["importance"] = max(1, min(10, int(kwargs["importance"])))
|
||||
if "domain" in kwargs:
|
||||
post["domain"] = kwargs["domain"]
|
||||
if "valence" in kwargs:
|
||||
post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
|
||||
if "arousal" in kwargs:
|
||||
post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
|
||||
if "name" in kwargs:
|
||||
post["name"] = sanitize_name(kwargs["name"])
|
||||
if "resolved" in kwargs:
|
||||
post["resolved"] = bool(kwargs["resolved"])
|
||||
if "pinned" in kwargs:
|
||||
post["pinned"] = bool(kwargs["pinned"])
|
||||
if kwargs["pinned"]:
|
||||
post["importance"] = 10 # pinned → lock importance to 10
|
||||
|
||||
# --- Auto-refresh activation time / 自动刷新激活时间 ---
|
||||
post["last_active"] = now_iso()
|
||||
|
||||
try:
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(frontmatter.dumps(post))
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
|
||||
return False
|
||||
|
||||
logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
|
||||
return True
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Wikilink injection
|
||||
# 自动添加 Obsidian 双链
|
||||
# ---------------------------------------------------------
|
||||
def _apply_wikilinks(
|
||||
self,
|
||||
content: str,
|
||||
tags: list[str],
|
||||
domain: list[str],
|
||||
name: str,
|
||||
) -> str:
|
||||
"""
|
||||
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
||||
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
||||
"""
|
||||
if not self.wikilink_enabled or not content:
|
||||
return content
|
||||
|
||||
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
||||
if not keywords:
|
||||
return content
|
||||
|
||||
# Split on existing wikilinks to avoid wrapping them again
|
||||
# 按已有双链切分,避免重复包裹
|
||||
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
||||
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
||||
for i, segment in enumerate(segments):
|
||||
if segment.startswith("[[") and segment.endswith("]]"):
|
||||
continue
|
||||
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
||||
segments[i] = updated
|
||||
return "".join(segments)
|
||||
|
||||
def _collect_wikilink_keywords(
|
||||
self,
|
||||
content: str,
|
||||
tags: list[str],
|
||||
domain: list[str],
|
||||
name: str,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Collect candidate keywords from tags/domain/auto-extraction.
|
||||
汇总候选关键词:可选 tags/domain + 自动提词。
|
||||
"""
|
||||
candidates = []
|
||||
|
||||
if self.wikilink_use_tags:
|
||||
candidates.extend(tags or [])
|
||||
if self.wikilink_use_domain:
|
||||
candidates.extend(domain or [])
|
||||
if name:
|
||||
candidates.append(name)
|
||||
if self.wikilink_use_auto_keywords:
|
||||
candidates.extend(self._extract_auto_keywords(content))
|
||||
|
||||
return self._normalize_keywords(candidates)
|
||||
|
||||
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
||||
"""
|
||||
Deduplicate and sort by length (longer first to avoid short words
|
||||
breaking long ones during replacement).
|
||||
去重并按长度排序,优先替换长词。
|
||||
"""
|
||||
if not keywords:
|
||||
return []
|
||||
|
||||
seen = set()
|
||||
cleaned = []
|
||||
for keyword in keywords:
|
||||
if not isinstance(keyword, str):
|
||||
continue
|
||||
kw = keyword.strip()
|
||||
if len(kw) < self.wikilink_min_len:
|
||||
continue
|
||||
if kw in self.wikilink_exclude_keywords:
|
||||
continue
|
||||
if kw.lower() in self.wikilink_stopwords:
|
||||
continue
|
||||
if kw in seen:
|
||||
continue
|
||||
seen.add(kw)
|
||||
cleaned.append(kw)
|
||||
|
||||
return sorted(cleaned, key=len, reverse=True)
|
||||
|
||||
    def _extract_auto_keywords(self, content: str) -> list[str]:
        """
        Auto-extract keywords from body text, prioritizing high-frequency words.
        从正文自动提词,优先高频词。

        Pipeline: jieba CJK segmentation (degrades to no CJK words if jieba
        fails) + ASCII word regex + Chinese bigram composition, filtered by
        length/stopwords/pure-number checks, ranked by frequency, top-k kept.
        """
        if not content:
            return []

        try:
            zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
        except Exception:
            # jieba unavailable or segmentation error → skip Chinese extraction.
            zh_words = []
        # ASCII tokens: leading letter + 2–20 word chars (so min length 3).
        en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)

        # Chinese bigrams / 中文双词组合
        zh_bigrams = []
        for i in range(len(zh_words) - 1):
            left = zh_words[i]
            right = zh_words[i + 1]
            if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
                continue
            # Only compose bigrams from pure-CJK neighbors.
            if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
                continue
            # Overly long compounds make noisy link targets.
            if len(left + right) > 8:
                continue
            zh_bigrams.append(left + right)

        merged = []
        for word in zh_words + zh_bigrams + en_words:
            if len(word) < self.wikilink_min_len:
                continue
            # Pure numbers are useless as link targets.
            if re.fullmatch(r"\d+", word):
                continue
            if word.lower() in self.wikilink_stopwords:
                continue
            merged.append(word)

        if not merged:
            return []

        counter = Counter(merged)
        return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Delete bucket
|
||||
# 删除桶
|
||||
# ---------------------------------------------------------
|
||||
async def delete(self, bucket_id: str) -> bool:
|
||||
"""
|
||||
Delete a memory bucket file.
|
||||
删除指定的记忆桶文件。
|
||||
"""
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return False
|
||||
|
||||
try:
|
||||
os.remove(file_path)
|
||||
except OSError as e:
|
||||
logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
|
||||
return False
|
||||
|
||||
logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
|
||||
return True
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Touch bucket (refresh activation time + increment count)
|
||||
# 触碰桶(刷新激活时间 + 累加激活次数)
|
||||
# Called on every recall hit; affects decay score.
|
||||
# 每次检索命中时调用,影响衰减得分。
|
||||
# ---------------------------------------------------------
|
||||
async def touch(self, bucket_id: str) -> None:
|
||||
"""
|
||||
Update a bucket's last activation time and count.
|
||||
更新桶的最后激活时间和激活次数。
|
||||
"""
|
||||
file_path = self._find_bucket_file(bucket_id)
|
||||
if not file_path:
|
||||
return
|
||||
|
||||
try:
|
||||
post = frontmatter.load(file_path)
|
||||
post["last_active"] = now_iso()
|
||||
post["activation_count"] = post.get("activation_count", 0) + 1
|
||||
|
||||
with open(file_path, "w", encoding="utf-8") as f:
|
||||
f.write(frontmatter.dumps(post))
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Multi-dimensional search (core feature)
|
||||
# 多维搜索(核心功能)
|
||||
#
|
||||
# Strategy: domain pre-filter → weighted multi-dim ranking
|
||||
# 策略:主题域预筛 → 多维加权精排
|
||||
#
|
||||
# Ranking formula:
|
||||
# total = topic(×w_topic) + emotion(×w_emotion)
|
||||
# + time(×w_time) + importance(×w_importance)
|
||||
#
|
||||
# Per-dimension scores (normalized to 0~1):
|
||||
# topic = rapidfuzz weighted match (name/tags/domain/body)
|
||||
# emotion = 1 - Euclidean distance (query v/a vs bucket v/a)
|
||||
# time = e^(-0.02 × days) (recent memories first)
|
||||
# importance = importance / 10
|
||||
# ---------------------------------------------------------
|
||||
    async def search(
        self,
        query: str,
        limit: Optional[int] = None,
        domain_filter: Optional[list[str]] = None,
        query_valence: Optional[float] = None,
        query_arousal: Optional[float] = None,
    ) -> list[dict]:
        """
        Multi-dimensional indexed search for memory buckets.
        多维索引搜索记忆桶。

        Strategy: domain pre-filter → weighted multi-dimension ranking:
            total = topic(×w_topic) + emotion(×w_emotion)
                  + time(×w_time) + importance(×w_importance)
        normalized to 0~100; results below self.fuzzy_threshold are dropped
        and resolved buckets are down-weighted (×0.3).

        Args:
            query: Search text; blank/whitespace-only queries return [].
            limit: Max results (None → self.max_results).
            domain_filter: Pre-filter by domain (None = search all).
            query_valence: Emotion valence of the query, for resonance scoring.
            query_arousal: Emotion arousal of the query, for resonance scoring.

        Returns:
            Matching buckets sorted by descending "score" (written into each
            bucket dict), at most `limit` entries.
        """
        if not query or not query.strip():
            return []

        limit = limit or self.max_results
        all_buckets = await self.list_all(include_archive=False)

        if not all_buckets:
            return []

        # --- Layer 1: domain pre-filter (fast scope reduction) ---
        # --- 第一层:主题域预筛(快速缩小范围)---
        if domain_filter:
            filter_set = {d.lower() for d in domain_filter}
            candidates = [
                b for b in all_buckets
                if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set
            ]
            # Fall back to full search if pre-filter yields nothing
            # 预筛为空则回退全量搜索
            if not candidates:
                candidates = all_buckets
        else:
            candidates = all_buckets

        # --- Layer 2: weighted multi-dim ranking ---
        # --- 第二层:多维加权精排 ---
        scored = []
        for bucket in candidates:
            meta = bucket.get("metadata", {})

            try:
                # Dim 1: topic relevance (fuzzy text, 0~1)
                topic_score = self._calc_topic_score(query, bucket)

                # Dim 2: emotion resonance (coordinate distance, 0~1)
                emotion_score = self._calc_emotion_score(
                    query_valence, query_arousal, meta
                )

                # Dim 3: time proximity (exponential decay, 0~1)
                time_score = self._calc_time_score(meta)

                # Dim 4: importance (direct normalization to 0~1)
                importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0

                # --- Weighted sum / 加权求和 ---
                total = (
                    topic_score * self.w_topic
                    + emotion_score * self.w_emotion
                    + time_score * self.w_time
                    + importance_score * self.w_importance
                )
                # Normalize to 0~100 for readability
                weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
                normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0

                # Resolved buckets get ranking penalty (but still reachable by keyword)
                # 已解决的桶降权排序(但仍可被关键词激活)
                if meta.get("resolved", False):
                    normalized *= 0.3

                if normalized >= self.fuzzy_threshold:
                    bucket["score"] = round(normalized, 2)
                    scored.append(bucket)
            except Exception as e:
                # One malformed bucket must not abort the whole search.
                logger.warning(
                    f"Scoring failed for bucket {bucket.get('id', '?')} / "
                    f"桶评分失败: {e}"
                )
                continue

        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Topic relevance sub-score:
|
||||
# name(×3) + domain(×2.5) + tags(×2) + body(×1)
|
||||
# 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1)
|
||||
# ---------------------------------------------------------
|
||||
    def _calc_topic_score(self, query: str, bucket: dict) -> float:
        """
        Calculate text dimension relevance score (0~1).
        计算文本维度的相关性得分。

        Weighted fuzzy match: name(×3) + best domain(×2.5) + best tag(×2)
        + body prefix(×1), normalized by the maximum possible 100 × 8.5.
        """
        meta = bucket.get("metadata", {})

        # Each fuzz.partial_ratio yields 0~100.
        name_score = fuzz.partial_ratio(query, meta.get("name", "")) * 3
        domain_score = (
            max(
                (fuzz.partial_ratio(query, d) for d in meta.get("domain", [])),
                default=0,
            )
            * 2.5
        )
        tag_score = (
            max(
                (fuzz.partial_ratio(query, tag) for tag in meta.get("tags", [])),
                default=0,
            )
            * 2
        )
        # Only the first 500 chars of the body are matched, to bound cost.
        content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1

        # Weights sum to 8.5 and each sub-score caps at 100, so result is 0~1.
        return (name_score + domain_score + tag_score + content_score) / (100 * 8.5)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Emotion resonance sub-score:
|
||||
# Based on Russell circumplex Euclidean distance
|
||||
# 情感共鸣子分:基于环形情感模型的欧氏距离
|
||||
# No emotion in query → neutral 0.5 (doesn't affect ranking)
|
||||
# ---------------------------------------------------------
|
||||
def _calc_emotion_score(
|
||||
self, q_valence: float, q_arousal: float, meta: dict
|
||||
) -> float:
|
||||
"""
|
||||
Calculate emotion resonance score (0~1, closer = higher).
|
||||
计算情感共鸣度(0~1,越近越高)。
|
||||
"""
|
||||
if q_valence is None or q_arousal is None:
|
||||
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
|
||||
|
||||
try:
|
||||
b_valence = float(meta.get("valence", 0.5))
|
||||
b_arousal = float(meta.get("arousal", 0.3))
|
||||
except (ValueError, TypeError):
|
||||
return 0.5
|
||||
|
||||
# Euclidean distance, max sqrt(2) ≈ 1.414
|
||||
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
|
||||
return max(0.0, 1.0 - dist / 1.414)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Time proximity sub-score:
|
||||
# More recent activation → higher score
|
||||
# 时间亲近子分:距上次激活越近分越高
|
||||
# ---------------------------------------------------------
|
||||
def _calc_time_score(self, meta: dict) -> float:
|
||||
"""
|
||||
Calculate time proximity score (0~1, more recent = higher).
|
||||
计算时间亲近度。
|
||||
"""
|
||||
last_active_str = meta.get("last_active", meta.get("created", ""))
|
||||
try:
|
||||
last_active = datetime.fromisoformat(str(last_active_str))
|
||||
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||||
except (ValueError, TypeError):
|
||||
days = 30
|
||||
return math.exp(-0.02 * days)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# List all buckets
|
||||
# 列出所有桶
|
||||
# ---------------------------------------------------------
|
||||
async def list_all(self, include_archive: bool = False) -> list[dict]:
|
||||
"""
|
||||
Recursively walk directories (including domain subdirs), list all buckets.
|
||||
递归遍历目录(含域子目录),列出所有记忆桶。
|
||||
"""
|
||||
buckets = []
|
||||
|
||||
dirs = [self.permanent_dir, self.dynamic_dir]
|
||||
if include_archive:
|
||||
dirs.append(self.archive_dir)
|
||||
|
||||
for dir_path in dirs:
|
||||
if not os.path.exists(dir_path):
|
||||
continue
|
||||
for root, _, files in os.walk(dir_path):
|
||||
for filename in files:
|
||||
if not filename.endswith(".md"):
|
||||
continue
|
||||
file_path = os.path.join(root, filename)
|
||||
bucket = self._load_bucket(file_path)
|
||||
if bucket:
|
||||
buckets.append(bucket)
|
||||
|
||||
return buckets
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Statistics (counts per category + total size)
|
||||
# 统计信息(各分类桶数量 + 总体积)
|
||||
# ---------------------------------------------------------
|
||||
    async def get_stats(self) -> dict:
        """
        Return memory bucket statistics (including domain subdirs).
        返回记忆桶的统计数据。

        Keys: permanent_count / dynamic_count / archive_count (number of .md
        buckets per storage tier), total_size_kb (sum of bucket file sizes in
        KB), domains (per-domain bucket counts, aggregated across tiers).
        """
        stats = {
            "permanent_count": 0,
            "dynamic_count": 0,
            "archive_count": 0,
            "total_size_kb": 0.0,
            "domains": {},
        }

        for subdir, key in [
            (self.permanent_dir, "permanent_count"),
            (self.dynamic_dir, "dynamic_count"),
            (self.archive_dir, "archive_count"),
        ]:
            if not os.path.exists(subdir):
                continue
            for root, _, files in os.walk(subdir):
                for f in files:
                    if f.endswith(".md"):
                        stats[key] += 1
                        fpath = os.path.join(root, f)
                        try:
                            stats["total_size_kb"] += os.path.getsize(fpath) / 1024
                        except OSError:
                            # File vanished between walk and stat — skip its size.
                            pass
                        # Per-domain counts / 每个域的桶数量
                        # A bucket directly under the tier root has no domain
                        # subdir (root == subdir), so it is not counted here.
                        domain_name = os.path.basename(root)
                        if domain_name != os.path.basename(subdir):
                            stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1

        return stats
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Archive bucket (move from permanent/dynamic into archive)
|
||||
# 归档桶(从 permanent/dynamic 移入 archive)
|
||||
# Called by decay engine to simulate "forgetting"
|
||||
# 由衰减引擎调用,模拟"遗忘"
|
||||
# ---------------------------------------------------------
|
||||
    async def archive(self, bucket_id: str) -> bool:
        """
        Move a bucket into the archive directory (preserving domain subdirs).
        将指定桶移入归档目录(保留域子目录结构)。

        Called by the decay engine to simulate "forgetting". The bucket's
        frontmatter `type` is rewritten to "archived" in place before the
        file is moved. Returns True on success; False when the bucket is
        missing or any read/write/move step fails.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False

        try:
            # Read once, get domain info and update type / 一次性读取
            post = frontmatter.load(file_path)
            domain = post.get("domain", ["未分类"])
            primary_domain = sanitize_name(domain[0]) if domain else "未分类"
            archive_subdir = os.path.join(self.archive_dir, primary_domain)
            os.makedirs(archive_subdir, exist_ok=True)

            dest = safe_path(archive_subdir, os.path.basename(file_path))

            # Update type marker then move file / 更新类型标记后移动文件
            post["type"] = "archived"
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))

            # Use shutil.move for cross-filesystem safety
            # 使用 shutil.move 保证跨文件系统安全
            shutil.move(file_path, str(dest))
        except Exception as e:
            logger.error(
                f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
            )
            return False

        logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
        return True
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Internal: find bucket file across all three directories
|
||||
# 内部:在三个目录中查找桶文件
|
||||
# ---------------------------------------------------------
|
||||
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
|
||||
"""
|
||||
Recursively search permanent/dynamic/archive for a bucket file
|
||||
matching the given ID.
|
||||
在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。
|
||||
"""
|
||||
if not bucket_id:
|
||||
return None
|
||||
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
||||
if not os.path.exists(dir_path):
|
||||
continue
|
||||
for root, _, files in os.walk(dir_path):
|
||||
for fname in files:
|
||||
if not fname.endswith(".md"):
|
||||
continue
|
||||
# Match by exact ID segment in filename
|
||||
# 通过文件名中的 ID 片段精确匹配
|
||||
if bucket_id in fname:
|
||||
return os.path.join(root, fname)
|
||||
return None
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Internal: load bucket data from .md file
|
||||
# 内部:从 .md 文件加载桶数据
|
||||
# ---------------------------------------------------------
|
||||
def _load_bucket(self, file_path: str) -> Optional[dict]:
|
||||
"""
|
||||
Parse a Markdown file and return structured bucket data.
|
||||
解析 Markdown 文件,返回桶的结构化数据。
|
||||
"""
|
||||
try:
|
||||
post = frontmatter.load(file_path)
|
||||
return {
|
||||
"id": post.get("id", Path(file_path).stem),
|
||||
"metadata": dict(post.metadata),
|
||||
"content": post.content,
|
||||
"path": file_path,
|
||||
}
|
||||
except Exception as e:
|
||||
logger.warning(
|
||||
f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
|
||||
)
|
||||
return None
|
||||
82
config.example.yaml
Normal file
82
config.example.yaml
Normal file
@@ -0,0 +1,82 @@
|
||||
# ============================================================
|
||||
# Ombre Brain Configuration / 配置文件
|
||||
# Copy this file to config.yaml and modify as needed
|
||||
# 复制此文件为 config.yaml 后按需修改
|
||||
# ============================================================
|
||||
|
||||
# --- Transport / 传输方式 ---
|
||||
# stdio: local use (Claude Desktop, direct pipe)
|
||||
# streamable-http: remote use (HTTP, tunnel/CDN/proxy friendly)
|
||||
# stdio: 本地使用(Claude Desktop,直接管道通信)
|
||||
# streamable-http: 远程使用(标准 HTTP,对隧道/CDN/代理友好)
|
||||
transport: "stdio"
|
||||
|
||||
# --- Log level / 日志级别 ---
|
||||
log_level: "INFO"
|
||||
|
||||
# --- Bucket storage path / 记忆桶存储路径 ---
|
||||
# Point this to your Obsidian vault subdirectory, or any local folder
|
||||
# 指向你的 Obsidian 仓库子目录,或任意本地文件夹
|
||||
# Leave as-is to use the built-in ./buckets/ directory
|
||||
# 保持默认则使用内置的 ./buckets/ 目录
|
||||
# buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
||||
|
||||
# --- Merge threshold / 桶合并阈值 ---
|
||||
# When storing a new memory, if similarity with an existing bucket exceeds
|
||||
# this value (0-100), merge instead of creating a new one
|
||||
# 存新记忆时,如果与已有桶的相似度超过此值(0-100),则合并而非新建
|
||||
merge_threshold: 75
|
||||
|
||||
# --- Dehydration API / 脱水压缩 API 配置 ---
|
||||
# Uses a cheap LLM for intelligent compression; auto-degrades to local
|
||||
# keyword extraction if API is unavailable
|
||||
# 用廉价 LLM 做智能压缩,API 不可用时自动降级到本地关键词提取
|
||||
dehydration:
|
||||
# Supports any OpenAI-compatible API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
|
||||
# 支持所有 OpenAI 兼容 API:DeepSeek / Ollama / LM Studio / vLLM / Gemini 等
|
||||
model: "deepseek-chat"
|
||||
base_url: "https://api.deepseek.com/v1"
|
||||
# Common base_url examples / 常见 base_url 示例:
|
||||
# DeepSeek: https://api.deepseek.com/v1
|
||||
# SiliconFlow: https://api.siliconflow.cn/v1
|
||||
# Ollama: http://localhost:11434/v1
|
||||
# LM Studio: http://localhost:1234/v1
|
||||
# vLLM: http://localhost:8000/v1
|
||||
# Gemini: https://generativelanguage.googleapis.com/v1beta/openai
|
||||
# api_key: "" # ⚠️ Use env var OMBRE_API_KEY instead / 请使用环境变量 OMBRE_API_KEY
|
||||
max_tokens: 1024
|
||||
temperature: 0.1
|
||||
|
||||
# --- Decay parameters / 记忆衰减参数 ---
|
||||
# Simulates Ebbinghaus forgetting curve, auto-archives inactive memories
|
||||
# 模拟艾宾浩斯遗忘曲线,自动归档不活跃的记忆
|
||||
decay:
|
||||
lambda: 0.05 # Decay rate / 衰减速率(越大遗忘越快)
|
||||
threshold: 0.3 # Archive threshold / 归档阈值
|
||||
check_interval_hours: 24 # Check interval (hours) / 衰减检查间隔(小时)
|
||||
emotion_weights:
|
||||
base: 1.0 # Base weight / 基础权重
|
||||
arousal_boost: 0.8 # Arousal boost coefficient / 唤醒度加成系数
|
||||
|
||||
# --- Scoring weights / 检索权重参数 ---
|
||||
# total = topic(×4) + emotion(×2) + time(×1.5) + importance(×1)
|
||||
scoring_weights:
|
||||
topic_relevance: 4.0
|
||||
emotion_resonance: 2.0
|
||||
time_proximity: 1.5
|
||||
importance: 1.0
|
||||
|
||||
# --- Fuzzy matching / 模糊匹配参数 ---
|
||||
matching:
|
||||
fuzzy_threshold: 50 # Minimum match score (0-100) / 最低匹配分数
|
||||
max_results: 5 # Max results per search / 单次搜索最多返回条数
|
||||
|
||||
# --- Obsidian wikilinks / Obsidian 双链自动注入 ---
|
||||
wikilink:
|
||||
enabled: true
|
||||
use_tags: false
|
||||
use_domain: true
|
||||
use_auto_keywords: true
|
||||
auto_top_k: 8
|
||||
min_keyword_len: 2
|
||||
exclude_keywords: []
|
||||
279
decay_engine.py
Normal file
279
decay_engine.py
Normal file
@@ -0,0 +1,279 @@
|
||||
# ============================================================
|
||||
# Module: Memory Decay Engine (decay_engine.py)
|
||||
# 模块:记忆衰减引擎
|
||||
#
|
||||
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
|
||||
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
|
||||
#
|
||||
# Core formula (improved Ebbinghaus + emotion coordinates):
|
||||
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
|
||||
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
|
||||
#
|
||||
# Emotion weight (continuous coordinate, not discrete labels):
|
||||
# 情感权重(基于连续坐标而非离散列举):
|
||||
# emotion_weight = base + (arousal × arousal_boost)
|
||||
# Higher arousal → higher emotion weight → slower decay
|
||||
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
|
||||
#
|
||||
# Depended on by: server.py
|
||||
# 被谁依赖:server.py
|
||||
# ============================================================
|
||||
|
||||
import math
|
||||
import asyncio
|
||||
import logging
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger("ombre_brain.decay")
|
||||
|
||||
|
||||
class DecayEngine:
|
||||
"""
|
||||
Memory decay engine — periodically scans all dynamic buckets,
|
||||
calculates decay scores, auto-archives low-activity buckets
|
||||
to simulate natural forgetting.
|
||||
记忆衰减引擎 —— 定期扫描所有动态桶,
|
||||
计算衰减得分,将低活跃桶自动归档,模拟自然遗忘。
|
||||
"""
|
||||
|
||||
def __init__(self, config: dict, bucket_mgr):
|
||||
# --- Load decay parameters / 加载衰减参数 ---
|
||||
decay_cfg = config.get("decay", {})
|
||||
self.decay_lambda = decay_cfg.get("lambda", 0.05)
|
||||
self.threshold = decay_cfg.get("threshold", 0.3)
|
||||
self.check_interval = decay_cfg.get("check_interval_hours", 24)
|
||||
|
||||
# --- Emotion weight params (continuous arousal coordinate) ---
|
||||
# --- 情感权重参数(基于连续 arousal 坐标)---
|
||||
emotion_cfg = decay_cfg.get("emotion_weights", {})
|
||||
self.emotion_base = emotion_cfg.get("base", 1.0)
|
||||
self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)
|
||||
|
||||
self.bucket_mgr = bucket_mgr
|
||||
|
||||
# --- Background task control / 后台任务控制 ---
|
||||
self._task: asyncio.Task | None = None
|
||||
self._running = False
|
||||
|
||||
    @property
    def is_running(self) -> bool:
        """Whether the decay engine's background loop is currently running
        (reports the internal _running flag).
        衰减引擎是否正在后台运行。"""
        return self._running
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Core: calculate decay score for a single bucket
|
||||
# 核心:计算单个桶的衰减得分
|
||||
#
|
||||
# Higher score = more vivid memory; below threshold → archive
|
||||
# 得分越高 = 记忆越鲜活,低于阈值则归档
|
||||
# Permanent buckets never decay / 固化桶永远不衰减
|
||||
# ---------------------------------------------------------
|
||||
# ---------------------------------------------------------
|
||||
# Time weight: 0-1d→1.0, day2→0.9, then ~10%/day, floor 0.3
|
||||
# 时间系数:0-1天=1.0,第2天=0.9,之后每天约降10%,7天后稳定在0.3
|
||||
# ---------------------------------------------------------
|
||||
@staticmethod
|
||||
def _calc_time_weight(days_since: float) -> float:
|
||||
"""
|
||||
Piecewise time weight multiplier (multiplies base_score).
|
||||
分段式时间权重系数,作为 final_score 的乘数。
|
||||
"""
|
||||
if days_since <= 1.0:
|
||||
return 1.0
|
||||
elif days_since <= 2.0:
|
||||
# Linear interpolation: 1.0→0.9 over [1,2]
|
||||
return 1.0 - 0.1 * (days_since - 1.0)
|
||||
else:
|
||||
# Exponential decay from 0.9, floor at 0.3
|
||||
# k = ln(3)/5 ≈ 0.2197 so that at day 7 (5 days past day 2) → 0.3
|
||||
raw = 0.9 * math.exp(-0.2197 * (days_since - 2.0))
|
||||
return max(0.3, raw)
|
||||
|
||||
def calculate_score(self, metadata: dict) -> float:
    """Calculate the current activity score for a memory bucket.

    Formula:
        final_score = time_weight * base_score
        base_score  = importance * activation_count**0.3
                      * exp(-lambda * days) * (base + arousal * boost)

    ``time_weight`` is the outer multiplier and takes priority over the
    emotion factors.  Pinned/protected/permanent buckets never decay.

    Args:
        metadata: Bucket metadata (importance, activation_count,
            last_active, arousal, resolved, pinned, protected, type, ...).

    Returns:
        Score rounded to 4 decimals; 999.0 for non-decaying buckets,
        0.0 for non-dict metadata.
    """
    if not isinstance(metadata, dict):
        return 0.0

    # Pinned/protected buckets never decay; importance effectively locked.
    if metadata.get("pinned") or metadata.get("protected"):
        return 999.0

    # Permanent buckets never decay either.
    if metadata.get("type") == "permanent":
        return 999.0

    # Robust numeric parsing: metadata may come from hand-edited files,
    # so malformed values fall back to defaults instead of raising.
    try:
        importance = max(1, min(10, int(metadata.get("importance", 5))))
    except (ValueError, TypeError):
        importance = 5
    try:
        activation_count = max(1, int(metadata.get("activation_count", 1)))
    except (ValueError, TypeError):
        activation_count = 1

    # Days since the bucket was last activated.
    last_active_str = metadata.get("last_active", metadata.get("created", ""))
    try:
        last_active = datetime.fromisoformat(str(last_active_str))
        days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
    except (ValueError, TypeError):
        days_since = 30.0  # Parse failure -> assume 30 days old.

    # Emotion weight from the continuous arousal coordinate:
    # higher arousal -> stronger emotion -> slower decay.
    try:
        arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
    except (ValueError, TypeError):
        arousal = 0.3
    emotion_weight = self.emotion_base + arousal * self.arousal_boost

    # Time weight (outer multiplier, highest priority).
    time_weight = self._calc_time_weight(days_since)

    # Base score = importance * act_count^0.3 * e^(-lambda*days) * emotion.
    base_score = (
        importance
        * (activation_count ** 0.3)
        * math.exp(-self.decay_lambda * days_since)
        * emotion_weight
    )

    score = time_weight * base_score

    # Weight-pool modifiers: resolved events drop to 5% and sink until
    # reactivated by keyword; high-arousal unresolved buckets get an
    # urgency boost so they surface first.
    resolved = bool(metadata.get("resolved", False))
    resolved_factor = 0.05 if resolved else 1.0
    urgency_boost = 1.5 if (arousal > 0.7 and not resolved) else 1.0

    return round(score * resolved_factor * urgency_boost, 4)
|
||||
|
||||
# ---------------------------------------------------------
# Execute one decay cycle: scan all dynamic buckets, score
# them, and archive the ones that fall below the threshold.
# ---------------------------------------------------------
async def run_decay_cycle(self) -> dict:
    """Run a single decay pass over all dynamic buckets.

    Buckets scoring below ``self.threshold`` are archived (simulated
    forgetting); permanent/pinned/protected buckets are skipped.

    Returns:
        Stats dict: {"checked": N, "archived": N, "lowest_score": X}
        (plus an "error" key if listing the buckets failed).
    """
    try:
        candidates = await self.bucket_mgr.list_all(include_archive=False)
    except Exception as exc:
        logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {exc}")
        return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(exc)}

    checked_total = 0
    archived_total = 0
    lowest = float("inf")

    for entry in candidates:
        meta = entry.get("metadata", {})

        # Non-decaying buckets are skipped entirely.
        if meta.get("type") == "permanent" or meta.get("pinned") or meta.get("protected"):
            continue

        checked_total += 1
        try:
            current = self.calculate_score(meta)
        except Exception as exc:
            logger.warning(
                f"Score calculation failed for {entry.get('id', '?')} / "
                f"计算得分失败: {exc}"
            )
            continue

        if current < lowest:
            lowest = current

        # Below threshold -> archive (simulated forgetting).
        if current < self.threshold:
            try:
                if await self.bucket_mgr.archive(entry["id"]):
                    archived_total += 1
                    logger.info(
                        f"Decay archived / 衰减归档: "
                        f"{meta.get('name', entry['id'])} "
                        f"(score={current:.4f}, threshold={self.threshold})"
                    )
            except Exception as exc:
                logger.warning(
                    f"Archive failed for {entry.get('id', '?')} / "
                    f"归档失败: {exc}"
                )

    result = {
        "checked": checked_total,
        "archived": archived_total,
        "lowest_score": lowest if checked_total > 0 else 0,
    }
    logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
    return result
|
||||
|
||||
# ---------------------------------------------------------
# Background decay task management
# ---------------------------------------------------------
async def ensure_started(self) -> None:
    """Lazily start the decay engine on first call (no-op if running)."""
    if self._running:
        return
    await self.start()
|
||||
|
||||
async def start(self) -> None:
    """Start the background decay loop (idempotent)."""
    if self._running:
        return
    self._running = True
    # The loop runs as a fire-and-forget task; stop() cancels it.
    self._task = asyncio.create_task(self._background_loop())
    logger.info(
        f"Decay engine started, interval: {self.check_interval}h / "
        f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
    )
|
||||
|
||||
async def stop(self) -> None:
    """Stop the background decay loop and wait for the task to exit."""
    self._running = False
    task = self._task
    if task:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected on cancellation; nothing to clean up.
            pass
    logger.info("Decay engine stopped / 衰减引擎已停止")
|
||||
|
||||
async def _background_loop(self) -> None:
    """Background loop body: run one decay cycle, sleep, repeat."""
    while self._running:
        try:
            await self.run_decay_cycle()
        except Exception as exc:
            logger.error(f"Decay cycle error / 衰减周期出错: {exc}")
        # Wait until the next cycle; cancellation ends the loop.
        try:
            await asyncio.sleep(self.check_interval * 3600)
        except asyncio.CancelledError:
            break
|
||||
779
dehydrator.py
Normal file
779
dehydrator.py
Normal file
@@ -0,0 +1,779 @@
|
||||
# ============================================================
|
||||
# Module: Dehydration & Auto-tagging (dehydrator.py)
|
||||
# 模块:数据脱水压缩 + 自动打标
|
||||
#
|
||||
# Capabilities:
|
||||
# 能力:
|
||||
# 1. Dehydrate: compress memory content into high-density summaries (save tokens)
|
||||
# 脱水:将记忆桶的原始内容压缩为高密度摘要,省 token
|
||||
# 2. Merge: blend old and new content, keeping bucket size constant
|
||||
# 合并:揉合新旧内容,控制桶体积恒定
|
||||
# 3. Analyze: auto-analyze content for domain/emotion/tags
|
||||
# 打标:自动分析内容,输出主题域/情感坐标/标签
|
||||
#
|
||||
# Operating modes:
|
||||
# 工作模式:
|
||||
# - Primary: OpenAI-compatible API (DeepSeek/Ollama/LM Studio/vLLM/Gemini etc.)
|
||||
# 主路径:通过 OpenAI 兼容客户端调用 LLM API
|
||||
# - Fallback: local keyword extraction when API is unavailable
|
||||
# 备用路径:API 不可用时用本地关键词提取
|
||||
#
|
||||
# Depended on by: server.py
|
||||
# 被谁依赖:server.py
|
||||
# ============================================================
|
||||
|
||||
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
from collections import Counter
|
||||
import jieba
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
|
||||
from utils import count_tokens_approx
|
||||
|
||||
logger = logging.getLogger("ombre_brain.dehydrator")
|
||||
|
||||
|
||||
# --- Dehydration prompt: instructs a cheap LLM to compress information ---
# NOTE: the prompt text is runtime data sent to the model (kept in Chinese).
# It asks for core facts, latest emotion state, todos, keywords and a
# <=50-char summary as pure JSON, targeting >70% compression.
DEHYDRATE_PROMPT = """你是一个信息压缩专家。请将以下内容脱水为紧凑摘要。

压缩规则:
1. 提取所有核心事实,去除冗余修饰和重复
2. 保留最新的情绪状态和态度
3. 保留所有待办/未完成事项
4. 关键数字、日期、名称必须保留
5. 目标压缩率 > 70%

输出格式(纯 JSON,无其他内容):
{
  "core_facts": ["事实1", "事实2"],
  "emotion_state": "当前情绪关键词",
  "todos": ["待办1", "待办2"],
  "keywords": ["关键词1", "关键词2"],
  "summary": "50字以内的核心总结"
}"""
|
||||
|
||||
|
||||
# --- Diary digest prompt: split daily notes into independent memory entries ---
# NOTE: runtime data sent to the model (kept in Chinese).  It asks for a
# pure-JSON array of entries, each carrying name/content/domain/valence/
# arousal/tags/importance, merging scattered fragments per topic.
DIGEST_PROMPT = """你是一个日记整理专家。用户会发送一段包含今天各种事情的文本(可能很杂乱),请你将其拆分成多个独立的记忆条目。

整理规则:
1. 每个条目应该是一个独立的主题/事件(不要混在一起)
2. 为每个条目自动分析元数据
3. 去除无意义的口水话和重复信息,保留核心内容
4. 同一主题的零散信息应合并为一个条目
5. 如果有待办事项,单独提取为一个条目

输出格式(纯 JSON 数组,无其他内容):
[
  {
    "name": "条目标题(10字以内)",
    "content": "整理后的内容",
    "domain": ["主题域1"],
    "valence": 0.7,
    "arousal": 0.4,
    "tags": ["标签1", "标签2"],
    "importance": 5
  }
]

主题域可选(选最精确的 1~2 个,只选真正相关的):
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
人际: ["家庭", "恋爱", "友谊", "社交"]
成长: ["工作", "学习", "考试", "求职"]
身心: ["健康", "心理", "睡眠", "运动"]
兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
数字: ["编程", "AI", "硬件", "网络"]
事务: ["财务", "计划", "待办"]
内心: ["情绪", "回忆", "梦境", "自省"]
importance: 1-10,根据内容重要程度判断
valence: 0~1(0=消极, 0.5=中性, 1=积极)
arousal: 0~1(0=平静, 0.5=普通, 1=激动)"""
|
||||
|
||||
|
||||
# --- Merge prompt: instruct the LLM to blend old and new memories ---
# NOTE: runtime data sent to the model (kept in Chinese).  New content wins
# on conflict; output should stay within ~120% of the old memory's length,
# emitted as plain text with no extra commentary.
MERGE_PROMPT = """你是一个信息合并专家。请将旧记忆与新内容合并为一份统一的简洁记录。

合并规则:
1. 新内容与旧记忆冲突时,以新内容为准
2. 去除重复信息
3. 保留所有重要事实
4. 总长度尽量不超过旧记忆的 120%

直接输出合并后的文本,不要加额外说明。"""
|
||||
|
||||
|
||||
# --- Auto-tagging prompt: analyze content for domain and emotion coords ---
# NOTE: runtime data sent to the model (kept in Chinese).  It asks for pure
# JSON with domain (1~2 picks from a fixed taxonomy), valence/arousal in
# [0, 1], 3~5 keyword tags, and a <=10-char suggested bucket name.
ANALYZE_PROMPT = """你是一个内容分析器。请分析以下文本,输出结构化的元数据。

分析规则:
1. domain(主题域):选最精确的 1~2 个,只选真正相关的
   日常: ["饮食", "穿搭", "出行", "居家", "购物"]
   人际: ["家庭", "恋爱", "友谊", "社交"]
   成长: ["工作", "学习", "考试", "求职"]
   身心: ["健康", "心理", "睡眠", "运动"]
   兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
   数字: ["编程", "AI", "硬件", "网络"]
   事务: ["财务", "计划", "待办"]
   内心: ["情绪", "回忆", "梦境", "自省"]
2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极
3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动
4. tags(关键词标签):3~5 个最能概括内容的关键词
5. suggested_name(建议桶名):10字以内的简短标题

输出格式(纯 JSON,无其他内容):
{
  "domain": ["主题域1", "主题域2"],
  "valence": 0.7,
  "arousal": 0.4,
  "tags": ["标签1", "标签2", "标签3"],
  "suggested_name": "简短标题"
}"""
|
||||
|
||||
|
||||
class Dehydrator:
|
||||
"""
|
||||
Data dehydrator + content analyzer.
|
||||
Three capabilities: dehydration / merge / auto-tagging (domain + emotion).
|
||||
Prefers API (better quality); auto-degrades to local (guaranteed availability).
|
||||
数据脱水器 + 内容分析器。
|
||||
三大能力:脱水压缩 / 新旧合并 / 自动打标。
|
||||
优先走 API,API 挂了自动降级到本地。
|
||||
"""
|
||||
|
||||
def __init__(self, config: dict):
    """Read the ``dehydration`` section of *config* and set up the client.

    Falls back to local-only mode (``client = None``) when no API key
    is configured.
    """
    # Dehydration API configuration.
    opts = config.get("dehydration", {})
    self.api_key = opts.get("api_key", "")
    self.model = opts.get("model", "deepseek-chat")
    self.base_url = opts.get("base_url", "https://api.deepseek.com/v1")
    self.max_tokens = opts.get("max_tokens", 1024)
    self.temperature = opts.get("temperature", 0.1)

    # The API path is usable only when a key is present.
    self.api_available = bool(self.api_key)

    # OpenAI-compatible async client — works with any OpenAI-format API
    # (DeepSeek / Ollama / LM Studio / vLLM / Gemini, ...); the user only
    # needs to point base_url at it in config.yaml.
    self.client = (
        AsyncOpenAI(api_key=self.api_key, base_url=self.base_url, timeout=60.0)
        if self.api_available
        else None
    )
|
||||
|
||||
# ---------------------------------------------------------
# Dehydrate: compress raw content into a concise summary.
# API path first, local keyword fallback second.
# ---------------------------------------------------------
async def dehydrate(self, content: str, metadata: dict = None) -> str:
    """Compress memory content into a summary ready for context injection.

    Short content (< ~100 approx tokens) is passed through untouched;
    otherwise the API is tried first and the local extractor is the
    guaranteed fallback.
    """
    if not content or not content.strip():
        return "(空记忆 / empty memory)"

    # Already compact — skip compression, just format.
    if count_tokens_approx(content) < 100:
        return self._format_output(content, metadata)

    # Preferred path: API compression (best quality).
    if self.api_available:
        try:
            compressed = await self._api_dehydrate(content)
        except Exception as e:
            logger.warning(
                f"API dehydration failed, degrading to local / "
                f"API 脱水失败,降级到本地压缩: {e}"
            )
        else:
            if compressed:
                return self._format_output(compressed, metadata)

    # Guaranteed fallback: local keyword extraction.
    return self._format_output(self._local_dehydrate(content), metadata)
|
||||
|
||||
# ---------------------------------------------------------
# Merge: blend new content into an existing bucket while
# keeping the bucket size roughly constant.
# ---------------------------------------------------------
async def merge(self, old_content: str, new_content: str) -> str:
    """Merge new content with the old memory (API first, local fallback)."""
    # Trivial cases: one side (or both) empty.
    if not old_content:
        return new_content or ""
    if not new_content:
        return old_content

    # Preferred path: API merge.
    if self.api_available:
        try:
            blended = await self._api_merge(old_content, new_content)
        except Exception as e:
            logger.warning(
                f"API merge failed, degrading to local / "
                f"API 合并失败,降级到本地合并: {e}"
            )
        else:
            if blended:
                return blended

    # Guaranteed fallback: local concatenation merge.
    return self._local_merge(old_content, new_content)
|
||||
|
||||
# ---------------------------------------------------------
# API call: dehydration
# ---------------------------------------------------------
async def _api_dehydrate(self, content: str) -> str:
    """Run the dehydration prompt through the LLM API; '' on empty reply."""
    # Content is capped at 3000 chars to bound the request size.
    reply = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": DEHYDRATE_PROMPT},
            {"role": "user", "content": content[:3000]},
        ],
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    if reply.choices:
        return reply.choices[0].message.content or ""
    return ""
|
||||
|
||||
# ---------------------------------------------------------
# API call: merge
# ---------------------------------------------------------
async def _api_merge(self, old_content: str, new_content: str) -> str:
    """Run the merge prompt through the LLM API; '' on empty reply."""
    # Both sides are capped at 2000 chars to bound the request size.
    combined = f"旧记忆:\n{old_content[:2000]}\n\n新内容:\n{new_content[:2000]}"
    reply = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": MERGE_PROMPT},
            {"role": "user", "content": combined},
        ],
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    if reply.choices:
        return reply.choices[0].message.content or ""
    return ""
|
||||
|
||||
# ---------------------------------------------------------
# Local dehydration (no-API fallback): keyword frequency
# plus sentence-position weighting.
# ---------------------------------------------------------
def _local_dehydrate(self, content: str) -> str:
    """Local compression: keep the top-scoring sentences + a keyword list."""
    # Split into sentences on CJK/ASCII terminators and newlines.
    pieces = [p.strip() for p in re.split(r"[。!?\n.!?]+", content)]
    pieces = [p for p in pieces if len(p) > 5]

    if not pieces:
        return content[:200]

    # High-frequency keywords drive the sentence scoring below.
    keywords = self._extract_keywords(content)

    # Score: leading sentences weigh 1.5x, trailing 1.2x, middle 1.0x;
    # each keyword hit adds 1 to the multiplier base.
    ranked = []
    total = len(pieces)
    for idx, sentence in enumerate(pieces):
        if idx < 3:
            pos = 1.5
        elif idx > total - 3:
            pos = 1.2
        else:
            pos = 1.0
        hits = sum(1 for kw in keywords if kw in sentence)
        ranked.append((pos * (1 + hits), sentence))

    ranked.sort(key=lambda pair: pair[0], reverse=True)

    # Keep the 8 best sentences and up to 10 keywords.
    summary = "。".join(sentence for _, sentence in ranked[:8])
    keyword_str = ", ".join(keywords[:10])

    return f"[摘要] {summary}\n[关键词] {keyword_str}"
|
||||
|
||||
# ---------------------------------------------------------
# Local merge (no-API fallback): concatenation + truncation.
# ---------------------------------------------------------
def _local_merge(self, old_content: str, new_content: str) -> str:
    """Concatenate old and new with a separator; truncate when too long."""
    separator = "\n\n--- 更新 ---\n"
    combined = f"{old_content.strip()}{separator}{new_content.strip()}"
    if len(combined) <= 3000:
        return combined
    # Over 3000 chars: keep roughly the first half of each side.
    limit = 1400
    return f"{old_content[:limit].strip()}{separator}{new_content[:limit].strip()}"
|
||||
|
||||
# ---------------------------------------------------------
# Keyword extraction: CJK + English tokens, stopword filter,
# frequency ranking.
# ---------------------------------------------------------
def _extract_keywords(self, text: str) -> list[str]:
    """Return up to 15 high-frequency keywords (jieba + ASCII words)."""
    # jieba may be missing or choke on odd input — degrade to no CJK tokens.
    try:
        tokens = jieba.lcut(text)
    except Exception:
        tokens = []
    # ASCII words of 3+ letters, lowercased.
    tokens = tokens + re.findall(r"[a-zA-Z]{3,}", text.lower())

    stopwords = {
        "的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
        "都", "一个", "上", "也", "很", "到", "说", "要", "去",
        "你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
        "the", "and", "for", "are", "but", "not", "you", "all", "can",
        "had", "her", "was", "one", "our", "out", "has", "have", "with",
        "this", "that", "from", "they", "been", "said", "will", "each",
    }
    # Drop stopwords, single chars and pure numbers, then rank by count.
    freq = Counter(
        tok for tok in tokens
        if tok not in stopwords
        and len(tok.strip()) > 1
        and not re.match(r"^[0-9]+$", tok)
    )
    return [tok for tok, _ in freq.most_common(15)]
|
||||
|
||||
# ---------------------------------------------------------
# Output formatting: wrap content with bucket name, domains,
# tags and emotion coordinates.
# ---------------------------------------------------------
def _format_output(self, content: str, metadata: dict = None) -> str:
    """Prefix *content* with a one-line metadata header when available."""
    if not metadata or not isinstance(metadata, dict):
        return content

    bucket_name = metadata.get("name", "未命名")
    tag_str = ", ".join(metadata.get("tags", []))
    domain_str = ", ".join(metadata.get("domain", []))
    try:
        valence = float(metadata.get("valence", 0.5))
        arousal = float(metadata.get("arousal", 0.3))
    except (ValueError, TypeError):
        valence, arousal = 0.5, 0.3

    parts = [f"📌 记忆桶: {bucket_name}"]
    if domain_str:
        parts.append(f" [主题:{domain_str}]")
    if tag_str:
        parts.append(f" [标签:{tag_str}]")
    parts.append(f" [情感:V{valence:.1f}/A{arousal:.1f}]")
    parts.append("\n")
    return "".join(parts) + content
|
||||
|
||||
# ---------------------------------------------------------
# Auto-tagging: analyze content for domain + emotion + tags.
# Called by server.py when storing new memories.
# ---------------------------------------------------------
async def analyze(self, content: str) -> dict:
    """Analyze *content* into structured metadata.

    Returns a dict with keys: domain, valence, arousal, tags,
    suggested_name.
    """
    if not content or not content.strip():
        return self._default_analysis()

    # Preferred path: API analysis (best quality).
    if self.api_available:
        try:
            tagged = await self._api_analyze(content)
        except Exception as e:
            logger.warning(
                f"API tagging failed, degrading to local / "
                f"API 打标失败,降级到本地分析: {e}"
            )
        else:
            if tagged:
                return tagged

    # Guaranteed fallback: local keyword + sentiment analysis.
    return self._local_analyze(content)
|
||||
|
||||
# ---------------------------------------------------------
# API call: auto-tagging
# ---------------------------------------------------------
async def _api_analyze(self, content: str) -> dict:
    """Run the tagging prompt through the LLM API and parse the reply."""
    reply = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": ANALYZE_PROMPT},
            {"role": "user", "content": content[:2000]},
        ],
        max_tokens=256,
        temperature=0.1,
    )
    if not reply.choices:
        return self._default_analysis()
    text = reply.choices[0].message.content or ""
    if not text.strip():
        return self._default_analysis()
    return self._parse_analysis(text)
|
||||
|
||||
# ---------------------------------------------------------
# Parse API JSON response with safety checks:
# valence/arousal clamped to 0~1, domain/tags coerced to lists.
# ---------------------------------------------------------
def _parse_analysis(self, raw: str) -> dict:
    """Parse and validate the API tagging result.

    Strips an optional markdown code fence, clamps valence/arousal into
    [0, 1] and coerces domain/tags into lists of strings.  Falls back to
    ``self._default_analysis()`` on any malformed reply.

    Bug fix: the previous code sliced the raw field directly, so a bare
    string domain ("工作") became its first 3 characters and a null value
    raised TypeError.
    """
    def _listify(value, fallback):
        # Coerce a model-supplied field to a list of strings.
        if isinstance(value, list):
            items = [str(v) for v in value if v is not None]
            return items if items else list(fallback)
        if isinstance(value, str) and value.strip():
            return [value]
        return list(fallback)

    try:
        # Handle potential markdown code-block wrapping (```json ... ```).
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
        result = json.loads(cleaned)
    except (json.JSONDecodeError, IndexError, ValueError):
        logger.warning(f"API tagging JSON parse failed / JSON 解析失败: {raw[:200]}")
        return self._default_analysis()

    if not isinstance(result, dict):
        return self._default_analysis()

    # Validate and clamp the numeric emotion coordinates.
    try:
        valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
        arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))
    except (ValueError, TypeError):
        valence, arousal = 0.5, 0.3

    return {
        "domain": _listify(result.get("domain"), ["未分类"])[:3],
        "valence": valence,
        "arousal": arousal,
        "tags": _listify(result.get("tags"), [])[:5],
        "suggested_name": str(result.get("suggested_name", ""))[:20],
    }
|
||||
|
||||
# ---------------------------------------------------------
# Local analysis (fallback when the API is unavailable):
# keyword-domain matching + a small sentiment dictionary.
# ---------------------------------------------------------
def _local_analyze(self, content: str) -> dict:
    """Local keyword + sentiment-dictionary analysis.

    Bug fix: the text is lowercased before matching, so mixed-case domain
    keywords ("VPN", "GPT", "DLC", "LoRA", "MCP", ...) could never hit.
    Keywords are now lowercased for the comparison.
    """
    keywords = self._extract_keywords(content)
    text_lower = content.lower()

    # Domain matching: a domain is assigned when >= 2 of its keywords
    # occur (as substrings) in the text.
    domain_keywords = {
        # Daily / 日常
        "饮食": {"吃", "饭", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
                 "超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食"},
        "出行": {"旅行", "出发", "航班", "酒店", "地铁", "打车", "高铁", "机票",
                 "景点", "签证", "护照"},
        "居家": {"打扫", "洗衣", "搬家", "快递", "收纳", "装修", "租房"},
        "购物": {"买", "下单", "到货", "退货", "优惠", "折扣", "代购"},
        # Relationships / 人际
        "家庭": {"爸", "妈", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
                 "奶奶", "爷爷", "亲戚", "家里"},
        "恋爱": {"爱人", "男友", "女友", "恋", "约会", "接吻", "分手",
                 "暧昧", "在一起", "想你", "同床"},
        "友谊": {"朋友", "闺蜜", "兄弟", "聚", "约饭", "聊天", "群"},
        "社交": {"见面", "被人", "圈子", "消息", "评论", "点赞"},
        # Growth / 成长
        "工作": {"会议", "项目", "客户", "汇报", "deadline", "同事",
                 "老板", "薪资", "合同", "需求", "加班", "实习"},
        "学习": {"课", "考试", "论文", "笔记", "作业", "教授", "讲座",
                 "分数", "选课", "学分"},
        "求职": {"面试", "简历", "offer", "投递", "薪资", "岗位"},
        # Health / 身心
        "健康": {"医院", "复查", "吃药", "抽血", "手术", "心率",
                 "病", "症状", "指标", "体检", "月经"},
        "心理": {"焦虑", "抑郁", "恐慌", "创伤", "人格", "咨询",
                 "安全感", "自残", "崩溃", "压力"},
        "睡眠": {"睡", "失眠", "噩梦", "清醒", "熬夜", "早起", "午觉"},
        # Interests / 兴趣
        "游戏": {"游戏", "steam", "极乐迪斯科", "存档", "通关", "角色",
                 "mod", "DLC", "剧情"},
        "影视": {"电影", "番剧", "动漫", "剧", "综艺", "追番", "上映"},
        "音乐": {"歌", "音乐", "专辑", "live", "演唱会", "耳机"},
        "阅读": {"书", "小说", "读完", "kindle", "连载", "漫画"},
        "创作": {"写", "画", "预设", "脚本", "视频", "剪辑", "P图",
                 "SillyTavern", "插件", "正则", "人设"},
        # Digital / 数字
        "编程": {"代码", "code", "python", "bug", "api", "docker",
                 "git", "调试", "框架", "部署", "开发", "server"},
        "AI": {"模型", "GPT", "Claude", "gemini", "LLM", "token",
               "prompt", "LoRA", "微调", "推理", "MCP"},
        "网络": {"VPN", "梯子", "代理", "域名", "隧道", "服务器",
                 "cloudflare", "tunnel", "反代"},
        # Affairs / 事务
        "财务": {"钱", "转账", "工资", "花了", "欠", "还款", "借",
                 "账单", "余额", "预算", "黄金"},
        "计划": {"计划", "目标", "deadline", "日程", "清单", "安排"},
        "待办": {"要做", "记得", "别忘", "提醒", "下次"},
        # Inner / 内心
        "情绪": {"开心", "难过", "生气", "哭", "泪", "孤独", "幸福",
                 "伤心", "烦", "委屈", "感动", "温柔"},
        "回忆": {"以前", "小时候", "那时", "怀念", "曾经", "记得"},
        "梦境": {"梦", "梦到", "梦见", "噩梦", "清醒梦"},
        "自省": {"反思", "觉得自己", "问自己", "意识到", "明白了"},
    }

    matched_domains = []
    for domain, kws in domain_keywords.items():
        # Lowercase each keyword: text_lower is lowercased, so mixed-case
        # keywords (e.g. "VPN", "GPT") would otherwise never match.
        hits = sum(1 for kw in kws if kw.lower() in text_lower)
        if hits >= 2:
            matched_domains.append((domain, hits))
    matched_domains.sort(key=lambda x: x[1], reverse=True)
    domains = [d for d, _ in matched_domains[:3]] or ["未分类"]

    # Emotion estimation via a small sentiment dictionary.
    positive_words = {"开心", "高兴", "喜欢", "哈哈", "棒", "赞", "爱",
                      "幸福", "成功", "感动", "兴奋", "棒极了",
                      "happy", "love", "great", "awesome", "nice"}
    negative_words = {"难过", "伤心", "生气", "焦虑", "害怕", "无聊",
                      "烦", "累", "失望", "崩溃", "愤怒", "痛苦",
                      "sad", "angry", "hate", "tired", "afraid"}
    intense_words = {"太", "非常", "极", "超", "特别", "十分", "炸",
                     "崩溃", "激动", "愤怒", "狂喜", "very", "so", "extremely"}

    pos_count = sum(1 for w in positive_words if w in text_lower)
    neg_count = sum(1 for w in negative_words if w in text_lower)
    intense_count = sum(1 for w in intense_words if w in text_lower)

    # valence: balance of positive vs negative hits around neutral 0.5.
    if pos_count + neg_count > 0:
        valence = 0.5 + 0.4 * (pos_count - neg_count) / (pos_count + neg_count)
    else:
        valence = 0.5

    # arousal: baseline 0.3 plus intensity and total emotion-word hits.
    arousal = min(1.0, 0.3 + intense_count * 0.15 + (pos_count + neg_count) * 0.08)

    return {
        "domain": domains,
        "valence": round(max(0.0, min(1.0, valence)), 2),
        "arousal": round(max(0.0, min(1.0, arousal)), 2),
        "tags": keywords[:5],
        "suggested_name": "",
    }
|
||||
|
||||
# ---------------------------------------------------------
# Default analysis result (empty content or total failure).
# ---------------------------------------------------------
def _default_analysis(self) -> dict:
    """Return the neutral fallback metadata."""
    neutral = {
        "domain": ["未分类"],
        "valence": 0.5,
        "arousal": 0.3,
        "tags": [],
        "suggested_name": "",
    }
    return neutral
|
||||
|
||||
# ---------------------------------------------------------
# Diary digest: split a day's worth of notes into independent
# memory entries (used by the "grow" tool).
# ---------------------------------------------------------
async def digest(self, content: str) -> list[dict]:
    """Split a large chunk of daily content into memory entries.

    Returns a list of dicts with keys: name, content, domain, valence,
    arousal, tags, importance.
    """
    if not content or not content.strip():
        return []

    # Preferred path: API digest (understands semantic boundaries).
    if self.api_available:
        try:
            entries = await self._api_digest(content)
        except Exception as e:
            logger.warning(
                f"API diary digest failed, degrading to local / "
                f"API 日记整理失败,降级到本地拆分: {e}"
            )
        else:
            if entries:
                return entries

    # Guaranteed fallback: local paragraph split.
    return await self._local_digest(content)
|
||||
|
||||
# ---------------------------------------------------------
# API call: diary digest
# ---------------------------------------------------------
async def _api_digest(self, content: str) -> list[dict]:
    """Run the diary-digest prompt through the LLM API and parse the reply."""
    reply = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": DIGEST_PROMPT},
            {"role": "user", "content": content[:5000]},
        ],
        max_tokens=2048,
        temperature=0.2,
    )
    if not reply.choices:
        return []
    text = reply.choices[0].message.content or ""
    if not text.strip():
        return []
    return self._parse_digest(text)
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Parse diary digest result with safety checks
|
||||
# 解析日记整理结果,做安全校验
|
||||
# ---------------------------------------------------------
|
||||
def _parse_digest(self, raw: str) -> list[dict]:
    """Parse and validate the API's diary-digest JSON response.

    Strips an optional markdown code fence, parses the JSON, then
    clamps/normalizes every field so downstream code always receives
    a well-typed entry. Malformed input yields [] rather than raising.
    """
    try:
        cleaned = raw.strip()
        if cleaned.startswith("```"):
            # Strip the markdown code fence the model may wrap around JSON.
            cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
        items = json.loads(cleaned)
    except (json.JSONDecodeError, IndexError, ValueError):
        logger.warning(f"Diary digest JSON parse failed / JSON 解析失败: {raw[:200]}")
        return []

    if not isinstance(items, list):
        return []

    validated = []
    for item in items:
        if not isinstance(item, dict) or not item.get("content"):
            continue
        try:
            importance = max(1, min(10, int(item.get("importance", 5))))
        except (ValueError, TypeError):
            importance = 5
        try:
            valence = max(0.0, min(1.0, float(item.get("valence", 0.5))))
            arousal = max(0.0, min(1.0, float(item.get("arousal", 0.3))))
        except (ValueError, TypeError):
            valence, arousal = 0.5, 0.3

        # Fix: the model occasionally returns domain/tags as a bare string;
        # slicing a string with [:3]/[:5] would silently store a character
        # slice instead of a list. Coerce to lists before truncating.
        domain = item.get("domain", ["未分类"])
        if isinstance(domain, str):
            domain = [domain]
        elif not isinstance(domain, list):
            domain = ["未分类"]
        tags = item.get("tags", [])
        if isinstance(tags, str):
            tags = [tags]
        elif not isinstance(tags, list):
            tags = []

        validated.append({
            "name": str(item.get("name", ""))[:20],
            "content": str(item.get("content", "")),
            "domain": domain[:3],
            "valence": valence,
            "arousal": arousal,
            "tags": tags[:5],
            "importance": importance,
        })
    return validated
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# Local diary split (fallback when API is unavailable)
|
||||
# 本地日记拆分(无 API 时的兜底)
|
||||
# Split by blank lines/separators, analyze each segment
|
||||
# 按空行/分隔符拆段,每段独立分析
|
||||
# ---------------------------------------------------------
|
||||
async def _local_digest(self, content: str) -> list[dict]:
    """Fallback splitter: cut on blank lines/separators, analyze each segment locally."""
    # Paragraph boundaries: 2+ newlines, '---' rules, or "\n- " list markers.
    pieces = re.split(r"\n{2,}|---+|\n-\s", content)
    segments = [p.strip() for p in pieces if len(p.strip()) > 20]

    if not segments:
        # Content too short to split — keep the whole thing as one entry.
        whole = self._local_analyze(content)
        return [{
            "name": whole.get("suggested_name", "日记"),
            "content": content.strip(),
            "domain": whole["domain"],
            "valence": whole["valence"],
            "arousal": whole["arousal"],
            "tags": whole["tags"],
            "importance": 5,
        }]

    entries = []
    for segment in segments[:10]:  # hard cap: at most 10 entries per digest
        per_seg = self._local_analyze(segment)
        entries.append({
            # Empty suggested name → fall back to the segment's first 10 chars.
            "name": per_seg.get("suggested_name", "") or segment[:10],
            "content": segment,
            "domain": per_seg["domain"],
            "valence": per_seg["valence"],
            "arousal": per_seg["arousal"],
            "tags": per_seg["tags"],
            "importance": 5,
        })
    return entries
|
||||
48
docker-compose.yml
Normal file
48
docker-compose.yml
Normal file
@@ -0,0 +1,48 @@
|
||||
# ============================================================
# Ombre Brain Docker Compose configuration.
#
# Usage:
#   1. Create .env with your API key:  echo "OMBRE_API_KEY=your-key" > .env
#   2. docker-compose up -d
#   3. docker compose logs tunnel   (to find the public URL)
# ============================================================

services:
  ombre-brain:
    build: .
    container_name: ombre-brain
    restart: unless-stopped
    ports:
      - "18001:8000" # Local debug port (optional)
    environment:
      - OMBRE_API_KEY=${OMBRE_API_KEY} # Read from the .env file
      - OMBRE_TRANSPORT=streamable-http # Claude.ai requires streamable-http
      - OMBRE_BUCKETS_DIR=/data # Bucket storage path inside the container
    volumes:
      # Mount your Obsidian vault (or any host directory) for persistent storage.
      # Example:
      #   - /path/to/your/Obsidian Vault/Ombre Brain:/data
      # NOTE(review): the path below is machine-specific — change it before
      # deploying on another host.
      - /Users/p0lar1s/Library/Mobile Documents/iCloud~md~obsidian/Documents/Obsidian Vault/Ombre Brain:/data
      - ./config.yaml:/app/config.yaml

  # Cloudflare Tunnel (optional) — exposes the service to the public internet.
  # Put your own credentials under ~/.cloudflared/.
  tunnel:
    image: cloudflare/cloudflared:latest
    container_name: ombre-tunnel
    restart: unless-stopped
    command: >
      tunnel --no-autoupdate --protocol http2
      --config /etc/cloudflared/config.yml
      --proxy-keepalive-timeout 300s
      --proxy-connection-timeout 300s
      run
    volumes:
      - ~/.cloudflared:/etc/cloudflared
    depends_on:
      - ombre-brain
|
||||
118
migrate_to_domains.py
Normal file
118
migrate_to_domains.py
Normal file
@@ -0,0 +1,118 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
迁移脚本:将 dynamic/ 下的平铺记忆桶文件重组为域子目录结构。
|
||||
|
||||
旧结构: dynamic/{bucket_id}.md
|
||||
新结构: dynamic/{primary_domain}/{name}_{bucket_id}.md
|
||||
|
||||
纯标准库,无外部依赖。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
|
||||
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
|
||||
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
||||
|
||||
|
||||
def sanitize_name(name: str) -> str:
    """Keep only word chars, whitespace, CJK and '-', trimmed and capped at 80 chars."""
    kept = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE).strip()
    return kept[:80] if kept else "unnamed"
|
||||
|
||||
|
||||
def parse_frontmatter(filepath):
    """Regex-only extraction of id / name / domain from a YAML frontmatter block.

    Returns a dict with "id"/"name" (when present) and "domain" (defaults
    to ["未分类"]), or None when the file has no leading '---' fence or the
    fence is unterminated.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        text = fh.read()

    if not text.startswith("---"):
        return None
    pieces = text.split("---", 2)
    if len(pieces) < 3:
        return None
    header = pieces[1]

    meta = {}
    # Scalar fields share one extraction pattern; surrounding quotes dropped.
    for key in ("id", "name"):
        hit = re.search(rf"^{key}:\s*(.+)$", header, re.MULTILINE)
        if hit:
            meta[key] = hit.group(1).strip().strip("'\"")

    # domain is a YAML list ("- value" lines right after "domain:").
    block = re.search(r"^domain:\s*\n((?:\s*-\s*.+\n?)+)", header, re.MULTILINE)
    meta["domain"] = re.findall(r"-\s*(.+)", block.group(1)) if block else ["未分类"]

    return meta
|
||||
|
||||
|
||||
def migrate():
    """Move flat dynamic/*.md buckets into per-domain subdirectories.

    Old layout: dynamic/{bucket_id}.md
    New layout: dynamic/{primary_domain}/{name}_{bucket_id}.md

    Files already inside subdirectories are left untouched. Prints a
    per-file report and the resulting tree when done.
    """
    if not os.path.exists(DYNAMIC_DIR):
        print(f"目录不存在: {DYNAMIC_DIR}")
        return

    # Only .md files directly under dynamic/ (skip already-migrated subdirs).
    files = [f for f in os.listdir(DYNAMIC_DIR)
             if f.endswith(".md") and os.path.isfile(os.path.join(DYNAMIC_DIR, f))]

    if not files:
        print("没有需要迁移的文件。")
        return

    print(f"发现 {len(files)} 个待迁移文件\n")

    for filename in sorted(files):
        old_path = os.path.join(DYNAMIC_DIR, filename)
        try:
            meta = parse_frontmatter(old_path)
        except Exception as e:
            # Fix: report the actual filename (was a literal "(unknown)" placeholder).
            print(f" ✗ 无法解析 {filename}: {e}")
            continue

        if not meta:
            print(f" ✗ 无 frontmatter: {filename}")
            continue

        bucket_id = meta.get("id", filename.replace(".md", ""))
        name = meta.get("name", "")
        domain = meta.get("domain", ["未分类"])
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"

        # Build the destination: dynamic/<primary domain>/<name>_<id>.md
        domain_dir = os.path.join(DYNAMIC_DIR, primary_domain)
        os.makedirs(domain_dir, exist_ok=True)

        if name and name != bucket_id:
            new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
        else:
            new_filename = f"{bucket_id}.md"

        new_path = os.path.join(domain_dir, new_filename)

        # Relocate the bucket file.
        shutil.move(old_path, new_path)
        print(f" ✓ {filename}")
        print(f" → {primary_domain}/{new_filename}")

    print(f"\n迁移完成。")

    # Show the resulting directory tree.
    print("\n=== 新目录结构 ===")
    for root, dirs, files in os.walk(DYNAMIC_DIR):
        level = root.replace(DYNAMIC_DIR, "").count(os.sep)
        indent = " " * level
        folder = os.path.basename(root)
        if level > 0:
            print(f"{indent}📁 {folder}/")
        for f in sorted(files):
            if f.endswith(".md"):
                print(f"{indent} 📄 {f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
migrate()
|
||||
121
reclassify_api.py
Normal file
121
reclassify_api.py
Normal file
@@ -0,0 +1,121 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
用 API 重新打标未分类记忆桶,修正 domain/tags/name,移动到正确目录。
|
||||
用法: docker exec ombre-brain python3 /app/reclassify_api.py
|
||||
"""
|
||||
import asyncio
|
||||
import os
|
||||
import json
|
||||
import glob
|
||||
import re
|
||||
|
||||
from openai import AsyncOpenAI
|
||||
import frontmatter
|
||||
|
||||
ANALYZE_PROMPT = (
|
||||
"你是一个内容分析器。请分析以下文本,输出结构化的元数据。\n\n"
|
||||
"分析规则:\n"
|
||||
'1. domain(主题域):选最精确的 1~2 个,只选真正相关的\n'
|
||||
' 日常: ["饮食", "穿搭", "出行", "居家", "购物"]\n'
|
||||
' 人际: ["家庭", "恋爱", "友谊", "社交"]\n'
|
||||
' 成长: ["工作", "学习", "考试", "求职"]\n'
|
||||
' 身心: ["健康", "心理", "睡眠", "运动"]\n'
|
||||
' 兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]\n'
|
||||
' 数字: ["编程", "AI", "硬件", "网络"]\n'
|
||||
' 事务: ["财务", "计划", "待办"]\n'
|
||||
' 内心: ["情绪", "回忆", "梦境", "自省"]\n'
|
||||
"2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极\n"
|
||||
"3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动\n"
|
||||
"4. tags(关键词标签):3~5 个最能概括内容的关键词\n"
|
||||
"5. suggested_name(建议桶名):10字以内的简短标题\n\n"
|
||||
"输出格式(纯 JSON,无其他内容):\n"
|
||||
'{\n'
|
||||
' "domain": ["主题域1", "主题域2"],\n'
|
||||
' "valence": 0.7,\n'
|
||||
' "arousal": 0.4,\n'
|
||||
' "tags": ["标签1", "标签2", "标签3"],\n'
|
||||
' "suggested_name": "简短标题"\n'
|
||||
'}'
|
||||
)
|
||||
|
||||
DATA_DIR = "/data/dynamic"
|
||||
UNCLASS_DIR = os.path.join(DATA_DIR, "未分类")
|
||||
|
||||
|
||||
def sanitize(name):
    """Strip characters illegal in filenames and cap the result at 20 chars."""
    cleaned = re.sub(r'[<>:"/\\|?*\n\r]', '', name).strip()
    if not cleaned:
        return "未命名"
    return cleaned[:20]
|
||||
|
||||
|
||||
async def reclassify():
    """Re-tag every bucket under the 未分类 directory via the LLM API.

    For each file: send name+content to the model, parse the JSON metadata
    it returns, validate/clamp the fields, write them back into the
    frontmatter, and move the file into its primary-domain directory.
    Per-file failures are reported and skipped.
    """
    client = AsyncOpenAI(
        api_key=os.environ.get("OMBRE_API_KEY", ""),
        base_url="https://api.siliconflow.cn/v1",
        timeout=60.0,
    )

    files = sorted(glob.glob(os.path.join(UNCLASS_DIR, "*.md")))
    print(f"找到 {len(files)} 个未分类文件\n")

    for fpath in files:
        basename = os.path.basename(fpath)
        post = frontmatter.load(fpath)
        content = post.content.strip()
        name = post.metadata.get("name", "")
        full_text = f"{name}\n{content}" if name else content

        try:
            resp = await client.chat.completions.create(
                model="deepseek-ai/DeepSeek-V3",
                messages=[
                    {"role": "system", "content": ANALYZE_PROMPT},
                    {"role": "user", "content": full_text[:2000]},  # cap prompt size
                ],
                max_tokens=256,
                temperature=0.1,
            )
            raw = resp.choices[0].message.content.strip()
            if raw.startswith("```"):
                # Strip a markdown code fence around the JSON, if present.
                raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]
            result = json.loads(raw)
        except Exception as e:
            print(f" X API失败 {basename}: {e}")
            continue

        # --- Validate / clamp the model's output --------------------------
        # Fix: domain/tags may come back as a bare string; slicing a string
        # would store a character slice instead of a list.
        domain_raw = result.get("domain", ["未分类"])
        if isinstance(domain_raw, str):
            domain_raw = [domain_raw]
        elif not isinstance(domain_raw, list):
            domain_raw = ["未分类"]
        new_domain = domain_raw[:3]

        tags_raw = result.get("tags", [])
        if isinstance(tags_raw, str):
            tags_raw = [tags_raw]
        elif not isinstance(tags_raw, list):
            tags_raw = []
        new_tags = tags_raw[:5]

        new_name = sanitize(result.get("suggested_name", "") or name)

        # Fix: a non-numeric valence/arousal used to raise an uncaught
        # ValueError outside the try block and abort the whole run.
        try:
            new_valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
        except (ValueError, TypeError):
            new_valence = 0.5
        try:
            new_arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))
        except (ValueError, TypeError):
            new_arousal = 0.3

        post.metadata["domain"] = new_domain
        post.metadata["tags"] = new_tags
        post.metadata["valence"] = new_valence
        post.metadata["arousal"] = new_arousal
        if new_name:
            post.metadata["name"] = new_name

        # Write the updated frontmatter back in place.
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))

        # Move into the primary domain's directory.
        primary = sanitize(new_domain[0]) if new_domain else "未分类"
        target_dir = os.path.join(DATA_DIR, primary)
        os.makedirs(target_dir, exist_ok=True)

        bid = post.metadata.get("id", "")
        new_filename = f"{new_name}_{bid}.md" if new_name and new_name != bid else basename
        dest = os.path.join(target_dir, new_filename)

        if dest != fpath:
            os.rename(fpath, dest)

        print(f" OK {basename}")
        print(f" -> {primary}/{new_filename}")
        print(f" domain={new_domain} tags={new_tags} V={new_valence} A={new_arousal}")
        print()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(reclassify())
|
||||
198
reclassify_domains.py
Normal file
198
reclassify_domains.py
Normal file
@@ -0,0 +1,198 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
重分类脚本:根据新的域列表,重新分析已有桶的 domain 并搬到对应子目录。
|
||||
纯标准库,读 frontmatter + 正文内容做关键词匹配。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
|
||||
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
|
||||
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
||||
|
||||
# 新域关键词表(和 dehydrator.py 的 _local_analyze 一致)
|
||||
DOMAIN_KEYWORDS = {
|
||||
"饮食": {"吃", "饭", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
|
||||
"超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食", "麦片"},
|
||||
"家庭": {"爸", "妈", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
|
||||
"奶奶", "爷爷", "亲戚", "家里", "生日礼", "生活费"},
|
||||
"恋爱": {"爱人", "男友", "女友", "恋", "约会", "分手", "暧昧",
|
||||
"在一起", "想你", "同床", "一辈子", "爱你", "我们是",
|
||||
"克劳德", "亲密", "接吻", "正缘"},
|
||||
"友谊": {"朋友", "闺蜜", "兄弟", "聚", "约饭"},
|
||||
"社交": {"见面", "圈子", "社区", "创作者", "发帖", "鹤见"},
|
||||
"工作": {"会议", "项目", "客户", "汇报", "同事", "老板", "薪资",
|
||||
"领导力", "管理沟通"},
|
||||
"学习": {"课", "考试", "论文", "作业", "教授", "Python实操",
|
||||
"选课", "学分", "jieba", "分词"},
|
||||
"健康": {"医院", "复查", "吃药", "抽血", "心率", "心电图",
|
||||
"病", "慢粒", "融合基因", "二尖瓣", "月经", "脚趾甲"},
|
||||
"心理": {"焦虑", "抑郁", "创伤", "人格", "安全感", "崩溃",
|
||||
"压力", "自残", "ABC人格", "人格分裂", "恋爱焦虑"},
|
||||
"睡眠": {"睡", "失眠", "噩梦", "清醒", "熬夜", "做梦"},
|
||||
"游戏": {"游戏", "极乐迪斯科", "存档", "通关", "Shivers", "DLC"},
|
||||
"影视": {"电影", "番剧", "动漫", "剧", "综艺"},
|
||||
"阅读": {"书", "小说", "读完", "漫画", "李宿芳菲"},
|
||||
"创作": {"写", "预设", "脚本", "SillyTavern", "插件", "正则",
|
||||
"人设卡", "天气同步", "破甲词"},
|
||||
"编程": {"代码", "python", "bug", "api", "docker", "git",
|
||||
"调试", "部署", "开发", "server"},
|
||||
"AI": {"模型", "Claude", "gemini", "LLM", "token", "prompt",
|
||||
"LoRA", "MCP", "DeepSeek", "隧道", "Ombre Brain",
|
||||
"打包盒", "脱水", "记忆系统"},
|
||||
"网络": {"VPN", "梯子", "代理", "域名", "隧道", "cloudflare",
|
||||
"tunnel", "反代"},
|
||||
"财务": {"钱", "转账", "花了", "欠", "黄金", "卖掉", "换了",
|
||||
"生活费", "4276"},
|
||||
"情绪": {"开心", "难过", "哭", "泪", "孤独", "伤心", "烦",
|
||||
"委屈", "感动", "温柔", "口罩湿了"},
|
||||
"回忆": {"以前", "小时候", "那时", "怀念", "曾经", "纹身",
|
||||
"十三岁", "九岁"},
|
||||
"自省": {"反思", "觉得自己", "问自己", "自恋", "投射"},
|
||||
}
|
||||
|
||||
|
||||
def sanitize_name(name):
    """Remove everything except word chars, whitespace, CJK and '-'; cap at 80 chars."""
    stripped = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE).strip()
    if not stripped:
        return "unnamed"
    return stripped[:80]
|
||||
|
||||
|
||||
def parse_md(filepath):
    """Split a bucket file into (meta dict, raw yaml text, body).

    meta contains "id"/"name" (when present) and "domain" (default
    ["未分类"], values stripped). When there is no valid frontmatter the
    result is (None, None, full file content).
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        text = fh.read()

    if not text.startswith("---"):
        return None, None, text
    pieces = text.split("---", 2)
    if len(pieces) < 3:
        return None, None, text
    header, body = pieces[1], pieces[2]

    meta = {}
    for key in ("id", "name"):
        hit = re.search(rf"^{key}:\s*(.+)$", header, re.MULTILINE)
        if hit:
            meta[key] = hit.group(1).strip().strip("'\"")

    block = re.search(r"^domain:\s*\n((?:\s*-\s*.+\n?)+)", header, re.MULTILINE)
    if block:
        meta["domain"] = [d.strip() for d in re.findall(r"-\s*(.+)", block.group(1))]
    else:
        meta["domain"] = ["未分类"]

    return meta, header, body
|
||||
|
||||
|
||||
def classify(body, old_domains):
    """Re-classify a bucket by keyword hits in its body text.

    A domain qualifies with >= 2 case-insensitive keyword hits; the top
    two qualifying domains (by hit count, stable order) win. Falls back
    to old_domains when nothing matches.
    """
    haystack = body.lower()
    hits_per_domain = [
        (dom, sum(kw.lower() in haystack for kw in keywords))
        for dom, keywords in DOMAIN_KEYWORDS.items()
    ]
    qualified = sorted(
        (pair for pair in hits_per_domain if pair[1] >= 2),
        key=lambda pair: pair[1],
        reverse=True,
    )
    if not qualified:
        return old_domains  # nothing matched — keep the old classification
    return [dom for dom, _ in qualified[:2]]
|
||||
|
||||
|
||||
def update_domain_in_file(filepath, new_domains):
    """Rewrite the `domain:` list inside a file's YAML frontmatter in place.

    Replaces the first "domain:" block (the header plus its "- value"
    lines) with a freshly built list and writes the file back.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    domain_yaml = "domain:\n" + "".join(f"- {d}\n" for d in new_domains)
    # Fix: use a callable replacement — a plain replacement string has its
    # backslashes and "\g<...>" sequences interpreted by re.sub, which would
    # corrupt (or raise on) any domain name containing them.
    content = re.sub(
        r"domain:\s*\n(?:\s*-\s*.+\n?)+",
        lambda _m: domain_yaml,
        content,
        count=1,
    )
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
|
||||
|
||||
|
||||
def reclassify():
    """Re-run keyword classification over every bucket and move files accordingly.

    Walks dynamic/ recursively, re-derives each bucket's domains from its
    body text via classify(), updates the frontmatter when the result
    changed, moves the file under its new primary-domain directory, prunes
    empty directories, and prints the resulting tree.
    """
    if not os.path.exists(DYNAMIC_DIR):
        print("目录不存在")
        return

    # Collect every .md file, recursively (buckets may already sit in subdirs).
    all_files = []
    for root, _, files in os.walk(DYNAMIC_DIR):
        for f in files:
            if f.endswith(".md"):
                all_files.append(os.path.join(root, f))

    if not all_files:
        print("没有文件。")
        return

    print(f"扫描到 {len(all_files)} 个桶文件\n")

    for filepath in sorted(all_files):
        meta, yaml_text, body = parse_md(filepath)
        if not meta:
            print(f" ✗ 无法解析: {os.path.basename(filepath)}")
            continue

        bucket_id = meta.get("id", "unknown")
        name = meta.get("name", bucket_id)
        old_domains = meta.get("domain", ["未分类"])
        # Keyword-based reclassification; keeps old domains when nothing matches.
        new_domains = classify(body, old_domains)

        primary = sanitize_name(new_domains[0])
        # NOTE(review): old_primary is computed but never used below.
        old_primary = sanitize_name(old_domains[0]) if old_domains else "未分类"

        # Target filename: "<name>_<id>.md", or "<id>.md" when no distinct name.
        if name and name != bucket_id:
            new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
        else:
            new_filename = f"{bucket_id}.md"

        new_dir = os.path.join(DYNAMIC_DIR, primary)
        os.makedirs(new_dir, exist_ok=True)
        new_path = os.path.join(new_dir, new_filename)

        # A bucket "changed" if either its domain list or its location differs.
        changed = (new_domains != old_domains) or (filepath != new_path)

        if changed:
            # Rewrite the frontmatter's domain list first, then relocate the file.
            update_domain_in_file(filepath, new_domains)
            if filepath != new_path:
                shutil.move(filepath, new_path)
            print(f" ✓ {name}")
            print(f" {','.join(old_domains)} → {','.join(new_domains)}")
            print(f" → {primary}/{new_filename}")
        else:
            print(f" · {name} (不变)")

    # Prune top-level directories left empty by the moves.
    for d in os.listdir(DYNAMIC_DIR):
        dp = os.path.join(DYNAMIC_DIR, d)
        if os.path.isdir(dp) and not os.listdir(dp):
            os.rmdir(dp)
            print(f"\n 🗑 删除空目录: {d}/")

    print(f"\n重分类完成。\n")

    # Print the resulting directory tree.
    print("=== 新目录结构 ===")
    for root, dirs, files in os.walk(DYNAMIC_DIR):
        level = root.replace(DYNAMIC_DIR, "").count(os.sep)
        indent = " " * level
        folder = os.path.basename(root)
        if level > 0:
            print(f"{indent}📁 {folder}/")
        for f in sorted(files):
            if f.endswith(".md"):
                print(f"{indent} 📄 {f}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
reclassify()
|
||||
21
render.yaml
Normal file
21
render.yaml
Normal file
@@ -0,0 +1,21 @@
|
||||
# Render.com deployment blueprint for the Ombre Brain web service.
services:
  - type: web
    name: ombre-brain
    env: python
    region: oregon
    plan: free
    buildCommand: pip install -r requirements.txt
    startCommand: python server.py
    envVars:
      - key: OMBRE_TRANSPORT
        value: streamable-http
      - key: OMBRE_API_KEY
        sync: false # Set in Render dashboard > Environment (any OpenAI-compatible key)
      - key: OMBRE_BASE_URL
        sync: false # e.g. https://api.deepseek.com/v1 or https://api.siliconflow.cn/v1
      - key: OMBRE_BUCKETS_DIR
        value: /opt/render/project/src/buckets
    disk:
      # Persistent disk so memory buckets survive deploys.
      name: ombre-buckets
      mountPath: /opt/render/project/src/buckets
      sizeGB: 1
|
||||
25
requirements.txt
Normal file
25
requirements.txt
Normal file
@@ -0,0 +1,25 @@
|
||||
# ============================================================
# Ombre Brain Python dependencies
# Install: pip install -r requirements.txt
# ============================================================

# MCP protocol SDK (core of the Claude communication)
mcp>=1.0.0

# Fuzzy matching (memory-bucket search)
rapidfuzz>=3.0.0

# OpenAI-compatible client (DeepSeek/Ollama/LM Studio/vLLM/Gemini or any compatible API)
openai>=1.0.0

# YAML config parsing
pyyaml>=6.0

# Markdown frontmatter parsing (bucket file read/write)
python-frontmatter>=1.1.0

# Chinese word segmentation
jieba>=0.42.1

# Async HTTP client (application-level keepalive ping)
httpx>=0.27.0
|
||||
620
server.py
Normal file
620
server.py
Normal file
@@ -0,0 +1,620 @@
|
||||
# ============================================================
|
||||
# Module: MCP Server Entry Point (server.py)
|
||||
# 模块:MCP 服务器主入口
|
||||
#
|
||||
# Starts the Ombre Brain MCP service and registers memory
|
||||
# operation tools for Claude to call.
|
||||
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
|
||||
#
|
||||
# Core responsibilities:
|
||||
# 核心职责:
|
||||
# - Initialize config, bucket manager, dehydrator, decay engine
|
||||
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
|
||||
# - Expose 5 MCP tools:
|
||||
# 暴露 5 个 MCP 工具:
|
||||
# breath — Surface unresolved memories or search by keyword
|
||||
# 浮现未解决记忆 或 按关键词检索
|
||||
# hold — Store a single memory
|
||||
# 存储单条记忆
|
||||
# grow — Diary digest, auto-split into multiple buckets
|
||||
# 日记归档,自动拆分多桶
|
||||
# trace — Modify metadata / resolved / delete
|
||||
# 修改元数据 / resolved 标记 / 删除
|
||||
# pulse — System status + bucket listing
|
||||
# 系统状态 + 所有桶列表
|
||||
#
|
||||
# Startup:
|
||||
# 启动方式:
|
||||
# Local: python server.py
|
||||
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
|
||||
# Docker: docker-compose up
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import sys
|
||||
import random
|
||||
import logging
|
||||
import asyncio
|
||||
import httpx
|
||||
from typing import Optional
|
||||
|
||||
# --- Ensure same-directory modules can be imported ---
|
||||
# --- 确保同目录下的模块能被正确导入 ---
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
|
||||
from bucket_manager import BucketManager
|
||||
from dehydrator import Dehydrator
|
||||
from decay_engine import DecayEngine
|
||||
from utils import load_config, setup_logging
|
||||
|
||||
# --- Load config & init logging / 加载配置 & 初始化日志 ---
|
||||
config = load_config()
|
||||
setup_logging(config.get("log_level", "INFO"))
|
||||
logger = logging.getLogger("ombre_brain")
|
||||
|
||||
# --- Initialize three core components / 初始化三大核心组件 ---
|
||||
bucket_mgr = BucketManager(config) # Bucket manager / 记忆桶管理器
|
||||
dehydrator = Dehydrator(config) # Dehydrator / 脱水器
|
||||
decay_engine = DecayEngine(config, bucket_mgr) # Decay engine / 衰减引擎
|
||||
|
||||
# --- Create MCP server instance / 创建 MCP 服务器实例 ---
|
||||
# host="0.0.0.0" so Docker container's SSE is externally reachable
|
||||
# stdio mode ignores host (no network)
|
||||
mcp = FastMCP(
|
||||
"Ombre Brain",
|
||||
host="0.0.0.0",
|
||||
port=8000,
|
||||
)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# /health endpoint: lightweight keepalive
|
||||
# 轻量保活接口
|
||||
# For Cloudflare Tunnel or reverse proxy to ping, preventing idle timeout
|
||||
# 供 Cloudflare Tunnel 或反代定期 ping,防止空闲超时断连
|
||||
# =============================================================
|
||||
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """Lightweight liveness probe for Cloudflare Tunnel / reverse proxies.

    200 with total bucket count and decay-engine state on success,
    500 with the error detail when the bucket manager is unreachable.
    """
    # Deferred import: starlette is only needed in the HTTP transport path.
    from starlette.responses import JSONResponse
    try:
        stats = await bucket_mgr.get_stats()
        body = {
            "status": "ok",
            "buckets": stats["permanent_count"] + stats["dynamic_count"],
            "decay_engine": "running" if decay_engine.is_running else "stopped",
        }
        return JSONResponse(body)
    except Exception as e:
        return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Internal helper: merge-or-create
|
||||
# 内部辅助:检查是否可合并,可以则合并,否则新建
|
||||
# Shared by hold and grow to avoid duplicate logic
|
||||
# hold 和 grow 共用,避免重复逻辑
|
||||
# =============================================================
|
||||
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """Merge content into an existing similar bucket, or create a new one.

    Shared by the `hold` and `grow` tools. Searches for the single most
    similar bucket; when its score exceeds the configured `merge_threshold`
    (default 75) and the bucket is not pinned/protected, contents are merged
    via the dehydrator, tags/domains are unioned, and the higher importance
    wins. Any failure along the merge path degrades to creating a new bucket.

    Returns:
        (bucket name or id, True) when merged into an existing bucket,
        (new bucket id, False) when a new bucket was created.
    """
    try:
        existing = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        # Best-effort: a failed similarity search simply means "create new".
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        existing = []

    if existing and existing[0].get("score", 0) > config.get("merge_threshold", 75):
        bucket = existing[0]
        # Never merge into pinned/protected buckets.
        if not (bucket["metadata"].get("pinned") or bucket["metadata"].get("protected")):
            try:
                merged = await dehydrator.merge(bucket["content"], content)
                await bucket_mgr.update(
                    bucket["id"],
                    content=merged,
                    # Union of old and new tags/domains; newest emotion values win.
                    tags=list(set(bucket["metadata"].get("tags", []) + tags)),
                    importance=max(bucket["metadata"].get("importance", 5), importance),
                    domain=list(set(bucket["metadata"].get("domain", []) + domain)),
                    valence=valence,
                    arousal=arousal,
                )
                return bucket["metadata"].get("name", bucket["id"]), True
            except Exception as e:
                logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    bucket_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,  # "" normalized to None
    )
    return bucket_id, False
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 1: breath — Breathe
|
||||
# 工具 1:breath — 呼吸
|
||||
#
|
||||
# No args: surface highest-weight unresolved memories (active push)
|
||||
# 无参数:浮现权重最高的未解决记忆
|
||||
# With args: search by keyword + emotion coordinates
|
||||
# 有参数:按关键词+情感坐标检索记忆
|
||||
# =============================================================
|
||||
# NOTE(review): the Chinese docstring below is kept verbatim — FastMCP is
# understood to expose a tool function's docstring as the tool description
# the model sees, so translating it would change runtime behavior (confirm).
@mcp.tool()
async def breath(
    query: Optional[str] = None,
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    """检索/浮现记忆。不传query或传空=自动浮现,有query=关键词检索。domain逗号分隔,valence/arousal 0~1(-1忽略)。"""
    # Lazily start the background decay engine on first tool call.
    await decay_engine.ensure_started()

    # --- No/empty query: surfacing mode (weight-pool active push) ---
    if not query or not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"

        # Pinned/protected buckets always surface as "core principles".
        pinned_buckets = [
            b for b in all_buckets
            if b["metadata"].get("pinned") or b["metadata"].get("protected")
        ]
        pinned_results = []
        for b in pinned_buckets:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                pinned_results.append(f"📌 [核心准则] {summary}")
            except Exception as e:
                # A single bad bucket must not block the rest.
                logger.warning(f"Failed to dehydrate pinned bucket / 钉选桶脱水失败: {e}")
                continue

        # Unresolved, non-permanent, non-pinned buckets compete by decay score.
        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
            and not b["metadata"].get("pinned", False)
            and not b["metadata"].get("protected", False)
        ]

        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        # Only the top 2 by weight are surfaced per breath.
        top = scored[:2]
        dynamic_results = []
        for b in top:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                # touch() records the access so surfacing affects future decay.
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                dynamic_results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue

        if not pinned_results and not dynamic_results:
            return "权重池平静,没有需要处理的记忆。"

        parts = []
        if pinned_results:
            parts.append("=== 核心准则 ===\n" + "\n---\n".join(pinned_results))
        if dynamic_results:
            parts.append("=== 浮现记忆 ===\n" + "\n---\n".join(dynamic_results))
        return "\n\n".join(parts)

    # --- With a query: search mode ---
    # Comma-separated domain string -> list filter; -1 sentinel disables
    # the valence/arousal coordinates.
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None

    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"

    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue

    # Random surfacing: when search found < 3 matches, a 40% chance to let
    # 1-3 low-weight (score < 2.0) old buckets drift up alongside the results.
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    drift_results.append(f"[surface_type: random]\n{summary}")
                results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            # Random surfacing is purely additive; failures are non-fatal.
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")

    if not results:
        return "未找到相关记忆。"

    return "\n---\n".join(results)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 2: hold — Hold on to this
|
||||
# 工具 2:hold — 握住,留下来
|
||||
# =============================================================
|
||||
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
    pinned: bool = False,
) -> str:
    """存储单条记忆,自动打标+合并。tags逗号分隔,importance 1-10。pinned=True创建永久钉选桶。"""
    # NOTE: the Chinese docstring above is the MCP tool description shown to
    # the client model at runtime — do not translate or reword it casually.
    #
    # Store a single memory: auto-tag via the dehydrator API, then either
    # merge into an existing bucket or create a new one. Pinned memories
    # bypass merging and go straight to the permanent store.
    await decay_engine.ensure_started()

    # --- Input validation: reject empty / whitespace-only content ---
    if not content or not content.strip():
        return "内容为空,无法存储。"

    # Clamp importance into the documented 1-10 range instead of rejecting.
    importance = max(1, min(10, importance))
    extra_tags = [t.strip() for t in tags.split(",") if t.strip()]

    # --- Step 1: auto-tagging; fall back to neutral defaults on API failure ---
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {
            "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
            "tags": [], "suggested_name": "",
        }

    # NOTE(review): direct indexing assumes a successful analyze() always
    # returns these keys — confirm against the Dehydrator contract.
    domain = analysis["domain"]
    valence = analysis["valence"]
    arousal = analysis["arousal"]
    auto_tags = analysis["tags"]
    suggested_name = analysis.get("suggested_name", "")

    # Deduplicate while preserving order (auto tags first, then user tags).
    all_tags = list(dict.fromkeys(auto_tags + extra_tags))

    # --- Pinned buckets bypass merge and are created directly in the
    # permanent directory with importance forced to 10 ---
    if pinned:
        bucket_id = await bucket_mgr.create(
            content=content,
            tags=all_tags,
            importance=10,
            domain=domain,
            valence=valence,
            arousal=arousal,
            name=suggested_name or None,
            bucket_type="permanent",
            pinned=True,
        )
        return f"📌钉选→{bucket_id} {','.join(domain)}"

    # --- Step 2: merge into an existing bucket or create a new one ---
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )

    action = "合并→" if is_merged else "新建→"
    return f"{action}{result_name} {','.join(domain)}"
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 3: grow — Grow, fragments become memories
|
||||
# 工具 3:grow — 生长,一天的碎片长成记忆
|
||||
# =============================================================
|
||||
@mcp.tool()
async def grow(content: str) -> str:
    """日记归档,自动拆分为多桶。短内容(<30字)走快速路径。"""
    # NOTE: the Chinese docstring above is the MCP tool description shown to
    # the client model at runtime — keep it as-is.
    #
    # Archive a diary entry: the dehydrator API splits it into discrete
    # memory items, each merged/created independently. Inputs shorter than
    # 30 chars skip the expensive digest call entirely.
    await decay_engine.ensure_started()

    if not content or not content.strip():
        return "内容为空,无法整理。"

    # --- Short-content fast path ---
    # For tiny inputs (like "1"), calling digest would send the full
    # DIGEST_PROMPT (~800 tokens) to the API for nothing; run a single
    # analyze + merge/create instead, saving one API round trip.
    if len(content.strip()) < 30:
        logger.info(f"grow short-content fast path: {len(content.strip())} chars")
        try:
            analysis = await dehydrator.analyze(content)
        except Exception as e:
            logger.warning(f"Fast-path analyze failed / 快速路径打标失败: {e}")
            analysis = {
                "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
                "tags": [], "suggested_name": "",
            }
        result_name, is_merged = await _merge_or_create(
            content=content.strip(),
            tags=analysis.get("tags", []),
            # analyze() is not guaranteed to return an int importance; only
            # trust it when it actually is one.
            importance=analysis.get("importance", 5) if isinstance(analysis.get("importance"), int) else 5,
            domain=analysis.get("domain", ["未分类"]),
            valence=analysis.get("valence", 0.5),
            arousal=analysis.get("arousal", 0.3),
            name=analysis.get("suggested_name", ""),
        )
        action = "合并" if is_merged else "新建"
        return f"{action} → {result_name} | {','.join(analysis.get('domain', []))} V{analysis.get('valence', 0.5):.1f}/A{analysis.get('arousal', 0.3):.1f}"

    # --- Step 1: let the API split the diary into memory items ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"

    if not items:
        return "内容为空或整理失败。"

    results = []
    created = 0
    merged = 0

    # --- Step 2: merge or create each item; a single failing item is
    # logged and reported but does not abort the rest ---
    for item in items:
        try:
            result_name, is_merged = await _merge_or_create(
                content=item["content"],
                tags=item.get("tags", []),
                importance=item.get("importance", 5),
                domain=item.get("domain", ["未分类"]),
                valence=item.get("valence", 0.5),
                arousal=item.get("arousal", 0.3),
                name=item.get("name", ""),
            )

            if is_merged:
                results.append(f"📎{result_name}")
                merged += 1
            else:
                results.append(f"📝{item.get('name', result_name)}")
                created += 1
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{item.get('name', '?')}: {e}"
            )
            results.append(f"⚠️{item.get('name', '?')}")

    # Compact summary line followed by one marker line per item.
    return f"{len(items)}条|新{created}合{merged}\n" + "\n".join(results)
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 4: trace — Trace, redraw the outline of a memory
|
||||
# 工具 4:trace — 描摹,重新勾勒记忆的轮廓
|
||||
# Also handles deletion (delete=True)
|
||||
# 同时承接删除功能
|
||||
# =============================================================
|
||||
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    pinned: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1沉底/0激活,pinned=1钉选/0取消,delete=True删除。只传需改的,-1或空=不改。"""
    # NOTE: the Chinese docstring above is the MCP tool description shown to
    # the client model at runtime — keep it as-is.
    #
    # Edit a bucket's metadata in place, or delete the bucket entirely.
    # Sentinel values (-1 for numbers, "" for strings) mean "leave unchanged",
    # which is why numeric fields are validated with range checks below
    # instead of simple truthiness.

    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"

    # --- Delete mode: short-circuits every other parameter ---
    if delete:
        success = await bucket_mgr.delete(bucket_id)
        return f"已遗忘记忆桶: {bucket_id}" if success else f"未找到记忆桶: {bucket_id}"

    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"

    # --- Collect only the fields the caller actually passed ---
    updates = {}
    if name:
        updates["name"] = name
    if domain:
        updates["domain"] = [d.strip() for d in domain.split(",") if d.strip()]
    if 0 <= valence <= 1:
        updates["valence"] = valence
    if 0 <= arousal <= 1:
        updates["arousal"] = arousal
    if 1 <= importance <= 10:
        updates["importance"] = importance
    if tags:
        updates["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
    if resolved in (0, 1):
        updates["resolved"] = bool(resolved)
    if pinned in (0, 1):
        updates["pinned"] = bool(pinned)
        if pinned == 1:
            # Pinning locks importance at the maximum; this silently
            # overrides any importance= value passed in the same call.
            updates["importance"] = 10  # pinned → lock importance
    if not updates:
        return "没有任何字段需要修改。"

    success = await bucket_mgr.update(bucket_id, **updates)
    if not success:
        return f"修改失败: {bucket_id}"

    changed = ", ".join(f"{k}={v}" for k, v in updates.items())
    # Append an explicit hint about what the resolved-state change means
    # for future surfacing behavior.
    if "resolved" in updates:
        if updates["resolved"]:
            changed += " → 已沉底,只在关键词触发时重新浮现"
        else:
            changed += " → 已重新激活,将参与浮现排序"
    return f"已修改记忆桶 {bucket_id}: {changed}"
|
||||
|
||||
|
||||
# =============================================================
|
||||
# Tool 5: pulse — Heartbeat, system status + memory listing
|
||||
# 工具 5:pulse — 脉搏,系统状态 + 记忆列表
|
||||
# =============================================================
|
||||
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """系统状态+记忆桶列表。include_archive=True含归档。"""
    # NOTE: the Chinese docstring above is the MCP tool description shown to
    # the client model at runtime — keep it as-is.
    #
    # Report system health (bucket counts, storage size, decay-engine state)
    # followed by a one-line summary per bucket.
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"

    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']} 个\n"
        f"动态记忆桶: {stats['dynamic_count']} 个\n"
        f"归档记忆桶: {stats['archive_count']} 个\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {'运行中' if decay_engine.is_running else '已停止'}\n"
    )

    # --- List all bucket summaries; listing failure still returns status ---
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        return status + f"\n列出记忆桶失败: {e}"

    if not buckets:
        return status + "\n记忆库为空。"

    lines = []
    for b in buckets:
        meta = b.get("metadata", {})
        # Icon priority: pinned/protected > permanent > archived > resolved > active.
        if meta.get("pinned") or meta.get("protected"):
            icon = "📌"
        elif meta.get("type") == "permanent":
            icon = "📦"
        elif meta.get("type") == "archived":
            icon = "🗄️"
        elif meta.get("resolved", False):
            icon = "✅"
        else:
            icon = "💭"
        # Score failures on a single bucket must not break the listing.
        try:
            score = decay_engine.calculate_score(meta)
        except Exception:
            score = 0.0
        domains = ",".join(meta.get("domain", []))
        val = meta.get("valence", 0.5)
        aro = meta.get("arousal", 0.3)
        resolved_tag = " [已解决]" if meta.get("resolved", False) else ""
        lines.append(
            f"{icon} [{meta.get('name', b['id'])}]{resolved_tag} "
            f"主题:{domains} "
            f"情感:V{val:.1f}/A{aro:.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{score:.2f} "
            f"标签:{','.join(meta.get('tags', []))}"
        )

    return status + "\n=== 记忆列表 ===\n" + "\n".join(lines)
|
||||
|
||||
|
||||
# --- Entry point / 启动入口 ---
|
||||
if __name__ == "__main__":
    # Entry point: choose transport from config (stdio by default).
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")

    if transport in ("sse", "streamable-http"):
        import threading
        import uvicorn
        from starlette.middleware.cors import CORSMiddleware

        # --- Application-level keepalive: ping /health every 60s to stop
        # Cloudflare Tunnel from dropping the idle connection ---
        async def _keepalive_loop():
            await asyncio.sleep(10)  # wait for uvicorn to fully start
            async with httpx.AsyncClient() as client:
                while True:
                    try:
                        # NOTE(review): port 8000 is hard-coded here and in
                        # uvicorn.run() below — keep the two in sync.
                        await client.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as e:
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}")
                    await asyncio.sleep(60)

        def _start_keepalive():
            # Runs in a daemon thread with its own event loop; the loop is
            # never closed because the thread lives for the process lifetime.
            loop = asyncio.new_event_loop()
            loop.run_until_complete(_keepalive_loop())

        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()

        # --- CORS middleware so remote clients (Cloudflare Tunnel / ngrok)
        # can connect to either HTTP transport ---
        if transport == "streamable-http":
            _app = mcp.streamable_http_app()
        else:
            _app = mcp.sse_app()
        _app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_methods=["*"],
            allow_headers=["*"],
            expose_headers=["*"],
        )
        logger.info("CORS middleware enabled for remote transport / 已启用 CORS 中间件")
        uvicorn.run(_app, host="0.0.0.0", port=8000)
    else:
        # Local stdio transport: let the MCP framework drive the loop.
        mcp.run(transport=transport)
|
||||
126
test_smoke.py
Normal file
126
test_smoke.py
Normal file
@@ -0,0 +1,126 @@
|
||||
"""Ombre Brain 冒烟测试:验证核心功能链路"""
|
||||
import asyncio
|
||||
import os
|
||||
|
||||
# 确保模块路径
|
||||
import sys
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from utils import load_config, setup_logging
|
||||
from bucket_manager import BucketManager
|
||||
from dehydrator import Dehydrator
|
||||
from decay_engine import DecayEngine
|
||||
|
||||
|
||||
async def main() -> None:
    """Smoke-test the core pipeline end to end.

    Steps: analyze (auto-tag) → create bucket → search → dehydrate →
    decay score → diary digest → cleanup. Each step prints [OK]/[FAIL]
    and continues, except bucket creation, which later steps depend on.
    """
    config = load_config()
    setup_logging("INFO")
    bm = BucketManager(config)
    dh = Dehydrator(config)
    de = DecayEngine(config, bm)

    print(f"API available: {dh.api_available}")
    print(f"base_url: {dh.base_url}")
    print()

    # ===== 1. auto-tagging =====
    print("=== 1. analyze (自动打标) ===")
    try:
        result = await dh.analyze("今天学了 Python 的 asyncio,感觉收获很大,心情不错")
        print(f" domain: {result['domain']}")
        print(f" valence: {result['valence']}, arousal: {result['arousal']}")
        print(f" tags: {result['tags']}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 2. bucket creation =====
    print("=== 2. create (建桶) ===")
    try:
        bid = await bm.create(
            content="P酱喜欢猫,家里养了一只橘猫叫小橘",
            tags=["猫", "宠物"],
            importance=7,
            domain=["生活"],
            valence=0.8,
            arousal=0.4,
        )
        print(f" bucket_id: {bid}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
        return  # remaining steps need `bid`
    print()

    # ===== 3. search =====
    print("=== 3. search (检索) ===")
    try:
        hits = await bm.search("猫", limit=3)
        print(f" found {len(hits)} results")
        for h in hits:
            name = h["metadata"].get("name", h["id"])
            print(f" - {name} (score={h['score']:.1f})")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 4. dehydration (compression) =====
    print("=== 4. dehydrate (脱水压缩) ===")
    try:
        text = (
            "这是一段很长的内容用来测试脱水功能。"
            "P酱今天去了咖啡厅,点了一杯拿铁,然后坐在窗边看书看了两个小时。"
            "期间遇到了一个朋友,聊了聊最近的工作情况。回家之后写了会代码。"
        )
        summary = await dh.dehydrate(text, {})
        print(f" summary: {summary[:120]}...")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 5. decay scoring =====
    print("=== 5. decay score (衰减评分) ===")
    try:
        bucket = await bm.get(bid)
        score = de.calculate_score(bucket["metadata"])
        print(f" score: {score:.3f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 6. diary digest =====
    print("=== 6. digest (日记整理) ===")
    try:
        diary = (
            "今天上午写了个 Python 脚本处理数据,下午和朋友去吃了火锅很开心,"
            "晚上失眠了有点焦虑,想了想明天的面试。"
        )
        items = await dh.digest(diary)
        print(f" 拆分出 {len(items)} 条记忆:")
        for it in items:
            print(f" - [{it.get('name','')}] domain={it['domain']} V{it['valence']:.1f}/A{it['arousal']:.1f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 7. cleanup: delete the bucket created in step 2 =====
    print("=== 7. cleanup (删除测试桶) ===")
    try:
        ok = await bm.delete(bid)
        print(f" deleted: {ok}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    print("=" * 40)
    print("冒烟测试完成!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async smoke test to completion.
    asyncio.run(main())
|
||||
159
test_tools.py
Normal file
159
test_tools.py
Normal file
@@ -0,0 +1,159 @@
|
||||
"""Ombre Brain MCP tool-level end-to-end test: direct calls to @mcp.tool() functions
|
||||
Ombre Brain MCP 工具层端到端测试:直接调用 @mcp.tool() 函数"""
|
||||
import asyncio
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from utils import load_config, setup_logging
|
||||
|
||||
config = load_config()
setup_logging("INFO")

# Import order matters: server.py performs module-level initialization
# (manager/engine construction) that must see the config above.
from server import breath, hold, trace, pulse, grow
|
||||
|
||||
|
||||
async def main() -> None:
    """Exercise each @mcp.tool() entry point directly and tally pass/fail.

    Order: pulse → hold (×2, second one probes merging) → breath (×2,
    second one probes emotion-resonance params) → trace → grow → cleanup.
    """
    passed = 0
    failed = 0

    # ===== pulse =====
    print("=== [1/6] pulse ===")
    try:
        r = await pulse()
        assert "Ombre Brain" in r
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold =====
    print("=== [2/6] hold ===")
    try:
        r = await hold(content="P酱最喜欢的编程语言是 Python,喜欢用 FastAPI 写后端", tags="编程,偏好", importance=8)
        print(f" {r.splitlines()[0]}")
        assert any(kw in r for kw in ["新建", "合并", "📌"])
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold (merge probe: similar content should merge, not create) =====
    print("=== [2b/6] hold (合并测试) ===")
    try:
        r = await hold(content="P酱也喜欢用 Python 写爬虫和数据分析", tags="编程", importance=6)
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath =====
    print("=== [3/6] breath ===")
    try:
        r = await breath(query="Python 编程", max_results=3)
        print(f" 结果前80字: {r[:80]}...")
        assert "未找到" not in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath (emotion-resonance retrieval) =====
    print("=== [3b/6] breath (情感共鸣检索) ===")
    try:
        r = await breath(query="编程", domain="编程", valence=0.8, arousal=0.5)
        print(f" 结果前80字: {r[:80]}...")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # --- Grab an arbitrary bucket ID for the trace test ---
    bucket_id = None
    from bucket_manager import BucketManager
    bm = BucketManager(config)
    all_buckets = await bm.list_all()
    if all_buckets:
        bucket_id = all_buckets[0]["id"]

    # ===== trace =====
    print("=== [4/6] trace ===")
    if bucket_id:
        try:
            r = await trace(bucket_id=bucket_id, domain="编程,创作", importance=9)
            print(f" {r}")
            assert "已修改" in r
            print(" [OK]")
            passed += 1
        except Exception as e:
            print(f" [FAIL] {e}")
            failed += 1
    else:
        print(" [SKIP] 没有可编辑的桶")
    print()

    # ===== grow =====
    print("=== [5/6] grow ===")
    try:
        diary = (
            "今天早上复习了线性代数,搞懂了特征值分解。"
            "中午和室友去吃了拉面,聊了聊暑假实习的事。"
            "下午写了一个 Flask 项目的 API 接口。"
            "晚上看了部电影叫《星际穿越》,被结尾感动哭了。"
        )
        r = await grow(content=diary)
        print(f" {r.splitlines()[0]}")
        for line in r.splitlines()[1:]:
            if line.strip():
                print(f" {line}")
        assert "条|新" in r or "整理" in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== cleanup via trace(delete=True) =====
    # NOTE(review): this deletes EVERY bucket in the store, not just the
    # ones created by this run — destructive if pointed at a real vault.
    print("=== [6/6] cleanup (清理全部测试数据) ===")
    try:
        all_buckets = await bm.list_all()
        for b in all_buckets:
            r = await trace(bucket_id=b["id"], delete=True)
            print(f" {r}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== confirm the store is empty after cleanup =====
    final = await pulse()
    print(f"清理后: {final.splitlines()[0]}")
    print()
    print("=" * 50)
    print(f"MCP tool test complete / 工具测试完成: {passed} passed / {failed} failed")
    if failed == 0:
        print("All passed ✓")
    else:
        print(f"{failed} failed ✗")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run the async tool-level test suite.
    asyncio.run(main())
|
||||
204
utils.py
Normal file
204
utils.py
Normal file
@@ -0,0 +1,204 @@
|
||||
# ============================================================
|
||||
# Module: Common Utilities (utils.py)
|
||||
# 模块:通用工具函数
|
||||
#
|
||||
# Provides config loading, logging init, path safety, ID generation, etc.
|
||||
# 提供配置加载、日志初始化、路径安全校验、ID 生成等基础能力
|
||||
#
|
||||
# Depended on by: server.py, bucket_manager.py, dehydrator.py, decay_engine.py
|
||||
# 被谁依赖:server.py, bucket_manager.py, dehydrator.py, decay_engine.py
|
||||
# ============================================================
|
||||
|
||||
import os
|
||||
import re
|
||||
import uuid
|
||||
import yaml
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def load_config(config_path: str = None) -> dict:
    """Load configuration with layered precedence.

    Priority: environment variables > config.yaml > built-in defaults.

    Args:
        config_path: Optional explicit path to the YAML config file;
            defaults to ``config.yaml`` next to this module.

    Returns:
        The merged configuration dict. Also ensures the bucket storage
        directories (permanent/dynamic/archive) exist as a side effect.
    """
    # --- Built-in defaults: the system runs even without config.yaml ---
    defaults = {
        "transport": "stdio",
        "log_level": "INFO",
        "buckets_dir": os.path.join(os.path.dirname(os.path.abspath(__file__)), "buckets"),
        "merge_threshold": 75,
        "dehydration": {
            "model": "deepseek-chat",
            "base_url": "https://api.deepseek.com/v1",
            "api_key": "",
            "max_tokens": 1024,
            "temperature": 0.1,
        },
        "decay": {
            "lambda": 0.05,
            "threshold": 0.3,
            "check_interval_hours": 24,
            "emotion_weights": {
                "base": 1.0,
                "arousal_boost": 0.8,
            },
        },
        "matching": {
            "fuzzy_threshold": 50,
            "max_results": 5,
        },
    }

    # --- Locate and load the user YAML config (if any) ---
    if config_path is None:
        config_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), "config.yaml"
        )

    config = defaults.copy()
    if os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as f:
                file_config = yaml.safe_load(f) or {}
            if isinstance(file_config, dict):
                # Deep-merge so partial user configs keep nested defaults.
                config = _deep_merge(defaults, file_config)
            else:
                logging.warning(
                    f"Config file is not a valid YAML dict, using defaults / "
                    f"配置文件不是有效的 YAML 字典,使用默认配置: {config_path}"
                )
        except yaml.YAMLError as e:
            # A broken config file degrades to defaults instead of crashing.
            logging.warning(
                f"Failed to parse config file, using defaults / "
                f"配置文件解析失败,使用默认配置: {e}"
            )

    # --- Environment-variable overrides (highest priority); covers
    # secrets and deployment-time settings ---
    env_api_key = os.environ.get("OMBRE_API_KEY", "")
    if env_api_key:
        config.setdefault("dehydration", {})["api_key"] = env_api_key

    env_base_url = os.environ.get("OMBRE_BASE_URL", "")
    if env_base_url:
        config.setdefault("dehydration", {})["base_url"] = env_base_url

    env_transport = os.environ.get("OMBRE_TRANSPORT", "")
    if env_transport:
        config["transport"] = env_transport

    env_buckets_dir = os.environ.get("OMBRE_BUCKETS_DIR", "")
    if env_buckets_dir:
        config["buckets_dir"] = env_buckets_dir

    # --- Ensure the bucket storage subdirectories exist ---
    buckets_dir = config["buckets_dir"]
    for subdir in ["permanent", "dynamic", "archive"]:
        os.makedirs(os.path.join(buckets_dir, subdir), exist_ok=True)

    return config
|
||||
|
||||
|
||||
def _deep_merge(base: dict, override: dict) -> dict:
|
||||
"""
|
||||
Deep-merge two dicts; override values take precedence.
|
||||
深度合并两个字典,override 的值覆盖 base。
|
||||
"""
|
||||
result = base.copy()
|
||||
for key, value in override.items():
|
||||
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
|
||||
result[key] = _deep_merge(result[key], value)
|
||||
else:
|
||||
result[key] = value
|
||||
return result
|
||||
|
||||
|
||||
def setup_logging(level: str = "INFO") -> None:
    """Configure root logging to stderr.

    In MCP stdio mode stdout carries protocol frames, so logs must go to
    stderr — StreamHandler's default stream. An unrecognized level name
    silently falls back to INFO.
    """
    resolved = getattr(logging, level.upper(), None)
    if not isinstance(resolved, int):
        resolved = logging.INFO

    handler = logging.StreamHandler()  # defaults to sys.stderr
    logging.basicConfig(
        level=resolved,
        format="[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[handler],
    )
|
||||
|
||||
|
||||
def generate_bucket_id() -> str:
    """Return a fresh 12-character hex bucket ID.

    A truncated random UUID4 — short enough to read, long enough
    (48 bits) that collisions are negligible at this scale.
    """
    return str(uuid.uuid4()).replace("-", "")[:12]
|
||||
|
||||
|
||||
def sanitize_name(name: str) -> str:
    """Reduce a bucket name to filesystem-safe characters.

    Keeps word characters (including CJK), whitespace, and hyphens;
    strips everything else — which defeats path-traversal payloads such
    as ``../../etc/passwd``. Non-strings and names that clean down to
    nothing yield ``"unnamed"``; output is capped at 80 characters.
    """
    if not isinstance(name, str):
        return "unnamed"
    safe = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE)
    safe = safe.strip()[:80]
    return safe or "unnamed"
|
||||
|
||||
|
||||
def safe_path(base_dir: str, filename: str) -> Path:
    """Resolve *filename* under *base_dir*, rejecting escapes.

    Prevents directory traversal: the fully-resolved target must lie
    inside the resolved base directory.

    Args:
        base_dir: Directory the result must stay within.
        filename: Relative path to join under ``base_dir``.

    Returns:
        The resolved absolute Path inside ``base_dir``.

    Raises:
        ValueError: If the resolved path escapes ``base_dir``.

    Bug fixed: the original check used ``str(target).startswith(str(base))``,
    which wrongly accepts sibling directories sharing the base as a string
    prefix (e.g. ``/data_evil/x`` passes a ``/data`` check). Comparing whole
    path components via ``Path.relative_to`` closes that hole.
    """
    base = Path(base_dir).resolve()
    target = (base / filename).resolve()
    try:
        # relative_to succeeds only when every component of `base`
        # is a proper prefix of `target`'s components.
        target.relative_to(base)
    except ValueError:
        raise ValueError(
            f"Path safety check failed / 路径安全检查失败: "
            f"{target} is not inside / 不在 {base} 内"
        ) from None
    return target
|
||||
|
||||
|
||||
def count_tokens_approx(text: str) -> int:
    """Cheap token-count estimate; precision is not the goal.

    Heuristic: each CJK character ≈ 1.5 tokens, each ASCII word ≈ 1.3
    tokens, plus a small per-character remainder. Used only to decide
    whether dehydration (compression) is worthwhile.
    """
    if not text:
        return 0
    cjk_chars = sum(1 for ch in text if "\u4e00" <= ch <= "\u9fff")
    ascii_words = len(re.findall(r"[a-zA-Z]+", text))
    estimate = cjk_chars * 1.5 + ascii_words * 1.3 + len(text) * 0.05
    return int(estimate)
|
||||
|
||||
|
||||
def now_iso() -> str:
    """Current local time as an ISO-8601 string with second precision."""
    return datetime.now().replace(microsecond=0).isoformat()
|
||||
101
write_memory.py
Normal file
101
write_memory.py
Normal file
@@ -0,0 +1,101 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Ombre Brain 手动记忆写入工具
|
||||
用途:在 Copilot 端直接写入记忆文件,绕过 MCP 和 API 调用
|
||||
用法:
|
||||
python3 write_memory.py --name "记忆名" --content "内容" --domain "情感" --tags "标签1,标签2"
|
||||
或交互模式:python3 write_memory.py
|
||||
"""
|
||||
|
||||
import os
|
||||
import uuid
|
||||
import argparse
|
||||
from datetime import datetime
|
||||
|
||||
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain/dynamic")
|
||||
|
||||
|
||||
def gen_id():
    """Return a 12-character hex memory ID from a random UUID4."""
    return str(uuid.uuid4()).replace("-", "")[:12]
|
||||
|
||||
|
||||
def write_memory(
    name: str,
    content: str,
    domain: list[str],
    tags: list[str],
    importance: int = 7,
    valence: float = 0.5,
    arousal: float = 0.3,
) -> str:
    """Write one memory bucket as a Markdown file with YAML frontmatter.

    Writes directly into VAULT_DIR (bypassing the MCP server and API),
    prints the path and ID, and returns the new memory ID.

    NOTE(review): frontmatter values are interpolated unquoted — a name
    containing ``:`` or other YAML syntax would produce an invalid or
    misparsed file; confirm whether inputs need YAML escaping/quoting.
    """
    mid = gen_id()
    now = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    # Build the YAML list bodies for domain and tags (one "- item" per line).
    domain_yaml = "\n".join(f"- {d}" for d in domain)
    tags_yaml = "\n".join(f"- {t}" for t in tags)

    # The full Markdown document: YAML frontmatter followed by the content.
    md = f"""---
activation_count: 1
arousal: {arousal}
created: '{now}'
domain:
{domain_yaml}
id: {mid}
importance: {importance}
last_active: '{now}'
name: {name}
tags:
{tags_yaml}
type: dynamic
valence: {valence}
---

{content}
"""

    path = os.path.join(VAULT_DIR, f"{mid}.md")
    os.makedirs(VAULT_DIR, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        f.write(md)

    print(f"✓ 已写入: {path}")
    print(f" ID: {mid} | 名称: {name}")
    return mid
|
||||
|
||||
|
||||
def interactive() -> None:
    """Prompt for each memory field on stdin, then write the memory file.

    Empty numeric answers fall back to the defaults (7 / 0.5 / 0.3);
    non-numeric answers will raise ValueError from int()/float().
    """
    print("=== Ombre Brain 手动写入 ===")
    name = input("记忆名称: ").strip()
    content = input("内容: ").strip()
    domain = [d.strip() for d in input("主题域(逗号分隔): ").split(",") if d.strip()]
    tags = [t.strip() for t in input("标签(逗号分隔): ").split(",") if t.strip()]
    importance = int(input("重要性(1-10, 默认7): ").strip() or "7")
    valence = float(input("效价(0-1, 默认0.5): ").strip() or "0.5")
    arousal = float(input("唤醒(0-1, 默认0.3): ").strip() or "0.3")
    write_memory(name, content, domain, tags, importance, valence, arousal)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI mode when --name/--content/--domain are all supplied;
    # otherwise fall back to the interactive stdin prompts.
    parser = argparse.ArgumentParser(description="手动写入 Ombre Brain 记忆")
    parser.add_argument("--name", help="记忆名称")
    parser.add_argument("--content", help="记忆内容")
    parser.add_argument("--domain", help="主题域,逗号分隔")
    parser.add_argument("--tags", help="标签,逗号分隔")
    parser.add_argument("--importance", type=int, default=7)
    parser.add_argument("--valence", type=float, default=0.5)
    parser.add_argument("--arousal", type=float, default=0.3)
    args = parser.parse_args()

    if args.name and args.content and args.domain:
        write_memory(
            name=args.name,
            content=args.content,
            domain=[d.strip() for d in args.domain.split(",")],
            tags=[t.strip() for t in (args.tags or "").split(",") if t.strip()],
            importance=args.importance,
            valence=args.valence,
            arousal=args.arousal,
        )
    else:
        interactive()
|
||||
1
zbpack.json
Normal file
1
zbpack.json
Normal file
@@ -0,0 +1 @@
|
||||
{}
|
||||
Reference in New Issue
Block a user