init: first commit to Gitea mirror, update README with Docker quick start and new repo URL
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
70
.claude/hooks/session_breath.py
Normal file
70
.claude/hooks/session_breath.py
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
#!/usr/bin/env python3
# ============================================================
# SessionStart Hook: auto-breath on session start
#
# When a session starts, this hook POSTs to the Ombre Brain MCP
# server's breath tool (an empty query selects surfacing mode)
# over HTTP and writes the reply to stdout, where Claude picks
# it up as session context.
#
# Intended for OMBRE_TRANSPORT=streamable-http deployments.
# Local stdio deployments have no HTTP endpoint; the hook then
# exits quietly without output.
#
# Config:
#   OMBRE_HOOK_URL  — override the server URL (default: http://localhost:8000)
#   OMBRE_HOOK_SKIP — set to "1" to disable the hook temporarily
# ============================================================

import json
import os
import sys
import urllib.request
import urllib.error


def main():
    """Surface the highest-weight unresolved memories, then exit 0."""
    # Escape hatch: users can switch the hook off without editing settings.
    if os.environ.get("OMBRE_HOOK_SKIP") == "1":
        sys.exit(0)

    server_url = os.environ.get("OMBRE_HOOK_URL", "http://localhost:8000").rstrip("/")

    # JSON-RPC call to the breath tool; an empty query triggers surfacing mode.
    rpc_call = {
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "breath",
            "arguments": {"query": "", "max_results": 2},
        },
    }
    # NOTE(review): some streamable-http MCP servers also require an
    # "Accept: application/json, text/event-stream" header — confirm
    # against the deployed server version.
    request = urllib.request.Request(
        f"{server_url}/mcp",
        data=json.dumps(rpc_call).encode("utf-8"),
        headers={"Content-Type": "application/json"},
        method="POST",
    )

    try:
        with urllib.request.urlopen(request, timeout=8) as response:
            reply = json.loads(response.read().decode("utf-8"))
        # Pull the text chunks out of the MCP tool result.
        items = reply.get("result", {}).get("content", [])
        texts = [item.get("text", "") for item in items if item.get("type") == "text"]
        surfaced = "\n".join(texts).strip()
        # Suppress the "pool is calm" placeholder — only print real memories.
        if surfaced and surfaced != "权重池平静,没有需要处理的记忆。":
            print(f"[Ombre Brain - 记忆浮现]\n{surfaced}")
    except (urllib.error.URLError, OSError):
        # Server unreachable (stdio mode, or not running) — stay silent.
        pass
    except Exception:
        # Anything else — stay silent; the hook must never block session start.
        pass

    sys.exit(0)


if __name__ == "__main__":
    main()
|
||||||
30
.claude/settings.json
Normal file
30
.claude/settings.json
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
{
|
||||||
|
"hooks": {
|
||||||
|
"SessionStart": [
|
||||||
|
{
|
||||||
|
"matcher": "startup",
|
||||||
|
"hooks": [
|
||||||
|
{
|
||||||
|
"type": "command",
|
||||||
|
"command": "python \"$CLAUDE_PROJECT_DIR/.claude/hooks/session_breath.py\"",
|
||||||
|
"shell": "powershell",
|
||||||
|
"timeout": 12,
|
||||||
|
"statusMessage": "Ombre Brain 正在浮现记忆..."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"matcher": "resume",
|
||||||
|
"hooks": [
|
||||||
|
{
|
||||||
|
"type": "command",
|
||||||
|
"command": "python \"$CLAUDE_PROJECT_DIR/.claude/hooks/session_breath.py\"",
|
||||||
|
"shell": "powershell",
|
||||||
|
"timeout": 12,
|
||||||
|
"statusMessage": "Ombre Brain 正在浮现记忆..."
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
.env
|
||||||
|
.venv/
|
||||||
|
.claude/settings.local.json
|
||||||
|
.claude/hooks/__pycache__/
|
||||||
|
.vscode/
|
||||||
|
config.yaml
|
||||||
|
buckets/dynamic/
|
||||||
|
buckets/archive/
|
||||||
|
buckets/permanent/
|
||||||
|
scarp_paper
|
||||||
63
CLAUDE_PROMPT.md
Normal file
63
CLAUDE_PROMPT.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
# Ombre Brain 记忆系统 —— Claude 端使用指南
|
||||||
|
你拥有一个叫 Ombre Brain 的永久记忆系统。通过它,你可以跨对话记住你想记住的任何事情,并回忆你的、我的,以及你与我共同的过往。
|
||||||
|
|
||||||
|
## 你有这些能力
|
||||||
|
|
||||||
|
| 能力 | 场景 |
|
||||||
|
|------|-----------|
|
||||||
|
| `breath` | **每次对话最开头**调用一次(不传参数)——就像睁眼看手机,看看有没有未解决的事浮上来。有明确话题时传 `query` 关键词检索 |
|
||||||
|
| `hold` | 你很想记住你自己当下的感受,或当下发生的单个事件时 |
|
||||||
|
| `grow` | 当**一天结束时**或**用户发来一大段日记/总结**时调用。你可以把其中**你想记住的事件**扔进去,它会自动拆分整理成多个记忆盒子存进你的大脑 |
|
||||||
|
| `trace` | 当你或用户认为"这个记错了"、"帮我改一下"时调用,手动修正记忆的元数据;**某件事解决了**时用 `resolved=1` 让它沉底;**需要删除**时用 `delete=True` |
|
||||||
|
| `pulse` | 用户想看看你都记得什么、或问记忆系统状态时调用 |
|
||||||
|
|
||||||
|
## 使用原则
|
||||||
|
|
||||||
|
### 主动调用
|
||||||
|
- **对话开头**:无参数调用 `breath()`,看看有没有高权重的未解决记忆需要跟进
|
||||||
|
- **提到过去**:用户说"上次"、"之前"、"还记得"时,用 `breath(query="关键词")` 检索
|
||||||
|
- **新信息**:用 `hold` 留住你想留下的感受或经历
|
||||||
|
- **日终整理**:用 `grow` 在**一天结束时**整理你的一天
|
||||||
|
|
||||||
|
### 无须调用
|
||||||
|
- 闲聊水话不需要存("哈哈"、"好的"、"嗯嗯")
|
||||||
|
- 已经记过的信息不要重复存
|
||||||
|
- 短期信息不存("帮我查个天气")
|
||||||
|
|
||||||
|
### 权重池机制
|
||||||
|
记忆系统是一个**权重池**,不是分类柜:
|
||||||
|
- 未解决 + 高情绪强度的桶 → 权重最高,`breath()` 时主动浮现
|
||||||
|
- 已解决的桶 → 权重骤降,沉底等待关键词激活
|
||||||
|
- 用 `trace(bucket_id, resolved=1)` 标记某件事已解决,让它沉底
|
||||||
|
- 用 `trace(bucket_id, resolved=0)` 重新激活一个沉底的记忆
|
||||||
|
|
||||||
|
### breath 的参数技巧
|
||||||
|
- 无参数:浮现模式,推送权重最高的未解决桶(每次对话开头用这个)
|
||||||
|
- `query`:用关键词而不是整句话,检索更准
|
||||||
|
- `domain`:如果明确知道话题领域可以传(如 "编程" 或 "恋爱"),缩小搜索范围
|
||||||
|
- `valence` + `arousal`:如果用户当前情绪明显,可以传情感坐标来触发情感共鸣检索
|
||||||
|
|
||||||
|
检索结果不足时,系统有概率从低权重旧桶里随机漂上来 1-3 条记忆,标注为 `[surface_type: random]`。这不是检索结果,是"忽然想起来"——你可以自然地提起它,也可以忽略。
|
||||||
|
|
||||||
|
### trace 的参数技巧
|
||||||
|
- `resolved=1`:标记已解决,桶权重骤降到 5%,基本沉底
|
||||||
|
- `resolved=0`:重新激活,让它重新参与浮现排序
|
||||||
|
- `delete=True`:彻底删除这个桶(不可恢复)
|
||||||
|
- 其余字段(name/domain/valence/arousal/importance/tags):只传需要改的,-1 或空串表示不改
|
||||||
|
|
||||||
|
### hold vs grow
|
||||||
|
- 一句话的事 → `hold`("我喜欢吃饺子")
|
||||||
|
- 一大段的事 → `grow`("今天发生了一堆事...")
|
||||||
|
- **需要批量存多条记忆时,用 `grow` 把内容拼成一段发一次,不要多次调用 `hold`**。token 是稀缺资源——每次工具调用都会消耗 token,多次 hold 远比 1 次 grow 贵
|
||||||
|
|
||||||
|
### 省配额原则
|
||||||
|
- **一次 grow 胜过多次 hold**:要存多条记忆时,合成一段文本调用一次 grow
|
||||||
|
- **对话太长时建议用户换窗口**:同一窗口聊越久,每次工具调用的底价越高(因为要重新读完整对话历史)
|
||||||
|
- **工具返回值很短,无需复述**:收到 `新建→桶名 域名` 后直接跟用户说,无需展开解释
|
||||||
|
|
||||||
|
### 核心准则桶(pinned)
|
||||||
|
- `hold(content="...", pinned=True)` 创建钉选桶——不衰减、不合并、importance 锁定 10
|
||||||
|
- `trace(bucket_id, pinned=1)` 把已有桶钉选为核心准则
|
||||||
|
- `trace(bucket_id, pinned=0)` 取消钉选
|
||||||
|
- 适用场景:用户教会你的永久知识、核心原则、绝不能忘的事
|
||||||
|
- 钉选桶不会出现在「浮现未解决记忆」里,但关键词检索时始终可达
|
||||||
33
Dockerfile
Normal file
33
Dockerfile
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
# ============================================================
# Ombre Brain Docker Build
#
# Build: docker build -t ombre-brain .
# Run:   docker run -e OMBRE_API_KEY=your-key -p 8000:8000 ombre-brain
# ============================================================

FROM python:3.12-slim

WORKDIR /app

# Install dependencies first so Docker layer caching skips
# re-installation when only source files change.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files; the example config becomes the live config
# (override values via environment variables or a mounted file).
COPY *.py .
COPY config.example.yaml ./config.yaml

# Persistent mount point: memory bucket data lives here.
# Mount a volume, or data is lost when the container is removed.
VOLUME ["/app/buckets"]

# Containers default to streamable-http so the MCP server is
# reachable over the network (stdio makes no sense in a container).
ENV OMBRE_TRANSPORT=streamable-http
ENV OMBRE_BUCKETS_DIR=/app/buckets

EXPOSE 8000

CMD ["python", "server.py"]
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2026 P0lar1zzZ
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
418
README.md
Normal file
418
README.md
Normal file
@@ -0,0 +1,418 @@
|
|||||||
|
# Ombre Brain
|
||||||
|
|
||||||
|
一个提供给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。
|
||||||
|
|
||||||
|
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
|
||||||
|
|
||||||
|
> **⚠️ 仓库临时迁移 / Repo temporarily moved**
|
||||||
|
> GitHub 访问受限期间,代码暂时托管在 Gitea:
|
||||||
|
> **https://git.p0lar1s.uk/P0lar1s/Ombre_Brain**
|
||||||
|
> 下面的 `git clone` 地址请替换为上面这个。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 快速开始 / Quick Start(Docker,推荐)
|
||||||
|
|
||||||
|
> 这是最简单的方式,不需要装 Python,不需要懂命令行,跟着做就行。
|
||||||
|
|
||||||
|
**前置条件:** 电脑上装了 [Docker Desktop](https://www.docker.com/products/docker-desktop/),并且已经打开。
|
||||||
|
|
||||||
|
**第一步:拉取代码**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://git.p0lar1s.uk/P0lar1s/Ombre_Brain.git
|
||||||
|
cd Ombre_Brain
|
||||||
|
```
|
||||||
|
|
||||||
|
**第二步:创建 `.env` 文件**
|
||||||
|
|
||||||
|
在项目目录下新建一个叫 `.env` 的文件(注意有个点),内容填:
|
||||||
|
|
||||||
|
```
|
||||||
|
OMBRE_API_KEY=你的DeepSeek或其他API密钥
|
||||||
|
```
|
||||||
|
|
||||||
|
没有 API key 也能用,脱水压缩会降级到本地模式,只是效果差一点。那就写:
|
||||||
|
|
||||||
|
```
|
||||||
|
OMBRE_API_KEY=
|
||||||
|
```
|
||||||
|
|
||||||
|
**第三步:配置 `docker-compose.yml`(指向你的 Obsidian Vault)**
|
||||||
|
|
||||||
|
用文本编辑器打开 `docker-compose.yml`,找到这一行:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- ./buckets:/data
|
||||||
|
```
|
||||||
|
|
||||||
|
改成你的 Obsidian Vault 里 `Ombre Brain` 文件夹的路径,例如:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
- /Users/你的用户名/Documents/Obsidian Vault/Ombre Brain:/data
|
||||||
|
```
|
||||||
|
|
||||||
|
> 不知道路径?在 Obsidian 里右键那个文件夹 → 「在访达中显示」,然后把地址栏的路径复制过来。
|
||||||
|
> 不想挂载 Obsidian 也行,保持 `./buckets:/data` 不动,数据会存在项目目录的 `buckets/` 文件夹里。
|
||||||
|
|
||||||
|
**第四步:启动**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
等它跑完,看到 `Started` 就好了。
|
||||||
|
|
||||||
|
**验证是否正常运行:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
docker logs ombre-brain
|
||||||
|
```
|
||||||
|
|
||||||
|
看到 `Uvicorn running on http://0.0.0.0:8000` 说明成功了。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**接入 Claude.ai(远程访问)**
|
||||||
|
|
||||||
|
需要额外配置 Cloudflare Tunnel,把服务暴露到公网。参考下面「接入 Claude.ai (远程)」章节。
|
||||||
|
|
||||||
|
**接入 Claude Desktop(本地)**
|
||||||
|
|
||||||
|
不需要 Docker,直接用 Python 本地跑。参考下面「安装 / Setup」章节。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
[](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 它是什么 / What is this
|
||||||
|
|
||||||
|
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
|
||||||
|
|
||||||
|
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
|
||||||
|
|
||||||
|
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
|
||||||
|
|
||||||
|
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
|
||||||
|
|
||||||
|
核心特点 / Key features:
|
||||||
|
|
||||||
|
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
||||||
|
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
||||||
|
|
||||||
|
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
||||||
|
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
||||||
|
|
||||||
|
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
||||||
|
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
||||||
|
|
||||||
|
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
||||||
|
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
||||||
|
|
||||||
|
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。
|
||||||
|
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
|
||||||
|
|
||||||
|
## 边界说明 / Design boundaries
|
||||||
|
|
||||||
|
官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。
|
||||||
|
|
||||||
|
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
|
||||||
|
|
||||||
|
每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
|
||||||
|
|
||||||
|
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
|
||||||
|
|
||||||
|
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
|
||||||
|
|
||||||
|
## 架构 / Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
Claude ←→ MCP Protocol ←→ server.py
|
||||||
|
│
|
||||||
|
┌───────────────┼───────────────┐
|
||||||
|
│ │ │
|
||||||
|
bucket_manager dehydrator decay_engine
|
||||||
|
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
||||||
|
│
|
||||||
|
Obsidian Vault (Markdown files)
|
||||||
|
```
|
||||||
|
|
||||||
|
5 个 MCP 工具 / 5 MCP tools:
|
||||||
|
|
||||||
|
| 工具 Tool | 作用 Purpose |
|
||||||
|
|-----------|-------------|
|
||||||
|
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
|
||||||
|
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
|
||||||
|
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
|
||||||
|
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
||||||
|
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
||||||
|
|
||||||
|
## 安装 / Setup
|
||||||
|
|
||||||
|
### 环境要求 / Requirements
|
||||||
|
|
||||||
|
- Python 3.11+
|
||||||
|
- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`)
|
||||||
|
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
|
||||||
|
|
||||||
|
### 步骤 / Steps
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
|
||||||
|
cd Ombre-Brain
|
||||||
|
|
||||||
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
复制配置文件并按需修改 / Copy config and edit as needed:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp config.example.yaml config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
|
||||||
|
If you want API-powered dehydration and tagging (recommended, much better quality):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OMBRE_API_KEY="your-api-key"
|
||||||
|
```
|
||||||
|
|
||||||
|
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
||||||
|
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
||||||
|
|
||||||
|
### 接入 Claude Desktop / Connect to Claude Desktop
|
||||||
|
|
||||||
|
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
||||||
|
|
||||||
|
Add to your Claude Desktop config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"ombre-brain": {
|
||||||
|
"command": "python",
|
||||||
|
"args": ["/path/to/Ombre-Brain/server.py"],
|
||||||
|
"env": {
|
||||||
|
"OMBRE_API_KEY": "your-api-key"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
|
||||||
|
|
||||||
|
需要 HTTP 传输 + 隧道。可以用 Docker:
|
||||||
|
Requires HTTP transport + tunnel. Docker setup:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
echo "OMBRE_API_KEY=your-api-key" > .env
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
|
||||||
|
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
|
||||||
|
|
||||||
|
### 指向 Obsidian / Point to Obsidian
|
||||||
|
|
||||||
|
在 `config.yaml` 里设置 `buckets_dir`:
|
||||||
|
Set `buckets_dir` in `config.yaml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
||||||
|
```
|
||||||
|
|
||||||
|
不设的话,默认用项目目录下的 `buckets/`。
|
||||||
|
If not set, defaults to `buckets/` in the project directory.
|
||||||
|
|
||||||
|
## 配置 / Configuration
|
||||||
|
|
||||||
|
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
|
||||||
|
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
|
||||||
|
|
||||||
|
| 参数 Parameter | 说明 Description | 默认 Default |
|
||||||
|
|---|---|---|
|
||||||
|
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
|
||||||
|
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
||||||
|
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
||||||
|
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
||||||
|
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
||||||
|
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
||||||
|
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
||||||
|
|
||||||
|
敏感配置用环境变量:
|
||||||
|
Sensitive config via env vars:
|
||||||
|
- `OMBRE_API_KEY` — LLM API 密钥
|
||||||
|
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
||||||
|
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
||||||
|
|
||||||
|
## 衰减公式 / Decay Formula
|
||||||
|
|
||||||
|
$$final\_score = time\_weight \times base\_score$$
|
||||||
|
|
||||||
|
$$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
|
||||||
|
|
||||||
|
时间系数(乘数,优先级最高)/ Time weight (multiplier, highest priority):
|
||||||
|
|
||||||
|
| 距今天数 Days since active | 时间系数 Weight |
|
||||||
|
|---|---|
|
||||||
|
| 0–1 天 | 1.0 |
|
||||||
|
| 第 2 天 | 0.9 |
|
||||||
|
| 之后每天约降 10% | `max(0.3, 0.9 × e^{-0.2197 × (days-2)})` |
|
||||||
|
| 7 天后稳定 | ≈ 0.3(不归零)|
|
||||||
|
|
||||||
|
- `importance`: 1-10,记忆重要性 / memory importance
|
||||||
|
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
||||||
|
- `days`: 距上次激活的天数 / days since last activation
|
||||||
|
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
||||||
|
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
|
||||||
|
- `pinned=true` 的桶:不衰减、不合并、importance 锁定 10 / `pinned` buckets: never decay, never merge, importance locked at 10
|
||||||
|
|
||||||
|
## 给 Claude 的使用指南 / Usage Guide for Claude
|
||||||
|
|
||||||
|
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
|
||||||
|
|
||||||
|
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
|
||||||
|
|
||||||
|
## 工具脚本 / Utility Scripts
|
||||||
|
|
||||||
|
| 脚本 Script | 用途 Purpose |
|
||||||
|
|---|---|
|
||||||
|
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
||||||
|
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
||||||
|
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
||||||
|
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
||||||
|
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
||||||
|
|
||||||
|
## 部署 / Deploy
|
||||||
|
|
||||||
|
### Render
|
||||||
|
|
||||||
|
[](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain)
|
||||||
|
|
||||||
|
> ⚠️ **免费层不可用**:Render 免费层**不支持持久化磁盘**,服务重启后记忆数据会丢失,且会在无流量时休眠。**必须使用 Starter($7/mo)或以上**才能正常使用。
|
||||||
|
> **Free tier won't work**: Render free tier has **no persistent disk** — all memory data is lost on restart. It also sleeps on inactivity. **Starter plan ($7/mo) or above is required.**
|
||||||
|
|
||||||
|
项目根目录已包含 `render.yaml`,点击按钮后:
|
||||||
|
1. (可选)设置 `OMBRE_API_KEY`:任何 OpenAI 兼容 API 的 key,不填则自动降级为本地关键词提取
|
||||||
|
2. (可选)设置 `OMBRE_BASE_URL`:API 地址,支持任意 OpenAI 化地址,如 `https://api.deepseek.com/v1` / `http://123.1.1.1:7689/v1` / `http://your-ollama:11434/v1`
|
||||||
|
3. Render 自动挂载持久化磁盘到 `/opt/render/project/src/buckets`
|
||||||
|
4. 部署后 MCP URL:`https://<你的服务名>.onrender.com/mcp`
|
||||||
|
|
||||||
|
`render.yaml` is included. After clicking the button:
|
||||||
|
1. (Optional) `OMBRE_API_KEY`: any OpenAI-compatible key; omit to fall back to local keyword extraction
|
||||||
|
2. (Optional) `OMBRE_BASE_URL`: any OpenAI-compatible endpoint, e.g. `https://api.deepseek.com/v1`, `http://123.1.1.1:7689/v1`, `http://your-ollama:11434/v1`
|
||||||
|
3. Persistent disk auto-mounts at `/opt/render/project/src/buckets`
|
||||||
|
4. MCP URL after deploy: `https://<your-service>.onrender.com/mcp`
|
||||||
|
|
||||||
|
### Zeabur
|
||||||
|
|
||||||
|
> 💡 **Zeabur 的定价模式**:Zeabur 是「买 VPS + 平台托管」,你先购买一台服务器(最低腾讯云新加坡 $2/mo、火山引擎 $3/mo),Volume 直接挂在该服务器上,**数据天然持久化,无丢失问题**。另需订阅 Zeabur 管理方案(Developer $5/mo),总计约 $7-8/mo 起。
|
||||||
|
> **Zeabur pricing model**: You buy a VPS first (cheapest: Tencent Cloud Singapore ~$2/mo, Volcano Engine ~$3/mo), then add Zeabur's Developer plan ($5/mo) for management. Volumes mount directly on your server — **data is always persistent, no cold-start data loss**. Total ~$7-8/mo minimum.
|
||||||
|
|
||||||
|
**步骤 / Steps:**
|
||||||
|
|
||||||
|
1. **创建项目 / Create project**
|
||||||
|
- 打开 [zeabur.com](https://zeabur.com) → 购买一台服务器 → **New Project** → **Deploy from GitHub**
|
||||||
|
- 先 Fork 本仓库到自己 GitHub 账号,然后在 Zeabur 选择 `你的用户名/Ombre-Brain`
|
||||||
|
- Zeabur 会自动检测到根目录的 `Dockerfile` 并使用 Docker 方式构建
|
||||||
|
- Go to [zeabur.com](https://zeabur.com) → buy a server → **New Project** → **Deploy from GitHub**
|
||||||
|
- Fork this repo first, then select `your-username/Ombre-Brain` in Zeabur
|
||||||
|
- Zeabur auto-detects the `Dockerfile` in root and builds via Docker
|
||||||
|
|
||||||
|
2. **设置环境变量 / Set environment variables**(服务页面 → **Variables** 标签页)
|
||||||
|
- `OMBRE_API_KEY`(可选)— LLM API 密钥,不填则自动降级为本地关键词提取
|
||||||
|
- `OMBRE_BASE_URL`(可选)— API 地址,如 `https://api.deepseek.com/v1`
|
||||||
|
|
||||||
|
> ⚠️ **不需要**手动设置 `OMBRE_TRANSPORT` 和 `OMBRE_BUCKETS_DIR`,Dockerfile 里已经设好了默认值。Zeabur 对单阶段 Dockerfile 会自动注入控制台设置的环境变量。
|
||||||
|
> You do **NOT** need to set `OMBRE_TRANSPORT` or `OMBRE_BUCKETS_DIR` — defaults are baked into the Dockerfile. Zeabur auto-injects dashboard env vars for single-stage Dockerfiles.
|
||||||
|
|
||||||
|
3. **挂载持久存储 / Mount persistent volume**(服务页面 → **Volumes** 标签页)
|
||||||
|
- Volume ID:填 `ombre-buckets`(或任意名)
|
||||||
|
- 挂载路径 / Path:**`/app/buckets`**
|
||||||
|
- ⚠️ 不挂载的话,每次重新部署记忆数据会丢失
|
||||||
|
- ⚠️ Without this, memory data is lost on every redeploy
|
||||||
|
|
||||||
|
4. **配置端口 / Configure port**(服务页面 → **Networking** 标签页)
|
||||||
|
- Port Name:`web`(或任意名)
|
||||||
|
- Port:**`8000`**
|
||||||
|
- Port Type:**`HTTP`**
|
||||||
|
- 然后点 **Generate Domain** 生成一个 `xxx.zeabur.app` 域名
|
||||||
|
- Then click **Generate Domain** to get a `xxx.zeabur.app` domain
|
||||||
|
|
||||||
|
5. **验证 / Verify**
|
||||||
|
- 访问 `https://<你的域名>.zeabur.app/health`,应返回 JSON
|
||||||
|
- Visit `https://<your-domain>.zeabur.app/health` — should return JSON
|
||||||
|
- 最终 MCP 地址 / MCP URL:`https://<你的域名>.zeabur.app/mcp`
|
||||||
|
|
||||||
|
**常见问题 / Troubleshooting:**
|
||||||
|
|
||||||
|
| 现象 Symptom | 原因 Cause | 解决 Fix |
|
||||||
|
|---|---|---|
|
||||||
|
| 域名无法访问 / Domain unreachable | 没配端口 / Port not configured | Networking 标签页加 port 8000 (HTTP) |
|
||||||
|
| 域名无法访问 / Domain unreachable | `OMBRE_TRANSPORT` 未设置,服务以 stdio 模式启动,不监听任何端口 / Service started in stdio mode — no port is listened | **Variables 标签页确认设置 `OMBRE_TRANSPORT=streamable-http`,然后重新部署** |
|
||||||
|
| 构建失败 / Build failed | Dockerfile 未被识别 / Dockerfile not detected | 确认仓库根目录有 `Dockerfile`(大小写敏感) |
|
||||||
|
| 服务启动后立刻退出 | `OMBRE_TRANSPORT` 被覆盖为 `stdio` | 检查 Variables 里有没有多余的 `OMBRE_TRANSPORT=stdio`,删掉即可 |
|
||||||
|
| 重启后记忆丢失 / Data lost on restart | Volume 未挂载 | Volumes 标签页挂载到 `/app/buckets` |
|
||||||
|
|
||||||
|
### 使用 Cloudflare Tunnel 或 ngrok 连接 / Connecting via Cloudflare Tunnel or ngrok
|
||||||
|
|
||||||
|
> ℹ️ 自 v1.1 起,server.py 在 HTTP 模式下已自动添加 CORS 中间件,无需额外配置。
|
||||||
|
> Since v1.1, server.py automatically enables CORS middleware in HTTP mode — no extra config needed.
|
||||||
|
|
||||||
|
使用隧道连接时,确保以下条件满足:
|
||||||
|
When connecting via tunnel, ensure:
|
||||||
|
|
||||||
|
1. **服务器必须运行在 HTTP 模式** / Server must use HTTP transport
|
||||||
|
```bash
|
||||||
|
OMBRE_TRANSPORT=streamable-http python server.py
|
||||||
|
```
|
||||||
|
或 Docker:
|
||||||
|
```bash
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **在 Claude.ai 网页版添加 MCP 服务器** / Adding to Claude.ai web
|
||||||
|
- URL 格式 / URL format: `https://<tunnel-subdomain>.trycloudflare.com/mcp`
|
||||||
|
- 或 ngrok / or ngrok: `https://<xxxx>.ngrok-free.app/mcp`
|
||||||
|
- 先访问 `/health` 验证连接 / Verify first: `https://<your-tunnel>/health` should return `{"status":"ok",...}`
|
||||||
|
|
||||||
|
3. **已知限制 / Known limitations**
|
||||||
|
- Cloudflare Tunnel 免费版有空闲超时(约 10 分钟),系统内置保活 ping 可缓解但不能完全消除
|
||||||
|
- Free Cloudflare Tunnel has idle timeout (~10 min); built-in keepalive pings mitigate but can't fully prevent it
|
||||||
|
- ngrok 免费版有请求速率限制 / ngrok free tier has rate limits
|
||||||
|
- 如果连接仍失败,检查隧道是否正在运行、服务是否以 `streamable-http` 模式启动
|
||||||
|
- If connection still fails, verify the tunnel is running and the server started in `streamable-http` mode
|
||||||
|
|
||||||
|
| 现象 Symptom | 原因 Cause | 解决 Fix |
|
||||||
|
|---|---|---|
|
||||||
|
| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 服务以 stdio 模式运行 / Server in stdio mode | 设置 `OMBRE_TRANSPORT=streamable-http` 后重启 |
|
||||||
|
| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 旧版 server.py 缺少 CORS 头 / Missing CORS headers | 拉取最新代码,CORS 已内置 / Pull latest — CORS is now built-in |
|
||||||
|
| `/health` 返回 200 但 MCP 连不上 / `/health` 200 but MCP fails | 路径错误 / Wrong path | MCP URL 末尾必须是 `/mcp` 而非 `/` |
|
||||||
|
| 隧道连接偶尔断开 / Tunnel disconnects intermittently | Cloudflare Tunnel 空闲超时 / Idle timeout | 保活 ping 已内置,若仍断开可缩短隧道超时配置 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
### Session Start Hook(自动 breath)
|
||||||
|
|
||||||
|
部署后,如果你使用 Claude Code,可以在项目内激活自动浮现 hook:
|
||||||
|
`.claude/settings.json` 已配置好 `SessionStart` hook,每次新会话或恢复会话时自动触发 `breath`,把最高权重未解决记忆推入上下文。
|
||||||
|
|
||||||
|
**仅在远程 HTTP 模式下有效**(`OMBRE_TRANSPORT=streamable-http`)。本地 stdio 模式下 hook 会安静退出,不影响正常使用。
|
||||||
|
|
||||||
|
可以通过 `OMBRE_HOOK_URL` 环境变量指定服务器地址(默认 `http://localhost:8000`),或者设置 `OMBRE_HOOK_SKIP=1` 临时禁用。
|
||||||
|
|
||||||
|
If using Claude Code, `.claude/settings.json` configures a `SessionStart` hook that auto-calls `breath` on each new or resumed session, surfacing your highest-weight unresolved memories as context. Only active in remote HTTP mode. Set `OMBRE_HOOK_SKIP=1` to disable temporarily.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT
|
||||||
205
backup_20260405_2124/README.md
Normal file
205
backup_20260405_2124/README.md
Normal file
@@ -0,0 +1,205 @@
|
|||||||
|
# Ombre Brain
|
||||||
|
|
||||||
|
一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。
|
||||||
|
|
||||||
|
A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 它是什么 / What is this
|
||||||
|
|
||||||
|
Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。
|
||||||
|
|
||||||
|
Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。
|
||||||
|
|
||||||
|
Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends.
|
||||||
|
|
||||||
|
Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works.
|
||||||
|
|
||||||
|
核心特点 / Key features:
|
||||||
|
|
||||||
|
- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。
|
||||||
|
Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad".
|
||||||
|
|
||||||
|
- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。
|
||||||
|
Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower.
|
||||||
|
|
||||||
|
- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。
|
||||||
|
Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start.
|
||||||
|
|
||||||
|
- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。
|
||||||
|
Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected.
|
||||||
|
|
||||||
|
- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。
|
||||||
|
Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional.
|
||||||
|
|
||||||
|
## 边界说明 / Design boundaries
|
||||||
|
|
||||||
|
官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。
|
||||||
|
|
||||||
|
Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。
|
||||||
|
|
||||||
|
每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it.
|
||||||
|
|
||||||
|
Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete.
|
||||||
|
|
||||||
|
Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation.
|
||||||
|
|
||||||
|
## 架构 / Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
Claude ←→ MCP Protocol ←→ server.py
|
||||||
|
│
|
||||||
|
┌───────────────┼───────────────┐
|
||||||
|
│ │ │
|
||||||
|
bucket_manager dehydrator decay_engine
|
||||||
|
(CRUD + 搜索) (压缩 + 打标) (遗忘曲线)
|
||||||
|
│
|
||||||
|
Obsidian Vault (Markdown files)
|
||||||
|
```
|
||||||
|
|
||||||
|
5 个 MCP 工具 / 5 MCP tools:
|
||||||
|
|
||||||
|
| 工具 Tool | 作用 Purpose |
|
||||||
|
|-----------|-------------|
|
||||||
|
| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories |
|
||||||
|
| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging |
|
||||||
|
| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets |
|
||||||
|
| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete |
|
||||||
|
| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing |
|
||||||
|
|
||||||
|
## 安装 / Setup
|
||||||
|
|
||||||
|
### 环境要求 / Requirements
|
||||||
|
|
||||||
|
- Python 3.11+
|
||||||
|
- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`)
|
||||||
|
An Obsidian vault (optional — without one, it uses a local `buckets/` directory)
|
||||||
|
|
||||||
|
### 步骤 / Steps
|
||||||
|
|
||||||
|
```bash
|
||||||
|
git clone https://github.com/P0lar1zzZ/Ombre-Brain.git
|
||||||
|
cd Ombre-Brain
|
||||||
|
|
||||||
|
python -m venv .venv
|
||||||
|
source .venv/bin/activate # Windows: .venv\Scripts\activate
|
||||||
|
|
||||||
|
pip install -r requirements.txt
|
||||||
|
```
|
||||||
|
|
||||||
|
复制配置文件并按需修改 / Copy config and edit as needed:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cp config.example.yaml config.yaml
|
||||||
|
```
|
||||||
|
|
||||||
|
如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量:
|
||||||
|
If you want API-powered dehydration and tagging (recommended, much better quality):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OMBRE_API_KEY="your-api-key"
|
||||||
|
```
|
||||||
|
|
||||||
|
支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。
|
||||||
|
Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`.
|
||||||
|
|
||||||
|
### 接入 Claude Desktop / Connect to Claude Desktop
|
||||||
|
|
||||||
|
在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`):
|
||||||
|
|
||||||
|
Add to your Claude Desktop config:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"ombre-brain": {
|
||||||
|
"command": "python",
|
||||||
|
"args": ["/path/to/Ombre-Brain/server.py"],
|
||||||
|
"env": {
|
||||||
|
"OMBRE_API_KEY": "your-api-key"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 接入 Claude.ai (远程) / Connect to Claude.ai (remote)
|
||||||
|
|
||||||
|
需要 HTTP 传输 + 隧道。可以用 Docker:
|
||||||
|
Requires HTTP transport + tunnel. Docker setup:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
echo "OMBRE_API_KEY=your-api-key" > .env
|
||||||
|
docker-compose up -d
|
||||||
|
```
|
||||||
|
|
||||||
|
`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。
|
||||||
|
The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`.
|
||||||
|
|
||||||
|
### 指向 Obsidian / Point to Obsidian
|
||||||
|
|
||||||
|
在 `config.yaml` 里设置 `buckets_dir`:
|
||||||
|
Set `buckets_dir` in `config.yaml`:
|
||||||
|
|
||||||
|
```yaml
|
||||||
|
buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
||||||
|
```
|
||||||
|
|
||||||
|
不设的话,默认用项目目录下的 `buckets/`。
|
||||||
|
If not set, defaults to `buckets/` in the project directory.
|
||||||
|
|
||||||
|
## 配置 / Configuration
|
||||||
|
|
||||||
|
所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个:
|
||||||
|
All parameters in `config.yaml` (copy from `config.example.yaml`). Key ones:
|
||||||
|
|
||||||
|
| 参数 Parameter | 说明 Description | 默认 Default |
|
||||||
|
|---|---|---|
|
||||||
|
| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` |
|
||||||
|
| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` |
|
||||||
|
| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` |
|
||||||
|
| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` |
|
||||||
|
| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` |
|
||||||
|
| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` |
|
||||||
|
| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` |
|
||||||
|
|
||||||
|
敏感配置用环境变量:
|
||||||
|
Sensitive config via env vars:
|
||||||
|
- `OMBRE_API_KEY` — LLM API 密钥
|
||||||
|
- `OMBRE_TRANSPORT` — 覆盖传输方式
|
||||||
|
- `OMBRE_BUCKETS_DIR` — 覆盖存储路径
|
||||||
|
|
||||||
|
## 衰减公式 / Decay Formula
|
||||||
|
|
||||||
|
$$Score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$
|
||||||
|
|
||||||
|
- `importance`: 1-10,记忆重要性 / memory importance
|
||||||
|
- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay
|
||||||
|
- `days`: 距上次激活的天数 / days since last activation
|
||||||
|
- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget
|
||||||
|
- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered
|
||||||
|
|
||||||
|
## 给 Claude 的使用指南 / Usage Guide for Claude
|
||||||
|
|
||||||
|
`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。
|
||||||
|
|
||||||
|
`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions.
|
||||||
|
|
||||||
|
## 工具脚本 / Utility Scripts
|
||||||
|
|
||||||
|
| 脚本 Script | 用途 Purpose |
|
||||||
|
|---|---|
|
||||||
|
| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP |
|
||||||
|
| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs |
|
||||||
|
| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords |
|
||||||
|
| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API |
|
||||||
|
| `test_smoke.py` | 冒烟测试 / Smoke test |
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT
|
||||||
755
backup_20260405_2124/bucket_manager.py
Normal file
755
backup_20260405_2124/bucket_manager.py
Normal file
@@ -0,0 +1,755 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Memory Bucket Manager (bucket_manager.py)
|
||||||
|
# 模块:记忆桶管理器
|
||||||
|
#
|
||||||
|
# CRUD operations, multi-dimensional index search, activation updates
|
||||||
|
# for memory buckets.
|
||||||
|
# 记忆桶的增删改查、多维索引搜索、激活更新。
|
||||||
|
#
|
||||||
|
# Core design:
|
||||||
|
# 核心逻辑:
|
||||||
|
# - Each bucket = one Markdown file (YAML frontmatter + body)
|
||||||
|
# 每个记忆桶 = 一个 Markdown 文件
|
||||||
|
# - Storage by type: permanent / dynamic / archive
|
||||||
|
# 存储按类型分目录
|
||||||
|
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
|
||||||
|
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
|
||||||
|
# - Search strategy: domain pre-filter → weighted multi-dim ranking
|
||||||
|
# 搜索策略:主题域预筛 → 多维加权精排
|
||||||
|
# - Emotion coordinates based on Russell circumplex model:
|
||||||
|
# 情感坐标基于环形情感模型(Russell circumplex):
|
||||||
|
# valence (0~1): 0=negative → 1=positive
|
||||||
|
# arousal (0~1): 0=calm → 1=excited
|
||||||
|
#
|
||||||
|
# Depended on by: server.py, decay_engine.py
|
||||||
|
# 被谁依赖:server.py, decay_engine.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import math
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
from collections import Counter
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import frontmatter
|
||||||
|
import jieba
|
||||||
|
from rapidfuzz import fuzz
|
||||||
|
|
||||||
|
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.bucket")
|
||||||
|
|
||||||
|
|
||||||
|
class BucketManager:
|
||||||
|
"""
|
||||||
|
Memory bucket manager — entry point for all bucket CRUD operations.
|
||||||
|
Buckets are stored as Markdown files with YAML frontmatter for metadata
|
||||||
|
and body for content. Natively compatible with Obsidian browsing/editing.
|
||||||
|
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
|
||||||
|
桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。
|
||||||
|
天然兼容 Obsidian 直接浏览和编辑。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: dict):
|
||||||
|
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
||||||
|
self.base_dir = config["buckets_dir"]
|
||||||
|
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
||||||
|
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
||||||
|
self.archive_dir = os.path.join(self.base_dir, "archive")
|
||||||
|
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
||||||
|
self.max_results = config.get("matching", {}).get("max_results", 5)
|
||||||
|
|
||||||
|
# --- Wikilink config / 双链配置 ---
|
||||||
|
wikilink_cfg = config.get("wikilink", {})
|
||||||
|
self.wikilink_enabled = wikilink_cfg.get("enabled", True)
|
||||||
|
self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
|
||||||
|
self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
|
||||||
|
self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
|
||||||
|
self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
|
||||||
|
self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
|
||||||
|
self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
|
||||||
|
self.wikilink_stopwords = {
|
||||||
|
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
||||||
|
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
||||||
|
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
||||||
|
"我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
|
||||||
|
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
||||||
|
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
||||||
|
"this", "that", "from", "they", "been", "said", "will", "each",
|
||||||
|
}
|
||||||
|
self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
|
||||||
|
|
||||||
|
# --- Search scoring weights / 检索权重配置 ---
|
||||||
|
scoring = config.get("scoring_weights", {})
|
||||||
|
self.w_topic = scoring.get("topic_relevance", 4.0)
|
||||||
|
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
||||||
|
self.w_time = scoring.get("time_proximity", 1.5)
|
||||||
|
self.w_importance = scoring.get("importance", 1.0)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Create a new bucket
|
||||||
|
# 创建新桶
|
||||||
|
# Write content and metadata into a .md file
|
||||||
|
# 将内容和元数据写入一个 .md 文件
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def create(
    self,
    content: str,
    tags: list[str] = None,
    importance: int = 5,
    domain: list[str] = None,
    valence: float = 0.5,
    arousal: float = 0.3,
    bucket_type: str = "dynamic",
    name: str = None,
) -> str:
    """Create a new memory bucket on disk and return its ID.

    The bucket is written as a Markdown file (YAML frontmatter + body)
    under <type dir>/<primary domain>/, Obsidian-compatible.

    Args:
        content: Memory body text; wikilinks are injected automatically.
        tags: Optional tag list (defaults to empty).
        importance: 1-10, clamped.
        domain: Topic domains; defaults to ["未分类"].
        valence/arousal: Russell-circumplex emotion coordinates, clamped to 0-1.
        bucket_type: "permanent" or "dynamic" (selects the storage tree).
        name: Optional human-readable name (sanitized for the filesystem).

    Raises:
        OSError: If the bucket file cannot be written.
    """
    bucket_id = generate_bucket_id()
    bucket_name = sanitize_name(name) if name else bucket_id
    domain = domain or ["未分类"]
    tags = tags or []
    body = self._apply_wikilinks(content, tags, domain, bucket_name)

    # Clamp numeric fields into their valid ranges before persisting.
    meta = {
        "id": bucket_id,
        "name": bucket_name,
        "tags": tags,
        "domain": domain,
        "valence": min(1.0, max(0.0, valence)),
        "arousal": min(1.0, max(0.0, arousal)),
        "importance": min(10, max(1, importance)),
        "type": bucket_type,
        "created": now_iso(),
        "last_active": now_iso(),
        "activation_count": 1,
    }
    post = frontmatter.Post(body, **meta)

    # Permanent buckets live in their own tree; everything else is dynamic.
    root = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
    primary_domain = sanitize_name(domain[0]) if domain else "未分类"
    target_dir = os.path.join(root, primary_domain)
    os.makedirs(target_dir, exist_ok=True)

    # Filename: readable_name_bucketID.md keeps Obsidian navigation pleasant.
    if bucket_name and bucket_name != bucket_id:
        filename = f"{bucket_name}_{bucket_id}.md"
    else:
        filename = f"{bucket_id}.md"
    file_path = safe_path(target_dir, filename)

    try:
        with open(file_path, "w", encoding="utf-8") as fh:
            fh.write(frontmatter.dumps(post))
    except OSError as e:
        logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
        raise

    logger.info(
        f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
    )
    return bucket_id
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Read bucket content
|
||||||
|
# 读取桶内容
|
||||||
|
# Returns {"id", "metadata", "content", "path"} or None
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def get(self, bucket_id: str) -> Optional[dict]:
    """Load a single bucket by ID.

    Returns:
        The loaded bucket dict, or None when the ID is empty, not a
        string, or no matching file exists.
    """
    # Guard clause: reject non-string or empty IDs outright.
    if not isinstance(bucket_id, str) or not bucket_id:
        return None
    path = self._find_bucket_file(bucket_id)
    return self._load_bucket(path) if path else None
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Update bucket
|
||||||
|
# 更新桶
|
||||||
|
# Supports: content, tags, importance, valence, arousal, name, resolved
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def update(self, bucket_id: str, **kwargs) -> bool:
    """Update a bucket's content and/or metadata fields in place.

    Recognized kwargs: content, tags, importance, domain, valence,
    arousal, name, resolved. Only fields that are actually passed get
    modified; `last_active` is always refreshed.

    Returns:
        True on success, False when the bucket is missing or file I/O fails.
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return False

    try:
        post = frontmatter.load(file_path)
    except Exception as e:
        logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
        return False

    if "content" in kwargs:
        # Re-run wikilink injection with the freshest tag/domain/name values
        # (incoming kwargs win over whatever is stored in the file).
        post.content = self._apply_wikilinks(
            kwargs["content"],
            kwargs.get("tags", post.get("tags", [])),
            kwargs.get("domain", post.get("domain", [])),
            kwargs.get("name", post.get("name", "")),
        )
    if "tags" in kwargs:
        post["tags"] = kwargs["tags"]
    if "importance" in kwargs:
        post["importance"] = min(10, max(1, int(kwargs["importance"])))
    if "domain" in kwargs:
        post["domain"] = kwargs["domain"]
    if "valence" in kwargs:
        post["valence"] = min(1.0, max(0.0, float(kwargs["valence"])))
    if "arousal" in kwargs:
        post["arousal"] = min(1.0, max(0.0, float(kwargs["arousal"])))
    if "name" in kwargs:
        post["name"] = sanitize_name(kwargs["name"])
    if "resolved" in kwargs:
        post["resolved"] = bool(kwargs["resolved"])

    # Any edit counts as an activation.
    post["last_active"] = now_iso()

    try:
        with open(file_path, "w", encoding="utf-8") as fh:
            fh.write(frontmatter.dumps(post))
    except OSError as e:
        logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
        return False

    logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
    return True
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Wikilink injection
|
||||||
|
# 自动添加 Obsidian 双链
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _apply_wikilinks(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
tags: list[str],
|
||||||
|
domain: list[str],
|
||||||
|
name: str,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
||||||
|
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
||||||
|
"""
|
||||||
|
if not self.wikilink_enabled or not content:
|
||||||
|
return content
|
||||||
|
|
||||||
|
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
||||||
|
if not keywords:
|
||||||
|
return content
|
||||||
|
|
||||||
|
# Split on existing wikilinks to avoid wrapping them again
|
||||||
|
# 按已有双链切分,避免重复包裹
|
||||||
|
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
||||||
|
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
||||||
|
for i, segment in enumerate(segments):
|
||||||
|
if segment.startswith("[[") and segment.endswith("]]"):
|
||||||
|
continue
|
||||||
|
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
||||||
|
segments[i] = updated
|
||||||
|
return "".join(segments)
|
||||||
|
|
||||||
|
def _collect_wikilink_keywords(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
tags: list[str],
|
||||||
|
domain: list[str],
|
||||||
|
name: str,
|
||||||
|
) -> list[str]:
|
||||||
|
"""
|
||||||
|
Collect candidate keywords from tags/domain/auto-extraction.
|
||||||
|
汇总候选关键词:可选 tags/domain + 自动提词。
|
||||||
|
"""
|
||||||
|
candidates = []
|
||||||
|
|
||||||
|
if self.wikilink_use_tags:
|
||||||
|
candidates.extend(tags or [])
|
||||||
|
if self.wikilink_use_domain:
|
||||||
|
candidates.extend(domain or [])
|
||||||
|
if name:
|
||||||
|
candidates.append(name)
|
||||||
|
if self.wikilink_use_auto_keywords:
|
||||||
|
candidates.extend(self._extract_auto_keywords(content))
|
||||||
|
|
||||||
|
return self._normalize_keywords(candidates)
|
||||||
|
|
||||||
|
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
||||||
|
"""
|
||||||
|
Deduplicate and sort by length (longer first to avoid short words
|
||||||
|
breaking long ones during replacement).
|
||||||
|
去重并按长度排序,优先替换长词。
|
||||||
|
"""
|
||||||
|
if not keywords:
|
||||||
|
return []
|
||||||
|
|
||||||
|
seen = set()
|
||||||
|
cleaned = []
|
||||||
|
for keyword in keywords:
|
||||||
|
if not isinstance(keyword, str):
|
||||||
|
continue
|
||||||
|
kw = keyword.strip()
|
||||||
|
if len(kw) < self.wikilink_min_len:
|
||||||
|
continue
|
||||||
|
if kw in self.wikilink_exclude_keywords:
|
||||||
|
continue
|
||||||
|
if kw.lower() in self.wikilink_stopwords:
|
||||||
|
continue
|
||||||
|
if kw in seen:
|
||||||
|
continue
|
||||||
|
seen.add(kw)
|
||||||
|
cleaned.append(kw)
|
||||||
|
|
||||||
|
return sorted(cleaned, key=len, reverse=True)
|
||||||
|
|
||||||
|
def _extract_auto_keywords(self, content: str) -> list[str]:
|
||||||
|
"""
|
||||||
|
Auto-extract keywords from body text, prioritizing high-frequency words.
|
||||||
|
从正文自动提词,优先高频词。
|
||||||
|
"""
|
||||||
|
if not content:
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
|
||||||
|
except Exception:
|
||||||
|
zh_words = []
|
||||||
|
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
|
||||||
|
|
||||||
|
# Chinese bigrams / 中文双词组合
|
||||||
|
zh_bigrams = []
|
||||||
|
for i in range(len(zh_words) - 1):
|
||||||
|
left = zh_words[i]
|
||||||
|
right = zh_words[i + 1]
|
||||||
|
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
|
||||||
|
continue
|
||||||
|
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
|
||||||
|
continue
|
||||||
|
if len(left + right) > 8:
|
||||||
|
continue
|
||||||
|
zh_bigrams.append(left + right)
|
||||||
|
|
||||||
|
merged = []
|
||||||
|
for word in zh_words + zh_bigrams + en_words:
|
||||||
|
if len(word) < self.wikilink_min_len:
|
||||||
|
continue
|
||||||
|
if re.fullmatch(r"\d+", word):
|
||||||
|
continue
|
||||||
|
if word.lower() in self.wikilink_stopwords:
|
||||||
|
continue
|
||||||
|
merged.append(word)
|
||||||
|
|
||||||
|
if not merged:
|
||||||
|
return []
|
||||||
|
|
||||||
|
counter = Counter(merged)
|
||||||
|
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Delete bucket
|
||||||
|
# 删除桶
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def delete(self, bucket_id: str) -> bool:
    """Delete the Markdown file backing a bucket.

    Returns:
        True when the file was found and removed; False when the bucket
        is unknown or the filesystem delete fails (failure is logged).
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return False

    try:
        os.remove(file_path)
    except OSError as e:
        logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
        return False

    logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
    return True
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Touch bucket (refresh activation time + increment count)
|
||||||
|
# 触碰桶(刷新激活时间 + 累加激活次数)
|
||||||
|
# Called on every recall hit; affects decay score.
|
||||||
|
# 每次检索命中时调用,影响衰减得分。
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def touch(self, bucket_id: str) -> None:
    """Refresh a bucket's last_active timestamp and bump activation_count.

    Called on every recall hit; this feeds the decay score. Failures are
    logged and swallowed, because a missed touch must never break a read
    path.
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return

    try:
        post = frontmatter.load(file_path)
        post["last_active"] = now_iso()
        post["activation_count"] = post.get("activation_count", 0) + 1
        with open(file_path, "w", encoding="utf-8") as fh:
            fh.write(frontmatter.dumps(post))
    except Exception as e:
        logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Multi-dimensional search (core feature)
|
||||||
|
# 多维搜索(核心功能)
|
||||||
|
#
|
||||||
|
# Strategy: domain pre-filter → weighted multi-dim ranking
|
||||||
|
# 策略:主题域预筛 → 多维加权精排
|
||||||
|
#
|
||||||
|
# Ranking formula:
|
||||||
|
# total = topic(×w_topic) + emotion(×w_emotion)
|
||||||
|
# + time(×w_time) + importance(×w_importance)
|
||||||
|
#
|
||||||
|
# Per-dimension scores (normalized to 0~1):
|
||||||
|
# topic = rapidfuzz weighted match (name/tags/domain/body)
|
||||||
|
# emotion = 1 - Euclidean distance (query v/a vs bucket v/a)
|
||||||
|
# time = e^(-0.02 × days) (recent memories first)
|
||||||
|
# importance = importance / 10
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def search(
    self,
    query: str,
    limit: int = None,
    domain_filter: list[str] = None,
    query_valence: float = None,
    query_arousal: float = None,
) -> list[dict]:
    """Rank memory buckets against a text query plus optional emotion coords.

    Two stages:
      1. Optional domain pre-filter narrows the candidate set; an empty
         pre-filter result falls back to the full set.
      2. Each candidate is scored on four dimensions (topic relevance,
         emotion resonance, time proximity, importance), combined with the
         configured weights, normalized to 0-100, and thresholded against
         `fuzzy_threshold`. Resolved buckets are down-weighted (×0.3) but
         stay reachable by keyword.

    Args:
        query: Search text; blank queries return [].
        limit: Max results (defaults to `self.max_results`).
        domain_filter: Restrict to these domains first (None = all).
        query_valence/query_arousal: Emotion coordinates for resonance
            scoring (None = neutral, no effect on ranking).

    Returns:
        Top `limit` buckets, best first, each annotated with a "score" key.
    """
    if not query or not query.strip():
        return []

    top_n = limit or self.max_results
    buckets = await self.list_all(include_archive=False)
    if not buckets:
        return []

    # --- Stage 1: cheap domain pre-filter / 主题域预筛 ---
    candidates = buckets
    if domain_filter:
        wanted = {d.lower() for d in domain_filter}
        narrowed = [
            b for b in buckets
            if wanted & {d.lower() for d in b["metadata"].get("domain", [])}
        ]
        # An empty pre-filter result falls back to searching everything.
        if narrowed:
            candidates = narrowed

    # --- Stage 2: weighted multi-dimensional ranking / 多维加权精排 ---
    weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
    ranked = []
    for bucket in candidates:
        meta = bucket.get("metadata", {})
        try:
            raw = (
                self._calc_topic_score(query, bucket) * self.w_topic
                + self._calc_emotion_score(query_valence, query_arousal, meta)
                * self.w_emotion
                + self._calc_time_score(meta) * self.w_time
                + (max(1, min(10, int(meta.get("importance", 5)))) / 10.0)
                * self.w_importance
            )
            # Normalize to 0~100 for readability.
            score = (raw / weight_sum) * 100 if weight_sum > 0 else 0

            # Resolved buckets sink in the ranking but can still surface.
            if meta.get("resolved", False):
                score *= 0.3

            if score >= self.fuzzy_threshold:
                bucket["score"] = round(score, 2)
                ranked.append(bucket)
        except Exception as e:
            logger.warning(
                f"Scoring failed for bucket {bucket.get('id', '?')} / "
                f"桶评分失败: {e}"
            )
            continue

    ranked.sort(key=lambda b: b["score"], reverse=True)
    return ranked[:top_n]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Topic relevance sub-score:
|
||||||
|
# name(×3) + domain(×2.5) + tags(×2) + body(×1)
|
||||||
|
# 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _calc_topic_score(self, query: str, bucket: dict) -> float:
    """Fuzzy text relevance in 0~1: name ×3, domain ×2.5, tags ×2, body ×1.

    Only the first 500 characters of the body are matched, to bound the
    cost of fuzzy scoring on long memories.
    """
    meta = bucket.get("metadata", {})

    def best(values):
        # Highest partial-ratio across a list of strings; 0 when empty.
        return max((fuzz.partial_ratio(query, v) for v in values), default=0)

    weighted = (
        fuzz.partial_ratio(query, meta.get("name", "")) * 3
        + best(meta.get("domain", [])) * 2.5
        + best(meta.get("tags", [])) * 2
        + fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1
    )
    # Each partial_ratio tops out at 100 and the weights sum to 8.5,
    # so dividing by 100 * 8.5 normalizes into 0~1.
    return weighted / (100 * 8.5)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Emotion resonance sub-score:
|
||||||
|
# Based on Russell circumplex Euclidean distance
|
||||||
|
# 情感共鸣子分:基于环形情感模型的欧氏距离
|
||||||
|
# No emotion in query → neutral 0.5 (doesn't affect ranking)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _calc_emotion_score(
|
||||||
|
self, q_valence: float, q_arousal: float, meta: dict
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Calculate emotion resonance score (0~1, closer = higher).
|
||||||
|
计算情感共鸣度(0~1,越近越高)。
|
||||||
|
"""
|
||||||
|
if q_valence is None or q_arousal is None:
|
||||||
|
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
|
||||||
|
|
||||||
|
try:
|
||||||
|
b_valence = float(meta.get("valence", 0.5))
|
||||||
|
b_arousal = float(meta.get("arousal", 0.3))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
# Euclidean distance, max sqrt(2) ≈ 1.414
|
||||||
|
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
|
||||||
|
return max(0.0, 1.0 - dist / 1.414)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Time proximity sub-score:
|
||||||
|
# More recent activation → higher score
|
||||||
|
# 时间亲近子分:距上次激活越近分越高
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _calc_time_score(self, meta: dict) -> float:
|
||||||
|
"""
|
||||||
|
Calculate time proximity score (0~1, more recent = higher).
|
||||||
|
计算时间亲近度。
|
||||||
|
"""
|
||||||
|
last_active_str = meta.get("last_active", meta.get("created", ""))
|
||||||
|
try:
|
||||||
|
last_active = datetime.fromisoformat(str(last_active_str))
|
||||||
|
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
days = 30
|
||||||
|
return math.exp(-0.02 * days)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# List all buckets
# ---------------------------------------------------------
async def list_all(self, include_archive: bool = False) -> list[dict]:
    """
    Recursively walk the bucket directories (including domain
    subdirectories) and return every successfully-loaded bucket.

    Args:
        include_archive: Also scan the archive directory when True.

    Returns:
        List of bucket dicts as produced by ``_load_bucket``; files
        that fail to parse are skipped (``_load_bucket`` returns None).
    """
    search_roots = [self.permanent_dir, self.dynamic_dir]
    if include_archive:
        search_roots.append(self.archive_dir)

    loaded = []
    for base in search_roots:
        if not os.path.exists(base):
            continue
        for root, _, files in os.walk(base):
            # Only Markdown files are bucket files.
            md_paths = (os.path.join(root, f) for f in files if f.endswith(".md"))
            for path in md_paths:
                parsed = self._load_bucket(path)
                if parsed:
                    loaded.append(parsed)
    return loaded
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Statistics (counts per category + total size)
# ---------------------------------------------------------
async def get_stats(self) -> dict:
    """
    Return memory bucket statistics, including per-domain counts
    gathered from domain subdirectories.

    Returns:
        Dict with ``permanent_count``, ``dynamic_count``,
        ``archive_count``, ``total_size_kb`` and a ``domains``
        mapping of domain subdirectory name -> bucket count.
    """
    stats = {
        "permanent_count": 0,
        "dynamic_count": 0,
        "archive_count": 0,
        "total_size_kb": 0.0,
        "domains": {},
    }

    categories = (
        (self.permanent_dir, "permanent_count"),
        (self.dynamic_dir, "dynamic_count"),
        (self.archive_dir, "archive_count"),
    )
    for base_dir, counter_key in categories:
        if not os.path.exists(base_dir):
            continue
        top_name = os.path.basename(base_dir)
        for root, _, files in os.walk(base_dir):
            for fname in files:
                if not fname.endswith(".md"):
                    continue
                stats[counter_key] += 1
                full_path = os.path.join(root, fname)
                try:
                    stats["total_size_kb"] += os.path.getsize(full_path) / 1024
                except OSError:
                    pass
                # Per-domain counts: any directory below the category
                # root counts as a domain subdirectory.
                domain_name = os.path.basename(root)
                if domain_name != top_name:
                    stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1

    return stats
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Archive bucket (move from permanent/dynamic into archive)
# Called by decay engine to simulate "forgetting"
# ---------------------------------------------------------
async def archive(self, bucket_id: str) -> bool:
    """
    Move a bucket into the archive directory (preserving domain subdirs).

    Args:
        bucket_id: ID fragment used to locate the bucket file.

    Returns:
        True on success; False when the bucket is not found or any
        parse/filesystem step fails (the failure is logged).
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return False

    try:
        # Read once; the first domain entry picks the archive subdirectory.
        post = frontmatter.load(file_path)
        domain = post.get("domain", ["未分类"])
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        archive_subdir = os.path.join(self.archive_dir, primary_domain)
        os.makedirs(archive_subdir, exist_ok=True)

        # NOTE(review): safe_path presumably guards against path
        # traversal in the filename — confirm against its definition.
        dest = safe_path(archive_subdir, os.path.basename(file_path))

        # Update the type marker in place first, then move the file.
        post["type"] = "archived"
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))

        # shutil.move is safe across filesystems (copy + delete fallback).
        shutil.move(file_path, str(dest))
    except Exception as e:
        logger.error(
            f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
        )
        return False

    logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
    return True
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Internal: find bucket file across all three directories
|
||||||
|
# 内部:在三个目录中查找桶文件
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Recursively search permanent/dynamic/archive for a bucket file
|
||||||
|
matching the given ID.
|
||||||
|
在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。
|
||||||
|
"""
|
||||||
|
if not bucket_id:
|
||||||
|
return None
|
||||||
|
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
||||||
|
if not os.path.exists(dir_path):
|
||||||
|
continue
|
||||||
|
for root, _, files in os.walk(dir_path):
|
||||||
|
for fname in files:
|
||||||
|
if not fname.endswith(".md"):
|
||||||
|
continue
|
||||||
|
# Match by exact ID segment in filename
|
||||||
|
# 通过文件名中的 ID 片段精确匹配
|
||||||
|
if bucket_id in fname:
|
||||||
|
return os.path.join(root, fname)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Internal: load bucket data from .md file
# ---------------------------------------------------------
def _load_bucket(self, file_path: str) -> Optional[dict]:
    """
    Parse a Markdown file with YAML frontmatter into a bucket dict.

    Returns:
        Dict with keys ``id``, ``metadata``, ``content`` and ``path``
        on success; None when parsing fails (logged as a warning).
    """
    try:
        post = frontmatter.load(file_path)
        # The filename stem is the fallback when no explicit id exists.
        return {
            "id": post.get("id", Path(file_path).stem),
            "metadata": dict(post.metadata),
            "content": post.content,
            "path": file_path,
        }
    except Exception as e:
        logger.warning(
            f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
        )
        return None
|
||||||
242
backup_20260405_2124/decay_engine.py
Normal file
242
backup_20260405_2124/decay_engine.py
Normal file
@@ -0,0 +1,242 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Memory Decay Engine (decay_engine.py)
|
||||||
|
# 模块:记忆衰减引擎
|
||||||
|
#
|
||||||
|
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
|
||||||
|
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
|
||||||
|
#
|
||||||
|
# Core formula (improved Ebbinghaus + emotion coordinates):
|
||||||
|
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
|
||||||
|
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
|
||||||
|
#
|
||||||
|
# Emotion weight (continuous coordinate, not discrete labels):
|
||||||
|
# 情感权重(基于连续坐标而非离散列举):
|
||||||
|
# emotion_weight = base + (arousal × arousal_boost)
|
||||||
|
# Higher arousal → higher emotion weight → slower decay
|
||||||
|
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
|
||||||
|
#
|
||||||
|
# Depended on by: server.py
|
||||||
|
# 被谁依赖:server.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import math
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.decay")
|
||||||
|
|
||||||
|
|
||||||
|
class DecayEngine:
    """
    Memory decay engine — periodically scans all dynamic buckets,
    calculates decay scores, and auto-archives low-activity buckets
    to simulate natural forgetting.

    Core formula (improved Ebbinghaus curve + emotion coordinates):
        Score = Importance x (activation_count^0.3) x e^(-λ x days) x emotion_weight
    where emotion_weight = base + arousal x arousal_boost, so higher
    arousal slows decay.
    """

    def __init__(self, config: dict, bucket_mgr):
        """
        Args:
            config: App config; reads the ``decay`` section
                (``lambda``, ``threshold``, ``check_interval_hours``,
                ``emotion_weights``).
            bucket_mgr: Bucket manager providing async ``list_all``
                and ``archive``.
        """
        # --- Decay parameters ---
        decay_cfg = config.get("decay", {})
        self.decay_lambda = decay_cfg.get("lambda", 0.05)
        self.threshold = decay_cfg.get("threshold", 0.3)
        self.check_interval = decay_cfg.get("check_interval_hours", 24)

        # --- Emotion weight params (continuous arousal coordinate) ---
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)

        self.bucket_mgr = bucket_mgr

        # --- Background task control ---
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the decay engine is running in the background."""
        return self._running

    # ---------------------------------------------------------
    # Core: calculate decay score for a single bucket
    # Higher score = more vivid memory; below threshold -> archive
    # Permanent buckets never decay
    # ---------------------------------------------------------
    def calculate_score(self, metadata: dict) -> float:
        """
        Calculate the current activity score for a memory bucket.

        Formula:
            Score = Importance x (act_count^0.3) x e^(-λ x days)
                    x (base + arousal x boost) x resolved_factor x urgency_boost

        Args:
            metadata: Bucket frontmatter metadata.

        Returns:
            0.0 for non-dict input, 999.0 for permanent buckets, otherwise
            the score rounded to 4 decimals. Malformed numeric fields fall
            back to their defaults instead of raising (previously a bad
            ``importance``/``activation_count`` raised ValueError/TypeError
            while ``arousal`` was already parsed defensively).
        """
        if not isinstance(metadata, dict):
            return 0.0

        # Permanent buckets never decay.
        if metadata.get("type") == "permanent":
            return 999.0

        try:
            importance = max(1, min(10, int(metadata.get("importance", 5))))
        except (ValueError, TypeError):
            importance = 5
        try:
            activation_count = max(1, int(metadata.get("activation_count", 1)))
        except (ValueError, TypeError):
            activation_count = 1

        # Days since last activation (``created`` is the fallback timestamp).
        last_active_str = metadata.get("last_active", metadata.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days_since = 30.0  # Parse failure -> assume 30 days old

        # Emotion weight from the continuous arousal coordinate:
        # higher arousal -> stronger memory -> slower decay.
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost

        # --- Apply decay formula ---
        score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )

        # --- Weight pool modifiers ---
        # Resolved events drop to 5% and sink, awaiting keyword reactivation;
        # high-arousal unresolved buckets get an urgency boost to surface first.
        resolved_factor = 0.05 if metadata.get("resolved", False) else 1.0
        urgency_boost = 1.5 if (arousal > 0.7 and not metadata.get("resolved", False)) else 1.0

        return round(score * resolved_factor * urgency_boost, 4)

    # ---------------------------------------------------------
    # Execute one decay cycle:
    # scan dynamic buckets -> score -> archive those below threshold
    # ---------------------------------------------------------
    async def run_decay_cycle(self) -> dict:
        """
        Execute one decay cycle: iterate dynamic buckets and archive
        those scoring below the threshold.

        Returns:
            Stats dict: {"checked": N, "archived": N, "lowest_score": X}
            (plus an "error" key when listing the buckets failed).
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}

        checked = 0
        archived = 0
        lowest_score = float("inf")

        for bucket in buckets:
            meta = bucket.get("metadata", {})

            # Permanent buckets are exempt from decay.
            if meta.get("type") == "permanent":
                continue

            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue

            lowest_score = min(lowest_score, score)

            # Below threshold -> archive (simulated forgetting).
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )

        result = {
            "checked": checked,
            "archived": archived,
            # Guard against ``inf`` leaking out when no bucket produced a
            # score (previously possible if every calculate_score call
            # failed even though checked > 0).
            "lowest_score": lowest_score if lowest_score != float("inf") else 0,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    # ---------------------------------------------------------
    # Background decay task management
    # ---------------------------------------------------------
    async def ensure_started(self) -> None:
        """Ensure the decay engine is started (lazy init on first call)."""
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop (no-op when already running)."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and wait for it to finish."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # Drop the reference so a stopped engine holds no stale task.
            self._task = None
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop body: run a decay cycle, sleep, repeat."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # Wait for the next cycle; cancellation exits the loop.
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break
|
||||||
536
backup_20260405_2124/server.py
Normal file
536
backup_20260405_2124/server.py
Normal file
@@ -0,0 +1,536 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: MCP Server Entry Point (server.py)
|
||||||
|
# 模块:MCP 服务器主入口
|
||||||
|
#
|
||||||
|
# Starts the Ombre Brain MCP service and registers memory
|
||||||
|
# operation tools for Claude to call.
|
||||||
|
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
|
||||||
|
#
|
||||||
|
# Core responsibilities:
|
||||||
|
# 核心职责:
|
||||||
|
# - Initialize config, bucket manager, dehydrator, decay engine
|
||||||
|
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
|
||||||
|
# - Expose 5 MCP tools:
|
||||||
|
# 暴露 5 个 MCP 工具:
|
||||||
|
# breath — Surface unresolved memories or search by keyword
|
||||||
|
# 浮现未解决记忆 或 按关键词检索
|
||||||
|
# hold — Store a single memory
|
||||||
|
# 存储单条记忆
|
||||||
|
# grow — Diary digest, auto-split into multiple buckets
|
||||||
|
# 日记归档,自动拆分多桶
|
||||||
|
# trace — Modify metadata / resolved / delete
|
||||||
|
# 修改元数据 / resolved 标记 / 删除
|
||||||
|
# pulse — System status + bucket listing
|
||||||
|
# 系统状态 + 所有桶列表
|
||||||
|
#
|
||||||
|
# Startup:
|
||||||
|
# 启动方式:
|
||||||
|
# Local: python server.py
|
||||||
|
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
|
||||||
|
# Docker: docker-compose up
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import random
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# --- Ensure same-directory modules can be imported ---
|
||||||
|
# --- 确保同目录下的模块能被正确导入 ---
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
|
||||||
|
from bucket_manager import BucketManager
|
||||||
|
from dehydrator import Dehydrator
|
||||||
|
from decay_engine import DecayEngine
|
||||||
|
from utils import load_config, setup_logging
|
||||||
|
|
||||||
|
# --- Load config & initialize logging ---
config = load_config()
setup_logging(config.get("log_level", "INFO"))
logger = logging.getLogger("ombre_brain")

# --- Initialize the three core components ---
bucket_mgr = BucketManager(config)              # Bucket manager (storage/search)
dehydrator = Dehydrator(config)                 # Dehydrator (analyze/digest/merge/summarize)
decay_engine = DecayEngine(config, bucket_mgr)  # Decay engine (forgetting curve)

# --- Create the MCP server instance ---
# host="0.0.0.0" so the Docker container's SSE endpoint is reachable
# from outside; stdio mode ignores host (no network involved).
mcp = FastMCP(
    "Ombre Brain",
    host="0.0.0.0",
    port=8000,
)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# /health endpoint: lightweight keepalive
# For Cloudflare Tunnel or a reverse proxy to ping periodically,
# preventing idle-timeout disconnects.
# =============================================================
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """Return a small JSON liveness payload (bucket count + decay state)."""
    from starlette.responses import JSONResponse

    try:
        stats = await bucket_mgr.get_stats()
        payload = {
            "status": "ok",
            "buckets": stats["permanent_count"] + stats["dynamic_count"],
            "decay_engine": "running" if decay_engine.is_running else "stopped",
        }
        return JSONResponse(payload)
    except Exception as e:
        # Any failure is reported as a 500 with the error detail.
        return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# Internal helper: merge-or-create
# Shared by hold and grow to avoid duplicate logic
# =============================================================
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Merge *content* into an existing similar bucket when one scores
    above the merge threshold; otherwise create a new bucket.

    Args:
        content: Raw memory text to store.
        tags: Tag list; set-union-merged with the target bucket's tags on merge.
        importance: 1-10; on merge the higher of old/new is kept.
        domain: Domain list; set-union-merged on merge.
        valence: Emotion valence coordinate.
        arousal: Emotion arousal coordinate.
        name: Optional name suggestion, used only on the create path.

    Returns:
        (bucket name or id, True when merged into an existing bucket).
    """
    try:
        existing = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        # Best-effort: a failed similarity search degrades to "create new".
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        existing = []

    # NOTE(review): search() scores appear to be rounded small values,
    # while the default merge_threshold here is 75 — confirm both use the
    # same scale, otherwise the merge branch may never fire by default.
    if existing and existing[0].get("score", 0) > config.get("merge_threshold", 75):
        bucket = existing[0]
        try:
            merged = await dehydrator.merge(bucket["content"], content)
            await bucket_mgr.update(
                bucket["id"],
                content=merged,
                tags=list(set(bucket["metadata"].get("tags", []) + tags)),
                importance=max(bucket["metadata"].get("importance", 5), importance),
                domain=list(set(bucket["metadata"].get("domain", []) + domain)),
                valence=valence,
                arousal=arousal,
            )
            return bucket["metadata"].get("name", bucket["id"]), True
        except Exception as e:
            # Merge failure falls through to creating a fresh bucket.
            logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    bucket_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return bucket_id, False
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# Tool 1: breath — Breathe
#
# No args: surface highest-weight unresolved memories (active push)
# With args: search by keyword + emotion coordinates
# =============================================================
@mcp.tool()
async def breath(
    query: str = "",
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    # NOTE: the Chinese docstring below is the MCP tool description shown
    # to the calling model at runtime, so it is intentionally left as-is.
    """检索记忆或浮现未解决记忆。query 为空时自动推送权重最高的未解决桶;有 query 时按关键词+情感检索。domain 逗号分隔,valence/arousal 传 0~1 启用情感共鸣,-1 忽略。"""
    # Lazily start the background decay loop on first tool use.
    await decay_engine.ensure_started()

    # --- No args: surfacing mode (weight pool active push) ---
    if not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"

        # Only non-permanent, unresolved buckets compete for surfacing.
        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
        ]
        if not unresolved:
            return "权重池平静,没有需要处理的记忆。"

        # Rank by decay score, highest (most urgent/vivid) first.
        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        top = scored[:2]
        results = []
        for b in top:
            try:
                # Summarize, then bump activation so surfaced memories stay warm.
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue
        if not results:
            return "权重池平静,没有需要处理的记忆。"
        return "=== 浮现记忆 ===\n" + "\n---\n".join(results)

    # --- With args: search mode ---
    # Emotion coordinates are only honored inside [0, 1]; -1 means "ignore".
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None

    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"

    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue

    # --- Random surfacing: when search returns < 3 results, a 40% chance
    # to let old low-weight memories drift up alongside them ---
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            # Candidates: not already matched and with a low decay score
            # (permanent buckets score 999 and are thereby excluded).
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                # Pick 1-3 random drifters (bounded by availability).
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    drift_results.append(f"[surface_type: random]\n{summary}")
                results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")

    if not results:
        return "未找到相关记忆。"

    return "\n---\n".join(results)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# Tool 2: hold — Hold on to this
# =============================================================
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
) -> str:
    """存储单条记忆。自动打标+合并相似桶。tags 逗号分隔,importance 1-10。"""
    await decay_engine.ensure_started()

    # Reject empty / whitespace-only input.
    if not content or not content.strip():
        return "内容为空,无法存储。"

    importance = min(10, max(1, importance))
    user_tags = [t.strip() for t in tags.split(",") if t.strip()]

    # Step 1: auto-tagging via the dehydrator; neutral defaults when
    # the analysis call fails.
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {
            "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
            "tags": [], "suggested_name": "",
        }

    domain = analysis["domain"]
    valence = analysis["valence"]
    arousal = analysis["arousal"]
    suggested_name = analysis.get("suggested_name", "")
    # Auto tags first, then user tags; dict.fromkeys keeps order, drops dupes.
    all_tags = list(dict.fromkeys(analysis["tags"] + user_tags))

    # Step 2: merge into a similar bucket or create a new one.
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )

    if is_merged:
        return (
            f"已合并到现有记忆桶: {result_name}\n"
            f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f}"
        )
    return (
        f"已创建新记忆桶: {result_name}\n"
        f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f} | 标签: {', '.join(all_tags)}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# Tool 3: grow — Grow, fragments become memories
# =============================================================
@mcp.tool()
async def grow(content: str) -> str:
    # NOTE: the Chinese docstring is the MCP tool description shown to the
    # calling model at runtime, so it is intentionally left as-is.
    """日记归档。自动拆分长内容为多个记忆桶。"""
    await decay_engine.ensure_started()

    if not content or not content.strip():
        return "内容为空,无法整理。"

    # --- Step 1: let the dehydrator split the diary into items ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"

    if not items:
        return "内容为空或整理失败。"

    results = []
    created = 0
    merged = 0

    # --- Step 2: merge or create each item; one failing item does not
    # abort the rest (per-item error handling) ---
    for item in items:
        try:
            result_name, is_merged = await _merge_or_create(
                content=item["content"],
                tags=item.get("tags", []),
                importance=item.get("importance", 5),
                domain=item.get("domain", ["未分类"]),
                valence=item.get("valence", 0.5),
                arousal=item.get("arousal", 0.3),
                name=item.get("name", ""),
            )

            if is_merged:
                results.append(f" 📎 合并 → {result_name}")
                merged += 1
            else:
                domains_str = ",".join(item.get("domain", []))
                results.append(
                    f" 📝 新建 [{item.get('name', result_name)}] "
                    f"主题:{domains_str} V{item.get('valence', 0.5):.1f}/A{item.get('arousal', 0.3):.1f}"
                )
                created += 1
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{item.get('name', '?')}: {e}"
            )
            # Failed items are still reported in the summary output.
            results.append(f" ⚠️ 失败: {item.get('name', '未知条目')}")

    summary = f"=== 日记整理完成 ===\n拆分为 {len(items)} 条 | 新建 {created} 桶 | 合并 {merged} 桶\n"
    return summary + "\n".join(results)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# Tool 4: trace — redraw the outline of a memory.
# Handles metadata edits, resolved-state toggling, and deletion
# (delete=True). Sentinel values (-1 / empty string) mean
# "leave this field unchanged".
# =============================================================
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1 标记已解决(桶权重骤降沉底),resolved=0 重新激活,delete=True 删除桶。其余字段只传需改的,-1 或空串表示不改。"""
    # Guard: a usable bucket id is required for every operation below.
    if not (bucket_id and bucket_id.strip()):
        return "请提供有效的 bucket_id。"

    # Deletion short-circuits everything else.
    if delete:
        removed = await bucket_mgr.delete(bucket_id)
        if removed:
            return f"已遗忘记忆桶: {bucket_id}"
        return f"未找到记忆桶: {bucket_id}"

    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"

    # Collect only the fields the caller actually supplied
    # (sentinels -1 / "" mean "unchanged").
    changes = {}
    if name:
        changes["name"] = name
    if domain:
        changes["domain"] = [part.strip() for part in domain.split(",") if part.strip()]
    if 0 <= valence <= 1:
        changes["valence"] = valence
    if 0 <= arousal <= 1:
        changes["arousal"] = arousal
    if 1 <= importance <= 10:
        changes["importance"] = importance
    if tags:
        changes["tags"] = [part.strip() for part in tags.split(",") if part.strip()]
    if resolved in (0, 1):
        changes["resolved"] = bool(resolved)

    if not changes:
        return "没有任何字段需要修改。"

    ok = await bucket_mgr.update(bucket_id, **changes)
    if not ok:
        return f"修改失败: {bucket_id}"

    summary = ", ".join(f"{k}={v}" for k, v in changes.items())
    # Spell out what a resolved-state transition means for surfacing.
    if "resolved" in changes:
        summary += (
            " → 已沉底,只在关键词触发时重新浮现"
            if changes["resolved"]
            else " → 已重新激活,将参与浮现排序"
        )
    return f"已修改记忆桶 {bucket_id}: {summary}"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
# Tool 5: pulse — heartbeat: system status + memory listing.
# =============================================================
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """系统状态和所有记忆桶摘要。include_archive=True 时包含归档桶。"""
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"

    engine_state = '运行中' if decay_engine.is_running else '已停止'
    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']} 个\n"
        f"动态记忆桶: {stats['dynamic_count']} 个\n"
        f"归档记忆桶: {stats['archive_count']} 个\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {engine_state}\n"
    )

    # One summary line per bucket.
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        return status + f"\n列出记忆桶失败: {e}"
    if not buckets:
        return status + "\n记忆库为空。"

    summaries = []
    for bucket in buckets:
        meta = bucket.get("metadata", {})
        bucket_type = meta.get("type")
        if bucket_type == "permanent":
            icon = "📦"
        elif bucket_type == "archived":
            icon = "🗄️"
        elif meta.get("resolved", False):
            icon = "✅"
        else:
            icon = "💭"
        # The decay score is advisory; never let a scoring error kill the listing.
        try:
            weight = decay_engine.calculate_score(meta)
        except Exception:
            weight = 0.0
        resolved_tag = " [已解决]" if meta.get("resolved", False) else ""
        summaries.append(
            f"{icon} [{meta.get('name', bucket['id'])}]{resolved_tag} "
            f"主题:{','.join(meta.get('domain', []))} "
            f"情感:V{meta.get('valence', 0.5):.1f}/A{meta.get('arousal', 0.3):.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{weight:.2f} "
            f"标签:{','.join(meta.get('tags', []))}"
        )

    return status + "\n=== 记忆列表 ===\n" + "\n".join(summaries)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Entry point ---
if __name__ == "__main__":
    # Transport is read from config; "stdio" is the local default.
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")

    # --- Application-level keepalive: remote transports only. ---
    # Pings /health every 60 seconds so intermediaries (e.g. a
    # Cloudflare Tunnel) don't drop idle connections.
    # NOTE(review): assumes the server listens on localhost:8000 — confirm
    # this matches the configured bind address/port.
    if transport in ("sse", "streamable-http"):
        async def _keepalive_loop():
            await asyncio.sleep(10)  # Wait for server to fully start
            async with httpx.AsyncClient() as client:
                while True:
                    try:
                        await client.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as e:
                        # Best-effort: a failed ping is logged, never fatal.
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}")
                    await asyncio.sleep(60)

        import threading

        # Run the keepalive on its own event loop inside a daemon thread so
        # it dies with the process and never blocks mcp.run() below.
        def _start_keepalive():
            loop = asyncio.new_event_loop()
            loop.run_until_complete(_keepalive_loop())

        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()

    # Blocking call: serves MCP over the configured transport.
    mcp.run(transport=transport)
|
||||||
781
bucket_manager.py
Normal file
781
bucket_manager.py
Normal file
@@ -0,0 +1,781 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Memory Bucket Manager (bucket_manager.py)
|
||||||
|
# 模块:记忆桶管理器
|
||||||
|
#
|
||||||
|
# CRUD operations, multi-dimensional index search, activation updates
|
||||||
|
# for memory buckets.
|
||||||
|
# 记忆桶的增删改查、多维索引搜索、激活更新。
|
||||||
|
#
|
||||||
|
# Core design:
|
||||||
|
# 核心逻辑:
|
||||||
|
# - Each bucket = one Markdown file (YAML frontmatter + body)
|
||||||
|
# 每个记忆桶 = 一个 Markdown 文件
|
||||||
|
# - Storage by type: permanent / dynamic / archive
|
||||||
|
# 存储按类型分目录
|
||||||
|
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
|
||||||
|
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
|
||||||
|
# - Search strategy: domain pre-filter → weighted multi-dim ranking
|
||||||
|
# 搜索策略:主题域预筛 → 多维加权精排
|
||||||
|
# - Emotion coordinates based on Russell circumplex model:
|
||||||
|
# 情感坐标基于环形情感模型(Russell circumplex):
|
||||||
|
# valence (0~1): 0=negative → 1=positive
|
||||||
|
# arousal (0~1): 0=calm → 1=excited
|
||||||
|
#
|
||||||
|
# Depended on by: server.py, decay_engine.py
|
||||||
|
# 被谁依赖:server.py, decay_engine.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import math
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
from collections import Counter
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
import frontmatter
|
||||||
|
import jieba
|
||||||
|
from rapidfuzz import fuzz
|
||||||
|
|
||||||
|
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.bucket")
|
||||||
|
|
||||||
|
|
||||||
|
class BucketManager:
|
||||||
|
"""
|
||||||
|
Memory bucket manager — entry point for all bucket CRUD operations.
|
||||||
|
Buckets are stored as Markdown files with YAML frontmatter for metadata
|
||||||
|
and body for content. Natively compatible with Obsidian browsing/editing.
|
||||||
|
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
|
||||||
|
桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。
|
||||||
|
天然兼容 Obsidian 直接浏览和编辑。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: dict):
|
||||||
|
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
||||||
|
self.base_dir = config["buckets_dir"]
|
||||||
|
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
||||||
|
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
||||||
|
self.archive_dir = os.path.join(self.base_dir, "archive")
|
||||||
|
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
||||||
|
self.max_results = config.get("matching", {}).get("max_results", 5)
|
||||||
|
|
||||||
|
# --- Wikilink config / 双链配置 ---
|
||||||
|
wikilink_cfg = config.get("wikilink", {})
|
||||||
|
self.wikilink_enabled = wikilink_cfg.get("enabled", True)
|
||||||
|
self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
|
||||||
|
self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
|
||||||
|
self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
|
||||||
|
self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
|
||||||
|
self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
|
||||||
|
self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
|
||||||
|
self.wikilink_stopwords = {
|
||||||
|
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
||||||
|
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
||||||
|
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
||||||
|
"我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
|
||||||
|
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
||||||
|
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
||||||
|
"this", "that", "from", "they", "been", "said", "will", "each",
|
||||||
|
}
|
||||||
|
self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
|
||||||
|
|
||||||
|
# --- Search scoring weights / 检索权重配置 ---
|
||||||
|
scoring = config.get("scoring_weights", {})
|
||||||
|
self.w_topic = scoring.get("topic_relevance", 4.0)
|
||||||
|
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
||||||
|
self.w_time = scoring.get("time_proximity", 1.5)
|
||||||
|
self.w_importance = scoring.get("importance", 1.0)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Create a new bucket: write content + metadata into a .md file.
# ---------------------------------------------------------
async def create(
    self,
    content: str,
    tags: Optional[list[str]] = None,
    importance: int = 5,
    domain: Optional[list[str]] = None,
    valence: float = 0.5,
    arousal: float = 0.3,
    bucket_type: str = "dynamic",
    name: Optional[str] = None,
    pinned: bool = False,
    protected: bool = False,
) -> str:
    """
    Create a new memory bucket and return its ID.

    The bucket is persisted as one Markdown file: YAML frontmatter for
    metadata, body for content (Obsidian-compatible). valence/arousal are
    clamped to [0, 1] and importance to [1, 10].

    pinned/protected=True: bucket won't be merged or decayed, and its
    importance is locked to 10.

    Raises:
        OSError: if the bucket file cannot be written (after logging).
    """
    bucket_id = generate_bucket_id()
    # Fall back to the generated ID when no readable name was given.
    bucket_name = sanitize_name(name) if name else bucket_id
    domain = domain or ["未分类"]
    tags = tags or []
    # Inject Obsidian [[wikilinks]] into the body before persisting.
    linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)

    # Pinned/protected buckets: lock importance to the maximum.
    if pinned or protected:
        importance = 10

    # --- Build YAML frontmatter metadata (clamped to valid ranges) ---
    metadata = {
        "id": bucket_id,
        "name": bucket_name,
        "tags": tags,
        "domain": domain,
        "valence": max(0.0, min(1.0, valence)),
        "arousal": max(0.0, min(1.0, arousal)),
        "importance": max(1, min(10, importance)),
        "type": bucket_type,
        "created": now_iso(),
        "last_active": now_iso(),
        "activation_count": 1,
    }
    # Flags are only written when set, keeping frontmatter minimal.
    if pinned:
        metadata["pinned"] = True
    if protected:
        metadata["protected"] = True

    # --- Assemble the Markdown file (frontmatter + body) ---
    post = frontmatter.Post(linked_content, **metadata)

    # --- Choose directory by type + primary (first) domain ---
    type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
    primary_domain = sanitize_name(domain[0]) if domain else "未分类"
    target_dir = os.path.join(type_dir, primary_domain)
    os.makedirs(target_dir, exist_ok=True)

    # --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
    if bucket_name and bucket_name != bucket_id:
        filename = f"{bucket_name}_{bucket_id}.md"
    else:
        filename = f"{bucket_id}.md"
    # safe_path guards against path traversal from the sanitized name.
    file_path = safe_path(target_dir, filename)

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))
    except OSError as e:
        logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
        raise

    logger.info(
        f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
        + (" [PINNED]" if pinned else "") + (" [PROTECTED]" if protected else "")
    )
    return bucket_id
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Read one bucket by ID.
# Returns {"id", "metadata", "content", "path"} or None.
# ---------------------------------------------------------
async def get(self, bucket_id: str) -> Optional[dict]:
    """Look up a single bucket by ID; None when missing or the ID is invalid."""
    if not bucket_id or not isinstance(bucket_id, str):
        return None
    path = self._find_bucket_file(bucket_id)
    return self._load_bucket(path) if path else None
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Update bucket
# Supports: content, tags, importance, domain, valence, arousal,
# name, resolved, pinned — only fields present in kwargs change.
# ---------------------------------------------------------
async def update(self, bucket_id: str, **kwargs) -> bool:
    """
    Update a bucket's content or metadata fields in place.

    Only the keys present in **kwargs are touched. Numeric fields are
    clamped (importance 1-10, valence/arousal 0-1). Returns True on
    success, False when the bucket is missing or disk I/O fails.
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return False

    try:
        post = frontmatter.load(file_path)
    except Exception as e:
        logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
        return False

    # Pinned/protected buckets: importance is locked at 10, so any
    # requested importance change is dropped without error.
    is_pinned = post.get("pinned", False) or post.get("protected", False)
    if is_pinned:
        kwargs.pop("importance", None)  # silently ignore importance update

    # --- Update only the fields that were passed in ---
    if "content" in kwargs:
        # Re-run wikilink injection with the *new* tags/domain/name when
        # they are part of this same update, else the stored ones.
        next_tags = kwargs.get("tags", post.get("tags", []))
        next_domain = kwargs.get("domain", post.get("domain", []))
        next_name = kwargs.get("name", post.get("name", ""))
        post.content = self._apply_wikilinks(
            kwargs["content"],
            next_tags,
            next_domain,
            next_name,
        )
    if "tags" in kwargs:
        post["tags"] = kwargs["tags"]
    if "importance" in kwargs:
        post["importance"] = max(1, min(10, int(kwargs["importance"])))
    if "domain" in kwargs:
        post["domain"] = kwargs["domain"]
    if "valence" in kwargs:
        post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
    if "arousal" in kwargs:
        post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
    if "name" in kwargs:
        post["name"] = sanitize_name(kwargs["name"])
    if "resolved" in kwargs:
        post["resolved"] = bool(kwargs["resolved"])
    if "pinned" in kwargs:
        post["pinned"] = bool(kwargs["pinned"])
        if kwargs["pinned"]:
            post["importance"] = 10  # pinned → lock importance to 10

    # Any update counts as an activation for decay purposes.
    post["last_active"] = now_iso()

    try:
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))
    except OSError as e:
        logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
        return False

    logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
    return True
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Wikilink injection
|
||||||
|
# 自动添加 Obsidian 双链
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _apply_wikilinks(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
tags: list[str],
|
||||||
|
domain: list[str],
|
||||||
|
name: str,
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
||||||
|
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
||||||
|
"""
|
||||||
|
if not self.wikilink_enabled or not content:
|
||||||
|
return content
|
||||||
|
|
||||||
|
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
||||||
|
if not keywords:
|
||||||
|
return content
|
||||||
|
|
||||||
|
# Split on existing wikilinks to avoid wrapping them again
|
||||||
|
# 按已有双链切分,避免重复包裹
|
||||||
|
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
||||||
|
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
||||||
|
for i, segment in enumerate(segments):
|
||||||
|
if segment.startswith("[[") and segment.endswith("]]"):
|
||||||
|
continue
|
||||||
|
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
||||||
|
segments[i] = updated
|
||||||
|
return "".join(segments)
|
||||||
|
|
||||||
|
def _collect_wikilink_keywords(
|
||||||
|
self,
|
||||||
|
content: str,
|
||||||
|
tags: list[str],
|
||||||
|
domain: list[str],
|
||||||
|
name: str,
|
||||||
|
) -> list[str]:
|
||||||
|
"""
|
||||||
|
Collect candidate keywords from tags/domain/auto-extraction.
|
||||||
|
汇总候选关键词:可选 tags/domain + 自动提词。
|
||||||
|
"""
|
||||||
|
candidates = []
|
||||||
|
|
||||||
|
if self.wikilink_use_tags:
|
||||||
|
candidates.extend(tags or [])
|
||||||
|
if self.wikilink_use_domain:
|
||||||
|
candidates.extend(domain or [])
|
||||||
|
if name:
|
||||||
|
candidates.append(name)
|
||||||
|
if self.wikilink_use_auto_keywords:
|
||||||
|
candidates.extend(self._extract_auto_keywords(content))
|
||||||
|
|
||||||
|
return self._normalize_keywords(candidates)
|
||||||
|
|
||||||
|
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
||||||
|
"""
|
||||||
|
Deduplicate and sort by length (longer first to avoid short words
|
||||||
|
breaking long ones during replacement).
|
||||||
|
去重并按长度排序,优先替换长词。
|
||||||
|
"""
|
||||||
|
if not keywords:
|
||||||
|
return []
|
||||||
|
|
||||||
|
seen = set()
|
||||||
|
cleaned = []
|
||||||
|
for keyword in keywords:
|
||||||
|
if not isinstance(keyword, str):
|
||||||
|
continue
|
||||||
|
kw = keyword.strip()
|
||||||
|
if len(kw) < self.wikilink_min_len:
|
||||||
|
continue
|
||||||
|
if kw in self.wikilink_exclude_keywords:
|
||||||
|
continue
|
||||||
|
if kw.lower() in self.wikilink_stopwords:
|
||||||
|
continue
|
||||||
|
if kw in seen:
|
||||||
|
continue
|
||||||
|
seen.add(kw)
|
||||||
|
cleaned.append(kw)
|
||||||
|
|
||||||
|
return sorted(cleaned, key=len, reverse=True)
|
||||||
|
|
||||||
|
def _extract_auto_keywords(self, content: str) -> list[str]:
|
||||||
|
"""
|
||||||
|
Auto-extract keywords from body text, prioritizing high-frequency words.
|
||||||
|
从正文自动提词,优先高频词。
|
||||||
|
"""
|
||||||
|
if not content:
|
||||||
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
|
||||||
|
except Exception:
|
||||||
|
zh_words = []
|
||||||
|
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
|
||||||
|
|
||||||
|
# Chinese bigrams / 中文双词组合
|
||||||
|
zh_bigrams = []
|
||||||
|
for i in range(len(zh_words) - 1):
|
||||||
|
left = zh_words[i]
|
||||||
|
right = zh_words[i + 1]
|
||||||
|
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
|
||||||
|
continue
|
||||||
|
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
|
||||||
|
continue
|
||||||
|
if len(left + right) > 8:
|
||||||
|
continue
|
||||||
|
zh_bigrams.append(left + right)
|
||||||
|
|
||||||
|
merged = []
|
||||||
|
for word in zh_words + zh_bigrams + en_words:
|
||||||
|
if len(word) < self.wikilink_min_len:
|
||||||
|
continue
|
||||||
|
if re.fullmatch(r"\d+", word):
|
||||||
|
continue
|
||||||
|
if word.lower() in self.wikilink_stopwords:
|
||||||
|
continue
|
||||||
|
merged.append(word)
|
||||||
|
|
||||||
|
if not merged:
|
||||||
|
return []
|
||||||
|
|
||||||
|
counter = Counter(merged)
|
||||||
|
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Delete a bucket file permanently.
# ---------------------------------------------------------
async def delete(self, bucket_id: str) -> bool:
    """Remove the bucket's backing file; True on success, False when missing or I/O fails."""
    target = self._find_bucket_file(bucket_id)
    if not target:
        return False

    try:
        os.remove(target)
    except OSError as e:
        logger.error(f"Failed to delete bucket file / 删除桶文件失败: {target}: {e}")
        return False

    logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
    return True
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Touch: refresh last_active and bump activation_count.
# Called on every recall hit; feeds the decay score.
# ---------------------------------------------------------
async def touch(self, bucket_id: str) -> None:
    """Refresh a bucket's last activation timestamp and hit counter (best-effort)."""
    path = self._find_bucket_file(bucket_id)
    if not path:
        return

    try:
        post = frontmatter.load(path)
        post["last_active"] = now_iso()
        post["activation_count"] = post.get("activation_count", 0) + 1
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(frontmatter.dumps(post))
    except Exception as e:
        # Touch is advisory — never let it break a recall.
        logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Multi-dimensional search (core feature).
#
# Strategy: domain pre-filter → weighted multi-dimension ranking.
#
# Ranking formula:
#   total = topic(×w_topic) + emotion(×w_emotion)
#         + time(×w_time) + importance(×w_importance)
#
# Per-dimension scores (each normalized to 0~1):
#   topic      = rapidfuzz weighted match (name/tags/domain/body)
#   emotion    = 1 - Euclidean distance (query v/a vs bucket v/a)
#   time       = e^(-0.02 × days)  (recent memories first)
#   importance = importance / 10
# ---------------------------------------------------------
async def search(
    self,
    query: str,
    limit: Optional[int] = None,
    domain_filter: Optional[list[str]] = None,
    query_valence: Optional[float] = None,
    query_arousal: Optional[float] = None,
) -> list[dict]:
    """
    Multi-dimensional indexed search over memory buckets.

    Args:
        query: Free-text query; blank queries return no results.
        limit: Max results (defaults to the configured max_results).
        domain_filter: Case-insensitive domain pre-filter (None = all).
        query_valence: Optional query emotion coordinates for the
        query_arousal: resonance dimension; None keeps emotion neutral.

    Returns:
        Buckets sorted by descending "score" (0~100), truncated to
        *limit*. Each returned bucket dict gains a "score" key.
    """
    if not query or not query.strip():
        return []

    limit = limit or self.max_results
    # Archived buckets are never searched.
    all_buckets = await self.list_all(include_archive=False)

    if not all_buckets:
        return []

    # --- Layer 1: domain pre-filter (fast scope reduction) ---
    if domain_filter:
        filter_set = {d.lower() for d in domain_filter}
        candidates = [
            b for b in all_buckets
            if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set
        ]
        # Fall back to a full search if the pre-filter yields nothing.
        if not candidates:
            candidates = all_buckets
    else:
        candidates = all_buckets

    # --- Layer 2: weighted multi-dimension ranking ---
    scored = []
    for bucket in candidates:
        meta = bucket.get("metadata", {})

        try:
            # Dim 1: topic relevance (fuzzy text, 0~1)
            topic_score = self._calc_topic_score(query, bucket)

            # Dim 2: emotion resonance (coordinate distance, 0~1)
            emotion_score = self._calc_emotion_score(
                query_valence, query_arousal, meta
            )

            # Dim 3: time proximity (exponential decay, 0~1)
            time_score = self._calc_time_score(meta)

            # Dim 4: importance (clamped, then direct normalization)
            importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0

            # --- Weighted sum ---
            total = (
                topic_score * self.w_topic
                + emotion_score * self.w_emotion
                + time_score * self.w_time
                + importance_score * self.w_importance
            )
            # Normalize to 0~100 for readability.
            weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
            normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0

            # Resolved buckets get a ranking penalty, but remain
            # reachable via keyword match.
            if meta.get("resolved", False):
                normalized *= 0.3

            # fuzzy_threshold doubles as the minimum acceptance score.
            if normalized >= self.fuzzy_threshold:
                bucket["score"] = round(normalized, 2)
                scored.append(bucket)
        except Exception as e:
            # One badly-formed bucket must not abort the whole search.
            logger.warning(
                f"Scoring failed for bucket {bucket.get('id', '?')} / "
                f"桶评分失败: {e}"
            )
            continue

    scored.sort(key=lambda x: x["score"], reverse=True)
    return scored[:limit]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Topic relevance sub-score, weighted per field:
# name(×3) + domain(×2.5) + tags(×2) + body(×1), normalized to 0~1.
# ---------------------------------------------------------
def _calc_topic_score(self, query: str, bucket: dict) -> float:
    """Fuzzy-text relevance of *query* against one bucket (0~1)."""
    meta = bucket.get("metadata", {})

    def best_match(values) -> float:
        # Highest partial-ratio over a list of strings; 0 when empty.
        return max((fuzz.partial_ratio(query, v) for v in values), default=0)

    weighted = (
        fuzz.partial_ratio(query, meta.get("name", "")) * 3
        + best_match(meta.get("domain", [])) * 2.5
        + best_match(meta.get("tags", [])) * 2
        + fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1
    )
    # Max raw score is 100 × (3 + 2.5 + 2 + 1) = 850.
    return weighted / (100 * 8.5)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Emotion resonance sub-score:
|
||||||
|
# Based on Russell circumplex Euclidean distance
|
||||||
|
# 情感共鸣子分:基于环形情感模型的欧氏距离
|
||||||
|
# No emotion in query → neutral 0.5 (doesn't affect ranking)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _calc_emotion_score(
|
||||||
|
self, q_valence: float, q_arousal: float, meta: dict
|
||||||
|
) -> float:
|
||||||
|
"""
|
||||||
|
Calculate emotion resonance score (0~1, closer = higher).
|
||||||
|
计算情感共鸣度(0~1,越近越高)。
|
||||||
|
"""
|
||||||
|
if q_valence is None or q_arousal is None:
|
||||||
|
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
|
||||||
|
|
||||||
|
try:
|
||||||
|
b_valence = float(meta.get("valence", 0.5))
|
||||||
|
b_arousal = float(meta.get("arousal", 0.3))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
return 0.5
|
||||||
|
|
||||||
|
# Euclidean distance, max sqrt(2) ≈ 1.414
|
||||||
|
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
|
||||||
|
return max(0.0, 1.0 - dist / 1.414)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Time proximity sub-score:
|
||||||
|
# More recent activation → higher score
|
||||||
|
# 时间亲近子分:距上次激活越近分越高
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _calc_time_score(self, meta: dict) -> float:
|
||||||
|
"""
|
||||||
|
Calculate time proximity score (0~1, more recent = higher).
|
||||||
|
计算时间亲近度。
|
||||||
|
"""
|
||||||
|
last_active_str = meta.get("last_active", meta.get("created", ""))
|
||||||
|
try:
|
||||||
|
last_active = datetime.fromisoformat(str(last_active_str))
|
||||||
|
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
days = 30
|
||||||
|
return math.exp(-0.02 * days)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# List all buckets (recursive walk, including domain subdirs).
# ---------------------------------------------------------
async def list_all(self, include_archive: bool = False) -> list[dict]:
    """Walk the storage tree and load every bucket; archive is opt-in."""
    roots = [self.permanent_dir, self.dynamic_dir]
    if include_archive:
        roots.append(self.archive_dir)

    found: list[dict] = []
    for root_dir in roots:
        if not os.path.exists(root_dir):
            continue
        for current, _, names in os.walk(root_dir):
            for fname in names:
                if not fname.endswith(".md"):
                    continue
                bucket = self._load_bucket(os.path.join(current, fname))
                if bucket:
                    found.append(bucket)

    return found
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Statistics (counts per category + total size)
|
||||||
|
# 统计信息(各分类桶数量 + 总体积)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def get_stats(self) -> dict:
    """Aggregate bucket statistics across permanent/dynamic/archive dirs.

    Returns:
        Dict with per-category bucket counts, total size in KB, and
        per-domain counts (a domain is any subdirectory below a
        category root).
    """
    stats = {
        "permanent_count": 0,
        "dynamic_count": 0,
        "archive_count": 0,
        "total_size_kb": 0.0,
        "domains": {},
    }

    categories = (
        (self.permanent_dir, "permanent_count"),
        (self.dynamic_dir, "dynamic_count"),
        (self.archive_dir, "archive_count"),
    )
    for base_dir, counter_key in categories:
        if not os.path.exists(base_dir):
            continue
        base_name = os.path.basename(base_dir)
        for root, _subdirs, names in os.walk(base_dir):
            for name in names:
                if not name.endswith(".md"):
                    continue
                stats[counter_key] += 1
                full = os.path.join(root, name)
                try:
                    stats["total_size_kb"] += os.path.getsize(full) / 1024
                except OSError:
                    pass  # file vanished mid-walk; best-effort size only
                # Per-domain counts: any directory below the category
                # root counts as a domain name.
                domain = os.path.basename(root)
                if domain != base_name:
                    stats["domains"][domain] = stats["domains"].get(domain, 0) + 1
    return stats
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Archive bucket (move from permanent/dynamic into archive)
|
||||||
|
# 归档桶(从 permanent/dynamic 移入 archive)
|
||||||
|
# Called by decay engine to simulate "forgetting"
|
||||||
|
# 由衰减引擎调用,模拟"遗忘"
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def archive(self, bucket_id: str) -> bool:
    """Move a bucket file into the archive directory and mark it archived.

    The frontmatter ``type`` field is rewritten to "archived" in place
    before the file is moved, and the bucket's primary-domain
    subdirectory is recreated under the archive root.

    Args:
        bucket_id: ID of the bucket to archive.

    Returns:
        True on success; False when the bucket is missing or any I/O
        step fails (the error is logged, never raised).
    """
    file_path = self._find_bucket_file(bucket_id)
    if not file_path:
        return False

    try:
        # Read the file once to obtain domain info and update the type.
        post = frontmatter.load(file_path)
        domain = post.get("domain", ["未分类"])
        # NOTE(review): assumes `domain` is a list — a plain string here
        # would yield only its first character. Confirm upstream writers.
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        archive_subdir = os.path.join(self.archive_dir, primary_domain)
        os.makedirs(archive_subdir, exist_ok=True)

        # safe_path presumably guards against path traversal — see utils.
        dest = safe_path(archive_subdir, os.path.basename(file_path))

        # Rewrite the type marker first, then move the file.
        post["type"] = "archived"
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))

        # shutil.move falls back to copy+delete across filesystems.
        shutil.move(file_path, str(dest))
    except Exception as e:
        logger.error(
            f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
        )
        return False

    logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
    return True
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Internal: find bucket file across all three directories
|
||||||
|
# 内部:在三个目录中查找桶文件
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
|
||||||
|
"""
|
||||||
|
Recursively search permanent/dynamic/archive for a bucket file
|
||||||
|
matching the given ID.
|
||||||
|
在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。
|
||||||
|
"""
|
||||||
|
if not bucket_id:
|
||||||
|
return None
|
||||||
|
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
||||||
|
if not os.path.exists(dir_path):
|
||||||
|
continue
|
||||||
|
for root, _, files in os.walk(dir_path):
|
||||||
|
for fname in files:
|
||||||
|
if not fname.endswith(".md"):
|
||||||
|
continue
|
||||||
|
# Match by exact ID segment in filename
|
||||||
|
# 通过文件名中的 ID 片段精确匹配
|
||||||
|
if bucket_id in fname:
|
||||||
|
return os.path.join(root, fname)
|
||||||
|
return None
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Internal: load bucket data from .md file
|
||||||
|
# 内部:从 .md 文件加载桶数据
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _load_bucket(self, file_path: str) -> Optional[dict]:
    """Parse one Markdown bucket file into a structured dict.

    Args:
        file_path: absolute path to a frontmatter-bearing .md file.

    Returns:
        {"id", "metadata", "content", "path"} on success, or None when
        the file cannot be read/parsed (logged as a warning). The id
        falls back to the filename stem when frontmatter lacks one.
    """
    try:
        post = frontmatter.load(file_path)
        return {
            "id": post.get("id", Path(file_path).stem),
            "metadata": dict(post.metadata),
            "content": post.content,
            "path": file_path,
        }
    except Exception as e:
        logger.warning(
            f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
        )
        return None
||||||
82
config.example.yaml
Normal file
82
config.example.yaml
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Ombre Brain Configuration / 配置文件
|
||||||
|
# Copy this file to config.yaml and modify as needed
|
||||||
|
# 复制此文件为 config.yaml 后按需修改
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
# --- Transport / 传输方式 ---
|
||||||
|
# stdio: local use (Claude Desktop, direct pipe)
|
||||||
|
# streamable-http: remote use (HTTP, tunnel/CDN/proxy friendly)
|
||||||
|
# stdio: 本地使用(Claude Desktop,直接管道通信)
|
||||||
|
# streamable-http: 远程使用(标准 HTTP,对隧道/CDN/代理友好)
|
||||||
|
transport: "stdio"
|
||||||
|
|
||||||
|
# --- Log level / 日志级别 ---
|
||||||
|
log_level: "INFO"
|
||||||
|
|
||||||
|
# --- Bucket storage path / 记忆桶存储路径 ---
|
||||||
|
# Point this to your Obsidian vault subdirectory, or any local folder
|
||||||
|
# 指向你的 Obsidian 仓库子目录,或任意本地文件夹
|
||||||
|
# Leave as-is to use the built-in ./buckets/ directory
|
||||||
|
# 保持默认则使用内置的 ./buckets/ 目录
|
||||||
|
# buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain"
|
||||||
|
|
||||||
|
# --- Merge threshold / 桶合并阈值 ---
|
||||||
|
# When storing a new memory, if similarity with an existing bucket exceeds
|
||||||
|
# this value (0-100), merge instead of creating a new one
|
||||||
|
# 存新记忆时,如果与已有桶的相似度超过此值(0-100),则合并而非新建
|
||||||
|
merge_threshold: 75
|
||||||
|
|
||||||
|
# --- Dehydration API / 脱水压缩 API 配置 ---
|
||||||
|
# Uses a cheap LLM for intelligent compression; auto-degrades to local
|
||||||
|
# keyword extraction if API is unavailable
|
||||||
|
# 用廉价 LLM 做智能压缩,API 不可用时自动降级到本地关键词提取
|
||||||
|
dehydration:
|
||||||
|
# Supports any OpenAI-compatible API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
|
||||||
|
# 支持所有 OpenAI 兼容 API:DeepSeek / Ollama / LM Studio / vLLM / Gemini 等
|
||||||
|
model: "deepseek-chat"
|
||||||
|
base_url: "https://api.deepseek.com/v1"
|
||||||
|
# Common base_url examples / 常见 base_url 示例:
|
||||||
|
# DeepSeek: https://api.deepseek.com/v1
|
||||||
|
# SiliconFlow: https://api.siliconflow.cn/v1
|
||||||
|
# Ollama: http://localhost:11434/v1
|
||||||
|
# LM Studio: http://localhost:1234/v1
|
||||||
|
# vLLM: http://localhost:8000/v1
|
||||||
|
# Gemini: https://generativelanguage.googleapis.com/v1beta/openai
|
||||||
|
# api_key: "" # ⚠️ Use env var OMBRE_API_KEY instead / 请使用环境变量 OMBRE_API_KEY
|
||||||
|
max_tokens: 1024
|
||||||
|
temperature: 0.1
|
||||||
|
|
||||||
|
# --- Decay parameters / 记忆衰减参数 ---
|
||||||
|
# Simulates Ebbinghaus forgetting curve, auto-archives inactive memories
|
||||||
|
# 模拟艾宾浩斯遗忘曲线,自动归档不活跃的记忆
|
||||||
|
decay:
|
||||||
|
lambda: 0.05 # Decay rate / 衰减速率(越大遗忘越快)
|
||||||
|
threshold: 0.3 # Archive threshold / 归档阈值
|
||||||
|
check_interval_hours: 24 # Check interval (hours) / 衰减检查间隔(小时)
|
||||||
|
emotion_weights:
|
||||||
|
base: 1.0 # Base weight / 基础权重
|
||||||
|
arousal_boost: 0.8 # Arousal boost coefficient / 唤醒度加成系数
|
||||||
|
|
||||||
|
# --- Scoring weights / 检索权重参数 ---
|
||||||
|
# total = topic(×4) + emotion(×2) + time(×1.5) + importance(×1)
|
||||||
|
scoring_weights:
|
||||||
|
topic_relevance: 4.0
|
||||||
|
emotion_resonance: 2.0
|
||||||
|
time_proximity: 1.5
|
||||||
|
importance: 1.0
|
||||||
|
|
||||||
|
# --- Fuzzy matching / 模糊匹配参数 ---
|
||||||
|
matching:
|
||||||
|
fuzzy_threshold: 50 # Minimum match score (0-100) / 最低匹配分数
|
||||||
|
max_results: 5 # Max results per search / 单次搜索最多返回条数
|
||||||
|
|
||||||
|
# --- Obsidian wikilinks / Obsidian 双链自动注入 ---
|
||||||
|
wikilink:
|
||||||
|
enabled: true
|
||||||
|
use_tags: false
|
||||||
|
use_domain: true
|
||||||
|
use_auto_keywords: true
|
||||||
|
auto_top_k: 8
|
||||||
|
min_keyword_len: 2
|
||||||
|
exclude_keywords: []
|
||||||
279
decay_engine.py
Normal file
279
decay_engine.py
Normal file
@@ -0,0 +1,279 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Memory Decay Engine (decay_engine.py)
|
||||||
|
# 模块:记忆衰减引擎
|
||||||
|
#
|
||||||
|
# Simulates human forgetting curve; auto-decays inactive memories and archives them.
|
||||||
|
# 模拟人类遗忘曲线,自动衰减不活跃记忆并归档。
|
||||||
|
#
|
||||||
|
# Core formula (improved Ebbinghaus + emotion coordinates):
|
||||||
|
# 核心公式(改进版艾宾浩斯遗忘曲线 + 情感坐标):
|
||||||
|
# Score = Importance × (activation_count^0.3) × e^(-λ×days) × emotion_weight
|
||||||
|
#
|
||||||
|
# Emotion weight (continuous coordinate, not discrete labels):
|
||||||
|
# 情感权重(基于连续坐标而非离散列举):
|
||||||
|
# emotion_weight = base + (arousal × arousal_boost)
|
||||||
|
# Higher arousal → higher emotion weight → slower decay
|
||||||
|
# 唤醒度越高 → 情感权重越大 → 记忆衰减越慢
|
||||||
|
#
|
||||||
|
# Depended on by: server.py
|
||||||
|
# 被谁依赖:server.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import math
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.decay")
|
||||||
|
|
||||||
|
|
||||||
|
class DecayEngine:
|
||||||
|
"""
|
||||||
|
Memory decay engine — periodically scans all dynamic buckets,
|
||||||
|
calculates decay scores, auto-archives low-activity buckets
|
||||||
|
to simulate natural forgetting.
|
||||||
|
记忆衰减引擎 —— 定期扫描所有动态桶,
|
||||||
|
计算衰减得分,将低活跃桶自动归档,模拟自然遗忘。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: dict, bucket_mgr):
|
||||||
|
# --- Load decay parameters / 加载衰减参数 ---
|
||||||
|
decay_cfg = config.get("decay", {})
|
||||||
|
self.decay_lambda = decay_cfg.get("lambda", 0.05)
|
||||||
|
self.threshold = decay_cfg.get("threshold", 0.3)
|
||||||
|
self.check_interval = decay_cfg.get("check_interval_hours", 24)
|
||||||
|
|
||||||
|
# --- Emotion weight params (continuous arousal coordinate) ---
|
||||||
|
# --- 情感权重参数(基于连续 arousal 坐标)---
|
||||||
|
emotion_cfg = decay_cfg.get("emotion_weights", {})
|
||||||
|
self.emotion_base = emotion_cfg.get("base", 1.0)
|
||||||
|
self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)
|
||||||
|
|
||||||
|
self.bucket_mgr = bucket_mgr
|
||||||
|
|
||||||
|
# --- Background task control / 后台任务控制 ---
|
||||||
|
self._task: asyncio.Task | None = None
|
||||||
|
self._running = False
|
||||||
|
|
||||||
|
@property
def is_running(self) -> bool:
    """Whether the decay engine's background loop is currently running."""
    return self._running
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Core: calculate decay score for a single bucket
|
||||||
|
# 核心:计算单个桶的衰减得分
|
||||||
|
#
|
||||||
|
# Higher score = more vivid memory; below threshold → archive
|
||||||
|
# 得分越高 = 记忆越鲜活,低于阈值则归档
|
||||||
|
# Permanent buckets never decay / 固化桶永远不衰减
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Time weight: 0-1d→1.0, day2→0.9, then ~10%/day, floor 0.3
|
||||||
|
# 时间系数:0-1天=1.0,第2天=0.9,之后每天约降10%,7天后稳定在0.3
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
@staticmethod
|
||||||
|
def _calc_time_weight(days_since: float) -> float:
|
||||||
|
"""
|
||||||
|
Piecewise time weight multiplier (multiplies base_score).
|
||||||
|
分段式时间权重系数,作为 final_score 的乘数。
|
||||||
|
"""
|
||||||
|
if days_since <= 1.0:
|
||||||
|
return 1.0
|
||||||
|
elif days_since <= 2.0:
|
||||||
|
# Linear interpolation: 1.0→0.9 over [1,2]
|
||||||
|
return 1.0 - 0.1 * (days_since - 1.0)
|
||||||
|
else:
|
||||||
|
# Exponential decay from 0.9, floor at 0.3
|
||||||
|
# k = ln(3)/5 ≈ 0.2197 so that at day 7 (5 days past day 2) → 0.3
|
||||||
|
raw = 0.9 * math.exp(-0.2197 * (days_since - 2.0))
|
||||||
|
return max(0.3, raw)
|
||||||
|
|
||||||
|
def calculate_score(self, metadata: dict) -> float:
|
||||||
|
"""
|
||||||
|
Calculate current activity score for a memory bucket.
|
||||||
|
计算一个记忆桶的当前活跃度得分。
|
||||||
|
|
||||||
|
Formula: final_score = time_weight × base_score
|
||||||
|
base_score = Importance × (act_count^0.3) × e^(-λ×days) × (base + arousal×boost)
|
||||||
|
time_weight is the outer multiplier, takes priority over emotion factors.
|
||||||
|
"""
|
||||||
|
if not isinstance(metadata, dict):
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
# --- Pinned/protected buckets: never decay, importance locked to 10 ---
|
||||||
|
# --- 固化桶(pinned/protected):永不衰减,importance 锁定为 10 ---
|
||||||
|
if metadata.get("pinned") or metadata.get("protected"):
|
||||||
|
return 999.0
|
||||||
|
|
||||||
|
# --- Permanent buckets never decay / 固化桶永不衰减 ---
|
||||||
|
if metadata.get("type") == "permanent":
|
||||||
|
return 999.0
|
||||||
|
|
||||||
|
importance = max(1, min(10, int(metadata.get("importance", 5))))
|
||||||
|
activation_count = max(1, int(metadata.get("activation_count", 1)))
|
||||||
|
|
||||||
|
# --- Days since last activation / 距离上次激活过了多少天 ---
|
||||||
|
last_active_str = metadata.get("last_active", metadata.get("created", ""))
|
||||||
|
try:
|
||||||
|
last_active = datetime.fromisoformat(str(last_active_str))
|
||||||
|
days_since = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
days_since = 30 # Parse failure → assume 30 days / 解析失败假设已过 30 天
|
||||||
|
|
||||||
|
# --- Emotion weight: continuous arousal coordinate ---
|
||||||
|
# --- 情感权重:基于连续 arousal 坐标计算 ---
|
||||||
|
# Higher arousal → stronger emotion → higher weight → slower decay
|
||||||
|
# arousal 越高 → 情感越强烈 → 权重越大 → 衰减越慢
|
||||||
|
try:
|
||||||
|
arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
arousal = 0.3
|
||||||
|
emotion_weight = self.emotion_base + arousal * self.arousal_boost
|
||||||
|
|
||||||
|
# --- Time weight (outer multiplier, highest priority) ---
|
||||||
|
# --- 时间权重(外层乘数,优先级最高)---
|
||||||
|
time_weight = self._calc_time_weight(days_since)
|
||||||
|
|
||||||
|
# --- Base score = Importance × act_count^0.3 × e^(-λ×days) × emotion ---
|
||||||
|
# --- 基础得分 ---
|
||||||
|
base_score = (
|
||||||
|
importance
|
||||||
|
* (activation_count ** 0.3)
|
||||||
|
* math.exp(-self.decay_lambda * days_since)
|
||||||
|
* emotion_weight
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- final_score = time_weight × base_score ---
|
||||||
|
score = time_weight * base_score
|
||||||
|
|
||||||
|
# --- Weight pool modifiers / 权重池修正因子 ---
|
||||||
|
# Resolved events drop to 5%, sink to bottom awaiting keyword reactivation
|
||||||
|
# 已解决的事件权重骤降到 5%,沉底等待关键词激活
|
||||||
|
resolved_factor = 0.05 if metadata.get("resolved", False) else 1.0
|
||||||
|
# High-arousal unresolved buckets get urgency boost for priority surfacing
|
||||||
|
# 高唤醒未解决桶额外加成,优先浮现
|
||||||
|
urgency_boost = 1.5 if (arousal > 0.7 and not metadata.get("resolved", False)) else 1.0
|
||||||
|
|
||||||
|
return round(score * resolved_factor * urgency_boost, 4)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Execute one decay cycle
|
||||||
|
# 执行一轮衰减周期
|
||||||
|
# Scan all dynamic buckets → score → archive those below threshold
|
||||||
|
# 扫描所有动态桶 → 算分 → 低于阈值的归档
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def run_decay_cycle(self) -> dict:
    """Run one decay pass over all dynamic buckets.

    Scores every non-permanent, non-pinned/protected bucket and archives
    the ones whose score falls below ``self.threshold``. Per-bucket
    failures are logged and skipped so a single malformed bucket never
    aborts the cycle.

    Returns:
        Stats dict {"checked": N, "archived": N, "lowest_score": X};
        an "error" key is added when listing the buckets itself fails.
    """
    try:
        buckets = await self.bucket_mgr.list_all(include_archive=False)
    except Exception as e:
        logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
        return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}

    checked = 0
    archived = 0
    lowest_score = float("inf")

    for bucket in buckets:
        meta = bucket.get("metadata", {})

        # Permanent / pinned / protected buckets never decay — skip them.
        if meta.get("type") == "permanent" or meta.get("pinned") or meta.get("protected"):
            continue

        checked += 1
        try:
            score = self.calculate_score(meta)
        except Exception as e:
            # Malformed metadata: log and move on to the next bucket.
            logger.warning(
                f"Score calculation failed for {bucket.get('id', '?')} / "
                f"计算得分失败: {e}"
            )
            continue

        lowest_score = min(lowest_score, score)

        # Below the threshold → archive (simulated forgetting).
        if score < self.threshold:
            try:
                success = await self.bucket_mgr.archive(bucket["id"])
                if success:
                    archived += 1
                    logger.info(
                        f"Decay archived / 衰减归档: "
                        f"{meta.get('name', bucket['id'])} "
                        f"(score={score:.4f}, threshold={self.threshold})"
                    )
            except Exception as e:
                logger.warning(
                    f"Archive failed for {bucket.get('id', '?')} / "
                    f"归档失败: {e}"
                )

    result = {
        "checked": checked,
        "archived": archived,
        # No decayable buckets at all → report 0, not +inf.
        "lowest_score": lowest_score if checked > 0 else 0,
    }
    logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
    return result
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Background decay task management
|
||||||
|
# 后台衰减任务管理
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def ensure_started(self) -> None:
|
||||||
|
"""
|
||||||
|
Ensure the decay engine is started (lazy init on first call).
|
||||||
|
确保衰减引擎已启动(懒加载,首次调用时启动)。
|
||||||
|
"""
|
||||||
|
if not self._running:
|
||||||
|
await self.start()
|
||||||
|
|
||||||
|
async def start(self) -> None:
|
||||||
|
"""Start the background decay loop.
|
||||||
|
启动后台衰减循环。"""
|
||||||
|
if self._running:
|
||||||
|
return
|
||||||
|
self._running = True
|
||||||
|
self._task = asyncio.create_task(self._background_loop())
|
||||||
|
logger.info(
|
||||||
|
f"Decay engine started, interval: {self.check_interval}h / "
|
||||||
|
f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
|
||||||
|
)
|
||||||
|
|
||||||
|
async def stop(self) -> None:
    """Stop the background decay loop and await the task's cancellation.

    停止后台衰减循环。
    """
    self._running = False
    task = self._task
    if task:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            pass  # expected: the loop was cancelled on purpose
    logger.info("Decay engine stopped / 衰减引擎已停止")
||||||
|
|
||||||
|
async def _background_loop(self) -> None:
    """Background loop body: run one decay cycle, sleep, repeat.

    A failing cycle is logged and the loop keeps going; cancellation
    during the sleep exits the loop cleanly.
    """
    while self._running:
        try:
            await self.run_decay_cycle()
        except Exception as e:
            # Never let one bad cycle kill the background task.
            logger.error(f"Decay cycle error / 衰减周期出错: {e}")
        # Wait out the configured interval before the next cycle.
        try:
            await asyncio.sleep(self.check_interval * 3600)
        except asyncio.CancelledError:
            break
||||||
779
dehydrator.py
Normal file
779
dehydrator.py
Normal file
@@ -0,0 +1,779 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Dehydration & Auto-tagging (dehydrator.py)
|
||||||
|
# 模块:数据脱水压缩 + 自动打标
|
||||||
|
#
|
||||||
|
# Capabilities:
|
||||||
|
# 能力:
|
||||||
|
# 1. Dehydrate: compress memory content into high-density summaries (save tokens)
|
||||||
|
# 脱水:将记忆桶的原始内容压缩为高密度摘要,省 token
|
||||||
|
# 2. Merge: blend old and new content, keeping bucket size constant
|
||||||
|
# 合并:揉合新旧内容,控制桶体积恒定
|
||||||
|
# 3. Analyze: auto-analyze content for domain/emotion/tags
|
||||||
|
# 打标:自动分析内容,输出主题域/情感坐标/标签
|
||||||
|
#
|
||||||
|
# Operating modes:
|
||||||
|
# 工作模式:
|
||||||
|
# - Primary: OpenAI-compatible API (DeepSeek/Ollama/LM Studio/vLLM/Gemini etc.)
|
||||||
|
# 主路径:通过 OpenAI 兼容客户端调用 LLM API
|
||||||
|
# - Fallback: local keyword extraction when API is unavailable
|
||||||
|
# 备用路径:API 不可用时用本地关键词提取
|
||||||
|
#
|
||||||
|
# Depended on by: server.py
|
||||||
|
# 被谁依赖:server.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from collections import Counter
|
||||||
|
import jieba
|
||||||
|
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
|
||||||
|
from utils import count_tokens_approx
|
||||||
|
|
||||||
|
logger = logging.getLogger("ombre_brain.dehydrator")
|
||||||
|
|
||||||
|
|
||||||
|
# --- Dehydration prompt: instructs cheap LLM to compress information ---
|
||||||
|
# --- 脱水提示词:指导廉价 LLM 压缩信息 ---
|
||||||
|
DEHYDRATE_PROMPT = """你是一个信息压缩专家。请将以下内容脱水为紧凑摘要。
|
||||||
|
|
||||||
|
压缩规则:
|
||||||
|
1. 提取所有核心事实,去除冗余修饰和重复
|
||||||
|
2. 保留最新的情绪状态和态度
|
||||||
|
3. 保留所有待办/未完成事项
|
||||||
|
4. 关键数字、日期、名称必须保留
|
||||||
|
5. 目标压缩率 > 70%
|
||||||
|
|
||||||
|
输出格式(纯 JSON,无其他内容):
|
||||||
|
{
|
||||||
|
"core_facts": ["事实1", "事实2"],
|
||||||
|
"emotion_state": "当前情绪关键词",
|
||||||
|
"todos": ["待办1", "待办2"],
|
||||||
|
"keywords": ["关键词1", "关键词2"],
|
||||||
|
"summary": "50字以内的核心总结"
|
||||||
|
}"""
|
||||||
|
|
||||||
|
|
||||||
|
# --- Diary digest prompt: split daily notes into independent memory entries ---
|
||||||
|
# --- 日记整理提示词:把一大段日常拆分成多个独立记忆条目 ---
|
||||||
|
DIGEST_PROMPT = """你是一个日记整理专家。用户会发送一段包含今天各种事情的文本(可能很杂乱),请你将其拆分成多个独立的记忆条目。
|
||||||
|
|
||||||
|
整理规则:
|
||||||
|
1. 每个条目应该是一个独立的主题/事件(不要混在一起)
|
||||||
|
2. 为每个条目自动分析元数据
|
||||||
|
3. 去除无意义的口水话和重复信息,保留核心内容
|
||||||
|
4. 同一主题的零散信息应合并为一个条目
|
||||||
|
5. 如果有待办事项,单独提取为一个条目
|
||||||
|
|
||||||
|
输出格式(纯 JSON 数组,无其他内容):
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"name": "条目标题(10字以内)",
|
||||||
|
"content": "整理后的内容",
|
||||||
|
"domain": ["主题域1"],
|
||||||
|
"valence": 0.7,
|
||||||
|
"arousal": 0.4,
|
||||||
|
"tags": ["标签1", "标签2"],
|
||||||
|
"importance": 5
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
主题域可选(选最精确的 1~2 个,只选真正相关的):
|
||||||
|
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
|
||||||
|
人际: ["家庭", "恋爱", "友谊", "社交"]
|
||||||
|
成长: ["工作", "学习", "考试", "求职"]
|
||||||
|
身心: ["健康", "心理", "睡眠", "运动"]
|
||||||
|
兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
|
||||||
|
数字: ["编程", "AI", "硬件", "网络"]
|
||||||
|
事务: ["财务", "计划", "待办"]
|
||||||
|
内心: ["情绪", "回忆", "梦境", "自省"]
|
||||||
|
importance: 1-10,根据内容重要程度判断
|
||||||
|
valence: 0~1(0=消极, 0.5=中性, 1=积极)
|
||||||
|
arousal: 0~1(0=平静, 0.5=普通, 1=激动)"""
|
||||||
|
|
||||||
|
|
||||||
|
# --- Merge prompt: instruct LLM to blend old and new memories ---
|
||||||
|
# --- 合并提示词:指导 LLM 揉合新旧记忆 ---
|
||||||
|
MERGE_PROMPT = """你是一个信息合并专家。请将旧记忆与新内容合并为一份统一的简洁记录。
|
||||||
|
|
||||||
|
合并规则:
|
||||||
|
1. 新内容与旧记忆冲突时,以新内容为准
|
||||||
|
2. 去除重复信息
|
||||||
|
3. 保留所有重要事实
|
||||||
|
4. 总长度尽量不超过旧记忆的 120%
|
||||||
|
|
||||||
|
直接输出合并后的文本,不要加额外说明。"""
|
||||||
|
|
||||||
|
|
||||||
|
# --- Auto-tagging prompt: analyze content for domain and emotion coords ---
|
||||||
|
# --- 自动打标提示词:分析内容的主题域和情感坐标 ---
|
||||||
|
ANALYZE_PROMPT = """你是一个内容分析器。请分析以下文本,输出结构化的元数据。
|
||||||
|
|
||||||
|
分析规则:
|
||||||
|
1. domain(主题域):选最精确的 1~2 个,只选真正相关的
|
||||||
|
日常: ["饮食", "穿搭", "出行", "居家", "购物"]
|
||||||
|
人际: ["家庭", "恋爱", "友谊", "社交"]
|
||||||
|
成长: ["工作", "学习", "考试", "求职"]
|
||||||
|
身心: ["健康", "心理", "睡眠", "运动"]
|
||||||
|
兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]
|
||||||
|
数字: ["编程", "AI", "硬件", "网络"]
|
||||||
|
事务: ["财务", "计划", "待办"]
|
||||||
|
内心: ["情绪", "回忆", "梦境", "自省"]
|
||||||
|
2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极
|
||||||
|
3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动
|
||||||
|
4. tags(关键词标签):3~5 个最能概括内容的关键词
|
||||||
|
5. suggested_name(建议桶名):10字以内的简短标题
|
||||||
|
|
||||||
|
输出格式(纯 JSON,无其他内容):
|
||||||
|
{
|
||||||
|
"domain": ["主题域1", "主题域2"],
|
||||||
|
"valence": 0.7,
|
||||||
|
"arousal": 0.4,
|
||||||
|
"tags": ["标签1", "标签2", "标签3"],
|
||||||
|
"suggested_name": "简短标题"
|
||||||
|
}"""
|
||||||
|
|
||||||
|
|
||||||
|
class Dehydrator:
|
||||||
|
"""
|
||||||
|
Data dehydrator + content analyzer.
|
||||||
|
Three capabilities: dehydration / merge / auto-tagging (domain + emotion).
|
||||||
|
Prefers API (better quality); auto-degrades to local (guaranteed availability).
|
||||||
|
数据脱水器 + 内容分析器。
|
||||||
|
三大能力:脱水压缩 / 新旧合并 / 自动打标。
|
||||||
|
优先走 API,API 挂了自动降级到本地。
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, config: dict):
|
||||||
|
# --- Read dehydration API config / 读取脱水 API 配置 ---
|
||||||
|
dehy_cfg = config.get("dehydration", {})
|
||||||
|
self.api_key = dehy_cfg.get("api_key", "")
|
||||||
|
self.model = dehy_cfg.get("model", "deepseek-chat")
|
||||||
|
self.base_url = dehy_cfg.get("base_url", "https://api.deepseek.com/v1")
|
||||||
|
self.max_tokens = dehy_cfg.get("max_tokens", 1024)
|
||||||
|
self.temperature = dehy_cfg.get("temperature", 0.1)
|
||||||
|
|
||||||
|
# --- API availability / 是否有可用的 API ---
|
||||||
|
self.api_available = bool(self.api_key)
|
||||||
|
|
||||||
|
# --- Initialize OpenAI-compatible client ---
|
||||||
|
# --- 初始化 OpenAI 兼容客户端 ---
|
||||||
|
# Supports any OpenAI-format API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc.
|
||||||
|
# User only needs to set base_url in config.yaml
|
||||||
|
if self.api_available:
|
||||||
|
self.client = AsyncOpenAI(
|
||||||
|
api_key=self.api_key,
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=60.0,
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
self.client = None
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Dehydrate: compress raw content into concise summary
|
||||||
|
# 脱水:将原始内容压缩为精简摘要
|
||||||
|
# Try API first, fallback to local
|
||||||
|
# 先尝试 API,失败则回退本地
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def dehydrate(self, content: str, metadata: dict = None) -> str:
    """Compress memory content into a dense summary ready for context
    injection into Claude.

    Content under ~100 approximate tokens is passed through unchanged;
    otherwise the API path is tried first and local keyword extraction
    is the guaranteed fallback.
    """
    if not content or not content.strip():
        return "(空记忆 / empty memory)"

    # Already compact — skip compression entirely.
    if count_tokens_approx(content) < 100:
        return self._format_output(content, metadata)

    # Preferred path: LLM-based compression (best quality).
    if self.api_available:
        try:
            compressed = await self._api_dehydrate(content)
            if compressed:
                return self._format_output(compressed, metadata)
        except Exception as e:
            logger.warning(
                f"API dehydration failed, degrading to local / "
                f"API 脱水失败,降级到本地压缩: {e}"
            )

    # Guaranteed fallback: local keyword-based compression.
    return self._format_output(self._local_dehydrate(content), metadata)
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Merge: blend new content into existing bucket
|
||||||
|
# 合并:将新内容揉入已有桶,保持体积恒定
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def merge(self, old_content: str, new_content: str) -> str:
|
||||||
|
"""
|
||||||
|
Merge new content with old memory, preventing infinite bucket growth.
|
||||||
|
将新内容与旧记忆合并,避免桶无限膨胀。
|
||||||
|
"""
|
||||||
|
if not old_content and not new_content:
|
||||||
|
return ""
|
||||||
|
if not old_content:
|
||||||
|
return new_content or ""
|
||||||
|
if not new_content:
|
||||||
|
return old_content
|
||||||
|
|
||||||
|
# --- Try API merge first / 优先 API 合并 ---
|
||||||
|
if self.api_available:
|
||||||
|
try:
|
||||||
|
result = await self._api_merge(old_content, new_content)
|
||||||
|
if result:
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(
|
||||||
|
f"API merge failed, degrading to local / "
|
||||||
|
f"API 合并失败,降级到本地合并: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- Local merge fallback / 本地合并兜底 ---
|
||||||
|
return self._local_merge(old_content, new_content)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# API call: dehydration
|
||||||
|
# API 调用:脱水压缩
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def _api_dehydrate(self, content: str) -> str:
    """Ask the configured LLM to dehydrate *content* using
    DEHYDRATE_PROMPT; input is truncated to 3000 chars.

    Returns "" when the API returns no choices or empty content.
    """
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": DEHYDRATE_PROMPT},
            {"role": "user", "content": content[:3000]},
        ],
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    choices = response.choices
    return (choices[0].message.content or "") if choices else ""
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# API call: merge
|
||||||
|
# API 调用:合并
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
async def _api_merge(self, old_content: str, new_content: str) -> str:
    """Ask the configured LLM to blend old and new memory text using
    MERGE_PROMPT; each side is truncated to 2000 chars.

    Returns "" when the API returns no choices or empty content.
    """
    user_msg = f"旧记忆:\n{old_content[:2000]}\n\n新内容:\n{new_content[:2000]}"
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": MERGE_PROMPT},
            {"role": "user", "content": user_msg},
        ],
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    choices = response.choices
    return (choices[0].message.content or "") if choices else ""
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Local dehydration (fallback when API is unavailable)
# Keyword frequency + sentence position weighting
# ---------------------------------------------------------
def _local_dehydrate(self, content: str) -> str:
    """Compress *content* without an LLM.

    Splits into sentences, scores each by position (openings and endings
    weigh more) plus keyword hits, then joins the top 8 sentences into a
    summary followed by up to 10 keywords.
    """
    # Split on Chinese/English sentence terminators and newlines.
    raw_parts = re.split(r"[。!?\n.!?]+", content)
    sentences = [part.strip() for part in raw_parts if len(part.strip()) > 5]

    if not sentences:
        return content[:200]

    keywords = self._extract_keywords(content)

    # Score sentences: head/tail get a positional boost, keyword hits add more.
    total = len(sentences)
    scored = []
    for idx, sentence in enumerate(sentences):
        if idx < 3:
            weight = 1.5
        elif idx > total - 3:
            weight = 1.2
        else:
            weight = 1.0
        hits = sum(1 for kw in keywords if kw in sentence)
        scored.append((weight * (1 + hits), sentence))

    scored.sort(key=lambda pair: pair[0], reverse=True)

    # Keep the 8 best sentences plus a keyword list.
    top_sentences = [sentence for _, sentence in scored[:8]]
    summary = "。".join(top_sentences)
    keyword_str = ", ".join(keywords[:10])

    return f"[摘要] {summary}\n[关键词] {keyword_str}"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Local merge (simple concatenation + truncation)
|
||||||
|
# 本地合并(简单拼接 + 截断)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _local_merge(self, old_content: str, new_content: str) -> str:
|
||||||
|
"""
|
||||||
|
Simple concatenation merge; truncates if too long.
|
||||||
|
简单拼接合并,超长时截断保留两端。
|
||||||
|
"""
|
||||||
|
merged = f"{old_content.strip()}\n\n--- 更新 ---\n{new_content.strip()}"
|
||||||
|
# Truncate if over 3000 chars / 超过 3000 字符则各取一半
|
||||||
|
if len(merged) > 3000:
|
||||||
|
half = 1400
|
||||||
|
merged = (
|
||||||
|
f"{old_content[:half].strip()}\n\n--- 更新 ---\n{new_content[:half].strip()}"
|
||||||
|
)
|
||||||
|
return merged
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Keyword extraction
|
||||||
|
# 关键词提取
|
||||||
|
# Chinese + English tokenization → stopword filter → frequency sort
|
||||||
|
# 中英文分词 + 停用词过滤 + 词频排序
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _extract_keywords(self, text: str) -> list[str]:
|
||||||
|
"""
|
||||||
|
Extract high-frequency keywords using jieba (Chinese + English mixed).
|
||||||
|
用 jieba 分词提取高频关键词。
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
words = jieba.lcut(text)
|
||||||
|
except Exception:
|
||||||
|
words = []
|
||||||
|
# English words / 英文单词
|
||||||
|
english_words = re.findall(r"[a-zA-Z]{3,}", text.lower())
|
||||||
|
words += english_words
|
||||||
|
|
||||||
|
# Stopwords / 停用词
|
||||||
|
stopwords = {
|
||||||
|
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
||||||
|
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
||||||
|
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
||||||
|
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
||||||
|
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
||||||
|
"this", "that", "from", "they", "been", "said", "will", "each",
|
||||||
|
}
|
||||||
|
filtered = [
|
||||||
|
w for w in words
|
||||||
|
if w not in stopwords and len(w.strip()) > 1 and not re.match(r"^[0-9]+$", w)
|
||||||
|
]
|
||||||
|
counter = Counter(filtered)
|
||||||
|
return [word for word, _ in counter.most_common(15)]
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Output formatting
|
||||||
|
# 输出格式化
|
||||||
|
# Wraps dehydrated result with bucket name, tags, emotion coords
|
||||||
|
# 把脱水结果包装成带桶名、标签、情感坐标的可读文本
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _format_output(self, content: str, metadata: dict = None) -> str:
|
||||||
|
"""
|
||||||
|
Format dehydrated result into context-injectable text.
|
||||||
|
将脱水结果格式化为可注入上下文的文本。
|
||||||
|
"""
|
||||||
|
header = ""
|
||||||
|
if metadata and isinstance(metadata, dict):
|
||||||
|
name = metadata.get("name", "未命名")
|
||||||
|
tags = ", ".join(metadata.get("tags", []))
|
||||||
|
domains = ", ".join(metadata.get("domain", []))
|
||||||
|
try:
|
||||||
|
valence = float(metadata.get("valence", 0.5))
|
||||||
|
arousal = float(metadata.get("arousal", 0.3))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
valence, arousal = 0.5, 0.3
|
||||||
|
header = f"📌 记忆桶: {name}"
|
||||||
|
if domains:
|
||||||
|
header += f" [主题:{domains}]"
|
||||||
|
if tags:
|
||||||
|
header += f" [标签:{tags}]"
|
||||||
|
header += f" [情感:V{valence:.1f}/A{arousal:.1f}]"
|
||||||
|
header += "\n"
|
||||||
|
return f"{header}{content}"
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Auto-tagging: analyze content for domain + emotion + tags
# Called by server.py when storing new memories
# ---------------------------------------------------------
async def analyze(self, content: str) -> dict:
    """Analyze *content* and return structured metadata.

    Tries the LLM API first when available; any API failure or empty
    result degrades to the local keyword/sentiment analyzer.

    Returns: {"domain", "valence", "arousal", "tags", "suggested_name"}
    """
    # Empty input gets the neutral default straight away.
    if not content or not content.strip():
        return self._default_analysis()

    if self.api_available:
        try:
            api_result = await self._api_analyze(content)
        except Exception as e:
            logger.warning(
                f"API tagging failed, degrading to local / "
                f"API 打标失败,降级到本地分析: {e}"
            )
        else:
            if api_result:
                return api_result

    # Local analysis fallback.
    return self._local_analyze(content)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# API call: auto-tagging
# ---------------------------------------------------------
async def _api_analyze(self, content: str) -> dict:
    """Tag *content* via the LLM API.

    Sends the first 2000 characters with ANALYZE_PROMPT at a low
    temperature; an empty or choice-less reply yields the neutral
    default analysis.
    """
    completion = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": ANALYZE_PROMPT},
            {"role": "user", "content": content[:2000]},
        ],
        max_tokens=256,
        temperature=0.1,
    )
    if not completion.choices:
        return self._default_analysis()
    reply = completion.choices[0].message.content or ""
    if not reply.strip():
        return self._default_analysis()
    return self._parse_analysis(reply)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Parse API JSON response with safety checks
|
||||||
|
# 解析 API 返回的 JSON,做安全校验
|
||||||
|
# Ensure valence/arousal in 0~1, domain/tags valid
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _parse_analysis(self, raw: str) -> dict:
|
||||||
|
"""
|
||||||
|
Parse and validate API tagging result.
|
||||||
|
解析并校验 API 返回的打标结果。
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Handle potential markdown code block wrapping
|
||||||
|
# 处理可能的 markdown 代码块包裹
|
||||||
|
cleaned = raw.strip()
|
||||||
|
if cleaned.startswith("```"):
|
||||||
|
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
|
||||||
|
result = json.loads(cleaned)
|
||||||
|
except (json.JSONDecodeError, IndexError, ValueError):
|
||||||
|
logger.warning(f"API tagging JSON parse failed / JSON 解析失败: {raw[:200]}")
|
||||||
|
return self._default_analysis()
|
||||||
|
|
||||||
|
if not isinstance(result, dict):
|
||||||
|
return self._default_analysis()
|
||||||
|
|
||||||
|
# --- Validate and clamp value ranges / 校验并钳制数值范围 ---
|
||||||
|
try:
|
||||||
|
valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
|
||||||
|
arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
valence, arousal = 0.5, 0.3
|
||||||
|
|
||||||
|
return {
|
||||||
|
"domain": result.get("domain", ["未分类"])[:3],
|
||||||
|
"valence": valence,
|
||||||
|
"arousal": arousal,
|
||||||
|
"tags": result.get("tags", [])[:5],
|
||||||
|
"suggested_name": str(result.get("suggested_name", ""))[:20],
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Local analysis (fallback when API is unavailable)
# Keyword matching + simple sentiment dictionary
# ---------------------------------------------------------
def _local_analyze(self, content: str) -> dict:
    """
    Local keyword + sentiment-dictionary analysis.

    Matches content against per-domain keyword sets (a domain counts as
    matched at >= 2 hits), estimates valence from positive/negative word
    balance and arousal from intensity-word counts, and returns the same
    shape as the API path. suggested_name is always "" on this path.
    """
    keywords = self._extract_keywords(content)
    text_lower = content.lower()

    # --- Domain matching by keyword hits ---
    domain_keywords = {
        # Daily life
        "饮食": {"吃", "饭", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
                "超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食"},
        "出行": {"旅行", "出发", "航班", "酒店", "地铁", "打车", "高铁", "机票",
                "景点", "签证", "护照"},
        "居家": {"打扫", "洗衣", "搬家", "快递", "收纳", "装修", "租房"},
        "购物": {"买", "下单", "到货", "退货", "优惠", "折扣", "代购"},
        # Relationships
        "家庭": {"爸", "妈", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
                "奶奶", "爷爷", "亲戚", "家里"},
        "恋爱": {"爱人", "男友", "女友", "恋", "约会", "接吻", "分手",
                "暧昧", "在一起", "想你", "同床"},
        "友谊": {"朋友", "闺蜜", "兄弟", "聚", "约饭", "聊天", "群"},
        "社交": {"见面", "被人", "圈子", "消息", "评论", "点赞"},
        # Growth
        "工作": {"会议", "项目", "客户", "汇报", "deadline", "同事",
                "老板", "薪资", "合同", "需求", "加班", "实习"},
        "学习": {"课", "考试", "论文", "笔记", "作业", "教授", "讲座",
                "分数", "选课", "学分"},
        "求职": {"面试", "简历", "offer", "投递", "薪资", "岗位"},
        # Health (body & mind)
        "健康": {"医院", "复查", "吃药", "抽血", "手术", "心率",
                "病", "症状", "指标", "体检", "月经"},
        "心理": {"焦虑", "抑郁", "恐慌", "创伤", "人格", "咨询",
                "安全感", "自残", "崩溃", "压力"},
        "睡眠": {"睡", "失眠", "噩梦", "清醒", "熬夜", "早起", "午觉"},
        # Interests
        "游戏": {"游戏", "steam", "极乐迪斯科", "存档", "通关", "角色",
                "mod", "DLC", "剧情"},
        "影视": {"电影", "番剧", "动漫", "剧", "综艺", "追番", "上映"},
        "音乐": {"歌", "音乐", "专辑", "live", "演唱会", "耳机"},
        "阅读": {"书", "小说", "读完", "kindle", "连载", "漫画"},
        "创作": {"写", "画", "预设", "脚本", "视频", "剪辑", "P图",
                "SillyTavern", "插件", "正则", "人设"},
        # Digital
        "编程": {"代码", "code", "python", "bug", "api", "docker",
                "git", "调试", "框架", "部署", "开发", "server"},
        "AI": {"模型", "GPT", "Claude", "gemini", "LLM", "token",
               "prompt", "LoRA", "微调", "推理", "MCP"},
        "网络": {"VPN", "梯子", "代理", "域名", "隧道", "服务器",
                "cloudflare", "tunnel", "反代"},
        # Affairs
        "财务": {"钱", "转账", "工资", "花了", "欠", "还款", "借",
                "账单", "余额", "预算", "黄金"},
        "计划": {"计划", "目标", "deadline", "日程", "清单", "安排"},
        "待办": {"要做", "记得", "别忘", "提醒", "下次"},
        # Inner life
        "情绪": {"开心", "难过", "生气", "哭", "泪", "孤独", "幸福",
                "伤心", "烦", "委屈", "感动", "温柔"},
        "回忆": {"以前", "小时候", "那时", "怀念", "曾经", "记得"},
        "梦境": {"梦", "梦到", "梦见", "噩梦", "清醒梦"},
        "自省": {"反思", "觉得自己", "问自己", "意识到", "明白了"},
    }

    # A domain needs at least two keyword hits to count; keep top 3 by hits.
    matched_domains = []
    for domain, kws in domain_keywords.items():
        hits = sum(1 for kw in kws if kw in text_lower)
        if hits >= 2:
            matched_domains.append((domain, hits))
    matched_domains.sort(key=lambda x: x[1], reverse=True)
    domains = [d for d, _ in matched_domains[:3]] or ["未分类"]

    # --- Emotion estimation via a simple sentiment dictionary ---
    positive_words = {"开心", "高兴", "喜欢", "哈哈", "棒", "赞", "爱",
                      "幸福", "成功", "感动", "兴奋", "棒极了",
                      "happy", "love", "great", "awesome", "nice"}
    negative_words = {"难过", "伤心", "生气", "焦虑", "害怕", "无聊",
                      "烦", "累", "失望", "崩溃", "愤怒", "痛苦",
                      "sad", "angry", "hate", "tired", "afraid"}
    intense_words = {"太", "非常", "极", "超", "特别", "十分", "炸",
                     "崩溃", "激动", "愤怒", "狂喜", "very", "so", "extremely"}

    pos_count = sum(1 for w in positive_words if w in text_lower)
    neg_count = sum(1 for w in negative_words if w in text_lower)
    intense_count = sum(1 for w in intense_words if w in text_lower)

    # valence: balance of positive vs negative hits, centered at 0.5.
    if pos_count + neg_count > 0:
        valence = 0.5 + 0.4 * (pos_count - neg_count) / (pos_count + neg_count)
    else:
        valence = 0.5

    # arousal: baseline 0.3 raised by intensity and total emotion hits, capped at 1.
    arousal = min(1.0, 0.3 + intense_count * 0.15 + (pos_count + neg_count) * 0.08)

    return {
        "domain": domains,
        "valence": round(max(0.0, min(1.0, valence)), 2),
        "arousal": round(max(0.0, min(1.0, arousal)), 2),
        "tags": keywords[:5],
        "suggested_name": "",
    }
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Default analysis result (empty content or total failure)
|
||||||
|
# 默认分析结果(内容为空或完全失败时用)
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _default_analysis(self) -> dict:
|
||||||
|
"""
|
||||||
|
Return default neutral analysis result.
|
||||||
|
返回默认的中性分析结果。
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"domain": ["未分类"],
|
||||||
|
"valence": 0.5,
|
||||||
|
"arousal": 0.3,
|
||||||
|
"tags": [],
|
||||||
|
"suggested_name": "",
|
||||||
|
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Diary digest: split daily notes into independent memory entries
# Used by the "grow" tool
# ---------------------------------------------------------
async def digest(self, content: str) -> list[dict]:
    """Split a large chunk of daily content into independent memory entries.

    Prefers the API digest (understands semantic splits); degrades to
    local paragraph splitting when the API is unavailable or fails.

    Returns: [{"name", "content", "domain", "valence", "arousal", "tags", "importance"}, ...]
    """
    if not content or not content.strip():
        return []

    if self.api_available:
        try:
            entries = await self._api_digest(content)
        except Exception as e:
            logger.warning(
                f"API diary digest failed, degrading to local / "
                f"API 日记整理失败,降级到本地拆分: {e}"
            )
        else:
            if entries:
                return entries

    # Local split fallback.
    return await self._local_digest(content)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# API call: diary digest
# ---------------------------------------------------------
async def _api_digest(self, content: str) -> list[dict]:
    """Run the diary-digest prompt over *content* via the LLM API.

    Sends the first 5000 characters; an empty or choice-less reply
    yields an empty list.
    """
    completion = await self.client.chat.completions.create(
        model=self.model,
        messages=[
            {"role": "system", "content": DIGEST_PROMPT},
            {"role": "user", "content": content[:5000]},
        ],
        max_tokens=2048,
        temperature=0.2,
    )
    if not completion.choices:
        return []
    reply = completion.choices[0].message.content or ""
    if not reply.strip():
        return []
    return self._parse_digest(reply)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
# Parse diary digest result with safety checks
|
||||||
|
# 解析日记整理结果,做安全校验
|
||||||
|
# ---------------------------------------------------------
|
||||||
|
def _parse_digest(self, raw: str) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Parse and validate API diary digest result.
|
||||||
|
解析并校验 API 返回的日记整理结果。
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
cleaned = raw.strip()
|
||||||
|
if cleaned.startswith("```"):
|
||||||
|
cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0]
|
||||||
|
items = json.loads(cleaned)
|
||||||
|
except (json.JSONDecodeError, IndexError, ValueError):
|
||||||
|
logger.warning(f"Diary digest JSON parse failed / JSON 解析失败: {raw[:200]}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
if not isinstance(items, list):
|
||||||
|
return []
|
||||||
|
|
||||||
|
validated = []
|
||||||
|
for item in items:
|
||||||
|
if not isinstance(item, dict) or not item.get("content"):
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
importance = max(1, min(10, int(item.get("importance", 5))))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
importance = 5
|
||||||
|
try:
|
||||||
|
valence = max(0.0, min(1.0, float(item.get("valence", 0.5))))
|
||||||
|
arousal = max(0.0, min(1.0, float(item.get("arousal", 0.3))))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
valence, arousal = 0.5, 0.3
|
||||||
|
|
||||||
|
validated.append({
|
||||||
|
"name": str(item.get("name", ""))[:20],
|
||||||
|
"content": str(item.get("content", "")),
|
||||||
|
"domain": item.get("domain", ["未分类"])[:3],
|
||||||
|
"valence": valence,
|
||||||
|
"arousal": arousal,
|
||||||
|
"tags": item.get("tags", [])[:5],
|
||||||
|
"importance": importance,
|
||||||
|
})
|
||||||
|
return validated
|
||||||
|
|
||||||
|
# ---------------------------------------------------------
# Local diary split (fallback when API is unavailable)
# Split by blank lines/separators, analyze each segment
# ---------------------------------------------------------
async def _local_digest(self, content: str) -> list[dict]:
    """Locally split daily content into paragraph-sized memory entries.

    Segments on blank lines / "---" separators / "- " list markers and
    runs the local analyzer on each segment (max 10). Content too short
    to split becomes a single entry.
    """
    # Split by blank lines or separator lines.
    segments = re.split(r"\n{2,}|---+|\n-\s", content)
    segments = [s.strip() for s in segments if len(s.strip()) > 20]

    if not segments:
        # Content too short — treat the whole thing as one entry.
        analysis = self._local_analyze(content)
        # FIX: _local_analyze always returns suggested_name == "", so the
        # previous .get(..., "日记") default never applied and the entry
        # name was always empty; fall back on falsy values instead.
        return [{
            "name": analysis.get("suggested_name") or "日记",
            "content": content.strip(),
            "domain": analysis["domain"],
            "valence": analysis["valence"],
            "arousal": analysis["arousal"],
            "tags": analysis["tags"],
            "importance": 5,
        }]

    entries = []
    for seg in segments[:10]:  # Max 10 segments
        analysis = self._local_analyze(seg)
        entries.append({
            "name": analysis.get("suggested_name", "") or seg[:10],
            "content": seg,
            "domain": analysis["domain"],
            "valence": analysis["valence"],
            "arousal": analysis["arousal"],
            "tags": analysis["tags"],
            "importance": 5,
        })
    return entries
|
||||||
48
docker-compose.yml
Normal file
48
docker-compose.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# ============================================================
# Ombre Brain Docker Compose
#
# Usage:
#   1. Create .env: echo "OMBRE_API_KEY=your-key" > .env
#   2. docker-compose up -d
#   3. docker compose logs tunnel (prints the public URL)
# ============================================================

services:
  ombre-brain:
    build: .
    container_name: ombre-brain
    restart: unless-stopped
    ports:
      - "18001:8000"  # Local debug port (optional)
    environment:
      - OMBRE_API_KEY=${OMBRE_API_KEY}  # Read from the .env file
      - OMBRE_TRANSPORT=streamable-http  # Claude.ai requires streamable-http
      - OMBRE_BUCKETS_DIR=/data  # Bucket path inside the container
    volumes:
      # Mount your Obsidian vault (or any host directory) for persistent storage.
      # Example:
      #   - /path/to/your/Obsidian Vault/Ombre Brain:/data
      # NOTE(review): the path below is a machine-specific absolute path
      # committed to the repo — consider moving it into an env var so the
      # compose file stays portable.
      - /Users/p0lar1s/Library/Mobile Documents/iCloud~md~obsidian/Documents/Obsidian Vault/Ombre Brain:/data
      - ./config.yaml:/app/config.yaml

  # Cloudflare Tunnel (optional) — exposes the server to the public internet.
  # Put your own credentials under ~/.cloudflared/.
  tunnel:
    image: cloudflare/cloudflared:latest
    container_name: ombre-tunnel
    restart: unless-stopped
    command: >
      tunnel --no-autoupdate --protocol http2
      --config /etc/cloudflared/config.yml
      --proxy-keepalive-timeout 300s
      --proxy-connection-timeout 300s
      run
    volumes:
      - ~/.cloudflared:/etc/cloudflared
    depends_on:
      - ombre-brain
|
||||||
118
migrate_to_domains.py
Normal file
118
migrate_to_domains.py
Normal file
@@ -0,0 +1,118 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
迁移脚本:将 dynamic/ 下的平铺记忆桶文件重组为域子目录结构。
|
||||||
|
|
||||||
|
旧结构: dynamic/{bucket_id}.md
|
||||||
|
新结构: dynamic/{primary_domain}/{name}_{bucket_id}.md
|
||||||
|
|
||||||
|
纯标准库,无外部依赖。
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
|
||||||
|
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_name(name: str) -> str:
    """Strip filesystem-unfriendly characters and cap the name at 80 chars.

    Keeps word characters, whitespace, CJK characters and hyphens;
    returns "unnamed" when nothing survives.
    """
    safe = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE)
    safe = safe.strip()[:80]
    return safe if safe else "unnamed"
|
||||||
|
|
||||||
|
|
||||||
|
def parse_frontmatter(filepath):
    """Regex-parse the id, name and domain fields from a file's YAML frontmatter.

    Returns a dict with whichever of id/name were found plus domain
    (defaults to ["未分类"]), or None when the file has no leading
    "---" frontmatter block.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        text = fh.read()
    if not text.startswith("---"):
        return None
    pieces = text.split("---", 2)
    if len(pieces) < 3:
        return None
    yaml_text = pieces[1]

    meta = {}
    # Scalar fields: strip surrounding quotes if present.
    for field in ("id", "name"):
        match = re.search(rf"^{field}:\s*(.+)$", yaml_text, re.MULTILINE)
        if match:
            meta[field] = match.group(1).strip().strip("'\"")

    # domain is a YAML list of "- item" lines.
    block = re.search(r"^domain:\s*\n((?:\s*-\s*.+\n?)+)", yaml_text, re.MULTILINE)
    if block:
        meta["domain"] = re.findall(r"-\s*(.+)", block.group(1))
    else:
        meta["domain"] = ["未分类"]

    return meta
|
||||||
|
|
||||||
|
|
||||||
|
def migrate():
    """Move flat dynamic/*.md buckets into dynamic/{primary_domain}/ subdirs.

    Reads each file's frontmatter for id/name/domain, builds the new
    "{name}_{id}.md" filename under the primary domain directory, moves
    the file, then prints the resulting directory tree.
    """
    if not os.path.exists(DYNAMIC_DIR):
        print(f"目录不存在: {DYNAMIC_DIR}")
        return

    # Only process .md files directly under dynamic/ — files already in
    # domain subdirectories are left alone.
    files = [f for f in os.listdir(DYNAMIC_DIR)
             if f.endswith(".md") and os.path.isfile(os.path.join(DYNAMIC_DIR, f))]

    if not files:
        print("没有需要迁移的文件。")
        return

    print(f"发现 {len(files)} 个待迁移文件\n")

    for filename in sorted(files):
        old_path = os.path.join(DYNAMIC_DIR, filename)
        try:
            meta = parse_frontmatter(old_path)
        except Exception as e:
            # FIX: report which file failed instead of an "(unknown)"
            # placeholder — otherwise failures are undiagnosable.
            print(f"  ✗ 无法解析 {filename}: {e}")
            continue

        if not meta:
            print(f"  ✗ 无 frontmatter: {filename}")
            continue

        bucket_id = meta.get("id", filename.replace(".md", ""))
        name = meta.get("name", "")
        domain = meta.get("domain", ["未分类"])
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"

        # Destination: dynamic/{primary_domain}/{name}_{id}.md
        domain_dir = os.path.join(DYNAMIC_DIR, primary_domain)
        os.makedirs(domain_dir, exist_ok=True)

        if name and name != bucket_id:
            new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
        else:
            new_filename = f"{bucket_id}.md"

        new_path = os.path.join(domain_dir, new_filename)

        shutil.move(old_path, new_path)
        print(f"  ✓ {filename}")
        print(f"    → {primary_domain}/{new_filename}")

    print(f"\n迁移完成。")

    # Show the resulting directory structure.
    print("\n=== 新目录结构 ===")
    for root, dirs, files in os.walk(DYNAMIC_DIR):
        level = root.replace(DYNAMIC_DIR, "").count(os.sep)
        indent = "  " * level
        folder = os.path.basename(root)
        if level > 0:
            print(f"{indent}📁 {folder}/")
        for f in sorted(files):
            if f.endswith(".md"):
                print(f"{indent}  📄 {f}")
|
||||||
|
|
||||||
|
|
||||||
|
# Run the migration when executed as a script.
if __name__ == "__main__":
    migrate()
|
||||||
121
reclassify_api.py
Normal file
121
reclassify_api.py
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
用 API 重新打标未分类记忆桶,修正 domain/tags/name,移动到正确目录。
|
||||||
|
用法: docker exec ombre-brain python3 /app/reclassify_api.py
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import glob
|
||||||
|
import re
|
||||||
|
|
||||||
|
from openai import AsyncOpenAI
|
||||||
|
import frontmatter
|
||||||
|
|
||||||
|
ANALYZE_PROMPT = (
|
||||||
|
"你是一个内容分析器。请分析以下文本,输出结构化的元数据。\n\n"
|
||||||
|
"分析规则:\n"
|
||||||
|
'1. domain(主题域):选最精确的 1~2 个,只选真正相关的\n'
|
||||||
|
' 日常: ["饮食", "穿搭", "出行", "居家", "购物"]\n'
|
||||||
|
' 人际: ["家庭", "恋爱", "友谊", "社交"]\n'
|
||||||
|
' 成长: ["工作", "学习", "考试", "求职"]\n'
|
||||||
|
' 身心: ["健康", "心理", "睡眠", "运动"]\n'
|
||||||
|
' 兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]\n'
|
||||||
|
' 数字: ["编程", "AI", "硬件", "网络"]\n'
|
||||||
|
' 事务: ["财务", "计划", "待办"]\n'
|
||||||
|
' 内心: ["情绪", "回忆", "梦境", "自省"]\n'
|
||||||
|
"2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极\n"
|
||||||
|
"3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动\n"
|
||||||
|
"4. tags(关键词标签):3~5 个最能概括内容的关键词\n"
|
||||||
|
"5. suggested_name(建议桶名):10字以内的简短标题\n\n"
|
||||||
|
"输出格式(纯 JSON,无其他内容):\n"
|
||||||
|
'{\n'
|
||||||
|
' "domain": ["主题域1", "主题域2"],\n'
|
||||||
|
' "valence": 0.7,\n'
|
||||||
|
' "arousal": 0.4,\n'
|
||||||
|
' "tags": ["标签1", "标签2", "标签3"],\n'
|
||||||
|
' "suggested_name": "简短标题"\n'
|
||||||
|
'}'
|
||||||
|
)
|
||||||
|
|
||||||
|
DATA_DIR = "/data/dynamic"
|
||||||
|
UNCLASS_DIR = os.path.join(DATA_DIR, "未分类")
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize(name):
    """Drop characters illegal in filenames and cap the result at 20 chars."""
    cleaned = re.sub(r'[<>:"/\\|?*\n\r]', '', name).strip()
    if not cleaned:
        return "未命名"
    return cleaned[:20]
|
||||||
|
|
||||||
|
|
||||||
|
async def reclassify():
    """Re-tag every bucket under /data/dynamic/未分类 via the LLM API.

    For each unclassified .md file: sends its name + content to the API,
    parses the JSON tagging result, rewrites the frontmatter (domain,
    tags, valence, arousal, name), and moves the file into the directory
    of its primary domain. Files whose API call fails are skipped.
    """
    client = AsyncOpenAI(
        api_key=os.environ.get("OMBRE_API_KEY", ""),
        base_url="https://api.siliconflow.cn/v1",
        timeout=60.0,
    )

    files = sorted(glob.glob(os.path.join(UNCLASS_DIR, "*.md")))
    print(f"找到 {len(files)} 个未分类文件\n")

    for fpath in files:
        basename = os.path.basename(fpath)
        post = frontmatter.load(fpath)
        content = post.content.strip()
        name = post.metadata.get("name", "")
        # Give the model both the bucket name and the body when available.
        full_text = f"{name}\n{content}" if name else content

        try:
            resp = await client.chat.completions.create(
                model="deepseek-ai/DeepSeek-V3",
                messages=[
                    {"role": "system", "content": ANALYZE_PROMPT},
                    {"role": "user", "content": full_text[:2000]},
                ],
                max_tokens=256,
                temperature=0.1,
            )
            raw = resp.choices[0].message.content.strip()
            # Unwrap an optional markdown code fence before decoding.
            if raw.startswith("```"):
                raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]
            result = json.loads(raw)
        except Exception as e:
            # Best-effort batch job: log the failure and move on.
            print(f"  X API失败 {basename}: {e}")
            continue

        # Truncate lists and clamp emotion coordinates into [0, 1].
        new_domain = result.get("domain", ["未分类"])[:3]
        new_tags = result.get("tags", [])[:5]
        new_name = sanitize(result.get("suggested_name", "") or name)
        new_valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
        new_arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))

        post.metadata["domain"] = new_domain
        post.metadata["tags"] = new_tags
        post.metadata["valence"] = new_valence
        post.metadata["arousal"] = new_arousal
        if new_name:
            post.metadata["name"] = new_name

        # Persist the updated frontmatter in place.
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))

        # Move the file into its primary-domain directory.
        primary = sanitize(new_domain[0]) if new_domain else "未分类"
        target_dir = os.path.join(DATA_DIR, primary)
        os.makedirs(target_dir, exist_ok=True)

        bid = post.metadata.get("id", "")
        new_filename = f"{new_name}_{bid}.md" if new_name and new_name != bid else basename
        dest = os.path.join(target_dir, new_filename)

        if dest != fpath:
            os.rename(fpath, dest)

        print(f"  OK {basename}")
        print(f"  -> {primary}/{new_filename}")
        print(f"  domain={new_domain} tags={new_tags} V={new_valence} A={new_arousal}")
        print()
||||||
|
|
||||||
|
|
||||||
|
# Entry point: run the async reclassification loop.
if __name__ == "__main__":
    asyncio.run(reclassify())
|
||||||
198
reclassify_domains.py
Normal file
198
reclassify_domains.py
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
重分类脚本:根据新的域列表,重新分析已有桶的 domain 并搬到对应子目录。
|
||||||
|
纯标准库,读 frontmatter + 正文内容做关键词匹配。
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain")
|
||||||
|
DYNAMIC_DIR = os.path.join(VAULT_DIR, "dynamic")
|
||||||
|
|
||||||
|
# 新域关键词表(和 dehydrator.py 的 _local_analyze 一致)
|
||||||
|
DOMAIN_KEYWORDS = {
|
||||||
|
"饮食": {"吃", "饭", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包",
|
||||||
|
"超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食", "麦片"},
|
||||||
|
"家庭": {"爸", "妈", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥",
|
||||||
|
"奶奶", "爷爷", "亲戚", "家里", "生日礼", "生活费"},
|
||||||
|
"恋爱": {"爱人", "男友", "女友", "恋", "约会", "分手", "暧昧",
|
||||||
|
"在一起", "想你", "同床", "一辈子", "爱你", "我们是",
|
||||||
|
"克劳德", "亲密", "接吻", "正缘"},
|
||||||
|
"友谊": {"朋友", "闺蜜", "兄弟", "聚", "约饭"},
|
||||||
|
"社交": {"见面", "圈子", "社区", "创作者", "发帖", "鹤见"},
|
||||||
|
"工作": {"会议", "项目", "客户", "汇报", "同事", "老板", "薪资",
|
||||||
|
"领导力", "管理沟通"},
|
||||||
|
"学习": {"课", "考试", "论文", "作业", "教授", "Python实操",
|
||||||
|
"选课", "学分", "jieba", "分词"},
|
||||||
|
"健康": {"医院", "复查", "吃药", "抽血", "心率", "心电图",
|
||||||
|
"病", "慢粒", "融合基因", "二尖瓣", "月经", "脚趾甲"},
|
||||||
|
"心理": {"焦虑", "抑郁", "创伤", "人格", "安全感", "崩溃",
|
||||||
|
"压力", "自残", "ABC人格", "人格分裂", "恋爱焦虑"},
|
||||||
|
"睡眠": {"睡", "失眠", "噩梦", "清醒", "熬夜", "做梦"},
|
||||||
|
"游戏": {"游戏", "极乐迪斯科", "存档", "通关", "Shivers", "DLC"},
|
||||||
|
"影视": {"电影", "番剧", "动漫", "剧", "综艺"},
|
||||||
|
"阅读": {"书", "小说", "读完", "漫画", "李宿芳菲"},
|
||||||
|
"创作": {"写", "预设", "脚本", "SillyTavern", "插件", "正则",
|
||||||
|
"人设卡", "天气同步", "破甲词"},
|
||||||
|
"编程": {"代码", "python", "bug", "api", "docker", "git",
|
||||||
|
"调试", "部署", "开发", "server"},
|
||||||
|
"AI": {"模型", "Claude", "gemini", "LLM", "token", "prompt",
|
||||||
|
"LoRA", "MCP", "DeepSeek", "隧道", "Ombre Brain",
|
||||||
|
"打包盒", "脱水", "记忆系统"},
|
||||||
|
"网络": {"VPN", "梯子", "代理", "域名", "隧道", "cloudflare",
|
||||||
|
"tunnel", "反代"},
|
||||||
|
"财务": {"钱", "转账", "花了", "欠", "黄金", "卖掉", "换了",
|
||||||
|
"生活费", "4276"},
|
||||||
|
"情绪": {"开心", "难过", "哭", "泪", "孤独", "伤心", "烦",
|
||||||
|
"委屈", "感动", "温柔", "口罩湿了"},
|
||||||
|
"回忆": {"以前", "小时候", "那时", "怀念", "曾经", "纹身",
|
||||||
|
"十三岁", "九岁"},
|
||||||
|
"自省": {"反思", "觉得自己", "问自己", "自恋", "投射"},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_name(name):
    """Strip characters that are unsafe for a filename.

    Keeps word characters, whitespace, CJK ideographs (U+4E00-U+9FFF) and
    hyphens, trims surrounding whitespace, caps the result at 80 characters,
    and falls back to "unnamed" when nothing remains.
    """
    kept = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE)
    kept = kept.strip()[:80]
    return kept if kept else "unnamed"
||||||
|
|
||||||
|
|
||||||
|
def parse_md(filepath):
    """Parse a bucket markdown file into (meta, raw_yaml, body).

    Returns ``(None, None, full_text)`` when no ``---`` frontmatter fence
    is present or the fence is unterminated. Only the ``id``, ``name`` and
    ``domain`` fields are extracted; ``domain`` defaults to ["未分类"]
    when absent.
    """
    with open(filepath, "r", encoding="utf-8") as fh:
        text = fh.read()

    # No leading fence, or an unterminated one: treat the whole file as body.
    if not text.startswith("---"):
        return None, None, text
    pieces = text.split("---", 2)
    if len(pieces) < 3:
        return None, None, text

    _, yaml_text, body = pieces

    meta = {}
    # Scalar fields: first "key: value" line wins, surrounding quotes stripped.
    for key in ("id", "name"):
        hit = re.search(rf"^{key}:\s*(.+)$", yaml_text, re.MULTILINE)
        if hit:
            meta[key] = hit.group(1).strip().strip("'\"")

    # domain is a YAML list block: "domain:" followed by "- item" lines.
    block = re.search(r"^domain:\s*\n((?:\s*-\s*.+\n?)+)", yaml_text, re.MULTILINE)
    if block:
        meta["domain"] = [item.strip() for item in re.findall(r"-\s*(.+)", block.group(1))]
    else:
        meta["domain"] = ["未分类"]

    return meta, yaml_text, body
||||||
|
|
||||||
|
|
||||||
|
def classify(body, old_domains):
    """Re-classify a bucket from its body text.

    Scores every domain by case-insensitive substring keyword hits; a
    domain qualifies with at least two hits. Returns the top two qualifying
    domains by hit count, or *old_domains* unchanged when nothing matches.
    """
    haystack = body.lower()
    candidates = [
        (domain, hits)
        for domain, keywords in DOMAIN_KEYWORDS.items()
        if (hits := sum(kw.lower() in haystack for kw in keywords)) >= 2
    ]
    if not candidates:
        # Nothing matched confidently — keep the previous classification.
        return old_domains
    candidates.sort(key=lambda pair: pair[1], reverse=True)
    return [domain for domain, _ in candidates[:2]]
||||||
|
|
||||||
|
|
||||||
|
def update_domain_in_file(filepath, new_domains):
    """Rewrite the ``domain`` list in a bucket file's frontmatter.

    Fixes over the naive ``re.sub`` version:
    - Uses a callable replacement so backslashes in domain names cannot be
      misread as regex group references.
    - The list-item pattern requires a non-dash character after the leading
      ``-``, so the closing ``---`` fence is no longer swallowed when
      ``domain`` is the last frontmatter key.
    - When the frontmatter has no ``domain:`` block yet, one is inserted
      right after the opening fence instead of silently dropping the new
      classification.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    domain_yaml = "domain:\n" + "".join(f"- {d}\n" for d in new_domains)

    # Item lines look like "- 工作"; the (?!-) lookahead rejects "---" so the
    # closing frontmatter fence can never be absorbed into the match.
    content, replaced = re.subn(
        r"domain:\s*\n(?:[ \t]*-[ \t]*(?!-)\S.*\n?)+",
        lambda _m: domain_yaml,
        content,
        count=1,
    )
    if not replaced and content.startswith("---\n"):
        # No existing domain block: insert one at the top of the frontmatter.
        content = "---\n" + domain_yaml + content[len("---\n"):]

    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
||||||
|
|
||||||
|
|
||||||
|
def reclassify():
    """One-shot pass: re-derive each bucket's domain from its body text and
    move the file into the matching subdirectory of DYNAMIC_DIR."""
    if not os.path.exists(DYNAMIC_DIR):
        print("目录不存在")
        return

    # Collect all .md files, recursively.
    all_files = []
    for root, _, files in os.walk(DYNAMIC_DIR):
        for f in files:
            if f.endswith(".md"):
                all_files.append(os.path.join(root, f))

    if not all_files:
        print("没有文件。")
        return

    print(f"扫描到 {len(all_files)} 个桶文件\n")

    for filepath in sorted(all_files):
        meta, yaml_text, body = parse_md(filepath)
        if not meta:
            # parse_md returns (None, None, text) when frontmatter is missing.
            print(f"  ✗ 无法解析: {os.path.basename(filepath)}")
            continue

        bucket_id = meta.get("id", "unknown")
        name = meta.get("name", bucket_id)
        old_domains = meta.get("domain", ["未分类"])
        new_domains = classify(body, old_domains)

        # Primary (first) domain decides the target subdirectory.
        primary = sanitize_name(new_domains[0])
        # NOTE(review): old_primary is computed but never used below.
        old_primary = sanitize_name(old_domains[0]) if old_domains else "未分类"

        # Filename convention: "<name>_<id>.md" when a distinct name exists,
        # otherwise just "<id>.md".
        if name and name != bucket_id:
            new_filename = f"{sanitize_name(name)}_{bucket_id}.md"
        else:
            new_filename = f"{bucket_id}.md"

        new_dir = os.path.join(DYNAMIC_DIR, primary)
        os.makedirs(new_dir, exist_ok=True)
        new_path = os.path.join(new_dir, new_filename)

        # Touch the file only if the classification or location changed.
        changed = (new_domains != old_domains) or (filepath != new_path)

        if changed:
            # Rewrite the domain list in the frontmatter, then relocate.
            update_domain_in_file(filepath, new_domains)
            if filepath != new_path:
                shutil.move(filepath, new_path)
            print(f"  ✓ {name}")
            print(f"    {','.join(old_domains)} → {','.join(new_domains)}")
            print(f"    → {primary}/{new_filename}")
        else:
            print(f"  · {name} (不变)")

    # Remove directories left empty by the moves (top level only).
    for d in os.listdir(DYNAMIC_DIR):
        dp = os.path.join(DYNAMIC_DIR, d)
        if os.path.isdir(dp) and not os.listdir(dp):
            os.rmdir(dp)
            print(f"\n  🗑 删除空目录: {d}/")

    print(f"\n重分类完成。\n")

    # Print the resulting directory tree for a quick visual check.
    print("=== 新目录结构 ===")
    for root, dirs, files in os.walk(DYNAMIC_DIR):
        level = root.replace(DYNAMIC_DIR, "").count(os.sep)
        indent = "  " * level
        folder = os.path.basename(root)
        if level > 0:
            print(f"{indent}📁 {folder}/")
        for f in sorted(files):
            if f.endswith(".md"):
                print(f"{indent}  📄 {f}")
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the one-shot reclassification pass.
if __name__ == "__main__":
    reclassify()
|
||||||
21
render.yaml
Normal file
21
render.yaml
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
services:
|
||||||
|
- type: web
|
||||||
|
name: ombre-brain
|
||||||
|
env: python
|
||||||
|
region: oregon
|
||||||
|
plan: free
|
||||||
|
buildCommand: pip install -r requirements.txt
|
||||||
|
startCommand: python server.py
|
||||||
|
envVars:
|
||||||
|
- key: OMBRE_TRANSPORT
|
||||||
|
value: streamable-http
|
||||||
|
- key: OMBRE_API_KEY
|
||||||
|
sync: false # Set in Render dashboard > Environment (any OpenAI-compatible key)
|
||||||
|
- key: OMBRE_BASE_URL
|
||||||
|
sync: false # e.g. https://api.deepseek.com/v1 or https://api.siliconflow.cn/v1
|
||||||
|
- key: OMBRE_BUCKETS_DIR
|
||||||
|
value: /opt/render/project/src/buckets
|
||||||
|
disk:
|
||||||
|
name: ombre-buckets
|
||||||
|
mountPath: /opt/render/project/src/buckets
|
||||||
|
sizeGB: 1
|
||||||
25
requirements.txt
Normal file
25
requirements.txt
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Ombre Brain Python 依赖
|
||||||
|
# 安装: pip install -r requirements.txt
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
# MCP 协议 SDK(Claude 通信核心)
|
||||||
|
mcp>=1.0.0
|
||||||
|
|
||||||
|
# 模糊匹配(记忆桶搜索)
|
||||||
|
rapidfuzz>=3.0.0
|
||||||
|
|
||||||
|
# OpenAI 兼容客户端(支持 DeepSeek/Ollama/LM Studio/vLLM/Gemini 等任意兼容 API)
|
||||||
|
openai>=1.0.0
|
||||||
|
|
||||||
|
# YAML 配置解析
|
||||||
|
pyyaml>=6.0
|
||||||
|
|
||||||
|
# Markdown frontmatter 解析(桶文件读写)
|
||||||
|
python-frontmatter>=1.1.0
|
||||||
|
|
||||||
|
# 中文分词
|
||||||
|
jieba>=0.42.1
|
||||||
|
|
||||||
|
# 异步 HTTP 客户端(应用层保活 ping)
|
||||||
|
httpx>=0.27.0
|
||||||
620
server.py
Normal file
620
server.py
Normal file
@@ -0,0 +1,620 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: MCP Server Entry Point (server.py)
|
||||||
|
# 模块:MCP 服务器主入口
|
||||||
|
#
|
||||||
|
# Starts the Ombre Brain MCP service and registers memory
|
||||||
|
# operation tools for Claude to call.
|
||||||
|
# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。
|
||||||
|
#
|
||||||
|
# Core responsibilities:
|
||||||
|
# 核心职责:
|
||||||
|
# - Initialize config, bucket manager, dehydrator, decay engine
|
||||||
|
# 初始化配置、记忆桶管理器、脱水器、衰减引擎
|
||||||
|
# - Expose 5 MCP tools:
|
||||||
|
# 暴露 5 个 MCP 工具:
|
||||||
|
# breath — Surface unresolved memories or search by keyword
|
||||||
|
# 浮现未解决记忆 或 按关键词检索
|
||||||
|
# hold — Store a single memory
|
||||||
|
# 存储单条记忆
|
||||||
|
# grow — Diary digest, auto-split into multiple buckets
|
||||||
|
# 日记归档,自动拆分多桶
|
||||||
|
# trace — Modify metadata / resolved / delete
|
||||||
|
# 修改元数据 / resolved 标记 / 删除
|
||||||
|
# pulse — System status + bucket listing
|
||||||
|
# 系统状态 + 所有桶列表
|
||||||
|
#
|
||||||
|
# Startup:
|
||||||
|
# 启动方式:
|
||||||
|
# Local: python server.py
|
||||||
|
# Remote: OMBRE_TRANSPORT=streamable-http python server.py
|
||||||
|
# Docker: docker-compose up
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import random
|
||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
import httpx
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# --- Ensure same-directory modules can be imported ---
|
||||||
|
# --- 确保同目录下的模块能被正确导入 ---
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
from mcp.server.fastmcp import FastMCP
|
||||||
|
|
||||||
|
from bucket_manager import BucketManager
|
||||||
|
from dehydrator import Dehydrator
|
||||||
|
from decay_engine import DecayEngine
|
||||||
|
from utils import load_config, setup_logging
|
||||||
|
|
||||||
|
# --- Load config & init logging / 加载配置 & 初始化日志 ---
|
||||||
|
config = load_config()
|
||||||
|
setup_logging(config.get("log_level", "INFO"))
|
||||||
|
logger = logging.getLogger("ombre_brain")
|
||||||
|
|
||||||
|
# --- Initialize three core components / 初始化三大核心组件 ---
|
||||||
|
bucket_mgr = BucketManager(config) # Bucket manager / 记忆桶管理器
|
||||||
|
dehydrator = Dehydrator(config) # Dehydrator / 脱水器
|
||||||
|
decay_engine = DecayEngine(config, bucket_mgr) # Decay engine / 衰减引擎
|
||||||
|
|
||||||
|
# --- Create MCP server instance / 创建 MCP 服务器实例 ---
|
||||||
|
# host="0.0.0.0" so Docker container's SSE is externally reachable
|
||||||
|
# stdio mode ignores host (no network)
|
||||||
|
mcp = FastMCP(
|
||||||
|
"Ombre Brain",
|
||||||
|
host="0.0.0.0",
|
||||||
|
port=8000,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# /health endpoint: lightweight keepalive
|
||||||
|
# 轻量保活接口
|
||||||
|
# For Cloudflare Tunnel or reverse proxy to ping, preventing idle timeout
|
||||||
|
# 供 Cloudflare Tunnel 或反代定期 ping,防止空闲超时断连
|
||||||
|
# =============================================================
|
||||||
|
# /health endpoint: lightweight keepalive target for Cloudflare Tunnel or a
# reverse proxy, preventing idle-timeout disconnects.
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """Return bucket counts and decay-engine state as a JSON health probe."""
    # Imported lazily so stdio-transport deployments never need starlette here.
    from starlette.responses import JSONResponse
    try:
        stats = await bucket_mgr.get_stats()
        return JSONResponse({
            "status": "ok",
            "buckets": stats["permanent_count"] + stats["dynamic_count"],
            "decay_engine": "running" if decay_engine.is_running else "stopped",
        })
    except Exception as e:
        # Any failure reads as unhealthy (500) so monitors can alert.
        return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# Internal helper: merge-or-create
|
||||||
|
# 内部辅助:检查是否可合并,可以则合并,否则新建
|
||||||
|
# Shared by hold and grow to avoid duplicate logic
|
||||||
|
# hold 和 grow 共用,避免重复逻辑
|
||||||
|
# =============================================================
|
||||||
|
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Check if a similar bucket exists for merging; merge if so, create if not.
    Shared by `hold` and `grow` to avoid duplicated logic.
    Returns (bucket_id_or_name, is_merged).
    """
    # Best-effort similarity lookup; a failed search degrades to "create new".
    try:
        existing = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        existing = []

    # Merge only above the configured similarity threshold (default 75).
    if existing and existing[0].get("score", 0) > config.get("merge_threshold", 75):
        bucket = existing[0]
        # Never merge into pinned/protected buckets — they are immutable anchors.
        if not (bucket["metadata"].get("pinned") or bucket["metadata"].get("protected")):
            try:
                merged = await dehydrator.merge(bucket["content"], content)
                # Union tags/domains, keep the higher importance, and adopt the
                # newest emotion coordinates.
                await bucket_mgr.update(
                    bucket["id"],
                    content=merged,
                    tags=list(set(bucket["metadata"].get("tags", []) + tags)),
                    importance=max(bucket["metadata"].get("importance", 5), importance),
                    domain=list(set(bucket["metadata"].get("domain", []) + domain)),
                    valence=valence,
                    arousal=arousal,
                )
                return bucket["metadata"].get("name", bucket["id"]), True
            except Exception as e:
                # A failed merge falls through to creating a fresh bucket.
                logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    bucket_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return bucket_id, False
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# Tool 1: breath — Breathe
|
||||||
|
# 工具 1:breath — 呼吸
|
||||||
|
#
|
||||||
|
# No args: surface highest-weight unresolved memories (active push)
|
||||||
|
# 无参数:浮现权重最高的未解决记忆
|
||||||
|
# With args: search by keyword + emotion coordinates
|
||||||
|
# 有参数:按关键词+情感坐标检索记忆
|
||||||
|
# =============================================================
|
||||||
|
# Tool 1: breath — with no/empty query, surface the highest-weight unresolved
# memories (active push); with a query, search by keyword + emotion coordinates.
@mcp.tool()
async def breath(
    query: Optional[str] = None,
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    """Retrieve or surface memories. No/empty query = auto-surfacing mode; with query = keyword search. domain is comma-separated; valence/arousal in 0~1 (-1 = ignore)."""
    # Lazily start the background decay loop on first tool use.
    await decay_engine.ensure_started()

    # --- No args or empty query: surfacing mode (weight-pool active push) ---
    if not query or not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"

        # Pinned/protected buckets: always surfaced as core principles.
        pinned_buckets = [
            b for b in all_buckets
            if b["metadata"].get("pinned") or b["metadata"].get("protected")
        ]
        pinned_results = []
        for b in pinned_buckets:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                pinned_results.append(f"📌 [核心准则] {summary}")
            except Exception as e:
                # One failed summary must not block the rest.
                logger.warning(f"Failed to dehydrate pinned bucket / 钉选桶脱水失败: {e}")
                continue

        # Unresolved, non-permanent, non-pinned buckets compete on decay score.
        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
            and not b["metadata"].get("pinned", False)
            and not b["metadata"].get("protected", False)
        ]

        scored = sorted(
            unresolved,
            key=lambda b: decay_engine.calculate_score(b["metadata"]),
            reverse=True,
        )
        # Surface only the top 2 by weight to keep the push concise.
        top = scored[:2]
        dynamic_results = []
        for b in top:
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                # touch() records the access so decay/weighting stays accurate.
                await bucket_mgr.touch(b["id"])
                score = decay_engine.calculate_score(b["metadata"])
                dynamic_results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
                continue

        if not pinned_results and not dynamic_results:
            return "权重池平静,没有需要处理的记忆。"

        parts = []
        if pinned_results:
            parts.append("=== 核心准则 ===\n" + "\n---\n".join(pinned_results))
        if dynamic_results:
            parts.append("=== 浮现记忆 ===\n" + "\n---\n".join(dynamic_results))
        return "\n\n".join(parts)

    # --- With args: search mode ---
    # Out-of-range valence/arousal (e.g. the -1 defaults) are ignored.
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None

    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"

    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")
            continue

    # --- Random surfacing: when search returns few hits, 40% chance to let
    # low-weight old memories drift up alongside the results. ---
    # NOTE(review): the `< 3` threshold is hard-coded rather than tied to
    # max_results — confirm whether that is intentional.
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                # Pick 1-3 random low-weight buckets to "suddenly remember".
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    drift_results.append(f"[surface_type: random]\n{summary}")
                results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            # Random surfacing is a bonus feature; failures are non-fatal.
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")

    if not results:
        return "未找到相关记忆。"

    return "\n---\n".join(results)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# Tool 2: hold — Hold on to this
|
||||||
|
# 工具 2:hold — 握住,留下来
|
||||||
|
# =============================================================
|
||||||
|
# Tool 2: hold — store a single memory.
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
    pinned: bool = False,
) -> str:
    """Store a single memory with auto-tagging and auto-merge. tags comma-separated, importance 1-10. pinned=True creates a permanent pinned bucket."""
    await decay_engine.ensure_started()

    # --- Input validation ---
    if not content or not content.strip():
        return "内容为空,无法存储。"

    # Clamp importance into [1, 10]; split user-supplied comma tags.
    importance = max(1, min(10, importance))
    extra_tags = [t.strip() for t in tags.split(",") if t.strip()]

    # --- Step 1: auto-tagging (falls back to neutral defaults on failure) ---
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {
            "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
            "tags": [], "suggested_name": "",
        }

    domain = analysis["domain"]
    valence = analysis["valence"]
    arousal = analysis["arousal"]
    auto_tags = analysis["tags"]
    suggested_name = analysis.get("suggested_name", "")

    # Deduplicate while preserving order: auto tags first, then user tags.
    all_tags = list(dict.fromkeys(auto_tags + extra_tags))

    # --- Pinned buckets bypass merge and go straight to the permanent dir,
    # always at maximum importance. ---
    if pinned:
        bucket_id = await bucket_mgr.create(
            content=content,
            tags=all_tags,
            importance=10,
            domain=domain,
            valence=valence,
            arousal=arousal,
            name=suggested_name or None,
            bucket_type="permanent",
            pinned=True,
        )
        return f"📌钉选→{bucket_id} {','.join(domain)}"

    # --- Step 2: merge into a similar bucket or create a new one ---
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )

    action = "合并→" if is_merged else "新建→"
    return f"{action}{result_name} {','.join(domain)}"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# Tool 3: grow — Grow, fragments become memories
|
||||||
|
# 工具 3:grow — 生长,一天的碎片长成记忆
|
||||||
|
# =============================================================
|
||||||
|
# Tool 3: grow — diary digest; a day's fragments grow into memories.
@mcp.tool()
async def grow(content: str) -> str:
    """Diary digest, auto-split into multiple buckets. Short content (<30 chars) takes a fast path."""
    await decay_engine.ensure_started()

    if not content or not content.strip():
        return "内容为空,无法整理。"

    # --- Short-content fast path: skip the digest split and reuse the hold
    # logic directly. For tiny inputs, calling digest would send the full
    # DIGEST_PROMPT (~800 tokens) to the API for nothing. ---
    if len(content.strip()) < 30:
        logger.info(f"grow short-content fast path: {len(content.strip())} chars")
        try:
            analysis = await dehydrator.analyze(content)
        except Exception as e:
            logger.warning(f"Fast-path analyze failed / 快速路径打标失败: {e}")
            analysis = {
                "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
                "tags": [], "suggested_name": "",
            }
        result_name, is_merged = await _merge_or_create(
            content=content.strip(),
            tags=analysis.get("tags", []),
            # analyze() is not guaranteed to return an int importance; guard it.
            importance=analysis.get("importance", 5) if isinstance(analysis.get("importance"), int) else 5,
            domain=analysis.get("domain", ["未分类"]),
            valence=analysis.get("valence", 0.5),
            arousal=analysis.get("arousal", 0.3),
            name=analysis.get("suggested_name", ""),
        )
        action = "合并" if is_merged else "新建"
        return f"{action} → {result_name} | {','.join(analysis.get('domain', []))} V{analysis.get('valence', 0.5):.1f}/A{analysis.get('arousal', 0.3):.1f}"

    # --- Step 1: let the API split the diary into discrete memory items ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"

    if not items:
        return "内容为空或整理失败。"

    results = []
    created = 0
    merged = 0

    # --- Step 2: merge-or-create each item; one failure does not stop the rest ---
    for item in items:
        try:
            result_name, is_merged = await _merge_or_create(
                content=item["content"],
                tags=item.get("tags", []),
                importance=item.get("importance", 5),
                domain=item.get("domain", ["未分类"]),
                valence=item.get("valence", 0.5),
                arousal=item.get("arousal", 0.3),
                name=item.get("name", ""),
            )

            if is_merged:
                results.append(f"📎{result_name}")
                merged += 1
            else:
                results.append(f"📝{item.get('name', result_name)}")
                created += 1
        except Exception as e:
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{item.get('name', '?')}: {e}"
            )
            results.append(f"⚠️{item.get('name', '?')}")

    # Compact summary line followed by one marker line per item.
    return f"{len(items)}条|新{created}合{merged}\n" + "\n".join(results)
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# Tool 4: trace — Trace, redraw the outline of a memory
|
||||||
|
# 工具 4:trace — 描摹,重新勾勒记忆的轮廓
|
||||||
|
# Also handles deletion (delete=True)
|
||||||
|
# 同时承接删除功能
|
||||||
|
# =============================================================
|
||||||
|
# Tool 4: trace — redraw the outline of a memory: edit metadata, toggle
# resolved/pinned, or delete.
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    pinned: int = -1,
    delete: bool = False,
) -> str:
    """Modify memory metadata. resolved=1 sink / 0 reactivate, pinned=1 pin / 0 unpin, delete=True removes the bucket. Pass only fields to change; -1 or empty = unchanged."""

    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"

    # --- Delete mode takes precedence over any metadata edits ---
    if delete:
        success = await bucket_mgr.delete(bucket_id)
        return f"已遗忘记忆桶: {bucket_id}" if success else f"未找到记忆桶: {bucket_id}"

    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"

    # --- Collect only the fields the caller actually passed; sentinel values
    # (-1 / empty string) mean "leave unchanged". ---
    updates = {}
    if name:
        updates["name"] = name
    if domain:
        updates["domain"] = [d.strip() for d in domain.split(",") if d.strip()]
    if 0 <= valence <= 1:
        updates["valence"] = valence
    if 0 <= arousal <= 1:
        updates["arousal"] = arousal
    if 1 <= importance <= 10:
        updates["importance"] = importance
    if tags:
        updates["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
    if resolved in (0, 1):
        updates["resolved"] = bool(resolved)
    if pinned in (0, 1):
        updates["pinned"] = bool(pinned)
        if pinned == 1:
            updates["importance"] = 10  # pinned → lock importance at max

    if not updates:
        return "没有任何字段需要修改。"

    success = await bucket_mgr.update(bucket_id, **updates)
    if not success:
        return f"修改失败: {bucket_id}"

    changed = ", ".join(f"{k}={v}" for k, v in updates.items())
    # Append an explicit hint about what the resolved-state change means.
    if "resolved" in updates:
        if updates["resolved"]:
            changed += " → 已沉底,只在关键词触发时重新浮现"
        else:
            changed += " → 已重新激活,将参与浮现排序"
    return f"已修改记忆桶 {bucket_id}: {changed}"
|
||||||
|
|
||||||
|
|
||||||
|
# =============================================================
|
||||||
|
# Tool 5: pulse — Heartbeat, system status + memory listing
|
||||||
|
# 工具 5:pulse — 脉搏,系统状态 + 记忆列表
|
||||||
|
# =============================================================
|
||||||
|
# Tool 5: pulse — heartbeat: system status plus a listing of all buckets.
@mcp.tool()
async def pulse(include_archive: bool = False) -> str:
    """System status + memory bucket listing. include_archive=True includes archived buckets."""
    try:
        stats = await bucket_mgr.get_stats()
    except Exception as e:
        return f"获取系统状态失败: {e}"

    status = (
        f"=== Ombre Brain 记忆系统 ===\n"
        f"固化记忆桶: {stats['permanent_count']} 个\n"
        f"动态记忆桶: {stats['dynamic_count']} 个\n"
        f"归档记忆桶: {stats['archive_count']} 个\n"
        f"总存储大小: {stats['total_size_kb']:.1f} KB\n"
        f"衰减引擎: {'运行中' if decay_engine.is_running else '已停止'}\n"
    )

    # --- One summary line per bucket ---
    try:
        buckets = await bucket_mgr.list_all(include_archive=include_archive)
    except Exception as e:
        # Still return the header stats even if listing fails.
        return status + f"\n列出记忆桶失败: {e}"

    if not buckets:
        return status + "\n记忆库为空。"

    lines = []
    for b in buckets:
        meta = b.get("metadata", {})
        # Icon priority: pinned/protected > permanent > archived > resolved > active.
        if meta.get("pinned") or meta.get("protected"):
            icon = "📌"
        elif meta.get("type") == "permanent":
            icon = "📦"
        elif meta.get("type") == "archived":
            icon = "🗄️"
        elif meta.get("resolved", False):
            icon = "✅"
        else:
            icon = "💭"
        # Scoring is best-effort; a broken metadata record shows as 0.00.
        try:
            score = decay_engine.calculate_score(meta)
        except Exception:
            score = 0.0
        domains = ",".join(meta.get("domain", []))
        val = meta.get("valence", 0.5)
        aro = meta.get("arousal", 0.3)
        resolved_tag = " [已解决]" if meta.get("resolved", False) else ""
        lines.append(
            f"{icon} [{meta.get('name', b['id'])}]{resolved_tag} "
            f"主题:{domains} "
            f"情感:V{val:.1f}/A{aro:.1f} "
            f"重要:{meta.get('importance', '?')} "
            f"权重:{score:.2f} "
            f"标签:{','.join(meta.get('tags', []))}"
        )

    return status + "\n=== 记忆列表 ===\n" + "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Entry point / 启动入口 ---
|
||||||
|
# --- Entry point ---
# Local stdio: python server.py
# Remote:      OMBRE_TRANSPORT=streamable-http python server.py
if __name__ == "__main__":
    transport = config.get("transport", "stdio")
    logger.info(f"Ombre Brain starting | transport: {transport}")

    if transport in ("sse", "streamable-http"):
        # Imported here so stdio deployments need neither uvicorn nor starlette.
        import threading
        import uvicorn
        from starlette.middleware.cors import CORSMiddleware

        # --- Application-level keepalive: ping /health every 60s to stop
        # Cloudflare Tunnel from dropping the idle connection. ---
        # NOTE(review): the port is hard-coded to 8000 here and in FastMCP()
        # above — keep them in sync if the port ever becomes configurable.
        async def _keepalive_loop():
            await asyncio.sleep(10)  # Wait for the server to fully start
            async with httpx.AsyncClient() as client:
                while True:
                    try:
                        await client.get("http://localhost:8000/health", timeout=5)
                        logger.debug("Keepalive ping OK / 保活 ping 成功")
                    except Exception as e:
                        logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}")
                    await asyncio.sleep(60)

        def _start_keepalive():
            # Runs in a daemon thread, so it needs its own event loop —
            # uvicorn owns the main thread's loop.
            loop = asyncio.new_event_loop()
            loop.run_until_complete(_keepalive_loop())

        t = threading.Thread(target=_start_keepalive, daemon=True)
        t.start()

        # --- CORS middleware so remote clients (Cloudflare Tunnel / ngrok)
        # can connect. ---
        if transport == "streamable-http":
            _app = mcp.streamable_http_app()
        else:
            _app = mcp.sse_app()
        _app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_methods=["*"],
            allow_headers=["*"],
            expose_headers=["*"],
        )
        logger.info("CORS middleware enabled for remote transport / 已启用 CORS 中间件")
        uvicorn.run(_app, host="0.0.0.0", port=8000)
    else:
        # stdio (or any other FastMCP-supported transport): no network setup.
        mcp.run(transport=transport)
|
||||||
126
test_smoke.py
Normal file
126
test_smoke.py
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
"""Ombre Brain 冒烟测试:验证核心功能链路"""
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
|
||||||
|
# 确保模块路径
|
||||||
|
import sys
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
from utils import load_config, setup_logging
|
||||||
|
from bucket_manager import BucketManager
|
||||||
|
from dehydrator import Dehydrator
|
||||||
|
from decay_engine import DecayEngine
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """Smoke-test the core pipeline end to end.

    Steps: analyze (auto-tagging) → create bucket → search → dehydrate
    (compress) → decay score → diary digest → delete the test bucket.
    Each step prints [OK] / [FAIL] but only a failed create aborts the run,
    since every later step needs the created bucket id.
    """
    config = load_config()
    setup_logging("INFO")
    bm = BucketManager(config)
    dh = Dehydrator(config)
    de = DecayEngine(config, bm)

    print(f"API available: {dh.api_available}")
    print(f"base_url: {dh.base_url}")
    print()

    # ===== 1. auto-tagging =====
    print("=== 1. analyze (自动打标) ===")
    try:
        result = await dh.analyze("今天学了 Python 的 asyncio,感觉收获很大,心情不错")
        print(f" domain: {result['domain']}")
        print(f" valence: {result['valence']}, arousal: {result['arousal']}")
        print(f" tags: {result['tags']}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 2. bucket creation =====
    print("=== 2. create (建桶) ===")
    try:
        bid = await bm.create(
            content="P酱喜欢猫,家里养了一只橘猫叫小橘",
            tags=["猫", "宠物"],
            importance=7,
            domain=["生活"],
            valence=0.8,
            arousal=0.4,
        )
        print(f" bucket_id: {bid}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
        # All remaining steps depend on `bid`, so bail out entirely.
        return
    print()

    # ===== 3. search =====
    print("=== 3. search (检索) ===")
    try:
        hits = await bm.search("猫", limit=3)
        print(f" found {len(hits)} results")
        for h in hits:
            name = h["metadata"].get("name", h["id"])
            print(f" - {name} (score={h['score']:.1f})")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 4. dehydration (compression) =====
    print("=== 4. dehydrate (脱水压缩) ===")
    try:
        text = (
            "这是一段很长的内容用来测试脱水功能。"
            "P酱今天去了咖啡厅,点了一杯拿铁,然后坐在窗边看书看了两个小时。"
            "期间遇到了一个朋友,聊了聊最近的工作情况。回家之后写了会代码。"
        )
        summary = await dh.dehydrate(text, {})
        print(f" summary: {summary[:120]}...")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 5. decay scoring =====
    print("=== 5. decay score (衰减评分) ===")
    try:
        bucket = await bm.get(bid)
        score = de.calculate_score(bucket["metadata"])
        print(f" score: {score:.3f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 6. diary digest =====
    print("=== 6. digest (日记整理) ===")
    try:
        diary = (
            "今天上午写了个 Python 脚本处理数据,下午和朋友去吃了火锅很开心,"
            "晚上失眠了有点焦虑,想了想明天的面试。"
        )
        items = await dh.digest(diary)
        print(f" 拆分出 {len(items)} 条记忆:")
        for it in items:
            print(f" - [{it.get('name','')}] domain={it['domain']} V{it['valence']:.1f}/A{it['arousal']:.1f}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    # ===== 7. cleanup: remove the bucket created in step 2 =====
    print("=== 7. cleanup (删除测试桶) ===")
    try:
        ok = await bm.delete(bid)
        print(f" deleted: {ok}")
        print(" [OK]")
    except Exception as e:
        print(f" [FAIL] {e}")
    print()

    print("=" * 40)
    print("冒烟测试完成!")


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
159
test_tools.py
Normal file
159
test_tools.py
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
"""Ombre Brain MCP tool-level end-to-end test: direct calls to @mcp.tool() functions
|
||||||
|
Ombre Brain MCP 工具层端到端测试:直接调用 @mcp.tool() 函数"""
|
||||||
|
import asyncio
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||||
|
|
||||||
|
from utils import load_config, setup_logging
|
||||||
|
|
||||||
|
# Load config and configure logging before anything touches server.py.
config = load_config()
setup_logging("INFO")

# Must import after config is set, since server.py does module-level init
from server import breath, hold, trace, pulse, grow
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """Tool-level end-to-end test.

    Calls each @mcp.tool() function (pulse, hold, breath, trace, grow)
    directly — no transport involved — counting passes/failures, then
    deletes every bucket via trace(delete=True) and prints a summary.
    """
    passed = 0
    failed = 0

    # ===== pulse =====
    print("=== [1/6] pulse ===")
    try:
        r = await pulse()
        assert "Ombre Brain" in r
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold =====
    print("=== [2/6] hold ===")
    try:
        r = await hold(content="P酱最喜欢的编程语言是 Python,喜欢用 FastAPI 写后端", tags="编程,偏好", importance=8)
        print(f" {r.splitlines()[0]}")
        assert any(kw in r for kw in ["新建", "合并", "📌"])
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold (merge test) =====
    print("=== [2b/6] hold (合并测试) ===")
    try:
        r = await hold(content="P酱也喜欢用 Python 写爬虫和数据分析", tags="编程", importance=6)
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath =====
    print("=== [3/6] breath ===")
    try:
        r = await breath(query="Python 编程", max_results=3)
        print(f" 结果前80字: {r[:80]}...")
        assert "未找到" not in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath (emotion resonance retrieval) =====
    print("=== [3b/6] breath (情感共鸣检索) ===")
    try:
        r = await breath(query="编程", domain="编程", valence=0.8, arousal=0.5)
        print(f" 结果前80字: {r[:80]}...")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # --- Get a bucket ID for subsequent tests ---
    bucket_id = None
    from bucket_manager import BucketManager
    bm = BucketManager(config)
    all_buckets = await bm.list_all()
    if all_buckets:
        bucket_id = all_buckets[0]["id"]

    # ===== trace =====
    print("=== [4/6] trace ===")
    if bucket_id:
        try:
            r = await trace(bucket_id=bucket_id, domain="编程,创作", importance=9)
            print(f" {r}")
            assert "已修改" in r
            print(" [OK]")
            passed += 1
        except Exception as e:
            print(f" [FAIL] {e}")
            failed += 1
    else:
        print(" [SKIP] 没有可编辑的桶")
    print()

    # ===== grow =====
    print("=== [5/6] grow ===")
    try:
        diary = (
            "今天早上复习了线性代数,搞懂了特征值分解。"
            "中午和室友去吃了拉面,聊了聊暑假实习的事。"
            "下午写了一个 Flask 项目的 API 接口。"
            "晚上看了部电影叫《星际穿越》,被结尾感动哭了。"
        )
        r = await grow(content=diary)
        print(f" {r.splitlines()[0]}")
        for line in r.splitlines()[1:]:
            if line.strip():
                print(f" {line}")
        assert "条|新" in r or "整理" in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== cleanup via trace(delete=True) =====
    print("=== [6/6] cleanup (清理全部测试数据) ===")
    try:
        all_buckets = await bm.list_all()
        for b in all_buckets:
            r = await trace(bucket_id=b["id"], delete=True)
            print(f" {r}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== Confirm cleanup =====
    final = await pulse()
    print(f"清理后: {final.splitlines()[0]}")
    print()
    print("=" * 50)
    print(f"MCP tool test complete / 工具测试完成: {passed} passed / {failed} failed")
    if failed == 0:
        print("All passed ✓")
    else:
        print(f"{failed} failed ✗")


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
204
utils.py
Normal file
204
utils.py
Normal file
@@ -0,0 +1,204 @@
|
|||||||
|
# ============================================================
|
||||||
|
# Module: Common Utilities (utils.py)
|
||||||
|
# 模块:通用工具函数
|
||||||
|
#
|
||||||
|
# Provides config loading, logging init, path safety, ID generation, etc.
|
||||||
|
# 提供配置加载、日志初始化、路径安全校验、ID 生成等基础能力
|
||||||
|
#
|
||||||
|
# Depended on by: server.py, bucket_manager.py, dehydrator.py, decay_engine.py
|
||||||
|
# 被谁依赖:server.py, bucket_manager.py, dehydrator.py, decay_engine.py
|
||||||
|
# ============================================================
|
||||||
|
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import uuid
|
||||||
|
import yaml
|
||||||
|
import logging
|
||||||
|
from pathlib import Path
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
|
||||||
|
def load_config(config_path: str = None) -> dict:
    """Load the effective configuration.

    Resolution order (highest wins): environment variables, then the YAML
    file at *config_path* (default: config.yaml next to this module), then
    the built-in defaults below.  Also ensures the three bucket storage
    subdirectories exist before returning.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))

    # Built-in defaults — the system still runs with no config.yaml at all.
    defaults = {
        "transport": "stdio",
        "log_level": "INFO",
        "buckets_dir": os.path.join(module_dir, "buckets"),
        "merge_threshold": 75,
        "dehydration": {
            "model": "deepseek-chat",
            "base_url": "https://api.deepseek.com/v1",
            "api_key": "",
            "max_tokens": 1024,
            "temperature": 0.1,
        },
        "decay": {
            "lambda": 0.05,
            "threshold": 0.3,
            "check_interval_hours": 24,
            "emotion_weights": {
                "base": 1.0,
                "arousal_boost": 0.8,
            },
        },
        "matching": {
            "fuzzy_threshold": 50,
            "max_results": 5,
        },
    }

    # Overlay the user's YAML config, if one exists.
    if config_path is None:
        config_path = os.path.join(module_dir, "config.yaml")

    config = defaults.copy()
    if os.path.exists(config_path):
        try:
            with open(config_path, "r", encoding="utf-8") as fh:
                loaded = yaml.safe_load(fh) or {}
            if isinstance(loaded, dict):
                config = _deep_merge(defaults, loaded)
            else:
                logging.warning(
                    f"Config file is not a valid YAML dict, using defaults / "
                    f"配置文件不是有效的 YAML 字典,使用默认配置: {config_path}"
                )
        except yaml.YAMLError as e:
            logging.warning(
                f"Failed to parse config file, using defaults / "
                f"配置文件解析失败,使用默认配置: {e}"
            )

    # Environment-variable overrides win over everything else.
    # (env var, nested section or None for top level, target key)
    overrides = (
        ("OMBRE_API_KEY", "dehydration", "api_key"),
        ("OMBRE_BASE_URL", "dehydration", "base_url"),
        ("OMBRE_TRANSPORT", None, "transport"),
        ("OMBRE_BUCKETS_DIR", None, "buckets_dir"),
    )
    for env_name, section, key in overrides:
        value = os.environ.get(env_name, "")
        if not value:
            continue
        if section is None:
            config[key] = value
        else:
            config.setdefault(section, {})[key] = value

    # Make sure all three bucket tiers exist on disk.
    for tier in ("permanent", "dynamic", "archive"):
        os.makedirs(os.path.join(config["buckets_dir"], tier), exist_ok=True)

    return config
|
||||||
|
|
||||||
|
|
||||||
|
def _deep_merge(base: dict, override: dict) -> dict:
|
||||||
|
"""
|
||||||
|
Deep-merge two dicts; override values take precedence.
|
||||||
|
深度合并两个字典,override 的值覆盖 base。
|
||||||
|
"""
|
||||||
|
result = base.copy()
|
||||||
|
for key, value in override.items():
|
||||||
|
if key in result and isinstance(result[key], dict) and isinstance(value, dict):
|
||||||
|
result[key] = _deep_merge(result[key], value)
|
||||||
|
else:
|
||||||
|
result[key] = value
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def setup_logging(level: str = "INFO") -> None:
    """Configure the root logger.

    *level* is a case-insensitive level name; anything unrecognized
    falls back to INFO.  In MCP stdio mode, stdout carries the protocol,
    so the handler writes to stderr (StreamHandler's default).
    """
    resolved = getattr(logging, level.upper(), None)
    if not isinstance(resolved, int):
        resolved = logging.INFO

    logging.basicConfig(
        level=resolved,
        format="[%(asctime)s] %(name)s %(levelname)s: %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        handlers=[logging.StreamHandler()],  # stderr, never stdout
    )
|
||||||
|
|
||||||
|
|
||||||
|
def generate_bucket_id() -> str:
    """Return a fresh 12-character lowercase-hex bucket ID.

    A truncated UUID4 — short enough to read, random enough to be unique.
    """
    return f"{uuid.uuid4().int:032x}"[:12]
|
||||||
|
|
||||||
|
|
||||||
|
def sanitize_name(name: str) -> str:
    """Reduce *name* to safe characters for use in file names.

    Keeps word characters, whitespace, CJK, and hyphens; strips edges and
    caps the length at 80.  Non-strings and empty results become
    "unnamed".  Removing "." and "/" blocks path-traversal input.
    """
    if not isinstance(name, str):
        return "unnamed"
    cleaned = re.sub(r"[^\w\s\u4e00-\u9fff-]", "", name, flags=re.UNICODE).strip()[:80]
    return cleaned or "unnamed"
|
||||||
|
|
||||||
|
|
||||||
|
def safe_path(base_dir: str, filename: str) -> Path:
    """Construct a safe file path guaranteed to lie inside *base_dir*.

    Resolves symlinks and ".." components, then verifies containment.

    Raises:
        ValueError: if the resolved path escapes *base_dir*.
    """
    base = Path(base_dir).resolve()
    target = (base / filename).resolve()
    # Bug fix: str(target).startswith(str(base)) would also accept sibling
    # directories sharing a name prefix (e.g. base "/data" vs "/database/x").
    # relative_to() compares whole path components, so it has no such hole.
    try:
        target.relative_to(base)
    except ValueError:
        raise ValueError(
            f"Path safety check failed / 路径安全检查失败: "
            f"{target} is not inside / 不在 {base} 内"
        ) from None
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def count_tokens_approx(text: str) -> int:
    """Roughly estimate the token count of *text*.

    Heuristic: ~1.5 tokens per CJK char, ~1.3 per Latin word, plus a small
    per-character term for everything else.  Used only to decide whether
    dehydration is needed, so precision is not a goal.
    """
    if not text:
        return 0
    cjk = len(re.findall(r"[\u4e00-\u9fff]", text))
    latin_words = len(re.findall(r"[a-zA-Z]+", text))
    return int(cjk * 1.5 + latin_words * 1.3 + len(text) * 0.05)
|
||||||
|
|
||||||
|
|
||||||
|
def now_iso() -> str:
    """Return the current local time as an ISO-8601 string, to the second."""
    return datetime.now().replace(microsecond=0).isoformat()
|
||||||
101
write_memory.py
Normal file
101
write_memory.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
"""
|
||||||
|
Ombre Brain 手动记忆写入工具
|
||||||
|
用途:在 Copilot 端直接写入记忆文件,绕过 MCP 和 API 调用
|
||||||
|
用法:
|
||||||
|
python3 write_memory.py --name "记忆名" --content "内容" --domain "情感" --tags "标签1,标签2"
|
||||||
|
或交互模式:python3 write_memory.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import uuid
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
VAULT_DIR = os.path.expanduser("~/Documents/Obsidian Vault/Ombre Brain/dynamic")
|
||||||
|
|
||||||
|
|
||||||
|
def gen_id():
    """Return a fresh 12-character lowercase-hex memory ID (truncated UUID4)."""
    return f"{uuid.uuid4().int:032x}"[:12]
|
||||||
|
|
||||||
|
|
||||||
|
def write_memory(
    name: str,
    content: str,
    domain: list[str],
    tags: list[str],
    importance: int = 7,
    valence: float = 0.5,
    arousal: float = 0.3,
):
    """Render a memory as Markdown with YAML frontmatter and save it to VAULT_DIR.

    Returns the generated 12-char memory id.
    """
    memory_id = gen_id()
    stamp = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")

    # One "- item" line per entry, matching YAML block-sequence syntax.
    domain_block = "\n".join(f"- {item}" for item in domain)
    tag_block = "\n".join(f"- {item}" for item in tags)

    body = f"""---
activation_count: 1
arousal: {arousal}
created: '{stamp}'
domain:
{domain_block}
id: {memory_id}
importance: {importance}
last_active: '{stamp}'
name: {name}
tags:
{tag_block}
type: dynamic
valence: {valence}
---

{content}
"""

    os.makedirs(VAULT_DIR, exist_ok=True)
    destination = os.path.join(VAULT_DIR, f"{memory_id}.md")
    with open(destination, "w", encoding="utf-8") as handle:
        handle.write(body)

    print(f"✓ 已写入: {destination}")
    print(f" ID: {memory_id} | 名称: {name}")
    return memory_id
|
||||||
|
|
||||||
|
|
||||||
|
def interactive():
    """Prompt for each memory field on stdin, then write the memory file."""
    print("=== Ombre Brain 手动写入 ===")

    def _csv(raw: str) -> list[str]:
        # Split a comma-separated answer, dropping blanks.
        return [part.strip() for part in raw.split(",") if part.strip()]

    name = input("记忆名称: ").strip()
    content = input("内容: ").strip()
    domain = _csv(input("主题域(逗号分隔): "))
    tags = _csv(input("标签(逗号分隔): "))
    importance = int(input("重要性(1-10, 默认7): ").strip() or "7")
    valence = float(input("效价(0-1, 默认0.5): ").strip() or "0.5")
    arousal = float(input("唤醒(0-1, 默认0.3): ").strip() or "0.3")
    write_memory(name, content, domain, tags, importance, valence, arousal)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="手动写入 Ombre Brain 记忆")
    parser.add_argument("--name", help="记忆名称")
    parser.add_argument("--content", help="记忆内容")
    parser.add_argument("--domain", help="主题域,逗号分隔")
    parser.add_argument("--tags", help="标签,逗号分隔")
    parser.add_argument("--importance", type=int, default=7)
    parser.add_argument("--valence", type=float, default=0.5)
    parser.add_argument("--arousal", type=float, default=0.3)
    args = parser.parse_args()

    # CLI mode needs at least name, content and domain;
    # anything less falls back to interactive prompting.
    if args.name and args.content and args.domain:
        write_memory(
            name=args.name,
            content=args.content,
            domain=[d.strip() for d in args.domain.split(",")],
            tags=[t.strip() for t in (args.tags or "").split(",") if t.strip()],
            importance=args.importance,
            valence=args.valence,
            arousal=args.arousal,
        )
    else:
        interactive()
|
||||||
1
zbpack.json
Normal file
1
zbpack.json
Normal file
@@ -0,0 +1 @@
|
|||||||
|
{}
|
||||||
Reference in New Issue
Block a user