From 0d695f71cb11b76897e299f61b00f08c756ff423 Mon Sep 17 00:00:00 2001 From: P0lar1s Date: Wed, 15 Apr 2026 15:44:25 +0800 Subject: [PATCH] init: first commit to Gitea mirror, update README with Docker quick start and new repo URL Co-Authored-By: Claude Sonnet 4.6 --- .claude/hooks/session_breath.py | 70 +++ .claude/settings.json | 30 + .gitignore | 13 + CLAUDE_PROMPT.md | 63 ++ Dockerfile | 33 ++ LICENSE | 21 + README.md | 418 +++++++++++++ backup_20260405_2124/README.md | 205 +++++++ backup_20260405_2124/bucket_manager.py | 755 ++++++++++++++++++++++++ backup_20260405_2124/decay_engine.py | 242 ++++++++ backup_20260405_2124/server.py | 536 +++++++++++++++++ bucket_manager.py | 781 +++++++++++++++++++++++++ config.example.yaml | 82 +++ decay_engine.py | 279 +++++++++ dehydrator.py | 779 ++++++++++++++++++++++++ docker-compose.yml | 48 ++ migrate_to_domains.py | 118 ++++ reclassify_api.py | 121 ++++ reclassify_domains.py | 198 +++++++ render.yaml | 21 + requirements.txt | 25 + server.py | 620 ++++++++++++++++++++ test_smoke.py | 126 ++++ test_tools.py | 159 +++++ utils.py | 204 +++++++ write_memory.py | 101 ++++ zbpack.json | 1 + 27 files changed, 6049 insertions(+) create mode 100644 .claude/hooks/session_breath.py create mode 100644 .claude/settings.json create mode 100644 .gitignore create mode 100644 CLAUDE_PROMPT.md create mode 100644 Dockerfile create mode 100644 LICENSE create mode 100644 README.md create mode 100644 backup_20260405_2124/README.md create mode 100644 backup_20260405_2124/bucket_manager.py create mode 100644 backup_20260405_2124/decay_engine.py create mode 100644 backup_20260405_2124/server.py create mode 100644 bucket_manager.py create mode 100644 config.example.yaml create mode 100644 decay_engine.py create mode 100644 dehydrator.py create mode 100644 docker-compose.yml create mode 100644 migrate_to_domains.py create mode 100644 reclassify_api.py create mode 100644 reclassify_domains.py create mode 100644 render.yaml create mode 100644 
def _extract_text(raw):
    """Return the joined text items of an MCP ``tools/call`` response body.

    Args:
        raw: Decoded HTTP response body. Either a plain JSON-RPC object or a
            Server-Sent-Events stream whose ``data:`` lines carry JSON-RPC
            objects.

    Returns:
        The ``text`` fields of all ``type == "text"`` content items joined
        with newlines and stripped, or ``""`` when nothing usable is found.
    """
    try:
        data = json.loads(raw)
    except ValueError:
        # Not plain JSON: the streamable-http transport may answer with an
        # SSE stream, so look for the first ``data:`` line holding a result.
        data = None
        for line in raw.splitlines():
            if not line.startswith("data:"):
                continue
            try:
                candidate = json.loads(line[len("data:"):].strip())
            except ValueError:
                continue
            if isinstance(candidate, dict) and "result" in candidate:
                data = candidate
                break

    if not isinstance(data, dict):
        return ""
    result = data.get("result")
    content = result.get("content") if isinstance(result, dict) else None
    if not isinstance(content, list):
        return ""

    text_parts = [
        item.get("text", "")
        for item in content
        if isinstance(item, dict)
        and item.get("type") == "text"
        and isinstance(item.get("text", ""), str)
    ]
    return "\n".join(text_parts).strip()


def main():
    """SessionStart hook: surface top unresolved memories into the session.

    Calls the ``breath`` tool of the Ombre Brain MCP server (empty query =
    surfacing mode) over HTTP and prints the returned text to stdout.

    Environment:
        OMBRE_HOOK_URL: server base URL (default ``http://localhost:8000``).
        OMBRE_HOOK_SKIP: set to ``"1"`` to skip the hook entirely.

    Raises:
        SystemExit: always, with exit code 0 — the hook is best-effort and
            must never block session start, whatever goes wrong.
    """
    # Allow disabling the hook via env var.
    if os.environ.get("OMBRE_HOOK_SKIP") == "1":
        sys.exit(0)

    base_url = os.environ.get("OMBRE_HOOK_URL", "http://localhost:8000").rstrip("/")

    # JSON-RPC "tools/call" for the breath tool; an empty query triggers
    # surfacing mode.
    payload = json.dumps({
        "jsonrpc": "2.0",
        "id": 1,
        "method": "tools/call",
        "params": {
            "name": "breath",
            "arguments": {"query": "", "max_results": 2}
        }
    }).encode("utf-8")

    try:
        # Request() itself raises ValueError for a URL without a scheme, so
        # it is built inside the try block: a malformed OMBRE_HOOK_URL must
        # not crash the hook.
        req = urllib.request.Request(
            f"{base_url}/mcp",
            data=payload,
            headers={
                "Content-Type": "application/json",
                # The MCP Streamable HTTP transport requires clients to
                # accept both plain JSON and SSE responses.
                "Accept": "application/json, text/event-stream",
            },
            method="POST",
        )
        # NOTE(review): a server running in stateful session mode may also
        # require an initialize handshake / session id before tools/call;
        # that is not handled here — confirm against the server setup.
        with urllib.request.urlopen(req, timeout=8) as response:
            raw = response.read().decode("utf-8")

        output = _extract_text(raw)
        # The literal below is compared against the server's reply for
        # an empty pool; in that case nothing is printed.
        if output and output != "权重池平静,没有需要处理的记忆。":
            print(f"[Ombre Brain - 记忆浮现]\n{output}")
    except Exception:
        # Deliberate best-effort: server not running (local stdio mode),
        # network error, bad URL, or unexpected payload — stay silent.
        pass

    sys.exit(0)
+ } + ] + } + ] + } +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9af39a6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +__pycache__/ +*.pyc +*.pyo +.env +.venv/ +.claude/settings.local.json +.claude/hooks/__pycache__/ +.vscode/ +config.yaml +buckets/dynamic/ +buckets/archive/ +buckets/permanent/ +scarp_paper diff --git a/CLAUDE_PROMPT.md b/CLAUDE_PROMPT.md new file mode 100644 index 0000000..1bf17fb --- /dev/null +++ b/CLAUDE_PROMPT.md @@ -0,0 +1,63 @@ +# Ombre Brain 记忆系统 —— Claude 端使用指南 +你拥有了一个叫 Ombre Brain 的永久记忆系统。通过它你可以跨对话记住你想记住的任何事情和回忆你的,我的以及你与我的过往。 + +## 你有这些能力 + +| 能力 | 场景 | +|------|-----------| +| `breath` | **每次对话最开头**调用一次(不传参数)——就像睁眼看手机,看看有没有未解决的事浮上来。有明确话题时传 `query` 关键词检索 | +| `hold` | 你很想记住你自己当下的感受,或当下发生的单个事件时 | +| `grow` | 当**一天结束时**或**用户发来一大段日记/总结**时调用。你可以把其中**你想记住的事件**扔进去,它会自动拆分整理成多个记忆盒子存进你的大脑 | +| `trace` | 当你或用户认为"这个记错了"、"帮我改一下"时调用,手动修正记忆的元数据;**某件事解决了**时用 `resolved=1` 让它沉底;**需要删除**时用 `delete=True` | +| `pulse` | 用户想看看你都记得什么、或问记忆系统状态时调用 | + +## 使用原则 + +### 主动调用 +- **对话开头**:无参数调用 `breath()`,看看有没有高权重的未解决记忆需要跟进 +- **提到过去**:用户说"上次"、"之前"、"还记得"时,用 `breath(query="关键词")` 检索 +- **新信息**:用 `hold` 留住你想留下的感受或经历 +- **日终整理**:用 `grow` 在**一天结束时**整理你的一天 + +### 无须调用 +- 闲聊水话不需要存("哈哈"、"好的"、"嗯嗯") +- 已经记过的信息不要重复存 +- 短期信息不存("帮我查个天气") + +### 权重池机制 +记忆系统是一个**权重池**,不是分类柜: +- 未解决 + 高情绪强度的桶 → 权重最高,`breath()` 时主动浮现 +- 已解决的桶 → 权重骤降,沉底等待关键词激活 +- 用 `trace(bucket_id, resolved=1)` 标记某件事已解决,让它沉底 +- 用 `trace(bucket_id, resolved=0)` 重新激活一个沉底的记忆 + +### breath 的参数技巧 +- 无参数:浮现模式,推送权重最高的未解决桶(每次对话开头用这个) +- `query`:用关键词而不是整句话,检索更准 +- `domain`:如果明确知道话题领域可以传(如 "编程" 或 "恋爱"),缩小搜索范围 +- `valence` + `arousal`:如果用户当前情绪明显,可以传情感坐标来触发情感共鸣检索 + +检索结果不足时,系统有概率从低权重旧桶里随机漂上来 1-3 条记忆,标注为 `[surface_type: random]`。这不是检索结果,是"忽然想起来"——你可以自然地提起它,也可以忽略。 + +### trace 的参数技巧 +- `resolved=1`:标记已解决,桶权重骤降到 5%,基本沉底 +- `resolved=0`:重新激活,让它重新参与浮现排序 +- `delete=True`:彻底删除这个桶(不可恢复) +- 其余字段(name/domain/valence/arousal/importance/tags):只传需要改的,-1 或空串表示不改 + +### hold vs grow +- 一句话的事 → 
`hold`("我喜欢吃饺子") +- 一大段的事 → `grow`("今天发生了一堆事...") +- **需要批量存多条记忆时,用 `grow` 把内容拼成一段发一次,不要多次调用 `hold`**。token 是稀缺资源——每次工具调用都会消耗 token,多次 hold 远比 1 次 grow 贵 + +### 省配额原则 +- **一次 grow 胜过多次 hold**:要存多条记忆时,合成一段文本调用一次 grow +- **对话太长时建议用户换窗口**:同一窗口聊越久,每次工具调用的底价越高(因为要重新读完整对话历史) +- **工具返回值很短,无需复述**:收到 `新建→桶名 域名` 后直接跟用户说,无需展开解释 + +### 核心准则桶(pinned) +- `hold(content="...", pinned=True)` 创建钉选桶——不衰减、不合并、importance 锁定 10 +- `trace(bucket_id, pinned=1)` 把已有桶钉选为核心准则 +- `trace(bucket_id, pinned=0)` 取消钉选 +- 适用场景:用户教会你的永久知识、核心原则、绝不能忘的事 +- 钉选桶不会出现在「浮现未解决记忆」里,但关键词检索时始终可达 diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..c1a4780 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,33 @@ +# ============================================================ +# Ombre Brain Docker Build +# Docker 构建文件 +# +# Build: docker build -t ombre-brain . +# Run: docker run -e OMBRE_API_KEY=your-key -p 8000:8000 ombre-brain +# ============================================================ + +FROM python:3.12-slim + +WORKDIR /app + +# Install dependencies first (leverage Docker cache) +# 先装依赖(利用 Docker 缓存) +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy project files / 复制项目文件 +COPY *.py . 
+COPY config.example.yaml ./config.yaml + +# Persistent mount point: bucket data +# 持久化挂载点:记忆数据 +VOLUME ["/app/buckets"] + +# Default to streamable-http for container (remote access) +# 容器场景默认用 streamable-http +ENV OMBRE_TRANSPORT=streamable-http +ENV OMBRE_BUCKETS_DIR=/app/buckets + +EXPOSE 8000 + +CMD ["python", "server.py"] diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..74b9bcb --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 P0lar1zzZ + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..d39d66c --- /dev/null +++ b/README.md @@ -0,0 +1,418 @@ +# Ombre Brain + +一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。 + +A long-term emotional memory system for Claude. Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve. 
+ +> **⚠️ 仓库临时迁移 / Repo temporarily moved** +> GitHub 访问受限期间,代码暂时托管在 Gitea: +> **https://git.p0lar1s.uk/P0lar1s/Ombre_Brain** +> 下面的 `git clone` 地址请替换为上面这个。 + +--- + +## 快速开始 / Quick Start(Docker,推荐) + +> 这是最简单的方式,不需要装 Python,不需要懂命令行,跟着做就行。 + +**前置条件:** 电脑上装了 [Docker Desktop](https://www.docker.com/products/docker-desktop/),并且已经打开。 + +**第一步:拉取代码** + +```bash +git clone https://git.p0lar1s.uk/P0lar1s/Ombre_Brain.git +cd Ombre_Brain +``` + +**第二步:创建 `.env` 文件** + +在项目目录下新建一个叫 `.env` 的文件(注意有个点),内容填: + +``` +OMBRE_API_KEY=你的DeepSeek或其他API密钥 +``` + +没有 API key 也能用,脱水压缩会降级到本地模式,只是效果差一点。那就写: + +``` +OMBRE_API_KEY= +``` + +**第三步:配置 `docker-compose.yml`(指向你的 Obsidian Vault)** + +用文本编辑器打开 `docker-compose.yml`,找到这一行: + +```yaml +- ./buckets:/data +``` + +改成你的 Obsidian Vault 里 `Ombre Brain` 文件夹的路径,例如: + +```yaml +- /Users/你的用户名/Documents/Obsidian Vault/Ombre Brain:/data +``` + +> 不知道路径?在 Obsidian 里右键那个文件夹 → 「在访达中显示」,然后把地址栏的路径复制过来。 +> 不想挂载 Obsidian 也行,保持 `./buckets:/data` 不动,数据会存在项目目录的 `buckets/` 文件夹里。 + +**第四步:启动** + +```bash +docker compose up -d +``` + +等它跑完,看到 `Started` 就好了。 + +**验证是否正常运行:** + +```bash +docker logs ombre-brain +``` + +看到 `Uvicorn running on http://0.0.0.0:8000` 说明成功了。 + +--- + +**接入 Claude.ai(远程访问)** + +需要额外配置 Cloudflare Tunnel,把服务暴露到公网。参考下面「接入 Claude.ai (远程)」章节。 + +**接入 Claude Desktop(本地)** + +不需要 Docker,直接用 Python 本地跑。参考下面「安装 / Setup」章节。 + +--- + +[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain) + +--- + +## 它是什么 / What is this + +Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。 + +Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。 + +Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends. + +Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works. 
+ +核心特点 / Key features: + +- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。 + Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad". + +- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。 + Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower. + +- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。 + Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start. + +- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。 + Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected. + +- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。 + Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional. + +## 边界说明 / Design boundaries + +官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。 + +Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。 + +每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。 + +--- + +Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it. + +Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete. + +Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. 
Not a rebuild. A continuation. + +## 架构 / Architecture + +``` +Claude ←→ MCP Protocol ←→ server.py + │ + ┌───────────────┼───────────────┐ + │ │ │ + bucket_manager dehydrator decay_engine + (CRUD + 搜索) (压缩 + 打标) (遗忘曲线) + │ + Obsidian Vault (Markdown files) +``` + +5 个 MCP 工具 / 5 MCP tools: + +| 工具 Tool | 作用 Purpose | +|-----------|-------------| +| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories | +| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging | +| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets | +| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete | +| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing | + +## 安装 / Setup + +### 环境要求 / Requirements + +- Python 3.11+ +- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`) + An Obsidian vault (optional — without one, it uses a local `buckets/` directory) + +### 步骤 / Steps + +```bash +git clone https://github.com/P0lar1zzZ/Ombre-Brain.git +cd Ombre-Brain + +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate + +pip install -r requirements.txt +``` + +复制配置文件并按需修改 / Copy config and edit as needed: + +```bash +cp config.example.yaml config.yaml +``` + +如果你要用 API 做脱水压缩和自动打标(推荐,效果好很多),设置环境变量: +If you want API-powered dehydration and tagging (recommended, much better quality): + +```bash +export OMBRE_API_KEY="your-api-key" +``` + +支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。 +Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`. 
+ +### 接入 Claude Desktop / Connect to Claude Desktop + +在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`): + +Add to your Claude Desktop config: + +```json +{ + "mcpServers": { + "ombre-brain": { + "command": "python", + "args": ["/path/to/Ombre-Brain/server.py"], + "env": { + "OMBRE_API_KEY": "your-api-key" + } + } + } +} +``` + +### 接入 Claude.ai (远程) / Connect to Claude.ai (remote) + +需要 HTTP 传输 + 隧道。可以用 Docker: +Requires HTTP transport + tunnel. Docker setup: + +```bash +echo "OMBRE_API_KEY=your-api-key" > .env +docker-compose up -d +``` + +`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。 +The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`. + +### 指向 Obsidian / Point to Obsidian + +在 `config.yaml` 里设置 `buckets_dir`: +Set `buckets_dir` in `config.yaml`: + +```yaml +buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain" +``` + +不设的话,默认用项目目录下的 `buckets/`。 +If not set, defaults to `buckets/` in the project directory. + +## 配置 / Configuration + +所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个: +All parameters in `config.yaml` (copy from `config.example.yaml`). 
Key ones: + +| 参数 Parameter | 说明 Description | 默认 Default | +|---|---|---| +| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` | +| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` | +| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` | +| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` | +| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` | +| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` | +| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` | + +敏感配置用环境变量: +Sensitive config via env vars: +- `OMBRE_API_KEY` — LLM API 密钥 +- `OMBRE_TRANSPORT` — 覆盖传输方式 +- `OMBRE_BUCKETS_DIR` — 覆盖存储路径 + +## 衰减公式 / Decay Formula + +$$final\_score = time\_weight \times base\_score$$ + +$$base\_score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$ + +时间系数(乘数,优先级最高)/ Time weight (multiplier, highest priority): + +| 距今天数 Days since active | 时间系数 Weight | +|---|---| +| 0–1 天 | 1.0 | +| 第 2 天 | 0.9 | +| 之后每天约降 10% | `max(0.3, 0.9 × e^{-0.2197 × (days-2)})` | +| 7 天后稳定 | ≈ 0.3(不归零)| + +- `importance`: 1-10,记忆重要性 / memory importance +- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay +- `days`: 距上次激活的天数 / days since last activation +- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget +- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered +- `pinned=true` 的桶:不衰减、不合并、importance 锁定 10 / `pinned` buckets: never decay, never merge, importance locked at 10 + +## 给 Claude 的使用指南 / Usage Guide for Claude + +`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。 + +`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions. 
+ +## 工具脚本 / Utility Scripts + +| 脚本 Script | 用途 Purpose | +|---|---| +| `write_memory.py` | 手动写入记忆,绕过 MCP / Manually write memories, bypass MCP | +| `migrate_to_domains.py` | 迁移平铺文件到域子目录 / Migrate flat files to domain subdirs | +| `reclassify_domains.py` | 基于关键词重分类 / Reclassify by keywords | +| `reclassify_api.py` | 用 API 重打标未分类桶 / Re-tag uncategorized buckets via API | +| `test_smoke.py` | 冒烟测试 / Smoke test | + +## 部署 / Deploy + +### Render + +[![Deploy to Render](https://render.com/images/deploy-to-render-button.svg)](https://render.com/deploy?repo=https://github.com/P0lar1zzZ/Ombre-Brain) + +> ⚠️ **免费层不可用**:Render 免费层**不支持持久化磁盘**,服务重启后记忆数据会丢失,且会在无流量时休眠。**必须使用 Starter($7/mo)或以上**才能正常使用。 +> **Free tier won't work**: Render free tier has **no persistent disk** — all memory data is lost on restart. It also sleeps on inactivity. **Starter plan ($7/mo) or above is required.** + +项目根目录已包含 `render.yaml`,点击按钮后: +1. (可选)设置 `OMBRE_API_KEY`:任何 OpenAI 兼容 API 的 key,不填则自动降级为本地关键词提取 +2. (可选)设置 `OMBRE_BASE_URL`:API 地址,支持任意 OpenAI 化地址,如 `https://api.deepseek.com/v1` / `http://123.1.1.1:7689/v1` / `http://your-ollama:11434/v1` +3. Render 自动挂载持久化磁盘到 `/opt/render/project/src/buckets` +4. 部署后 MCP URL:`https://<你的服务名>.onrender.com/mcp` + +`render.yaml` is included. After clicking the button: +1. (Optional) `OMBRE_API_KEY`: any OpenAI-compatible key; omit to fall back to local keyword extraction +2. (Optional) `OMBRE_BASE_URL`: any OpenAI-compatible endpoint, e.g. `https://api.deepseek.com/v1`, `http://123.1.1.1:7689/v1`, `http://your-ollama:11434/v1` +3. Persistent disk auto-mounts at `/opt/render/project/src/buckets` +4. 
MCP URL after deploy: `https://YOUR-SERVICE-NAME.onrender.com/mcp` + +### Zeabur + +> 💡 **Zeabur 的定价模式**:Zeabur 是「买 VPS + 平台托管」,你先购买一台服务器(最低腾讯云新加坡 $2/mo、火山引擎 $3/mo),Volume 直接挂在该服务器上,**数据天然持久化,无丢失问题**。另需订阅 Zeabur 管理方案(Developer $5/mo),总计约 $7-8/mo 起。 +> **Zeabur pricing model**: You buy a VPS first (cheapest: Tencent Cloud Singapore ~$2/mo, Volcano Engine ~$3/mo), then add Zeabur's Developer plan ($5/mo) for management. Volumes mount directly on your server — **data is always persistent, no cold-start data loss**. Total ~$7-8/mo minimum. + +**步骤 / Steps:** + +1. **创建项目 / Create project** + - 打开 [zeabur.com](https://zeabur.com) → 购买一台服务器 → **New Project** → **Deploy from GitHub** + - 先 Fork 本仓库到自己 GitHub 账号,然后在 Zeabur 选择 `你的用户名/Ombre-Brain` + - Zeabur 会自动检测到根目录的 `Dockerfile` 并使用 Docker 方式构建 + - Go to [zeabur.com](https://zeabur.com) → buy a server → **New Project** → **Deploy from GitHub** + - Fork this repo first, then select `your-username/Ombre-Brain` in Zeabur + - Zeabur auto-detects the `Dockerfile` in root and builds via Docker + +2. **设置环境变量 / Set environment variables**(服务页面 → **Variables** 标签页) + - `OMBRE_API_KEY`(可选)— LLM API 密钥,不填则自动降级为本地关键词提取 + - `OMBRE_BASE_URL`(可选)— API 地址,如 `https://api.deepseek.com/v1` + + > ⚠️ **不需要**手动设置 `OMBRE_TRANSPORT` 和 `OMBRE_BUCKETS_DIR`,Dockerfile 里已经设好了默认值。Zeabur 对单阶段 Dockerfile 会自动注入控制台设置的环境变量。 + > You do **NOT** need to set `OMBRE_TRANSPORT` or `OMBRE_BUCKETS_DIR` — defaults are baked into the Dockerfile. Zeabur auto-injects dashboard env vars for single-stage Dockerfiles. + +3. **挂载持久存储 / Mount persistent volume**(服务页面 → **Volumes** 标签页) + - Volume ID:填 `ombre-buckets`(或任意名) + - 挂载路径 / Path:**`/app/buckets`** + - ⚠️ 不挂载的话,每次重新部署记忆数据会丢失 + - ⚠️ Without this, memory data is lost on every redeploy + +4. 
**配置端口 / Configure port**(服务页面 → **Networking** 标签页) + - Port Name:`web`(或任意名) + - Port:**`8000`** + - Port Type:**`HTTP`** + - 然后点 **Generate Domain** 生成一个 `xxx.zeabur.app` 域名 + - Then click **Generate Domain** to get a `xxx.zeabur.app` domain + +5. **验证 / Verify** + - 访问 `https://<你的域名>.zeabur.app/health`,应返回 JSON + - Visit `https://YOUR-DOMAIN.zeabur.app/health` — should return JSON + - 最终 MCP 地址 / MCP URL:`https://<你的域名>.zeabur.app/mcp` + +**常见问题 / Troubleshooting:** + +| 现象 Symptom | 原因 Cause | 解决 Fix | +|---|---|---| +| 域名无法访问 / Domain unreachable | 没配端口 / Port not configured | Networking 标签页加 port 8000 (HTTP) | +| 域名无法访问 / Domain unreachable | `OMBRE_TRANSPORT` 未设置,服务以 stdio 模式启动,不监听任何端口 / Service started in stdio mode — not listening on any port | **Variables 标签页确认设置 `OMBRE_TRANSPORT=streamable-http`,然后重新部署** | +| 构建失败 / Build failed | Dockerfile 未被识别 / Dockerfile not detected | 确认仓库根目录有 `Dockerfile`(大小写敏感) | +| 服务启动后立刻退出 | `OMBRE_TRANSPORT` 被覆盖为 `stdio` | 检查 Variables 里有没有多余的 `OMBRE_TRANSPORT=stdio`,删掉即可 | +| 重启后记忆丢失 / Data lost on restart | Volume 未挂载 | Volumes 标签页挂载到 `/app/buckets` | + +### 使用 Cloudflare Tunnel 或 ngrok 连接 / Connecting via Cloudflare Tunnel or ngrok + +> ℹ️ 自 v1.1 起,server.py 在 HTTP 模式下已自动添加 CORS 中间件,无需额外配置。 +> Since v1.1, server.py automatically enables CORS middleware in HTTP mode — no extra config needed. + +使用隧道连接时,确保以下条件满足: +When connecting via tunnel, ensure: + +1. **服务器必须运行在 HTTP 模式** / Server must use HTTP transport + ```bash + OMBRE_TRANSPORT=streamable-http python server.py + ``` + 或 Docker: + ```bash + docker-compose up -d + ``` + +2. **在 Claude.ai 网页版添加 MCP 服务器** / Adding to Claude.ai web + - URL 格式 / URL format: `https://YOUR-TUNNEL-NAME.trycloudflare.com/mcp` + - 或 ngrok / or ngrok: `https://YOUR-SUBDOMAIN.ngrok-free.app/mcp` + - 先访问 `/health` 验证连接 / Verify first: `https://YOUR-TUNNEL-HOST/health` should return `{"status":"ok",...}` + +3. 
**已知限制 / Known limitations** + - Cloudflare Tunnel 免费版有空闲超时(约 10 分钟),系统内置保活 ping 可缓解但不能完全消除 + - Free Cloudflare Tunnel has idle timeout (~10 min); built-in keepalive pings mitigate but can't fully prevent it + - ngrok 免费版有请求速率限制 / ngrok free tier has rate limits + - 如果连接仍失败,检查隧道是否正在运行、服务是否以 `streamable-http` 模式启动 + - If connection still fails, verify the tunnel is running and the server started in `streamable-http` mode + +| 现象 Symptom | 原因 Cause | 解决 Fix | +|---|---|---| +| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 服务以 stdio 模式运行 / Server in stdio mode | 设置 `OMBRE_TRANSPORT=streamable-http` 后重启 | +| 网页版无法连接隧道 URL / Web can't connect to tunnel URL | 旧版 server.py 缺少 CORS 头 / Missing CORS headers | 拉取最新代码,CORS 已内置 / Pull latest — CORS is now built-in | +| `/health` 返回 200 但 MCP 连不上 / `/health` 200 but MCP fails | 路径错误 / Wrong path | MCP URL 末尾必须是 `/mcp` 而非 `/` | +| 隧道连接偶尔断开 / Tunnel disconnects intermittently | Cloudflare Tunnel 空闲超时 / Idle timeout | 保活 ping 已内置,若仍断开可缩短隧道超时配置 | + +--- + +### Session Start Hook(自动 breath) + +部署后,如果你使用 Claude Code,可以在项目内激活自动浮现 hook: +`.claude/settings.json` 已配置好 `SessionStart` hook,每次新会话或恢复会话时自动触发 `breath`,把最高权重未解决记忆推入上下文。 + +**仅在远程 HTTP 模式下有效**(`OMBRE_TRANSPORT=streamable-http`)。本地 stdio 模式下 hook 会安静退出,不影响正常使用。 + +可以通过 `OMBRE_HOOK_URL` 环境变量指定服务器地址(默认 `http://localhost:8000`),或者设置 `OMBRE_HOOK_SKIP=1` 临时禁用。 + +If using Claude Code, `.claude/settings.json` configures a `SessionStart` hook that auto-calls `breath` on each new or resumed session, surfacing your highest-weight unresolved memories as context. Only active in remote HTTP mode. Set `OMBRE_HOOK_SKIP=1` to disable temporarily. + +## License + +MIT diff --git a/backup_20260405_2124/README.md b/backup_20260405_2124/README.md new file mode 100644 index 0000000..b6932b0 --- /dev/null +++ b/backup_20260405_2124/README.md @@ -0,0 +1,205 @@ +# Ombre Brain + +一个给 Claude 用的长期情绪记忆系统。基于 Russell 效价/唤醒度坐标打标,Obsidian 做存储层,MCP 接入,带遗忘曲线。 + +A long-term emotional memory system for Claude. 
Tags memories using Russell's valence/arousal coordinates, stores them as Obsidian-compatible Markdown, connects via MCP, and has a forgetting curve. + +--- + +## 它是什么 / What is this + +Claude 没有跨对话记忆。每次对话结束,之前聊过的所有东西都会消失。 + +Ombre Brain 给了它一套持久记忆——不是那种冷冰冰的键值存储,而是带情感坐标的、会自然衰减的、像人类记忆一样会遗忘和浮现的系统。 + +Claude has no cross-conversation memory. Everything from a previous chat vanishes once it ends. + +Ombre Brain gives it persistent memory — not cold key-value storage, but a system with emotional coordinates, natural decay, and forgetting/surfacing mechanics that loosely mimic how human memory works. + +核心特点 / Key features: + +- **情感坐标打标 / Emotional tagging**: 每条记忆用 Russell 环形情感模型的 valence(效价)和 arousal(唤醒度)两个连续维度标记。不是"开心/难过"这种离散标签。 + Each memory is tagged with two continuous dimensions from Russell's circumplex model: valence and arousal. Not discrete labels like "happy/sad". + +- **自然遗忘 / Natural forgetting**: 改进版艾宾浩斯遗忘曲线。不活跃的记忆自动衰减归档,高情绪强度的记忆衰减更慢。 + Modified Ebbinghaus forgetting curve. Inactive memories naturally decay and archive. High-arousal memories decay slower. + +- **权重池浮现 / Weight pool surfacing**: 记忆不是被动检索的,它们会主动浮现——未解决的、情绪强烈的记忆权重更高,会在对话开头自动推送。 + Memories aren't just passively retrieved — they actively surface. Unresolved, emotionally intense memories carry higher weight and get pushed at conversation start. + +- **Obsidian 原生 / Obsidian-native**: 每个记忆桶就是一个 Markdown 文件,YAML frontmatter 存元数据。可以直接在 Obsidian 里浏览、编辑、搜索。自动注入 `[[双链]]`。 + Each memory bucket is a Markdown file with YAML frontmatter. Browse, edit, and search directly in Obsidian. Wikilinks are auto-injected. + +- **API 降级 / API degradation**: 脱水压缩和自动打标优先用廉价 LLM API(DeepSeek 等),API 不可用时自动降级到本地关键词分析——始终可用。 + Dehydration and auto-tagging prefer a cheap LLM API (DeepSeek etc.). When the API is unavailable, it degrades to local keyword analysis — always functional. 
+ +## 边界说明 / Design boundaries + +官方记忆功能已经在做身份层的事了——你是谁,你有什么偏好,你们的关系是什么。那一层交给它,Ombre Brain不打算造重复的轮子。 + +Ombre Brain 的边界是时间里发生的事,不是你是谁。它记住的是:你们聊过什么,经历了什么,哪些事情还悬在那里没有解决。两层配合用,才是完整的。 + +每次新对话,Claude 从零开始——但它能从 Ombre Brain 里找回跟你有关的一切。不是重建,是接续。 + +--- + +Official memory already handles the identity layer — who you are, what you prefer, what your relationship is. That layer belongs there. Ombre Brain isn't trying to duplicate it. + +Ombre Brain's boundary is *what happened in time*, not *who you are*. It holds conversations, experiences, unresolved things. The two layers together are what make it feel complete. + +Each new conversation starts fresh — but Claude can reach back through Ombre Brain and find everything that happened between you. Not a rebuild. A continuation. + +## 架构 / Architecture + +``` +Claude ←→ MCP Protocol ←→ server.py + │ + ┌───────────────┼───────────────┐ + │ │ │ + bucket_manager dehydrator decay_engine + (CRUD + 搜索) (压缩 + 打标) (遗忘曲线) + │ + Obsidian Vault (Markdown files) +``` + +5 个 MCP 工具 / 5 MCP tools: + +| 工具 Tool | 作用 Purpose | +|-----------|-------------| +| `breath` | 浮现或检索记忆。无参数=推送未解决记忆;有参数=关键词+情感检索 / Surface or search memories | +| `hold` | 存储单条记忆,自动打标+合并相似桶 / Store a single memory with auto-tagging | +| `grow` | 日记归档,自动拆分长内容为多个记忆桶 / Diary digest, auto-split into multiple buckets | +| `trace` | 修改元数据、标记已解决、删除 / Modify metadata, mark resolved, delete | +| `pulse` | 系统状态 + 所有记忆桶列表 / System status + bucket listing | + +## 安装 / Setup + +### 环境要求 / Requirements + +- Python 3.11+ +- 一个 Obsidian Vault(可选,不用也行,会在项目目录下自建 `buckets/`) + An Obsidian vault (optional — without one, it uses a local `buckets/` directory) + +### 步骤 / Steps + +```bash +git clone https://github.com/P0lar1zzZ/Ombre-Brain.git +cd Ombre-Brain + +python -m venv .venv +source .venv/bin/activate # Windows: .venv\Scripts\activate + +pip install -r requirements.txt +``` + +复制配置文件并按需修改 / Copy config and edit as needed: + +```bash +cp config.example.yaml config.yaml +``` + +如果你要用 API 
做脱水压缩和自动打标(推荐,效果好很多),设置环境变量: +If you want API-powered dehydration and tagging (recommended, much better quality): + +```bash +export OMBRE_API_KEY="your-api-key" +``` + +支持任何 OpenAI 兼容 API。在 `config.yaml` 里改 `base_url` 和 `model` 就行。 +Supports any OpenAI-compatible API. Just change `base_url` and `model` in `config.yaml`. + +### 接入 Claude Desktop / Connect to Claude Desktop + +在 Claude Desktop 配置文件中添加(macOS: `~/Library/Application Support/Claude/claude_desktop_config.json`): + +Add to your Claude Desktop config: + +```json +{ + "mcpServers": { + "ombre-brain": { + "command": "python", + "args": ["/path/to/Ombre-Brain/server.py"], + "env": { + "OMBRE_API_KEY": "your-api-key" + } + } + } +} +``` + +### 接入 Claude.ai (远程) / Connect to Claude.ai (remote) + +需要 HTTP 传输 + 隧道。可以用 Docker: +Requires HTTP transport + tunnel. Docker setup: + +```bash +echo "OMBRE_API_KEY=your-api-key" > .env +docker-compose up -d +``` + +`docker-compose.yml` 里配好了 Cloudflare Tunnel。你需要自己在 `~/.cloudflared/` 下放凭证和路由配置。 +The `docker-compose.yml` includes Cloudflare Tunnel. You'll need your own credentials under `~/.cloudflared/`. + +### 指向 Obsidian / Point to Obsidian + +在 `config.yaml` 里设置 `buckets_dir`: +Set `buckets_dir` in `config.yaml`: + +```yaml +buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain" +``` + +不设的话,默认用项目目录下的 `buckets/`。 +If not set, defaults to `buckets/` in the project directory. + +## 配置 / Configuration + +所有参数在 `config.yaml`(从 `config.example.yaml` 复制)。关键的几个: +All parameters in `config.yaml` (copy from `config.example.yaml`). 
Key ones: + +| 参数 Parameter | 说明 Description | 默认 Default | +|---|---|---| +| `transport` | `stdio`(本地)/ `streamable-http`(远程)| `stdio` | +| `buckets_dir` | 记忆桶存储路径 / Bucket storage path | `./buckets/` | +| `dehydration.model` | 脱水用的 LLM 模型 / LLM model for dehydration | `deepseek-chat` | +| `dehydration.base_url` | API 地址 / API endpoint | `https://api.deepseek.com/v1` | +| `decay.lambda` | 衰减速率,越大越快忘 / Decay rate | `0.05` | +| `decay.threshold` | 归档阈值 / Archive threshold | `0.3` | +| `merge_threshold` | 合并相似度阈值 (0-100) / Merge similarity | `75` | + +敏感配置用环境变量: +Sensitive config via env vars: +- `OMBRE_API_KEY` — LLM API 密钥 +- `OMBRE_TRANSPORT` — 覆盖传输方式 +- `OMBRE_BUCKETS_DIR` — 覆盖存储路径 + +## 衰减公式 / Decay Formula + +$$Score = Importance \times activation\_count^{0.3} \times e^{-\lambda \times days} \times (base + arousal \times boost)$$ + +- `importance`: 1-10,记忆重要性 / memory importance +- `activation_count`: 被检索的次数,越常被想起衰减越慢 / retrieval count; more recalls = slower decay +- `days`: 距上次激活的天数 / days since last activation +- `arousal`: 唤醒度,越强烈的记忆越难忘 / arousal; intense memories are harder to forget +- 已解决的记忆权重降到 5%,沉底等被关键词唤醒 / resolved memories drop to 5%, sink until keyword-triggered + +## 给 Claude 的使用指南 / Usage Guide for Claude + +`CLAUDE_PROMPT.md` 是写给 Claude 看的使用说明。放到你的 system prompt 或 custom instructions 里就行。 + +`CLAUDE_PROMPT.md` is the usage guide written for Claude. Put it in your system prompt or custom instructions. 
class BucketManager:
    """
    Memory bucket manager: entry point for all bucket CRUD, search,
    statistics and archive operations.

    Each bucket is one Markdown file whose YAML frontmatter carries the
    metadata and whose body carries the content, so the store can be browsed
    and edited directly in Obsidian. Files are laid out as
    ``<buckets_dir>/<permanent|dynamic|archive>/<primary domain>/<file>.md``.
    """

    def __init__(self, config: dict):
        """
        Read storage paths, wikilink options and search weights from config.

        Args:
            config: Parsed configuration mapping. ``buckets_dir`` is required;
                the ``matching``, ``wikilink`` and ``scoring_weights``
                sections are optional and fall back to the defaults below.

        Raises:
            KeyError: If ``buckets_dir`` is missing from ``config``.
        """
        # --- Storage paths ---
        self.base_dir = config["buckets_dir"]
        self.permanent_dir = os.path.join(self.base_dir, "permanent")
        self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
        self.archive_dir = os.path.join(self.base_dir, "archive")
        self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
        self.max_results = config.get("matching", {}).get("max_results", 5)

        # --- Wikilink config ---
        wikilink_cfg = config.get("wikilink", {})
        self.wikilink_enabled = wikilink_cfg.get("enabled", True)
        self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
        self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
        self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
        self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
        self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
        self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
        self.wikilink_stopwords = {
            "的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
            "都", "一个", "上", "也", "很", "到", "说", "要", "去",
            "你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
            "我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
            "the", "and", "for", "are", "but", "not", "you", "all", "can",
            "had", "her", "was", "one", "our", "out", "has", "have", "with",
            "this", "that", "from", "they", "been", "said", "will", "each",
        }
        # User-excluded keywords are also treated as stopwords (case-insensitively).
        self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}

        # --- Search scoring weights ---
        scoring = config.get("scoring_weights", {})
        self.w_topic = scoring.get("topic_relevance", 4.0)
        self.w_emotion = scoring.get("emotion_resonance", 2.0)
        self.w_time = scoring.get("time_proximity", 1.5)
        self.w_importance = scoring.get("importance", 1.0)

    # ---------------------------------------------------------
    # Internal: normalize list-like frontmatter fields
    # ---------------------------------------------------------
    @staticmethod
    def _as_str_list(value) -> list[str]:
        """
        Coerce a frontmatter field to a list of strings.

        Hand-edited frontmatter can leave ``tags:`` / ``domain:`` empty
        (parsed as None) or hold a bare scalar; both are normalized here so
        callers can always iterate.

        Returns:
            ``[]`` for None, ``[str(v), ...]`` for list/tuple/set input,
            ``[str(value)]`` for any other scalar.
        """
        if value is None:
            return []
        if isinstance(value, (list, tuple, set)):
            return [str(v) for v in value]
        return [str(value)]

    # ---------------------------------------------------------
    # Create a new bucket: write content + metadata into a .md file
    # ---------------------------------------------------------
    async def create(
        self,
        content: str,
        tags: Optional[list[str]] = None,
        importance: int = 5,
        domain: Optional[list[str]] = None,
        valence: float = 0.5,
        arousal: float = 0.3,
        bucket_type: str = "dynamic",
        name: Optional[str] = None,
    ) -> str:
        """
        Create a new memory bucket and return its ID.

        Args:
            content: Body text; wikilinks are injected before writing.
            tags: Optional tag list (defaults to empty).
            importance: Clamped to 1..10.
            domain: Topic domains; the first one selects the subdirectory.
                Defaults to ``["未分类"]``.
            valence: Emotion valence, clamped to 0..1.
            arousal: Emotion arousal, clamped to 0..1.
            bucket_type: ``"permanent"`` stores under the permanent
                directory; any other value stores under the dynamic one.
            name: Optional readable name used in the filename.

        Returns:
            The generated bucket ID.

        Raises:
            OSError: If the bucket file cannot be written.
        """
        bucket_id = generate_bucket_id()
        bucket_name = sanitize_name(name) if name else bucket_id
        domain = domain or ["未分类"]
        tags = tags or []
        linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)

        # --- Build YAML frontmatter metadata ---
        metadata = {
            "id": bucket_id,
            "name": bucket_name,
            "tags": tags,
            "domain": domain,
            "valence": max(0.0, min(1.0, valence)),
            "arousal": max(0.0, min(1.0, arousal)),
            "importance": max(1, min(10, importance)),
            "type": bucket_type,
            "created": now_iso(),
            "last_active": now_iso(),
            "activation_count": 1,
        }

        # --- Assemble Markdown file (frontmatter + body) ---
        post = frontmatter.Post(linked_content, **metadata)

        # --- Choose directory by type + primary domain ---
        type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
        primary_domain = sanitize_name(domain[0]) if domain else "未分类"
        target_dir = os.path.join(type_dir, primary_domain)
        os.makedirs(target_dir, exist_ok=True)

        # --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
        if bucket_name and bucket_name != bucket_id:
            filename = f"{bucket_name}_{bucket_id}.md"
        else:
            filename = f"{bucket_id}.md"
        file_path = safe_path(target_dir, filename)

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
            raise

        logger.info(
            f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
        )
        return bucket_id

    # ---------------------------------------------------------
    # Read bucket content
    # ---------------------------------------------------------
    async def get(self, bucket_id: str) -> Optional[dict]:
        """
        Read a single bucket by ID.

        Returns:
            ``{"id", "metadata", "content", "path"}``, or None when the ID is
            empty / not a string, no file matches, or the file fails to parse.
        """
        if not bucket_id or not isinstance(bucket_id, str):
            return None
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return None
        return self._load_bucket(file_path)

    # ---------------------------------------------------------
    # Update bucket
    # Supports: content, tags, importance, domain, valence, arousal,
    #           name, resolved
    # ---------------------------------------------------------
    async def update(self, bucket_id: str, **kwargs) -> bool:
        """
        Update bucket content and/or metadata fields in place.

        Only the fields present in ``kwargs`` are changed; ``last_active`` is
        always refreshed. The file is rewritten at its current path (it is
        not renamed or moved when ``name`` / ``domain`` change).

        Returns:
            True on success; False when the bucket is not found, cannot be
            parsed, or cannot be written.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False

        try:
            post = frontmatter.load(file_path)
        except Exception as e:
            logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
            return False

        # --- Update only the fields that were passed in ---
        if "content" in kwargs:
            # Wikilinks are computed from the values the bucket will have
            # after this update, not the ones currently on disk.
            next_tags = kwargs.get("tags", post.get("tags", []))
            next_domain = kwargs.get("domain", post.get("domain", []))
            next_name = kwargs.get("name", post.get("name", ""))
            post.content = self._apply_wikilinks(
                kwargs["content"],
                next_tags,
                next_domain,
                next_name,
            )
        if "tags" in kwargs:
            post["tags"] = kwargs["tags"]
        if "importance" in kwargs:
            post["importance"] = max(1, min(10, int(kwargs["importance"])))
        if "domain" in kwargs:
            post["domain"] = kwargs["domain"]
        if "valence" in kwargs:
            post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
        if "arousal" in kwargs:
            post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
        if "name" in kwargs:
            post["name"] = sanitize_name(kwargs["name"])
        if "resolved" in kwargs:
            post["resolved"] = bool(kwargs["resolved"])

        # --- Auto-refresh activation time ---
        post["last_active"] = now_iso()

        try:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except OSError as e:
            logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
            return False

        logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
        return True

    # ---------------------------------------------------------
    # Wikilink injection (Obsidian [[...]] links)
    # ---------------------------------------------------------
    def _apply_wikilinks(
        self,
        content: str,
        tags: list[str],
        domain: list[str],
        name: str,
    ) -> str:
        """
        Wrap keyword occurrences in ``[[...]]`` without double-wrapping
        text that is already inside a wikilink.

        Returns ``content`` unchanged when wikilinks are disabled, the
        content is empty, or no keyword survives normalization.
        """
        if not self.wikilink_enabled or not content:
            return content

        keywords = self._collect_wikilink_keywords(content, tags, domain, name)
        if not keywords:
            return content

        # Split on existing wikilinks (kept as their own segments by the
        # capture group) so they can be skipped during substitution.
        segments = re.split(r"(\[\[[^\]]+\]\])", content)
        # Keywords arrive longest-first, so the alternation prefers long matches.
        pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
        for i, segment in enumerate(segments):
            if segment.startswith("[[") and segment.endswith("]]"):
                continue
            segments[i] = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
        return "".join(segments)

    def _collect_wikilink_keywords(
        self,
        content: str,
        tags: list[str],
        domain: list[str],
        name: str,
    ) -> list[str]:
        """
        Gather candidate keywords from tags / domain (when enabled), the
        bucket name, and auto-extracted body keywords, then normalize them.
        """
        candidates = []

        if self.wikilink_use_tags:
            candidates.extend(tags or [])
        if self.wikilink_use_domain:
            candidates.extend(domain or [])
        if name:
            candidates.append(name)
        if self.wikilink_use_auto_keywords:
            candidates.extend(self._extract_auto_keywords(content))

        return self._normalize_keywords(candidates)

    def _normalize_keywords(self, keywords: list[str]) -> list[str]:
        """
        Filter, deduplicate and sort keywords longest-first.

        Drops non-strings, keywords shorter than ``wikilink_min_len``,
        excluded keywords and stopwords. Longest-first ordering keeps a short
        keyword from splitting a longer one during replacement.
        """
        if not keywords:
            return []

        seen = set()
        cleaned = []
        for keyword in keywords:
            if not isinstance(keyword, str):
                continue
            kw = keyword.strip()
            if len(kw) < self.wikilink_min_len:
                continue
            if kw in self.wikilink_exclude_keywords:
                continue
            if kw.lower() in self.wikilink_stopwords:
                continue
            if kw in seen:
                continue
            seen.add(kw)
            cleaned.append(kw)

        return sorted(cleaned, key=len, reverse=True)

    def _extract_auto_keywords(self, content: str) -> list[str]:
        """
        Extract the most frequent keywords from body text.

        Candidates are jieba tokens, adjacent pure-CJK token pairs (at most
        8 characters combined) and ASCII words of 3-21 characters. Returns up
        to ``wikilink_auto_top_k`` items ordered by frequency.
        """
        if not content:
            return []

        try:
            zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
        except Exception:
            # Best effort: a tokenizer failure must not block writing the bucket.
            zh_words = []
        en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)

        # Chinese bigrams built from adjacent tokens
        zh_bigrams = []
        for i in range(len(zh_words) - 1):
            left = zh_words[i]
            right = zh_words[i + 1]
            if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
                continue
            if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
                continue
            if len(left + right) > 8:
                continue
            zh_bigrams.append(left + right)

        merged = []
        for word in zh_words + zh_bigrams + en_words:
            if len(word) < self.wikilink_min_len:
                continue
            if re.fullmatch(r"\d+", word):
                continue
            if word.lower() in self.wikilink_stopwords:
                continue
            merged.append(word)

        if not merged:
            return []

        counter = Counter(merged)
        return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]

    # ---------------------------------------------------------
    # Delete bucket
    # ---------------------------------------------------------
    async def delete(self, bucket_id: str) -> bool:
        """
        Delete a bucket file.

        Returns:
            True when the file was removed; False when it was not found or
            removal failed.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False

        try:
            os.remove(file_path)
        except OSError as e:
            logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
            return False

        logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
        return True

    # ---------------------------------------------------------
    # Touch bucket (refresh activation time + increment count)
    # Called on every recall hit; affects the decay score.
    # ---------------------------------------------------------
    async def touch(self, bucket_id: str) -> None:
        """
        Set ``last_active`` to now and increment ``activation_count``.

        Best effort: a missing bucket is ignored and any failure is logged
        rather than raised.
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return

        try:
            post = frontmatter.load(file_path)
            post["last_active"] = now_iso()
            post["activation_count"] = post.get("activation_count", 0) + 1

            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))
        except Exception as e:
            logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")

    # ---------------------------------------------------------
    # Multi-dimensional search (core feature)
    #
    # Strategy: domain pre-filter → weighted multi-dim ranking
    #
    # Ranking formula:
    #   total = topic(×w_topic) + emotion(×w_emotion)
    #         + time(×w_time) + importance(×w_importance)
    #
    # Per-dimension scores (normalized to 0~1):
    #   topic      = rapidfuzz weighted match (name/tags/domain/body)
    #   emotion    = 1 - Euclidean distance (query v/a vs bucket v/a)
    #   time       = e^(-0.02 × days) (recent memories first)
    #   importance = importance / 10
    # ---------------------------------------------------------
    async def search(
        self,
        query: str,
        limit: Optional[int] = None,
        domain_filter: Optional[list[str]] = None,
        query_valence: Optional[float] = None,
        query_arousal: Optional[float] = None,
    ) -> list[dict]:
        """
        Search non-archived buckets and rank them by a weighted score.

        Args:
            query: Search text; blank queries return ``[]``.
            limit: Maximum number of results (defaults to ``max_results``).
            domain_filter: Restrict candidates to buckets sharing at least
                one domain (case-insensitive). Falls back to all buckets if
                the filter matches nothing.
            query_valence: Emotion valence used for resonance scoring.
            query_arousal: Emotion arousal used for resonance scoring.

        Returns:
            Bucket dicts with an added ``"score"`` (0..100), highest first.
            Buckets scoring below ``fuzzy_threshold`` are omitted.
        """
        if not query or not query.strip():
            return []

        limit = limit or self.max_results
        all_buckets = await self.list_all(include_archive=False)

        if not all_buckets:
            return []

        # --- Layer 1: domain pre-filter (fast scope reduction) ---
        if domain_filter:
            filter_set = {d.lower() for d in domain_filter}
            # _as_str_list tolerates empty (None) or scalar `domain` values,
            # which would otherwise raise here, outside the per-bucket try.
            candidates = [
                b for b in all_buckets
                if {d.lower() for d in self._as_str_list(b["metadata"].get("domain"))}
                & filter_set
            ]
            # Fall back to full search if the pre-filter yields nothing
            if not candidates:
                candidates = all_buckets
        else:
            candidates = all_buckets

        # --- Layer 2: weighted multi-dim ranking ---
        scored = []
        for bucket in candidates:
            meta = bucket.get("metadata", {})

            try:
                # Dim 1: topic relevance (fuzzy text, 0~1)
                topic_score = self._calc_topic_score(query, bucket)

                # Dim 2: emotion resonance (coordinate distance, 0~1)
                emotion_score = self._calc_emotion_score(
                    query_valence, query_arousal, meta
                )

                # Dim 3: time proximity (exponential decay, 0~1)
                time_score = self._calc_time_score(meta)

                # Dim 4: importance (direct normalization)
                importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0

                # --- Weighted sum ---
                total = (
                    topic_score * self.w_topic
                    + emotion_score * self.w_emotion
                    + time_score * self.w_time
                    + importance_score * self.w_importance
                )
                # Normalize to 0~100 for readability
                weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
                normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0

                # Resolved buckets get a ranking penalty (still reachable by keyword)
                if meta.get("resolved", False):
                    normalized *= 0.3

                if normalized >= self.fuzzy_threshold:
                    bucket["score"] = round(normalized, 2)
                    scored.append(bucket)
            except Exception as e:
                # One malformed bucket must not abort the whole search.
                logger.warning(
                    f"Scoring failed for bucket {bucket.get('id', '?')} / "
                    f"桶评分失败: {e}"
                )
                continue

        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:limit]

    # ---------------------------------------------------------
    # Topic relevance sub-score:
    #   name(×3) + domain(×2.5) + tags(×2) + body(×1)
    # ---------------------------------------------------------
    def _calc_topic_score(self, query: str, bucket: dict) -> float:
        """
        Calculate the text relevance score (0~1).

        Uses ``fuzz.partial_ratio`` against the name, the best-matching
        domain, the best-matching tag and the first 500 characters of the
        body, weighted 3 / 2.5 / 2 / 1.
        """
        meta = bucket.get("metadata", {})

        name_score = fuzz.partial_ratio(query, str(meta.get("name") or "")) * 3
        domain_score = (
            max(
                (
                    fuzz.partial_ratio(query, d)
                    for d in self._as_str_list(meta.get("domain"))
                ),
                default=0,
            )
            * 2.5
        )
        tag_score = (
            max(
                (
                    fuzz.partial_ratio(query, tag)
                    for tag in self._as_str_list(meta.get("tags"))
                ),
                default=0,
            )
            * 2
        )
        content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1

        # Each ratio is 0..100 and the weights sum to 8.5.
        return (name_score + domain_score + tag_score + content_score) / (100 * 8.5)

    # ---------------------------------------------------------
    # Emotion resonance sub-score:
    #   Euclidean distance in the valence/arousal plane
    #   (Russell circumplex model)
    # ---------------------------------------------------------
    def _calc_emotion_score(
        self, q_valence: Optional[float], q_arousal: Optional[float], meta: dict
    ) -> float:
        """
        Calculate emotion resonance (0~1, closer = higher).

        Returns the neutral value 0.5 when either query coordinate is None or
        the bucket's coordinates cannot be parsed as floats.
        """
        if q_valence is None or q_arousal is None:
            return 0.5  # No emotion coordinates → neutral

        try:
            b_valence = float(meta.get("valence", 0.5))
            b_arousal = float(meta.get("arousal", 0.3))
        except (ValueError, TypeError):
            return 0.5

        # Euclidean distance; the maximum in the unit square is sqrt(2) ≈ 1.414
        dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
        return max(0.0, 1.0 - dist / 1.414)

    # ---------------------------------------------------------
    # Time proximity sub-score:
    #   More recent activation → higher score
    # ---------------------------------------------------------
    def _calc_time_score(self, meta: dict) -> float:
        """
        Calculate time proximity (0~1, more recent = higher) as
        ``e^(-0.02 × days since last activation)``.

        Uses ``last_active``, falling back to ``created``. An unparsable
        timestamp is treated as 30 days old.
        """
        last_active_str = meta.get("last_active", meta.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            # Take "now" in the timestamp's own timezone (None → naive local
            # time). Mixing a naive now() with an aware timestamp raises
            # TypeError, which previously made every offset-carrying
            # timestamp score as 30 days old.
            now = datetime.now(last_active.tzinfo)
            days = max(0.0, (now - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days = 30
        return math.exp(-0.02 * days)

    # ---------------------------------------------------------
    # List all buckets
    # ---------------------------------------------------------
    async def list_all(self, include_archive: bool = False) -> list[dict]:
        """
        Recursively walk the permanent and dynamic directories (plus archive
        when requested) and return every ``.md`` file that loads successfully.
        """
        buckets = []

        dirs = [self.permanent_dir, self.dynamic_dir]
        if include_archive:
            dirs.append(self.archive_dir)

        for dir_path in dirs:
            if not os.path.exists(dir_path):
                continue
            for root, _, files in os.walk(dir_path):
                for filename in files:
                    if not filename.endswith(".md"):
                        continue
                    file_path = os.path.join(root, filename)
                    bucket = self._load_bucket(file_path)
                    if bucket:
                        buckets.append(bucket)

        return buckets

    # ---------------------------------------------------------
    # Statistics (counts per category + total size)
    # ---------------------------------------------------------
    async def get_stats(self) -> dict:
        """
        Return bucket statistics.

        Returns:
            A dict with ``permanent_count``, ``dynamic_count``,
            ``archive_count``, ``total_size_kb`` and ``domains`` (a mapping of
            subdirectory name → file count, summed across the three types).
        """
        stats = {
            "permanent_count": 0,
            "dynamic_count": 0,
            "archive_count": 0,
            "total_size_kb": 0.0,
            "domains": {},
        }

        for subdir, key in [
            (self.permanent_dir, "permanent_count"),
            (self.dynamic_dir, "dynamic_count"),
            (self.archive_dir, "archive_count"),
        ]:
            if not os.path.exists(subdir):
                continue
            for root, _, files in os.walk(subdir):
                for f in files:
                    if f.endswith(".md"):
                        stats[key] += 1
                        fpath = os.path.join(root, f)
                        try:
                            stats["total_size_kb"] += os.path.getsize(fpath) / 1024
                        except OSError:
                            # File vanished or is unreadable; count it, skip its size.
                            pass
                        # Per-domain counts; files directly in the type
                        # directory have no domain and are not counted here.
                        domain_name = os.path.basename(root)
                        if domain_name != os.path.basename(subdir):
                            stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1

        return stats

    # ---------------------------------------------------------
    # Archive bucket (move from permanent/dynamic into archive)
    # Called by the decay engine to simulate "forgetting"
    # ---------------------------------------------------------
    async def archive(self, bucket_id: str) -> bool:
        """
        Move a bucket into the archive directory, preserving its primary
        domain subdirectory and setting ``type`` to ``"archived"``.

        Returns:
            True on success; False when the bucket is not found or any step
            fails (the failure is logged).
        """
        file_path = self._find_bucket_file(bucket_id)
        if not file_path:
            return False

        try:
            # Read once: get the domain and update the type in the same pass
            post = frontmatter.load(file_path)
            domains = self._as_str_list(post.get("domain", ["未分类"]))
            primary_domain = sanitize_name(domains[0]) if domains else "未分类"
            archive_subdir = os.path.join(self.archive_dir, primary_domain)
            os.makedirs(archive_subdir, exist_ok=True)

            dest = safe_path(archive_subdir, os.path.basename(file_path))

            # Update the type marker, then move the file
            post["type"] = "archived"
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(frontmatter.dumps(post))

            # shutil.move also works across filesystems
            shutil.move(file_path, str(dest))
        except Exception as e:
            logger.error(
                f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
            )
            return False

        logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
        return True

    # ---------------------------------------------------------
    # Internal: find bucket file across all three directories
    # ---------------------------------------------------------
    def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
        """
        Recursively search permanent / dynamic / archive for the ``.md`` file
        belonging to ``bucket_id``.

        Filenames written by ``create`` are ``<id>.md`` or ``<name>_<id>.md``,
        so a file whose stem equals the ID or ends with ``_<id>`` is an exact
        match and is returned immediately. For backward compatibility with
        files named differently, the first file whose name merely *contains*
        the ID is returned only when no exact match exists anywhere. A plain
        substring test alone could return a different bucket whose name or ID
        happens to contain the requested ID.

        Returns:
            The file path, or None when nothing matches or ``bucket_id`` is
            empty / not a string.
        """
        if not bucket_id or not isinstance(bucket_id, str):
            return None

        id_suffix = f"_{bucket_id}"
        fallback = None
        for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
            if not os.path.exists(dir_path):
                continue
            for root, _, files in os.walk(dir_path):
                for fname in files:
                    if not fname.endswith(".md"):
                        continue
                    stem = fname[:-3]
                    if stem == bucket_id or stem.endswith(id_suffix):
                        return os.path.join(root, fname)
                    if fallback is None and bucket_id in fname:
                        fallback = os.path.join(root, fname)
        return fallback

    # ---------------------------------------------------------
    # Internal: load bucket data from a .md file
    # ---------------------------------------------------------
    def _load_bucket(self, file_path: str) -> Optional[dict]:
        """
        Parse a Markdown file into structured bucket data.

        Returns:
            ``{"id", "metadata", "content", "path"}`` where ``id`` falls back
            to the file stem when the frontmatter has none, or None when the
            file cannot be loaded (the failure is logged).
        """
        try:
            post = frontmatter.load(file_path)
            return {
                "id": post.get("id", Path(file_path).stem),
                "metadata": dict(post.metadata),
                "content": post.content,
                "path": file_path,
            }
        except Exception as e:
            logger.warning(
                f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
            )
            return None
class DecayEngine:
    """
    Memory decay engine.

    Periodically scans all non-archived buckets, computes a decay score for
    each non-permanent one and archives those scoring below the configured
    threshold, simulating natural forgetting.

    Score = importance × activation_count^0.3 × e^(-λ × days)
            × (base + arousal × arousal_boost)
    """

    def __init__(self, config: dict, bucket_mgr):
        """
        Load decay parameters.

        Args:
            config: Parsed configuration; the optional ``decay`` section
                provides ``lambda``, ``threshold``, ``check_interval_hours``
                and ``emotion_weights`` (``base``, ``arousal_boost``).
            bucket_mgr: Object exposing the async ``list_all(include_archive=...)``
                and ``archive(bucket_id)`` methods used by the decay cycle.
        """
        # --- Decay parameters ---
        decay_cfg = config.get("decay", {})
        self.decay_lambda = decay_cfg.get("lambda", 0.05)
        self.threshold = decay_cfg.get("threshold", 0.3)
        self.check_interval = decay_cfg.get("check_interval_hours", 24)

        # --- Emotion weight params (continuous arousal coordinate) ---
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)

        self.bucket_mgr = bucket_mgr

        # --- Background task control ---
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the background decay loop is currently flagged as running."""
        return self._running

    # ---------------------------------------------------------
    # Core: calculate the decay score for a single bucket
    #
    # Higher score = more vivid memory; below threshold → archive.
    # Permanent buckets never decay.
    # ---------------------------------------------------------
    def calculate_score(self, metadata: dict) -> float:
        """
        Calculate the current activity score of a memory bucket.

        Args:
            metadata: Bucket frontmatter. Reads ``type``, ``importance``,
                ``activation_count``, ``last_active`` (falling back to
                ``created``), ``arousal`` and ``resolved``.

        Returns:
            The score rounded to 4 decimals; ``999.0`` for permanent buckets
            and ``0.0`` when ``metadata`` is not a dict.

        Raises:
            ValueError, TypeError: If ``importance`` or ``activation_count``
                cannot be converted to int.
        """
        if not isinstance(metadata, dict):
            return 0.0

        # --- Permanent buckets never decay ---
        if metadata.get("type") == "permanent":
            return 999.0

        importance = max(1, min(10, int(metadata.get("importance", 5))))
        activation_count = max(1, int(metadata.get("activation_count", 1)))

        # --- Days since last activation ---
        last_active_str = metadata.get("last_active", metadata.get("created", ""))
        try:
            last_active = datetime.fromisoformat(str(last_active_str))
            # Take "now" in the timestamp's own timezone (None → naive local
            # time). Subtracting an aware timestamp from a naive now() raises
            # TypeError, which previously aged every offset-carrying
            # timestamp to 30 days.
            now = datetime.now(last_active.tzinfo)
            days_since = max(0.0, (now - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            days_since = 30  # Parse failure → assume 30 days

        # --- Emotion weight from the continuous arousal coordinate ---
        # Higher arousal → stronger emotion → higher weight → slower decay
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost

        # --- Apply decay formula ---
        score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )

        # --- Weight pool modifiers ---
        resolved = metadata.get("resolved", False)
        # Resolved events drop to 5% and sink until reactivated by keyword
        resolved_factor = 0.05 if resolved else 1.0
        # High-arousal unresolved buckets get an urgency boost so they surface first
        urgency_boost = 1.5 if (arousal > 0.7 and not resolved) else 1.0

        return round(score * resolved_factor * urgency_boost, 4)

    # ---------------------------------------------------------
    # Execute one decay cycle
    # Scan all buckets → score → archive those below threshold
    # ---------------------------------------------------------
    async def run_decay_cycle(self) -> dict:
        """
        Execute one decay cycle: score every non-permanent, non-archived
        bucket and archive those scoring below the threshold.

        Returns:
            ``{"checked": N, "archived": N, "lowest_score": X}``, plus an
            ``"error"`` key when listing buckets failed. ``lowest_score`` is
            0 when no bucket could be scored.
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}

        checked = 0
        scored = 0
        archived = 0
        lowest_score = float("inf")

        for bucket in buckets:
            meta = bucket.get("metadata", {})

            # Skip permanent buckets
            if meta.get("type") == "permanent":
                continue

            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                # A bucket that cannot be scored is never archived.
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue

            scored += 1
            lowest_score = min(lowest_score, score)

            # --- Below threshold → archive (simulate forgetting) ---
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )

        result = {
            "checked": checked,
            "archived": archived,
            # Guard on `scored`, not `checked`: if every score calculation
            # failed, lowest_score is still +inf and must not leak out.
            "lowest_score": lowest_score if scored > 0 else 0,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    # ---------------------------------------------------------
    # Background decay task management
    # ---------------------------------------------------------
    async def ensure_started(self) -> None:
        """Start the background loop if it is not already running (lazy init)."""
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop; a no-op when already running."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and wait for the task to finish."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so a later stop() does not re-await it.
            self._task = None
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop: run a decay cycle → sleep → repeat while running."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                # Keep the loop alive; the next cycle may succeed.
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # --- Wait for the next cycle ---
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break
# Core responsibilities:
#   - Initialize config, bucket manager, dehydrator, decay engine
#   - Expose 5 MCP tools:
#       breath — Surface unresolved memories or search by keyword
#       hold   — Store a single memory
#       grow   — Diary digest, auto-split into multiple buckets
#       trace  — Modify metadata / resolved / delete
#       pulse  — System status + bucket listing
#
# Startup:
#   Local:  python server.py
#   Remote: OMBRE_TRANSPORT=streamable-http python server.py
#   Docker: docker-compose up
# ============================================================

import os
import sys
import random
import logging
import asyncio
import httpx

# --- Ensure same-directory modules can be imported ---
# Must run before the project-local imports below.
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))

from mcp.server.fastmcp import FastMCP

from bucket_manager import BucketManager
from dehydrator import Dehydrator
from decay_engine import DecayEngine
from utils import load_config, setup_logging

# --- Load config & init logging ---
config = load_config()
setup_logging(config.get("log_level", "INFO"))
logger = logging.getLogger("ombre_brain")

# --- Initialize the three core components ---
bucket_mgr = BucketManager(config)                    # Bucket manager
dehydrator = Dehydrator(config)                       # Dehydrator
decay_engine = DecayEngine(config, bucket_mgr)        # Decay engine

# --- Create MCP server instance ---
# host="0.0.0.0" so Docker container's SSE is externally reachable
# stdio mode ignores host (no network)
mcp = FastMCP(
    "Ombre Brain",
    host="0.0.0.0",
    port=8000,
)
# The /health route below is a lightweight keepalive endpoint: a Cloudflare
# Tunnel or reverse proxy can ping it periodically so that idle connections
# are not dropped.
# =============================================================
@mcp.custom_route("/health", methods=["GET"])
async def health_check(request):
    """Report liveness as JSON.

    Returns HTTP 200 with ``status: ok``, the number of permanent + dynamic
    buckets and the decay-engine state, or HTTP 500 with ``status: error`` and
    the exception text when the bucket statistics cannot be read.
    """
    from starlette.responses import JSONResponse
    try:
        stats = await bucket_mgr.get_stats()
        return JSONResponse({
            "status": "ok",
            "buckets": stats["permanent_count"] + stats["dynamic_count"],
            "decay_engine": "running" if decay_engine.is_running else "stopped",
        })
    except Exception as e:
        return JSONResponse({"status": "error", "detail": str(e)}, status_code=500)


# =============================================================
# Internal helper: merge-or-create
# Shared by hold and grow to avoid duplicated logic.
# =============================================================
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """Merge ``content`` into a similar bucket if one exists, else create one.

    The best search hit is merged into when its ``score`` is strictly above
    the ``merge_threshold`` config value (default 75). Any failure along the
    merge path (search error, merge error, rejected update) falls back to
    creating a new bucket so the content is never lost.

    Args:
        content: Text of the memory to store.
        tags: Tags to attach (unioned with the existing ones on merge).
        importance: Importance; on merge the larger of old and new is kept.
        domain: Domains to attach (unioned with the existing ones on merge).
        valence: Emotional valence written to the bucket.
        arousal: Emotional arousal written to the bucket.
        name: Optional readable name, used only when a new bucket is created.

    Returns:
        ``(bucket_name_or_id, is_merged)``.

    Raises:
        Whatever ``bucket_mgr.create`` raises when the fallback create fails.
    """
    try:
        existing = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        existing = []

    if existing and existing[0].get("score", 0) > config.get("merge_threshold", 75):
        bucket = existing[0]
        meta = bucket["metadata"]
        try:
            merged = await dehydrator.merge(bucket["content"], content)
            # dict.fromkeys de-duplicates while keeping insertion order, so
            # the stored lists are deterministic (set() ordering is not).
            merged_tags = list(dict.fromkeys((meta.get("tags") or []) + tags))
            merged_domain = list(dict.fromkeys((meta.get("domain") or []) + domain))
            updated = await bucket_mgr.update(
                bucket["id"],
                content=merged,
                tags=merged_tags,
                importance=max(meta.get("importance", 5), importance),
                domain=merged_domain,
                valence=valence,
                arousal=arousal,
            )
            # update() signals failure with a falsy result (trace relies on
            # the same contract); only report a merge that was really written.
            if updated:
                return meta.get("name", bucket["id"]), True
            logger.warning(
                f"Merge update failed, creating new / 合并写入失败,新建: {bucket['id']}"
            )
        except Exception as e:
            logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    bucket_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return bucket_id, False


# =============================================================
# Tool 1: breath — Breathe
#
# No query:   surface the highest-weight unresolved memories (active push)
# With query: search by keyword + emotion coordinates
#
# NOTE: the docstrings of the tool functions are kept verbatim; FastMCP
# publishes them to the client as the tool descriptions.
# =============================================================
@mcp.tool()
async def breath(
    query: str = "",
    max_results: int = 3,
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
) -> str:
    """检索记忆或浮现未解决记忆。query 为空时自动推送权重最高的未解决桶;有 query 时按关键词+情感检索。domain 逗号分隔,valence/arousal 传 0~1 启用情感共鸣,-1 忽略。"""
    await decay_engine.ensure_started()

    # --- No query: surfacing mode (weight pool active push) ---
    if not query.strip():
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}")
            return "记忆系统暂时无法访问。"

        unresolved = [
            b for b in all_buckets
            if not b["metadata"].get("resolved", False)
            and b["metadata"].get("type") != "permanent"
        ]
        if not unresolved:
            return "权重池平静,没有需要处理的记忆。"

        # Score each bucket once and reuse the value for both ranking and
        # display; sorted() is stable, so ties keep their listing order.
        ranked = sorted(
            ((decay_engine.calculate_score(b["metadata"]), b) for b in unresolved),
            key=lambda pair: pair[0],
            reverse=True,
        )
        results = []
        for score, b in ranked[:2]:  # surfacing pushes at most two memories
            try:
                summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                await bucket_mgr.touch(b["id"])
                results.append(f"[权重:{score:.2f}] {summary}")
            except Exception as e:
                logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}")
        if not results:
            return "权重池平静,没有需要处理的记忆。"
        return "=== 浮现记忆 ===\n" + "\n---\n".join(results)

    # --- With query: search mode ---
    domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None
    # Values outside 0~1 (the -1 default included) disable emotion resonance.
    q_valence = valence if 0 <= valence <= 1 else None
    q_arousal = arousal if 0 <= arousal <= 1 else None

    try:
        matches = await bucket_mgr.search(
            query,
            limit=max_results,
            domain_filter=domain_filter,
            query_valence=q_valence,
            query_arousal=q_arousal,
        )
    except Exception as e:
        logger.error(f"Search failed / 检索失败: {e}")
        return "检索过程出错,请稍后重试。"

    results = []
    for bucket in matches:
        try:
            summary = await dehydrator.dehydrate(bucket["content"], bucket["metadata"])
            await bucket_mgr.touch(bucket["id"])
            results.append(summary)
        except Exception as e:
            logger.warning(f"Failed to dehydrate search result / 检索结果脱水失败: {e}")

    # --- Random surfacing: when the search returns fewer than 3 hits, there
    # --- is a 40% chance that 1-3 low-weight old memories drift up as well.
    if len(matches) < 3 and random.random() < 0.4:
        try:
            all_buckets = await bucket_mgr.list_all(include_archive=False)
            matched_ids = {b["id"] for b in matches}
            low_weight = [
                b for b in all_buckets
                if b["id"] not in matched_ids
                and decay_engine.calculate_score(b["metadata"]) < 2.0
            ]
            if low_weight:
                drifted = random.sample(low_weight, min(random.randint(1, 3), len(low_weight)))
                drift_results = []
                for b in drifted:
                    # Per-item handling, like the loops above: one bad bucket
                    # must not discard the other drifted memories.
                    try:
                        summary = await dehydrator.dehydrate(b["content"], b["metadata"])
                    except Exception as e:
                        logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")
                        continue
                    drift_results.append(f"[surface_type: random]\n{summary}")
                if drift_results:
                    results.append("--- 忽然想起来 ---\n" + "\n---\n".join(drift_results))
        except Exception as e:
            logger.warning(f"Random surfacing failed / 随机浮现失败: {e}")

    if not results:
        return "未找到相关记忆。"

    return "\n---\n".join(results)


# =============================================================
# Tool 2: hold — Hold on to this
# =============================================================
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
) -> str:
    """存储单条记忆。自动打标+合并相似桶。tags 逗号分隔,importance 1-10。"""
    await decay_engine.ensure_started()

    # --- Input validation ---
    if not content or not content.strip():
        return "内容为空,无法存储。"

    importance = max(1, min(10, importance))
    extra_tags = [t.strip() for t in tags.split(",") if t.strip()]

    # --- Step 1: auto-tagging ---
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {}
    if not isinstance(analysis, dict):
        analysis = {}

    def _pick(key, default):
        # A missing or null field falls back to the same defaults that are
        # used when auto-tagging fails outright, instead of raising KeyError.
        value = analysis.get(key)
        return default if value is None else value

    domain = _pick("domain", ["未分类"]) or ["未分类"]
    valence = _pick("valence", 0.5)
    arousal = _pick("arousal", 0.3)
    auto_tags = list(_pick("tags", []))
    suggested_name = _pick("suggested_name", "")

    # Auto tags first, then caller tags; duplicates dropped, order kept.
    all_tags = list(dict.fromkeys(auto_tags + extra_tags))

    # --- Step 2: merge or create ---
    result_name, is_merged = await _merge_or_create(
        content=content,
        tags=all_tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name,
    )

    if is_merged:
        return (
            f"已合并到现有记忆桶: {result_name}\n"
            f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f}"
        )
    return (
        f"已创建新记忆桶: {result_name}\n"
        f"主题域: {', '.join(domain)} | 情感: V{valence:.1f}/A{arousal:.1f} | 标签: {', '.join(all_tags)}"
    )


# =============================================================
# Tool 3: grow — Grow, a day's fragments become memories
# =============================================================
@mcp.tool()
async def grow(content: str) -> str:
    """日记归档。自动拆分长内容为多个记忆桶。"""
    await decay_engine.ensure_started()

    if not content or not content.strip():
        return "内容为空,无法整理。"

    # --- Step 1: let the dehydrator split and organize the diary ---
    try:
        items = await dehydrator.digest(content)
    except Exception as e:
        logger.error(f"Diary digest failed / 日记整理失败: {e}")
        return f"日记整理失败: {e}"

    if not items:
        return "内容为空或整理失败。"

    results = []
    created = 0
    merged = 0

    # --- Step 2: merge or create each item; one failure must not affect
    # --- the remaining items.
    for item in items:
        try:
            # Read each field once so the report shows exactly what is stored.
            item_domain = item.get("domain", ["未分类"])
            item_valence = item.get("valence", 0.5)
            item_arousal = item.get("arousal", 0.3)
            result_name, is_merged = await _merge_or_create(
                content=item["content"],
                tags=item.get("tags", []),
                importance=item.get("importance", 5),
                domain=item_domain,
                valence=item_valence,
                arousal=item_arousal,
                name=item.get("name", ""),
            )

            if is_merged:
                results.append(f" 📎 合并 → {result_name}")
                merged += 1
            else:
                domains_str = ",".join(item_domain)
                results.append(
                    f" 📝 新建 [{item.get('name', result_name)}] "
                    f"主题:{domains_str} V{item_valence:.1f}/A{item_arousal:.1f}"
                )
                created += 1
        except Exception as e:
            # The item itself may be malformed (not a dict), so the handler
            # must not assume it has .get().
            is_mapping = isinstance(item, dict)
            log_name = item.get("name", "?") if is_mapping else "?"
            shown_name = item.get("name", "未知条目") if is_mapping else "未知条目"
            logger.warning(
                f"Failed to process diary item / 日记条目处理失败: "
                f"{log_name}: {e}"
            )
            results.append(f" ⚠️ 失败: {shown_name}")

    summary = f"=== 日记整理完成 ===\n拆分为 {len(items)} 条 | 新建 {created} 桶 | 合并 {merged} 桶\n"
    return summary + "\n".join(results)
# Tool 4: trace — Trace, redraw the outline of a memory
# Also handles deletion (delete=True)
#
# NOTE: the docstring is kept verbatim; FastMCP publishes it to the client as
# the tool description.
# =============================================================
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1 标记已解决(桶权重骤降沉底),resolved=0 重新激活,delete=True 删除桶。其余字段只传需改的,-1 或空串表示不改。"""

    def _csv(text: str) -> list:
        # Split a comma-separated string, dropping blank pieces.
        stripped = (piece.strip() for piece in text.split(","))
        return [piece for piece in stripped if piece]

    if not (bucket_id and bucket_id.strip()):
        return "请提供有效的 bucket_id。"

    # --- Delete mode: nothing else is looked at ---
    if delete:
        removed = await bucket_mgr.delete(bucket_id)
        if removed:
            return f"已遗忘记忆桶: {bucket_id}"
        return f"未找到记忆桶: {bucket_id}"

    if not await bucket_mgr.get(bucket_id):
        return f"未找到记忆桶: {bucket_id}"

    # --- Keep only the fields the caller actually supplied. None marks
    # --- "not supplied"; the dict order fixes the order of the report below.
    requested = {
        "name": name if name else None,
        "domain": _csv(domain) if domain else None,
        "valence": valence if 0 <= valence <= 1 else None,
        "arousal": arousal if 0 <= arousal <= 1 else None,
        "importance": importance if 1 <= importance <= 10 else None,
        "tags": _csv(tags) if tags else None,
        "resolved": bool(resolved) if resolved in (0, 1) else None,
    }
    changes = {field: value for field, value in requested.items() if value is not None}

    if not changes:
        return "没有任何字段需要修改。"

    if not await bucket_mgr.update(bucket_id, **changes):
        return f"修改失败: {bucket_id}"

    report = ", ".join(f"{field}={value}" for field, value in changes.items())
    # Spell out what a resolved-state change means for surfacing.
    if "resolved" in changes:
        report += (
            " → 已沉底,只在关键词触发时重新浮现"
            if changes["resolved"]
            else " → 已重新激活,将参与浮现排序"
        )
    return f"已修改记忆桶 {bucket_id}: {report}"


# =============================================================
# Tool 5: pulse — Heartbeat, system status + memory listing
脉搏,系统状态 + 记忆列表 +# ============================================================= +@mcp.tool() +async def pulse(include_archive: bool = False) -> str: + """系统状态和所有记忆桶摘要。include_archive=True 时包含归档桶。""" + try: + stats = await bucket_mgr.get_stats() + except Exception as e: + return f"获取系统状态失败: {e}" + + status = ( + f"=== Ombre Brain 记忆系统 ===\n" + f"固化记忆桶: {stats['permanent_count']} 个\n" + f"动态记忆桶: {stats['dynamic_count']} 个\n" + f"归档记忆桶: {stats['archive_count']} 个\n" + f"总存储大小: {stats['total_size_kb']:.1f} KB\n" + f"衰减引擎: {'运行中' if decay_engine.is_running else '已停止'}\n" + ) + + # --- List all bucket summaries / 列出所有桶摘要 --- + try: + buckets = await bucket_mgr.list_all(include_archive=include_archive) + except Exception as e: + return status + f"\n列出记忆桶失败: {e}" + + if not buckets: + return status + "\n记忆库为空。" + + lines = [] + for b in buckets: + meta = b.get("metadata", {}) + if meta.get("type") == "permanent": + icon = "📦" + elif meta.get("type") == "archived": + icon = "🗄️" + elif meta.get("resolved", False): + icon = "✅" + else: + icon = "💭" + try: + score = decay_engine.calculate_score(meta) + except Exception: + score = 0.0 + domains = ",".join(meta.get("domain", [])) + val = meta.get("valence", 0.5) + aro = meta.get("arousal", 0.3) + resolved_tag = " [已解决]" if meta.get("resolved", False) else "" + lines.append( + f"{icon} [{meta.get('name', b['id'])}]{resolved_tag} " + f"主题:{domains} " + f"情感:V{val:.1f}/A{aro:.1f} " + f"重要:{meta.get('importance', '?')} " + f"权重:{score:.2f} " + f"标签:{','.join(meta.get('tags', []))}" + ) + + return status + "\n=== 记忆列表 ===\n" + "\n".join(lines) + + +# --- Entry point / 启动入口 --- +if __name__ == "__main__": + transport = config.get("transport", "stdio") + logger.info(f"Ombre Brain starting | transport: {transport}") + + # --- Application-level keepalive: remote mode only, ping /health every 60s --- + # --- 应用层保活:仅远程模式下启动,每 60 秒 ping 一次 /health --- + # Prevents Cloudflare Tunnel from dropping idle connections + if transport in ("sse", 
"streamable-http"): + async def _keepalive_loop(): + await asyncio.sleep(10) # Wait for server to fully start + async with httpx.AsyncClient() as client: + while True: + try: + await client.get("http://localhost:8000/health", timeout=5) + logger.debug("Keepalive ping OK / 保活 ping 成功") + except Exception as e: + logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}") + await asyncio.sleep(60) + + import threading + + def _start_keepalive(): + loop = asyncio.new_event_loop() + loop.run_until_complete(_keepalive_loop()) + + t = threading.Thread(target=_start_keepalive, daemon=True) + t.start() + + mcp.run(transport=transport) diff --git a/bucket_manager.py b/bucket_manager.py new file mode 100644 index 0000000..d056f4e --- /dev/null +++ b/bucket_manager.py @@ -0,0 +1,781 @@ +# ============================================================ +# Module: Memory Bucket Manager (bucket_manager.py) +# 模块:记忆桶管理器 +# +# CRUD operations, multi-dimensional index search, activation updates +# for memory buckets. 
+# 记忆桶的增删改查、多维索引搜索、激活更新。 +# +# Core design: +# 核心逻辑: +# - Each bucket = one Markdown file (YAML frontmatter + body) +# 每个记忆桶 = 一个 Markdown 文件 +# - Storage by type: permanent / dynamic / archive +# 存储按类型分目录 +# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text +# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配 +# - Search strategy: domain pre-filter → weighted multi-dim ranking +# 搜索策略:主题域预筛 → 多维加权精排 +# - Emotion coordinates based on Russell circumplex model: +# 情感坐标基于环形情感模型(Russell circumplex): +# valence (0~1): 0=negative → 1=positive +# arousal (0~1): 0=calm → 1=excited +# +# Depended on by: server.py, decay_engine.py +# 被谁依赖:server.py, decay_engine.py +# ============================================================ + +import os +import math +import logging +import re +import shutil +from collections import Counter +from datetime import datetime +from pathlib import Path +from typing import Optional + +import frontmatter +import jieba +from rapidfuzz import fuzz + +from utils import generate_bucket_id, sanitize_name, safe_path, now_iso + +logger = logging.getLogger("ombre_brain.bucket") + + +class BucketManager: + """ + Memory bucket manager — entry point for all bucket CRUD operations. + Buckets are stored as Markdown files with YAML frontmatter for metadata + and body for content. Natively compatible with Obsidian browsing/editing. 
+ 记忆桶管理器 —— 所有桶的 CRUD 操作入口。 + 桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。 + 天然兼容 Obsidian 直接浏览和编辑。 + """ + + def __init__(self, config: dict): + # --- Read storage paths from config / 从配置中读取存储路径 --- + self.base_dir = config["buckets_dir"] + self.permanent_dir = os.path.join(self.base_dir, "permanent") + self.dynamic_dir = os.path.join(self.base_dir, "dynamic") + self.archive_dir = os.path.join(self.base_dir, "archive") + self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50) + self.max_results = config.get("matching", {}).get("max_results", 5) + + # --- Wikilink config / 双链配置 --- + wikilink_cfg = config.get("wikilink", {}) + self.wikilink_enabled = wikilink_cfg.get("enabled", True) + self.wikilink_use_tags = wikilink_cfg.get("use_tags", False) + self.wikilink_use_domain = wikilink_cfg.get("use_domain", True) + self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True) + self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8) + self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2) + self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", [])) + self.wikilink_stopwords = { + "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", + "都", "一个", "上", "也", "很", "到", "说", "要", "去", + "你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她", + "我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下", + "the", "and", "for", "are", "but", "not", "you", "all", "can", + "had", "her", "was", "one", "our", "out", "has", "have", "with", + "this", "that", "from", "they", "been", "said", "will", "each", + } + self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords} + + # --- Search scoring weights / 检索权重配置 --- + scoring = config.get("scoring_weights", {}) + self.w_topic = scoring.get("topic_relevance", 4.0) + self.w_emotion = scoring.get("emotion_resonance", 2.0) + self.w_time = scoring.get("time_proximity", 1.5) + self.w_importance = scoring.get("importance", 1.0) + + # 
--------------------------------------------------------- + # Create a new bucket + # 创建新桶 + # Write content and metadata into a .md file + # 将内容和元数据写入一个 .md 文件 + # --------------------------------------------------------- + async def create( + self, + content: str, + tags: list[str] = None, + importance: int = 5, + domain: list[str] = None, + valence: float = 0.5, + arousal: float = 0.3, + bucket_type: str = "dynamic", + name: str = None, + pinned: bool = False, + protected: bool = False, + ) -> str: + """ + Create a new memory bucket, return bucket ID. + 创建一个新的记忆桶,返回桶 ID。 + + pinned/protected=True: bucket won't be merged, decayed, or have importance changed. + Importance is locked to 10 for pinned/protected buckets. + pinned/protected 桶不参与合并与衰减,importance 强制锁定为 10。 + """ + bucket_id = generate_bucket_id() + bucket_name = sanitize_name(name) if name else bucket_id + domain = domain or ["未分类"] + tags = tags or [] + linked_content = self._apply_wikilinks(content, tags, domain, bucket_name) + + # --- Pinned/protected buckets: lock importance to 10 --- + # --- 钉选/保护桶:importance 强制锁定为 10 --- + if pinned or protected: + importance = 10 + + # --- Build YAML frontmatter metadata / 构建元数据 --- + metadata = { + "id": bucket_id, + "name": bucket_name, + "tags": tags, + "domain": domain, + "valence": max(0.0, min(1.0, valence)), + "arousal": max(0.0, min(1.0, arousal)), + "importance": max(1, min(10, importance)), + "type": bucket_type, + "created": now_iso(), + "last_active": now_iso(), + "activation_count": 1, + } + if pinned: + metadata["pinned"] = True + if protected: + metadata["protected"] = True + + # --- Assemble Markdown file (frontmatter + body) --- + # --- 组装 Markdown 文件 --- + post = frontmatter.Post(linked_content, **metadata) + + # --- Choose directory by type + primary domain --- + # --- 按类型 + 主题域选择存储目录 --- + type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir + primary_domain = sanitize_name(domain[0]) if domain else "未分类" + 
target_dir = os.path.join(type_dir, primary_domain) + os.makedirs(target_dir, exist_ok=True) + + # --- Filename: readable_name_bucketID.md (Obsidian friendly) --- + # --- 文件名:可读名称_桶ID.md --- + if bucket_name and bucket_name != bucket_id: + filename = f"{bucket_name}_{bucket_id}.md" + else: + filename = f"{bucket_id}.md" + file_path = safe_path(target_dir, filename) + + try: + with open(file_path, "w", encoding="utf-8") as f: + f.write(frontmatter.dumps(post)) + except OSError as e: + logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}") + raise + + logger.info( + f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/" + + (" [PINNED]" if pinned else "") + (" [PROTECTED]" if protected else "") + ) + return bucket_id + + # --------------------------------------------------------- + # Read bucket content + # 读取桶内容 + # Returns {"id", "metadata", "content", "path"} or None + # --------------------------------------------------------- + async def get(self, bucket_id: str) -> Optional[dict]: + """ + Read a single bucket by ID. + 根据 ID 读取单个桶。 + """ + if not bucket_id or not isinstance(bucket_id, str): + return None + file_path = self._find_bucket_file(bucket_id) + if not file_path: + return None + return self._load_bucket(file_path) + + # --------------------------------------------------------- + # Update bucket + # 更新桶 + # Supports: content, tags, importance, valence, arousal, name, resolved + # --------------------------------------------------------- + async def update(self, bucket_id: str, **kwargs) -> bool: + """ + Update bucket content or metadata fields. 
+ 更新桶的内容或元数据字段。 + """ + file_path = self._find_bucket_file(bucket_id) + if not file_path: + return False + + try: + post = frontmatter.load(file_path) + except Exception as e: + logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}") + return False + + # --- Pinned/protected buckets: lock importance to 10, ignore importance changes --- + # --- 钉选/保护桶:importance 不可修改,强制保持 10 --- + is_pinned = post.get("pinned", False) or post.get("protected", False) + if is_pinned: + kwargs.pop("importance", None) # silently ignore importance update + + # --- Update only fields that were passed in / 只改传入的字段 --- + if "content" in kwargs: + next_tags = kwargs.get("tags", post.get("tags", [])) + next_domain = kwargs.get("domain", post.get("domain", [])) + next_name = kwargs.get("name", post.get("name", "")) + post.content = self._apply_wikilinks( + kwargs["content"], + next_tags, + next_domain, + next_name, + ) + if "tags" in kwargs: + post["tags"] = kwargs["tags"] + if "importance" in kwargs: + post["importance"] = max(1, min(10, int(kwargs["importance"]))) + if "domain" in kwargs: + post["domain"] = kwargs["domain"] + if "valence" in kwargs: + post["valence"] = max(0.0, min(1.0, float(kwargs["valence"]))) + if "arousal" in kwargs: + post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"]))) + if "name" in kwargs: + post["name"] = sanitize_name(kwargs["name"]) + if "resolved" in kwargs: + post["resolved"] = bool(kwargs["resolved"]) + if "pinned" in kwargs: + post["pinned"] = bool(kwargs["pinned"]) + if kwargs["pinned"]: + post["importance"] = 10 # pinned → lock importance to 10 + + # --- Auto-refresh activation time / 自动刷新激活时间 --- + post["last_active"] = now_iso() + + try: + with open(file_path, "w", encoding="utf-8") as f: + f.write(frontmatter.dumps(post)) + except OSError as e: + logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}") + return False + + logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}") + return True + + # 
--------------------------------------------------------- + # Wikilink injection + # 自动添加 Obsidian 双链 + # --------------------------------------------------------- + def _apply_wikilinks( + self, + content: str, + tags: list[str], + domain: list[str], + name: str, + ) -> str: + """ + Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]]. + 自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。 + """ + if not self.wikilink_enabled or not content: + return content + + keywords = self._collect_wikilink_keywords(content, tags, domain, name) + if not keywords: + return content + + # Split on existing wikilinks to avoid wrapping them again + # 按已有双链切分,避免重复包裹 + segments = re.split(r"(\[\[[^\]]+\]\])", content) + pattern = re.compile("|".join(re.escape(kw) for kw in keywords)) + for i, segment in enumerate(segments): + if segment.startswith("[[") and segment.endswith("]]"): + continue + updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment) + segments[i] = updated + return "".join(segments) + + def _collect_wikilink_keywords( + self, + content: str, + tags: list[str], + domain: list[str], + name: str, + ) -> list[str]: + """ + Collect candidate keywords from tags/domain/auto-extraction. + 汇总候选关键词:可选 tags/domain + 自动提词。 + """ + candidates = [] + + if self.wikilink_use_tags: + candidates.extend(tags or []) + if self.wikilink_use_domain: + candidates.extend(domain or []) + if name: + candidates.append(name) + if self.wikilink_use_auto_keywords: + candidates.extend(self._extract_auto_keywords(content)) + + return self._normalize_keywords(candidates) + + def _normalize_keywords(self, keywords: list[str]) -> list[str]: + """ + Deduplicate and sort by length (longer first to avoid short words + breaking long ones during replacement). 
+ 去重并按长度排序,优先替换长词。 + """ + if not keywords: + return [] + + seen = set() + cleaned = [] + for keyword in keywords: + if not isinstance(keyword, str): + continue + kw = keyword.strip() + if len(kw) < self.wikilink_min_len: + continue + if kw in self.wikilink_exclude_keywords: + continue + if kw.lower() in self.wikilink_stopwords: + continue + if kw in seen: + continue + seen.add(kw) + cleaned.append(kw) + + return sorted(cleaned, key=len, reverse=True) + + def _extract_auto_keywords(self, content: str) -> list[str]: + """ + Auto-extract keywords from body text, prioritizing high-frequency words. + 从正文自动提词,优先高频词。 + """ + if not content: + return [] + + try: + zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()] + except Exception: + zh_words = [] + en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content) + + # Chinese bigrams / 中文双词组合 + zh_bigrams = [] + for i in range(len(zh_words) - 1): + left = zh_words[i] + right = zh_words[i + 1] + if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len: + continue + if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right): + continue + if len(left + right) > 8: + continue + zh_bigrams.append(left + right) + + merged = [] + for word in zh_words + zh_bigrams + en_words: + if len(word) < self.wikilink_min_len: + continue + if re.fullmatch(r"\d+", word): + continue + if word.lower() in self.wikilink_stopwords: + continue + merged.append(word) + + if not merged: + return [] + + counter = Counter(merged) + return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)] + + # --------------------------------------------------------- + # Delete bucket + # 删除桶 + # --------------------------------------------------------- + async def delete(self, bucket_id: str) -> bool: + """ + Delete a memory bucket file. 
+ 删除指定的记忆桶文件。 + """ + file_path = self._find_bucket_file(bucket_id) + if not file_path: + return False + + try: + os.remove(file_path) + except OSError as e: + logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}") + return False + + logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}") + return True + + # --------------------------------------------------------- + # Touch bucket (refresh activation time + increment count) + # 触碰桶(刷新激活时间 + 累加激活次数) + # Called on every recall hit; affects decay score. + # 每次检索命中时调用,影响衰减得分。 + # --------------------------------------------------------- + async def touch(self, bucket_id: str) -> None: + """ + Update a bucket's last activation time and count. + 更新桶的最后激活时间和激活次数。 + """ + file_path = self._find_bucket_file(bucket_id) + if not file_path: + return + + try: + post = frontmatter.load(file_path) + post["last_active"] = now_iso() + post["activation_count"] = post.get("activation_count", 0) + 1 + + with open(file_path, "w", encoding="utf-8") as f: + f.write(frontmatter.dumps(post)) + except Exception as e: + logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}") + + # --------------------------------------------------------- + # Multi-dimensional search (core feature) + # 多维搜索(核心功能) + # + # Strategy: domain pre-filter → weighted multi-dim ranking + # 策略:主题域预筛 → 多维加权精排 + # + # Ranking formula: + # total = topic(×w_topic) + emotion(×w_emotion) + # + time(×w_time) + importance(×w_importance) + # + # Per-dimension scores (normalized to 0~1): + # topic = rapidfuzz weighted match (name/tags/domain/body) + # emotion = 1 - Euclidean distance (query v/a vs bucket v/a) + # time = e^(-0.02 × days) (recent memories first) + # importance = importance / 10 + # --------------------------------------------------------- + async def search( + self, + query: str, + limit: int = None, + domain_filter: list[str] = None, + query_valence: float = None, + query_arousal: float = None, + ) -> list[dict]: + """ + 
Multi-dimensional indexed search for memory buckets. + 多维索引搜索记忆桶。 + + domain_filter: pre-filter by domain (None = search all) + query_valence/arousal: emotion coordinates for resonance scoring + """ + if not query or not query.strip(): + return [] + + limit = limit or self.max_results + all_buckets = await self.list_all(include_archive=False) + + if not all_buckets: + return [] + + # --- Layer 1: domain pre-filter (fast scope reduction) --- + # --- 第一层:主题域预筛(快速缩小范围)--- + if domain_filter: + filter_set = {d.lower() for d in domain_filter} + candidates = [ + b for b in all_buckets + if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set + ] + # Fall back to full search if pre-filter yields nothing + # 预筛为空则回退全量搜索 + if not candidates: + candidates = all_buckets + else: + candidates = all_buckets + + # --- Layer 2: weighted multi-dim ranking --- + # --- 第二层:多维加权精排 --- + scored = [] + for bucket in candidates: + meta = bucket.get("metadata", {}) + + try: + # Dim 1: topic relevance (fuzzy text, 0~1) + topic_score = self._calc_topic_score(query, bucket) + + # Dim 2: emotion resonance (coordinate distance, 0~1) + emotion_score = self._calc_emotion_score( + query_valence, query_arousal, meta + ) + + # Dim 3: time proximity (exponential decay, 0~1) + time_score = self._calc_time_score(meta) + + # Dim 4: importance (direct normalization) + importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0 + + # --- Weighted sum / 加权求和 --- + total = ( + topic_score * self.w_topic + + emotion_score * self.w_emotion + + time_score * self.w_time + + importance_score * self.w_importance + ) + # Normalize to 0~100 for readability + weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance + normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0 + + # Resolved buckets get ranking penalty (but still reachable by keyword) + # 已解决的桶降权排序(但仍可被关键词激活) + if meta.get("resolved", False): + normalized *= 0.3 + + if normalized >= 
self.fuzzy_threshold: + bucket["score"] = round(normalized, 2) + scored.append(bucket) + except Exception as e: + logger.warning( + f"Scoring failed for bucket {bucket.get('id', '?')} / " + f"桶评分失败: {e}" + ) + continue + + scored.sort(key=lambda x: x["score"], reverse=True) + return scored[:limit] + + # --------------------------------------------------------- + # Topic relevance sub-score: + # name(×3) + domain(×2.5) + tags(×2) + body(×1) + # 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1) + # --------------------------------------------------------- + def _calc_topic_score(self, query: str, bucket: dict) -> float: + """ + Calculate text dimension relevance score (0~1). + 计算文本维度的相关性得分。 + """ + meta = bucket.get("metadata", {}) + + name_score = fuzz.partial_ratio(query, meta.get("name", "")) * 3 + domain_score = ( + max( + (fuzz.partial_ratio(query, d) for d in meta.get("domain", [])), + default=0, + ) + * 2.5 + ) + tag_score = ( + max( + (fuzz.partial_ratio(query, tag) for tag in meta.get("tags", [])), + default=0, + ) + * 2 + ) + content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:500]) * 1 + + return (name_score + domain_score + tag_score + content_score) / (100 * 8.5) + + # --------------------------------------------------------- + # Emotion resonance sub-score: + # Based on Russell circumplex Euclidean distance + # 情感共鸣子分:基于环形情感模型的欧氏距离 + # No emotion in query → neutral 0.5 (doesn't affect ranking) + # --------------------------------------------------------- + def _calc_emotion_score( + self, q_valence: float, q_arousal: float, meta: dict + ) -> float: + """ + Calculate emotion resonance score (0~1, closer = higher). 
+ 计算情感共鸣度(0~1,越近越高)。 + """ + if q_valence is None or q_arousal is None: + return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分 + + try: + b_valence = float(meta.get("valence", 0.5)) + b_arousal = float(meta.get("arousal", 0.3)) + except (ValueError, TypeError): + return 0.5 + + # Euclidean distance, max sqrt(2) ≈ 1.414 + dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2) + return max(0.0, 1.0 - dist / 1.414) + + # --------------------------------------------------------- + # Time proximity sub-score: + # More recent activation → higher score + # 时间亲近子分:距上次激活越近分越高 + # --------------------------------------------------------- + def _calc_time_score(self, meta: dict) -> float: + """ + Calculate time proximity score (0~1, more recent = higher). + 计算时间亲近度。 + """ + last_active_str = meta.get("last_active", meta.get("created", "")) + try: + last_active = datetime.fromisoformat(str(last_active_str)) + days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400) + except (ValueError, TypeError): + days = 30 + return math.exp(-0.02 * days) + + # --------------------------------------------------------- + # List all buckets + # 列出所有桶 + # --------------------------------------------------------- + async def list_all(self, include_archive: bool = False) -> list[dict]: + """ + Recursively walk directories (including domain subdirs), list all buckets. 
async def get_stats(self) -> dict:
    """
    Return memory bucket statistics, walking domain subdirectories too.

    Counts the ".md" files under the permanent, dynamic and archive
    directories, sums their sizes in KiB, and counts files per containing
    folder name when that folder is not the category root itself.

    Returns:
        Dict with "permanent_count", "dynamic_count", "archive_count",
        "total_size_kb" and "domains" (folder name -> file count).
    """
    summary = {
        "permanent_count": 0,
        "dynamic_count": 0,
        "archive_count": 0,
        "total_size_kb": 0.0,
        "domains": {},
    }
    per_domain = summary["domains"]

    categories = (
        ("permanent_count", self.permanent_dir),
        ("dynamic_count", self.dynamic_dir),
        ("archive_count", self.archive_dir),
    )
    for counter_key, base_dir in categories:
        if not os.path.exists(base_dir):
            continue
        base_name = os.path.basename(base_dir)
        for current_dir, _subdirs, filenames in os.walk(base_dir):
            folder_name = os.path.basename(current_dir)
            for filename in filenames:
                if not filename.endswith(".md"):
                    continue
                summary[counter_key] += 1
                full_path = os.path.join(current_dir, filename)
                try:
                    summary["total_size_kb"] += os.path.getsize(full_path) / 1024
                except OSError:
                    # File vanished or is unreadable: still counted, size skipped.
                    pass
                # Files directly in the category root belong to no domain.
                if folder_name != base_name:
                    per_domain[folder_name] = per_domain.get(folder_name, 0) + 1

    return summary
+ 将指定桶移入归档目录(保留域子目录结构)。 + """ + file_path = self._find_bucket_file(bucket_id) + if not file_path: + return False + + try: + # Read once, get domain info and update type / 一次性读取 + post = frontmatter.load(file_path) + domain = post.get("domain", ["未分类"]) + primary_domain = sanitize_name(domain[0]) if domain else "未分类" + archive_subdir = os.path.join(self.archive_dir, primary_domain) + os.makedirs(archive_subdir, exist_ok=True) + + dest = safe_path(archive_subdir, os.path.basename(file_path)) + + # Update type marker then move file / 更新类型标记后移动文件 + post["type"] = "archived" + with open(file_path, "w", encoding="utf-8") as f: + f.write(frontmatter.dumps(post)) + + # Use shutil.move for cross-filesystem safety + # 使用 shutil.move 保证跨文件系统安全 + shutil.move(file_path, str(dest)) + except Exception as e: + logger.error( + f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}" + ) + return False + + logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/") + return True + + # --------------------------------------------------------- + # Internal: find bucket file across all three directories + # 内部:在三个目录中查找桶文件 + # --------------------------------------------------------- + def _find_bucket_file(self, bucket_id: str) -> Optional[str]: + """ + Recursively search permanent/dynamic/archive for a bucket file + matching the given ID. 
+ 在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。 + """ + if not bucket_id: + return None + for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]: + if not os.path.exists(dir_path): + continue + for root, _, files in os.walk(dir_path): + for fname in files: + if not fname.endswith(".md"): + continue + # Match by exact ID segment in filename + # 通过文件名中的 ID 片段精确匹配 + if bucket_id in fname: + return os.path.join(root, fname) + return None + + # --------------------------------------------------------- + # Internal: load bucket data from .md file + # 内部:从 .md 文件加载桶数据 + # --------------------------------------------------------- + def _load_bucket(self, file_path: str) -> Optional[dict]: + """ + Parse a Markdown file and return structured bucket data. + 解析 Markdown 文件,返回桶的结构化数据。 + """ + try: + post = frontmatter.load(file_path) + return { + "id": post.get("id", Path(file_path).stem), + "metadata": dict(post.metadata), + "content": post.content, + "path": file_path, + } + except Exception as e: + logger.warning( + f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}" + ) + return None diff --git a/config.example.yaml b/config.example.yaml new file mode 100644 index 0000000..394c081 --- /dev/null +++ b/config.example.yaml @@ -0,0 +1,82 @@ +# ============================================================ +# Ombre Brain Configuration / 配置文件 +# Copy this file to config.yaml and modify as needed +# 复制此文件为 config.yaml 后按需修改 +# ============================================================ + +# --- Transport / 传输方式 --- +# stdio: local use (Claude Desktop, direct pipe) +# streamable-http: remote use (HTTP, tunnel/CDN/proxy friendly) +# stdio: 本地使用(Claude Desktop,直接管道通信) +# streamable-http: 远程使用(标准 HTTP,对隧道/CDN/代理友好) +transport: "stdio" + +# --- Log level / 日志级别 --- +log_level: "INFO" + +# --- Bucket storage path / 记忆桶存储路径 --- +# Point this to your Obsidian vault subdirectory, or any local folder +# 指向你的 Obsidian 仓库子目录,或任意本地文件夹 +# Leave as-is to use the built-in 
./buckets/ directory +# 保持默认则使用内置的 ./buckets/ 目录 +# buckets_dir: "/path/to/your/Obsidian Vault/Ombre Brain" + +# --- Merge threshold / 桶合并阈值 --- +# When storing a new memory, if similarity with an existing bucket exceeds +# this value (0-100), merge instead of creating a new one +# 存新记忆时,如果与已有桶的相似度超过此值(0-100),则合并而非新建 +merge_threshold: 75 + +# --- Dehydration API / 脱水压缩 API 配置 --- +# Uses a cheap LLM for intelligent compression; auto-degrades to local +# keyword extraction if API is unavailable +# 用廉价 LLM 做智能压缩,API 不可用时自动降级到本地关键词提取 +dehydration: + # Supports any OpenAI-compatible API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc. + # 支持所有 OpenAI 兼容 API:DeepSeek / Ollama / LM Studio / vLLM / Gemini 等 + model: "deepseek-chat" + base_url: "https://api.deepseek.com/v1" + # Common base_url examples / 常见 base_url 示例: + # DeepSeek: https://api.deepseek.com/v1 + # SiliconFlow: https://api.siliconflow.cn/v1 + # Ollama: http://localhost:11434/v1 + # LM Studio: http://localhost:1234/v1 + # vLLM: http://localhost:8000/v1 + # Gemini: https://generativelanguage.googleapis.com/v1beta/openai + # api_key: "" # ⚠️ Use env var OMBRE_API_KEY instead / 请使用环境变量 OMBRE_API_KEY + max_tokens: 1024 + temperature: 0.1 + +# --- Decay parameters / 记忆衰减参数 --- +# Simulates Ebbinghaus forgetting curve, auto-archives inactive memories +# 模拟艾宾浩斯遗忘曲线,自动归档不活跃的记忆 +decay: + lambda: 0.05 # Decay rate / 衰减速率(越大遗忘越快) + threshold: 0.3 # Archive threshold / 归档阈值 + check_interval_hours: 24 # Check interval (hours) / 衰减检查间隔(小时) + emotion_weights: + base: 1.0 # Base weight / 基础权重 + arousal_boost: 0.8 # Arousal boost coefficient / 唤醒度加成系数 + +# --- Scoring weights / 检索权重参数 --- +# total = topic(×4) + emotion(×2) + time(×1.5) + importance(×1) +scoring_weights: + topic_relevance: 4.0 + emotion_resonance: 2.0 + time_proximity: 1.5 + importance: 1.0 + +# --- Fuzzy matching / 模糊匹配参数 --- +matching: + fuzzy_threshold: 50 # Minimum match score (0-100) / 最低匹配分数 + max_results: 5 # Max results per search / 单次搜索最多返回条数 + 
import math
import asyncio
import logging
from datetime import datetime

logger = logging.getLogger("ombre_brain.decay")


class DecayEngine:
    """
    Memory decay engine.

    Periodically scans the non-archived buckets, computes an activity score
    for each and archives those scoring below the configured threshold.
    Permanent, pinned and protected buckets are never archived.
    """

    def __init__(self, config: dict, bucket_mgr):
        """
        Read decay parameters from config["decay"] and keep the bucket manager.

        Args:
            config: Application config dict; missing keys use built-in defaults.
            bucket_mgr: Object providing async list_all() and archive().
        """
        # --- Decay parameters ---
        decay_cfg = config.get("decay", {})
        self.decay_lambda = decay_cfg.get("lambda", 0.05)
        self.threshold = decay_cfg.get("threshold", 0.3)
        self.check_interval = decay_cfg.get("check_interval_hours", 24)

        # --- Emotion weight parameters (continuous arousal coordinate) ---
        emotion_cfg = decay_cfg.get("emotion_weights", {})
        self.emotion_base = emotion_cfg.get("base", 1.0)
        self.arousal_boost = emotion_cfg.get("arousal_boost", 0.8)

        self.bucket_mgr = bucket_mgr

        # --- Background task control ---
        self._task: asyncio.Task | None = None
        self._running = False

    @property
    def is_running(self) -> bool:
        """Whether the background decay loop has been started and not stopped."""
        return self._running

    @staticmethod
    def _calc_time_weight(days_since: float) -> float:
        """
        Piecewise time multiplier applied to the base score.

        Up to 1 day: 1.0. Between day 1 and day 2: linear from 1.0 to 0.9.
        After day 2: exponential decay from 0.9, floored at 0.3.
        """
        if days_since <= 1.0:
            return 1.0
        elif days_since <= 2.0:
            # Linear interpolation: 1.0 -> 0.9 over [1, 2]
            return 1.0 - 0.1 * (days_since - 1.0)
        else:
            # k = ln(3)/5 ~= 0.2197, so the curve reaches the 0.3 floor
            # five days after day 2 (i.e. around day 7).
            raw = 0.9 * math.exp(-0.2197 * (days_since - 2.0))
            return max(0.3, raw)

    @staticmethod
    def _coerce_int(value, default: int) -> int:
        """Return int(value), or default when value is null or not numeric."""
        try:
            return int(value)
        except (ValueError, TypeError, OverflowError):
            return default

    @staticmethod
    def _days_since_active(metadata: dict) -> float:
        """Days since "last_active" (or "created"); 30 when unparseable."""
        # `or` lets an explicit null "last_active" fall back to "created".
        stamp = metadata.get("last_active") or metadata.get("created") or ""
        try:
            last_active = datetime.fromisoformat(str(stamp))
            # Take "now" in the timestamp's own zone (None -> naive local
            # time). Mixing naive now() with an aware timestamp raises
            # TypeError, which used to age every aware bucket to 30 days.
            now = datetime.now(last_active.tzinfo)
            return max(0.0, (now - last_active).total_seconds() / 86400)
        except (ValueError, TypeError):
            return 30.0

    def calculate_score(self, metadata: dict) -> float:
        """
        Calculate the current activity score of a memory bucket.

        final = time_weight * importance * activation_count**0.3
                * exp(-lambda * days) * (base + arousal * boost)
        then multiplied by 0.05 for resolved buckets, or by 1.5 for
        unresolved buckets with arousal above 0.7.

        Args:
            metadata: Bucket metadata; a non-dict value scores 0.0.

        Returns:
            Score rounded to 4 decimals; 999.0 for pinned, protected or
            permanent buckets, which never decay.
        """
        if not isinstance(metadata, dict):
            return 0.0

        # Pinned/protected and permanent buckets never decay.
        if metadata.get("pinned") or metadata.get("protected"):
            return 999.0
        if metadata.get("type") == "permanent":
            return 999.0

        # Malformed values fall back to the defaults instead of raising, so
        # one bad front-matter field cannot hide a bucket from scoring.
        importance = max(1, min(10, self._coerce_int(metadata.get("importance", 5), 5)))
        activation_count = max(1, self._coerce_int(metadata.get("activation_count", 1), 1))

        days_since = self._days_since_active(metadata)

        # Higher arousal -> larger emotion weight -> slower decay.
        try:
            arousal = max(0.0, min(1.0, float(metadata.get("arousal", 0.3))))
        except (ValueError, TypeError):
            arousal = 0.3
        emotion_weight = self.emotion_base + arousal * self.arousal_boost

        # Outer time multiplier.
        time_weight = self._calc_time_weight(days_since)

        base_score = (
            importance
            * (activation_count ** 0.3)
            * math.exp(-self.decay_lambda * days_since)
            * emotion_weight
        )
        score = time_weight * base_score

        resolved = metadata.get("resolved", False)
        # Resolved events sink to 5% and wait for keyword reactivation.
        resolved_factor = 0.05 if resolved else 1.0
        # High-arousal unresolved buckets get a boost so they surface first.
        urgency_boost = 1.5 if (arousal > 0.7 and not resolved) else 1.0

        return round(score * resolved_factor * urgency_boost, 4)

    async def run_decay_cycle(self) -> dict:
        """
        Run one decay cycle: score non-archived buckets, archive the low ones.

        Returns:
            {"checked": N, "archived": N, "lowest_score": X}; on a listing
            failure the counts are 0 and an "error" key carries the message.
            "lowest_score" is 0 when no bucket could be scored.
        """
        try:
            buckets = await self.bucket_mgr.list_all(include_archive=False)
        except Exception as e:
            logger.error(f"Failed to list buckets for decay / 衰减周期列桶失败: {e}")
            return {"checked": 0, "archived": 0, "lowest_score": 0, "error": str(e)}

        checked = 0
        archived = 0
        lowest_score = None  # stays None until a score is actually computed

        for bucket in buckets:
            # `or {}` guards against an explicit null metadata entry, which
            # would otherwise abort the whole cycle with AttributeError.
            meta = bucket.get("metadata") or {}

            # Skip permanent / pinned / protected buckets.
            if meta.get("type") == "permanent" or meta.get("pinned") or meta.get("protected"):
                continue

            checked += 1
            try:
                score = self.calculate_score(meta)
            except Exception as e:
                logger.warning(
                    f"Score calculation failed for {bucket.get('id', '?')} / "
                    f"计算得分失败: {e}"
                )
                continue

            lowest_score = score if lowest_score is None else min(lowest_score, score)

            # Below threshold -> archive (simulated forgetting).
            if score < self.threshold:
                try:
                    success = await self.bucket_mgr.archive(bucket["id"])
                    if success:
                        archived += 1
                        logger.info(
                            f"Decay archived / 衰减归档: "
                            f"{meta.get('name', bucket['id'])} "
                            f"(score={score:.4f}, threshold={self.threshold})"
                        )
                except Exception as e:
                    logger.warning(
                        f"Archive failed for {bucket.get('id', '?')} / "
                        f"归档失败: {e}"
                    )

        result = {
            "checked": checked,
            "archived": archived,
            # Never report float("inf"): it is not valid JSON and would leak
            # out whenever every scoring attempt failed.
            "lowest_score": lowest_score if lowest_score is not None else 0,
        }
        logger.info(f"Decay cycle complete / 衰减周期完成: {result}")
        return result

    async def ensure_started(self) -> None:
        """Start the background loop if it is not already running."""
        if not self._running:
            await self.start()

    async def start(self) -> None:
        """Start the background decay loop; a no-op when already running."""
        if self._running:
            return
        self._running = True
        self._task = asyncio.create_task(self._background_loop())
        logger.info(
            f"Decay engine started, interval: {self.check_interval}h / "
            f"衰减引擎已启动,检查间隔: {self.check_interval} 小时"
        )

    async def stop(self) -> None:
        """Stop the background decay loop and wait for the task to finish."""
        self._running = False
        if self._task:
            self._task.cancel()
            try:
                await self._task
            except asyncio.CancelledError:
                pass
            # Drop the finished task so it is not cancelled again later.
            self._task = None
        logger.info("Decay engine stopped / 衰减引擎已停止")

    async def _background_loop(self) -> None:
        """Background loop: run a decay cycle, sleep, repeat."""
        while self._running:
            try:
                await self.run_decay_cycle()
            except Exception as e:
                logger.error(f"Decay cycle error / 衰减周期出错: {e}")
            # Wait for the next cycle; cancellation during sleep ends the loop.
            try:
                await asyncio.sleep(self.check_interval * 3600)
            except asyncio.CancelledError:
                break
+# 主路径:通过 OpenAI 兼容客户端调用 LLM API +# - Fallback: local keyword extraction when API is unavailable +# 备用路径:API 不可用时用本地关键词提取 +# +# Depended on by: server.py +# 被谁依赖:server.py +# ============================================================ + + +import re +import json +import logging +from collections import Counter +import jieba + +from openai import AsyncOpenAI + +from utils import count_tokens_approx + +logger = logging.getLogger("ombre_brain.dehydrator") + + +# --- Dehydration prompt: instructs cheap LLM to compress information --- +# --- 脱水提示词:指导廉价 LLM 压缩信息 --- +DEHYDRATE_PROMPT = """你是一个信息压缩专家。请将以下内容脱水为紧凑摘要。 + +压缩规则: +1. 提取所有核心事实,去除冗余修饰和重复 +2. 保留最新的情绪状态和态度 +3. 保留所有待办/未完成事项 +4. 关键数字、日期、名称必须保留 +5. 目标压缩率 > 70% + +输出格式(纯 JSON,无其他内容): +{ + "core_facts": ["事实1", "事实2"], + "emotion_state": "当前情绪关键词", + "todos": ["待办1", "待办2"], + "keywords": ["关键词1", "关键词2"], + "summary": "50字以内的核心总结" +}""" + + +# --- Diary digest prompt: split daily notes into independent memory entries --- +# --- 日记整理提示词:把一大段日常拆分成多个独立记忆条目 --- +DIGEST_PROMPT = """你是一个日记整理专家。用户会发送一段包含今天各种事情的文本(可能很杂乱),请你将其拆分成多个独立的记忆条目。 + +整理规则: +1. 每个条目应该是一个独立的主题/事件(不要混在一起) +2. 为每个条目自动分析元数据 +3. 去除无意义的口水话和重复信息,保留核心内容 +4. 同一主题的零散信息应合并为一个条目 +5. 如果有待办事项,单独提取为一个条目 + +输出格式(纯 JSON 数组,无其他内容): +[ + { + "name": "条目标题(10字以内)", + "content": "整理后的内容", + "domain": ["主题域1"], + "valence": 0.7, + "arousal": 0.4, + "tags": ["标签1", "标签2"], + "importance": 5 + } +] + +主题域可选(选最精确的 1~2 个,只选真正相关的): + 日常: ["饮食", "穿搭", "出行", "居家", "购物"] + 人际: ["家庭", "恋爱", "友谊", "社交"] + 成长: ["工作", "学习", "考试", "求职"] + 身心: ["健康", "心理", "睡眠", "运动"] + 兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"] + 数字: ["编程", "AI", "硬件", "网络"] + 事务: ["财务", "计划", "待办"] + 内心: ["情绪", "回忆", "梦境", "自省"] +importance: 1-10,根据内容重要程度判断 +valence: 0~1(0=消极, 0.5=中性, 1=积极) +arousal: 0~1(0=平静, 0.5=普通, 1=激动)""" + + +# --- Merge prompt: instruct LLM to blend old and new memories --- +# --- 合并提示词:指导 LLM 揉合新旧记忆 --- +MERGE_PROMPT = """你是一个信息合并专家。请将旧记忆与新内容合并为一份统一的简洁记录。 + +合并规则: +1. 新内容与旧记忆冲突时,以新内容为准 +2. 
去除重复信息 +3. 保留所有重要事实 +4. 总长度尽量不超过旧记忆的 120% + +直接输出合并后的文本,不要加额外说明。""" + + +# --- Auto-tagging prompt: analyze content for domain and emotion coords --- +# --- 自动打标提示词:分析内容的主题域和情感坐标 --- +ANALYZE_PROMPT = """你是一个内容分析器。请分析以下文本,输出结构化的元数据。 + +分析规则: +1. domain(主题域):选最精确的 1~2 个,只选真正相关的 + 日常: ["饮食", "穿搭", "出行", "居家", "购物"] + 人际: ["家庭", "恋爱", "友谊", "社交"] + 成长: ["工作", "学习", "考试", "求职"] + 身心: ["健康", "心理", "睡眠", "运动"] + 兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"] + 数字: ["编程", "AI", "硬件", "网络"] + 事务: ["财务", "计划", "待办"] + 内心: ["情绪", "回忆", "梦境", "自省"] +2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极 +3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动 +4. tags(关键词标签):3~5 个最能概括内容的关键词 +5. suggested_name(建议桶名):10字以内的简短标题 + +输出格式(纯 JSON,无其他内容): +{ + "domain": ["主题域1", "主题域2"], + "valence": 0.7, + "arousal": 0.4, + "tags": ["标签1", "标签2", "标签3"], + "suggested_name": "简短标题" +}""" + + +class Dehydrator: + """ + Data dehydrator + content analyzer. + Three capabilities: dehydration / merge / auto-tagging (domain + emotion). + Prefers API (better quality); auto-degrades to local (guaranteed availability). + 数据脱水器 + 内容分析器。 + 三大能力:脱水压缩 / 新旧合并 / 自动打标。 + 优先走 API,API 挂了自动降级到本地。 + """ + + def __init__(self, config: dict): + # --- Read dehydration API config / 读取脱水 API 配置 --- + dehy_cfg = config.get("dehydration", {}) + self.api_key = dehy_cfg.get("api_key", "") + self.model = dehy_cfg.get("model", "deepseek-chat") + self.base_url = dehy_cfg.get("base_url", "https://api.deepseek.com/v1") + self.max_tokens = dehy_cfg.get("max_tokens", 1024) + self.temperature = dehy_cfg.get("temperature", 0.1) + + # --- API availability / 是否有可用的 API --- + self.api_available = bool(self.api_key) + + # --- Initialize OpenAI-compatible client --- + # --- 初始化 OpenAI 兼容客户端 --- + # Supports any OpenAI-format API: DeepSeek / Ollama / LM Studio / vLLM / Gemini etc. 
async def dehydrate(self, content: str, metadata: dict = None) -> str:
    """
    Compress memory content into a formatted summary string.

    Blank content yields a fixed placeholder. Content under 100 approximate
    tokens is formatted without compression. Otherwise the API is tried
    first when available, and local compression is used if the API fails
    or returns nothing.

    Args:
        content: Raw memory text.
        metadata: Optional bucket metadata passed through to the formatter.

    Returns:
        The formatted summary text.
    """
    is_blank = not content or not content.strip()
    if is_blank:
        return "(空记忆 / empty memory)"

    # Short enough already: format as-is.
    token_estimate = count_tokens_approx(content)
    if token_estimate < 100:
        return self._format_output(content, metadata)

    # Preferred path: API compression. Any failure falls through to local.
    if self.api_available:
        try:
            compressed = await self._api_dehydrate(content)
            if compressed:
                return self._format_output(compressed, metadata)
        except Exception as err:
            logger.warning(
                f"API dehydration failed, degrading to local / "
                f"API 脱水失败,降级到本地压缩: {err}"
            )

    # Fallback path: local compression.
    return self._format_output(self._local_dehydrate(content), metadata)
async def _api_dehydrate(self, content: str) -> str:
    """
    Ask the configured chat-completions API to dehydrate the content.

    Only the first 3000 characters of the content are sent, with
    DEHYDRATE_PROMPT as the system message.

    Args:
        content: Raw memory text.

    Returns:
        The first choice's message text, or "" when the response has no
        choices or the message content is empty.
    """
    messages = [
        {"role": "system", "content": DEHYDRATE_PROMPT},
        {"role": "user", "content": content[:3000]},
    ]
    completion = await self.client.chat.completions.create(
        model=self.model,
        messages=messages,
        max_tokens=self.max_tokens,
        temperature=self.temperature,
    )
    choices = completion.choices
    if choices:
        return choices[0].message.content or ""
    return ""
+ 调用 LLM API 执行智能合并。 + """ + user_msg = f"旧记忆:\n{old_content[:2000]}\n\n新内容:\n{new_content[:2000]}" + response = await self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": MERGE_PROMPT}, + {"role": "user", "content": user_msg}, + ], + max_tokens=self.max_tokens, + temperature=self.temperature, + ) + if not response.choices: + return "" + return response.choices[0].message.content or "" + + # --------------------------------------------------------- + # Local dehydration (fallback when API is unavailable) + # 本地脱水(无 API 时的兜底方案) + # Keyword frequency + sentence position weighting + # 基于关键词频率 + 句子位置权重 + # --------------------------------------------------------- + def _local_dehydrate(self, content: str) -> str: + """ + Local keyword extraction + position-weighted simple compression. + 本地关键词提取 + 位置加权的简单压缩。 + """ + # --- Split into sentences / 分句 --- + sentences = re.split(r"[。!?\n.!?]+", content) + sentences = [s.strip() for s in sentences if len(s.strip()) > 5] + + if not sentences: + return content[:200] + + # --- Extract high-frequency keywords / 提取高频关键词 --- + keywords = self._extract_keywords(content) + + # --- Score sentences: position weight + keyword hits --- + # --- 句子评分:开头结尾权重高 + 关键词命中加分 --- + scored = [] + for i, sent in enumerate(sentences): + position_weight = 1.5 if i < 3 else (1.2 if i > len(sentences) - 3 else 1.0) + keyword_hits = sum(1 for kw in keywords if kw in sent) + score = position_weight * (1 + keyword_hits) + scored.append((score, sent)) + + scored.sort(key=lambda x: x[0], reverse=True) + + # --- Top-8 sentences + keyword list / 取高分句 + 关键词列表 --- + selected = [s for _, s in scored[:8]] + summary = "。".join(selected) + keyword_str = ", ".join(keywords[:10]) + + return f"[摘要] {summary}\n[关键词] {keyword_str}" + + # --------------------------------------------------------- + # Local merge (simple concatenation + truncation) + # 本地合并(简单拼接 + 截断) + # 
--------------------------------------------------------- + def _local_merge(self, old_content: str, new_content: str) -> str: + """ + Simple concatenation merge; truncates if too long. + 简单拼接合并,超长时截断保留两端。 + """ + merged = f"{old_content.strip()}\n\n--- 更新 ---\n{new_content.strip()}" + # Truncate if over 3000 chars / 超过 3000 字符则各取一半 + if len(merged) > 3000: + half = 1400 + merged = ( + f"{old_content[:half].strip()}\n\n--- 更新 ---\n{new_content[:half].strip()}" + ) + return merged + + # --------------------------------------------------------- + # Keyword extraction + # 关键词提取 + # Chinese + English tokenization → stopword filter → frequency sort + # 中英文分词 + 停用词过滤 + 词频排序 + # --------------------------------------------------------- + def _extract_keywords(self, text: str) -> list[str]: + """ + Extract high-frequency keywords using jieba (Chinese + English mixed). + 用 jieba 分词提取高频关键词。 + """ + try: + words = jieba.lcut(text) + except Exception: + words = [] + # English words / 英文单词 + english_words = re.findall(r"[a-zA-Z]{3,}", text.lower()) + words += english_words + + # Stopwords / 停用词 + stopwords = { + "的", "了", "在", "是", "我", "有", "和", "就", "不", "人", + "都", "一个", "上", "也", "很", "到", "说", "要", "去", + "你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她", + "the", "and", "for", "are", "but", "not", "you", "all", "can", + "had", "her", "was", "one", "our", "out", "has", "have", "with", + "this", "that", "from", "they", "been", "said", "will", "each", + } + filtered = [ + w for w in words + if w not in stopwords and len(w.strip()) > 1 and not re.match(r"^[0-9]+$", w) + ] + counter = Counter(filtered) + return [word for word, _ in counter.most_common(15)] + + # --------------------------------------------------------- + # Output formatting + # 输出格式化 + # Wraps dehydrated result with bucket name, tags, emotion coords + # 把脱水结果包装成带桶名、标签、情感坐标的可读文本 + # --------------------------------------------------------- + def _format_output(self, content: str, metadata: dict = None) -> str: 
async def analyze(self, content: str) -> dict:
    """
    Analyze content and return structured metadata.

    Blank content yields the default analysis. Otherwise the API is tried
    first when available; local analysis is used if the API raises or
    returns an empty result.

    Args:
        content: Text to analyze.

    Returns:
        Metadata dict produced by the API, local or default analysis.
    """
    has_text = bool(content) and bool(content.strip())
    if not has_text:
        return self._default_analysis()

    # Preferred path: API tagging. Any failure falls through to local.
    if self.api_available:
        try:
            tagged = await self._api_analyze(content)
        except Exception as err:
            logger.warning(
                f"API tagging failed, degrading to local / "
                f"API 打标失败,降级到本地分析: {err}"
            )
        else:
            if tagged:
                return tagged

    # Fallback path: local analysis.
    return self._local_analyze(content)
+ 调用 LLM API 执行内容分析打标。 + """ + response = await self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": ANALYZE_PROMPT}, + {"role": "user", "content": content[:2000]}, + ], + max_tokens=256, + temperature=0.1, + ) + if not response.choices: + return self._default_analysis() + raw = response.choices[0].message.content or "" + if not raw.strip(): + return self._default_analysis() + return self._parse_analysis(raw) + + # --------------------------------------------------------- + # Parse API JSON response with safety checks + # 解析 API 返回的 JSON,做安全校验 + # Ensure valence/arousal in 0~1, domain/tags valid + # --------------------------------------------------------- + def _parse_analysis(self, raw: str) -> dict: + """ + Parse and validate API tagging result. + 解析并校验 API 返回的打标结果。 + """ + try: + # Handle potential markdown code block wrapping + # 处理可能的 markdown 代码块包裹 + cleaned = raw.strip() + if cleaned.startswith("```"): + cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0] + result = json.loads(cleaned) + except (json.JSONDecodeError, IndexError, ValueError): + logger.warning(f"API tagging JSON parse failed / JSON 解析失败: {raw[:200]}") + return self._default_analysis() + + if not isinstance(result, dict): + return self._default_analysis() + + # --- Validate and clamp value ranges / 校验并钳制数值范围 --- + try: + valence = max(0.0, min(1.0, float(result.get("valence", 0.5)))) + arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3)))) + except (ValueError, TypeError): + valence, arousal = 0.5, 0.3 + + return { + "domain": result.get("domain", ["未分类"])[:3], + "valence": valence, + "arousal": arousal, + "tags": result.get("tags", [])[:5], + "suggested_name": str(result.get("suggested_name", ""))[:20], + } + + # --------------------------------------------------------- + # Local analysis (fallback when API is unavailable) + # 本地分析(无 API 时的兜底方案) + # Keyword matching + simple sentiment dictionary + # 基于关键词 + 简单情感词典匹配 + # 
--------------------------------------------------------- + def _local_analyze(self, content: str) -> dict: + """ + Local keyword + sentiment dictionary analysis. + 本地关键词 + 情感词典的简单分析。 + """ + keywords = self._extract_keywords(content) + text_lower = content.lower() + + # --- Domain matching by keyword hits --- + # --- 主题域匹配:基于关键词命中 --- + domain_keywords = { + # Daily / 日常 + "饮食": {"吃", "饭", "做饭", "外卖", "奶茶", "咖啡", "麻辣烫", "面包", + "超市", "零食", "水果", "牛奶", "食堂", "减肥", "节食"}, + "出行": {"旅行", "出发", "航班", "酒店", "地铁", "打车", "高铁", "机票", + "景点", "签证", "护照"}, + "居家": {"打扫", "洗衣", "搬家", "快递", "收纳", "装修", "租房"}, + "购物": {"买", "下单", "到货", "退货", "优惠", "折扣", "代购"}, + # Relationships / 人际 + "家庭": {"爸", "妈", "父亲", "母亲", "家人", "弟弟", "姐姐", "哥哥", + "奶奶", "爷爷", "亲戚", "家里"}, + "恋爱": {"爱人", "男友", "女友", "恋", "约会", "接吻", "分手", + "暧昧", "在一起", "想你", "同床"}, + "友谊": {"朋友", "闺蜜", "兄弟", "聚", "约饭", "聊天", "群"}, + "社交": {"见面", "被人", "圈子", "消息", "评论", "点赞"}, + # Growth / 成长 + "工作": {"会议", "项目", "客户", "汇报", "deadline", "同事", + "老板", "薪资", "合同", "需求", "加班", "实习"}, + "学习": {"课", "考试", "论文", "笔记", "作业", "教授", "讲座", + "分数", "选课", "学分"}, + "求职": {"面试", "简历", "offer", "投递", "薪资", "岗位"}, + # Health / 身心 + "健康": {"医院", "复查", "吃药", "抽血", "手术", "心率", + "病", "症状", "指标", "体检", "月经"}, + "心理": {"焦虑", "抑郁", "恐慌", "创伤", "人格", "咨询", + "安全感", "自残", "崩溃", "压力"}, + "睡眠": {"睡", "失眠", "噩梦", "清醒", "熬夜", "早起", "午觉"}, + # Interests / 兴趣 + "游戏": {"游戏", "steam", "极乐迪斯科", "存档", "通关", "角色", + "mod", "DLC", "剧情"}, + "影视": {"电影", "番剧", "动漫", "剧", "综艺", "追番", "上映"}, + "音乐": {"歌", "音乐", "专辑", "live", "演唱会", "耳机"}, + "阅读": {"书", "小说", "读完", "kindle", "连载", "漫画"}, + "创作": {"写", "画", "预设", "脚本", "视频", "剪辑", "P图", + "SillyTavern", "插件", "正则", "人设"}, + # Digital / 数字 + "编程": {"代码", "code", "python", "bug", "api", "docker", + "git", "调试", "框架", "部署", "开发", "server"}, + "AI": {"模型", "GPT", "Claude", "gemini", "LLM", "token", + "prompt", "LoRA", "微调", "推理", "MCP"}, + "网络": {"VPN", "梯子", "代理", "域名", "隧道", "服务器", + "cloudflare", "tunnel", 
"反代"}, + # Affairs / 事务 + "财务": {"钱", "转账", "工资", "花了", "欠", "还款", "借", + "账单", "余额", "预算", "黄金"}, + "计划": {"计划", "目标", "deadline", "日程", "清单", "安排"}, + "待办": {"要做", "记得", "别忘", "提醒", "下次"}, + # Inner / 内心 + "情绪": {"开心", "难过", "生气", "哭", "泪", "孤独", "幸福", + "伤心", "烦", "委屈", "感动", "温柔"}, + "回忆": {"以前", "小时候", "那时", "怀念", "曾经", "记得"}, + "梦境": {"梦", "梦到", "梦见", "噩梦", "清醒梦"}, + "自省": {"反思", "觉得自己", "问自己", "意识到", "明白了"}, + } + + matched_domains = [] + for domain, kws in domain_keywords.items(): + hits = sum(1 for kw in kws if kw in text_lower) + if hits >= 2: + matched_domains.append((domain, hits)) + matched_domains.sort(key=lambda x: x[1], reverse=True) + domains = [d for d, _ in matched_domains[:3]] or ["未分类"] + + # --- Emotion estimation via simple sentiment dictionary --- + # --- 情感坐标估算:基于简单情感词典 --- + positive_words = {"开心", "高兴", "喜欢", "哈哈", "棒", "赞", "爱", + "幸福", "成功", "感动", "兴奋", "棒极了", + "happy", "love", "great", "awesome", "nice"} + negative_words = {"难过", "伤心", "生气", "焦虑", "害怕", "无聊", + "烦", "累", "失望", "崩溃", "愤怒", "痛苦", + "sad", "angry", "hate", "tired", "afraid"} + intense_words = {"太", "非常", "极", "超", "特别", "十分", "炸", + "崩溃", "激动", "愤怒", "狂喜", "very", "so", "extremely"} + + pos_count = sum(1 for w in positive_words if w in text_lower) + neg_count = sum(1 for w in negative_words if w in text_lower) + intense_count = sum(1 for w in intense_words if w in text_lower) + + # valence: positive/negative emotion balance + if pos_count + neg_count > 0: + valence = 0.5 + 0.4 * (pos_count - neg_count) / (pos_count + neg_count) + else: + valence = 0.5 + + # arousal: intensity level + arousal = min(1.0, 0.3 + intense_count * 0.15 + (pos_count + neg_count) * 0.08) + + return { + "domain": domains, + "valence": round(max(0.0, min(1.0, valence)), 2), + "arousal": round(max(0.0, min(1.0, arousal)), 2), + "tags": keywords[:5], + "suggested_name": "", + } + + # --------------------------------------------------------- + # Default analysis result (empty content or total failure) 
async def digest(self, content: str) -> list[dict]:
    """
    Split a large chunk of daily notes into independent memory entries.

    The LLM API is preferred because it can split on meaning; if it is
    unavailable, raises, or returns nothing, the paragraph-based local
    splitter is used instead.

    Args:
        content: Free-form diary text.

    Returns:
        A list of dicts, each with "name", "content", "domain", "valence",
        "arousal", "tags" and "importance". Blank input yields ``[]``.
    """
    is_blank = (not content) or (not content.strip())
    if is_blank:
        return []

    entries = None
    if self.api_available:
        try:
            entries = await self._api_digest(content)
        except Exception as e:
            logger.warning(
                f"API diary digest failed, degrading to local / "
                f"API 日记整理失败,降级到本地拆分: {e}"
            )

    if entries:
        return entries

    # Local fallback: split on blank lines / separators.
    return await self._local_digest(content)
+ 调用 LLM API 执行日记整理。 + """ + response = await self.client.chat.completions.create( + model=self.model, + messages=[ + {"role": "system", "content": DIGEST_PROMPT}, + {"role": "user", "content": content[:5000]}, + ], + max_tokens=2048, + temperature=0.2, + ) + if not response.choices: + return [] + raw = response.choices[0].message.content or "" + if not raw.strip(): + return [] + return self._parse_digest(raw) + + # --------------------------------------------------------- + # Parse diary digest result with safety checks + # 解析日记整理结果,做安全校验 + # --------------------------------------------------------- + def _parse_digest(self, raw: str) -> list[dict]: + """ + Parse and validate API diary digest result. + 解析并校验 API 返回的日记整理结果。 + """ + try: + cleaned = raw.strip() + if cleaned.startswith("```"): + cleaned = cleaned.split("\n", 1)[-1].rsplit("```", 1)[0] + items = json.loads(cleaned) + except (json.JSONDecodeError, IndexError, ValueError): + logger.warning(f"Diary digest JSON parse failed / JSON 解析失败: {raw[:200]}") + return [] + + if not isinstance(items, list): + return [] + + validated = [] + for item in items: + if not isinstance(item, dict) or not item.get("content"): + continue + try: + importance = max(1, min(10, int(item.get("importance", 5)))) + except (ValueError, TypeError): + importance = 5 + try: + valence = max(0.0, min(1.0, float(item.get("valence", 0.5)))) + arousal = max(0.0, min(1.0, float(item.get("arousal", 0.3)))) + except (ValueError, TypeError): + valence, arousal = 0.5, 0.3 + + validated.append({ + "name": str(item.get("name", ""))[:20], + "content": str(item.get("content", "")), + "domain": item.get("domain", ["未分类"])[:3], + "valence": valence, + "arousal": arousal, + "tags": item.get("tags", [])[:5], + "importance": importance, + }) + return validated + + # --------------------------------------------------------- + # Local diary split (fallback when API is unavailable) + # 本地日记拆分(无 API 时的兜底) + # Split by blank lines/separators, analyze each 
segment + # 按空行/分隔符拆段,每段独立分析 + # --------------------------------------------------------- + async def _local_digest(self, content: str) -> list[dict]: + """ + Local paragraph split + per-segment analysis. + 本地按段落拆分 + 逐段分析。 + """ + # Split by blank lines or separators / 按空行或分隔线拆分 + segments = re.split(r"\n{2,}|---+|\n-\s", content) + segments = [s.strip() for s in segments if len(s.strip()) > 20] + + if not segments: + # Content too short, treat as single entry + # 内容太短,整个作为一个条目 + analysis = self._local_analyze(content) + return [{ + "name": analysis.get("suggested_name", "日记"), + "content": content.strip(), + "domain": analysis["domain"], + "valence": analysis["valence"], + "arousal": analysis["arousal"], + "tags": analysis["tags"], + "importance": 5, + }] + + items = [] + for seg in segments[:10]: # Max 10 segments / 最多 10 段 + analysis = self._local_analyze(seg) + items.append({ + "name": analysis.get("suggested_name", "") or seg[:10], + "content": seg, + "domain": analysis["domain"], + "valence": analysis["valence"], + "arousal": analysis["arousal"], + "tags": analysis["tags"], + "importance": 5, + }) + return items diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6d85cec --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,48 @@ +# ============================================================ +# Ombre Brain Docker Compose +# Docker Compose 配置 +# +# Usage / 使用: +# 1. Create .env: echo "OMBRE_API_KEY=your-key" > .env +# 创建 .env 文件 +# 2. docker-compose up -d +# 3. docker compose logs tunnel (for public URL / 查看公网地址) +# ============================================================ + +services: + ombre-brain: + build: . 
def parse_frontmatter(filepath):
    """
    Extract ``id``, ``name`` and ``domain`` from a bucket file's frontmatter.

    Parsing is regex-based on purpose (this script is standard-library only),
    so it handles just the shapes that occur in bucket files: scalar
    ``key: value`` lines, and ``domain`` as either a block list (``- item``
    lines) or an inline list (``[a, b]``).

    Args:
        filepath: Path of the markdown file to read (UTF-8).

    Returns:
        ``None`` when the file has no ``---``-delimited frontmatter.
        Otherwise a dict that always contains "domain" (defaulting to
        ``["未分类"]``) and contains "id" / "name" only when those keys have
        a non-empty value.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()
    if not content.startswith("---"):
        return None
    parts = content.split("---", 2)
    if len(parts) < 3:
        return None
    yaml_text = parts[1]

    meta = {}
    for key in ("id", "name"):
        # `[ \t]*` instead of `\s*`: `\s` also matches newlines, so an empty
        # value would otherwise swallow the following line as its value.
        m = re.search(rf"^{key}:[ \t]*(.+)$", yaml_text, re.MULTILINE)
        if m:
            value = m.group(1).strip().strip("'\"")
            if value:
                meta[key] = value

    domains = []
    block = re.search(
        r"^domain:[ \t]*\n((?:[ \t]*-[ \t]*.+\n?)+)", yaml_text, re.MULTILINE
    )
    if block:
        raw_items = re.findall(r"^[ \t]*-[ \t]*(.+)$", block.group(1), re.MULTILINE)
    else:
        inline = re.search(r"^domain:[ \t]*\[(.*)\][ \t]*$", yaml_text, re.MULTILINE)
        raw_items = inline.group(1).split(",") if inline else []
    for item in raw_items:
        # Drop surrounding whitespace and YAML quotes so the value can be
        # used directly as a directory name.
        item = item.strip().strip("'\"").strip()
        if item:
            domains.append(item)
    meta["domain"] = domains or ["未分类"]

    return meta
os.walk(DYNAMIC_DIR): + level = root.replace(DYNAMIC_DIR, "").count(os.sep) + indent = " " * level + folder = os.path.basename(root) + if level > 0: + print(f"{indent}📁 {folder}/") + for f in sorted(files): + if f.endswith(".md"): + print(f"{indent} 📄 {f}") + + +if __name__ == "__main__": + migrate() diff --git a/reclassify_api.py b/reclassify_api.py new file mode 100644 index 0000000..08d5a94 --- /dev/null +++ b/reclassify_api.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +""" +用 API 重新打标未分类记忆桶,修正 domain/tags/name,移动到正确目录。 +用法: docker exec ombre-brain python3 /app/reclassify_api.py +""" +import asyncio +import os +import json +import glob +import re + +from openai import AsyncOpenAI +import frontmatter + +ANALYZE_PROMPT = ( + "你是一个内容分析器。请分析以下文本,输出结构化的元数据。\n\n" + "分析规则:\n" + '1. domain(主题域):选最精确的 1~2 个,只选真正相关的\n' + ' 日常: ["饮食", "穿搭", "出行", "居家", "购物"]\n' + ' 人际: ["家庭", "恋爱", "友谊", "社交"]\n' + ' 成长: ["工作", "学习", "考试", "求职"]\n' + ' 身心: ["健康", "心理", "睡眠", "运动"]\n' + ' 兴趣: ["游戏", "影视", "音乐", "阅读", "创作", "手工"]\n' + ' 数字: ["编程", "AI", "硬件", "网络"]\n' + ' 事务: ["财务", "计划", "待办"]\n' + ' 内心: ["情绪", "回忆", "梦境", "自省"]\n' + "2. valence(情感效价):0.0~1.0,0=极度消极 → 0.5=中性 → 1.0=极度积极\n" + "3. arousal(情感唤醒度):0.0~1.0,0=非常平静 → 0.5=普通 → 1.0=非常激动\n" + "4. tags(关键词标签):3~5 个最能概括内容的关键词\n" + "5. 
async def reclassify():
    """
    Re-tag every bucket under the "未分类" directory via the LLM API.

    For each ``*.md`` file the bucket name and body are sent to the model.
    The returned domain / tags / valence / arousal / name are written back
    into the frontmatter, and the file is moved into the directory of its
    primary domain.

    A file whose load, API call, or response validation fails is reported
    and skipped, so one bad bucket cannot abort the batch. An existing
    destination file is never overwritten.
    """
    client = AsyncOpenAI(
        api_key=os.environ.get("OMBRE_API_KEY", ""),
        base_url="https://api.siliconflow.cn/v1",
        timeout=60.0,
    )

    files = sorted(glob.glob(os.path.join(UNCLASS_DIR, "*.md")))
    print(f"找到 {len(files)} 个未分类文件\n")

    for fpath in files:
        basename = os.path.basename(fpath)
        try:
            post = frontmatter.load(fpath)
            content = post.content.strip()
            name = post.metadata.get("name", "")
            full_text = f"{name}\n{content}" if name else content

            resp = await client.chat.completions.create(
                model="deepseek-ai/DeepSeek-V3",
                messages=[
                    {"role": "system", "content": ANALYZE_PROMPT},
                    {"role": "user", "content": full_text[:2000]},
                ],
                max_tokens=256,
                temperature=0.1,
            )
            raw = resp.choices[0].message.content.strip()
            if raw.startswith("```"):
                raw = raw.split("\n", 1)[-1].rsplit("```", 1)[0]
            result = json.loads(raw)
            if not isinstance(result, dict):
                raise ValueError(f"expected a JSON object, got {type(result).__name__}")

            # Validate inside the try: the model output is untrusted, and a
            # non-numeric valence must skip this file, not kill the run.
            domain_value = result.get("domain")
            if isinstance(domain_value, str):
                domain_value = [domain_value]
            if not isinstance(domain_value, list):
                domain_value = []
            new_domain = [str(d).strip() for d in domain_value if str(d).strip()][:3]
            new_domain = new_domain or ["未分类"]

            tags_value = result.get("tags")
            if isinstance(tags_value, str):
                tags_value = [tags_value]
            if not isinstance(tags_value, list):
                tags_value = []
            new_tags = [str(t) for t in tags_value][:5]

            new_name = sanitize(str(result.get("suggested_name") or "") or name)
            new_valence = max(0.0, min(1.0, float(result.get("valence", 0.5))))
            new_arousal = max(0.0, min(1.0, float(result.get("arousal", 0.3))))
        except Exception as e:
            print(f" X API失败 {basename}: {e}")
            continue

        # Work out the destination before touching the file.
        primary = sanitize(new_domain[0])
        target_dir = os.path.join(DATA_DIR, primary)
        bid = post.metadata.get("id", "")
        # Without an id the "<name>_<id>.md" pattern is not unique, so keep
        # the original file name in that case.
        if bid and new_name and new_name != bid:
            new_filename = f"{new_name}_{bid}.md"
        else:
            new_filename = basename
        dest = os.path.join(target_dir, new_filename)

        if dest != fpath and os.path.exists(dest):
            # os.rename would silently replace the other bucket on POSIX.
            print(f" X 目标已存在,跳过 {basename}: {dest}")
            continue

        post.metadata["domain"] = new_domain
        post.metadata["tags"] = new_tags
        post.metadata["valence"] = new_valence
        post.metadata["arousal"] = new_arousal
        if new_name:
            post.metadata["name"] = new_name

        # Write the updated frontmatter back in place, then move the file.
        with open(fpath, "w", encoding="utf-8") as f:
            f.write(frontmatter.dumps(post))

        os.makedirs(target_dir, exist_ok=True)
        if dest != fpath:
            os.rename(fpath, dest)

        print(f" OK {basename}")
        print(f" -> {primary}/{new_filename}")
        print(f" domain={new_domain} tags={new_tags} V={new_valence} A={new_arousal}")
        print()
def update_domain_in_file(filepath, new_domains):
    """
    Rewrite the ``domain`` list in a bucket file's YAML frontmatter.

    Only the frontmatter (the text between the first two ``---`` fences) is
    edited, and the key is matched at the start of a line. This keeps the
    rewrite from consuming the closing ``---`` fence when ``domain`` is the
    last key (a fence line looks like a ``- item`` line to a loose pattern),
    and from touching ``domain:`` text in the body or keys like
    ``subdomain:``.

    If the frontmatter has no ``domain`` key, one is appended. Files without
    frontmatter are left untouched.

    Args:
        filepath: Path of the markdown file to update in place (UTF-8).
        new_domains: Iterable of domain names to write as a YAML block list.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    parts = content.split("---", 2) if content.startswith("---") else []
    if len(parts) < 3:
        return

    domain_yaml = "domain:\n" + "".join(f"- {d}\n" for d in new_domains)
    yaml_text = parts[1]

    def _replacement(_match):
        # Callable replacement: a plain string would have its backslashes
        # interpreted as regex escapes / group references.
        return domain_yaml

    # Block-list form: "domain:" followed by "- item" lines.
    block_re = re.compile(
        r"^domain:[ \t]*\n(?:[ \t]*-[ \t]*.*\n?)+", re.MULTILINE
    )
    new_yaml, replaced = block_re.subn(_replacement, yaml_text, count=1)
    if not replaced:
        # Single-line form: "domain: [a, b]", "domain: x" or an empty value.
        line_re = re.compile(r"^domain:.*\n?", re.MULTILINE)
        new_yaml, replaced = line_re.subn(_replacement, yaml_text, count=1)
    if not replaced:
        # No domain key at all: append one at the end of the frontmatter.
        if not yaml_text.endswith("\n"):
            yaml_text += "\n"
        new_yaml = yaml_text + domain_yaml

    content = "---".join([parts[0], new_yaml, parts[2]])
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
print(f"\n重分类完成。\n") + + # 展示新结构 + print("=== 新目录结构 ===") + for root, dirs, files in os.walk(DYNAMIC_DIR): + level = root.replace(DYNAMIC_DIR, "").count(os.sep) + indent = " " * level + folder = os.path.basename(root) + if level > 0: + print(f"{indent}📁 {folder}/") + for f in sorted(files): + if f.endswith(".md"): + print(f"{indent} 📄 {f}") + + +if __name__ == "__main__": + reclassify() diff --git a/render.yaml b/render.yaml new file mode 100644 index 0000000..17b1447 --- /dev/null +++ b/render.yaml @@ -0,0 +1,21 @@ +services: + - type: web + name: ombre-brain + env: python + region: oregon + plan: free + buildCommand: pip install -r requirements.txt + startCommand: python server.py + envVars: + - key: OMBRE_TRANSPORT + value: streamable-http + - key: OMBRE_API_KEY + sync: false # Set in Render dashboard > Environment (any OpenAI-compatible key) + - key: OMBRE_BASE_URL + sync: false # e.g. https://api.deepseek.com/v1 or https://api.siliconflow.cn/v1 + - key: OMBRE_BUCKETS_DIR + value: /opt/render/project/src/buckets + disk: + name: ombre-buckets + mountPath: /opt/render/project/src/buckets + sizeGB: 1 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..01a24bd --- /dev/null +++ b/requirements.txt @@ -0,0 +1,25 @@ +# ============================================================ +# Ombre Brain Python 依赖 +# 安装: pip install -r requirements.txt +# ============================================================ + +# MCP 协议 SDK(Claude 通信核心) +mcp>=1.0.0 + +# 模糊匹配(记忆桶搜索) +rapidfuzz>=3.0.0 + +# OpenAI 兼容客户端(支持 DeepSeek/Ollama/LM Studio/vLLM/Gemini 等任意兼容 API) +openai>=1.0.0 + +# YAML 配置解析 +pyyaml>=6.0 + +# Markdown frontmatter 解析(桶文件读写) +python-frontmatter>=1.1.0 + +# 中文分词 +jieba>=0.42.1 + +# 异步 HTTP 客户端(应用层保活 ping) +httpx>=0.27.0 diff --git a/server.py b/server.py new file mode 100644 index 0000000..b3b69b4 --- /dev/null +++ b/server.py @@ -0,0 +1,620 @@ +# ============================================================ +# Module: MCP Server Entry 
Point (server.py) +# 模块:MCP 服务器主入口 +# +# Starts the Ombre Brain MCP service and registers memory +# operation tools for Claude to call. +# 启动 Ombre Brain MCP 服务,注册记忆操作工具供 Claude 调用。 +# +# Core responsibilities: +# 核心职责: +# - Initialize config, bucket manager, dehydrator, decay engine +# 初始化配置、记忆桶管理器、脱水器、衰减引擎 +# - Expose 5 MCP tools: +# 暴露 5 个 MCP 工具: +# breath — Surface unresolved memories or search by keyword +# 浮现未解决记忆 或 按关键词检索 +# hold — Store a single memory +# 存储单条记忆 +# grow — Diary digest, auto-split into multiple buckets +# 日记归档,自动拆分多桶 +# trace — Modify metadata / resolved / delete +# 修改元数据 / resolved 标记 / 删除 +# pulse — System status + bucket listing +# 系统状态 + 所有桶列表 +# +# Startup: +# 启动方式: +# Local: python server.py +# Remote: OMBRE_TRANSPORT=streamable-http python server.py +# Docker: docker-compose up +# ============================================================ + +import os +import sys +import random +import logging +import asyncio +import httpx +from typing import Optional + +# --- Ensure same-directory modules can be imported --- +# --- 确保同目录下的模块能被正确导入 --- +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from mcp.server.fastmcp import FastMCP + +from bucket_manager import BucketManager +from dehydrator import Dehydrator +from decay_engine import DecayEngine +from utils import load_config, setup_logging + +# --- Load config & init logging / 加载配置 & 初始化日志 --- +config = load_config() +setup_logging(config.get("log_level", "INFO")) +logger = logging.getLogger("ombre_brain") + +# --- Initialize three core components / 初始化三大核心组件 --- +bucket_mgr = BucketManager(config) # Bucket manager / 记忆桶管理器 +dehydrator = Dehydrator(config) # Dehydrator / 脱水器 +decay_engine = DecayEngine(config, bucket_mgr) # Decay engine / 衰减引擎 + +# --- Create MCP server instance / 创建 MCP 服务器实例 --- +# host="0.0.0.0" so Docker container's SSE is externally reachable +# stdio mode ignores host (no network) +mcp = FastMCP( + "Ombre Brain", + host="0.0.0.0", + port=8000, +) + 
async def _merge_or_create(
    content: str,
    tags: list,
    importance: int,
    domain: list,
    valence: float,
    arousal: float,
    name: str = "",
) -> tuple[str, bool]:
    """
    Merge new content into a similar existing bucket, or create a new one.

    The bucket manager is searched for the single best match. If its score
    exceeds ``config["merge_threshold"]`` (default 75) and it is neither
    pinned nor protected, the two texts are merged by the dehydrator and the
    bucket is updated. Any failure along that path is logged and falls back
    to creating a new bucket.

    Tags and domains are unioned in insertion order, existing values first.
    A ``set`` would scramble the order and make the bucket's primary domain
    (``domain[0]``) arbitrary after a merge.

    Args:
        content: Memory text to store.
        tags: Tags for the new content.
        importance: Importance of the new content; on merge the larger of
            the old and new value is kept.
        domain: Domains for the new content.
        valence: Emotion valence; overwrites the stored value on merge.
        arousal: Emotion arousal; overwrites the stored value on merge.
        name: Optional name used only when a new bucket is created.

    Returns:
        ``(bucket name or id, True)`` when merged, ``(new bucket id, False)``
        when created.
    """
    try:
        existing = await bucket_mgr.search(content, limit=1)
    except Exception as e:
        logger.warning(f"Search for merge failed, creating new / 合并搜索失败,新建: {e}")
        existing = []

    if existing and existing[0].get("score", 0) > config.get("merge_threshold", 75):
        bucket = existing[0]
        meta = bucket["metadata"]
        # Never merge into pinned/protected buckets.
        if not (meta.get("pinned") or meta.get("protected")):
            try:
                merged = await dehydrator.merge(bucket["content"], content)
                # `or []` tolerates a key that is present but null.
                old_tags = list(meta.get("tags") or [])
                old_domain = list(meta.get("domain") or [])
                await bucket_mgr.update(
                    bucket["id"],
                    content=merged,
                    tags=list(dict.fromkeys(old_tags + list(tags))),
                    importance=max(meta.get("importance", 5), importance),
                    domain=list(dict.fromkeys(old_domain + list(domain))),
                    valence=valence,
                    arousal=arousal,
                )
                return meta.get("name", bucket["id"]), True
            except Exception as e:
                logger.warning(f"Merge failed, creating new / 合并失败,新建: {e}")

    bucket_id = await bucket_mgr.create(
        content=content,
        tags=tags,
        importance=importance,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=name or None,
    )
    return bucket_id, False
query.strip(): + try: + all_buckets = await bucket_mgr.list_all(include_archive=False) + except Exception as e: + logger.error(f"Failed to list buckets for surfacing / 浮现列桶失败: {e}") + return "记忆系统暂时无法访问。" + + # --- Pinned/protected buckets: always surface as core principles --- + # --- 钉选桶:作为核心准则,始终浮现 --- + pinned_buckets = [ + b for b in all_buckets + if b["metadata"].get("pinned") or b["metadata"].get("protected") + ] + pinned_results = [] + for b in pinned_buckets: + try: + summary = await dehydrator.dehydrate(b["content"], b["metadata"]) + pinned_results.append(f"📌 [核心准则] {summary}") + except Exception as e: + logger.warning(f"Failed to dehydrate pinned bucket / 钉选桶脱水失败: {e}") + continue + + # --- Unresolved buckets: surface top 2 by weight --- + # --- 未解决桶:按权重浮现前 2 条 --- + unresolved = [ + b for b in all_buckets + if not b["metadata"].get("resolved", False) + and b["metadata"].get("type") != "permanent" + and not b["metadata"].get("pinned", False) + and not b["metadata"].get("protected", False) + ] + + scored = sorted( + unresolved, + key=lambda b: decay_engine.calculate_score(b["metadata"]), + reverse=True, + ) + top = scored[:2] + dynamic_results = [] + for b in top: + try: + summary = await dehydrator.dehydrate(b["content"], b["metadata"]) + await bucket_mgr.touch(b["id"]) + score = decay_engine.calculate_score(b["metadata"]) + dynamic_results.append(f"[权重:{score:.2f}] {summary}") + except Exception as e: + logger.warning(f"Failed to dehydrate surfaced bucket / 浮现脱水失败: {e}") + continue + + if not pinned_results and not dynamic_results: + return "权重池平静,没有需要处理的记忆。" + + parts = [] + if pinned_results: + parts.append("=== 核心准则 ===\n" + "\n---\n".join(pinned_results)) + if dynamic_results: + parts.append("=== 浮现记忆 ===\n" + "\n---\n".join(dynamic_results)) + return "\n\n".join(parts) + + # --- With args: search mode / 有参数:检索模式 --- + domain_filter = [d.strip() for d in domain.split(",") if d.strip()] or None + q_valence = valence if 0 <= valence <= 1 else None + 
@mcp.tool()
async def hold(
    content: str,
    tags: str = "",
    importance: int = 5,
    pinned: bool = False,
) -> str:
    """存储单条记忆,自动打标+合并。tags逗号分隔,importance 1-10。pinned=True创建永久钉选桶。"""
    # NOTE: the docstring above is kept verbatim because the function is
    # registered via @mcp.tool(), which presumably exposes it as the tool
    # description (confirm against FastMCP).
    #
    # Flow: validate input, auto-tag via the dehydrator (neutral defaults on
    # failure), then either create a pinned permanent bucket directly or go
    # through the shared merge-or-create path. Returns a short status line.
    await decay_engine.ensure_started()

    # Reject empty / whitespace-only content.
    if not (content and content.strip()):
        return "内容为空,无法存储。"

    importance = min(10, max(1, importance))
    stripped_tags = (piece.strip() for piece in tags.split(","))
    extra_tags = [piece for piece in stripped_tags if piece]

    # Step 1: auto-tagging.
    try:
        analysis = await dehydrator.analyze(content)
    except Exception as e:
        logger.warning(f"Auto-tagging failed, using defaults / 自动打标失败: {e}")
        analysis = {
            "domain": ["未分类"], "valence": 0.5, "arousal": 0.3,
            "tags": [], "suggested_name": "",
        }

    domain = analysis["domain"]
    valence = analysis["valence"]
    arousal = analysis["arousal"]
    auto_tags = analysis["tags"]
    suggested_name = analysis.get("suggested_name", "")

    # Ordered de-duplication: auto tags first, then caller-supplied ones.
    all_tags = list(dict.fromkeys(auto_tags + extra_tags))

    if not pinned:
        # Step 2: merge into a similar bucket or create a new one.
        result_name, is_merged = await _merge_or_create(
            content=content,
            tags=all_tags,
            importance=importance,
            domain=domain,
            valence=valence,
            arousal=arousal,
            name=suggested_name,
        )
        action = "合并→" if is_merged else "新建→"
        return f"{action}{result_name} {','.join(domain)}"

    # Pinned buckets bypass merging and go straight to permanent storage
    # with maximum importance.
    bucket_id = await bucket_mgr.create(
        content=content,
        tags=all_tags,
        importance=10,
        domain=domain,
        valence=valence,
        arousal=arousal,
        name=suggested_name or None,
        bucket_type="permanent",
        pinned=True,
    )
    return f"📌钉选→{bucket_id} {','.join(domain)}"
short inputs (like "1"), calling digest is wasteful: + # it sends the full DIGEST_PROMPT (~800 tokens) to DeepSeek for nothing. + # Instead, run analyze + create directly. + if len(content.strip()) < 30: + logger.info(f"grow short-content fast path: {len(content.strip())} chars") + try: + analysis = await dehydrator.analyze(content) + except Exception as e: + logger.warning(f"Fast-path analyze failed / 快速路径打标失败: {e}") + analysis = { + "domain": ["未分类"], "valence": 0.5, "arousal": 0.3, + "tags": [], "suggested_name": "", + } + result_name, is_merged = await _merge_or_create( + content=content.strip(), + tags=analysis.get("tags", []), + importance=analysis.get("importance", 5) if isinstance(analysis.get("importance"), int) else 5, + domain=analysis.get("domain", ["未分类"]), + valence=analysis.get("valence", 0.5), + arousal=analysis.get("arousal", 0.3), + name=analysis.get("suggested_name", ""), + ) + action = "合并" if is_merged else "新建" + return f"{action} → {result_name} | {','.join(analysis.get('domain', []))} V{analysis.get('valence', 0.5):.1f}/A{analysis.get('arousal', 0.3):.1f}" + + # --- Step 1: let API split and organize / 让 API 拆分整理 --- + try: + items = await dehydrator.digest(content) + except Exception as e: + logger.error(f"Diary digest failed / 日记整理失败: {e}") + return f"日记整理失败: {e}" + + if not items: + return "内容为空或整理失败。" + + results = [] + created = 0 + merged = 0 + + # --- Step 2: merge or create each item (with per-item error handling) --- + # --- 逐条合并或新建(单条失败不影响其他)--- + for item in items: + try: + result_name, is_merged = await _merge_or_create( + content=item["content"], + tags=item.get("tags", []), + importance=item.get("importance", 5), + domain=item.get("domain", ["未分类"]), + valence=item.get("valence", 0.5), + arousal=item.get("arousal", 0.3), + name=item.get("name", ""), + ) + + if is_merged: + results.append(f"📎{result_name}") + merged += 1 + else: + results.append(f"📝{item.get('name', result_name)}") + created += 1 + except Exception as e: + 
@mcp.tool()
async def trace(
    bucket_id: str,
    name: str = "",
    domain: str = "",
    valence: float = -1,
    arousal: float = -1,
    importance: int = -1,
    tags: str = "",
    resolved: int = -1,
    pinned: int = -1,
    delete: bool = False,
) -> str:
    """修改记忆元数据。resolved=1沉底/0激活,pinned=1钉选/0取消,delete=True删除。只传需改的,-1或空=不改。"""
    # NOTE(review): the docstring above is left byte-identical on purpose; it
    # is presumably surfaced to MCP clients as the tool description.
    #
    # Edit a bucket's metadata, or delete the bucket when delete=True.
    #
    # Only fields carrying a meaningful value are applied:
    #   - name / domain / tags: non-empty strings (domain and tags are
    #     comma-separated lists),
    #   - valence / arousal:    values inside 0..1,
    #   - importance:           values inside 1..10,
    #   - resolved / pinned:    0 or 1 (anything else means "leave unchanged").
    # Pinning (pinned=1) also forces importance to 10.
    #
    # Returns a human-readable status string; failures are reported through
    # the returned text rather than raised by this function itself.

    if not bucket_id or not bucket_id.strip():
        return "请提供有效的 bucket_id。"

    # Validation above looks at the stripped value, so use the stripped value
    # for every lookup as well; otherwise a padded ID passes validation and
    # then fails with a confusing "not found".
    bucket_id = bucket_id.strip()

    # --- Delete mode ---
    if delete:
        success = await bucket_mgr.delete(bucket_id)
        return f"已遗忘记忆桶: {bucket_id}" if success else f"未找到记忆桶: {bucket_id}"

    bucket = await bucket_mgr.get(bucket_id)
    if not bucket:
        return f"未找到记忆桶: {bucket_id}"

    # --- Collect only the fields the caller actually supplied ---
    updates = {}
    if name:
        updates["name"] = name
    if domain:
        updates["domain"] = [d.strip() for d in domain.split(",") if d.strip()]
    if 0 <= valence <= 1:
        updates["valence"] = valence
    if 0 <= arousal <= 1:
        updates["arousal"] = arousal
    if 1 <= importance <= 10:
        updates["importance"] = importance
    if tags:
        updates["tags"] = [t.strip() for t in tags.split(",") if t.strip()]
    if resolved in (0, 1):
        updates["resolved"] = bool(resolved)
    if pinned in (0, 1):
        updates["pinned"] = bool(pinned)
        if pinned == 1:
            # Pinned buckets lock importance at the maximum, overriding any
            # importance passed in the same call.
            updates["importance"] = 10

    if not updates:
        return "没有任何字段需要修改。"

    success = await bucket_mgr.update(bucket_id, **updates)
    if not success:
        return f"修改失败: {bucket_id}"

    changed = ", ".join(f"{k}={v}" for k, v in updates.items())
    # Spell out what a resolved-state change means for surfacing.
    if "resolved" in updates:
        if updates["resolved"]:
            changed += " → 已沉底,只在关键词触发时重新浮现"
        else:
            changed += " → 已重新激活,将参与浮现排序"
    return f"已修改记忆桶 {bucket_id}: {changed}"
f"标签:{','.join(meta.get('tags', []))}" + ) + + return status + "\n=== 记忆列表 ===\n" + "\n".join(lines) + + +# --- Entry point / 启动入口 --- +if __name__ == "__main__": + transport = config.get("transport", "stdio") + logger.info(f"Ombre Brain starting | transport: {transport}") + + if transport in ("sse", "streamable-http"): + import threading + import uvicorn + from starlette.middleware.cors import CORSMiddleware + + # --- Application-level keepalive: ping /health every 60s --- + # --- 应用层保活:每 60 秒 ping 一次 /health,防止 Cloudflare Tunnel 空闲断连 --- + async def _keepalive_loop(): + await asyncio.sleep(10) # Wait for server to fully start + async with httpx.AsyncClient() as client: + while True: + try: + await client.get("http://localhost:8000/health", timeout=5) + logger.debug("Keepalive ping OK / 保活 ping 成功") + except Exception as e: + logger.warning(f"Keepalive ping failed / 保活 ping 失败: {e}") + await asyncio.sleep(60) + + def _start_keepalive(): + loop = asyncio.new_event_loop() + loop.run_until_complete(_keepalive_loop()) + + t = threading.Thread(target=_start_keepalive, daemon=True) + t.start() + + # --- Add CORS middleware so remote clients (Cloudflare Tunnel / ngrok) can connect --- + # --- 添加 CORS 中间件,让远程客户端(Cloudflare Tunnel / ngrok)能正常连接 --- + if transport == "streamable-http": + _app = mcp.streamable_http_app() + else: + _app = mcp.sse_app() + _app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_methods=["*"], + allow_headers=["*"], + expose_headers=["*"], + ) + logger.info("CORS middleware enabled for remote transport / 已启用 CORS 中间件") + uvicorn.run(_app, host="0.0.0.0", port=8000) + else: + mcp.run(transport=transport) diff --git a/test_smoke.py b/test_smoke.py new file mode 100644 index 0000000..e20f071 --- /dev/null +++ b/test_smoke.py @@ -0,0 +1,126 @@ +"""Ombre Brain 冒烟测试:验证核心功能链路""" +import asyncio +import os + +# 确保模块路径 +import sys +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) + +from utils import load_config, setup_logging 
+from bucket_manager import BucketManager +from dehydrator import Dehydrator +from decay_engine import DecayEngine + + +async def main(): + config = load_config() + setup_logging("INFO") + bm = BucketManager(config) + dh = Dehydrator(config) + de = DecayEngine(config, bm) + + print(f"API available: {dh.api_available}") + print(f"base_url: {dh.base_url}") + print() + + # ===== 1. 自动打标 ===== + print("=== 1. analyze (自动打标) ===") + try: + result = await dh.analyze("今天学了 Python 的 asyncio,感觉收获很大,心情不错") + print(f" domain: {result['domain']}") + print(f" valence: {result['valence']}, arousal: {result['arousal']}") + print(f" tags: {result['tags']}") + print(" [OK]") + except Exception as e: + print(f" [FAIL] {e}") + print() + + # ===== 2. 建桶 ===== + print("=== 2. create (建桶) ===") + try: + bid = await bm.create( + content="P酱喜欢猫,家里养了一只橘猫叫小橘", + tags=["猫", "宠物"], + importance=7, + domain=["生活"], + valence=0.8, + arousal=0.4, + ) + print(f" bucket_id: {bid}") + print(" [OK]") + except Exception as e: + print(f" [FAIL] {e}") + return + print() + + # ===== 3. 搜索 ===== + print("=== 3. search (检索) ===") + try: + hits = await bm.search("猫", limit=3) + print(f" found {len(hits)} results") + for h in hits: + name = h["metadata"].get("name", h["id"]) + print(f" - {name} (score={h['score']:.1f})") + print(" [OK]") + except Exception as e: + print(f" [FAIL] {e}") + print() + + # ===== 4. 脱水压缩 ===== + print("=== 4. dehydrate (脱水压缩) ===") + try: + text = ( + "这是一段很长的内容用来测试脱水功能。" + "P酱今天去了咖啡厅,点了一杯拿铁,然后坐在窗边看书看了两个小时。" + "期间遇到了一个朋友,聊了聊最近的工作情况。回家之后写了会代码。" + ) + summary = await dh.dehydrate(text, {}) + print(f" summary: {summary[:120]}...") + print(" [OK]") + except Exception as e: + print(f" [FAIL] {e}") + print() + + # ===== 5. 衰减评分 ===== + print("=== 5. decay score (衰减评分) ===") + try: + bucket = await bm.get(bid) + score = de.calculate_score(bucket["metadata"]) + print(f" score: {score:.3f}") + print(" [OK]") + except Exception as e: + print(f" [FAIL] {e}") + print() + + # ===== 6. 
async def main():
    """Run the MCP tool-level end-to-end checks and print a pass/fail summary.

    Exercises pulse, hold (twice, the second call is meant to trigger a
    merge), breath (plain and emotion-filtered), trace, grow, and finally a
    cleanup step.

    The bucket store is snapshotted before any tool runs. The trace check and
    the cleanup step only touch buckets that did not exist in that snapshot,
    so running this script against a store that already holds real memories
    no longer edits or deletes them.

    NOTE(review): hold()/grow() may merge test content into a pre-existing
    bucket; such merges cannot be undone from here, so prefer running this
    against a scratch buckets directory.
    """
    # Imported here, as in the original, so it happens after the module-level
    # config/logging setup at the top of this file.
    from bucket_manager import BucketManager

    passed = 0
    failed = 0

    bm = BucketManager(config)
    # IDs present before the test run; anything else is test data.
    preexisting_ids = {b["id"] for b in await bm.list_all()}

    # ===== pulse =====
    print("=== [1/6] pulse ===")
    try:
        r = await pulse()
        assert "Ombre Brain" in r
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold =====
    print("=== [2/6] hold ===")
    try:
        r = await hold(content="P酱最喜欢的编程语言是 Python,喜欢用 FastAPI 写后端", tags="编程,偏好", importance=8)
        print(f" {r.splitlines()[0]}")
        assert any(kw in r for kw in ["新建", "合并", "📌"])
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== hold (merge test) =====
    print("=== [2b/6] hold (合并测试) ===")
    try:
        r = await hold(content="P酱也喜欢用 Python 写爬虫和数据分析", tags="编程", importance=6)
        print(f" {r.splitlines()[0]}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath =====
    print("=== [3/6] breath ===")
    try:
        r = await breath(query="Python 编程", max_results=3)
        print(f" 结果前80字: {r[:80]}...")
        assert "未找到" not in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== breath (emotion resonance) =====
    print("=== [3b/6] breath (情感共鸣检索) ===")
    try:
        r = await breath(query="编程", domain="编程", valence=0.8, arousal=0.5)
        print(f" 结果前80字: {r[:80]}...")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # --- Pick a bucket created by this run for the trace check ---
    new_buckets = [b for b in await bm.list_all() if b["id"] not in preexisting_ids]
    bucket_id = new_buckets[0]["id"] if new_buckets else None

    # ===== trace =====
    print("=== [4/6] trace ===")
    if bucket_id:
        try:
            r = await trace(bucket_id=bucket_id, domain="编程,创作", importance=9)
            print(f" {r}")
            assert "已修改" in r
            print(" [OK]")
            passed += 1
        except Exception as e:
            print(f" [FAIL] {e}")
            failed += 1
    else:
        print(" [SKIP] 没有可编辑的桶")
    print()

    # ===== grow =====
    print("=== [5/6] grow ===")
    try:
        diary = (
            "今天早上复习了线性代数,搞懂了特征值分解。"
            "中午和室友去吃了拉面,聊了聊暑假实习的事。"
            "下午写了一个 Flask 项目的 API 接口。"
            "晚上看了部电影叫《星际穿越》,被结尾感动哭了。"
        )
        r = await grow(content=diary)
        print(f" {r.splitlines()[0]}")
        for line in r.splitlines()[1:]:
            if line.strip():
                print(f" {line}")
        assert "条|新" in r or "整理" in r
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== cleanup via trace(delete=True) =====
    # Only buckets absent from the initial snapshot are removed.
    print("=== [6/6] cleanup (清理本次测试新建的数据) ===")
    try:
        for b in await bm.list_all():
            if b["id"] in preexisting_ids:
                continue
            r = await trace(bucket_id=b["id"], delete=True)
            print(f" {r}")
        print(" [OK]")
        passed += 1
    except Exception as e:
        print(f" [FAIL] {e}")
        failed += 1
    print()

    # ===== Show the store state after cleanup =====
    final = await pulse()
    print(f"清理后: {final.splitlines()[0]}")
    print()
    print("=" * 50)
    print(f"MCP tool test complete / 工具测试完成: {passed} passed / {failed} failed")
    if failed == 0:
        print("All passed ✓")
    else:
        print(f"{failed} failed ✗")
+ 优先级:环境变量 > config.yaml > 内置默认值。 + """ + # --- Built-in defaults (fallback so it runs even without config.yaml) --- + # --- 内置默认配置(兜底,保证即使没有 config.yaml 也能跑)--- + defaults = { + "transport": "stdio", + "log_level": "INFO", + "buckets_dir": os.path.join(os.path.dirname(os.path.abspath(__file__)), "buckets"), + "merge_threshold": 75, + "dehydration": { + "model": "deepseek-chat", + "base_url": "https://api.deepseek.com/v1", + "api_key": "", + "max_tokens": 1024, + "temperature": 0.1, + }, + "decay": { + "lambda": 0.05, + "threshold": 0.3, + "check_interval_hours": 24, + "emotion_weights": { + "base": 1.0, + "arousal_boost": 0.8, + }, + }, + "matching": { + "fuzzy_threshold": 50, + "max_results": 5, + }, + } + + # --- Load user config from YAML file --- + # --- 从 YAML 文件加载用户自定义配置 --- + if config_path is None: + config_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "config.yaml" + ) + + config = defaults.copy() + if os.path.exists(config_path): + try: + with open(config_path, "r", encoding="utf-8") as f: + file_config = yaml.safe_load(f) or {} + if isinstance(file_config, dict): + config = _deep_merge(defaults, file_config) + else: + logging.warning( + f"Config file is not a valid YAML dict, using defaults / " + f"配置文件不是有效的 YAML 字典,使用默认配置: {config_path}" + ) + except yaml.YAMLError as e: + logging.warning( + f"Failed to parse config file, using defaults / " + f"配置文件解析失败,使用默认配置: {e}" + ) + + # --- Environment variable overrides (highest priority) --- + # --- 环境变量覆盖敏感/运行时配置(优先级最高)--- + env_api_key = os.environ.get("OMBRE_API_KEY", "") + if env_api_key: + config.setdefault("dehydration", {})["api_key"] = env_api_key + + env_base_url = os.environ.get("OMBRE_BASE_URL", "") + if env_base_url: + config.setdefault("dehydration", {})["base_url"] = env_base_url + + env_transport = os.environ.get("OMBRE_TRANSPORT", "") + if env_transport: + config["transport"] = env_transport + + env_buckets_dir = os.environ.get("OMBRE_BUCKETS_DIR", "") + if env_buckets_dir: 
def safe_path(base_dir: str, filename: str) -> Path:
    """
    Build a path for *filename* under *base_dir* and refuse anything that
    resolves outside of it (directory traversal guard).

    Both paths are resolved first, so ``..`` segments and symlinks are
    collapsed before the containment check.

    Args:
        base_dir: Directory the result must stay inside.
        filename: Relative file name (may contain sub-directories).

    Returns:
        The resolved absolute path.

    Raises:
        ValueError: If the resolved path is not located inside *base_dir*.
    """
    base = Path(base_dir).resolve()
    target = (base / filename).resolve()
    # Compare path components rather than string prefixes: a plain
    # str.startswith check would accept a sibling such as "/data/buckets_evil"
    # for the base "/data/buckets".
    try:
        target.relative_to(base)
    except ValueError:
        raise ValueError(
            f"Path safety check failed / 路径安全检查失败: "
            f"{target} is not inside / 不在 {base} 内"
        ) from None
    return target
# --- Script entry point ---
# With --name, --content and a non-empty --domain the memory is written
# directly; otherwise the script falls back to the interactive prompts.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="手动写入 Ombre Brain 记忆")
    parser.add_argument("--name", help="记忆名称")
    parser.add_argument("--content", help="记忆内容")
    parser.add_argument("--domain", help="主题域,逗号分隔")
    parser.add_argument("--tags", help="标签,逗号分隔")
    parser.add_argument("--importance", type=int, default=7)
    parser.add_argument("--valence", type=float, default=0.5)
    parser.add_argument("--arousal", type=float, default=0.3)
    args = parser.parse_args()

    # Split the comma-separated options up front and drop blank entries, the
    # same way interactive() does. Without the filter, a trailing comma in
    # --domain would emit an empty "- " item into the YAML front matter.
    domain = [d.strip() for d in (args.domain or "").split(",") if d.strip()]
    tags = [t.strip() for t in (args.tags or "").split(",") if t.strip()]

    if args.name and args.content and domain:
        write_memory(
            name=args.name,
            content=args.content,
            domain=domain,
            tags=tags,
            importance=args.importance,
            valence=args.valence,
            arousal=args.arousal,
        )
    else:
        interactive()