783 lines
31 KiB
Python
783 lines
31 KiB
Python
# ============================================================
|
||
# Module: Memory Bucket Manager (bucket_manager.py)
|
||
# 模块:记忆桶管理器
|
||
#
|
||
# CRUD operations, multi-dimensional index search, activation updates
|
||
# for memory buckets.
|
||
# 记忆桶的增删改查、多维索引搜索、激活更新。
|
||
#
|
||
# Core design:
|
||
# 核心逻辑:
|
||
# - Each bucket = one Markdown file (YAML frontmatter + body)
|
||
# 每个记忆桶 = 一个 Markdown 文件
|
||
# - Storage by type: permanent / dynamic / archive
|
||
# 存储按类型分目录
|
||
# - Multi-dimensional soft index: domain + valence/arousal + fuzzy text
|
||
# 多维软索引:主题域 + 情感坐标 + 文本模糊匹配
|
||
# - Search strategy: domain pre-filter → weighted multi-dim ranking
|
||
# 搜索策略:主题域预筛 → 多维加权精排
|
||
# - Emotion coordinates based on Russell circumplex model:
|
||
# 情感坐标基于环形情感模型(Russell circumplex):
|
||
# valence (0~1): 0=negative → 1=positive
|
||
# arousal (0~1): 0=calm → 1=excited
|
||
#
|
||
# Depended on by: server.py, decay_engine.py
|
||
# 被谁依赖:server.py, decay_engine.py
|
||
# ============================================================
|
||
|
||
import os
|
||
import math
|
||
import logging
|
||
import re
|
||
import shutil
|
||
from collections import Counter
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from typing import Optional
|
||
|
||
import frontmatter
|
||
import jieba
|
||
from rapidfuzz import fuzz
|
||
|
||
from utils import generate_bucket_id, sanitize_name, safe_path, now_iso
|
||
|
||
logger = logging.getLogger("ombre_brain.bucket")
|
||
|
||
|
||
class BucketManager:
|
||
"""
|
||
Memory bucket manager — entry point for all bucket CRUD operations.
|
||
Buckets are stored as Markdown files with YAML frontmatter for metadata
|
||
and body for content. Natively compatible with Obsidian browsing/editing.
|
||
记忆桶管理器 —— 所有桶的 CRUD 操作入口。
|
||
桶以 Markdown 文件存储,YAML frontmatter 存元数据,正文存内容。
|
||
天然兼容 Obsidian 直接浏览和编辑。
|
||
"""
|
||
|
||
def __init__(self, config: dict):
|
||
# --- Read storage paths from config / 从配置中读取存储路径 ---
|
||
self.base_dir = config["buckets_dir"]
|
||
self.permanent_dir = os.path.join(self.base_dir, "permanent")
|
||
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
|
||
self.archive_dir = os.path.join(self.base_dir, "archive")
|
||
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
|
||
self.max_results = config.get("matching", {}).get("max_results", 5)
|
||
|
||
# --- Wikilink config / 双链配置 ---
|
||
wikilink_cfg = config.get("wikilink", {})
|
||
self.wikilink_enabled = wikilink_cfg.get("enabled", True)
|
||
self.wikilink_use_tags = wikilink_cfg.get("use_tags", False)
|
||
self.wikilink_use_domain = wikilink_cfg.get("use_domain", True)
|
||
self.wikilink_use_auto_keywords = wikilink_cfg.get("use_auto_keywords", True)
|
||
self.wikilink_auto_top_k = wikilink_cfg.get("auto_top_k", 8)
|
||
self.wikilink_min_len = wikilink_cfg.get("min_keyword_len", 2)
|
||
self.wikilink_exclude_keywords = set(wikilink_cfg.get("exclude_keywords", []))
|
||
self.wikilink_stopwords = {
|
||
"的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
|
||
"都", "一个", "上", "也", "很", "到", "说", "要", "去",
|
||
"你", "会", "着", "没有", "看", "好", "自己", "这", "他", "她",
|
||
"我们", "你们", "他们", "然后", "今天", "昨天", "明天", "一下",
|
||
"the", "and", "for", "are", "but", "not", "you", "all", "can",
|
||
"had", "her", "was", "one", "our", "out", "has", "have", "with",
|
||
"this", "that", "from", "they", "been", "said", "will", "each",
|
||
}
|
||
self.wikilink_stopwords |= {w.lower() for w in self.wikilink_exclude_keywords}
|
||
|
||
# --- Search scoring weights / 检索权重配置 ---
|
||
scoring = config.get("scoring_weights", {})
|
||
self.w_topic = scoring.get("topic_relevance", 4.0)
|
||
self.w_emotion = scoring.get("emotion_resonance", 2.0)
|
||
self.w_time = scoring.get("time_proximity", 2.5)
|
||
self.w_importance = scoring.get("importance", 1.0)
|
||
self.content_weight = scoring.get("content_weight", 3.0) # Added to allow better content-based matching during merge
|
||
|
||
# ---------------------------------------------------------
|
||
# Create a new bucket
|
||
# 创建新桶
|
||
# Write content and metadata into a .md file
|
||
# 将内容和元数据写入一个 .md 文件
|
||
# ---------------------------------------------------------
|
||
async def create(
|
||
self,
|
||
content: str,
|
||
tags: list[str] = None,
|
||
importance: int = 5,
|
||
domain: list[str] = None,
|
||
valence: float = 0.5,
|
||
arousal: float = 0.3,
|
||
bucket_type: str = "dynamic",
|
||
name: str = None,
|
||
pinned: bool = False,
|
||
protected: bool = False,
|
||
) -> str:
|
||
"""
|
||
Create a new memory bucket, return bucket ID.
|
||
创建一个新的记忆桶,返回桶 ID。
|
||
|
||
pinned/protected=True: bucket won't be merged, decayed, or have importance changed.
|
||
Importance is locked to 10 for pinned/protected buckets.
|
||
pinned/protected 桶不参与合并与衰减,importance 强制锁定为 10。
|
||
"""
|
||
bucket_id = generate_bucket_id()
|
||
bucket_name = sanitize_name(name) if name else bucket_id
|
||
domain = domain or ["未分类"]
|
||
tags = tags or []
|
||
linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)
|
||
|
||
# --- Pinned/protected buckets: lock importance to 10 ---
|
||
# --- 钉选/保护桶:importance 强制锁定为 10 ---
|
||
if pinned or protected:
|
||
importance = 10
|
||
|
||
# --- Build YAML frontmatter metadata / 构建元数据 ---
|
||
metadata = {
|
||
"id": bucket_id,
|
||
"name": bucket_name,
|
||
"tags": tags,
|
||
"domain": domain,
|
||
"valence": max(0.0, min(1.0, valence)),
|
||
"arousal": max(0.0, min(1.0, arousal)),
|
||
"importance": max(1, min(10, importance)),
|
||
"type": bucket_type,
|
||
"created": now_iso(),
|
||
"last_active": now_iso(),
|
||
"activation_count": 1,
|
||
}
|
||
if pinned:
|
||
metadata["pinned"] = True
|
||
if protected:
|
||
metadata["protected"] = True
|
||
|
||
# --- Assemble Markdown file (frontmatter + body) ---
|
||
# --- 组装 Markdown 文件 ---
|
||
post = frontmatter.Post(linked_content, **metadata)
|
||
|
||
# --- Choose directory by type + primary domain ---
|
||
# --- 按类型 + 主题域选择存储目录 ---
|
||
type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
|
||
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
|
||
target_dir = os.path.join(type_dir, primary_domain)
|
||
os.makedirs(target_dir, exist_ok=True)
|
||
|
||
# --- Filename: readable_name_bucketID.md (Obsidian friendly) ---
|
||
# --- 文件名:可读名称_桶ID.md ---
|
||
if bucket_name and bucket_name != bucket_id:
|
||
filename = f"{bucket_name}_{bucket_id}.md"
|
||
else:
|
||
filename = f"{bucket_id}.md"
|
||
file_path = safe_path(target_dir, filename)
|
||
|
||
try:
|
||
with open(file_path, "w", encoding="utf-8") as f:
|
||
f.write(frontmatter.dumps(post))
|
||
except OSError as e:
|
||
logger.error(f"Failed to write bucket file / 写入桶文件失败: {file_path}: {e}")
|
||
raise
|
||
|
||
logger.info(
|
||
f"Created bucket / 创建记忆桶: {bucket_id} ({bucket_name}) → {primary_domain}/"
|
||
+ (" [PINNED]" if pinned else "") + (" [PROTECTED]" if protected else "")
|
||
)
|
||
return bucket_id
|
||
|
||
# ---------------------------------------------------------
|
||
# Read bucket content
|
||
# 读取桶内容
|
||
# Returns {"id", "metadata", "content", "path"} or None
|
||
# ---------------------------------------------------------
|
||
async def get(self, bucket_id: str) -> Optional[dict]:
|
||
"""
|
||
Read a single bucket by ID.
|
||
根据 ID 读取单个桶。
|
||
"""
|
||
if not bucket_id or not isinstance(bucket_id, str):
|
||
return None
|
||
file_path = self._find_bucket_file(bucket_id)
|
||
if not file_path:
|
||
return None
|
||
return self._load_bucket(file_path)
|
||
|
||
# ---------------------------------------------------------
|
||
# Update bucket
|
||
# 更新桶
|
||
# Supports: content, tags, importance, valence, arousal, name, resolved
|
||
# ---------------------------------------------------------
|
||
async def update(self, bucket_id: str, **kwargs) -> bool:
|
||
"""
|
||
Update bucket content or metadata fields.
|
||
更新桶的内容或元数据字段。
|
||
"""
|
||
file_path = self._find_bucket_file(bucket_id)
|
||
if not file_path:
|
||
return False
|
||
|
||
try:
|
||
post = frontmatter.load(file_path)
|
||
except Exception as e:
|
||
logger.warning(f"Failed to load bucket for update / 加载桶失败: {file_path}: {e}")
|
||
return False
|
||
|
||
# --- Pinned/protected buckets: lock importance to 10, ignore importance changes ---
|
||
# --- 钉选/保护桶:importance 不可修改,强制保持 10 ---
|
||
is_pinned = post.get("pinned", False) or post.get("protected", False)
|
||
if is_pinned:
|
||
kwargs.pop("importance", None) # silently ignore importance update
|
||
|
||
# --- Update only fields that were passed in / 只改传入的字段 ---
|
||
if "content" in kwargs:
|
||
next_tags = kwargs.get("tags", post.get("tags", []))
|
||
next_domain = kwargs.get("domain", post.get("domain", []))
|
||
next_name = kwargs.get("name", post.get("name", ""))
|
||
post.content = self._apply_wikilinks(
|
||
kwargs["content"],
|
||
next_tags,
|
||
next_domain,
|
||
next_name,
|
||
)
|
||
if "tags" in kwargs:
|
||
post["tags"] = kwargs["tags"]
|
||
if "importance" in kwargs:
|
||
post["importance"] = max(1, min(10, int(kwargs["importance"])))
|
||
if "domain" in kwargs:
|
||
post["domain"] = kwargs["domain"]
|
||
if "valence" in kwargs:
|
||
post["valence"] = max(0.0, min(1.0, float(kwargs["valence"])))
|
||
if "arousal" in kwargs:
|
||
post["arousal"] = max(0.0, min(1.0, float(kwargs["arousal"])))
|
||
if "name" in kwargs:
|
||
post["name"] = sanitize_name(kwargs["name"])
|
||
if "resolved" in kwargs:
|
||
post["resolved"] = bool(kwargs["resolved"])
|
||
if "pinned" in kwargs:
|
||
post["pinned"] = bool(kwargs["pinned"])
|
||
if kwargs["pinned"]:
|
||
post["importance"] = 10 # pinned → lock importance to 10
|
||
|
||
# --- Auto-refresh activation time / 自动刷新激活时间 ---
|
||
post["last_active"] = now_iso()
|
||
|
||
try:
|
||
with open(file_path, "w", encoding="utf-8") as f:
|
||
f.write(frontmatter.dumps(post))
|
||
except OSError as e:
|
||
logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
|
||
return False
|
||
|
||
logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
|
||
return True
|
||
|
||
# ---------------------------------------------------------
|
||
# Wikilink injection
|
||
# 自动添加 Obsidian 双链
|
||
# ---------------------------------------------------------
|
||
def _apply_wikilinks(
|
||
self,
|
||
content: str,
|
||
tags: list[str],
|
||
domain: list[str],
|
||
name: str,
|
||
) -> str:
|
||
"""
|
||
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
|
||
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
|
||
"""
|
||
if not self.wikilink_enabled or not content:
|
||
return content
|
||
|
||
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
|
||
if not keywords:
|
||
return content
|
||
|
||
# Split on existing wikilinks to avoid wrapping them again
|
||
# 按已有双链切分,避免重复包裹
|
||
segments = re.split(r"(\[\[[^\]]+\]\])", content)
|
||
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
|
||
for i, segment in enumerate(segments):
|
||
if segment.startswith("[[") and segment.endswith("]]"):
|
||
continue
|
||
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
|
||
segments[i] = updated
|
||
return "".join(segments)
|
||
|
||
def _collect_wikilink_keywords(
|
||
self,
|
||
content: str,
|
||
tags: list[str],
|
||
domain: list[str],
|
||
name: str,
|
||
) -> list[str]:
|
||
"""
|
||
Collect candidate keywords from tags/domain/auto-extraction.
|
||
汇总候选关键词:可选 tags/domain + 自动提词。
|
||
"""
|
||
candidates = []
|
||
|
||
if self.wikilink_use_tags:
|
||
candidates.extend(tags or [])
|
||
if self.wikilink_use_domain:
|
||
candidates.extend(domain or [])
|
||
if name:
|
||
candidates.append(name)
|
||
if self.wikilink_use_auto_keywords:
|
||
candidates.extend(self._extract_auto_keywords(content))
|
||
|
||
return self._normalize_keywords(candidates)
|
||
|
||
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
|
||
"""
|
||
Deduplicate and sort by length (longer first to avoid short words
|
||
breaking long ones during replacement).
|
||
去重并按长度排序,优先替换长词。
|
||
"""
|
||
if not keywords:
|
||
return []
|
||
|
||
seen = set()
|
||
cleaned = []
|
||
for keyword in keywords:
|
||
if not isinstance(keyword, str):
|
||
continue
|
||
kw = keyword.strip()
|
||
if len(kw) < self.wikilink_min_len:
|
||
continue
|
||
if kw in self.wikilink_exclude_keywords:
|
||
continue
|
||
if kw.lower() in self.wikilink_stopwords:
|
||
continue
|
||
if kw in seen:
|
||
continue
|
||
seen.add(kw)
|
||
cleaned.append(kw)
|
||
|
||
return sorted(cleaned, key=len, reverse=True)
|
||
|
||
def _extract_auto_keywords(self, content: str) -> list[str]:
|
||
"""
|
||
Auto-extract keywords from body text, prioritizing high-frequency words.
|
||
从正文自动提词,优先高频词。
|
||
"""
|
||
if not content:
|
||
return []
|
||
|
||
try:
|
||
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
|
||
except Exception:
|
||
zh_words = []
|
||
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
|
||
|
||
# Chinese bigrams / 中文双词组合
|
||
zh_bigrams = []
|
||
for i in range(len(zh_words) - 1):
|
||
left = zh_words[i]
|
||
right = zh_words[i + 1]
|
||
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
|
||
continue
|
||
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
|
||
continue
|
||
if len(left + right) > 8:
|
||
continue
|
||
zh_bigrams.append(left + right)
|
||
|
||
merged = []
|
||
for word in zh_words + zh_bigrams + en_words:
|
||
if len(word) < self.wikilink_min_len:
|
||
continue
|
||
if re.fullmatch(r"\d+", word):
|
||
continue
|
||
if word.lower() in self.wikilink_stopwords:
|
||
continue
|
||
merged.append(word)
|
||
|
||
if not merged:
|
||
return []
|
||
|
||
counter = Counter(merged)
|
||
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
|
||
|
||
# ---------------------------------------------------------
|
||
# Delete bucket
|
||
# 删除桶
|
||
# ---------------------------------------------------------
|
||
async def delete(self, bucket_id: str) -> bool:
|
||
"""
|
||
Delete a memory bucket file.
|
||
删除指定的记忆桶文件。
|
||
"""
|
||
file_path = self._find_bucket_file(bucket_id)
|
||
if not file_path:
|
||
return False
|
||
|
||
try:
|
||
os.remove(file_path)
|
||
except OSError as e:
|
||
logger.error(f"Failed to delete bucket file / 删除桶文件失败: {file_path}: {e}")
|
||
return False
|
||
|
||
logger.info(f"Deleted bucket / 删除记忆桶: {bucket_id}")
|
||
return True
|
||
|
||
# ---------------------------------------------------------
|
||
# Touch bucket (refresh activation time + increment count)
|
||
# 触碰桶(刷新激活时间 + 累加激活次数)
|
||
# Called on every recall hit; affects decay score.
|
||
# 每次检索命中时调用,影响衰减得分。
|
||
# ---------------------------------------------------------
|
||
async def touch(self, bucket_id: str) -> None:
|
||
"""
|
||
Update a bucket's last activation time and count.
|
||
更新桶的最后激活时间和激活次数。
|
||
"""
|
||
file_path = self._find_bucket_file(bucket_id)
|
||
if not file_path:
|
||
return
|
||
|
||
try:
|
||
post = frontmatter.load(file_path)
|
||
post["last_active"] = now_iso()
|
||
post["activation_count"] = post.get("activation_count", 0) + 1
|
||
|
||
with open(file_path, "w", encoding="utf-8") as f:
|
||
f.write(frontmatter.dumps(post))
|
||
except Exception as e:
|
||
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
|
||
|
||
# ---------------------------------------------------------
|
||
# Multi-dimensional search (core feature)
|
||
# 多维搜索(核心功能)
|
||
#
|
||
# Strategy: domain pre-filter → weighted multi-dim ranking
|
||
# 策略:主题域预筛 → 多维加权精排
|
||
#
|
||
# Ranking formula:
|
||
# total = topic(×w_topic) + emotion(×w_emotion)
|
||
# + time(×w_time) + importance(×w_importance)
|
||
#
|
||
# Per-dimension scores (normalized to 0~1):
|
||
# topic = rapidfuzz weighted match (name/tags/domain/body)
|
||
# emotion = 1 - Euclidean distance (query v/a vs bucket v/a)
|
||
# time = e^(-0.02 × days) (recent memories first)
|
||
# importance = importance / 10
|
||
# ---------------------------------------------------------
|
||
async def search(
|
||
self,
|
||
query: str,
|
||
limit: int = None,
|
||
domain_filter: list[str] = None,
|
||
query_valence: float = None,
|
||
query_arousal: float = None,
|
||
) -> list[dict]:
|
||
"""
|
||
Multi-dimensional indexed search for memory buckets.
|
||
多维索引搜索记忆桶。
|
||
|
||
domain_filter: pre-filter by domain (None = search all)
|
||
query_valence/arousal: emotion coordinates for resonance scoring
|
||
"""
|
||
if not query or not query.strip():
|
||
return []
|
||
|
||
limit = limit or self.max_results
|
||
all_buckets = await self.list_all(include_archive=False)
|
||
|
||
if not all_buckets:
|
||
return []
|
||
|
||
# --- Layer 1: domain pre-filter (fast scope reduction) ---
|
||
# --- 第一层:主题域预筛(快速缩小范围)---
|
||
if domain_filter:
|
||
filter_set = {d.lower() for d in domain_filter}
|
||
candidates = [
|
||
b for b in all_buckets
|
||
if {d.lower() for d in b["metadata"].get("domain", [])} & filter_set
|
||
]
|
||
# Fall back to full search if pre-filter yields nothing
|
||
# 预筛为空则回退全量搜索
|
||
if not candidates:
|
||
candidates = all_buckets
|
||
else:
|
||
candidates = all_buckets
|
||
|
||
# --- Layer 2: weighted multi-dim ranking ---
|
||
# --- 第二层:多维加权精排 ---
|
||
scored = []
|
||
for bucket in candidates:
|
||
meta = bucket.get("metadata", {})
|
||
|
||
try:
|
||
# Dim 1: topic relevance (fuzzy text, 0~1)
|
||
topic_score = self._calc_topic_score(query, bucket)
|
||
|
||
# Dim 2: emotion resonance (coordinate distance, 0~1)
|
||
emotion_score = self._calc_emotion_score(
|
||
query_valence, query_arousal, meta
|
||
)
|
||
|
||
# Dim 3: time proximity (exponential decay, 0~1)
|
||
time_score = self._calc_time_score(meta)
|
||
|
||
# Dim 4: importance (direct normalization)
|
||
importance_score = max(1, min(10, int(meta.get("importance", 5)))) / 10.0
|
||
|
||
# --- Weighted sum / 加权求和 ---
|
||
total = (
|
||
topic_score * self.w_topic
|
||
+ emotion_score * self.w_emotion
|
||
+ time_score * self.w_time
|
||
+ importance_score * self.w_importance
|
||
)
|
||
# Normalize to 0~100 for readability
|
||
weight_sum = self.w_topic + self.w_emotion + self.w_time + self.w_importance
|
||
normalized = (total / weight_sum) * 100 if weight_sum > 0 else 0
|
||
|
||
# Resolved buckets get ranking penalty (but still reachable by keyword)
|
||
# 已解决的桶降权排序(但仍可被关键词激活)
|
||
if meta.get("resolved", False):
|
||
normalized *= 0.3
|
||
|
||
if normalized >= self.fuzzy_threshold:
|
||
bucket["score"] = round(normalized, 2)
|
||
scored.append(bucket)
|
||
except Exception as e:
|
||
logger.warning(
|
||
f"Scoring failed for bucket {bucket.get('id', '?')} / "
|
||
f"桶评分失败: {e}"
|
||
)
|
||
continue
|
||
|
||
scored.sort(key=lambda x: x["score"], reverse=True)
|
||
return scored[:limit]
|
||
|
||
# ---------------------------------------------------------
|
||
# Topic relevance sub-score:
|
||
# name(×3) + domain(×2.5) + tags(×2) + body(×1)
|
||
# 文本相关性子分:桶名(×3) + 主题域(×2.5) + 标签(×2) + 正文(×1)
|
||
# ---------------------------------------------------------
|
||
def _calc_topic_score(self, query: str, bucket: dict) -> float:
|
||
"""
|
||
Calculate text dimension relevance score (0~1).
|
||
计算文本维度的相关性得分。
|
||
"""
|
||
meta = bucket.get("metadata", {})
|
||
|
||
name_score = fuzz.partial_ratio(query, meta.get("name", "")) * 3
|
||
domain_score = (
|
||
max(
|
||
(fuzz.partial_ratio(query, d) for d in meta.get("domain", [])),
|
||
default=0,
|
||
)
|
||
* 2.5
|
||
)
|
||
tag_score = (
|
||
max(
|
||
(fuzz.partial_ratio(query, tag) for tag in meta.get("tags", [])),
|
||
default=0,
|
||
)
|
||
* 2
|
||
)
|
||
content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:1000]) * self.content_weight
|
||
|
||
return (name_score + domain_score + tag_score + content_score) / (100 * 10.5)
|
||
|
||
# ---------------------------------------------------------
|
||
# Emotion resonance sub-score:
|
||
# Based on Russell circumplex Euclidean distance
|
||
# 情感共鸣子分:基于环形情感模型的欧氏距离
|
||
# No emotion in query → neutral 0.5 (doesn't affect ranking)
|
||
# ---------------------------------------------------------
|
||
def _calc_emotion_score(
|
||
self, q_valence: float, q_arousal: float, meta: dict
|
||
) -> float:
|
||
"""
|
||
Calculate emotion resonance score (0~1, closer = higher).
|
||
计算情感共鸣度(0~1,越近越高)。
|
||
"""
|
||
if q_valence is None or q_arousal is None:
|
||
return 0.5 # No emotion coordinates → neutral / 无情感坐标时给中性分
|
||
|
||
try:
|
||
b_valence = float(meta.get("valence", 0.5))
|
||
b_arousal = float(meta.get("arousal", 0.3))
|
||
except (ValueError, TypeError):
|
||
return 0.5
|
||
|
||
# Euclidean distance, max sqrt(2) ≈ 1.414
|
||
dist = math.sqrt((q_valence - b_valence) ** 2 + (q_arousal - b_arousal) ** 2)
|
||
return max(0.0, 1.0 - dist / 1.414)
|
||
|
||
# ---------------------------------------------------------
|
||
# Time proximity sub-score:
|
||
# More recent activation → higher score
|
||
# 时间亲近子分:距上次激活越近分越高
|
||
# ---------------------------------------------------------
|
||
def _calc_time_score(self, meta: dict) -> float:
|
||
"""
|
||
Calculate time proximity score (0~1, more recent = higher).
|
||
计算时间亲近度。
|
||
"""
|
||
last_active_str = meta.get("last_active", meta.get("created", ""))
|
||
try:
|
||
last_active = datetime.fromisoformat(str(last_active_str))
|
||
days = max(0.0, (datetime.now() - last_active).total_seconds() / 86400)
|
||
except (ValueError, TypeError):
|
||
days = 30
|
||
return math.exp(-0.1 * days)
|
||
|
||
# ---------------------------------------------------------
|
||
# List all buckets
|
||
# 列出所有桶
|
||
# ---------------------------------------------------------
|
||
async def list_all(self, include_archive: bool = False) -> list[dict]:
|
||
"""
|
||
Recursively walk directories (including domain subdirs), list all buckets.
|
||
递归遍历目录(含域子目录),列出所有记忆桶。
|
||
"""
|
||
buckets = []
|
||
|
||
dirs = [self.permanent_dir, self.dynamic_dir]
|
||
if include_archive:
|
||
dirs.append(self.archive_dir)
|
||
|
||
for dir_path in dirs:
|
||
if not os.path.exists(dir_path):
|
||
continue
|
||
for root, _, files in os.walk(dir_path):
|
||
for filename in files:
|
||
if not filename.endswith(".md"):
|
||
continue
|
||
file_path = os.path.join(root, filename)
|
||
bucket = self._load_bucket(file_path)
|
||
if bucket:
|
||
buckets.append(bucket)
|
||
|
||
return buckets
|
||
|
||
# ---------------------------------------------------------
|
||
# Statistics (counts per category + total size)
|
||
# 统计信息(各分类桶数量 + 总体积)
|
||
# ---------------------------------------------------------
|
||
async def get_stats(self) -> dict:
|
||
"""
|
||
Return memory bucket statistics (including domain subdirs).
|
||
返回记忆桶的统计数据。
|
||
"""
|
||
stats = {
|
||
"permanent_count": 0,
|
||
"dynamic_count": 0,
|
||
"archive_count": 0,
|
||
"total_size_kb": 0.0,
|
||
"domains": {},
|
||
}
|
||
|
||
for subdir, key in [
|
||
(self.permanent_dir, "permanent_count"),
|
||
(self.dynamic_dir, "dynamic_count"),
|
||
(self.archive_dir, "archive_count"),
|
||
]:
|
||
if not os.path.exists(subdir):
|
||
continue
|
||
for root, _, files in os.walk(subdir):
|
||
for f in files:
|
||
if f.endswith(".md"):
|
||
stats[key] += 1
|
||
fpath = os.path.join(root, f)
|
||
try:
|
||
stats["total_size_kb"] += os.path.getsize(fpath) / 1024
|
||
except OSError:
|
||
pass
|
||
# Per-domain counts / 每个域的桶数量
|
||
domain_name = os.path.basename(root)
|
||
if domain_name != os.path.basename(subdir):
|
||
stats["domains"][domain_name] = stats["domains"].get(domain_name, 0) + 1
|
||
|
||
return stats
|
||
|
||
# ---------------------------------------------------------
|
||
# Archive bucket (move from permanent/dynamic into archive)
|
||
# 归档桶(从 permanent/dynamic 移入 archive)
|
||
# Called by decay engine to simulate "forgetting"
|
||
# 由衰减引擎调用,模拟"遗忘"
|
||
# ---------------------------------------------------------
|
||
async def archive(self, bucket_id: str) -> bool:
|
||
"""
|
||
Move a bucket into the archive directory (preserving domain subdirs).
|
||
将指定桶移入归档目录(保留域子目录结构)。
|
||
"""
|
||
file_path = self._find_bucket_file(bucket_id)
|
||
if not file_path:
|
||
return False
|
||
|
||
try:
|
||
# Read once, get domain info and update type / 一次性读取
|
||
post = frontmatter.load(file_path)
|
||
domain = post.get("domain", ["未分类"])
|
||
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
|
||
archive_subdir = os.path.join(self.archive_dir, primary_domain)
|
||
os.makedirs(archive_subdir, exist_ok=True)
|
||
|
||
dest = safe_path(archive_subdir, os.path.basename(file_path))
|
||
|
||
# Update type marker then move file / 更新类型标记后移动文件
|
||
post["type"] = "archived"
|
||
with open(file_path, "w", encoding="utf-8") as f:
|
||
f.write(frontmatter.dumps(post))
|
||
|
||
# Use shutil.move for cross-filesystem safety
|
||
# 使用 shutil.move 保证跨文件系统安全
|
||
shutil.move(file_path, str(dest))
|
||
except Exception as e:
|
||
logger.error(
|
||
f"Failed to archive bucket / 归档桶失败: {bucket_id}: {e}"
|
||
)
|
||
return False
|
||
|
||
logger.info(f"Archived bucket / 归档记忆桶: {bucket_id} → archive/{primary_domain}/")
|
||
return True
|
||
|
||
# ---------------------------------------------------------
|
||
# Internal: find bucket file across all three directories
|
||
# 内部:在三个目录中查找桶文件
|
||
# ---------------------------------------------------------
|
||
def _find_bucket_file(self, bucket_id: str) -> Optional[str]:
|
||
"""
|
||
Recursively search permanent/dynamic/archive for a bucket file
|
||
matching the given ID.
|
||
在 permanent/dynamic/archive 中递归查找指定 ID 的桶文件。
|
||
"""
|
||
if not bucket_id:
|
||
return None
|
||
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
|
||
if not os.path.exists(dir_path):
|
||
continue
|
||
for root, _, files in os.walk(dir_path):
|
||
for fname in files:
|
||
if not fname.endswith(".md"):
|
||
continue
|
||
# Match by exact ID segment in filename
|
||
# 通过文件名中的 ID 片段精确匹配
|
||
if bucket_id in fname:
|
||
return os.path.join(root, fname)
|
||
return None
|
||
|
||
# ---------------------------------------------------------
|
||
# Internal: load bucket data from .md file
|
||
# 内部:从 .md 文件加载桶数据
|
||
# ---------------------------------------------------------
|
||
def _load_bucket(self, file_path: str) -> Optional[dict]:
|
||
"""
|
||
Parse a Markdown file and return structured bucket data.
|
||
解析 Markdown 文件,返回桶的结构化数据。
|
||
"""
|
||
try:
|
||
post = frontmatter.load(file_path)
|
||
return {
|
||
"id": post.get("id", Path(file_path).stem),
|
||
"metadata": dict(post.metadata),
|
||
"content": post.content,
|
||
"path": file_path,
|
||
}
|
||
except Exception as e:
|
||
logger.warning(
|
||
f"Failed to load bucket file / 加载桶文件失败: {file_path}: {e}"
|
||
)
|
||
return None
|