docs: update README/INTERNALS for import feature, harden .gitignore

This commit is contained in:
P0luz
2026-04-19 12:09:53 +08:00
parent a09fbfe13a
commit 821546d5de
27 changed files with 5365 additions and 479 deletions

View File

@@ -60,6 +60,7 @@ class BucketManager:
self.permanent_dir = os.path.join(self.base_dir, "permanent")
self.dynamic_dir = os.path.join(self.base_dir, "dynamic")
self.archive_dir = os.path.join(self.base_dir, "archive")
self.feel_dir = os.path.join(self.base_dir, "feel")
self.fuzzy_threshold = config.get("matching", {}).get("fuzzy_threshold", 50)
self.max_results = config.get("matching", {}).get("max_results", 5)
@@ -122,7 +123,7 @@ class BucketManager:
bucket_name = sanitize_name(name) if name else bucket_id
domain = domain or ["未分类"]
tags = tags or []
linked_content = self._apply_wikilinks(content, tags, domain, bucket_name)
linked_content = content # wikilink injection disabled; LLM adds [[]] via prompt
# --- Pinned/protected buckets: lock importance to 10 ---
# --- 钉选/保护桶:importance 强制锁定为 10 ---
@@ -154,8 +155,18 @@ class BucketManager:
# --- Choose directory by type + primary domain ---
# --- 按类型 + 主题域选择存储目录 ---
type_dir = self.permanent_dir if bucket_type == "permanent" else self.dynamic_dir
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
if bucket_type == "permanent" or pinned:
type_dir = self.permanent_dir
if pinned and bucket_type != "permanent":
metadata["type"] = "permanent"
elif bucket_type == "feel":
type_dir = self.feel_dir
else:
type_dir = self.dynamic_dir
if bucket_type == "feel":
primary_domain = "沉淀物" # feel subfolder name
else:
primary_domain = sanitize_name(domain[0]) if domain else "未分类"
target_dir = os.path.join(type_dir, primary_domain)
os.makedirs(target_dir, exist_ok=True)
@@ -197,6 +208,25 @@ class BucketManager:
return None
return self._load_bucket(file_path)
# ---------------------------------------------------------
# Move bucket between directories
# 在目录间移动桶文件
# ---------------------------------------------------------
def _move_bucket(self, file_path: str, target_type_dir: str, domain: list[str] | None = None) -> str:
    """
    Move a bucket file into a new type directory, preserving the domain
    subfolder (first domain entry, sanitized).

    Args:
        file_path: Current path of the bucket ``.md`` file.
        target_type_dir: Destination type directory (e.g. permanent/dynamic/archive/feel).
        domain: Optional domain list; the first entry names the subfolder.
            Falls back to "未分类" (uncategorized) when absent.

    Returns:
        The new file path (unchanged if the file was already in place).
    """
    primary_domain = sanitize_name(domain[0]) if domain else "未分类"
    target_dir = os.path.join(target_type_dir, primary_domain)
    os.makedirs(target_dir, exist_ok=True)
    filename = os.path.basename(file_path)
    # safe_path guards against path traversal from a crafted filename.
    new_path = safe_path(target_dir, filename)
    # Only rename when source and destination differ after normalization.
    if os.path.normpath(file_path) != os.path.normpath(new_path):
        os.rename(file_path, new_path)
        # NOTE(review): "(unknown)" looks like a lost placeholder (probably
        # the filename) from an earlier edit — confirm the intended message.
        logger.info(f"Moved bucket / 移动记忆桶: (unknown){target_dir}/")
    return new_path
# ---------------------------------------------------------
# Update bucket
# 更新桶
@@ -225,15 +255,7 @@ class BucketManager:
# --- Update only fields that were passed in / 只改传入的字段 ---
if "content" in kwargs:
next_tags = kwargs.get("tags", post.get("tags", []))
next_domain = kwargs.get("domain", post.get("domain", []))
next_name = kwargs.get("name", post.get("name", ""))
post.content = self._apply_wikilinks(
kwargs["content"],
next_tags,
next_domain,
next_name,
)
post.content = kwargs["content"] # wikilink injection disabled; LLM adds [[]] via prompt
if "tags" in kwargs:
post["tags"] = kwargs["tags"]
if "importance" in kwargs:
@@ -252,6 +274,10 @@ class BucketManager:
post["pinned"] = bool(kwargs["pinned"])
if kwargs["pinned"]:
post["importance"] = 10 # pinned → lock importance to 10
if "digested" in kwargs:
post["digested"] = bool(kwargs["digested"])
if "model_valence" in kwargs:
post["model_valence"] = max(0.0, min(1.0, float(kwargs["model_valence"])))
# --- Auto-refresh activation time / 自动刷新激活时间 ---
post["last_active"] = now_iso()
@@ -263,136 +289,33 @@ class BucketManager:
logger.error(f"Failed to write bucket update / 写入桶更新失败: {file_path}: {e}")
return False
# --- Auto-move: pinned → permanent/, resolved → archive/ ---
# --- 自动移动:钉选 → permanent/,已解决 → archive/ ---
domain = post.get("domain", ["未分类"])
if kwargs.get("pinned") and post.get("type") != "permanent":
post["type"] = "permanent"
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
self._move_bucket(file_path, self.permanent_dir, domain)
elif kwargs.get("resolved") and post.get("type") not in ("permanent", "feel"):
post["type"] = "archived"
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
self._move_bucket(file_path, self.archive_dir, domain)
logger.info(f"Updated bucket / 更新记忆桶: {bucket_id}")
return True
# ---------------------------------------------------------
# Wikilink injection
# 自动添加 Obsidian 双链
# Wikilink injection — DISABLED
# 自动添加 Obsidian 双链 — 已禁用
# Now handled by LLM prompts (Gemini adds [[]] for proper nouns)
# 现在由 LLM prompt 处理:Gemini 对人名/地名/专有名词加 [[]]
# ---------------------------------------------------------
def _apply_wikilinks(
self,
content: str,
tags: list[str],
domain: list[str],
name: str,
) -> str:
"""
Auto-inject Obsidian wikilinks, avoiding double-wrapping existing [[...]].
自动添加 Obsidian 双链,避免重复包裹已有 [[...]]。
"""
if not self.wikilink_enabled or not content:
return content
keywords = self._collect_wikilink_keywords(content, tags, domain, name)
if not keywords:
return content
# Split on existing wikilinks to avoid wrapping them again
# 按已有双链切分,避免重复包裹
segments = re.split(r"(\[\[[^\]]+\]\])", content)
pattern = re.compile("|".join(re.escape(kw) for kw in keywords))
for i, segment in enumerate(segments):
if segment.startswith("[[") and segment.endswith("]]"):
continue
updated = pattern.sub(lambda m: f"[[{m.group(0)}]]", segment)
segments[i] = updated
return "".join(segments)
def _collect_wikilink_keywords(
self,
content: str,
tags: list[str],
domain: list[str],
name: str,
) -> list[str]:
"""
Collect candidate keywords from tags/domain/auto-extraction.
汇总候选关键词:可选 tags/domain + 自动提词。
"""
candidates = []
if self.wikilink_use_tags:
candidates.extend(tags or [])
if self.wikilink_use_domain:
candidates.extend(domain or [])
if name:
candidates.append(name)
if self.wikilink_use_auto_keywords:
candidates.extend(self._extract_auto_keywords(content))
return self._normalize_keywords(candidates)
def _normalize_keywords(self, keywords: list[str]) -> list[str]:
"""
Deduplicate and sort by length (longer first to avoid short words
breaking long ones during replacement).
去重并按长度排序,优先替换长词。
"""
if not keywords:
return []
seen = set()
cleaned = []
for keyword in keywords:
if not isinstance(keyword, str):
continue
kw = keyword.strip()
if len(kw) < self.wikilink_min_len:
continue
if kw in self.wikilink_exclude_keywords:
continue
if kw.lower() in self.wikilink_stopwords:
continue
if kw in seen:
continue
seen.add(kw)
cleaned.append(kw)
return sorted(cleaned, key=len, reverse=True)
def _extract_auto_keywords(self, content: str) -> list[str]:
"""
Auto-extract keywords from body text, prioritizing high-frequency words.
从正文自动提词,优先高频词。
"""
if not content:
return []
try:
zh_words = [w.strip() for w in jieba.lcut(content) if w.strip()]
except Exception:
zh_words = []
en_words = re.findall(r"[A-Za-z][A-Za-z0-9_-]{2,20}", content)
# Chinese bigrams / 中文双词组合
zh_bigrams = []
for i in range(len(zh_words) - 1):
left = zh_words[i]
right = zh_words[i + 1]
if len(left) < self.wikilink_min_len or len(right) < self.wikilink_min_len:
continue
if not re.fullmatch(r"[\u4e00-\u9fff]+", left + right):
continue
if len(left + right) > 8:
continue
zh_bigrams.append(left + right)
merged = []
for word in zh_words + zh_bigrams + en_words:
if len(word) < self.wikilink_min_len:
continue
if re.fullmatch(r"\d+", word):
continue
if word.lower() in self.wikilink_stopwords:
continue
merged.append(word)
if not merged:
return []
counter = Counter(merged)
return [w for w, _ in counter.most_common(self.wikilink_auto_top_k)]
# def _apply_wikilinks(self, content, tags, domain, name): ...
# def _collect_wikilink_keywords(self, content, tags, domain, name): ...
# def _normalize_keywords(self, keywords): ...
# def _extract_auto_keywords(self, content): ...
# ---------------------------------------------------------
# Delete bucket
@@ -425,7 +348,9 @@ class BucketManager:
async def touch(self, bucket_id: str) -> None:
"""
Update a bucket's last activation time and count.
Also triggers time ripple: nearby memories get a slight activation boost.
更新桶的最后激活时间和激活次数。
同时触发时间涟漪:时间上相邻的记忆轻微唤醒。
"""
file_path = self._find_bucket_file(bucket_id)
if not file_path:
@@ -438,9 +363,60 @@ class BucketManager:
with open(file_path, "w", encoding="utf-8") as f:
f.write(frontmatter.dumps(post))
# --- Time ripple: boost nearby memories within ±48h ---
# --- 时间涟漪±48小时内的记忆轻微唤醒 ---
current_time = datetime.fromisoformat(str(post.get("created", post.get("last_active", ""))))
await self._time_ripple(bucket_id, current_time)
except Exception as e:
logger.warning(f"Failed to touch bucket / 触碰桶失败: {bucket_id}: {e}")
async def _time_ripple(self, source_id: str, reference_time: datetime, hours: float = 48.0) -> None:
    """
    Slightly boost activation_count (+0.3) of buckets created/activated
    near the reference time, without touching last_active — so rippled
    buckets cannot recursively wake others.

    At most 5 buckets are rippled per touch to bound file I/O.

    Args:
        source_id: Bucket that was touched; excluded from the ripple.
        reference_time: Center of the ±`hours` window.
        hours: Half-width of the ripple window in hours (default 48).
    """
    try:
        all_buckets = await self.list_all(include_archive=False)
    except Exception:
        # Best-effort feature: if listing fails, skip the ripple entirely.
        return
    rippled = 0
    max_ripple = 5
    for bucket in all_buckets:
        if rippled >= max_ripple:
            break
        if bucket["id"] == source_id:
            continue
        meta = bucket.get("metadata", {})
        # Pinned/protected/permanent/feel buckets are never rippled.
        if meta.get("pinned") or meta.get("protected") or meta.get("type") in ("permanent", "feel"):
            continue
        created_str = meta.get("created", meta.get("last_active", ""))
        try:
            created = datetime.fromisoformat(str(created_str))
            delta_hours = abs((reference_time - created).total_seconds()) / 3600
        except (ValueError, TypeError):
            # Unparseable or missing timestamp — skip this bucket.
            continue
        if delta_hours <= hours:
            # Boost activation_count by 0.3 (fractional); last_active untouched.
            file_path = self._find_bucket_file(bucket["id"])
            if not file_path:
                continue
            try:
                post = frontmatter.load(file_path)
                current_count = post.get("activation_count", 1)
                # Stored as float for fractional increments; calculate_score handles it.
                post["activation_count"] = round(current_count + 0.3, 1)
                with open(file_path, "w", encoding="utf-8") as f:
                    f.write(frontmatter.dumps(post))
                rippled += 1
            except Exception:
                # Best-effort: one unreadable/unwritable bucket must not
                # abort the remaining ripple candidates.
                continue
# ---------------------------------------------------------
# Multi-dimensional search (core feature)
# 多维搜索(核心功能)
@@ -576,7 +552,7 @@ class BucketManager:
)
content_score = fuzz.partial_ratio(query, bucket.get("content", "")[:1000]) * self.content_weight
return (name_score + domain_score + tag_score + content_score) / (100 * 10.5)
return (name_score + domain_score + tag_score + content_score) / (100 * (3 + 2.5 + 2 + self.content_weight))
# ---------------------------------------------------------
# Emotion resonance sub-score:
@@ -633,7 +609,7 @@ class BucketManager:
"""
buckets = []
dirs = [self.permanent_dir, self.dynamic_dir]
dirs = [self.permanent_dir, self.dynamic_dir, self.feel_dir]
if include_archive:
dirs.append(self.archive_dir)
@@ -664,6 +640,7 @@ class BucketManager:
"permanent_count": 0,
"dynamic_count": 0,
"archive_count": 0,
"feel_count": 0,
"total_size_kb": 0.0,
"domains": {},
}
@@ -672,6 +649,7 @@ class BucketManager:
(self.permanent_dir, "permanent_count"),
(self.dynamic_dir, "dynamic_count"),
(self.archive_dir, "archive_count"),
(self.feel_dir, "feel_count"),
]:
if not os.path.exists(subdir):
continue
@@ -745,7 +723,7 @@ class BucketManager:
"""
if not bucket_id:
return None
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir]:
for dir_path in [self.permanent_dir, self.dynamic_dir, self.archive_dir, self.feel_dir]:
if not os.path.exists(dir_path):
continue
for root, _, files in os.walk(dir_path):
@@ -754,7 +732,8 @@ class BucketManager:
continue
# Match by exact ID segment in filename
# 通过文件名中的 ID 片段精确匹配
if bucket_id in fname:
name_part = fname[:-3] # remove .md
if name_part == bucket_id or name_part.endswith(f"_{bucket_id}"):
return os.path.join(root, fname)
return None