diff --git a/ENV_VARS.md b/ENV_VARS.md index 5245458..d337367 100644 --- a/ENV_VARS.md +++ b/ENV_VARS.md @@ -10,8 +10,10 @@ | `OMBRE_HOOK_SKIP` | 否 | `false` | 设为 `true` 跳过 Webhook 推送 | | `OMBRE_DASHBOARD_PASSWORD` | 否 | — | 预设 Dashboard 访问密码;设置后覆盖文件存储的密码,首次访问不弹设置向导 | | `OMBRE_DEHYDRATION_MODEL` | 否 | `deepseek-chat` | 脱水/打标/合并/拆分用的 LLM 模型名(覆盖 `dehydration.model`) | +| `OMBRE_DEHYDRATION_BASE_URL` | 否 | `https://api.deepseek.com/v1` | 脱水模型的 API Base URL(覆盖 `dehydration.base_url`) | | `OMBRE_MODEL` | 否 | — | `OMBRE_DEHYDRATION_MODEL` 的别名(前者优先) | | `OMBRE_EMBEDDING_MODEL` | 否 | `gemini-embedding-001` | 向量嵌入模型名(覆盖 `embedding.model`) | +| `OMBRE_EMBEDDING_BASE_URL` | 否 | — | 向量嵌入的 API Base URL(覆盖 `embedding.base_url`;留空则复用脱水配置) | ## 说明 diff --git a/check_icloud_conflicts.py b/check_icloud_conflicts.py new file mode 100644 index 0000000..3f50ba3 --- /dev/null +++ b/check_icloud_conflicts.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +# ============================================================ +# check_icloud_conflicts.py — Ombre Brain iCloud Conflict Detector +# iCloud 冲突文件检测器 +# +# Scans the configured bucket directory for iCloud sync conflict +# artefacts and duplicate bucket IDs, then prints a report. 
#
# Report-only tool: nothing under the bucket directory is modified.
#
# Usage:
#   python check_icloud_conflicts.py
#   python check_icloud_conflicts.py --buckets-dir /path/to/dir
#   python check_icloud_conflicts.py --quiet   # exit-code only (0=clean)
# ============================================================

import argparse
import os
import re
import sys
from collections import defaultdict
from pathlib import Path
from typing import Optional, Sequence

# NOTE: the annotations below use builtin generics (list[...], dict[...]),
# which are valid at runtime on Python 3.9+.

# ──────────────────────────────────────────────────────────────
# iCloud conflict file patterns
#   Pattern 1 (macOS classic): "filename 2.md", "filename 3.md"
#   Pattern 2 (iCloud Drive):  "filename (Device's conflicted copy YYYY-MM-DD).md"
#
# NOTE(review): pattern 1 matches ANY name ending in "<space><digits>.md",
# so a legitimate note such as "Journal 2024.md" is reported as a conflict.
# ──────────────────────────────────────────────────────────────
_CONFLICT_SUFFIX = re.compile(r"^(.+?)\s+\d+\.md$")
_CONFLICT_ICLOUD = re.compile(r"^(.+?)\s+\(.+conflicted copy .+\)\.md$", re.IGNORECASE)
# Bucket ID pattern: 12 lowercase hex chars at the end of the stem, after "_".
_BUCKET_ID_PATTERN = re.compile(r"_([0-9a-f]{12})$")


def resolve_buckets_dir() -> Path:
    """Resolve the bucket directory to scan.

    Lookup order:
      1. ``OMBRE_BUCKETS_DIR`` environment variable (whitespace-stripped).
      2. ``buckets_dir`` key of ``config.yaml`` located next to this script.
      3. ``buckets/`` next to this script.

    Problems reading ``config.yaml`` (PyYAML not installed, unreadable file,
    invalid YAML) never abort the tool: a warning is written to stderr and
    the fallback directory is returned.

    Returns:
        The resolved directory path. It is not checked for existence.
    """
    env_dir = os.environ.get("OMBRE_BUCKETS_DIR", "").strip()
    if env_dir:
        return Path(env_dir)

    script_dir = Path(__file__).parent
    fallback = script_dir / "buckets"
    config_path = script_dir / "config.yaml"
    if not config_path.exists():
        return fallback

    # PyYAML is an optional dependency of this script: import lazily so the
    # tool still works (with the fallback directory) when it is missing.
    try:
        import yaml  # type: ignore
    except ImportError:
        print(
            f"warning: PyYAML not installed; ignoring {config_path}",
            file=sys.stderr,
        )
        return fallback

    try:
        with open(config_path, encoding="utf-8") as f:
            cfg = yaml.safe_load(f)
    except (OSError, ValueError, yaml.YAMLError) as exc:
        # ValueError covers UnicodeDecodeError for non-UTF-8 files.
        print(f"warning: could not read {config_path}: {exc}", file=sys.stderr)
        return fallback

    # An empty file loads as None and a top-level list has no keys; both
    # mean "no buckets_dir configured".
    configured = cfg.get("buckets_dir") if isinstance(cfg, dict) else None
    if configured and isinstance(configured, (str, os.PathLike)):
        return Path(configured)
    return fallback


def scan(buckets_dir: Path) -> tuple[list[Path], dict[str, list[Path]]]:
    """Walk ``buckets_dir`` recursively and classify its ``*.md`` files.

    Args:
        buckets_dir: Root directory to scan.

    Returns:
        A ``(conflict_files, dup_ids)`` pair:

        * ``conflict_files`` — sorted list of files whose name matches one of
          the conflict-copy patterns.
        * ``dup_ids`` — mapping of bucket ID to the sorted list of files
          sharing that ID; only IDs used by two or more files are included.

        Both are empty when ``buckets_dir`` is not an existing directory.
    """
    if not buckets_dir.is_dir():
        return [], {}

    conflict_files: list[Path] = []
    id_to_files: dict[str, list[Path]] = defaultdict(list)

    for md_file in buckets_dir.rglob("*.md"):
        # rglob matches on name only, so a directory called "x 2.md" would
        # otherwise be reported as a conflict file.
        if not md_file.is_file():
            continue

        name = md_file.name
        if _CONFLICT_SUFFIX.match(name) or _CONFLICT_ICLOUD.match(name):
            conflict_files.append(md_file)
            continue  # conflict copies are not registered in the ID map

        id_match = _BUCKET_ID_PATTERN.search(md_file.stem)
        if id_match:
            id_to_files[id_match.group(1)].append(md_file)

    # Sort so the result does not depend on filesystem enumeration order.
    conflict_files.sort()
    dup_ids = {
        bid: sorted(paths)
        for bid, paths in sorted(id_to_files.items())
        if len(paths) > 1
    }
    return conflict_files, dup_ids


def _display_path(path: Path, base: Path) -> Path:
    """Return ``path`` relative to ``base`` when possible, else unchanged."""
    try:
        return path.relative_to(base)
    except ValueError:
        return path


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Returns:
        Process exit code: ``0`` when the directory exists and is clean,
        ``1`` when conflicts or duplicate IDs were found or the directory
        does not exist. The code is the same with and without ``--quiet``.
    """
    parser = argparse.ArgumentParser(
        description="Detect iCloud conflict files and duplicate bucket IDs."
    )
    parser.add_argument(
        "--buckets-dir",
        metavar="PATH",
        help="Override bucket directory (default: from config.yaml / OMBRE_BUCKETS_DIR)",
    )
    parser.add_argument(
        "--quiet",
        action="store_true",
        help="Suppress output; exit 0 = clean, 1 = problems found",
    )
    args = parser.parse_args(argv)

    buckets_dir = Path(args.buckets_dir) if args.buckets_dir else resolve_buckets_dir()

    if not args.quiet:
        print(f"Scanning: {buckets_dir}")

    # A missing directory is a problem in BOTH modes; otherwise --quiet
    # would report a mistyped or unmounted path as "clean".
    if not buckets_dir.is_dir():
        if not args.quiet:
            print(" ✗ Directory does not exist.")
        return 1

    if not args.quiet:
        print()

    conflict_files, dup_ids = scan(buckets_dir)
    problems = bool(conflict_files or dup_ids)

    if args.quiet:
        return 1 if problems else 0

    # ── Report ─────────────────────────────────────────────────
    if not problems:
        print("✓ No iCloud conflicts or duplicate IDs found.")
        return 0

    if conflict_files:
        print(f"⚠ iCloud conflict files ({len(conflict_files)} found):")
        for conflict in sorted(conflict_files):
            print(f" {_display_path(conflict, buckets_dir)}")
        print()

    if dup_ids:
        print(f"⚠ Duplicate bucket IDs ({len(dup_ids)} ID(s) shared by multiple files):")
        for bid, paths in sorted(dup_ids.items()):
            print(f" ID: {bid}")
            for dup in sorted(paths):
                print(f" {_display_path(dup, buckets_dir)}")
        print()

    print(
        "NOTE: This script is report-only. No files are modified or deleted.\n"
        "注意:本脚本仅报告,不删除或修改任何文件。"
    )
    return 1


if __name__ == "__main__":
    sys.exit(main())
env_embed_model + # OMBRE_EMBEDDING_BASE_URL overrides embedding.base_url + env_embed_base_url = os.environ.get("OMBRE_EMBEDDING_BASE_URL", "") + if env_embed_base_url: + config.setdefault("embedding", {})["base_url"] = env_embed_base_url + # --- Ensure bucket storage directories exist --- # --- 确保记忆桶存储目录存在 --- buckets_dir = config["buckets_dir"]