"""FactStore — CRUD für strukturierte Fakten in Tier 2.""" import hashlib import sqlite3 import time import uuid as uuid_mod from dataclasses import dataclass from typing import List, Optional @dataclass class Fact: uuid: str content: str content_hash: str category: Optional[str] confidence: float source_type: str source_id: Optional[str] created_at: float updated_at: float expires_at: Optional[float] access_count: int last_accessed: Optional[float] is_archived: bool class FactStore: def __init__(self, conn: sqlite3.Connection): self.conn = conn @staticmethod def _hash(content: str) -> str: return hashlib.sha256(content.encode("utf-8")).hexdigest() def store( self, content: str, category: str = "general", confidence: float = 1.0, source_type: str = "user", source_id: Optional[str] = None, expires_at: Optional[float] = None, ) -> Fact: content_hash = self._hash(content) existing = self.get_by_hash(content_hash) if existing and not existing.is_archived: # Deduplizierung: Aktualisiere confidence und updated_at self.conn.execute( "UPDATE facts SET confidence = MAX(confidence, ?), updated_at = ?, access_count = access_count + 1 WHERE uuid = ?", (confidence, time.time(), existing.uuid), ) self.conn.commit() return self.get_by_uuid(existing.uuid) fact_uuid = str(uuid_mod.uuid4()) now = time.time() self.conn.execute( """ INSERT INTO facts (uuid, content, content_hash, category, confidence, source_type, source_id, created_at, updated_at, expires_at, access_count, last_accessed, is_archived) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) """, (fact_uuid, content, content_hash, category, confidence, source_type, source_id, now, now, expires_at, 0, None, 0), ) self.conn.commit() return self.get_by_uuid(fact_uuid) def get_by_hash(self, content_hash: str) -> Optional[Fact]: row = self.conn.execute("SELECT * FROM facts WHERE content_hash = ? LIMIT 1", (content_hash,)).fetchone() return self._row_to_fact(row) if row else None def get_by_uuid(self, fact_uuid: str) -> Optional[Fact]: row = self.conn.execute("SELECT * FROM facts WHERE uuid = ? LIMIT 1", (fact_uuid,)).fetchone() return self._row_to_fact(row) if row else None def query( self, query: str = "", category: Optional[str] = None, limit: int = 10, min_confidence: float = 0.5, fts: bool = True, ) -> List[Fact]: if fts and query: # FTS5 + Filter sql = """ SELECT f.* FROM facts f JOIN facts_fts fts ON f.id = fts.rowid WHERE fts.facts_fts MATCH ? AND f.confidence >= ? AND f.is_archived = 0 """ params = [query, min_confidence] if category: sql += " AND f.category = ?" params.append(category) sql += " ORDER BY f.confidence DESC, f.created_at DESC LIMIT ?" params.append(limit) else: sql = "SELECT * FROM facts WHERE confidence >= ? AND is_archived = 0" params = [min_confidence] if category: sql += " AND category = ?" params.append(category) if query: sql += " AND content LIKE ?" params.append(f"%{query}%") sql += " ORDER BY confidence DESC, created_at DESC LIMIT ?" params.append(limit) rows = self.conn.execute(sql, params).fetchall() return [self._row_to_fact(r) for r in rows] def update(self, fact_uuid: str, **fields) -> Optional[Fact]: allowed = {"content", "category", "confidence", "expires_at", "is_archived"} updates = {k: v for k, v in fields.items() if k in allowed} if not updates: return self.get_by_uuid(fact_uuid) if "content" in updates: updates["content_hash"] = self._hash(updates["content"]) updates["updated_at"] = time.time() cols = ", ".join(f"{k} = ?" for k in updates) vals = list(updates.values()) + [fact_uuid] self.conn.execute(f"UPDATE facts SET {cols} WHERE uuid = ?", vals) self.conn.commit() return self.get_by_uuid(fact_uuid) def delete(self, fact_uuid: str, soft: bool = True) -> bool: if soft: self.conn.execute("UPDATE facts SET is_archived = 1, updated_at = ? WHERE uuid = ?", (time.time(), fact_uuid)) else: self.conn.execute("DELETE FROM facts WHERE uuid = ?", (fact_uuid,)) self.conn.commit() return True def deduplicate(self) -> int: """Merge Fakten mit identischem content_hash. Returns merged count.""" rows = self.conn.execute( "SELECT content_hash, COUNT(*) as c FROM facts WHERE is_archived = 0 GROUP BY content_hash HAVING c > 1" ).fetchall() merged = 0 for row in rows: hashes = self.conn.execute( "SELECT uuid FROM facts WHERE content_hash = ? AND is_archived = 0 ORDER BY created_at", (row["content_hash"],), ).fetchall() keep = hashes[0]["uuid"] for dup in hashes[1:]: self.delete(dup["uuid"], soft=False) merged += 1 return merged def _row_to_fact(self, row: sqlite3.Row) -> Fact: return Fact( uuid=row["uuid"], content=row["content"], content_hash=row["content_hash"], category=row["category"], confidence=row["confidence"], source_type=row["source_type"], source_id=row["source_id"], created_at=row["created_at"], updated_at=row["updated_at"], expires_at=row["expires_at"], access_count=row["access_count"], last_accessed=row["last_accessed"], is_archived=bool(row["is_archived"]), )