Files
Florian Hartmann b83546d833 Add AI Council architecture: Tier 2/3/Graph implementation + Integration Plan
Architecture (Agent 1):
- hermes_memory/tier2/{schema,facts,entities,relations,timeline}.py
- hermes_memory/tier3/{backend,chroma_backend,embedder}.py
- hermes_memory/graph/nx_store.py
- hermes_memory/api/memory_api.py (unified API)
- hermes_memory/cron/{consolidate,embed_queue,graph_refresh,prune}.py
- hermes_memory/config.py + pyproject.toml

Integration Plan (Agent 3):
- INTEGRATION_PLAN.md: Memory Provider Plugin strategy
- Hermes Core needs minimal changes
- sync_turn() + prefetch() hooks
- Skills integration via nextlevel_search/remember

Auto-Extraction (Agent 2):
- ARCHITECTURE.md: Full extraction pipeline docs
- Chunking, Pre-Filter, LLM Prompts, Classification
- Entity-Linking, Temporal Reasoning, Deduplication

All files: Python syntax checked, ECC standards applied.
2026-06-03 22:51:50 +00:00

167 lines
6.1 KiB
Python

"""FactStore — CRUD für strukturierte Fakten in Tier 2."""
import hashlib
import sqlite3
import time
import uuid as uuid_mod
from dataclasses import dataclass
from typing import List, Optional
@dataclass
class Fact:
    """One row of the ``facts`` table, as returned by ``FactStore``.

    Field order matches the positional constructor and must not change.
    """

    # Identity and payload.
    uuid: str  # random UUID4 string assigned when the fact is inserted
    content: str  # the fact text itself
    content_hash: str  # SHA-256 hex digest of ``content`` (UTF-8 encoded)
    category: Optional[str]  # free-form label; ``store()`` defaults it to "general"

    # Provenance and trust.
    confidence: float  # raised to the maximum seen when the same content is stored again
    source_type: str  # origin label; ``store()`` defaults it to "user"
    source_id: Optional[str]  # optional identifier of the originating source

    # Timestamps as written by ``time.time()`` (seconds since the epoch).
    created_at: float
    updated_at: float
    expires_at: Optional[float]  # optional expiry; presumably an epoch timestamp too — TODO confirm

    # Usage bookkeeping.
    access_count: int  # starts at 0; incremented when a duplicate store hits this row
    last_accessed: Optional[float]  # inserted as NULL by ``store()``
    is_archived: bool  # True once the fact has been soft-deleted
class FactStore:
    """CRUD access to structured facts stored in the ``facts`` SQLite table.

    Rows are read by column name, so the connection passed in must use a
    name-addressable row factory such as ``sqlite3.Row``.

    Every write runs inside ``with self.conn:``, which commits on success
    and rolls back if a statement raises.
    """

    def __init__(self, conn: sqlite3.Connection):
        """Wrap an open SQLite connection; the store never closes it."""
        self.conn = conn

    @staticmethod
    def _hash(content: str) -> str:
        """Return the SHA-256 hex digest of *content* encoded as UTF-8."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    def store(
        self,
        content: str,
        category: str = "general",
        confidence: float = 1.0,
        source_type: str = "user",
        source_id: Optional[str] = None,
        expires_at: Optional[float] = None,
    ) -> "Fact":
        """Insert a fact, or refresh the active fact with identical content.

        When a non-archived fact with the same content hash exists, no row
        is inserted: its confidence is raised to the larger of the stored
        and the given value, ``updated_at`` is refreshed and
        ``access_count`` is incremented. Otherwise a new row is created
        (also when only archived rows share the hash).

        Returns:
            The stored or refreshed fact, re-read from the database.
        """
        content_hash = self._hash(content)
        # get_by_hash prefers active rows, so an archived twin cannot hide
        # an active fact and cause a second active copy to be inserted.
        existing = self.get_by_hash(content_hash)
        if existing and not existing.is_archived:
            # Deduplication: refresh confidence and updated_at in place.
            with self.conn:
                self.conn.execute(
                    "UPDATE facts SET confidence = MAX(confidence, ?), updated_at = ?, access_count = access_count + 1 WHERE uuid = ?",
                    (confidence, time.time(), existing.uuid),
                )
            return self.get_by_uuid(existing.uuid)

        fact_uuid = str(uuid_mod.uuid4())
        now = time.time()
        with self.conn:
            self.conn.execute(
                """
                INSERT INTO facts (uuid, content, content_hash, category, confidence, source_type, source_id, created_at, updated_at, expires_at, access_count, last_accessed, is_archived)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (fact_uuid, content, content_hash, category, confidence, source_type, source_id, now, now, expires_at, 0, None, 0),
            )
        return self.get_by_uuid(fact_uuid)

    def get_by_hash(self, content_hash: str) -> Optional["Fact"]:
        """Return a fact with the given content hash, or ``None``.

        Several rows may share one hash (for example an archived fact and
        its re-added successor). Active rows win over archived ones and,
        within each group, the oldest row is returned, so the result is
        deterministic.
        """
        row = self.conn.execute(
            "SELECT * FROM facts WHERE content_hash = ? ORDER BY is_archived ASC, created_at ASC LIMIT 1",
            (content_hash,),
        ).fetchone()
        return self._row_to_fact(row) if row else None

    def get_by_uuid(self, fact_uuid: str) -> Optional["Fact"]:
        """Return the fact with the given UUID (archived or not), or ``None``."""
        row = self.conn.execute("SELECT * FROM facts WHERE uuid = ? LIMIT 1", (fact_uuid,)).fetchone()
        return self._row_to_fact(row) if row else None

    def query(
        self,
        query: str = "",
        category: Optional[str] = None,
        limit: int = 10,
        min_confidence: float = 0.5,
        fts: bool = True,
    ) -> List["Fact"]:
        """Search active facts, best confidence first, newest first on ties.

        Args:
            query: Search text. Empty means "no text filter".
            category: When truthy, only facts of this category are returned.
            limit: Maximum number of rows.
            min_confidence: Lower bound (inclusive) on ``confidence``.
            fts: Use the ``facts_fts`` full-text index when *query* is set.
                With ``False`` a ``LIKE '%query%'`` substring match is used,
                in which ``%`` and ``_`` inside *query* act as wildcards.

        If the full-text statement raises ``sqlite3.OperationalError`` —
        typically because free-form text is not valid FTS5 query syntax —
        the search is retried with the substring match instead of failing.
        """
        if fts and query:
            try:
                return self._run_search(query, category, limit, min_confidence, use_fts=True)
            except sqlite3.OperationalError:
                # Deliberate fallback: the LIKE search below takes over. A
                # genuine database problem will surface again from there.
                pass
        return self._run_search(query, category, limit, min_confidence, use_fts=False)

    def _run_search(
        self,
        query: str,
        category: Optional[str],
        limit: int,
        min_confidence: float,
        use_fts: bool,
    ) -> List["Fact"]:
        """Build and execute one search statement for ``query()``."""
        if use_fts:
            sql = (
                "SELECT f.* FROM facts f"
                " JOIN facts_fts fts ON f.id = fts.rowid"
                " WHERE fts.facts_fts MATCH ? AND f.confidence >= ? AND f.is_archived = 0"
            )
            params = [query, min_confidence]
        else:
            sql = "SELECT f.* FROM facts f WHERE f.confidence >= ? AND f.is_archived = 0"
            params = [min_confidence]
            if query:
                sql += " AND f.content LIKE ?"
                params.append(f"%{query}%")
        if category:
            sql += " AND f.category = ?"
            params.append(category)
        sql += " ORDER BY f.confidence DESC, f.created_at DESC LIMIT ?"
        params.append(limit)
        rows = self.conn.execute(sql, params).fetchall()
        return [self._row_to_fact(r) for r in rows]

    def update(self, fact_uuid: str, **fields) -> Optional["Fact"]:
        """Update selected columns of a fact and return the fresh row.

        Only ``content``, ``category``, ``confidence``, ``expires_at`` and
        ``is_archived`` are accepted; other keyword arguments are ignored.
        Changing ``content`` also recomputes ``content_hash``. When nothing
        updatable is passed the fact is returned unchanged.

        Returns:
            The fact after the update, or ``None`` if the UUID is unknown.
        """
        allowed = {"content", "category", "confidence", "expires_at", "is_archived"}
        updates = {k: v for k, v in fields.items() if k in allowed}
        if not updates:
            return self.get_by_uuid(fact_uuid)
        if "content" in updates:
            updates["content_hash"] = self._hash(updates["content"])
        updates["updated_at"] = time.time()
        # Column names come from the whitelist above, never from the caller,
        # so interpolating them into the statement is safe.
        cols = ", ".join(f"{k} = ?" for k in updates)
        vals = list(updates.values()) + [fact_uuid]
        with self.conn:
            self.conn.execute(f"UPDATE facts SET {cols} WHERE uuid = ?", vals)
        return self.get_by_uuid(fact_uuid)

    def delete(self, fact_uuid: str, soft: bool = True) -> bool:
        """Archive (``soft=True``) or permanently remove a fact.

        Returns:
            ``True`` if a row with this UUID was affected, ``False`` if no
            such fact exists.
        """
        with self.conn:
            if soft:
                cursor = self.conn.execute(
                    "UPDATE facts SET is_archived = 1, updated_at = ? WHERE uuid = ?",
                    (time.time(), fact_uuid),
                )
            else:
                cursor = self.conn.execute("DELETE FROM facts WHERE uuid = ?", (fact_uuid,))
        return cursor.rowcount > 0

    def deduplicate(self) -> int:
        """Merge active facts that share a ``content_hash``.

        For every hash with more than one active row, the oldest row (by
        ``created_at``) is kept and receives the highest confidence found
        in the group; the remaining rows are deleted permanently. All
        changes are applied in a single transaction.

        Returns:
            The number of rows removed.
        """
        merged = 0
        with self.conn:
            groups = self.conn.execute(
                "SELECT content_hash, COUNT(*) as c FROM facts WHERE is_archived = 0 GROUP BY content_hash HAVING c > 1"
            ).fetchall()
            for group in groups:
                content_hash = group["content_hash"]
                members = self.conn.execute(
                    "SELECT uuid FROM facts WHERE content_hash = ? AND is_archived = 0 ORDER BY created_at",
                    (content_hash,),
                ).fetchall()
                keeper_uuid = members[0]["uuid"]
                duplicate_uuids = [(m["uuid"],) for m in members[1:]]
                # Same rule as store(): a duplicate never lowers confidence.
                # Must run before the duplicates are deleted.
                self.conn.execute(
                    "UPDATE facts SET confidence = ("
                    "SELECT MAX(confidence) FROM facts WHERE content_hash = ? AND is_archived = 0"
                    ") WHERE uuid = ?",
                    (content_hash, keeper_uuid),
                )
                self.conn.executemany("DELETE FROM facts WHERE uuid = ?", duplicate_uuids)
                merged += len(duplicate_uuids)
        return merged

    def _row_to_fact(self, row: sqlite3.Row) -> "Fact":
        """Convert a name-addressable ``facts`` row into a ``Fact``."""
        return Fact(
            uuid=row["uuid"],
            content=row["content"],
            content_hash=row["content_hash"],
            category=row["category"],
            confidence=row["confidence"],
            source_type=row["source_type"],
            source_id=row["source_id"],
            created_at=row["created_at"],
            updated_at=row["updated_at"],
            expires_at=row["expires_at"],
            access_count=row["access_count"],
            last_accessed=row["last_accessed"],
            # SQLite stores the flag as 0/1; expose a real bool.
            is_archived=bool(row["is_archived"]),
        )