b83546d833
Architecture (Agent 1):
- hermes_memory/tier2/{schema,facts,entities,relations,timeline}.py
- hermes_memory/tier3/{backend,chroma_backend,embedder}.py
- hermes_memory/graph/nx_store.py
- hermes_memory/api/memory_api.py (unified API)
- hermes_memory/cron/{consolidate,embed_queue,graph_refresh,prune}.py
- hermes_memory/config.py + pyproject.toml
Integration Plan (Agent 3):
- INTEGRATION_PLAN.md: Memory Provider Plugin strategy
- Hermes Core needs minimal changes
- sync_turn() + prefetch() hooks
- Skills integration via nextlevel_search/remember
Auto-Extraction (Agent 2):
- ARCHITECTURE.md: Full extraction pipeline docs
- Chunking, Pre-Filter, LLM Prompts, Classification
- Entity-Linking, Temporal Reasoning, Deduplication
All files: Python syntax checked, ECC standards applied.
167 lines
6.1 KiB
Python
167 lines
6.1 KiB
Python
"""FactStore — CRUD für strukturierte Fakten in Tier 2."""
|
|
|
|
import hashlib
|
|
import sqlite3
|
|
import time
|
|
import uuid as uuid_mod
|
|
from dataclasses import dataclass
|
|
from typing import List, Optional
|
|
|
|
|
|
@dataclass
class Fact:
    """One row of the ``facts`` table, as returned by ``FactStore``.

    Field order mirrors the positional constructor and must not change.
    """

    # --- identity -----------------------------------------------------
    uuid: str  # string form of a uuid4, generated when the fact is stored
    content: str  # the fact text itself
    content_hash: str  # SHA-256 hex digest of ``content``; used for deduplication

    # --- classification ----------------------------------------------
    category: Optional[str]  # ``FactStore.store`` defaults this to "general"
    confidence: float  # queries keep only rows with confidence >= a threshold
    source_type: str  # ``FactStore.store`` defaults this to "user"
    source_id: Optional[str]

    # --- lifecycle (timestamps come from ``time.time()``) --------------
    created_at: float
    updated_at: float
    expires_at: Optional[float]  # NOTE(review): presumably an epoch timestamp — confirm

    # --- usage bookkeeping --------------------------------------------
    access_count: int  # starts at 0; bumped when the same content is stored again
    last_accessed: Optional[float]  # None on insert
    is_archived: bool  # True once the fact has been soft-deleted
|
|
|
|
|
|
class FactStore:
    """CRUD access to structured facts stored in the ``facts`` SQLite table.

    Rows are read by column name (``row["uuid"]`` etc.), so the connection
    passed in must use a name-addressable row factory such as ``sqlite3.Row``.
    Every write runs inside ``with self.conn:`` so it is committed on success
    and rolled back if the statement raises.
    """

    def __init__(self, conn: sqlite3.Connection):
        """Wrap an already-open connection; the store never opens or closes it."""
        self.conn = conn

    @staticmethod
    def _hash(content: str) -> str:
        """Return the SHA-256 hex digest of *content* (UTF-8 encoded)."""
        return hashlib.sha256(content.encode("utf-8")).hexdigest()

    @staticmethod
    def _like_pattern(text: str) -> str:
        """Return *text* as a LIKE substring pattern with wildcards escaped.

        The backslash is escaped first so that the escapes added for ``%``
        and ``_`` are not themselves doubled.
        """
        escaped = text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")
        return f"%{escaped}%"

    def store(
        self,
        content: str,
        category: str = "general",
        confidence: float = 1.0,
        source_type: str = "user",
        source_id: Optional[str] = None,
        expires_at: Optional[float] = None,
    ) -> "Fact":
        """Insert a fact, or refresh the active fact with identical content.

        If a non-archived row with the same content hash exists, no new row is
        created: its confidence is raised to the larger of the stored and the
        given value, ``updated_at`` is set to now and ``access_count`` is
        incremented. Archived rows are ignored, so re-storing archived content
        creates a fresh row.

        Returns:
            The stored (or refreshed) fact as read back from the database.
        """
        content_hash = self._hash(content)
        now = time.time()

        # Look for an *active* row explicitly. Going through an unordered
        # "LIMIT 1" lookup could surface an archived twin and lead to a second
        # active copy being inserted.
        active = self.conn.execute(
            "SELECT uuid FROM facts WHERE content_hash = ? AND is_archived = 0 LIMIT 1",
            (content_hash,),
        ).fetchone()
        if active is not None:
            with self.conn:
                self.conn.execute(
                    "UPDATE facts SET confidence = MAX(confidence, ?), updated_at = ?, access_count = access_count + 1 WHERE uuid = ?",
                    (confidence, now, active["uuid"]),
                )
            return self.get_by_uuid(active["uuid"])

        fact_uuid = str(uuid_mod.uuid4())
        with self.conn:
            self.conn.execute(
                """
                INSERT INTO facts (uuid, content, content_hash, category, confidence, source_type, source_id, created_at, updated_at, expires_at, access_count, last_accessed, is_archived)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (fact_uuid, content, content_hash, category, confidence, source_type, source_id, now, now, expires_at, 0, None, 0),
            )
        return self.get_by_uuid(fact_uuid)

    def get_by_hash(self, content_hash: str) -> Optional["Fact"]:
        """Return a fact with the given content hash, or ``None``.

        When several rows share the hash, a non-archived one is preferred and
        the oldest of those is returned, which makes the result deterministic.
        """
        row = self.conn.execute(
            "SELECT * FROM facts WHERE content_hash = ? ORDER BY is_archived, created_at LIMIT 1",
            (content_hash,),
        ).fetchone()
        return self._row_to_fact(row) if row else None

    def get_by_uuid(self, fact_uuid: str) -> Optional["Fact"]:
        """Return the fact with the given uuid (archived or not), or ``None``."""
        row = self.conn.execute(
            "SELECT * FROM facts WHERE uuid = ? LIMIT 1", (fact_uuid,)
        ).fetchone()
        return self._row_to_fact(row) if row else None

    def query(
        self,
        query: str = "",
        category: Optional[str] = None,
        limit: int = 10,
        min_confidence: float = 0.5,
        fts: bool = True,
    ) -> List["Fact"]:
        """Return non-archived facts, best confidence first, newest first.

        Args:
            query: Search text. Empty means "no text filter".
            category: If truthy, restrict results to this category.
            limit: Maximum number of rows to return.
            min_confidence: Rows below this confidence are excluded.
            fts: Use the ``facts_fts`` full-text index when *query* is given;
                otherwise (or if the full-text search cannot be executed) do a
                literal substring match on ``content``.
        """
        if fts and query:
            try:
                rows = self._search_fts(query, category, limit, min_confidence)
            except sqlite3.OperationalError:
                # Free-form text is not always a valid FTS5 expression
                # (punctuation such as ' - : raises a syntax error). Degrade to
                # the substring search instead of failing the whole lookup;
                # unrelated database errors will surface again from there.
                rows = self._search_like(query, category, limit, min_confidence)
        else:
            rows = self._search_like(query, category, limit, min_confidence)
        return [self._row_to_fact(r) for r in rows]

    def _search_fts(self, query, category, limit, min_confidence):
        """Run the FTS5-backed search and return the raw rows."""
        sql = """
            SELECT f.* FROM facts f
            JOIN facts_fts fts ON f.id = fts.rowid
            WHERE fts.facts_fts MATCH ? AND f.confidence >= ? AND f.is_archived = 0
        """
        params = [query, min_confidence]
        if category:
            sql += " AND f.category = ?"
            params.append(category)
        sql += " ORDER BY f.confidence DESC, f.created_at DESC LIMIT ?"
        params.append(limit)
        return self.conn.execute(sql, params).fetchall()

    def _search_like(self, query, category, limit, min_confidence):
        """Run the plain-table search (optional substring filter) and return raw rows."""
        sql = "SELECT * FROM facts WHERE confidence >= ? AND is_archived = 0"
        params = [min_confidence]
        if category:
            sql += " AND category = ?"
            params.append(category)
        if query:
            # ESCAPE makes '%' and '_' in the user's text match literally
            # instead of acting as LIKE wildcards.
            sql += " AND content LIKE ? ESCAPE '\\'"
            params.append(self._like_pattern(query))
        sql += " ORDER BY confidence DESC, created_at DESC LIMIT ?"
        params.append(limit)
        return self.conn.execute(sql, params).fetchall()

    def update(self, fact_uuid: str, **fields) -> Optional["Fact"]:
        """Update selected columns of a fact and return the refreshed fact.

        Only ``content``, ``category``, ``confidence``, ``expires_at`` and
        ``is_archived`` may be changed; any other keyword is silently ignored.
        Changing ``content`` also recomputes ``content_hash``. ``updated_at``
        is set to now whenever at least one allowed field is given.

        Returns:
            The fact after the update, or ``None`` if the uuid does not exist.
        """
        allowed = {"content", "category", "confidence", "expires_at", "is_archived"}
        updates = {k: v for k, v in fields.items() if k in allowed}
        if not updates:
            return self.get_by_uuid(fact_uuid)

        if "content" in updates:
            updates["content_hash"] = self._hash(updates["content"])
        updates["updated_at"] = time.time()

        # Column names come only from the allowlist above (plus the two derived
        # columns), so interpolating them is safe; values stay parameterized.
        cols = ", ".join(f"{k} = ?" for k in updates)
        vals = list(updates.values()) + [fact_uuid]
        with self.conn:
            self.conn.execute(f"UPDATE facts SET {cols} WHERE uuid = ?", vals)
        return self.get_by_uuid(fact_uuid)

    def delete(self, fact_uuid: str, soft: bool = True) -> bool:
        """Archive (``soft=True``) or physically remove a fact.

        Returns:
            ``True`` if a row with this uuid was affected, ``False`` if no such
            row exists.
        """
        with self.conn:
            if soft:
                cur = self.conn.execute(
                    "UPDATE facts SET is_archived = 1, updated_at = ? WHERE uuid = ?",
                    (time.time(), fact_uuid),
                )
            else:
                cur = self.conn.execute("DELETE FROM facts WHERE uuid = ?", (fact_uuid,))
        return cur.rowcount > 0

    def deduplicate(self) -> int:
        """Merge active facts that share a content hash; return the number removed.

        For each group the oldest row (by ``created_at``) survives. It receives
        the highest confidence and the summed access count of the group, and
        its ``updated_at`` is set to now; the remaining rows are deleted. All
        groups are processed in a single transaction.
        """
        groups = self.conn.execute(
            "SELECT content_hash, MAX(confidence) AS best, SUM(access_count) AS hits "
            "FROM facts WHERE is_archived = 0 GROUP BY content_hash HAVING COUNT(*) > 1"
        ).fetchall()

        merged = 0
        now = time.time()
        with self.conn:
            for group in groups:
                members = self.conn.execute(
                    "SELECT uuid FROM facts WHERE content_hash = ? AND is_archived = 0 ORDER BY created_at",
                    (group["content_hash"],),
                ).fetchall()
                if not members:
                    # A NULL content_hash groups together but never matches
                    # "= ?", so there is nothing to merge for it.
                    continue
                keep, *dups = (m["uuid"] for m in members)
                self.conn.execute(
                    "UPDATE facts SET confidence = ?, access_count = ?, updated_at = ? WHERE uuid = ?",
                    (group["best"], group["hits"], now, keep),
                )
                self.conn.executemany(
                    "DELETE FROM facts WHERE uuid = ?", [(u,) for u in dups]
                )
                merged += len(dups)
        return merged

    def _row_to_fact(self, row: sqlite3.Row) -> "Fact":
        """Convert a ``facts`` row into a ``Fact``; ``is_archived`` becomes a bool."""
        return Fact(
            uuid=row["uuid"],
            content=row["content"],
            content_hash=row["content_hash"],
            category=row["category"],
            confidence=row["confidence"],
            source_type=row["source_type"],
            source_id=row["source_id"],
            created_at=row["created_at"],
            updated_at=row["updated_at"],
            expires_at=row["expires_at"],
            access_count=row["access_count"],
            last_accessed=row["last_accessed"],
            is_archived=bool(row["is_archived"]),
        )
|