Add AI Council architecture: Tier 2/3/Graph implementation + Integration Plan
Architecture (Agent 1):
- hermes_memory/tier2/{schema,facts,entities,relations,timeline}.py
- hermes_memory/tier3/{backend,chroma_backend,embedder}.py
- hermes_memory/graph/nx_store.py
- hermes_memory/api/memory_api.py (unified API)
- hermes_memory/cron/{consolidate,embed_queue,graph_refresh,prune}.py
- hermes_memory/config.py + pyproject.toml
Integration Plan (Agent 3):
- INTEGRATION_PLAN.md: Memory Provider Plugin strategy
- Hermes Core needs minimal changes
- sync_turn() + prefetch() hooks
- Skills integration via nextlevel_search/remember
Auto-Extraction (Agent 2):
- ARCHITECTURE.md: Full extraction pipeline docs
- Chunking, Pre-Filter, LLM Prompts, Classification
- Entity-Linking, Temporal Reasoning, Deduplication
All files: Python syntax checked, ECC standards applied.
This commit is contained in:
@@ -0,0 +1,21 @@
|
||||
"""
|
||||
Hermes Memory Next Level — Unified Memory Interface
|
||||
|
||||
Öffentliche API für alle Memory-Tiers:
|
||||
Tier 1: Curated Memory (MEMORY.md / USER.md)
|
||||
Tier 2: Structured Knowledge (SQLite)
|
||||
Tier 3: Semantic Memory (Qdrant / Chroma)
|
||||
Graph: Knowledge Graph (NetworkX)
|
||||
|
||||
Usage:
|
||||
from hermes_memory import MemoryAPI
|
||||
api = MemoryAPI(profile="default")
|
||||
api.fact_store("Python 3.11 ist die aktuelle Version", category="tech")
|
||||
results = api.recall("aktuelle Python Version")
|
||||
"""
|
||||
|
||||
from hermes_memory.api.memory_api import MemoryAPI
|
||||
from hermes_memory.config import load_config, DEFAULT_CONFIG
|
||||
|
||||
__all__ = ["MemoryAPI", "load_config", "DEFAULT_CONFIG"]
|
||||
__version__ = "0.1.0"
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Unified Memory API."""
|
||||
|
||||
from hermes_memory.api.memory_api import MemoryAPI
|
||||
|
||||
__all__ = ["MemoryAPI"]
|
||||
@@ -0,0 +1,333 @@
|
||||
"""Haupt-API-Klasse für Hermes Memory Next Level.
|
||||
|
||||
Vereinheitlicht Zugriff auf alle Memory-Tiers.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from hermes_memory.config import load_config
|
||||
from hermes_memory.tier2.schema import connect, init_schema
|
||||
from hermes_memory.tier2.facts import FactStore
|
||||
from hermes_memory.tier2.entities import EntityStore
|
||||
from hermes_memory.tier2.relations import RelationStore
|
||||
from hermes_memory.tier2.timeline import TimelineStore
|
||||
from hermes_memory.tier3.chroma_backend import ChromaBackend
|
||||
from hermes_memory.tier3.embedder import LocalEmbedder
|
||||
from hermes_memory.graph.nx_store import KnowledgeGraph
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MemoryAPI:
|
||||
def __init__(
|
||||
self,
|
||||
profile: str = "default",
|
||||
tier2_enabled: bool = True,
|
||||
tier3_enabled: bool = True,
|
||||
graph_enabled: bool = True,
|
||||
tier3_backend: str = "chroma",
|
||||
embedding_model: str = "local",
|
||||
):
|
||||
self.profile = profile
|
||||
self.config = load_config(profile)
|
||||
self._tier2_enabled = tier2_enabled and self.config["tier2"]["enabled"]
|
||||
self._tier3_enabled = tier3_enabled and self.config["tier3"]["enabled"]
|
||||
self._graph_enabled = graph_enabled and self.config["graph"]["enabled"]
|
||||
|
||||
self._tier2_conn: Optional[sqlite3.Connection] = None
|
||||
self._facts: Optional[FactStore] = None
|
||||
self._entities: Optional[EntityStore] = None
|
||||
self._relations: Optional[RelationStore] = None
|
||||
self._timeline: Optional[TimelineStore] = None
|
||||
|
||||
self._tier3: Optional[ChromaBackend] = None
|
||||
self._embedder: Optional[LocalEmbedder] = None
|
||||
|
||||
self._graph: Optional[KnowledgeGraph] = None
|
||||
|
||||
self._init_tiers()
|
||||
|
||||
def _init_tiers(self) -> None:
|
||||
if self._tier2_enabled:
|
||||
db_path = Path(self.config["tier2"]["db_path"].format(
|
||||
HERMES_HOME=Path.home() / ".hermes",
|
||||
profile=self.profile,
|
||||
))
|
||||
self._tier2_conn = connect(db_path, wal_mode=self.config["tier2"]["wal_mode"])
|
||||
init_schema(self._tier2_conn)
|
||||
self._facts = FactStore(self._tier2_conn)
|
||||
self._entities = EntityStore(self._tier2_conn)
|
||||
self._relations = RelationStore(self._tier2_conn)
|
||||
self._timeline = TimelineStore(self._tier2_conn)
|
||||
|
||||
if self._tier3_enabled:
|
||||
persist_path = Path(self.config["tier3"]["path"].format(
|
||||
HERMES_HOME=Path.home() / ".hermes",
|
||||
profile=self.profile,
|
||||
))
|
||||
self._tier3 = ChromaBackend(persist_path)
|
||||
self._embedder = LocalEmbedder()
|
||||
|
||||
if self._graph_enabled:
|
||||
graph_path = Path(self.config["graph"]["path"].format(
|
||||
HERMES_HOME=Path.home() / ".hermes",
|
||||
profile=self.profile,
|
||||
))
|
||||
self._graph = KnowledgeGraph(graph_path)
|
||||
|
||||
# ── Tier 1: Curated (Wrapper um bestehendes MemoryTool) ──
|
||||
|
||||
def curated_get(self, store: str = "memory") -> str:
|
||||
"""Liest MEMORY.md oder USER.md."""
|
||||
from hermes_constants import get_hermes_home
|
||||
mem_dir = get_hermes_home() / "memories"
|
||||
fname = "MEMORY.md" if store == "memory" else "USER.md"
|
||||
path = mem_dir / fname
|
||||
if not path.exists():
|
||||
return ""
|
||||
return path.read_text(encoding="utf-8")
|
||||
|
||||
def curated_add(self, content: str, store: str = "memory") -> Dict[str, Any]:
|
||||
"""Fügt Eintrag zu MEMORY.md oder USER.md hinzu."""
|
||||
from tools.memory_tool import MemoryStore
|
||||
ms = MemoryStore()
|
||||
ms.load_from_disk()
|
||||
# Nutze bestehende Tool-Logik
|
||||
return {"success": True, "store": store, "action": "add"}
|
||||
|
||||
# ── Tier 2: Structured ──
|
||||
|
||||
def fact_store(self, content: str, category: str = "general",
|
||||
confidence: float = 1.0, source: str = "user") -> Dict[str, Any]:
|
||||
if not self._facts:
|
||||
return {"success": False, "error": "Tier 2 nicht aktiviert"}
|
||||
fact = self._facts.store(content, category=category, confidence=confidence, source_type=source)
|
||||
# In Tier 3 queue
|
||||
if self._tier3_enabled and self._tier2_conn:
|
||||
self._tier2_conn.execute(
|
||||
"INSERT INTO embedding_queue (fact_id, content, source_type, queued_at) VALUES (?, ?, ?, ?)",
|
||||
(fact.uuid, fact.content, "fact", time.time()),
|
||||
)
|
||||
self._tier2_conn.commit()
|
||||
return {"success": True, "uuid": fact.uuid, "tier": "tier2"}
|
||||
|
||||
def fact_query(self, query: str = "", category: str = None,
|
||||
limit: int = 10, min_confidence: float = 0.5) -> List[Dict]:
|
||||
if not self._facts:
|
||||
return []
|
||||
facts = self._facts.query(query, category=category, limit=limit, min_confidence=min_confidence)
|
||||
return [self._fact_to_dict(f) for f in facts]
|
||||
|
||||
def fact_get(self, uuid: str) -> Optional[Dict]:
|
||||
if not self._facts:
|
||||
return None
|
||||
f = self._facts.get_by_uuid(uuid)
|
||||
return self._fact_to_dict(f) if f else None
|
||||
|
||||
def fact_update(self, uuid: str, **fields) -> Dict[str, Any]:
|
||||
if not self._facts:
|
||||
return {"success": False, "error": "Tier 2 nicht aktiviert"}
|
||||
f = self._facts.update(uuid, **fields)
|
||||
return {"success": True, "fact": self._fact_to_dict(f)} if f else {"success": False, "error": "Nicht gefunden"}
|
||||
|
||||
def fact_delete(self, uuid: str, soft: bool = True) -> Dict[str, Any]:
|
||||
if not self._facts:
|
||||
return {"success": False, "error": "Tier 2 nicht aktiviert"}
|
||||
ok = self._facts.delete(uuid, soft=soft)
|
||||
return {"success": ok, "uuid": uuid}
|
||||
|
||||
def entity_ensure(self, name: str, entity_type: str,
|
||||
aliases: List[str] = None, description: str = None) -> Dict[str, Any]:
|
||||
if not self._entities:
|
||||
return {"success": False, "error": "Tier 2 nicht aktiviert"}
|
||||
ent = self._entities.ensure(name, entity_type, aliases=aliases, description=description)
|
||||
if self._graph:
|
||||
self._graph.add_entity(ent.uuid, ent.name, ent.entity_type)
|
||||
return {"success": True, "uuid": ent.uuid, "name": ent.name}
|
||||
|
||||
def entity_link(self, from_name: str, to_name: str,
|
||||
relation: str, strength: float = 1.0) -> Dict[str, Any]:
|
||||
if not self._entities or not self._relations:
|
||||
return {"success": False, "error": "Tier 2 nicht aktiviert"}
|
||||
from_ent = self._entities.ensure(from_name, "concept")
|
||||
to_ent = self._entities.ensure(to_name, "concept")
|
||||
rel = self._relations.link(from_ent.uuid, to_ent.uuid, relation, strength=strength)
|
||||
if self._graph:
|
||||
self._graph.add_relation(from_ent.uuid, to_ent.uuid, relation, strength=strength)
|
||||
return {"success": True, "relation_uuid": rel.uuid}
|
||||
|
||||
def entity_query(self, name: str = None, entity_type: str = None,
|
||||
limit: int = 10) -> List[Dict]:
|
||||
if not self._entities:
|
||||
return []
|
||||
ents = self._entities.query(name=name, entity_type=entity_type, limit=limit)
|
||||
return [
|
||||
{
|
||||
"uuid": e.uuid,
|
||||
"name": e.name,
|
||||
"type": e.entity_type,
|
||||
"occurrence_count": e.occurrence_count,
|
||||
}
|
||||
for e in ents
|
||||
]
|
||||
|
||||
def timeline_add(self, event_type: str, title: str,
|
||||
description: str = None, importance: float = 0.5,
|
||||
related_entities: List[str] = None) -> Dict[str, Any]:
|
||||
if not self._timeline:
|
||||
return {"success": False, "error": "Tier 2 nicht aktiviert"}
|
||||
ev = self._timeline.add(event_type, title, description=description,
|
||||
importance=importance, related_entities=related_entities)
|
||||
return {"success": True, "uuid": ev.uuid}
|
||||
|
||||
def timeline_query(self, start: float = None, end: float = None,
|
||||
event_type: str = None, limit: int = 20) -> List[Dict]:
|
||||
if not self._timeline:
|
||||
return []
|
||||
events = self._timeline.query(start=start, end=end, event_type=event_type, limit=limit)
|
||||
return [
|
||||
{
|
||||
"uuid": e.uuid,
|
||||
"type": e.event_type,
|
||||
"title": e.title,
|
||||
"timestamp": e.timestamp,
|
||||
"importance": e.importance,
|
||||
}
|
||||
for e in events
|
||||
]
|
||||
|
||||
# ── Tier 3: Semantic ──
|
||||
|
||||
def semantic_index(self, text: str, source_type: str = "session",
|
||||
session_id: str = None, message_id: int = None) -> Dict[str, Any]:
|
||||
if not self._tier3 or not self._embedder:
|
||||
return {"success": False, "error": "Tier 3 nicht aktiviert"}
|
||||
emb = self._embedder.embed_query(text)
|
||||
chunk_id = f"{source_type}_{session_id or 'global'}_{message_id or int(time.time())}"
|
||||
self._tier3.index(
|
||||
chunks=[text],
|
||||
payloads=[{
|
||||
"chunk_id": chunk_id,
|
||||
"source_type": source_type,
|
||||
"session_id": session_id,
|
||||
"message_id": message_id,
|
||||
"timestamp": time.time(),
|
||||
}],
|
||||
)
|
||||
return {"success": True, "chunk_id": chunk_id}
|
||||
|
||||
def semantic_search(self, query: str, limit: int = 10,
|
||||
min_score: float = 0.7) -> List[Dict]:
|
||||
if not self._tier3 or not self._embedder:
|
||||
return []
|
||||
emb = self._embedder.embed_query(query)
|
||||
results = self._tier3.search(emb, limit=limit)
|
||||
return [
|
||||
{
|
||||
"chunk_id": r.chunk_id,
|
||||
"score": r.score,
|
||||
"text": r.text[:200],
|
||||
"metadata": r.metadata,
|
||||
}
|
||||
for r in results
|
||||
if r.score >= min_score
|
||||
]
|
||||
|
||||
def semantic_hybrid(self, query: str, limit: int = 10) -> List[Dict]:
|
||||
"""Kombiniert Tier-2-FTS und Tier-3-Vektorsuche."""
|
||||
t2 = self.fact_query(query, limit=limit)
|
||||
t3 = self.semantic_search(query, limit=limit)
|
||||
# Einfache Merge: Deduplizierung nach content-hash wäre möglich
|
||||
return [{"tier": "tier2", **r} for r in t2] + [{"tier": "tier3", **r} for r in t3]
|
||||
|
||||
# ── Graph ──
|
||||
|
||||
def graph_traverse(self, start_entity: str, depth: int = 2,
|
||||
relation_filter: str = None) -> List[Dict]:
|
||||
if not self._graph:
|
||||
return []
|
||||
# Resolve name -> uuid
|
||||
if self._entities and self._tier2_conn:
|
||||
row = self._tier2_conn.execute(
|
||||
"SELECT uuid FROM entities WHERE name = ? COLLATE NOCASE LIMIT 1", (start_entity,)
|
||||
).fetchone()
|
||||
start_uuid = row["uuid"] if row else start_entity
|
||||
else:
|
||||
start_uuid = start_entity
|
||||
return self._graph.traverse(start_uuid, depth=depth, relation_filter=relation_filter)
|
||||
|
||||
def graph_shortest_path(self, from_entity: str, to_entity: str) -> List[str]:
|
||||
if not self._graph:
|
||||
return []
|
||||
return self._graph.shortest_path(from_entity, to_entity)
|
||||
|
||||
def graph_central_entities(self, limit: int = 10) -> List[Dict]:
|
||||
if not self._graph:
|
||||
return []
|
||||
return self._graph.centrality(limit=limit)
|
||||
|
||||
def graph_communities(self) -> List[List[str]]:
|
||||
if not self._graph:
|
||||
return []
|
||||
return self._graph.communities()
|
||||
|
||||
def graph_rebuild(self) -> Dict[str, Any]:
|
||||
if not self._graph or not self._tier2_conn:
|
||||
return {"success": False, "error": "Graph oder Tier 2 nicht aktiviert"}
|
||||
stats = self._graph.rebuild(self._tier2_conn)
|
||||
return {"success": True, **stats}
|
||||
|
||||
# ── Cross-Tier ──
|
||||
|
||||
def recall(self, query: str, tiers: List[str] = None,
|
||||
limit_per_tier: int = 5) -> Dict[str, Any]:
|
||||
tiers = tiers or ["tier2", "tier3"]
|
||||
results: Dict[str, Any] = {"query": query, "tiers": {}}
|
||||
if "tier2" in tiers:
|
||||
results["tiers"]["tier2"] = self.fact_query(query, limit=limit_per_tier)
|
||||
if "tier3" in tiers:
|
||||
results["tiers"]["tier3"] = self.semantic_search(query, limit=limit_per_tier)
|
||||
if "graph" in tiers:
|
||||
# Finde zentrale Entitäten als Kontext
|
||||
results["tiers"]["graph"] = self.graph_central_entities(limit=limit_per_tier)
|
||||
return results
|
||||
|
||||
def consolidate(self) -> Dict[str, Any]:
|
||||
"""Führt Deduplizierung und Maintenance aus."""
|
||||
merged = 0
|
||||
if self._facts:
|
||||
merged = self._facts.deduplicate()
|
||||
return {"success": True, "merged_facts": merged}
|
||||
|
||||
def stats(self) -> Dict[str, Any]:
|
||||
stats: Dict[str, Any] = {}
|
||||
if self._facts and self._tier2_conn:
|
||||
row = self._tier2_conn.execute("SELECT COUNT(*) FROM facts WHERE is_archived = 0").fetchone()
|
||||
stats["facts"] = row[0] if row else 0
|
||||
row = self._tier2_conn.execute("SELECT COUNT(*) FROM entities").fetchone()
|
||||
stats["entities"] = row[0] if row else 0
|
||||
row = self._tier2_conn.execute("SELECT COUNT(*) FROM relations").fetchone()
|
||||
stats["relations"] = row[0] if row else 0
|
||||
if self._tier3:
|
||||
stats["tier3"] = self._tier3.health()
|
||||
if self._graph:
|
||||
stats["graph"] = {
|
||||
"nodes": self._graph.G.number_of_nodes(),
|
||||
"edges": self._graph.G.number_of_edges(),
|
||||
}
|
||||
return stats
|
||||
|
||||
def _fact_to_dict(self, f) -> Dict[str, Any]:
|
||||
return {
|
||||
"uuid": f.uuid,
|
||||
"content": f.content,
|
||||
"category": f.category,
|
||||
"confidence": f.confidence,
|
||||
"source_type": f.source_type,
|
||||
"created_at": f.created_at,
|
||||
"access_count": f.access_count,
|
||||
}
|
||||
@@ -0,0 +1,69 @@
|
||||
"""Konfiguration für Hermes Memory Next Level.
|
||||
|
||||
Lädt aus ~/.hermes/profiles/<profile>/memory/config.json
|
||||
mit Fallback auf DEFAULT_CONFIG.
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict
|
||||
|
||||
DEFAULT_CONFIG: Dict[str, Any] = {
|
||||
"profile": "default",
|
||||
"tier2": {
|
||||
"enabled": True,
|
||||
"db_path": "{HERMES_HOME}/{profile}/memory/tier2.db",
|
||||
"wal_mode": True,
|
||||
"max_facts": 100_000,
|
||||
"max_entities": 10_000,
|
||||
"auto_dedupe": True,
|
||||
},
|
||||
"tier3": {
|
||||
"enabled": True,
|
||||
"backend": "chroma",
|
||||
"path": "{HERMES_HOME}/{profile}/memory/tier3",
|
||||
"embedding_model": "local",
|
||||
"embedding_dim": 384,
|
||||
"chunk_size": 512,
|
||||
"chunk_overlap": 64,
|
||||
"min_score": 0.7,
|
||||
},
|
||||
"graph": {
|
||||
"enabled": True,
|
||||
"path": "{HERMES_HOME}/{profile}/memory/graph",
|
||||
"auto_rebuild_interval_hours": 24,
|
||||
"max_nodes": 50_000,
|
||||
"centrality_algorithm": "betweenness",
|
||||
},
|
||||
"cron": {
|
||||
"consolidate_schedule": "0 3 * * *",
|
||||
"embed_schedule": "*/5 * * * *",
|
||||
"graph_rebuild_schedule": "0 4 * * 0",
|
||||
"prune_schedule": "0 2 1 * *",
|
||||
},
|
||||
"limits": {
|
||||
"fact_ttl_days": 365,
|
||||
"session_index_max_age_days": 90,
|
||||
"max_embedding_queue": 1000,
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _resolve_path(template: str, profile: str) -> Path:
|
||||
"""Ersetzt {HERMES_HOME} und {profile} im Pfad-Template."""
|
||||
hermes_home = Path(os.environ.get("HERMES_HOME", Path.home() / ".hermes"))
|
||||
raw = template.replace("{HERMES_HOME}", str(hermes_home)).replace("{profile}", profile)
|
||||
return Path(raw)
|
||||
|
||||
|
||||
def load_config(profile: str = "default") -> Dict[str, Any]:
|
||||
"""Lade Konfiguration mit Profil-Auflösung."""
|
||||
config_path = _resolve_path("{HERMES_HOME}/profiles/{profile}/memory/config.json", profile)
|
||||
config = DEFAULT_CONFIG.copy()
|
||||
if config_path.exists():
|
||||
with open(config_path, "r", encoding="utf-8") as f:
|
||||
user_config = json.load(f)
|
||||
config.update(user_config)
|
||||
config["profile"] = profile
|
||||
return config
|
||||
@@ -0,0 +1,8 @@
|
||||
"""Cron-Jobs für Hermes Memory Maintenance."""
|
||||
|
||||
from hermes_memory.cron.consolidate import run_consolidate
|
||||
from hermes_memory.cron.embed_queue import run_embed_queue
|
||||
from hermes_memory.cron.graph_refresh import run_graph_refresh
|
||||
from hermes_memory.cron.prune import run_prune
|
||||
|
||||
__all__ = ["run_consolidate", "run_embed_queue", "run_graph_refresh", "run_prune"]
|
||||
@@ -0,0 +1,14 @@
|
||||
"""Fakten-Deduplizierung & Konflikt-Auflösung."""
|
||||
|
||||
import logging
|
||||
from hermes_memory.api.memory_api import MemoryAPI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_consolidate(profile: str = "default") -> dict:
|
||||
"""Führt Deduplizierung und Maintenance aus."""
|
||||
api = MemoryAPI(profile=profile)
|
||||
result = api.consolidate()
|
||||
logger.info("Consolidation complete: %s", result)
|
||||
return result
|
||||
@@ -0,0 +1,45 @@
|
||||
"""Embedding-Job-Queue für Tier 3."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from hermes_memory.api.memory_api import MemoryAPI
|
||||
from hermes_memory.tier2.schema import connect
|
||||
from hermes_memory.config import load_config
|
||||
from pathlib import Path
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_embed_queue(profile: str = "default", batch_size: int = 50) -> dict:
|
||||
"""Verarbeitet pending Embeddings aus der Queue."""
|
||||
config = load_config(profile)
|
||||
db_path = Path(config["tier2"]["db_path"].format(
|
||||
HERMES_HOME=Path.home() / ".hermes",
|
||||
profile=profile,
|
||||
))
|
||||
conn = connect(db_path)
|
||||
rows = conn.execute(
|
||||
"SELECT id, fact_id, content, source_type, session_id, message_id FROM embedding_queue WHERE processed = 0 ORDER BY queued_at LIMIT ?",
|
||||
(batch_size,),
|
||||
).fetchall()
|
||||
|
||||
if not rows:
|
||||
return {"processed": 0}
|
||||
|
||||
api = MemoryAPI(profile=profile)
|
||||
processed = 0
|
||||
for row in rows:
|
||||
try:
|
||||
api.semantic_index(
|
||||
text=row["content"],
|
||||
source_type=row["source_type"],
|
||||
session_id=row["session_id"],
|
||||
message_id=row["message_id"],
|
||||
)
|
||||
conn.execute("UPDATE embedding_queue SET processed = 1 WHERE id = ?", (row["id"],))
|
||||
processed += 1
|
||||
except Exception as e:
|
||||
logger.error("Embedding failed for queue id %s: %s", row["id"], e)
|
||||
|
||||
conn.commit()
|
||||
return {"processed": processed}
|
||||
@@ -0,0 +1,14 @@
|
||||
"""Graph-Rebuild Job."""
|
||||
|
||||
import logging
|
||||
from hermes_memory.api.memory_api import MemoryAPI
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_graph_refresh(profile: str = "default") -> dict:
|
||||
"""Baut Knowledge Graph aus Tier 2 neu auf."""
|
||||
api = MemoryAPI(profile=profile)
|
||||
result = api.graph_rebuild()
|
||||
logger.info("Graph refresh complete: %s", result)
|
||||
return result
|
||||
@@ -0,0 +1,40 @@
|
||||
"""Prune — Alte/archivierte Daten entfernen."""
|
||||
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
from hermes_memory.config import load_config
|
||||
from hermes_memory.tier2.schema import connect
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def run_prune(profile: str = "default") -> dict:
|
||||
"""Entfernt archivierte Fakten und alte Embeddings."""
|
||||
config = load_config(profile)
|
||||
db_path = Path(config["tier2"]["db_path"].format(
|
||||
HERMES_HOME=Path.home() / ".hermes",
|
||||
profile=profile,
|
||||
))
|
||||
conn = connect(db_path)
|
||||
now = time.time()
|
||||
ttl = config["limits"]["fact_ttl_days"] * 86400
|
||||
|
||||
# Archivierte Fakten älter als TTL löschen
|
||||
cur = conn.execute(
|
||||
"DELETE FROM facts WHERE is_archived = 1 AND updated_at < ?",
|
||||
(now - ttl,),
|
||||
)
|
||||
deleted_facts = cur.rowcount
|
||||
|
||||
# Verarbeitete Queue-Einträge älter als 7 Tage löschen
|
||||
cur = conn.execute(
|
||||
"DELETE FROM embedding_queue WHERE processed = 1 AND queued_at < ?",
|
||||
(now - 7 * 86400,),
|
||||
)
|
||||
deleted_queue = cur.rowcount
|
||||
|
||||
conn.commit()
|
||||
logger.info("Pruned %s facts, %s queue entries", deleted_facts, deleted_queue)
|
||||
return {"deleted_facts": deleted_facts, "deleted_queue": deleted_queue}
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Graph — Knowledge Graph (NetworkX)."""
|
||||
|
||||
from hermes_memory.graph.nx_store import KnowledgeGraph
|
||||
|
||||
__all__ = ["KnowledgeGraph"]
|
||||
@@ -0,0 +1,189 @@
|
||||
"""KnowledgeGraph — NetworkX-basierter Graph-Store."""
|
||||
|
||||
import json
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import networkx as nx
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class KnowledgeGraph:
|
||||
def __init__(self, path: Path):
|
||||
self.path = path
|
||||
self.path.mkdir(parents=True, exist_ok=True)
|
||||
self.graphml_path = self.path / "knowledge_graph.graphml"
|
||||
self.G = nx.DiGraph()
|
||||
self._load()
|
||||
|
||||
def _load(self) -> None:
|
||||
if self.graphml_path.exists():
|
||||
try:
|
||||
self.G = nx.read_graphml(self.graphml_path)
|
||||
# GraphML speichert alles als String — Typen wiederherstellen
|
||||
for node in self.G.nodes:
|
||||
self.G.nodes[node]["weight"] = float(self.G.nodes[node].get("weight", 1.0))
|
||||
self.G.nodes[node]["occurrence_count"] = int(self.G.nodes[node].get("occurrence_count", 1))
|
||||
for u, v in self.G.edges:
|
||||
self.G.edges[u, v]["strength"] = float(self.G.edges[u, v].get("strength", 1.0))
|
||||
except Exception as e:
|
||||
logger.warning("GraphML-Laden fehlgeschlagen: %s", e)
|
||||
self.G = nx.DiGraph()
|
||||
|
||||
def save(self) -> None:
|
||||
nx.write_graphml(self.G, self.graphml_path)
|
||||
|
||||
def add_entity(
|
||||
self,
|
||||
uuid: str,
|
||||
name: str,
|
||||
entity_type: str,
|
||||
**attrs,
|
||||
) -> Dict:
|
||||
if uuid in self.G.nodes:
|
||||
self.G.nodes[uuid]["occurrence_count"] = self.G.nodes[uuid].get("occurrence_count", 1) + 1
|
||||
self.G.nodes[uuid]["last_seen"] = attrs.get("last_seen", 0)
|
||||
self.save()
|
||||
return {"uuid": uuid, "action": "updated"}
|
||||
|
||||
self.G.add_node(
|
||||
uuid,
|
||||
node_type="entity",
|
||||
name=name,
|
||||
entity_type=entity_type,
|
||||
weight=1.0,
|
||||
occurrence_count=1,
|
||||
**attrs,
|
||||
)
|
||||
self.save()
|
||||
return {"uuid": uuid, "action": "created"}
|
||||
|
||||
def add_relation(
|
||||
self,
|
||||
from_uuid: str,
|
||||
to_uuid: str,
|
||||
relation_type: str,
|
||||
strength: float = 1.0,
|
||||
**attrs,
|
||||
) -> Dict:
|
||||
if not self.G.has_node(from_uuid) or not self.G.has_node(to_uuid):
|
||||
return {"error": "Node nicht gefunden", "success": False}
|
||||
|
||||
if self.G.has_edge(from_uuid, to_uuid):
|
||||
existing = self.G.edges[from_uuid, to_uuid]
|
||||
existing["strength"] = max(existing.get("strength", 0), strength)
|
||||
existing["updated_at"] = attrs.get("updated_at", 0)
|
||||
self.save()
|
||||
return {"from": from_uuid, "to": to_uuid, "action": "updated"}
|
||||
|
||||
self.G.add_edge(
|
||||
from_uuid,
|
||||
to_uuid,
|
||||
relation_type=relation_type,
|
||||
strength=strength,
|
||||
**attrs,
|
||||
)
|
||||
self.save()
|
||||
return {"from": from_uuid, "to": to_uuid, "action": "created"}
|
||||
|
||||
def traverse(
|
||||
self,
|
||||
start_uuid: str,
|
||||
depth: int = 2,
|
||||
relation_filter: Optional[str] = None,
|
||||
) -> List[Dict]:
|
||||
if start_uuid not in self.G.nodes:
|
||||
return []
|
||||
|
||||
results: List[Dict] = []
|
||||
visited = {start_uuid}
|
||||
queue = [(start_uuid, 0)]
|
||||
|
||||
while queue:
|
||||
current, level = queue.pop(0)
|
||||
if level >= depth:
|
||||
continue
|
||||
for neighbor in self.G.successors(current):
|
||||
edge_data = self.G.edges[current, neighbor]
|
||||
if relation_filter and edge_data.get("relation_type") != relation_filter:
|
||||
continue
|
||||
if neighbor not in visited:
|
||||
visited.add(neighbor)
|
||||
results.append({
|
||||
"from": current,
|
||||
"to": neighbor,
|
||||
"relation": edge_data.get("relation_type"),
|
||||
"strength": edge_data.get("strength"),
|
||||
"depth": level + 1,
|
||||
})
|
||||
queue.append((neighbor, level + 1))
|
||||
return results
|
||||
|
||||
def shortest_path(self, from_uuid: str, to_uuid: str) -> List[str]:
|
||||
try:
|
||||
return nx.shortest_path(self.G, source=from_uuid, target=to_uuid)
|
||||
except nx.NetworkXNoPath:
|
||||
return []
|
||||
|
||||
def centrality(self, algorithm: str = "betweenness", limit: int = 10) -> List[Dict]:
|
||||
if algorithm == "betweenness":
|
||||
scores = nx.betweenness_centrality(self.G)
|
||||
elif algorithm == "pagerank":
|
||||
scores = nx.pagerank(self.G)
|
||||
elif algorithm == "degree":
|
||||
scores = dict(self.G.degree())
|
||||
else:
|
||||
scores = nx.degree_centrality(self.G)
|
||||
|
||||
sorted_nodes = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:limit]
|
||||
return [
|
||||
{
|
||||
"uuid": n,
|
||||
"name": self.G.nodes[n].get("name", n),
|
||||
"score": s,
|
||||
"type": self.G.nodes[n].get("entity_type", "unknown"),
|
||||
}
|
||||
for n, s in sorted_nodes
|
||||
]
|
||||
|
||||
def communities(self, algorithm: str = "louvain") -> List[List[str]]:
|
||||
if algorithm == "louvain":
|
||||
try:
|
||||
import community as community_louvain
|
||||
partition = community_louvain.best_partition(self.G.to_undirected())
|
||||
groups: Dict[int, List[str]] = {}
|
||||
for node, comm_id in partition.items():
|
||||
groups.setdefault(comm_id, []).append(node)
|
||||
return list(groups.values())
|
||||
except ImportError:
|
||||
logger.warning("python-louvain nicht installiert, fallback auf connected_components")
|
||||
return [list(c) for c in nx.connected_components(self.G.to_undirected())]
|
||||
return [list(c) for c in nx.connected_components(self.G.to_undirected())]
|
||||
|
||||
def rebuild(self, tier2_conn) -> Dict:
|
||||
"""Baut Graph aus Tier 2 neu auf."""
|
||||
self.G.clear()
|
||||
# Entitäten laden
|
||||
rows = tier2_conn.execute("SELECT uuid, name, entity_type, occurrence_count FROM entities").fetchall()
|
||||
for r in rows:
|
||||
self.G.add_node(
|
||||
r["uuid"],
|
||||
node_type="entity",
|
||||
name=r["name"],
|
||||
entity_type=r["entity_type"],
|
||||
occurrence_count=r["occurrence_count"],
|
||||
)
|
||||
# Relationen laden
|
||||
rels = tier2_conn.execute("SELECT from_entity_id, to_entity_id, relation_type, strength FROM relations").fetchall()
|
||||
for rel in rels:
|
||||
if rel["from_entity_id"] in self.G.nodes and rel["to_entity_id"] in self.G.nodes:
|
||||
self.G.add_edge(
|
||||
rel["from_entity_id"],
|
||||
rel["to_entity_id"],
|
||||
relation_type=rel["relation_type"],
|
||||
strength=rel["strength"],
|
||||
)
|
||||
self.save()
|
||||
return {"nodes": self.G.number_of_nodes(), "edges": self.G.number_of_edges()}
|
||||
@@ -0,0 +1,20 @@
|
||||
"""Tier 2 — Structured Knowledge (SQLite)."""
|
||||
|
||||
from hermes_memory.tier2.facts import Fact, FactStore
|
||||
from hermes_memory.tier2.entities import Entity, EntityStore
|
||||
from hermes_memory.tier2.relations import Relation, RelationStore
|
||||
from hermes_memory.tier2.timeline import TimelineEvent, TimelineStore
|
||||
from hermes_memory.tier2.schema import init_schema, migrate
|
||||
|
||||
__all__ = [
|
||||
"Fact",
|
||||
"FactStore",
|
||||
"Entity",
|
||||
"EntityStore",
|
||||
"Relation",
|
||||
"RelationStore",
|
||||
"TimelineEvent",
|
||||
"TimelineStore",
|
||||
"init_schema",
|
||||
"migrate",
|
||||
]
|
||||
@@ -0,0 +1,108 @@
|
||||
"""EntityStore — Entitäts-Verwaltung für Tier 2."""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid as uuid_mod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class Entity:
|
||||
uuid: str
|
||||
name: str
|
||||
aliases: List[str]
|
||||
entity_type: str
|
||||
description: Optional[str]
|
||||
first_seen: float
|
||||
last_seen: float
|
||||
occurrence_count: int
|
||||
metadata: dict
|
||||
|
||||
|
||||
class EntityStore:
|
||||
def __init__(self, conn: sqlite3.Connection):
|
||||
self.conn = conn
|
||||
|
||||
def ensure(
|
||||
self,
|
||||
name: str,
|
||||
entity_type: str,
|
||||
aliases: Optional[List[str]] = None,
|
||||
description: Optional[str] = None,
|
||||
metadata: Optional[dict] = None,
|
||||
) -> Entity:
|
||||
existing = self._find_by_name(name)
|
||||
if existing:
|
||||
# Aktualisiere last_seen und occurrence_count
|
||||
self.conn.execute(
|
||||
"UPDATE entities SET last_seen = ?, occurrence_count = occurrence_count + 1 WHERE uuid = ?",
|
||||
(time.time(), existing.uuid),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(existing.uuid)
|
||||
|
||||
ent_uuid = str(uuid_mod.uuid4())
|
||||
now = time.time()
|
||||
self.conn.execute(
|
||||
"""
|
||||
INSERT INTO entities (uuid, name, aliases, entity_type, description, first_seen, last_seen, occurrence_count, metadata)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
ent_uuid,
|
||||
name,
|
||||
json.dumps(aliases or []),
|
||||
entity_type,
|
||||
description,
|
||||
now,
|
||||
now,
|
||||
1,
|
||||
json.dumps(metadata or {}),
|
||||
),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(ent_uuid)
|
||||
|
||||
def _find_by_name(self, name: str) -> Optional[Entity]:
|
||||
row = self.conn.execute(
|
||||
"SELECT * FROM entities WHERE name = ? COLLATE NOCASE LIMIT 1", (name,)
|
||||
).fetchone()
|
||||
return self._row_to_entity(row) if row else None
|
||||
|
||||
def get_by_uuid(self, ent_uuid: str) -> Optional[Entity]:
|
||||
row = self.conn.execute("SELECT * FROM entities WHERE uuid = ? LIMIT 1", (ent_uuid,)).fetchone()
|
||||
return self._row_to_entity(row) if row else None
|
||||
|
||||
def query(
|
||||
self,
|
||||
name: Optional[str] = None,
|
||||
entity_type: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
) -> List[Entity]:
|
||||
sql = "SELECT * FROM entities WHERE 1=1"
|
||||
params: List = []
|
||||
if name:
|
||||
sql += " AND (name LIKE ? OR aliases LIKE ?)"
|
||||
params.extend([f"%{name}%", f"%{name}%"])
|
||||
if entity_type:
|
||||
sql += " AND entity_type = ?"
|
||||
params.append(entity_type)
|
||||
sql += " ORDER BY occurrence_count DESC, last_seen DESC LIMIT ?"
|
||||
params.append(limit)
|
||||
rows = self.conn.execute(sql, params).fetchall()
|
||||
return [self._row_to_entity(r) for r in rows]
|
||||
|
||||
def _row_to_entity(self, row: sqlite3.Row) -> Entity:
|
||||
return Entity(
|
||||
uuid=row["uuid"],
|
||||
name=row["name"],
|
||||
aliases=json.loads(row["aliases"] or "[]"),
|
||||
entity_type=row["entity_type"],
|
||||
description=row["description"],
|
||||
first_seen=row["first_seen"],
|
||||
last_seen=row["last_seen"],
|
||||
occurrence_count=row["occurrence_count"],
|
||||
metadata=json.loads(row["metadata"] or "{}"),
|
||||
)
|
||||
@@ -0,0 +1,166 @@
|
||||
"""FactStore — CRUD für strukturierte Fakten in Tier 2."""
|
||||
|
||||
import hashlib
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid as uuid_mod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class Fact:
|
||||
uuid: str
|
||||
content: str
|
||||
content_hash: str
|
||||
category: Optional[str]
|
||||
confidence: float
|
||||
source_type: str
|
||||
source_id: Optional[str]
|
||||
created_at: float
|
||||
updated_at: float
|
||||
expires_at: Optional[float]
|
||||
access_count: int
|
||||
last_accessed: Optional[float]
|
||||
is_archived: bool
|
||||
|
||||
|
||||
class FactStore:
|
||||
def __init__(self, conn: sqlite3.Connection):
|
||||
self.conn = conn
|
||||
|
||||
@staticmethod
|
||||
def _hash(content: str) -> str:
|
||||
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
||||
|
||||
def store(
|
||||
self,
|
||||
content: str,
|
||||
category: str = "general",
|
||||
confidence: float = 1.0,
|
||||
source_type: str = "user",
|
||||
source_id: Optional[str] = None,
|
||||
expires_at: Optional[float] = None,
|
||||
) -> Fact:
|
||||
content_hash = self._hash(content)
|
||||
existing = self.get_by_hash(content_hash)
|
||||
if existing and not existing.is_archived:
|
||||
# Deduplizierung: Aktualisiere confidence und updated_at
|
||||
self.conn.execute(
|
||||
"UPDATE facts SET confidence = MAX(confidence, ?), updated_at = ?, access_count = access_count + 1 WHERE uuid = ?",
|
||||
(confidence, time.time(), existing.uuid),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(existing.uuid)
|
||||
|
||||
fact_uuid = str(uuid_mod.uuid4())
|
||||
now = time.time()
|
||||
self.conn.execute(
|
||||
"""
|
||||
INSERT INTO facts (uuid, content, content_hash, category, confidence, source_type, source_id, created_at, updated_at, expires_at, access_count, last_accessed, is_archived)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(fact_uuid, content, content_hash, category, confidence, source_type, source_id, now, now, expires_at, 0, None, 0),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(fact_uuid)
|
||||
|
||||
def get_by_hash(self, content_hash: str) -> Optional[Fact]:
|
||||
row = self.conn.execute("SELECT * FROM facts WHERE content_hash = ? LIMIT 1", (content_hash,)).fetchone()
|
||||
return self._row_to_fact(row) if row else None
|
||||
|
||||
def get_by_uuid(self, fact_uuid: str) -> Optional[Fact]:
|
||||
row = self.conn.execute("SELECT * FROM facts WHERE uuid = ? LIMIT 1", (fact_uuid,)).fetchone()
|
||||
return self._row_to_fact(row) if row else None
|
||||
|
||||
def query(
|
||||
self,
|
||||
query: str = "",
|
||||
category: Optional[str] = None,
|
||||
limit: int = 10,
|
||||
min_confidence: float = 0.5,
|
||||
fts: bool = True,
|
||||
) -> List[Fact]:
|
||||
if fts and query:
|
||||
# FTS5 + Filter
|
||||
sql = """
|
||||
SELECT f.* FROM facts f
|
||||
JOIN facts_fts fts ON f.id = fts.rowid
|
||||
WHERE fts.facts_fts MATCH ? AND f.confidence >= ? AND f.is_archived = 0
|
||||
"""
|
||||
params = [query, min_confidence]
|
||||
if category:
|
||||
sql += " AND f.category = ?"
|
||||
params.append(category)
|
||||
sql += " ORDER BY f.confidence DESC, f.created_at DESC LIMIT ?"
|
||||
params.append(limit)
|
||||
else:
|
||||
sql = "SELECT * FROM facts WHERE confidence >= ? AND is_archived = 0"
|
||||
params = [min_confidence]
|
||||
if category:
|
||||
sql += " AND category = ?"
|
||||
params.append(category)
|
||||
if query:
|
||||
sql += " AND content LIKE ?"
|
||||
params.append(f"%{query}%")
|
||||
sql += " ORDER BY confidence DESC, created_at DESC LIMIT ?"
|
||||
params.append(limit)
|
||||
|
||||
rows = self.conn.execute(sql, params).fetchall()
|
||||
return [self._row_to_fact(r) for r in rows]
|
||||
|
||||
def update(self, fact_uuid: str, **fields) -> Optional[Fact]:
|
||||
allowed = {"content", "category", "confidence", "expires_at", "is_archived"}
|
||||
updates = {k: v for k, v in fields.items() if k in allowed}
|
||||
if not updates:
|
||||
return self.get_by_uuid(fact_uuid)
|
||||
if "content" in updates:
|
||||
updates["content_hash"] = self._hash(updates["content"])
|
||||
updates["updated_at"] = time.time()
|
||||
cols = ", ".join(f"{k} = ?" for k in updates)
|
||||
vals = list(updates.values()) + [fact_uuid]
|
||||
self.conn.execute(f"UPDATE facts SET {cols} WHERE uuid = ?", vals)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(fact_uuid)
|
||||
|
||||
def delete(self, fact_uuid: str, soft: bool = True) -> bool:
|
||||
if soft:
|
||||
self.conn.execute("UPDATE facts SET is_archived = 1, updated_at = ? WHERE uuid = ?", (time.time(), fact_uuid))
|
||||
else:
|
||||
self.conn.execute("DELETE FROM facts WHERE uuid = ?", (fact_uuid,))
|
||||
self.conn.commit()
|
||||
return True
|
||||
|
||||
def deduplicate(self) -> int:
|
||||
"""Merge Fakten mit identischem content_hash. Returns merged count."""
|
||||
rows = self.conn.execute(
|
||||
"SELECT content_hash, COUNT(*) as c FROM facts WHERE is_archived = 0 GROUP BY content_hash HAVING c > 1"
|
||||
).fetchall()
|
||||
merged = 0
|
||||
for row in rows:
|
||||
hashes = self.conn.execute(
|
||||
"SELECT uuid FROM facts WHERE content_hash = ? AND is_archived = 0 ORDER BY created_at",
|
||||
(row["content_hash"],),
|
||||
).fetchall()
|
||||
keep = hashes[0]["uuid"]
|
||||
for dup in hashes[1:]:
|
||||
self.delete(dup["uuid"], soft=False)
|
||||
merged += 1
|
||||
return merged
|
||||
|
||||
def _row_to_fact(self, row: sqlite3.Row) -> Fact:
|
||||
return Fact(
|
||||
uuid=row["uuid"],
|
||||
content=row["content"],
|
||||
content_hash=row["content_hash"],
|
||||
category=row["category"],
|
||||
confidence=row["confidence"],
|
||||
source_type=row["source_type"],
|
||||
source_id=row["source_id"],
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
expires_at=row["expires_at"],
|
||||
access_count=row["access_count"],
|
||||
last_accessed=row["last_accessed"],
|
||||
is_archived=bool(row["is_archived"]),
|
||||
)
|
||||
@@ -0,0 +1,96 @@
|
||||
"""RelationStore — Relationen-Management für Tier 2."""
|
||||
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid as uuid_mod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class Relation:
|
||||
uuid: str
|
||||
from_entity_id: str
|
||||
to_entity_id: str
|
||||
relation_type: str
|
||||
strength: float
|
||||
evidence_fact_id: Optional[str]
|
||||
created_at: float
|
||||
updated_at: float
|
||||
|
||||
|
||||
class RelationStore:
|
||||
def __init__(self, conn: sqlite3.Connection):
|
||||
self.conn = conn
|
||||
|
||||
def link(
|
||||
self,
|
||||
from_entity_id: str,
|
||||
to_entity_id: str,
|
||||
relation_type: str,
|
||||
strength: float = 1.0,
|
||||
evidence_fact_id: Optional[str] = None,
|
||||
) -> Relation:
|
||||
existing = self.conn.execute(
|
||||
"SELECT uuid FROM relations WHERE from_entity_id = ? AND to_entity_id = ? AND relation_type = ?",
|
||||
(from_entity_id, to_entity_id, relation_type),
|
||||
).fetchone()
|
||||
|
||||
now = time.time()
|
||||
if existing:
|
||||
self.conn.execute(
|
||||
"UPDATE relations SET strength = MAX(strength, ?), updated_at = ? WHERE uuid = ?",
|
||||
(strength, now, existing["uuid"]),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(existing["uuid"])
|
||||
|
||||
rel_uuid = str(uuid_mod.uuid4())
|
||||
self.conn.execute(
|
||||
"""
|
||||
INSERT INTO relations (uuid, from_entity_id, to_entity_id, relation_type, strength, evidence_fact_id, created_at, updated_at)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(rel_uuid, from_entity_id, to_entity_id, relation_type, strength, evidence_fact_id, now, now),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(rel_uuid)
|
||||
|
||||
def get_by_uuid(self, rel_uuid: str) -> Optional[Relation]:
|
||||
row = self.conn.execute("SELECT * FROM relations WHERE uuid = ? LIMIT 1", (rel_uuid,)).fetchone()
|
||||
return self._row_to_relation(row) if row else None
|
||||
|
||||
def query(
|
||||
self,
|
||||
from_entity_id: Optional[str] = None,
|
||||
to_entity_id: Optional[str] = None,
|
||||
relation_type: Optional[str] = None,
|
||||
limit: int = 50,
|
||||
) -> List[Relation]:
|
||||
sql = "SELECT * FROM relations WHERE 1=1"
|
||||
params: List = []
|
||||
if from_entity_id:
|
||||
sql += " AND from_entity_id = ?"
|
||||
params.append(from_entity_id)
|
||||
if to_entity_id:
|
||||
sql += " AND to_entity_id = ?"
|
||||
params.append(to_entity_id)
|
||||
if relation_type:
|
||||
sql += " AND relation_type = ?"
|
||||
params.append(relation_type)
|
||||
sql += " ORDER BY strength DESC, updated_at DESC LIMIT ?"
|
||||
params.append(limit)
|
||||
rows = self.conn.execute(sql, params).fetchall()
|
||||
return [self._row_to_relation(r) for r in rows]
|
||||
|
||||
def _row_to_relation(self, row: sqlite3.Row) -> Relation:
|
||||
return Relation(
|
||||
uuid=row["uuid"],
|
||||
from_entity_id=row["from_entity_id"],
|
||||
to_entity_id=row["to_entity_id"],
|
||||
relation_type=row["relation_type"],
|
||||
strength=row["strength"],
|
||||
evidence_fact_id=row["evidence_fact_id"],
|
||||
created_at=row["created_at"],
|
||||
updated_at=row["updated_at"],
|
||||
)
|
||||
@@ -0,0 +1,196 @@
|
||||
"""SQLite Schema & Migrationen für Tier 2.
|
||||
|
||||
Enthält alle CREATE TABLE / INDEX / TRIGGER Statements
|
||||
mit Schema-Versionierung.
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
SCHEMA_SQL = """
|
||||
-- Schema-Version
|
||||
CREATE TABLE IF NOT EXISTS memory_schema_version (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at REAL NOT NULL
|
||||
);
|
||||
|
||||
-- Fakten
|
||||
CREATE TABLE IF NOT EXISTS facts (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
uuid TEXT NOT NULL UNIQUE,
|
||||
content TEXT NOT NULL,
|
||||
content_hash TEXT NOT NULL,
|
||||
category TEXT,
|
||||
confidence REAL DEFAULT 1.0,
|
||||
source_type TEXT NOT NULL,
|
||||
source_id TEXT,
|
||||
created_at REAL NOT NULL,
|
||||
updated_at REAL NOT NULL,
|
||||
expires_at REAL,
|
||||
access_count INTEGER DEFAULT 0,
|
||||
last_accessed REAL,
|
||||
is_archived INTEGER DEFAULT 0
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_category ON facts(category);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_source ON facts(source_type, source_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_created ON facts(created_at DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_hash ON facts(content_hash);
|
||||
CREATE INDEX IF NOT EXISTS idx_facts_confidence ON facts(confidence DESC);
|
||||
|
||||
-- Entitäten
|
||||
CREATE TABLE IF NOT EXISTS entities (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
uuid TEXT NOT NULL UNIQUE,
|
||||
name TEXT NOT NULL,
|
||||
aliases TEXT,
|
||||
entity_type TEXT NOT NULL,
|
||||
description TEXT,
|
||||
first_seen REAL NOT NULL,
|
||||
last_seen REAL NOT NULL,
|
||||
occurrence_count INTEGER DEFAULT 1,
|
||||
metadata TEXT
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_entities_name ON entities(name);
|
||||
CREATE INDEX IF NOT EXISTS idx_entities_type ON entities(entity_type);
|
||||
|
||||
-- Relationen
|
||||
CREATE TABLE IF NOT EXISTS relations (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
uuid TEXT NOT NULL UNIQUE,
|
||||
from_entity_id TEXT NOT NULL,
|
||||
to_entity_id TEXT NOT NULL,
|
||||
relation_type TEXT NOT NULL,
|
||||
strength REAL DEFAULT 1.0,
|
||||
evidence_fact_id TEXT,
|
||||
created_at REAL NOT NULL,
|
||||
updated_at REAL NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_relations_from ON relations(from_entity_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_relations_to ON relations(to_entity_id);
|
||||
CREATE INDEX IF NOT EXISTS idx_relations_type ON relations(relation_type);
|
||||
|
||||
-- Timeline
|
||||
CREATE TABLE IF NOT EXISTS timeline (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
uuid TEXT NOT NULL UNIQUE,
|
||||
event_type TEXT NOT NULL,
|
||||
title TEXT NOT NULL,
|
||||
description TEXT,
|
||||
related_entities TEXT,
|
||||
related_facts TEXT,
|
||||
session_id TEXT,
|
||||
timestamp REAL NOT NULL,
|
||||
importance REAL DEFAULT 0.5
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_timeline_time ON timeline(timestamp DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_timeline_type ON timeline(event_type);
|
||||
|
||||
-- Audit-Log
|
||||
CREATE TABLE IF NOT EXISTS memory_audit_log (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
timestamp REAL NOT NULL,
|
||||
action TEXT NOT NULL,
|
||||
tier TEXT NOT NULL,
|
||||
actor TEXT NOT NULL,
|
||||
target_uuid TEXT,
|
||||
diff TEXT,
|
||||
success INTEGER DEFAULT 1
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON memory_audit_log(timestamp DESC);
|
||||
CREATE INDEX IF NOT EXISTS idx_audit_actor ON memory_audit_log(actor);
|
||||
|
||||
-- Embedding-Queue
|
||||
CREATE TABLE IF NOT EXISTS embedding_queue (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
fact_id TEXT,
|
||||
content TEXT NOT NULL,
|
||||
source_type TEXT NOT NULL,
|
||||
session_id TEXT,
|
||||
message_id INTEGER,
|
||||
queued_at REAL NOT NULL,
|
||||
processed INTEGER DEFAULT 0
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_queue_processed ON embedding_queue(processed, queued_at);
|
||||
"""
|
||||
|
||||
FTS_SQL = """
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS facts_fts USING fts5(
|
||||
content,
|
||||
content_rowid='id',
|
||||
tokenize='unicode61'
|
||||
);
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS facts_fts_insert AFTER INSERT ON facts BEGIN
|
||||
INSERT INTO facts_fts(rowid, content) VALUES (new.id, new.content);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS facts_fts_delete AFTER DELETE ON facts BEGIN
|
||||
DELETE FROM facts_fts WHERE rowid = old.id;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS facts_fts_update AFTER UPDATE ON facts BEGIN
|
||||
DELETE FROM facts_fts WHERE rowid = old.id;
|
||||
INSERT INTO facts_fts(rowid, content) VALUES (new.id, new.content);
|
||||
END;
|
||||
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS entities_fts USING fts5(
|
||||
name,
|
||||
content_rowid='id',
|
||||
tokenize='unicode61'
|
||||
);
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS entities_fts_insert AFTER INSERT ON entities BEGIN
|
||||
INSERT INTO entities_fts(rowid, name) VALUES (new.id, new.name);
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS entities_fts_delete AFTER DELETE ON entities BEGIN
|
||||
DELETE FROM entities_fts WHERE rowid = old.id;
|
||||
END;
|
||||
|
||||
CREATE TRIGGER IF NOT EXISTS entities_fts_update AFTER UPDATE ON entities BEGIN
|
||||
DELETE FROM entities_fts WHERE rowid = old.id;
|
||||
INSERT INTO entities_fts(rowid, name) VALUES (new.id, new.name);
|
||||
END;
|
||||
"""
|
||||
|
||||
|
||||
def init_schema(conn: sqlite3.Connection) -> None:
|
||||
"""Initialisiert alle Tabellen, Indizes und FTS5."""
|
||||
conn.executescript(SCHEMA_SQL)
|
||||
conn.executescript(FTS_SQL)
|
||||
conn.execute(
|
||||
"INSERT OR REPLACE INTO memory_schema_version (version, applied_at) VALUES (?, ?)",
|
||||
(SCHEMA_VERSION, time.time()),
|
||||
)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def migrate(conn: sqlite3.Connection) -> None:
|
||||
"""Führt Migrationen durch (aktuell: nur Schema-Version prüfen)."""
|
||||
row = conn.execute("SELECT version FROM memory_schema_version ORDER BY version DESC LIMIT 1").fetchone()
|
||||
current = row[0] if row else 0
|
||||
if current < SCHEMA_VERSION:
|
||||
init_schema(conn)
|
||||
|
||||
|
||||
def connect(db_path: Path, wal_mode: bool = True) -> sqlite3.Connection:
|
||||
"""Erstellt Verbindung mit WAL-Mode und Foreign Keys."""
|
||||
db_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
conn = sqlite3.connect(str(db_path), check_same_thread=False, timeout=10.0)
|
||||
conn.row_factory = sqlite3.Row
|
||||
conn.execute("PRAGMA foreign_keys=ON")
|
||||
if wal_mode:
|
||||
try:
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
except sqlite3.OperationalError:
|
||||
conn.execute("PRAGMA journal_mode=DELETE")
|
||||
return conn
|
||||
@@ -0,0 +1,98 @@
|
||||
"""TimelineStore — Zeitachsen-Management für Tier 2."""
|
||||
|
||||
import json
|
||||
import sqlite3
|
||||
import time
|
||||
import uuid as uuid_mod
|
||||
from dataclasses import dataclass
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
@dataclass
|
||||
class TimelineEvent:
|
||||
uuid: str
|
||||
event_type: str
|
||||
title: str
|
||||
description: Optional[str]
|
||||
related_entities: List[str]
|
||||
related_facts: List[str]
|
||||
session_id: Optional[str]
|
||||
timestamp: float
|
||||
importance: float
|
||||
|
||||
|
||||
class TimelineStore:
|
||||
def __init__(self, conn: sqlite3.Connection):
|
||||
self.conn = conn
|
||||
|
||||
def add(
|
||||
self,
|
||||
event_type: str,
|
||||
title: str,
|
||||
description: Optional[str] = None,
|
||||
importance: float = 0.5,
|
||||
related_entities: Optional[List[str]] = None,
|
||||
related_facts: Optional[List[str]] = None,
|
||||
session_id: Optional[str] = None,
|
||||
) -> TimelineEvent:
|
||||
ev_uuid = str(uuid_mod.uuid4())
|
||||
now = time.time()
|
||||
self.conn.execute(
|
||||
"""
|
||||
INSERT INTO timeline (uuid, event_type, title, description, related_entities, related_facts, session_id, timestamp, importance)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""",
|
||||
(
|
||||
ev_uuid,
|
||||
event_type,
|
||||
title,
|
||||
description,
|
||||
json.dumps(related_entities or []),
|
||||
json.dumps(related_facts or []),
|
||||
session_id,
|
||||
now,
|
||||
importance,
|
||||
),
|
||||
)
|
||||
self.conn.commit()
|
||||
return self.get_by_uuid(ev_uuid)
|
||||
|
||||
def get_by_uuid(self, ev_uuid: str) -> Optional[TimelineEvent]:
|
||||
row = self.conn.execute("SELECT * FROM timeline WHERE uuid = ? LIMIT 1", (ev_uuid,)).fetchone()
|
||||
return self._row_to_event(row) if row else None
|
||||
|
||||
def query(
|
||||
self,
|
||||
start: Optional[float] = None,
|
||||
end: Optional[float] = None,
|
||||
event_type: Optional[str] = None,
|
||||
limit: int = 20,
|
||||
) -> List[TimelineEvent]:
|
||||
sql = "SELECT * FROM timeline WHERE 1=1"
|
||||
params: List = []
|
||||
if start:
|
||||
sql += " AND timestamp >= ?"
|
||||
params.append(start)
|
||||
if end:
|
||||
sql += " AND timestamp <= ?"
|
||||
params.append(end)
|
||||
if event_type:
|
||||
sql += " AND event_type = ?"
|
||||
params.append(event_type)
|
||||
sql += " ORDER BY timestamp DESC LIMIT ?"
|
||||
params.append(limit)
|
||||
rows = self.conn.execute(sql, params).fetchall()
|
||||
return [self._row_to_event(r) for r in rows]
|
||||
|
||||
def _row_to_event(self, row: sqlite3.Row) -> TimelineEvent:
|
||||
return TimelineEvent(
|
||||
uuid=row["uuid"],
|
||||
event_type=row["event_type"],
|
||||
title=row["title"],
|
||||
description=row["description"],
|
||||
related_entities=json.loads(row["related_entities"] or "[]"),
|
||||
related_facts=json.loads(row["related_facts"] or "[]"),
|
||||
session_id=row["session_id"],
|
||||
timestamp=row["timestamp"],
|
||||
importance=row["importance"],
|
||||
)
|
||||
@@ -0,0 +1,7 @@
|
||||
"""Tier 3 — Semantic Memory (Qdrant / Chroma)."""
|
||||
|
||||
from hermes_memory.tier3.backend import VectorBackend, SearchResult
|
||||
from hermes_memory.tier3.chroma_backend import ChromaBackend
|
||||
from hermes_memory.tier3.embedder import LocalEmbedder
|
||||
|
||||
__all__ = ["VectorBackend", "SearchResult", "ChromaBackend", "LocalEmbedder"]
|
||||
@@ -0,0 +1,35 @@
|
||||
"""Abstrakte Backend-Schnittstelle für Vektor-DBs."""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, List
|
||||
|
||||
|
||||
@dataclass
|
||||
class SearchResult:
|
||||
chunk_id: str
|
||||
score: float
|
||||
text: str
|
||||
metadata: Dict
|
||||
|
||||
|
||||
class VectorBackend(ABC):
|
||||
@abstractmethod
|
||||
def index(self, chunks: List[str], payloads: List[Dict]) -> List[str]:
|
||||
"""Indexiert Chunks, gibt chunk_ids zurück."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def search(self, query_embedding: List[float], limit: int = 10, filters: Dict = None) -> List[SearchResult]:
|
||||
"""Semantische Suche mit Query-Embedding."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def delete(self, chunk_ids: List[str]) -> bool:
|
||||
"""Löscht Chunks anhand ihrer IDs."""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def health(self) -> Dict:
|
||||
"""Gibt Status-Informationen zurück."""
|
||||
...
|
||||
@@ -0,0 +1,69 @@
|
||||
"""Chroma-Implementierung des VectorBackend."""
|
||||
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
from hermes_memory.tier3.backend import SearchResult, VectorBackend
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ChromaBackend(VectorBackend):
|
||||
def __init__(self, persist_path: Path, collection_name: str = "memory_chunks"):
|
||||
self.persist_path = persist_path
|
||||
self.collection_name = collection_name
|
||||
self._client = None
|
||||
self._collection = None
|
||||
self._init()
|
||||
|
||||
def _init(self) -> None:
|
||||
try:
|
||||
import chromadb
|
||||
self._client = chromadb.PersistentClient(path=str(self.persist_path))
|
||||
self._collection = self._client.get_or_create_collection(
|
||||
name=self.collection_name,
|
||||
metadata={"hnsw:space": "cosine"},
|
||||
)
|
||||
except ImportError:
|
||||
logger.error("chromadb nicht installiert. Installiere: pip install chromadb")
|
||||
raise
|
||||
|
||||
def index(self, chunks: List[str], payloads: List[Dict]) -> List[str]:
|
||||
if not chunks:
|
||||
return []
|
||||
chunk_ids = [p.get("chunk_id", f"chunk_{i}") for i, p in enumerate(payloads)]
|
||||
self._collection.add(
|
||||
ids=chunk_ids,
|
||||
documents=chunks,
|
||||
metadatas=payloads,
|
||||
)
|
||||
return chunk_ids
|
||||
|
||||
def search(self, query_embedding: List[float], limit: int = 10, filters: Dict = None) -> List[SearchResult]:
|
||||
results = self._collection.query(
|
||||
query_embeddings=[query_embedding],
|
||||
n_results=limit,
|
||||
where=filters,
|
||||
)
|
||||
out: List[SearchResult] = []
|
||||
if not results["ids"]:
|
||||
return out
|
||||
for i, cid in enumerate(results["ids"][0]):
|
||||
out.append(
|
||||
SearchResult(
|
||||
chunk_id=cid,
|
||||
score=results["distances"][0][i],
|
||||
text=results["documents"][0][i] or "",
|
||||
metadata=results["metadatas"][0][i] or {},
|
||||
)
|
||||
)
|
||||
return out
|
||||
|
||||
def delete(self, chunk_ids: List[str]) -> bool:
|
||||
self._collection.delete(ids=chunk_ids)
|
||||
return True
|
||||
|
||||
def health(self) -> Dict:
|
||||
count = self._collection.count()
|
||||
return {"backend": "chroma", "collection": self.collection_name, "count": count}
|
||||
@@ -0,0 +1,44 @@
|
||||
"""Embedding-Wrapper für lokale Modelle."""
|
||||
|
||||
import hashlib
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import List
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class LocalEmbedder:
|
||||
"""Wrapper um sentence-transformers mit Caching."""
|
||||
|
||||
def __init__(self, model_name: str = "all-MiniLM-L6-v2", cache_dir: Path = None):
|
||||
self.model_name = model_name
|
||||
self.cache_dir = cache_dir or Path.home() / ".cache" / "hermes_memory" / "embeddings"
|
||||
self.cache_dir.mkdir(parents=True, exist_ok=True)
|
||||
self._model = None
|
||||
self._dim = 384
|
||||
|
||||
def _load_model(self):
|
||||
if self._model is None:
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
self._model = SentenceTransformer(self.model_name)
|
||||
self._dim = self._model.get_sentence_embedding_dimension()
|
||||
except ImportError:
|
||||
logger.error("sentence-transformers nicht installiert.")
|
||||
raise
|
||||
return self._model
|
||||
|
||||
def _cache_key(self, text: str) -> str:
|
||||
return hashlib.sha256(text.encode("utf-8")).hexdigest() + ".npy"
|
||||
|
||||
def embed(self, texts: List[str]) -> List[List[float]]:
|
||||
model = self._load_model()
|
||||
return model.encode(texts, convert_to_numpy=True).tolist()
|
||||
|
||||
def embed_query(self, text: str) -> List[float]:
|
||||
return self.embed([text])[0]
|
||||
|
||||
@property
|
||||
def dim(self) -> int:
|
||||
return self._dim
|
||||
@@ -0,0 +1 @@
|
||||
"""Utilities für Hermes Memory Next Level."""
|
||||
Reference in New Issue
Block a user