b83546d833
Architecture (Agent 1):
- hermes_memory/tier2/{schema,facts,entities,relations,timeline}.py
- hermes_memory/tier3/{backend,chroma_backend,embedder}.py
- hermes_memory/graph/nx_store.py
- hermes_memory/api/memory_api.py (unified API)
- hermes_memory/cron/{consolidate,embed_queue,graph_refresh,prune}.py
- hermes_memory/config.py + pyproject.toml
Integration Plan (Agent 3):
- INTEGRATION_PLAN.md: Memory Provider Plugin strategy
- Hermes Core needs minimal changes
- sync_turn() + prefetch() hooks
- Skills integration via nextlevel_search/remember
Auto-Extraction (Agent 2):
- ARCHITECTURE.md: Full extraction pipeline docs
- Chunking, Pre-Filter, LLM Prompts, Classification
- Entity-Linking, Temporal Reasoning, Deduplication
All files: Python syntax checked, ECC standards applied.
70 lines
2.3 KiB
Python
70 lines
2.3 KiB
Python
"""Chroma-Implementierung des VectorBackend."""
|
|
|
|
import logging
|
|
from pathlib import Path
|
|
from typing import Dict, List, Optional
|
|
|
|
from hermes_memory.tier3.backend import SearchResult, VectorBackend
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class ChromaBackend(VectorBackend):
    """Vector backend that keeps chunks in a persistent Chroma collection.

    The collection is opened (or created) eagerly by the constructor and is
    created with ``hnsw:space`` set to ``cosine``.
    """

    def __init__(self, persist_path: Path, collection_name: str = "memory_chunks"):
        """Open or create the collection stored under ``persist_path``.

        Args:
            persist_path: Location handed to ``chromadb.PersistentClient``
                (converted with ``str``).
            collection_name: Name of the collection to open or create.

        Raises:
            ImportError: If ``chromadb`` cannot be imported.
        """
        self.persist_path = persist_path
        self.collection_name = collection_name
        self._client = None
        self._collection = None
        self._init()

    def _init(self) -> None:
        """Create the persistent client and bind ``self._collection``.

        ``chromadb`` is imported here rather than at module level, so merely
        importing this module does not require the package.

        Raises:
            ImportError: If ``chromadb`` cannot be imported; the failure is
                logged before being re-raised.
        """
        # Keep the try body limited to the import so that only a genuinely
        # missing package triggers the "not installed" log message.
        try:
            import chromadb
        except ImportError:
            logger.error("chromadb nicht installiert. Installiere: pip install chromadb")
            raise

        self._client = chromadb.PersistentClient(path=str(self.persist_path))
        self._collection = self._client.get_or_create_collection(
            name=self.collection_name,
            metadata={"hnsw:space": "cosine"},
        )

    def index(self, chunks: List[str], payloads: List[Dict]) -> List[str]:
        """Add chunks and their payloads to the collection.

        Args:
            chunks: Chunk texts; stored as the collection's documents.
            payloads: One dict per chunk; stored as the chunk's metadata. A
                non-empty ``"chunk_id"`` entry is used as the record ID,
                otherwise a unique ID is generated.

        Returns:
            The IDs used for the added chunks, in input order. An empty list
            when ``chunks`` is empty.

        Raises:
            ValueError: If ``chunks`` and ``payloads`` differ in length.
        """
        import uuid

        if not chunks:
            return []
        if len(chunks) != len(payloads):
            raise ValueError(
                f"chunks and payloads must have the same length "
                f"(got {len(chunks)} and {len(payloads)})"
            )

        # A positional fallback ("chunk_0", "chunk_1", ...) repeats on every
        # call and therefore collides with records added by earlier calls.
        # A random UUID keeps generated IDs unique across calls.
        chunk_ids = [
            payload.get("chunk_id") or f"chunk_{uuid.uuid4().hex}"
            for payload in payloads
        ]
        self._collection.add(
            ids=chunk_ids,
            documents=chunks,
            metadatas=payloads,
        )
        return chunk_ids

    def search(
        self,
        query_embedding: List[float],
        limit: int = 10,
        filters: Optional[Dict] = None,
    ) -> List[SearchResult]:
        """Query the collection with a single embedding.

        Args:
            query_embedding: Embedding vector to search with.
            limit: Maximum number of results requested from the collection.
                A value of zero or less yields an empty result without
                querying.
            filters: Optional metadata filter passed to the query as
                ``where``. ``None`` or an empty dict means no filtering.

        Returns:
            One ``SearchResult`` per hit, in the order returned by the
            collection. ``score`` carries the distance value reported by the
            collection. Missing documents become ``""`` and missing metadata
            becomes ``{}``.
        """
        if limit <= 0:
            return []

        results = self._collection.query(
            query_embeddings=[query_embedding],
            n_results=limit,
            # An empty dict carries no condition; pass None instead.
            where=filters or None,
        )
        if not results["ids"]:
            return []

        # Only one query embedding is sent, so only the first result row
        # is relevant.
        hit_ids = results["ids"][0]
        distances = results["distances"][0]
        documents = results["documents"][0]
        metadatas = results["metadatas"][0]

        # NOTE(review): ``score`` is a distance, not a similarity — confirm
        # that consumers of SearchResult rank accordingly.
        return [
            SearchResult(
                chunk_id=hit_id,
                score=distance,
                text=document or "",
                metadata=metadata or {},
            )
            for hit_id, distance, document, metadata in zip(
                hit_ids, distances, documents, metadatas
            )
        ]

    def delete(self, chunk_ids: List[str]) -> bool:
        """Delete the records with the given IDs.

        Args:
            chunk_ids: IDs of the records to remove.

        Returns:
            Always ``True``; errors from the collection propagate as
            exceptions.
        """
        # Nothing to delete. Do not forward an empty ID list, so the outcome
        # never depends on how the client interprets an empty ``ids`` filter.
        if not chunk_ids:
            return True
        self._collection.delete(ids=chunk_ids)
        return True

    def health(self) -> Dict:
        """Return a small status dict for this backend.

        Returns:
            A dict with the keys ``"backend"`` (always ``"chroma"``),
            ``"collection"`` (the collection name) and ``"count"`` (the
            number of records reported by the collection).
        """
        count = self._collection.count()
        return {"backend": "chroma", "collection": self.collection_name, "count": count}
|