# Hermes-Memory-Next-Level/hermes_memory/tier3/chroma_backend.py

"""Chroma implementation of the VectorBackend interface."""

import logging
from pathlib import Path
from typing import Dict, List, Optional

from hermes_memory.tier3.backend import SearchResult, VectorBackend

logger = logging.getLogger(__name__)


class ChromaBackend(VectorBackend):
    """Vector backend that keeps chunks in a persistent Chroma collection.

    The collection is opened (or created) while the instance is constructed
    and is configured with ``hnsw:space`` set to ``cosine``.
    """

    def __init__(self, persist_path: Path, collection_name: str = "memory_chunks"):
        """Open or create the collection stored under *persist_path*.

        Args:
            persist_path: Location handed to ``chromadb.PersistentClient``.
            collection_name: Name of the collection to open or create.

        Raises:
            ImportError: If ``chromadb`` cannot be imported.
        """
        self.persist_path = persist_path
        self.collection_name = collection_name
        self._client = None
        self._collection = None
        self._init()

    def _init(self) -> None:
        """Create the persistent client and bind the collection.

        Raises:
            ImportError: If ``chromadb`` cannot be imported; the error is
                logged before being re-raised.
        """
        # Keep the try body to the import only, so the "not installed" log
        # message is emitted exclusively for a missing chromadb package.
        try:
            import chromadb
        except ImportError:
            logger.error("chromadb nicht installiert. Installiere: pip install chromadb")
            raise

        self._client = chromadb.PersistentClient(path=str(self.persist_path))
        self._collection = self._client.get_or_create_collection(
            name=self.collection_name,
            metadata={"hnsw:space": "cosine"},
        )

    def index(self, chunks: List[str], payloads: List[Dict]) -> List[str]:
        """Add text chunks and their metadata to the collection.

        Args:
            chunks: Document texts to store.
            payloads: One metadata dict per chunk, in the same order. The
                value under ``"chunk_id"`` is used as the record id; when it
                is missing, the positional fallback ``chunk_<i>`` is used.

        Returns:
            The ids that were submitted, in input order. An empty list when
            *chunks* is empty.

        Raises:
            ValueError: If *chunks* and *payloads* differ in length.
        """
        if not chunks:
            return []
        # Ids are derived from payloads while documents come from chunks, so
        # the two lists must line up one-to-one before anything is submitted.
        if len(chunks) != len(payloads):
            raise ValueError(
                "chunks and payloads must have the same length "
                f"(got {len(chunks)} and {len(payloads)})"
            )
        # NOTE(review): the positional fallback id repeats on every call, so
        # chunks without a "chunk_id" may collide with records from earlier
        # batches -- confirm that callers always provide "chunk_id".
        chunk_ids = [
            payload.get("chunk_id", f"chunk_{i}")
            for i, payload in enumerate(payloads)
        ]
        self._collection.add(
            ids=chunk_ids,
            documents=chunks,
            metadatas=payloads,
        )
        return chunk_ids

    def search(
        self,
        query_embedding: List[float],
        limit: int = 10,
        filters: Optional[Dict] = None,
    ) -> List[SearchResult]:
        """Query the collection with a single embedding.

        Args:
            query_embedding: Embedding vector to search with.
            limit: Maximum number of results requested from the collection.
            filters: Optional metadata filter passed as the ``where`` clause.
                ``None`` and an empty dict both mean "no filter".

        Returns:
            One ``SearchResult`` per hit, in the order the collection
            returned them. Missing document text becomes ``""`` and missing
            metadata becomes ``{}``.
        """
        results = self._collection.query(
            query_embeddings=[query_embedding],
            n_results=limit,
            # An empty dict is not a usable where-clause; send "no filter".
            where=filters or None,
        )
        out: List[SearchResult] = []
        if not results["ids"]:
            return out
        # Only one query embedding is sent, so only the first row is read.
        for i, cid in enumerate(results["ids"][0]):
            out.append(
                SearchResult(
                    chunk_id=cid,
                    # NOTE(review): this is the raw distance reported by the
                    # collection (presumably lower means closer) -- confirm
                    # that SearchResult.score is meant to carry a distance.
                    score=results["distances"][0][i],
                    text=results["documents"][0][i] or "",
                    metadata=results["metadatas"][0][i] or {},
                )
            )
        return out

    def delete(self, chunk_ids: List[str]) -> bool:
        """Delete the records with the given ids.

        Args:
            chunk_ids: Ids of the records to remove.

        Returns:
            Always ``True``; errors from the collection propagate as
            exceptions.
        """
        # Nothing to remove: skip the backend call so that an empty id list
        # is never handed to the collection as a delete selector.
        if not chunk_ids:
            return True
        self._collection.delete(ids=chunk_ids)
        return True

    def health(self) -> Dict:
        """Return the backend name, the collection name and its record count."""
        count = self._collection.count()
        return {"backend": "chroma", "collection": self.collection_name, "count": count}