Add Qdrant and ChromaDB support to the project

- Added Qdrant service to both docker-compose files for production and development.
- Updated environment variables in .env.example and settings.py to include Qdrant configuration.
- Included necessary dependencies for Qdrant and ChromaDB in requirements.txt.
- Updated .gitignore to exclude ChromaDB data files.
This commit is contained in:
2026-02-27 19:37:02 +03:30
parent 9ec0807d3c
commit 197f70ee12
36 changed files with 1199 additions and 0 deletions
+90
View File
@@ -0,0 +1,90 @@
"""
منطق اصلی indexing: embed کردن chunks و ذخیره در ChromaDB.
"""
from pathlib import Path
from .chunks import build_all_chunks
from .rag_settings import RAGConfig
from .embeddings import get_embedder
COLLECTION_NAME = "croplogic_kb"
def _serialize_metadata(meta: dict) -> dict:
    """Return a copy of *meta* holding only values ChromaDB can store.

    Entries whose value is ``None`` are dropped. ``str``, ``int``, ``float``
    and ``bool`` values are kept unchanged; every other value is converted
    with ``str()``.
    """
    return {
        key: value if isinstance(value, (str, int, float, bool)) else str(value)
        for key, value in meta.items()
        if value is not None
    }


def _max_add_batch_size(client, fallback: int) -> int:
    """Return how many records *client* accepts in one ``add`` call.

    NOTE(review): recent ChromaDB releases expose ``get_max_batch_size()``
    and some older ones a ``max_batch_size`` property -- confirm against the
    pinned version. When neither yields a positive integer, *fallback* is
    returned so the caller can still add everything in a single call.
    """
    getter = getattr(client, "get_max_batch_size", None)
    if callable(getter):
        limit = getter()
    else:
        limit = getattr(client, "max_batch_size", None)
    if isinstance(limit, int) and limit > 0:
        return limit
    return fallback


def build_index(config: RAGConfig) -> int:
    """Build (or fully rebuild) the knowledge-base index.

    Chunks are produced by ``build_all_chunks`` (per the original notes: from
    soil_data, sensor_data and the tone file), embedded in batches with the
    embedder returned by ``get_embedder(config)``, and stored in a persistent
    ChromaDB collection. Any existing collection with the same name is
    deleted before the new one is created.

    Args:
        config: RAG configuration. This function reads ``tone_file``,
            ``chunking.max_chunk_tokens``, ``chunking.overlap_tokens``,
            ``embedding.batch_size``, ``chromadb.persist_directory`` and
            ``chromadb.collection_name``.

    Returns:
        The number of documents added. ``0`` is returned when no chunks were
        produced; in that case the existing index is left untouched.

    Raises:
        ValueError: If ``config.embedding.batch_size`` is not a positive
            integer.
    """
    import logging

    log = logging.getLogger(__name__)

    chunks = build_all_chunks(
        tone_path=Path(config.tone_file),
        max_chunk_tokens=config.chunking.max_chunk_tokens,
        overlap_tokens=config.chunking.overlap_tokens,
    )
    if not chunks:
        return 0

    texts = [text for text, _ in chunks]
    metadatas = [_serialize_metadata(meta) for _, meta in chunks]

    # Validate before loading the embedder: a zero step makes range() raise
    # an unrelated-looking error and a negative one silently yields no
    # embeddings at all.
    batch_size = config.embedding.batch_size
    if not isinstance(batch_size, int) or batch_size < 1:
        raise ValueError(
            f"embedding.batch_size must be a positive integer, got {batch_size!r}"
        )

    # Embed everything before touching the stored collection, so a failure
    # during embedding leaves the previous index in place.
    embedder = get_embedder(config)
    all_embeddings = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        all_embeddings.extend(embedder.encode(batch, batch_size=batch_size))

    # ChromaDB
    persist_dir = Path(config.chromadb.persist_directory)
    persist_dir.mkdir(parents=True, exist_ok=True)

    import chromadb
    from chromadb.config import Settings as ChromaSettings

    client = chromadb.PersistentClient(
        path=str(persist_dir),
        settings=ChromaSettings(anonymized_telemetry=False),
    )
    collection_name = config.chromadb.collection_name or COLLECTION_NAME

    # Best-effort removal of a previous index. The exception raised for a
    # missing collection differs between ChromaDB versions, so the handler
    # stays broad, but the failure is logged instead of vanishing. If the
    # collection does still exist, create_collection below reports it.
    try:
        client.delete_collection(collection_name)
    except Exception:
        log.debug(
            "Collection %r was not deleted before rebuild (it may not exist yet)",
            collection_name,
            exc_info=True,
        )

    collection = client.create_collection(
        name=collection_name,
        metadata={"hnsw:space": "cosine"},
    )

    ids = [f"doc_{i}" for i in range(len(texts))]

    # ChromaDB limits how many records a single add() may carry, so large
    # knowledge bases are written in slices no bigger than that limit.
    step = _max_add_batch_size(client, fallback=len(texts))
    for start in range(0, len(texts), step):
        stop = start + step
        collection.add(
            ids=ids[start:stop],
            embeddings=all_embeddings[start:stop],
            documents=texts[start:stop],
            metadatas=metadatas[start:stop],
        )
    return len(texts)