Add Qdrant and ChromaDB support to the project
- Added the Qdrant service to both docker-compose files (production and development).
- Updated environment variables in .env.example and settings.py to include the Qdrant configuration.
- Added the dependencies required for Qdrant and ChromaDB to requirements.txt.
- Updated .gitignore to exclude ChromaDB data files.
This commit is contained in:
@@ -0,0 +1,93 @@
|
||||
"""
|
||||
بارگذاری تنظیمات RAG از rag_config.yaml
|
||||
"""
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml
|
||||
|
||||
|
||||
@dataclass
|
||||
class EmbeddingConfig:
|
||||
provider: str
|
||||
model: str
|
||||
batch_size: int = 32
|
||||
api_key_env: str | None = None
|
||||
base_url: str | None = None
|
||||
|
||||
|
||||
@dataclass
class QdrantConfig:
    """Connection and collection settings for the Qdrant vector store."""

    # Host name of the Qdrant server.
    host: str = "localhost"
    # Port of the Qdrant server.
    port: int = 6333
    # Name of the collection used by this project.
    collection_name: str = "croplogic_kb"
    # Presumably the dimensionality of the stored vectors; it has to agree
    # with the embedding model in use -- confirm.
    # NOTE(review): load_rag_config falls back to 1536 (not 384) when the YAML
    # omits ``vector_size``; the two defaults should probably be unified.
    vector_size: int = 384
|
||||
|
||||
|
||||
@dataclass
class ChunkingConfig:
    """Parameters controlling how documents are split into chunks."""

    # Upper bound on the size of one chunk, in tokens.
    max_chunk_tokens: int = 500
    # Presumably the number of tokens shared between consecutive chunks --
    # confirm against the chunker that consumes this value.
    overlap_tokens: int = 50
|
||||
|
||||
|
||||
@dataclass
class RAGConfig:
    """Top-level RAG configuration aggregating all sub-sections.

    The three section objects are required; the path fields and ``chromadb``
    have defaults.
    """

    # Required sub-sections.
    embedding: EmbeddingConfig
    qdrant: QdrantConfig
    chunking: ChunkingConfig
    # Path settings, stored as plain strings exactly as configured.
    tone_file: str = "config/tone.txt"
    knowledge_base_path: str = "config/knowledge_base"
    user_info_path: str = "config/user_info"
    # Free-form ChromaDB settings; default_factory gives every instance its
    # own dict instead of one shared mutable default.
    chromadb: dict[str, Any] = field(default_factory=dict)
|
||||
|
||||
|
||||
def load_rag_config(config_path: str | Path | None = None) -> RAGConfig:
    """Load the RAG settings from YAML, applying environment overrides.

    ``QDRANT_HOST`` and ``QDRANT_PORT`` from the environment take precedence
    over the ``qdrant`` section of the YAML file. An environment variable that
    is set but empty is treated as unset.

    Args:
        config_path: Location of the YAML file. When ``None``, the file
            ``config/rag_config.yaml`` under the parent of this module's
            directory is used.

    Returns:
        A ``RAGConfig`` in which every key missing from the YAML is filled
        with the fallback values spelled out below.

    Raises:
        FileNotFoundError: If the configuration file does not exist.
        ValueError: If the resolved Qdrant port cannot be converted to ``int``.
    """
    if config_path is None:
        project_root = Path(__file__).resolve().parent.parent
        config_path = project_root / "config" / "rag_config.yaml"

    path = Path(config_path)
    if not path.exists():
        raise FileNotFoundError(f"RAG config not found: {path}")

    with open(path, encoding="utf-8") as f:
        # safe_load returns None for an empty document; normalise to a dict.
        data = yaml.safe_load(f) or {}

    # A section that is present but empty (``qdrant:`` with no value) is
    # parsed as None, so ``data.get(key, {})`` is not enough -- use ``or {}``.
    emb = data.get("embedding") or {}
    qd = data.get("qdrant") or {}
    ch = data.get("chunking") or {}

    # NOTE(review): the fallback provider ("sentence_transformers") and the
    # fallback model ("text-embedding-3-small") look like they belong to
    # different backends -- confirm the intended defaults.
    embedding = EmbeddingConfig(
        provider=emb.get("provider", "sentence_transformers"),
        model=emb.get("model", "text-embedding-3-small"),
        batch_size=emb.get("batch_size", 32),
        api_key_env=emb.get("api_key_env"),
        base_url=emb.get("base_url"),
    )

    # Environment beats YAML; ``or`` makes an empty variable fall through to
    # the YAML value instead of yielding host "" or crashing in int("").
    host = os.environ.get("QDRANT_HOST") or qd.get("host", "localhost")
    port = int(os.environ.get("QDRANT_PORT") or qd.get("port", 6333))

    qdrant = QdrantConfig(
        host=host,
        port=port,
        collection_name=qd.get("collection_name", "croplogic_kb"),
        # NOTE(review): this fallback (1536) differs from the QdrantConfig
        # class default (384); kept as-is to preserve existing behaviour.
        vector_size=qd.get("vector_size", 1536),
    )

    chunking = ChunkingConfig(
        max_chunk_tokens=ch.get("max_chunk_tokens", 500),
        overlap_tokens=ch.get("overlap_tokens", 50),
    )

    return RAGConfig(
        embedding=embedding,
        qdrant=qdrant,
        chunking=chunking,
        tone_file=data.get("tone_file", "config/tone.txt"),
        knowledge_base_path=data.get("knowledge_base_path", "config/knowledge_base"),
        user_info_path=data.get("user_info_path", "config/user_info"),
        # A null ``chromadb:`` entry must still yield a dict, per the field type.
        chromadb=data.get("chromadb") or {},
    )
|
||||
Reference in New Issue
Block a user