Add Qdrant and ChromaDB support to the project
- Added a Qdrant service to both docker-compose files (production and development).
- Updated environment variables in .env.example and settings.py to include the Qdrant configuration.
- Added the required Qdrant and ChromaDB dependencies to requirements.txt.
- Updated .gitignore to exclude ChromaDB data files.
This commit is contained in:
@@ -0,0 +1,71 @@
|
||||
"""Text embedding service backed by the Avalai API (OpenAI-compatible)."""
import os
from typing import overload  # NOTE(review): appears unused in this module — confirm before removing

from openai import OpenAI

from .config import load_rag_config, RAGConfig
|
||||
|
||||
|
||||
def _get_avalai_client(config: RAGConfig | None) -> OpenAI:
    """Build an OpenAI client pointed at the Avalai API.

    Args:
        config: RAG configuration; ``load_rag_config()`` is used when None.

    Returns:
        An ``OpenAI`` client configured with the Avalai API key and base URL.

    Raises:
        RuntimeError: if the API-key environment variable is not set.
    """
    cfg = config or load_rag_config()
    emb = cfg.embedding
    env_var = emb.api_key_env or "AVALAI_API_KEY"
    api_key = os.environ.get(env_var)
    if not api_key:
        # Fail fast with the correct variable name. Passing api_key=None lets
        # the OpenAI constructor silently fall back to OPENAI_API_KEY (or raise
        # an error that names the wrong variable), which is misleading here.
        raise RuntimeError(f"Environment variable {env_var} is not set")
    base_url = emb.base_url or os.environ.get(
        "AVALAI_BASE_URL", "https://api.avalai.ir/v1"
    )
    return OpenAI(api_key=api_key, base_url=base_url)
|
||||
|
||||
|
||||
def embed_texts(
    texts: list[str],
    config: RAGConfig | None = None,
    model: str | None = None,
    dimensions: int | None = None,
) -> list[list[float]]:
    """Embed a list of texts with Avalai.

    Args:
        texts: input strings.
        config: RAG settings (default: ``load_rag_config()``).
        model: model name (overrides the config value).
        dimensions: number of dimensions (only for models that support it).

    Returns:
        A list of embedding vectors, aligned with the order of ``texts``.
    """
    if not texts:
        return []

    cfg = config or load_rag_config()
    client = _get_avalai_client(cfg)
    model_name = model or cfg.embedding.model
    # Guard against a missing/zero batch size: range(0, n, 0) raises ValueError
    # and a negative step would skip the loop entirely.
    batch_size = max(1, cfg.embedding.batch_size or 1)

    extra: dict[str, int] = {}
    if dimensions is not None:
        extra["dimensions"] = dimensions

    all_embeddings: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        resp = client.embeddings.create(
            model=model_name,
            input=batch,
            **extra,
        )
        # Sort each batch's items by their reported index so the output stays
        # aligned with the input order within the batch.
        for item in sorted(resp.data, key=lambda x: x.index):
            all_embeddings.append(item.embedding)

    return all_embeddings
|
||||
|
||||
|
||||
def embed_single(text: str, config: RAGConfig | None = None, **kwargs) -> list[float]:
    """Embed a single text; the result is the vector itself.

    Delegates to ``embed_texts`` with a one-element list and returns an empty
    list when no embedding comes back.
    """
    result = embed_texts([text], config=config, **kwargs)
    if not result:
        return []
    return result[0]
|
||||
Reference in New Issue
Block a user