197f70ee12
- Added Qdrant service to both docker-compose files for production and development. - Updated environment variables in .env.example and settings.py to include Qdrant configuration. - Included necessary dependencies for Qdrant and ChromaDB in requirements.txt. - Updated .gitignore to exclude ChromaDB data files.
72 lines
2.1 KiB
Python
72 lines
2.1 KiB
Python
"""
|
|
Text embedding service using the Avalai API (OpenAI-compatible).
|
|
"""
|
|
import os
|
|
from typing import overload
|
|
|
|
from openai import OpenAI
|
|
|
|
from .config import load_rag_config, RAGConfig
|
|
|
|
|
|
def _get_avalai_client(config: RAGConfig | None) -> OpenAI:
    """Build an OpenAI-compatible client pointed at the Avalai API.

    Args:
        config: RAG settings; when falsy, ``load_rag_config()`` supplies them.

    Returns:
        An ``OpenAI`` client built from the resolved API key and base URL.
    """
    embedding_cfg = (config or load_rag_config()).embedding

    # The config stores the *name* of the environment variable that holds
    # the key, not the key itself.
    key_variable = embedding_cfg.api_key_env
    if not key_variable:
        key_variable = "AVALAI_API_KEY"
    secret = os.environ.get(key_variable)

    # An explicit base URL in the config wins; otherwise the environment,
    # and finally the public Avalai endpoint.
    endpoint = embedding_cfg.base_url
    if not endpoint:
        endpoint = os.environ.get("AVALAI_BASE_URL", "https://api.avalai.ir/v1")

    return OpenAI(api_key=secret, base_url=endpoint)
|
|
|
|
|
|
def embed_texts(
    texts: list[str],
    config: RAGConfig | None = None,
    model: str | None = None,
    dimensions: int | None = None,
) -> list[list[float]]:
    """Embed a list of texts through the Avalai embeddings endpoint.

    The texts are sent in consecutive batches of
    ``config.embedding.batch_size`` items. Within each response the items
    are ordered by the ``index`` the API reports before being collected.

    Args:
        texts: Input strings to embed.
        config: RAG settings (default: ``load_rag_config()``).
        model: Model name; overrides ``config.embedding.model`` when truthy.
        dimensions: Requested vector size. Forwarded to the API only when it
            is not ``None`` (only meaningful for models that support it).

    Returns:
        The list of embedding vectors, or ``[]`` when ``texts`` is empty.

    Raises:
        ValueError: If the configured batch size is smaller than 1.
    """
    if not texts:
        return []

    cfg = config or load_rag_config()
    batch_size = cfg.embedding.batch_size
    # A negative step makes range() empty, which silently produced [] for
    # non-empty input; zero failed with an unrelated range() message.
    # Validate up front, before any client is built or request is sent.
    if batch_size < 1:
        raise ValueError(
            f"embedding.batch_size must be a positive integer, got {batch_size!r}"
        )

    client = _get_avalai_client(cfg)
    model_name = model or cfg.embedding.model

    # Only pass `dimensions` when the caller asked for it.
    extra = {} if dimensions is None else {"dimensions": dimensions}

    all_embeddings: list[list[float]] = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start : start + batch_size]
        resp = client.embeddings.create(
            model=model_name,
            input=batch,
            **extra,
        )
        # Order by the reported index instead of trusting the order of
        # resp.data.
        all_embeddings.extend(
            item.embedding for item in sorted(resp.data, key=lambda x: x.index)
        )

    return all_embeddings
|
|
|
|
|
|
def embed_single(text: str, config: RAGConfig | None = None, **kwargs) -> list[float]:
    """Embed one text and return its vector directly.

    Args:
        text: The string to embed.
        config: RAG settings, passed through to ``embed_texts``.
        **kwargs: Extra keyword arguments forwarded to ``embed_texts``.

    Returns:
        The first vector returned by ``embed_texts``, or ``[]`` when it
        returns nothing.
    """
    vectors = embed_texts([text], config=config, **kwargs)
    if not vectors:
        return []
    return vectors[0]
|