Add LLM configuration and update URL routing

- Introduced LLM configuration in rag_config.yaml and corresponding LLMConfig class in config.py.
- Updated load_rag_config function to parse LLM settings from the configuration file.
- Added new API route for RAG in urls.py to facilitate access to the chat model.
- Modified QdrantVectorStore to use query_points method for improved functionality.
2026-02-27 19:44:49 +03:30
parent 197f70ee12
commit 94355af62b
8 changed files with 187 additions and 6 deletions
@@ -0,0 +1,102 @@
+"""
+چت RAG با استریم — استفاده از دیتای embed شده کاربر و Avalai API
+"""
+import os
+from pathlib import Path
+
+from openai import OpenAI
+
+from .config import load_rag_config, RAGConfig
+from .retrieve import search_with_query
+
+
+def _get_chat_client(config: RAGConfig | None = None) -> OpenAI:
+    """Build the OpenAI-compatible client used for the Avalai chat API.
+
+    The API key is read from the environment variable named by
+    ``cfg.llm.api_key_env`` (``AVALAI_API_KEY`` when that is empty). The base
+    URL comes from ``cfg.llm.base_url``, then the ``AVALAI_BASE_URL``
+    environment variable, then the public Avalai endpoint.
+
+    Args:
+        config: RAG configuration; loaded with ``load_rag_config()`` when
+            ``None``.
+
+    Returns:
+        An ``OpenAI`` client bound to the resolved API key and base URL.
+
+    Raises:
+        OpenAIError: If the API key environment variable is unset or empty.
+    """
+    cfg = config or load_rag_config()
+    llm = cfg.llm
+    env_var = llm.api_key_env or "AVALAI_API_KEY"
+    api_key = os.environ.get(env_var)
+    if not api_key:
+        # With api_key=None the SDK falls back to OPENAI_API_KEY, which would
+        # send an unrelated credential to this endpoint; fail loudly instead.
+        from openai import OpenAIError
+
+        raise OpenAIError(
+            f"Chat API key is not set: define the {env_var} environment variable."
+        )
+    base_url = llm.base_url or os.environ.get(
+        "AVALAI_BASE_URL", "https://api.avalai.ir/v1"
+    )
+    return OpenAI(api_key=api_key, base_url=base_url)
+
+
+def _load_tone(config: RAGConfig | None = None) -> str:
+    """Read the tone file referenced by the configuration.
+
+    ``cfg.tone_file`` is resolved against the parent of the directory that
+    contains this module.
+
+    Args:
+        config: RAG configuration; loaded with ``load_rag_config()`` when
+            ``None``.
+
+    Returns:
+        The stripped UTF-8 content of the tone file, or ``""`` when no tone
+        file is configured or the path is not a regular file.
+    """
+    cfg = config or load_rag_config()
+    if not cfg.tone_file:
+        # An empty value would resolve to the base directory itself.
+        return ""
+    base = Path(__file__).resolve().parent.parent
+    tone_path = base / cfg.tone_file
+    # is_file() rather than exists(): a directory cannot be read as text.
+    if not tone_path.is_file():
+        return ""
+    return tone_path.read_text(encoding="utf-8").strip()
+
+
+def build_rag_context(query: str, config: RAGConfig | None = None, limit: int = 5) -> str:
+    """Retrieve the stored texts relevant to the user's query.
+
+    Args:
+        query: The user's query, passed to ``search_with_query``.
+        config: RAG configuration forwarded to ``search_with_query``.
+        limit: Maximum number of results to request.
+
+    Returns:
+        The non-empty, stripped ``"text"`` values of the results, separated
+        by a ``---`` line surrounded by blank lines; ``""`` when the search
+        returns nothing or no result carries text.
+    """
+    results = search_with_query(query, limit=limit, config=config)
+    if not results:
+        return ""
+    # `or ""` guards against a "text" key that is present but set to None.
+    texts = ((r.get("text") or "").strip() for r in results)
+    return "\n\n---\n\n".join(t for t in texts if t)
+
+
+def chat_rag_stream(
+    query: str,
+    config: RAGConfig | None = None,
+    limit: int = 5,
+    system_override: str | None = None,
+):
+    """Stream a RAG chat answer for the user's query.
+
+    Unless ``system_override`` is given, the system prompt is assembled from
+    the tone file (if any), a fixed instruction, and the reference texts
+    retrieved for ``query``. The chat model's reply is then streamed back.
+
+    Args:
+        query: The user's message.
+        config: RAG configuration; loaded with ``load_rag_config()`` when
+            ``None``.
+        limit: Number of chunks to retrieve.
+        system_override: Replacement for the whole system prompt (optional).
+            When given, no retrieval is performed because the retrieved
+            context would not be used.
+
+    Yields:
+        Each non-empty content delta of the reply, as a string.
+    """
+    cfg = config or load_rag_config()
+    client = _get_chat_client(cfg)
+    model = cfg.llm.model
+
+    if system_override is not None:
+        # NOTE(review): the override replaces the prompt entirely, so no
+        # reference texts are injected -- confirm this is intended.
+        system_content = system_override
+    else:
+        # Retrieval happens only on this branch; it is the sole consumer of
+        # the context, so the override path skips a needless search.
+        context = build_rag_context(query, config=cfg, limit=limit)
+        tone = _load_tone(cfg)
+        system_parts = [tone] if tone else []
+        system_parts.append(
+            "با استفاده از بخش «متن‌های مرجع» زیر به سوال کاربر پاسخ بده. "
+            "فقط در حد نیاز از مرجع استفاده کن و پاسخ را به زبان کاربر بنویس."
+        )
+        if context:
+            system_parts.append("\n\nمتن‌های مرجع:\n" + context)
+        system_content = "\n".join(system_parts)
+
+    messages = [
+        {"role": "system", "content": system_content},
+        {"role": "user", "content": query},
+    ]
+
+    stream = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        stream=True,
+    )
+
+    try:
+        for chunk in stream:
+            # Some chunks carry no choices; skip them.
+            delta = chunk.choices[0].delta if chunk.choices else None
+            content = delta.content if delta else ""
+            if content:
+                yield content
+    finally:
+        # Release the HTTP response even when the consumer stops iterating
+        # early (e.g. the client disconnects and the generator is closed).
+        stream.close()