UPDATE
This commit is contained in:
+99
-4
@@ -1,9 +1,12 @@
|
||||
"""
|
||||
چت RAG برای API چت عمومی — با ارسال کامل داده مزرعه و retrieval تکمیلی از KB.
|
||||
"""
|
||||
import base64
|
||||
import json
|
||||
import logging
|
||||
import mimetypes
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from .api_provider import get_chat_client
|
||||
from .chunker import chunk_text
|
||||
@@ -13,6 +16,95 @@ from .retrieve import search_with_texts
|
||||
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _coerce_text_content(value: Any) -> str:
|
||||
if value is None:
|
||||
return ""
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
if isinstance(value, list):
|
||||
parts: list[str] = []
|
||||
for item in value:
|
||||
if isinstance(item, dict) and item.get("type") == "text":
|
||||
text_value = item.get("text")
|
||||
if isinstance(text_value, str) and text_value.strip():
|
||||
parts.append(text_value.strip())
|
||||
elif isinstance(item, str) and item.strip():
|
||||
parts.append(item.strip())
|
||||
return "\n".join(parts)
|
||||
return str(value)
|
||||
|
||||
|
||||
def _normalize_image_inputs(images: list[Any] | None) -> list[dict[str, str]]:
|
||||
normalized: list[dict[str, str]] = []
|
||||
for item in images or []:
|
||||
if isinstance(item, str):
|
||||
value = item.strip()
|
||||
if value:
|
||||
normalized.append({"url": value})
|
||||
continue
|
||||
if not isinstance(item, dict):
|
||||
continue
|
||||
url = item.get("url") or item.get("image_url") or item.get("data_url")
|
||||
if not isinstance(url, str) or not url.strip():
|
||||
continue
|
||||
entry = {"url": url.strip()}
|
||||
detail = item.get("detail")
|
||||
if isinstance(detail, str) and detail.strip():
|
||||
entry["detail"] = detail.strip()
|
||||
normalized.append(entry)
|
||||
return normalized
|
||||
|
||||
|
||||
def _build_content_parts(text: str, images: list[dict[str, str]] | None = None) -> str | list[dict[str, Any]]:
    """Build the ``content`` value for a single chat message.

    Args:
        text: Message text; ``None`` or empty is treated as ``""`` and the
            text is stripped.
        images: Optional image references, passed through
            ``_normalize_image_inputs`` before use.

    Returns:
        The stripped text alone when no usable image remains; otherwise a
        list containing an optional ``{"type": "text"}`` part followed by one
        ``{"type": "image_url"}`` part per image.
    """
    cleaned_text = (text or "").strip()
    usable_images = _normalize_image_inputs(images)
    if not usable_images:
        return cleaned_text

    def _image_part(image: dict[str, str]) -> dict[str, Any]:
        # ``detail`` is forwarded only when the normalized image carries one.
        payload: dict[str, Any] = {"url": image["url"]}
        if image.get("detail"):
            payload["detail"] = image["detail"]
        return {"type": "image_url", "image_url": payload}

    text_parts: list[dict[str, Any]] = (
        [{"type": "text", "text": cleaned_text}] if cleaned_text else []
    )
    return text_parts + [_image_part(image) for image in usable_images]
|
||||
|
||||
|
||||
def _normalize_history_messages(history: list[dict[str, Any]] | None) -> list[dict[str, Any]]:
    """Convert caller-supplied chat history into ``{"role", "content"}`` messages.

    Args:
        history: Previous turns. Each usable item is a dict with a ``role``
            of ``user`` or ``assistant`` (case-insensitive), text under
            ``content``, ``message`` or ``text``, and optionally images under
            ``images`` or ``image_urls``.

    Returns:
        The usable turns in input order. Non-dict items, other roles, and
        turns left with no content are dropped. Images are attached to user
        turns only.
    """
    normalized: list[dict[str, Any]] = []
    for item in history or []:
        if not isinstance(item, dict):
            continue
        role = str(item.get("role") or "").strip().lower()
        if role not in {"user", "assistant"}:
            continue

        # Take the first key that actually yields text, so a present-but-empty
        # ``content`` does not hide a populated ``message`` / ``text``.
        text = ""
        for key in ("content", "message", "text"):
            text = _coerce_text_content(item.get(key)).strip()
            if text:
                break

        # Images are only ever sent on user turns. Ignoring them up front for
        # assistant turns keeps an image-only assistant item from being
        # appended with empty content.
        images = (
            _normalize_image_inputs(item.get("images") or item.get("image_urls"))
            if role == "user"
            else []
        )
        if not text and not images:
            continue

        normalized.append({"role": role, "content": _build_content_parts(text, images)})
    return normalized
|
||||
|
||||
|
||||
def encode_uploaded_image(uploaded_file: Any) -> dict[str, str]:
    """Encode an uploaded file as a base64 ``data:`` URL image entry.

    Args:
        uploaded_file: Object exposing ``read()`` and, optionally,
            ``content_type`` and ``name`` attributes.

    Returns:
        ``{"url": "data:<content-type>;base64,<payload>", "detail": "auto"}``.
        The content type is the file's ``content_type`` when set, otherwise a
        guess from its ``name``, otherwise ``application/octet-stream``.

    Raises:
        ValueError: If ``read()`` does not return ``bytes`` or ``bytearray``.
    """
    content_type = getattr(uploaded_file, "content_type", None)
    if not content_type:
        name = getattr(uploaded_file, "name", None)
        try:
            guessed = mimetypes.guess_type(name or "")[0]
        except TypeError:
            # ``name`` can be something that is not a path (e.g. an integer
            # file descriptor); fall back instead of failing the upload.
            guessed = None
        content_type = guessed or "application/octet-stream"

    raw = uploaded_file.read()
    if not isinstance(raw, (bytes, bytearray)):
        raise ValueError("Uploaded image payload is invalid.")

    encoded = base64.b64encode(raw).decode("ascii")
    return {
        "url": f"data:{content_type};base64,{encoded}",
        "detail": "auto",
    }
|
||||
|
||||
|
||||
def _load_tone(config: RAGConfig | None) -> str:
|
||||
"""بارگذاری فایل لحن پیشفرض (chat KB)."""
|
||||
cfg = config or load_rag_config()
|
||||
@@ -214,6 +306,8 @@ def chat_rag_stream(
|
||||
config: RAGConfig | None = None,
|
||||
system_override: str | None = None,
|
||||
farm_details: dict | None = None,
|
||||
history: list[dict[str, Any]] | None = None,
|
||||
images: list[dict[str, str]] | None = None,
|
||||
):
|
||||
"""
|
||||
چت استریمی با سرویس ثابت `chat` و context مستقیم مزرعه.
|
||||
@@ -223,6 +317,8 @@ def chat_rag_stream(
|
||||
farm_uuid: شناسه مزرعه
|
||||
config: تنظیمات RAG
|
||||
system_override: جایگزین system prompt (اختیاری)
|
||||
history: لیست پیام های قبلی کاربر/هوش مصنوعی
|
||||
images: تصاویر مربوط به پیام فعلی کاربر
|
||||
|
||||
Yields:
|
||||
chunk های استریم پاسخ مدل
|
||||
@@ -268,10 +364,9 @@ def chat_rag_stream(
|
||||
else:
|
||||
system_prompt = _build_system_prompt(service, query, context, cfg)
|
||||
|
||||
messages = [
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": query},
|
||||
]
|
||||
messages = [{"role": "system", "content": system_prompt}]
|
||||
messages.extend(_normalize_history_messages(history))
|
||||
messages.append({"role": "user", "content": _build_content_parts(query, images)})
|
||||
|
||||
logger.info(
|
||||
"Final prompt prepared service_id=%s farm_uuid=%s model=%s messages_count=%s",
|
||||
|
||||
Reference in New Issue
Block a user