# rag/retrieve.py

"""
بازیابی RAG: embed کوئری و جستجو در vector store
"""
from .config import load_rag_config, RAGConfig, get_service_config
from .embedding import embed_single, embed_texts
from .vector_store import QdrantVectorStore


def _resolve_search_options(
    sensor_uuid: str | None = None,
    config: RAGConfig | None = None,
    kb_name: str | None = None,
    service_id: str | None = None,
    use_user_embeddings: bool | None = None,
) -> tuple[RAGConfig, list[str], list[str]]:
    cfg = config or load_rag_config()
    service = get_service_config(service_id, cfg) if service_id else None
    resolved_kb_name = kb_name or (service.knowledge_base if service else None)
    include_user_embeddings = (
        use_user_embeddings
        if use_user_embeddings is not None
        else (service.use_user_embeddings if service else True)
    )

    sensor_filters = ["__global__"]
    if include_user_embeddings and sensor_uuid:
        sensor_filters.insert(0, sensor_uuid)

    kb_filters = [resolved_kb_name] if resolved_kb_name else []
    if include_user_embeddings:
        kb_filters.append("__all__")

    return cfg, sensor_filters, kb_filters


def search_with_query(
    query: str,
    sensor_uuid: str | None = None,
    limit: int = 5,
    score_threshold: float | None = None,
    config: RAGConfig | None = None,
    kb_name: str | None = None,
    service_id: str | None = None,
    use_user_embeddings: bool | None = None,
) -> list[dict]:
    """Embed a query and search the vector store with it.

    The sensor and knowledge-base filter lists are produced by
    ``_resolve_search_options`` and passed to the store's ``search`` call.
    The intent (per the original documentation) is user isolation: only
    chunks belonging to ``sensor_uuid`` or to ``__global__`` should come
    back. How the store applies the filters is not visible here.

    Args:
        query: Text to embed and search for.
        sensor_uuid: The user's sensor identifier. Documented as required
            for security, although the signature accepts ``None``.
        limit: Maximum number of results requested from the store.
        score_threshold: Optional score cut-off forwarded to the store.
        config: RAG configuration; resolved by ``_resolve_search_options``
            when omitted.
        kb_name: Optional knowledge-base name to filter on
            (e.g. chat / irrigation / fertilization).
        service_id: Optional service whose settings supply defaults.
        use_user_embeddings: Optional override for including user embeddings.

    Returns:
        Whatever the store's ``search`` returns; documented as a list of
        results carrying id, score, text and metadata.
    """
    active_config, sensor_scope, kb_scope = _resolve_search_options(
        sensor_uuid=sensor_uuid,
        config=config,
        kb_name=kb_name,
        service_id=service_id,
        use_user_embeddings=use_user_embeddings,
    )

    # Embed first, then open the store, mirroring the original call order.
    search_args = {
        "query_vector": embed_single(query, config=active_config),
        "limit": limit,
        "score_threshold": score_threshold,
        "sensor_uuids": sensor_scope,
        "kb_names": kb_scope,
    }
    vector_store = QdrantVectorStore(config=active_config)
    return vector_store.search(**search_args)


def search_with_texts(
    texts: list[str],
    sensor_uuid: str | None = None,
    limit: int = 8,
    per_text_limit: int = 3,
    score_threshold: float | None = None,
    config: RAGConfig | None = None,
    kb_name: str | None = None,
    service_id: str | None = None,
    use_user_embeddings: bool | None = None,
) -> list[dict]:
    """Embed several texts and return their de-duplicated search results.

    Useful when both the user's message and the farm data should be searched
    against the knowledge base. Each text is searched separately; hits are
    merged by ``id`` keeping the highest-scoring occurrence, sorted by score
    in descending order and truncated to ``limit``.

    Args:
        texts: Texts to search with. Falsy and whitespace-only entries are
            dropped, the rest are stripped, and repeated texts are searched
            only once.
        sensor_uuid: Sensor identifier used to build the sensor filters.
        limit: Maximum number of merged results. A value of zero or less
            yields an empty list without embedding or searching.
        per_text_limit: Maximum number of results requested per text.
        score_threshold: Optional score cut-off forwarded to the store.
        config: RAG configuration; resolved by ``_resolve_search_options``
            when omitted.
        kb_name: Optional knowledge-base name to filter on.
        service_id: Optional service whose settings supply defaults.
        use_user_embeddings: Optional override for including user embeddings.

    Returns:
        The merged result dicts (each with at least ``id`` and ``score``),
        best score first, at most ``limit`` items.
    """
    # A negative limit would make the final slice drop items from the tail
    # instead of returning nothing, and limit == 0 needs no work at all.
    if limit <= 0:
        return []

    stripped_texts = (text.strip() for text in texts if text)
    # dict.fromkeys removes repeats while keeping first-seen order, so the
    # same text is not embedded and searched more than once.
    normalized_texts = list(dict.fromkeys(t for t in stripped_texts if t))
    if not normalized_texts:
        return []

    cfg, sensor_filters, kb_filters = _resolve_search_options(
        sensor_uuid=sensor_uuid,
        config=config,
        kb_name=kb_name,
        service_id=service_id,
        use_user_embeddings=use_user_embeddings,
    )

    store = QdrantVectorStore(config=cfg)
    vectors = embed_texts(normalized_texts, config=cfg)
    merged_results: dict[str, dict] = {}

    for vector in vectors:
        results = store.search(
            query_vector=vector,
            limit=per_text_limit,
            score_threshold=score_threshold,
            sensor_uuids=sensor_filters,
            kb_names=kb_filters,
        )
        for item in results:
            current = merged_results.get(item["id"])
            # Keep only the best-scoring occurrence of each result id.
            if current is None or item["score"] > current["score"]:
                merged_results[item["id"]] = item

    return sorted(
        merged_results.values(),
        key=lambda item: item["score"],
        reverse=True,
    )[:limit]
Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`"""`
			`بازیابی RAG: embed کوئری و جستجو در vector store`
			`"""`
AI UPDATE 2026-03-22 03:08:27 +03:30			`from .config import load_rag_config, RAGConfig, get_service_config`
UPDATE 2026-04-24 03:02:22 +03:30			`from .embedding import embed_single, embed_texts`
Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`from .vector_store import QdrantVectorStore`


UPDATE 2026-04-24 03:02:22 +03:30			`def _resolve_search_options(`
			`sensor_uuid: str \| None = None,`
			`config: RAGConfig \| None = None,`
			`kb_name: str \| None = None,`
			`service_id: str \| None = None,`
			`use_user_embeddings: bool \| None = None,`
			`) -> tuple[RAGConfig, list[str], list[str]]:`
			`cfg = config or load_rag_config()`
			`service = get_service_config(service_id, cfg) if service_id else None`
			`resolved_kb_name = kb_name or (service.knowledge_base if service else None)`
			`include_user_embeddings = (`
			`use_user_embeddings`
			`if use_user_embeddings is not None`
			`else (service.use_user_embeddings if service else True)`
			`)`

			`sensor_filters = ["__global__"]`
			`if include_user_embeddings and sensor_uuid:`
			`sensor_filters.insert(0, sensor_uuid)`

			`kb_filters = [resolved_kb_name] if resolved_kb_name else []`
			`if include_user_embeddings:`
			`kb_filters.append("__all__")`

			`return cfg, sensor_filters, kb_filters`


Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`def search_with_query(`
			`query: str,`
AI UPDATE 2026-03-22 03:08:27 +03:30			`sensor_uuid: str \| None = None,`
Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`limit: int = 5,`
			`score_threshold: float \| None = None,`
			`config: RAGConfig \| None = None,`
first commit 2026-03-19 22:54:29 +03:30			`kb_name: str \| None = None,`
AI UPDATE 2026-03-22 03:08:27 +03:30			`service_id: str \| None = None,`
			`use_user_embeddings: bool \| None = None,`
Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`) -> list[dict]:`
			`"""`
			`کوئری را embed می‌کند و در vector store جستجو می‌کند.`
Refactor user data handling and enhance chat functionality 2026-02-27 20:06:46 +03:30			`فقط chunks مربوط به sensor_uuid یا __global__ برمی‌گردد (ایزوله‌سازی کاربر).`
first commit 2026-03-19 22:54:29 +03:30			`kb_name: اختیاری — فیلتر بر اساس پایگاه دانش.`
Refactor user data handling and enhance chat functionality 2026-02-27 20:06:46 +03:30
			`Args:`
			`sensor_uuid: شناسه سنسور کاربر — اجباری برای امنیت`
first commit 2026-03-19 22:54:29 +03:30			`kb_name: نام پایگاه دانش (chat/irrigation/fertilization)`
Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30
			`Returns:`
			`لیست نتایج با id, score, text, metadata`
			`"""`
UPDATE 2026-04-24 03:02:22 +03:30			`cfg, sensor_filters, kb_filters = _resolve_search_options(`
			`sensor_uuid=sensor_uuid,`
			`config=config,`
			`kb_name=kb_name,`
			`service_id=service_id,`
			`use_user_embeddings=use_user_embeddings,`
AI UPDATE 2026-03-22 03:08:27 +03:30			`)`

Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`query_vector = embed_single(query, config=cfg)`
			`store = QdrantVectorStore(config=cfg)`
			`return store.search(`
			`query_vector=query_vector,`
			`limit=limit,`
			`score_threshold=score_threshold,`
AI UPDATE 2026-03-22 03:08:27 +03:30			`sensor_uuids=sensor_filters,`
			`kb_names=kb_filters,`
Add Qdrant and ChromaDB support to the project 2026-02-27 19:37:02 +03:30			`)`
UPDATE 2026-04-24 03:02:22 +03:30

			`def search_with_texts(`
			`texts: list[str],`
			`sensor_uuid: str \| None = None,`
			`limit: int = 8,`
			`per_text_limit: int = 3,`
			`score_threshold: float \| None = None,`
			`config: RAGConfig \| None = None,`
			`kb_name: str \| None = None,`
			`service_id: str \| None = None,`
			`use_user_embeddings: bool \| None = None,`
			`) -> list[dict]:`
			`"""`
			`چند متن را embed می‌کند و نتیجه جستجوها را به صورت dedupe شده برمی‌گرداند.`
			`برای حالتی مناسب است که هم پیام کاربر و هم داده‌های مزرعه را علیه KB جستجو کنیم.`
			`"""`
			`normalized_texts = [text.strip() for text in texts if text and text.strip()]`
			`if not normalized_texts:`
			`return []`

			`cfg, sensor_filters, kb_filters = _resolve_search_options(`
			`sensor_uuid=sensor_uuid,`
			`config=config,`
			`kb_name=kb_name,`
			`service_id=service_id,`
			`use_user_embeddings=use_user_embeddings,`
			`)`

			`store = QdrantVectorStore(config=cfg)`
			`vectors = embed_texts(normalized_texts, config=cfg)`
			`merged_results: dict[str, dict] = {}`

			`for vector in vectors:`
			`results = store.search(`
			`query_vector=vector,`
			`limit=per_text_limit,`
			`score_threshold=score_threshold,`
			`sensor_uuids=sensor_filters,`
			`kb_names=kb_filters,`
			`)`
			`for item in results:`
			`current = merged_results.get(item["id"])`
			`if current is None or item["score"] > current["score"]:`
			`merged_results[item["id"]] = item`

			`return sorted(`
			`merged_results.values(),`
			`key=lambda item: item["score"],`
			`reverse=True,`
			`)[:limit]`