from __future__ import annotations import json import logging from typing import Any, Literal from pydantic import BaseModel, Field, ValidationError from rag.api_provider import get_chat_client from rag.chat import ( _complete_audit_log, _create_audit_log, _fail_audit_log, _load_service_tone, build_rag_context, ) from rag.config import RAGConfig, get_service_config, load_rag_config logger = logging.getLogger(__name__) SERVICE_ID = "irrigation_plan_parser" KB_NAME = "irrigation_plan_parser" CORE_FIELDS = [ "crop_name", "growth_stage", "irrigation_method", "water_amount_per_event", "duration_minutes", "frequency_text", "interval_days", "preferred_time_of_day", "start_date", "target_area", ] IRRIGATION_PLAN_PROMPT = ( "شما یک تحلیل گر برنامه آبیاری هستی. " "کاربر ممکن است برنامه آبیاری را کامل یا ناقص توضیح دهد. " "وظیفه شما این است که فقط JSON معتبر برگردانی و متن اضافه، markdown، توضیح بیرون از JSON یا کلید اضافه تولید نکنی. " "اگر اطلاعات کافی بود status را completed بگذار و final_plan را کامل کن. " "اگر اطلاعات کافی نبود status را needs_clarification بگذار، missing_fields را پر کن و 1 تا 5 سوال کوتاه و دقیق در questions برگردان. " "اگر هرکدام از فیلدهای اصلی خالی، null یا نامشخص بود، حق نداری status را completed بگذاری. " "در حالت completed هیچ فیلد null در collected_data و final_plan نباید وجود داشته باشد. " "از حدس زدن جزئیات برنامه خودداری کن. " "اگر کاربر فقط بخشی از سوالات قبلی را جواب داد، داده های جدید را با partial_plan ادغام کن و فقط سوالات باقی مانده را بپرس. " "Schema: " "{" '"status": "completed" | "needs_clarification", ' '"summary": string, ' '"missing_fields": [string], ' '"questions": [{"id": string, "field": string, "question": string, "rationale": string}], ' '"collected_data": {' '"crop_name": string|null, ' '"growth_stage": string|null, ' '"irrigation_method": string|null, ' '"water_amount_per_event": string|null, ' '"duration_minutes": integer|null, ' '"frequency_text": string|null, ' '"interval_days": integer|null, ' '"preferred_time_of_day": string|null, ' '"start_date": string|null, ' '"target_area": string|null, ' '"trigger_conditions": [string], ' '"notes": [string]' "}, " '"final_plan": {same shape as collected_data} | null' "}." ) class ClarificationQuestionSchema(BaseModel): id: str field: str question: str rationale: str = "" class IrrigationPlanSchema(BaseModel): crop_name: str | None = None growth_stage: str | None = None irrigation_method: str | None = None water_amount_per_event: str | None = None duration_minutes: int | None = None frequency_text: str | None = None interval_days: int | None = None preferred_time_of_day: str | None = None start_date: str | None = None target_area: str | None = None trigger_conditions: list[str] = Field(default_factory=list) notes: list[str] = Field(default_factory=list) class IrrigationPlanParseResultSchema(BaseModel): status: Literal["completed", "needs_clarification"] summary: str missing_fields: list[str] = Field(default_factory=list) questions: list[ClarificationQuestionSchema] = Field(default_factory=list) collected_data: IrrigationPlanSchema = Field(default_factory=IrrigationPlanSchema) final_plan: IrrigationPlanSchema | None = None class IrrigationPlanParserService: def parse_plan( self, *, message: str = "", answers: dict[str, Any] | None = None, partial_plan: dict[str, Any] | None = None, farm_uuid: str | None = None, ) -> dict[str, Any]: cfg = load_rag_config() service, client, model = self._build_service_client(cfg) normalized_message = (message or "").strip() normalized_answers = answers if isinstance(answers, dict) else {} normalized_partial = partial_plan if isinstance(partial_plan, dict) else {} structured_context = { "message": normalized_message, "answers": normalized_answers, "partial_plan": normalized_partial, "required_core_fields": CORE_FIELDS, "service": "irrigation_plan_parser", } rag_query = self._build_retrieval_query( message=normalized_message, answers=normalized_answers, ) rag_context = build_rag_context( query=rag_query, sensor_uuid=farm_uuid, config=cfg, kb_name=KB_NAME, service_id=SERVICE_ID, ) system_prompt, messages = self._build_messages( service=service, cfg=cfg, structured_context=structured_context, rag_context=rag_context, ) audit_log = None if farm_uuid: try: audit_log = _create_audit_log( farm_uuid=farm_uuid, service_id=SERVICE_ID, model=model, query=rag_query, system_prompt=system_prompt, messages=messages, ) except Exception as exc: logger.warning("Irrigation plan parser audit log creation failed for %s: %s", farm_uuid, exc) try: response = client.chat.completions.create( model=model, messages=messages, response_format={"type": "json_object"}, ) raw = (response.choices[0].message.content or "").strip() parsed = self._clean_json(raw) validated = IrrigationPlanParseResultSchema.model_validate(parsed) normalized = self._normalize_result(validated) if audit_log is not None: _complete_audit_log(audit_log, raw) return normalized except (ValidationError, ValueError, KeyError, IndexError) as exc: logger.warning("Irrigation plan parser parsing failed: %s", exc) if audit_log is not None: _fail_audit_log(audit_log, str(exc)) return self._fallback_result( message=normalized_message, answers=normalized_answers, partial_plan=normalized_partial, ) except Exception as exc: logger.error("Irrigation plan parser failed: %s", exc) if audit_log is not None: _fail_audit_log(audit_log, str(exc)) return self._fallback_result( message=normalized_message, answers=normalized_answers, partial_plan=normalized_partial, ) def _build_service_client(self, cfg: RAGConfig): service = get_service_config(SERVICE_ID, cfg) service_cfg = RAGConfig( embedding=cfg.embedding, qdrant=cfg.qdrant, chunking=cfg.chunking, llm=service.llm, knowledge_bases=cfg.knowledge_bases, services=cfg.services, chromadb=cfg.chromadb, ) client = get_chat_client(service_cfg) return service, client, service.llm.model def _build_messages( self, *, service: Any, cfg: RAGConfig, structured_context: dict[str, Any], rag_context: str, ) -> tuple[str, list[dict[str, str]]]: tone = _load_service_tone(service, cfg) system_parts = [tone] if tone else [] if service.system_prompt: system_parts.append(service.system_prompt) system_parts.append(IRRIGATION_PLAN_PROMPT) system_parts.append( "[structured_context]\n" + json.dumps(structured_context, ensure_ascii=False, indent=2, default=str) ) if rag_context: system_parts.append(rag_context) system_prompt = "\n\n".join(part for part in system_parts if part) messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": "برنامه آبیاری را استخراج یا برای تکمیل آن سوال بپرس."}, ] return system_prompt, messages def _build_retrieval_query( self, *, message: str, answers: dict[str, Any], ) -> str: answer_lines = [f"{key}: {value}" for key, value in answers.items()] parts = [part for part in [message, "\n".join(answer_lines)] if part] return "\n".join(parts) or "استخراج برنامه آبیاری از متن کاربر" def _normalize_result(self, validated: IrrigationPlanParseResultSchema) -> dict[str, Any]: collected = validated.collected_data.model_dump() final_plan = validated.final_plan.model_dump() if validated.final_plan is not None else None missing_fields = list(dict.fromkeys(validated.missing_fields)) computed_missing = self._find_missing_fields(final_plan or collected) for field in computed_missing: if field not in missing_fields: missing_fields.append(field) can_complete = validated.status == "completed" and not missing_fields if can_complete: final_plan = final_plan or collected questions: list[dict[str, Any]] = [] status_fa = "تکمیل شد" else: questions = [item.model_dump() for item in validated.questions] if not questions and missing_fields: questions = self._build_generic_questions(missing_fields) final_plan = None validated.status = "needs_clarification" status_fa = "نیازمند پرسش تکمیلی" return { "status": "completed" if can_complete else "needs_clarification", "status_fa": status_fa, "summary": validated.summary, "missing_fields": missing_fields, "questions": questions, "collected_data": collected, "final_plan": final_plan, } def _fallback_result( self, *, message: str, answers: dict[str, Any], partial_plan: dict[str, Any], ) -> dict[str, Any]: merged = dict(partial_plan) notes = list(merged.get("notes") or []) if message: notes.append(f"متن اولیه کاربر: {message}") for key, value in answers.items(): merged.setdefault(key, value) return { "status": "needs_clarification", "status_fa": "نیازمند پرسش تکمیلی", "summary": "اطلاعات برنامه آبیاری برای ساخت JSON نهایی کافی نیست و به چند پاسخ تکمیلی نیاز است.", "missing_fields": CORE_FIELDS, "questions": self._build_generic_questions(CORE_FIELDS), "collected_data": { "crop_name": merged.get("crop_name"), "growth_stage": merged.get("growth_stage"), "irrigation_method": merged.get("irrigation_method"), "water_amount_per_event": merged.get("water_amount_per_event"), "duration_minutes": merged.get("duration_minutes"), "frequency_text": merged.get("frequency_text"), "interval_days": merged.get("interval_days"), "preferred_time_of_day": merged.get("preferred_time_of_day"), "start_date": merged.get("start_date"), "target_area": merged.get("target_area"), "trigger_conditions": merged.get("trigger_conditions") or [], "notes": notes, }, "final_plan": None, } def _build_generic_questions(self, missing_fields: list[str]) -> list[dict[str, str]]: catalog = { "crop_name": { "id": "crop_name", "field": "crop_name", "question": "این برنامه آبیاری برای کدام محصول است؟", "rationale": "نام محصول برای ثبت برنامه لازم است.", }, "growth_stage": { "id": "growth_stage", "field": "growth_stage", "question": "محصول الان در چه مرحله رشدی قرار دارد؟", "rationale": "مرحله رشد برای کامل شدن برنامه لازم است.", }, "irrigation_method": { "id": "irrigation_method", "field": "irrigation_method", "question": "روش آبیاری چیست؟ مثلا قطره ای، بارانی یا غرقابی.", "rationale": "روش اجرا روی شکل برنامه تاثیر دارد.", }, "water_amount_per_event": { "id": "water_amount_per_event", "field": "water_amount_per_event", "question": "در هر نوبت آبیاری چه مقدار آب داده می شود؟", "rationale": "حجم یا عمق آب هر نوبت مشخص نشده است.", }, "duration_minutes": { "id": "duration_minutes", "field": "duration_minutes", "question": "مدت زمان هر نوبت آبیاری چند دقیقه است؟", "rationale": "مدت اجرای هر نوبت هنوز مشخص نیست.", }, "frequency_text": { "id": "frequency_text", "field": "frequency_text", "question": "فاصله یا تعداد نوبت های آبیاری چگونه است؟ مثلا هر 3 روز یک بار.", "rationale": "الگوی تکرار آبیاری باید مشخص باشد.", }, "interval_days": { "id": "interval_days", "field": "interval_days", "question": "فاصله بین دو آبیاری چند روز است؟", "rationale": "عدد فاصله آبیاری برای JSON نهایی لازم است.", }, "preferred_time_of_day": { "id": "preferred_time_of_day", "field": "preferred_time_of_day", "question": "بهترین زمان اجرای آبیاری چه موقع از روز است؟", "rationale": "زمان اجرای برنامه هنوز معلوم نیست.", }, "start_date": { "id": "start_date", "field": "start_date", "question": "این برنامه از چه تاریخی یا از چه زمانی باید شروع شود؟", "rationale": "زمان شروع برنامه هنوز مشخص نشده است.", }, "target_area": { "id": "target_area", "field": "target_area", "question": "این برنامه برای کل مزرعه است یا بخش/ناحیه خاصی از مزرعه؟", "rationale": "محدوده اجرای برنامه باید مشخص باشد.", }, } return [catalog[field] for field in missing_fields if field in catalog][:5] def _find_missing_fields(self, plan: dict[str, Any]) -> list[str]: missing: list[str] = [] for field in CORE_FIELDS: value = plan.get(field) if value is None: missing.append(field) continue if isinstance(value, str) and not value.strip(): missing.append(field) return missing def _clean_json(self, raw: str) -> dict[str, Any]: cleaned = (raw or "").strip() if cleaned.startswith("```"): cleaned = cleaned.strip("`") if cleaned.startswith("json"): cleaned = cleaned[4:] cleaned = cleaned.strip() if not cleaned: raise ValueError("Irrigation plan parser response was empty.") parsed = json.loads(cleaned) if not isinstance(parsed, dict): raise ValueError("Irrigation plan parser response root must be an object.") return parsed