406 lines
17 KiB
Python
406 lines
17 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from typing import Any, Literal
|
|
|
|
from pydantic import BaseModel, Field, ValidationError
|
|
|
|
from farm_data.services import build_ai_farm_snapshot
|
|
from rag.api_provider import get_chat_client
|
|
from rag.chat import (
|
|
_complete_audit_log,
|
|
_create_audit_log,
|
|
_fail_audit_log,
|
|
_load_service_tone,
|
|
build_rag_context,
|
|
)
|
|
from rag.config import RAGConfig, get_service_config, load_rag_config
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
# Identifier passed as ``service_id`` to the service-config lookup, the RAG
# context builder and the audit log.
SERVICE_ID = "irrigation_plan_parser"

# Knowledge-base name passed as ``kb_name`` to the RAG context builder.
KB_NAME = "irrigation_plan_parser"

# Plan fields that must all be non-empty before a plan may be reported as
# "completed". The order is significant: it is the order in which generic
# clarification questions are generated.
CORE_FIELDS = [
    "crop_name",
    "growth_stage",
    "irrigation_method",
    "water_amount_per_event",
    "duration_minutes",
    "frequency_text",
    "interval_days",
    "preferred_time_of_day",
    "start_date",
    "target_area",
]
|
|
|
|
# System-prompt fragment (Persian instructions followed by an inline JSON
# schema) that tells the model to answer with JSON only, to use
# "needs_clarification" whenever a core field is unknown, and to merge new
# answers into ``partial_plan``. The adjacent literals are concatenated into
# one string; each sentence keeps its trailing space on purpose.
IRRIGATION_PLAN_PROMPT = (
    "شما یک تحلیل گر برنامه آبیاری هستی. "
    "کاربر ممکن است برنامه آبیاری را کامل یا ناقص توضیح دهد. "
    "وظیفه شما این است که فقط JSON معتبر برگردانی و متن اضافه، markdown، توضیح بیرون از JSON یا کلید اضافه تولید نکنی. "
    "اگر اطلاعات کافی بود status را completed بگذار و final_plan را کامل کن. "
    "اگر اطلاعات کافی نبود status را needs_clarification بگذار، missing_fields را پر کن و 1 تا 5 سوال کوتاه و دقیق در questions برگردان. "
    "اگر هرکدام از فیلدهای اصلی خالی، null یا نامشخص بود، حق نداری status را completed بگذاری. "
    "در حالت completed هیچ فیلد null در collected_data و final_plan نباید وجود داشته باشد. "
    "از حدس زدن جزئیات برنامه خودداری کن. "
    "اگر کاربر فقط بخشی از سوالات قبلی را جواب داد، داده های جدید را با partial_plan ادغام کن و فقط سوالات باقی مانده را بپرس. "
    "Schema: "
    "{"
    '"status": "completed" | "needs_clarification", '
    '"summary": string, '
    '"missing_fields": [string], '
    '"questions": [{"id": string, "field": string, "question": string, "rationale": string}], '
    '"collected_data": {'
    '"crop_name": string|null, '
    '"growth_stage": string|null, '
    '"irrigation_method": string|null, '
    '"water_amount_per_event": string|null, '
    '"duration_minutes": integer|null, '
    '"frequency_text": string|null, '
    '"interval_days": integer|null, '
    '"preferred_time_of_day": string|null, '
    '"start_date": string|null, '
    '"target_area": string|null, '
    '"trigger_conditions": [string], '
    '"notes": [string]'
    "}, "
    '"final_plan": {same shape as collected_data} | null'
    "}."
)
|
|
|
|
|
|
class ClarificationQuestionSchema(BaseModel):
    """One follow-up question returned in the ``questions`` list.

    Mirrors the ``questions`` item shape described in the prompt schema:
    ``{"id", "field", "question", "rationale"}``.
    """

    # Identifier of the question.
    id: str
    # Name of the plan field this question asks about.
    field: str
    # Question text shown to the user.
    question: str
    # Optional explanation of why the question is asked; defaults to empty.
    rationale: str = ""
|
|
|
|
|
|
class IrrigationPlanSchema(BaseModel):
    """Structured irrigation plan, used for ``collected_data`` and ``final_plan``.

    Every scalar field is optional and defaults to ``None`` so a partially
    described plan still validates; the two list fields default to new empty
    lists.
    """

    crop_name: str | None = None
    growth_stage: str | None = None
    irrigation_method: str | None = None
    water_amount_per_event: str | None = None
    duration_minutes: int | None = None
    frequency_text: str | None = None
    interval_days: int | None = None
    preferred_time_of_day: str | None = None
    start_date: str | None = None
    target_area: str | None = None
    # default_factory gives each instance its own list instead of a shared one.
    trigger_conditions: list[str] = Field(default_factory=list)
    notes: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class IrrigationPlanParseResultSchema(BaseModel):
    """Top-level shape the model response is validated against.

    ``status`` and ``summary`` are required; every other field has a default,
    so a minimal response containing only those two keys validates.
    """

    # Only these two literal values are accepted.
    status: Literal["completed", "needs_clarification"]
    summary: str
    # Names of fields the model reports as still missing.
    missing_fields: list[str] = Field(default_factory=list)
    # Follow-up questions for the user.
    questions: list[ClarificationQuestionSchema] = Field(default_factory=list)
    # Data gathered so far; defaults to an all-empty plan.
    collected_data: IrrigationPlanSchema = Field(default_factory=IrrigationPlanSchema)
    # Final plan, or None when the response carries none.
    final_plan: IrrigationPlanSchema | None = None
|
|
|
|
|
|
class IrrigationPlanParserService:
    """Parse a free-text irrigation plan into structured JSON via a chat model.

    ``parse_plan`` is the entry point. It sends the user's message, earlier
    answers and any partial plan to the chat model, validates the JSON reply
    against ``IrrigationPlanParseResultSchema`` and returns a plain dict. When
    the model call or the validation fails it returns a locally built
    "needs_clarification" result instead of raising.
    """

    # Scalar plan fields copied into ``collected_data`` by the fallback path,
    # in output order.
    _SCALAR_PLAN_FIELDS = (
        "crop_name",
        "growth_stage",
        "irrigation_method",
        "water_amount_per_event",
        "duration_minutes",
        "frequency_text",
        "interval_days",
        "preferred_time_of_day",
        "start_date",
        "target_area",
    )

    # field name -> (question text, rationale text) for generic questions.
    _QUESTION_CATALOG = {
        "crop_name": (
            "این برنامه آبیاری برای کدام محصول است؟",
            "نام محصول برای ثبت برنامه لازم است.",
        ),
        "growth_stage": (
            "محصول الان در چه مرحله رشدی قرار دارد؟",
            "مرحله رشد برای کامل شدن برنامه لازم است.",
        ),
        "irrigation_method": (
            "روش آبیاری چیست؟ مثلا قطره ای، بارانی یا غرقابی.",
            "روش اجرا روی شکل برنامه تاثیر دارد.",
        ),
        "water_amount_per_event": (
            "در هر نوبت آبیاری چه مقدار آب داده می شود؟",
            "حجم یا عمق آب هر نوبت مشخص نشده است.",
        ),
        "duration_minutes": (
            "مدت زمان هر نوبت آبیاری چند دقیقه است؟",
            "مدت اجرای هر نوبت هنوز مشخص نیست.",
        ),
        "frequency_text": (
            "فاصله یا تعداد نوبت های آبیاری چگونه است؟ مثلا هر 3 روز یک بار.",
            "الگوی تکرار آبیاری باید مشخص باشد.",
        ),
        "interval_days": (
            "فاصله بین دو آبیاری چند روز است؟",
            "عدد فاصله آبیاری برای JSON نهایی لازم است.",
        ),
        "preferred_time_of_day": (
            "بهترین زمان اجرای آبیاری چه موقع از روز است؟",
            "زمان اجرای برنامه هنوز معلوم نیست.",
        ),
        "start_date": (
            "این برنامه از چه تاریخی یا از چه زمانی باید شروع شود؟",
            "زمان شروع برنامه هنوز مشخص نشده است.",
        ),
        "target_area": (
            "این برنامه برای کل مزرعه است یا بخش/ناحیه خاصی از مزرعه؟",
            "محدوده اجرای برنامه باید مشخص باشد.",
        ),
    }

    def parse_plan(
        self,
        *,
        message: str = "",
        answers: dict[str, Any] | None = None,
        partial_plan: dict[str, Any] | None = None,
        farm_uuid: str | None = None,
    ) -> dict[str, Any]:
        """Parse an irrigation plan description, or ask for what is missing.

        Args:
            message: Free-text description from the user; surrounding
                whitespace is stripped.
            answers: Answers to earlier clarification questions. Anything
                that is not a dict is treated as empty.
            partial_plan: Plan data collected in earlier rounds. Anything
                that is not a dict is treated as empty.
            farm_uuid: Optional farm identifier. When given, it is forwarded
                to the RAG context builder and an audit log entry is
                attempted.

        Returns:
            A dict with the keys ``status``, ``status_fa``, ``summary``,
            ``missing_fields``, ``questions``, ``collected_data`` and
            ``final_plan``.

        Errors raised while loading configuration, building the client or
        building the RAG context propagate to the caller; errors from the
        model call and from response parsing are logged and turned into the
        fallback result.
        """
        cfg = load_rag_config()
        service, client, model = self._build_service_client(cfg)

        normalized_message = (message or "").strip()
        normalized_answers = answers if isinstance(answers, dict) else {}
        normalized_partial = partial_plan if isinstance(partial_plan, dict) else {}
        structured_context = {
            "message": normalized_message,
            "answers": normalized_answers,
            "partial_plan": normalized_partial,
            "required_core_fields": CORE_FIELDS,
            "service": "irrigation_plan_parser",
            "endpoint_policy": "parser_first",
        }
        if farm_uuid:
            # Parser-first endpoint: farm context is optional enrichment only.
            structured_context["farm_context_source_metadata"] = {
                "source": "build_ai_farm_snapshot",
                "optional": True,
            }

        rag_query = self._build_retrieval_query(
            message=normalized_message,
            answers=normalized_answers,
        )
        rag_context = build_rag_context(
            query=rag_query,
            sensor_uuid=farm_uuid,
            config=cfg,
            kb_name=KB_NAME,
            service_id=SERVICE_ID,
        )
        system_prompt, messages = self._build_messages(
            service=service,
            cfg=cfg,
            structured_context=structured_context,
            rag_context=rag_context,
        )

        audit_log = None
        if farm_uuid:
            # Auditing is best-effort: a failure here must not block parsing.
            try:
                audit_log = _create_audit_log(
                    farm_uuid=farm_uuid,
                    service_id=SERVICE_ID,
                    model=model,
                    query=rag_query,
                    system_prompt=system_prompt,
                    messages=messages,
                )
            except Exception as exc:
                logger.warning("Irrigation plan parser audit log creation failed for %s: %s", farm_uuid, exc)

        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                response_format={"type": "json_object"},
            )
            raw = (response.choices[0].message.content or "").strip()
            parsed = self._clean_json(raw)
            validated = IrrigationPlanParseResultSchema.model_validate(parsed)
            normalized = self._normalize_result(validated)
        except (ValidationError, ValueError, KeyError, IndexError) as exc:
            logger.warning("Irrigation plan parser parsing failed: %s", exc)
            failure_reason = str(exc)
        except Exception as exc:
            # Unexpected failure: keep the traceback in the log.
            logger.exception("Irrigation plan parser failed: %s", exc)
            failure_reason = str(exc)
        else:
            # The audit update runs outside the try body so that an audit
            # failure cannot discard a successfully parsed result.
            self._record_audit_success(audit_log, raw)
            return normalized

        self._record_audit_failure(audit_log, failure_reason)
        return self._fallback_result(
            message=normalized_message,
            answers=normalized_answers,
            partial_plan=normalized_partial,
        )

    def _record_audit_success(self, audit_log: Any, raw: str) -> None:
        """Mark the audit log entry as completed; never raises."""
        if audit_log is None:
            return
        try:
            _complete_audit_log(audit_log, raw)
        except Exception as exc:
            logger.warning("Irrigation plan parser audit log completion failed: %s", exc)

    def _record_audit_failure(self, audit_log: Any, reason: str) -> None:
        """Mark the audit log entry as failed; never raises."""
        if audit_log is None:
            return
        try:
            _fail_audit_log(audit_log, reason)
        except Exception as exc:
            logger.warning("Irrigation plan parser audit log failure update failed: %s", exc)

    def _build_service_client(self, cfg: RAGConfig):
        """Return ``(service, client, model)`` for this service.

        Builds a copy of ``cfg`` whose ``llm`` section is replaced by the
        service's own ``llm`` settings and obtains the chat client from it.
        """
        service = get_service_config(SERVICE_ID, cfg)
        service_cfg = RAGConfig(
            embedding=cfg.embedding,
            qdrant=cfg.qdrant,
            chunking=cfg.chunking,
            llm=service.llm,
            knowledge_bases=cfg.knowledge_bases,
            services=cfg.services,
            chromadb=cfg.chromadb,
        )
        client = get_chat_client(service_cfg)
        return service, client, service.llm.model

    def _build_messages(
        self,
        *,
        service: Any,
        cfg: RAGConfig,
        structured_context: dict[str, Any],
        rag_context: str,
    ) -> tuple[str, list[dict[str, str]]]:
        """Assemble the system prompt and the chat message list.

        The system prompt is, in order: the service tone (if any), the
        service's own system prompt (if any), ``IRRIGATION_PLAN_PROMPT``, the
        JSON-serialized ``structured_context`` and the RAG context (if any),
        joined by blank lines.

        Returns:
            ``(system_prompt, messages)`` where ``messages`` holds the system
            message followed by one fixed user instruction.
        """
        tone = _load_service_tone(service, cfg)
        system_parts = [tone] if tone else []
        if service.system_prompt:
            system_parts.append(service.system_prompt)
        system_parts.append(IRRIGATION_PLAN_PROMPT)
        system_parts.append(
            "[structured_context]\n"
            + json.dumps(structured_context, ensure_ascii=False, indent=2, default=str)
        )
        if rag_context:
            system_parts.append(rag_context)
        system_prompt = "\n\n".join(part for part in system_parts if part)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": "برنامه آبیاری را استخراج یا برای تکمیل آن سوال بپرس."},
        ]
        return system_prompt, messages

    def _build_retrieval_query(
        self,
        *,
        message: str,
        answers: dict[str, Any],
    ) -> str:
        """Build the retrieval query from the message and the answers.

        Each answer becomes a ``key: value`` line. When both the message and
        the answers are empty, a fixed default query is returned.
        """
        answer_lines = [f"{key}: {value}" for key, value in answers.items()]
        parts = [part for part in [message, "\n".join(answer_lines)] if part]
        return "\n".join(parts) or "استخراج برنامه آبیاری از متن کاربر"

    def _normalize_result(self, validated: IrrigationPlanParseResultSchema) -> dict[str, Any]:
        """Convert a validated model response into the public result dict.

        The model's ``completed`` status is honored only when no field is
        missing, counting both the fields the model listed and the core
        fields found empty in the plan. Otherwise the result is downgraded to
        ``needs_clarification`` and ``final_plan`` is dropped. ``validated``
        is not modified.
        """
        collected = validated.collected_data.model_dump()
        final_plan = validated.final_plan.model_dump() if validated.final_plan is not None else None

        # Model-reported fields first, then locally detected ones; dict keys
        # de-duplicate while keeping first-seen order.
        computed_missing = self._find_missing_fields(final_plan or collected)
        missing_fields = list(dict.fromkeys([*validated.missing_fields, *computed_missing]))

        if validated.status == "completed" and not missing_fields:
            return {
                "status": "completed",
                "status_fa": "تکمیل شد",
                "summary": validated.summary,
                "missing_fields": missing_fields,
                "questions": [],
                "collected_data": collected,
                "final_plan": final_plan or collected,
            }

        questions = [item.model_dump() for item in validated.questions]
        if not questions and missing_fields:
            # The model gave no questions: fall back to the built-in ones.
            questions = self._build_generic_questions(missing_fields)
        return {
            "status": "needs_clarification",
            "status_fa": "نیازمند پرسش تکمیلی",
            "summary": validated.summary,
            "missing_fields": missing_fields,
            "questions": questions,
            "collected_data": collected,
            "final_plan": None,
        }

    def _fallback_result(
        self,
        *,
        message: str,
        answers: dict[str, Any],
        partial_plan: dict[str, Any],
    ) -> dict[str, Any]:
        """Build a ``needs_clarification`` result without the model.

        Answers are merged into ``partial_plan`` (see ``_merge_answers``) and
        the user's message, if any, is appended to the notes. Only the core
        fields that are still empty are reported as missing. If none is empty
        the full core-field list is reported, so the response always carries
        questions.
        """
        merged = self._merge_answers(partial_plan, answers)

        raw_notes = merged.get("notes") or []
        # A bare string must stay one note; list() would split it into characters.
        notes = [raw_notes] if isinstance(raw_notes, str) else list(raw_notes)
        if message:
            notes.append(f"متن اولیه کاربر: {message}")

        collected = {field: merged.get(field) for field in self._SCALAR_PLAN_FIELDS}
        collected["trigger_conditions"] = merged.get("trigger_conditions") or []
        collected["notes"] = notes

        # list(...) returns a copy so callers cannot mutate the module constant.
        missing_fields = self._find_missing_fields(collected) or list(CORE_FIELDS)

        return {
            "status": "needs_clarification",
            "status_fa": "نیازمند پرسش تکمیلی",
            "summary": "اطلاعات برنامه آبیاری برای ساخت JSON نهایی کافی نیست و به چند پاسخ تکمیلی نیاز است.",
            "missing_fields": missing_fields,
            "questions": self._build_generic_questions(missing_fields),
            "collected_data": collected,
            "final_plan": None,
        }

    def _merge_answers(self, partial_plan: dict[str, Any], answers: dict[str, Any]) -> dict[str, Any]:
        """Return a copy of ``partial_plan`` with its empty slots filled from ``answers``.

        A non-empty value already in ``partial_plan`` wins over the answer.
        A key that is absent, ``None`` or a blank string is filled from
        ``answers``. Neither argument is modified.
        """
        merged = dict(partial_plan)
        for key, value in answers.items():
            if self._is_blank(merged.get(key)):
                merged[key] = value
        return merged

    def _build_generic_questions(self, missing_fields: list[str]) -> list[dict[str, str]]:
        """Return built-in questions for ``missing_fields``, at most five.

        Fields without a catalog entry are skipped. Each question is a new
        dict with the keys ``id``, ``field``, ``question`` and ``rationale``,
        where ``id`` and ``field`` both equal the field name.
        """
        questions: list[dict[str, str]] = []
        for field in missing_fields:
            entry = self._QUESTION_CATALOG.get(field)
            if entry is None:
                continue
            question, rationale = entry
            questions.append(
                {"id": field, "field": field, "question": question, "rationale": rationale}
            )
        return questions[:5]

    def _find_missing_fields(self, plan: dict[str, Any]) -> list[str]:
        """Return the core fields whose value in ``plan`` is absent or blank."""
        return [field for field in CORE_FIELDS if self._is_blank(plan.get(field))]

    @staticmethod
    def _is_blank(value: Any) -> bool:
        """Return True for ``None`` and for empty or whitespace-only strings."""
        return value is None or (isinstance(value, str) and not value.strip())

    def _clean_json(self, raw: str) -> dict[str, Any]:
        """Parse the model's reply into a dict.

        Strips a surrounding Markdown code fence, with or without a ``json``
        tag in any letter case. If the text still does not parse, the span
        from the first ``{`` to the last ``}`` is tried once.

        Raises:
            ValueError: If the reply is empty, is not valid JSON, or its root
                is not a JSON object. ``json.JSONDecodeError`` is a subclass
                of ``ValueError``.
        """
        cleaned = (raw or "").strip()
        if cleaned.startswith("```"):
            cleaned = cleaned.strip("`")
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("Irrigation plan parser response was empty.")

        try:
            parsed = json.loads(cleaned)
        except json.JSONDecodeError:
            # Tolerate text around the object, e.g. prose after a closing fence.
            start = cleaned.find("{")
            end = cleaned.rfind("}")
            if start == -1 or end <= start:
                raise
            parsed = json.loads(cleaned[start : end + 1])

        if not isinstance(parsed, dict):
            raise ValueError("Irrigation plan parser response root must be an object.")
        return parsed
|