# Source file: Logic/Modules/Ai/rag/services/yield_harvest.py
# Snapshot: 2026-05-11 03:27:21 +03:30 - 264 lines, 11 KiB, Python

from __future__ import annotations
import json
import logging
from typing import Any
from pydantic import BaseModel, Field, ValidationError
from rag.api_provider import get_chat_client
from rag.chat import (
_complete_audit_log,
_create_audit_log,
_fail_audit_log,
_load_service_tone,
)
from rag.config import RAGConfig, get_service_config, load_rag_config
from rag.failure_contract import RAGServiceError
logger = logging.getLogger(__name__)
SERVICE_ID = "yield_harvest"
YIELD_HARVEST_PROMPT = (
"You are an expert agronomist writing concise dashboard narratives for farmers. "
"Return only valid JSON matching this schema exactly: "
"{"
'"season_highlights_subtitle": string, '
'"yield_prediction_explanation": string, '
'"harvest_readiness_summary": string, '
'"operation_notes": [string, ...]'
"}. "
"Do not add markdown, explanations, or extra keys. "
"Strict Golden Rule: do not invent numbers, dates, prices, revenues, percentages, KPIs, scores, or measurements. "
"Use only values already present in the deterministic context. "
"If a fact is missing from the context, say less rather than guessing."
)
class YieldHarvestNarrativeSchema(BaseModel):
season_highlights_subtitle: str
yield_prediction_explanation: str
harvest_readiness_summary: str
operation_notes: list[str] = Field(default_factory=list)
class YieldHarvestRAGService:
def generate_narrative(
self,
deterministic_context: dict[str, Any],
) -> dict[str, Any]:
cfg = load_rag_config()
service, client, model = self._build_service_client(cfg)
structured_context = self._build_structured_context(
deterministic_context=deterministic_context,
)
user_prompt = (
"Generate short user-friendly narrative fields for the Yield & Harvest Summary dashboard "
"using only the deterministic context. Keep the language practical and agronomy-focused."
)
system_prompt, messages = self._build_messages(
service=service,
cfg=cfg,
structured_context=structured_context,
query=user_prompt,
)
farm_uuid = str(deterministic_context.get("farm_uuid") or "")
audit_log = None
if farm_uuid:
try:
audit_log = _create_audit_log(
farm_uuid=farm_uuid,
service_id=SERVICE_ID,
model=model,
query=user_prompt,
system_prompt=system_prompt,
messages=messages,
)
except Exception as exc:
logger.warning("Yield harvest audit log creation failed for %s: %s", farm_uuid, exc)
try:
response = client.chat.completions.create(
model=model,
messages=messages,
response_format={"type": "json_object"},
)
raw = (response.choices[0].message.content or "").strip()
parsed = self._clean_json(raw)
validated = YieldHarvestNarrativeSchema.model_validate(parsed)
if audit_log is not None:
_complete_audit_log(audit_log, raw)
return {
"status": "success",
"source": "llm",
"season_highlights_subtitle": validated.season_highlights_subtitle,
"yield_prediction_explanation": validated.yield_prediction_explanation,
"harvest_readiness_summary": validated.harvest_readiness_summary,
"operation_notes": validated.operation_notes,
}
except (ValidationError, ValueError, KeyError, IndexError) as exc:
logger.warning("Yield harvest narrative parsing failed for farm_uuid=%s: %s", farm_uuid, exc)
if audit_log is not None:
_fail_audit_log(audit_log, str(exc))
raise RAGServiceError(
error_code="invalid_payload",
message=f"Yield harvest narrative parsing failed for farm_uuid={farm_uuid or 'unknown'}.",
source="llm",
details={"farm_uuid": farm_uuid or "unknown", "service_id": SERVICE_ID},
http_status=502,
) from exc
except Exception as exc:
logger.error("Yield harvest narrative LLM call failed for farm_uuid=%s: %s", farm_uuid, exc)
if audit_log is not None:
_fail_audit_log(audit_log, str(exc))
raise RAGServiceError(
error_code="upstream_failure",
message=f"Yield harvest narrative generation failed for farm_uuid={farm_uuid or 'unknown'}.",
source="llm",
retriable=True,
details={"farm_uuid": farm_uuid or "unknown", "service_id": SERVICE_ID},
http_status=503,
) from exc
def _build_service_client(self, cfg: RAGConfig):
service = get_service_config(SERVICE_ID, cfg)
service_cfg = RAGConfig(
embedding=cfg.embedding,
qdrant=cfg.qdrant,
chunking=cfg.chunking,
llm=service.llm,
knowledge_bases=cfg.knowledge_bases,
services=cfg.services,
chromadb=cfg.chromadb,
)
client = get_chat_client(service_cfg)
return service, client, service.llm.model
def _build_messages(
self,
*,
service: Any,
cfg: RAGConfig,
structured_context: dict[str, Any],
query: str,
) -> tuple[str, list[dict[str, str]]]:
tone = _load_service_tone(service, cfg)
system_parts = [tone] if tone else []
if service.system_prompt:
system_parts.append(service.system_prompt)
system_parts.append(YIELD_HARVEST_PROMPT)
system_parts.append(
"[deterministic_context]\n"
+ json.dumps(structured_context, ensure_ascii=False, indent=2, default=str)
)
system_prompt = "\n\n".join(part for part in system_parts if part)
messages = [
{"role": "system", "content": system_prompt},
{"role": "user", "content": query},
]
return system_prompt, messages
def _build_structured_context(
self,
*,
deterministic_context: dict[str, Any],
) -> dict[str, Any]:
season = deterministic_context.get("season_highlights_card") or {}
harvest = deterministic_context.get("harvest_prediction_card") or {}
operations = deterministic_context.get("harvest_operations_card") or {}
yield_prediction = deterministic_context.get("yield_prediction") or {}
readiness = deterministic_context.get("harvest_readiness_zones") or {}
operation_steps = []
for step in operations.get("steps") or []:
if not isinstance(step, dict):
continue
operation_steps.append(
{
"key": step.get("key"),
"title": step.get("title"),
"status": step.get("status"),
}
)
return {
"farm_context": deterministic_context.get("farm_context") or {},
"yield_prediction": {
"predicted_yield_tons": yield_prediction.get("predicted_yield_tons"),
"unit": yield_prediction.get("unit"),
"simulation_warning": yield_prediction.get("simulation_warning"),
"supporting_metrics": yield_prediction.get("supporting_metrics"),
},
"season_highlights_card": {
"title": season.get("title"),
"subtitle": season.get("subtitle"),
"total_predicted_yield": season.get("total_predicted_yield"),
"yield_unit": season.get("yield_unit"),
"target_harvest_date": season.get("target_harvest_date"),
"days_until_harvest": season.get("days_until_harvest"),
"average_readiness": season.get("average_readiness"),
"primary_quality_grade": season.get("primary_quality_grade"),
"estimated_revenue": season.get("estimated_revenue"),
},
"harvest_prediction_card": {
"harvest_date": harvest.get("harvest_date"),
"harvest_date_formatted": harvest.get("harvest_date_formatted"),
"days_until": harvest.get("days_until"),
"optimal_window_start": harvest.get("optimal_window_start"),
"optimal_window_end": harvest.get("optimal_window_end"),
"description": harvest.get("description"),
},
"harvest_readiness_zones": {
"average_readiness": readiness.get("averageReadiness"),
"mean_ndvi": readiness.get("meanNdvi"),
"ndvi_trend": readiness.get("ndviTrend"),
"zones": readiness.get("zones"),
},
"harvest_operations_card": {
"stage_label": operations.get("stage_label"),
"days_until_harvest": operations.get("days_until_harvest"),
"current_dvs": operations.get("current_dvs"),
"summary": operations.get("summary"),
"steps": operation_steps,
},
}
def _clean_json(self, raw: str) -> dict[str, Any]:
cleaned = (raw or "").strip()
if cleaned.startswith("```"):
cleaned = cleaned.strip("`")
if cleaned.startswith("json"):
cleaned = cleaned[4:]
cleaned = cleaned.strip()
if not cleaned:
raise RAGServiceError(
error_code="empty_response",
message="Yield harvest narrative response was empty.",
source="llm",
retriable=True,
details={"service_id": SERVICE_ID},
http_status=502,
)
try:
parsed = json.loads(cleaned)
except (json.JSONDecodeError, ValueError) as exc:
raise RAGServiceError(
error_code="invalid_json",
message="Yield harvest narrative response was not valid JSON.",
source="llm",
retriable=True,
details={"service_id": SERVICE_ID},
http_status=502,
) from exc
if not isinstance(parsed, dict):
raise RAGServiceError(
error_code="invalid_schema",
message="Yield harvest narrative response root must be a JSON object.",
source="llm",
details={"service_id": SERVICE_ID},
http_status=502,
)
return parsed