228 lines
9.1 KiB
Python
228 lines
9.1 KiB
Python
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
from typing import Any
|
|
|
|
from pydantic import BaseModel, Field, ValidationError
|
|
|
|
from rag.api_provider import get_chat_client
|
|
from rag.chat import (
|
|
_complete_audit_log,
|
|
_create_audit_log,
|
|
_fail_audit_log,
|
|
_load_service_tone,
|
|
)
|
|
from rag.config import RAGConfig, get_service_config, load_rag_config
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Service identifier used to look up this service's configuration and to tag
# audit-log entries.
SERVICE_ID = "yield_harvest"

# System-prompt fragment that pins the response to a fixed JSON shape and
# forbids the model from inventing any figures that are not in the
# deterministic context. The pieces are joined with no separator, so every
# fragment carries its own trailing space where one is needed.
YIELD_HARVEST_PROMPT = "".join(
    (
        # Role.
        "You are an expert agronomist writing concise dashboard narratives for farmers. ",
        # Output contract: exactly these four keys.
        "Return only valid JSON matching this schema exactly: ",
        "{",
        '"season_highlights_subtitle": string, ',
        '"yield_prediction_explanation": string, ',
        '"harvest_readiness_summary": string, ',
        '"operation_notes": [string, ...]',
        "}. ",
        "Do not add markdown, explanations, or extra keys. ",
        # Grounding rules.
        "Strict Golden Rule: do not invent numbers, dates, prices, revenues, percentages, KPIs, scores, or measurements. ",
        "Use only values already present in the deterministic context. ",
        "If a fact is missing from the context, say less rather than guessing.",
    )
)
|
|
|
|
|
|
class YieldHarvestNarrativeSchema(BaseModel):
    """Shape of the JSON object the model is asked to return.

    The three text fields are required; ``operation_notes`` may be omitted
    from the response, in which case it is an empty list.
    """

    # Required narrative strings.
    season_highlights_subtitle: str
    yield_prediction_explanation: str
    harvest_readiness_summary: str

    # Optional list of notes; a fresh empty list is created per instance.
    operation_notes: list[str] = Field(default_factory=list)
|
|
|
|
|
|
class YieldHarvestRAGService:
    """Produce LLM-written narrative fields for the Yield & Harvest dashboard.

    The service trims the caller's deterministic context down to a fixed set
    of fields, embeds it in the system prompt, asks the configured chat model
    for a JSON object, and validates that object against
    ``YieldHarvestNarrativeSchema``.
    """

    def generate_narrative(
        self,
        deterministic_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Return the narrative fields for one farm, or ``{}`` on failure.

        Args:
            deterministic_context: Pre-computed dashboard data. If it contains
                a truthy ``farm_uuid``, the call is recorded in an audit log.

        Returns:
            A dict with ``season_highlights_subtitle``,
            ``yield_prediction_explanation``, ``harvest_readiness_summary``
            and ``operation_notes``; or an empty dict when the model call
            fails or its output cannot be parsed/validated.

        Raises:
            Whatever configuration loading, client construction or prompt
            building raise; only the model call and response handling are
            converted into an empty result.
        """
        cfg = load_rag_config()
        service, client, model = self._build_service_client(cfg)
        structured_context = self._build_structured_context(
            deterministic_context=deterministic_context,
        )
        user_prompt = (
            "Generate short user-friendly narrative fields for the Yield & Harvest Summary dashboard "
            "using only the deterministic context. Keep the language practical and agronomy-focused."
        )
        system_prompt, messages = self._build_messages(
            service=service,
            cfg=cfg,
            structured_context=structured_context,
            query=user_prompt,
        )

        farm_uuid = str(deterministic_context.get("farm_uuid") or "")
        audit_log = None
        if farm_uuid:
            # Auditing is best-effort: a failure here must not block the narrative.
            try:
                audit_log = _create_audit_log(
                    farm_uuid=farm_uuid,
                    service_id=SERVICE_ID,
                    model=model,
                    query=user_prompt,
                    system_prompt=system_prompt,
                    messages=messages,
                )
            except Exception as exc:
                logger.warning("Yield harvest audit log creation failed for %s: %s", farm_uuid, exc)

        # Only the model call and the parsing/validation of its output live in
        # this try block, so that an audit-log error can never be mistaken for
        # an LLM failure and discard a valid narrative.
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                response_format={"type": "json_object"},
            )
            raw = (response.choices[0].message.content or "").strip()
            parsed = self._clean_json(raw)
            validated = YieldHarvestNarrativeSchema.model_validate(parsed)
        except (ValidationError, ValueError, KeyError, IndexError) as exc:
            logger.warning("Yield harvest narrative parsing failed for farm_uuid=%s: %s", farm_uuid, exc)
            self._record_audit_failure(audit_log, exc, farm_uuid)
            return {}
        except Exception as exc:
            logger.error("Yield harvest narrative LLM call failed for farm_uuid=%s: %s", farm_uuid, exc)
            self._record_audit_failure(audit_log, exc, farm_uuid)
            return {}

        self._record_audit_success(audit_log, raw, farm_uuid)
        return {
            "season_highlights_subtitle": validated.season_highlights_subtitle,
            "yield_prediction_explanation": validated.yield_prediction_explanation,
            "harvest_readiness_summary": validated.harvest_readiness_summary,
            "operation_notes": validated.operation_notes,
        }

    def _record_audit_success(self, audit_log: Any, raw: str, farm_uuid: str) -> None:
        """Mark the audit log as completed with the raw response; never raises."""
        if audit_log is None:
            return
        try:
            _complete_audit_log(audit_log, raw)
        except Exception as audit_exc:
            logger.warning("Yield harvest audit log completion failed for %s: %s", farm_uuid, audit_exc)

    def _record_audit_failure(self, audit_log: Any, exc: BaseException, farm_uuid: str) -> None:
        """Mark the audit log as failed with ``str(exc)``; never raises."""
        if audit_log is None:
            return
        try:
            _fail_audit_log(audit_log, str(exc))
        except Exception as audit_exc:
            logger.warning("Yield harvest audit log failure update failed for %s: %s", farm_uuid, audit_exc)

    def _build_service_client(self, cfg: RAGConfig):
        """Return ``(service, client, model)`` for this service.

        The chat client is built from a copy of ``cfg`` whose ``llm`` section
        is replaced by the service's own ``llm`` settings; every other section
        is passed through unchanged. ``model`` is ``service.llm.model``.
        """
        service = get_service_config(SERVICE_ID, cfg)
        service_cfg = RAGConfig(
            embedding=cfg.embedding,
            qdrant=cfg.qdrant,
            chunking=cfg.chunking,
            llm=service.llm,
            knowledge_bases=cfg.knowledge_bases,
            services=cfg.services,
            chromadb=cfg.chromadb,
        )
        client = get_chat_client(service_cfg)
        return service, client, service.llm.model

    def _build_messages(
        self,
        *,
        service: Any,
        cfg: RAGConfig,
        structured_context: dict[str, Any],
        query: str,
    ) -> tuple[str, list[dict[str, str]]]:
        """Assemble the system prompt and the chat message list.

        The system prompt is, in order and separated by blank lines: the tone
        text returned by ``_load_service_tone`` (if any), the service's own
        ``system_prompt`` (if any), ``YIELD_HARVEST_PROMPT``, and the
        structured context serialized as indented JSON under a
        ``[deterministic_context]`` header.

        Returns:
            ``(system_prompt, messages)`` where ``messages`` holds one system
            message and one user message carrying ``query``.
        """
        tone = _load_service_tone(service, cfg)
        system_parts = [tone] if tone else []
        if service.system_prompt:
            system_parts.append(service.system_prompt)
        system_parts.append(YIELD_HARVEST_PROMPT)
        system_parts.append(
            "[deterministic_context]\n"
            # default=str stringifies values json cannot encode natively.
            + json.dumps(structured_context, ensure_ascii=False, indent=2, default=str)
        )
        system_prompt = "\n\n".join(part for part in system_parts if part)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": query},
        ]
        return system_prompt, messages

    @staticmethod
    def _as_mapping(value: Any) -> Any:
        """Return ``value`` if it is truthy and exposes ``.get``; otherwise ``{}``.

        Guards the field lookups below against a section that arrives as a
        string, list, number or other non-mapping value.
        """
        if value and callable(getattr(value, "get", None)):
            return value
        return {}

    def _build_structured_context(
        self,
        *,
        deterministic_context: dict[str, Any],
    ) -> dict[str, Any]:
        """Project the deterministic context onto the fields sent to the model.

        Missing sections and missing fields become ``None`` values rather than
        errors. Sections that are present but not mapping-like are treated as
        missing. Operation steps that are not dicts are skipped, and only each
        step's ``key``, ``title`` and ``status`` are kept.

        Returns:
            A dict with the keys ``farm_context``, ``yield_prediction``,
            ``season_highlights_card``, ``harvest_prediction_card``,
            ``harvest_readiness_zones`` and ``harvest_operations_card``.
        """
        section = deterministic_context.get
        season = self._as_mapping(section("season_highlights_card"))
        harvest = self._as_mapping(section("harvest_prediction_card"))
        operations = self._as_mapping(section("harvest_operations_card"))
        yield_prediction = self._as_mapping(section("yield_prediction"))
        readiness = self._as_mapping(section("harvest_readiness_zones"))

        operation_steps = [
            {
                "key": step.get("key"),
                "title": step.get("title"),
                "status": step.get("status"),
            }
            for step in operations.get("steps") or []
            if isinstance(step, dict)
        ]

        return {
            "farm_context": deterministic_context.get("farm_context") or {},
            "yield_prediction": {
                "predicted_yield_tons": yield_prediction.get("predicted_yield_tons"),
                "unit": yield_prediction.get("unit"),
                "simulation_warning": yield_prediction.get("simulation_warning"),
                "supporting_metrics": yield_prediction.get("supporting_metrics"),
            },
            "season_highlights_card": {
                "title": season.get("title"),
                "subtitle": season.get("subtitle"),
                "total_predicted_yield": season.get("total_predicted_yield"),
                "yield_unit": season.get("yield_unit"),
                "target_harvest_date": season.get("target_harvest_date"),
                "days_until_harvest": season.get("days_until_harvest"),
                "average_readiness": season.get("average_readiness"),
                "primary_quality_grade": season.get("primary_quality_grade"),
                "estimated_revenue": season.get("estimated_revenue"),
            },
            "harvest_prediction_card": {
                "harvest_date": harvest.get("harvest_date"),
                "harvest_date_formatted": harvest.get("harvest_date_formatted"),
                "days_until": harvest.get("days_until"),
                "optimal_window_start": harvest.get("optimal_window_start"),
                "optimal_window_end": harvest.get("optimal_window_end"),
                "description": harvest.get("description"),
            },
            # The readiness section uses camelCase keys on input; they are
            # renamed to snake_case here.
            "harvest_readiness_zones": {
                "average_readiness": readiness.get("averageReadiness"),
                "mean_ndvi": readiness.get("meanNdvi"),
                "ndvi_trend": readiness.get("ndviTrend"),
                "zones": readiness.get("zones"),
            },
            "harvest_operations_card": {
                "stage_label": operations.get("stage_label"),
                "days_until_harvest": operations.get("days_until_harvest"),
                "current_dvs": operations.get("current_dvs"),
                "summary": operations.get("summary"),
                "steps": operation_steps,
            },
        }

    def _clean_json(self, raw: str) -> dict[str, Any]:
        """Parse the model's reply into a dict, tolerating a Markdown code fence.

        A reply that starts with a backtick fence has its surrounding
        backticks removed, along with an optional ``json`` language tag in any
        letter case.

        Raises:
            ValueError: If the reply is empty, is not valid JSON, or its root
                is not a JSON object.
        """
        cleaned = (raw or "").strip()
        if cleaned.startswith("```"):
            # Drop the fence, then any whitespace between it and the tag.
            cleaned = cleaned.strip("`").lstrip()
            if cleaned[:4].lower() == "json":
                cleaned = cleaned[4:]
            cleaned = cleaned.strip()
        if not cleaned:
            raise ValueError("Yield harvest narrative response was empty.")
        try:
            parsed = json.loads(cleaned)
        except (json.JSONDecodeError, ValueError) as exc:
            raise ValueError("Yield harvest narrative response was not valid JSON.") from exc
        if not isinstance(parsed, dict):
            raise ValueError("Yield harvest narrative response root must be a JSON object.")
        return parsed
|