from __future__ import annotations

import math
from typing import Any

from django.db.models import Avg, QuerySet

from .models import (
    AnalysisGridObservation,
    RemoteSensingRun,
    RemoteSensingSubdivisionResult,
    SoilLocation,
)

# Observation fields that are averaged into satellite metric summaries.
SATELLITE_METRIC_FIELDS = (
    "ndvi",
    "ndwi",
    "soil_vv_db",
    "dem_m",
    "slope_deg",
)


def build_location_satellite_snapshot(
    location: SoilLocation,
    *,
    block_code: str = "",
    sensor_payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the combined satellite + sensor metric snapshot for one block.

    Args:
        location: Soil location whose runs and block layout are consulted.
        block_code: Block to summarise; ``""`` selects runs stored with an
            empty block code.
        sensor_payload: Optional mapping of sensor key to that sensor's
            values; anything that is not a dict is treated as "no sensors".

    Returns:
        A snapshot dict. When no successful run exists the snapshot carries
        sensor metrics only and its status is ``"missing"`` if those are
        empty too. Otherwise satellite metrics are the base and every sensor
        metric overrides the satellite metric of the same name.
    """
    run = get_latest_completed_remote_sensing_run(location, block_code=block_code)
    sensor_summary = build_block_sensor_summary(
        location,
        block_code=block_code,
        sensor_payload=sensor_payload,
    )

    if run is None:
        # No satellite data at all: fall back to whatever the sensors gave us.
        resolved_metrics = dict(sensor_summary["resolved_metrics"])
        return {
            "status": "completed" if resolved_metrics else "missing",
            "block_code": block_code,
            "run_id": None,
            "temporal_extent": None,
            "cell_count": 0,
            "sub_block_count": int(sensor_summary["sub_block_count"]),
            "aggregation_strategy": (
                "sub_block_mean" if sensor_summary["sub_block_count"] else "missing"
            ),
            "satellite_metrics": {},
            "sensor_metrics": sensor_summary["resolved_metrics"],
            "sensor_metric_sources": sensor_summary["metric_sources"],
            "sensor_sub_blocks": sensor_summary["sub_blocks"],
            "satellite_sub_blocks": [],
            "resolved_metrics": resolved_metrics,
            "metric_sources": dict(sensor_summary["metric_sources"]),
        }

    observations = get_run_observations(run)
    subdivision_result = get_latest_subdivision_result(
        location,
        block_code=block_code,
        run=run,
    )
    satellite_summary = summarize_block_satellite_metrics(
        run=run,
        observations=observations,
        subdivision_result=subdivision_result,
    )

    # Copy before merging so "satellite_metrics" below stays satellite-only.
    resolved_metrics = dict(satellite_summary["resolved_metrics"])
    metric_sources = dict(satellite_summary["metric_sources"])
    for metric_name, metric_value in sensor_summary["resolved_metrics"].items():
        # Sensor readings take precedence over the satellite value.
        resolved_metrics[metric_name] = metric_value
        metric_sources[metric_name] = sensor_summary["metric_sources"].get(metric_name, {})

    return {
        "status": "completed",
        "block_code": run.block_code,
        "run_id": run.id,
        "temporal_extent": {
            "start_date": run.temporal_start.isoformat() if run.temporal_start else None,
            "end_date": run.temporal_end.isoformat() if run.temporal_end else None,
        },
        "cell_count": observations.count(),
        "sub_block_count": int(
            max(satellite_summary["sub_block_count"], sensor_summary["sub_block_count"])
        ),
        "aggregation_strategy": satellite_summary["aggregation_strategy"],
        "satellite_metrics": satellite_summary["resolved_metrics"],
        "sensor_metrics": sensor_summary["resolved_metrics"],
        "sensor_metric_sources": sensor_summary["metric_sources"],
        "sensor_sub_blocks": sensor_summary["sub_blocks"],
        "satellite_sub_blocks": satellite_summary["sub_blocks"],
        "resolved_metrics": resolved_metrics,
        "metric_sources": metric_sources,
    }


def build_location_block_satellite_snapshots(
    location: SoilLocation,
    *,
    sensor_payload: dict[str, Any] | None = None,
) -> list[dict[str, Any]]:
    """Build one snapshot per block listed in ``location.block_layout``.

    When the layout has no blocks, a single snapshot for the empty block
    code is returned instead, so the result is never an empty list.
    """
    block_layout = location.block_layout or {}
    blocks = block_layout.get("blocks") or []
    if not blocks:
        return [build_location_satellite_snapshot(location, sensor_payload=sensor_payload)]

    return [
        build_location_satellite_snapshot(
            location,
            block_code=str(block.get("block_code") or "").strip(),
            sensor_payload=sensor_payload,
        )
        for block in blocks
    ]


def build_block_layout_metric_summary(
    location: SoilLocation,
    *,
    sensor_payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Return a copy of the block layout with per-block aggregated metrics.

    The layout dict and each block dict are shallow-copied, so the
    ``aggregated_metrics`` entry is added to the copies and not to
    ``location.block_layout`` itself. Blocks without a matching snapshot get
    empty metric containers.
    """
    layout = dict(location.block_layout or {})
    blocks = [dict(block) for block in (layout.get("blocks") or [])]
    snapshots_by_block_code = {
        str(snapshot.get("block_code") or ""): snapshot
        for snapshot in build_location_block_satellite_snapshots(
            location,
            sensor_payload=sensor_payload,
        )
    }

    for block in blocks:
        snapshot = snapshots_by_block_code.get(str(block.get("block_code") or "").strip(), {})
        block["aggregated_metrics"] = {
            "resolved_metrics": snapshot.get("resolved_metrics", {}),
            "metric_sources": snapshot.get("metric_sources", {}),
            "satellite_metrics": snapshot.get("satellite_metrics", {}),
            "sensor_metrics": snapshot.get("sensor_metrics", {}),
            "sub_block_count": snapshot.get("sub_block_count", 0),
            "satellite_sub_blocks": snapshot.get("satellite_sub_blocks", []),
            "sensor_sub_blocks": snapshot.get("sensor_sub_blocks", []),
        }

    layout["blocks"] = blocks
    return layout


def build_farmer_block_aggregated_snapshot(
    location: SoilLocation,
    *,
    sensor_payload: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Average the per-block snapshots into one location-wide snapshot.

    Only block snapshots with a non-empty ``resolved_metrics`` dict take part
    in the average. If none qualify, the snapshot for the empty block code is
    used as a fallback and its metrics are passed through unchanged.
    """
    block_snapshots = build_location_block_satellite_snapshots(
        location,
        sensor_payload=sensor_payload,
    )
    usable_snapshots = [
        snapshot
        for snapshot in block_snapshots
        if isinstance(snapshot.get("resolved_metrics"), dict)
        and snapshot.get("resolved_metrics")
    ]

    if not usable_snapshots:
        fallback_snapshot = build_location_satellite_snapshot(
            location,
            sensor_payload=sensor_payload,
        )
        return {
            "status": fallback_snapshot.get("status", "missing"),
            "aggregation_strategy": (
                "farmer_block_mean"
                if fallback_snapshot.get("resolved_metrics")
                else "missing"
            ),
            "block_count": len(block_snapshots),
            "resolved_metrics": dict(fallback_snapshot.get("resolved_metrics") or {}),
            "metric_sources": dict(fallback_snapshot.get("metric_sources") or {}),
            "blocks": block_snapshots,
        }

    block_metric_maps = [
        snapshot.get("resolved_metrics") or {} for snapshot in usable_snapshots
    ]
    resolved_metrics = average_metric_maps(block_metric_maps)
    metric_sources = {
        metric_name: {
            "type": "farmer_block",
            "strategy": "average_of_main_blocks",
            "block_count": _count_maps_with_metric(block_metric_maps, metric_name),
        }
        for metric_name in resolved_metrics
    }
    return {
        "status": "completed",
        "aggregation_strategy": "farmer_block_mean",
        "block_count": len(usable_snapshots),
        "resolved_metrics": resolved_metrics,
        "metric_sources": metric_sources,
        "blocks": block_snapshots,
    }


def get_latest_completed_remote_sensing_run(
    location: SoilLocation,
    *,
    block_code: str = "",
) -> RemoteSensingRun | None:
    """Return the newest successful run for the location/block, or ``None``.

    "Newest" is decided by ``temporal_end``, then ``created_at``, then ``id``,
    all descending.
    """
    return (
        RemoteSensingRun.objects.filter(
            soil_location=location,
            block_code=block_code or "",
            status=RemoteSensingRun.STATUS_SUCCESS,
        )
        .order_by("-temporal_end", "-created_at", "-id")
        .first()
    )


def get_run_observations(run: RemoteSensingRun) -> QuerySet[AnalysisGridObservation]:
    """Return the grid observations that correspond to ``run``.

    Observations are matched on the run's soil location, block code and
    temporal window (not on a run foreign key) and ordered by cell code.
    """
    return (
        AnalysisGridObservation.objects.select_related("cell", "run")
        .filter(
            cell__soil_location=run.soil_location,
            cell__block_code=run.block_code or "",
            temporal_start=run.temporal_start,
            temporal_end=run.temporal_end,
        )
        .order_by("cell__cell_code")
    )


def get_latest_subdivision_result(
    location: SoilLocation,
    *,
    block_code: str = "",
    run: RemoteSensingRun | None = None,
) -> RemoteSensingSubdivisionResult | None:
    """Return the newest subdivision result for the location/block.

    When ``run`` is given, only results attached to that run are considered.
    Cluster blocks and assignments (with their cells) are prefetched.
    """
    queryset = (
        RemoteSensingSubdivisionResult.objects.filter(
            soil_location=location,
            block_code=block_code or "",
        )
        .select_related("run")
        .prefetch_related("cluster_blocks", "assignments__cell")
        .order_by("-temporal_end", "-created_at", "-id")
    )
    if run is not None:
        queryset = queryset.filter(run=run)
    return queryset.first()


def summarize_observations(
    observations: QuerySet[AnalysisGridObservation],
) -> dict[str, float]:
    """Average every satellite metric over ``observations`` in the database.

    Metrics whose aggregate comes back as ``None`` are omitted; the remaining
    means are rounded to 6 decimal places.
    """
    aggregates = observations.aggregate(
        **{
            f"{metric_name}_mean": Avg(metric_name)
            for metric_name in SATELLITE_METRIC_FIELDS
        }
    )
    summary: dict[str, float] = {}
    for metric_name in SATELLITE_METRIC_FIELDS:
        value = aggregates.get(f"{metric_name}_mean")
        if value is None:
            continue
        summary[metric_name] = round(float(value), 6)
    return summary


def summarize_block_satellite_metrics(
    *,
    run: RemoteSensingRun,
    observations: QuerySet[AnalysisGridObservation],
    subdivision_result: RemoteSensingSubdivisionResult | None,
) -> dict[str, Any]:
    """Summarise a block's satellite metrics, per sub-block when possible.

    Without a subdivision result (or with one that has no cluster blocks) the
    metrics are the plain mean over all observation cells (``"cell_mean"``).
    Otherwise each cluster block gets the mean of its assigned cells and the
    block value is the unweighted mean of those sub-block means
    (``"sub_block_mean"``).

    Args:
        run: Accepted for interface compatibility; not used here.
        observations: Observations of the block's cells.
        subdivision_result: Cluster subdivision of the block, if any.
    """
    _ = run  # kept in the signature for callers; intentionally unused
    if subdivision_result is None or not subdivision_result.cluster_blocks.exists():
        resolved_metrics = summarize_observations(observations)
        return {
            "resolved_metrics": resolved_metrics,
            "metric_sources": {
                metric_name: {
                    "type": "remote_sensing",
                    "strategy": "cell_mean",
                    "sub_block_count": 0,
                }
                for metric_name in resolved_metrics
            },
            "sub_blocks": [],
            "sub_block_count": 0,
            "aggregation_strategy": "cell_mean",
        }

    observation_by_cell_id = {
        observation.cell_id: observation for observation in observations
    }
    assignments_by_label: dict[int, list[int]] = {}
    for assignment in subdivision_result.assignments.all():
        assignments_by_label.setdefault(int(assignment.cluster_label), []).append(
            int(assignment.cell_id)
        )

    sub_block_snapshots: list[dict[str, Any]] = []
    for cluster_block in subdivision_result.cluster_blocks.all().order_by("cluster_label", "id"):
        # Assigned cells without an observation in this window are skipped.
        relevant_observations = [
            observation_by_cell_id[cell_id]
            for cell_id in assignments_by_label.get(int(cluster_block.cluster_label), [])
            if cell_id in observation_by_cell_id
        ]
        metric_map = summarize_observation_list(relevant_observations)
        sub_block_snapshots.append(
            {
                "cluster_uuid": str(cluster_block.uuid),
                "sub_block_code": cluster_block.sub_block_code,
                "cluster_label": int(cluster_block.cluster_label),
                "cell_count": len(relevant_observations),
                "resolved_metrics": metric_map,
            }
        )

    sub_block_metric_maps = [
        sub_block_snapshot["resolved_metrics"] for sub_block_snapshot in sub_block_snapshots
    ]
    resolved_metrics = average_metric_maps(sub_block_metric_maps)
    return {
        "resolved_metrics": resolved_metrics,
        "metric_sources": {
            metric_name: {
                "type": "remote_sensing",
                "strategy": "sub_block_mean_average",
                "sub_block_count": _count_maps_with_metric(sub_block_metric_maps, metric_name),
            }
            for metric_name in resolved_metrics
        },
        "sub_blocks": sub_block_snapshots,
        "sub_block_count": len(sub_block_snapshots),
        "aggregation_strategy": "sub_block_mean",
    }


def summarize_observation_list(
    observations: list[AnalysisGridObservation],
) -> dict[str, float]:
    """Average every satellite metric over in-memory observations.

    Values that cannot be coerced to a finite number are ignored; metrics
    with no usable value are omitted. Means are rounded to 6 decimal places.
    """
    metric_lists: dict[str, list[float]] = {
        metric_name: [] for metric_name in SATELLITE_METRIC_FIELDS
    }
    for observation in observations:
        for metric_name in SATELLITE_METRIC_FIELDS:
            numeric_value = _coerce_numeric(getattr(observation, metric_name, None))
            if numeric_value is not None:
                metric_lists[metric_name].append(numeric_value)

    summary: dict[str, float] = {}
    for metric_name, values in metric_lists.items():
        if not values:
            continue
        summary[metric_name] = round(sum(values) / len(values), 6)
    return summary


def average_metric_maps(metric_maps: list[dict[str, Any]]) -> dict[str, float]:
    """Return the per-metric mean across several metric maps.

    Each metric is averaged over the maps that provide a usable numeric value
    for it; maps lacking the metric do not count towards its denominator.
    Results are rounded to 6 decimal places.
    """
    values_by_metric: dict[str, list[float]] = {}
    for metric_map in metric_maps:
        for metric_name, metric_value in metric_map.items():
            numeric_value = _coerce_numeric(metric_value)
            if numeric_value is None:
                continue
            values_by_metric.setdefault(metric_name, []).append(numeric_value)
    return {
        metric_name: round(sum(values) / len(values), 6)
        for metric_name, values in values_by_metric.items()
        if values
    }


def _count_maps_with_metric(metric_maps: list[dict[str, Any]], metric_name: str) -> int:
    """Count how many of ``metric_maps`` contain ``metric_name`` as a key."""
    return sum(1 for metric_map in metric_maps if metric_name in metric_map)


def build_block_sensor_summary(
    location: SoilLocation,
    *,
    block_code: str,
    sensor_payload: dict[str, Any] | None,
) -> dict[str, Any]:
    """Aggregate sensor readings that belong to ``block_code``.

    Sensors are grouped by the sub-block they resolve to, conflicting
    readings inside a sub-block are reconciled per metric, and the block
    value of each metric is the mean of the sub-block values.

    Args:
        location: Provides the block layout used to resolve sub-blocks.
        block_code: Only sensors resolving to this block are included.
        sensor_payload: Mapping of sensor key to a dict of that sensor's
            values; non-dict payloads and non-dict entries are ignored.

    Returns:
        Dict with ``resolved_metrics``, ``metric_sources``, ``sub_blocks`` and
        ``sub_block_count``.
    """
    if not isinstance(sensor_payload, dict):
        return {
            "resolved_metrics": {},
            "metric_sources": {},
            "sub_blocks": [],
            "sub_block_count": 0,
        }

    active_lookup = _build_active_sub_block_lookup(location)
    sensors_by_sub_block: dict[str, dict[str, Any]] = {}
    # Sorted so sensor_keys and reading order are deterministic.
    for sensor_key, sensor_values in sorted(sensor_payload.items()):
        if not isinstance(sensor_values, dict):
            continue
        resolved_assignment = _resolve_sensor_sub_block_assignment(
            sensor_values=sensor_values,
            active_lookup=active_lookup,
        )
        if resolved_assignment is None or resolved_assignment["block_code"] != (block_code or ""):
            continue

        sub_block_identifier = str(
            resolved_assignment.get("cluster_uuid")
            or resolved_assignment.get("sub_block_code")
            or f"cluster-{resolved_assignment.get('cluster_label')}"
        )
        sub_block_entry = sensors_by_sub_block.setdefault(
            sub_block_identifier,
            {
                "cluster_uuid": resolved_assignment.get("cluster_uuid"),
                "sub_block_code": resolved_assignment.get("sub_block_code"),
                "cluster_label": resolved_assignment.get("cluster_label"),
                "sensor_keys": [],
                "readings_by_metric": {},
            },
        )
        sub_block_entry["sensor_keys"].append(sensor_key)
        for metric_name, metric_value in _extract_sensor_metric_values(sensor_values).items():
            sub_block_entry["readings_by_metric"].setdefault(metric_name, []).append(
                (sensor_key, metric_value)
            )

    sub_block_snapshots: list[dict[str, Any]] = []
    for sub_block_identifier, sub_block_entry in sorted(sensors_by_sub_block.items()):
        sub_block_metrics: dict[str, Any] = {}
        sub_block_sources: dict[str, Any] = {}
        for metric_name, readings in sub_block_entry["readings_by_metric"].items():
            resolved_value, source = _resolve_metric_readings(readings)
            sub_block_metrics[metric_name] = resolved_value
            sub_block_sources[metric_name] = source
        sub_block_snapshots.append(
            {
                "id": sub_block_identifier,
                "cluster_uuid": sub_block_entry.get("cluster_uuid"),
                "sub_block_code": sub_block_entry.get("sub_block_code"),
                "cluster_label": sub_block_entry.get("cluster_label"),
                "sensor_keys": sub_block_entry["sensor_keys"],
                "resolved_metrics": sub_block_metrics,
                "metric_sources": sub_block_sources,
            }
        )

    sub_block_metric_maps = [
        sub_block_snapshot["resolved_metrics"] for sub_block_snapshot in sub_block_snapshots
    ]
    # Non-numeric sub-block values are dropped by the averaging step.
    resolved_metrics = average_metric_maps(sub_block_metric_maps)
    metric_sources = {
        metric_name: {
            "type": "sensor",
            "strategy": "sub_block_mean_average",
            "sub_block_count": _count_maps_with_metric(sub_block_metric_maps, metric_name),
        }
        for metric_name in resolved_metrics
    }
    return {
        "resolved_metrics": resolved_metrics,
        "metric_sources": metric_sources,
        "sub_blocks": sub_block_snapshots,
        "sub_block_count": len(sub_block_snapshots),
    }


def _build_active_sub_block_lookup(location: SoilLocation) -> dict[str, Any]:
    """Index the layout's sub-blocks for sensor-to-sub-block resolution.

    Returns three indexes: by cluster UUID, by sub-block code (a list, since
    the same code may appear under several blocks) and by
    ``(block_code, cluster_label)``.
    """
    block_layout = dict(location.block_layout or {})
    by_cluster_uuid: dict[str, dict[str, Any]] = {}
    by_sub_block_code: dict[str, list[dict[str, Any]]] = {}
    by_block_and_cluster_label: dict[tuple[str, int], dict[str, Any]] = {}

    for block in block_layout.get("blocks") or []:
        block_code = str(block.get("block_code") or "").strip()
        for sub_block in block.get("sub_blocks") or []:
            record = {
                "block_code": block_code,
                "cluster_uuid": str(sub_block.get("cluster_uuid") or "").strip(),
                "sub_block_code": str(sub_block.get("sub_block_code") or "").strip(),
                "cluster_label": _coerce_int(sub_block.get("cluster_label")),
            }
            if record["cluster_uuid"]:
                by_cluster_uuid[record["cluster_uuid"]] = record
            if record["sub_block_code"]:
                by_sub_block_code.setdefault(record["sub_block_code"], []).append(record)
            if record["cluster_label"] is not None:
                by_block_and_cluster_label[(block_code, int(record["cluster_label"]))] = record

    return {
        "by_cluster_uuid": by_cluster_uuid,
        "by_sub_block_code": by_sub_block_code,
        "by_block_and_cluster_label": by_block_and_cluster_label,
    }


def _resolve_sensor_sub_block_assignment(
    *,
    sensor_values: dict[str, Any],
    active_lookup: dict[str, Any],
) -> dict[str, Any] | None:
    """Work out which sub-block a sensor belongs to.

    Assignment hints are read from the sensor dict itself and from its
    ``assignment``, ``sub_block`` and ``metadata`` sub-dicts, in that order;
    the first non-empty value of each field wins. Resolution then tries, in
    order: cluster UUID, ``(block_code, cluster_label)``, and sub-block code
    (preferring a match in the hinted block, else a unique match). If the
    layout knows none of these but the sensor names a block and a cluster
    label, the raw hints are returned as an ad-hoc assignment.

    Returns:
        The matching layout record (or the ad-hoc dict), or ``None`` when the
        sensor cannot be placed.
    """
    assignment_payloads = [
        sensor_values,
        sensor_values.get("assignment"),
        sensor_values.get("sub_block"),
        sensor_values.get("metadata"),
    ]
    candidate: dict[str, Any] = {
        "block_code": "",
        "cluster_uuid": "",
        "sub_block_code": "",
        "cluster_label": None,
    }
    for payload in assignment_payloads:
        if not isinstance(payload, dict):
            continue
        if not candidate["block_code"]:
            candidate["block_code"] = str(payload.get("block_code") or "").strip()
        if not candidate["cluster_uuid"]:
            candidate["cluster_uuid"] = str(payload.get("cluster_uuid") or "").strip()
        if not candidate["sub_block_code"]:
            candidate["sub_block_code"] = str(payload.get("sub_block_code") or "").strip()
        if candidate["cluster_label"] is None:
            candidate["cluster_label"] = _coerce_int(payload.get("cluster_label"))

    if candidate["cluster_uuid"]:
        resolved = active_lookup["by_cluster_uuid"].get(candidate["cluster_uuid"])
        if resolved is not None:
            return resolved

    if candidate["block_code"] and candidate["cluster_label"] is not None:
        resolved = active_lookup["by_block_and_cluster_label"].get(
            (candidate["block_code"], int(candidate["cluster_label"]))
        )
        if resolved is not None:
            return resolved

    if candidate["sub_block_code"]:
        matches = active_lookup["by_sub_block_code"].get(candidate["sub_block_code"], [])
        if candidate["block_code"]:
            for match in matches:
                if match["block_code"] == candidate["block_code"]:
                    return match
        if len(matches) == 1:
            return matches[0]

    if candidate["block_code"] and candidate["cluster_label"] is not None:
        # Unknown to the layout, but specific enough to group readings by.
        return {
            "block_code": candidate["block_code"],
            "cluster_uuid": candidate["cluster_uuid"],
            "sub_block_code": candidate["sub_block_code"],
            "cluster_label": candidate["cluster_label"],
        }
    return None


def _extract_sensor_metric_values(sensor_values: dict[str, Any]) -> dict[str, Any]:
    """Return the sensor's metric entries, minus assignment/metadata fields.

    Every key that is not an assignment field and whose value is not a dict
    is treated as a metric; keys are stringified.
    """
    ignored_keys = {
        "assignment",
        "metadata",
        "sub_block",
        "cluster_uuid",
        "sub_block_code",
        "cluster_label",
        "block_code",
    }
    metric_values: dict[str, Any] = {}
    for key, value in sensor_values.items():
        if key in ignored_keys or isinstance(value, dict):
            continue
        metric_values[str(key)] = value
    return metric_values


def _resolve_metric_readings(
    readings: list[tuple[str, object]],
) -> tuple[object, dict[str, object]]:
    """Reconcile several sensors' readings of one metric into one value.

    Args:
        readings: ``(sensor_key, value)`` pairs for a single metric.

    Returns:
        ``(resolved_value, source)`` where ``source`` describes the strategy:

        * ``"empty"`` - no readings, value is ``None``;
        * ``"single_value"`` - all readings agree, that value is returned;
        * ``"average"`` - readings disagree and all distinct values are
          finite numbers; the mean of the *distinct* values is returned;
        * ``"distinct_values"`` - readings disagree and are not all numeric;
          the list of distinct values is returned.
    """
    if not readings:
        return None, {"type": "sensor", "strategy": "empty", "sensor_keys": []}

    sensor_keys = [sensor_key for sensor_key, _value in readings]
    # List membership (not a set) so unhashable values are supported.
    distinct_values: list[object] = []
    for _sensor_key, value in readings:
        if value not in distinct_values:
            distinct_values.append(value)

    if len(distinct_values) == 1:
        return distinct_values[0], {
            "type": "sensor",
            "strategy": "single_value",
            "sensor_keys": sensor_keys,
            "sensor_count": len(sensor_keys),
        }

    numeric_values = [_coerce_numeric(value) for value in distinct_values]
    if all(value is not None for value in numeric_values):
        average = sum(numeric_values) / len(numeric_values)
        return round(float(average), 6), {
            "type": "sensor",
            "strategy": "average",
            "sensor_keys": sensor_keys,
            "sensor_count": len(sensor_keys),
            "conflict": True,
            "distinct_values": distinct_values,
        }

    return distinct_values, {
        "type": "sensor",
        "strategy": "distinct_values",
        "sensor_keys": sensor_keys,
        "sensor_count": len(sensor_keys),
        "conflict": True,
        "distinct_values": distinct_values,
    }


def _coerce_numeric(value: Any) -> float | None:
    """Convert ``value`` to a finite float, or return ``None``.

    Booleans are rejected even though ``float(True)`` would succeed. NaN and
    infinities (including the strings ``"nan"``/``"inf"``) are rejected so a
    single bad reading cannot turn an average into NaN/inf. Integers too
    large for a float are rejected instead of raising ``OverflowError``.
    """
    if isinstance(value, bool):
        return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return None
    return number if math.isfinite(number) else None


def _coerce_int(value: Any) -> int | None:
    """Convert ``value`` to ``int``, or return ``None`` if that is impossible.

    ``None`` and the empty string map to ``None``. Infinite floats, which make
    ``int()`` raise ``OverflowError``, are treated as unconvertible too.
    """
    try:
        if value is None or value == "":
            return None
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return None