# Ai/location_data/data_driven_subdivision.py
from __future__ import annotations
from io import BytesIO
import math
import os
from pathlib import Path
from dataclasses import dataclass
from decimal import Decimal
import json
import logging
from typing import Any
from django.conf import settings
from django.core.files.base import ContentFile
from django.db import transaction
from .block_subdivision import detect_elbow_point, point_in_polygon, render_elbow_plot
from .models import (
build_default_sub_block,
ensure_block_layout_defaults,
AnalysisGridObservation,
BlockSubdivision,
RemoteSensingClusterBlock,
RemoteSensingClusterAssignment,
RemoteSensingRun,
RemoteSensingSubdivisionResult,
RemoteSensingSubdivisionOption,
RemoteSensingSubdivisionOptionAssignment,
RemoteSensingSubdivisionOptionBlock,
SoilLocation,
)
DEFAULT_CLUSTER_FEATURES = [
"ndvi",
"ndwi",
"soil_vv_db",
]
SUPPORTED_CLUSTER_FEATURES = tuple(DEFAULT_CLUSTER_FEATURES)
DEFAULT_RANDOM_STATE = 42
DEFAULT_MAX_K = 10
DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR = "artifacts/remote_sensing_charts"
logger = logging.getLogger(__name__)
class DataDrivenSubdivisionError(Exception):
"""Raised when remote-sensing-driven subdivision can not be computed."""
class EmptyObservationDatasetError(DataDrivenSubdivisionError):
"""Raised when upstream persistence completes without usable clustering features."""
@dataclass
class ClusteringDataset:
observations: list[AnalysisGridObservation]
selected_features: list[str]
raw_feature_rows: list[list[float | None]]
raw_feature_maps: list[dict[str, float | None]]
skipped_cell_codes: list[str]
used_cell_codes: list[str]
imputed_matrix: list[list[float]]
scaled_matrix: list[list[float]]
imputer_statistics: dict[str, float | None]
scaler_means: dict[str, float]
scaler_scales: dict[str, float]
missing_value_counts: dict[str, int]
skipped_reasons: dict[str, list[str]]
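# Invariant: the list fields above are parallel arrays. Index i in
# observations, raw_feature_rows, raw_feature_maps, imputed_matrix and
# scaled_matrix all describe the same grid cell; skipped_cell_codes records
# the cells dropped before imputation and scaling.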
@dataclass
class SubdivisionOptionPayload:
requested_k: int
effective_cluster_count: int
labels: list[int]
cluster_summaries: list[dict[str, Any]]
spatial_constraint_metadata: dict[str, Any]
assignment_rows: list[dict[str, Any]]
cluster_block_rows: list[dict[str, Any]]
def create_remote_sensing_subdivision_result(
*,
location: SoilLocation,
run: RemoteSensingRun,
observations: list[AnalysisGridObservation],
block_subdivision: BlockSubdivision | None = None,
block_code: str = "",
selected_features: list[str] | None = None,
explicit_k: int | None = None,
max_k: int = DEFAULT_MAX_K,
random_state: int = DEFAULT_RANDOM_STATE,
) -> RemoteSensingSubdivisionResult:
"""
Build a data-driven subdivision result from stored remote sensing observations.
KMeans is applied to the actual per-cell feature vectors, not to geometric points.
"""
dataset = build_clustering_dataset(
observations=observations,
selected_features=selected_features,
run=run,
location=location,
)
if not dataset.observations:
raise DataDrivenSubdivisionError("هیچ observation قابل استفاده‌ای برای خوشه‌بندی باقی نماند.")
optimal_k, inertia_curve = choose_cluster_count(
scaled_matrix=dataset.scaled_matrix,
explicit_k=explicit_k,
max_k=max_k,
random_state=random_state,
)
cluster_selection_strategy = "explicit_k" if explicit_k is not None else "elbow"
option_payloads = build_subdivision_option_payloads(
dataset=dataset,
max_k=max_k,
random_state=random_state,
)
if not option_payloads:
raise DataDrivenSubdivisionError("هیچ گزینه K معتبری برای ذخیره‌سازی subdivision ساخته نشد.")
active_requested_k = min(
int(explicit_k if explicit_k is not None else optimal_k),
max(option_payload.requested_k for option_payload in option_payloads),
)
active_option_payload = next(
(
option_payload
for option_payload in option_payloads
if option_payload.requested_k == active_requested_k
),
option_payloads[-1],
)
effective_cluster_count = active_option_payload.effective_cluster_count
cluster_summaries = active_option_payload.cluster_summaries
with transaction.atomic():
result, _created = RemoteSensingSubdivisionResult.objects.update_or_create(
run=run,
defaults={
"soil_location": location,
"block_subdivision": block_subdivision,
"block_code": block_code,
"chunk_size_sqm": run.chunk_size_sqm,
"temporal_start": run.temporal_start,
"temporal_end": run.temporal_end,
"cluster_count": effective_cluster_count,
"selected_features": dataset.selected_features,
"skipped_cell_codes": dataset.skipped_cell_codes,
"metadata": {
"cell_count": len(observations),
"used_cell_count": len(dataset.observations),
"skipped_cell_count": len(dataset.skipped_cell_codes),
"used_cell_codes": dataset.used_cell_codes,
"skipped_reasons": dataset.skipped_reasons,
"selected_features": dataset.selected_features,
"imputer_strategy": "median",
"imputer_statistics": dataset.imputer_statistics,
"missing_value_counts": dataset.missing_value_counts,
"scaler_means": dataset.scaler_means,
"scaler_scales": dataset.scaler_scales,
"kmeans_params": {
"random_state": random_state,
"explicit_k": explicit_k,
"selected_k": optimal_k,
"recommended_k": optimal_k,
"active_requested_k": active_requested_k,
"effective_k": effective_cluster_count,
"max_k": max_k,
"n_init": 10,
"selection_strategy": cluster_selection_strategy,
},
"recommended_requested_k": optimal_k,
"active_requested_k": active_requested_k,
"available_k_options": [
{
"requested_k": option_payload.requested_k,
"effective_cluster_count": option_payload.effective_cluster_count,
}
for option_payload in option_payloads
],
"spatial_constraint": active_option_payload.spatial_constraint_metadata,
"inertia_curve": inertia_curve,
"cluster_summaries": cluster_summaries,
},
},
)
result.assignments.all().delete()
result.cluster_blocks.all().delete()
result.options.all().delete()
option_objects = persist_subdivision_options(
result=result,
location=location,
block_subdivision=block_subdivision,
option_payloads=option_payloads,
recommended_requested_k=optimal_k,
active_requested_k=active_requested_k,
chunk_size_sqm=run.chunk_size_sqm,
)
persist_subdivision_option_artifacts(
result=result,
option_payloads=option_payloads,
option_objects=option_objects,
observations=dataset.observations,
selected_features=dataset.selected_features,
scaled_matrix=dataset.scaled_matrix,
inertia_curve=inertia_curve,
)
active_option = option_objects[active_requested_k]
activate_subdivision_option(
option=active_option,
selection_source="system",
recommended_requested_k=optimal_k,
)
result.refresh_from_db()
diagnostic_artifacts = _persist_remote_sensing_diagnostic_artifacts(
result=result,
observations=dataset.observations,
labels=active_option_payload.labels,
cluster_summaries=active_option_payload.cluster_summaries,
selected_features=dataset.selected_features,
scaled_matrix=dataset.scaled_matrix,
inertia_curve=inertia_curve,
)
if diagnostic_artifacts:
metadata = dict(result.metadata or {})
metadata["diagnostic_artifacts"] = diagnostic_artifacts
result.metadata = metadata
result.save(update_fields=["metadata", "updated_at"])
return result
def build_clustering_dataset(
*,
observations: list[AnalysisGridObservation],
selected_features: list[str] | None = None,
run: RemoteSensingRun | None = None,
location: SoilLocation | None = None,
) -> ClusteringDataset:
selected_features = list(selected_features or DEFAULT_CLUSTER_FEATURES)
invalid_features = [
feature_name
for feature_name in selected_features
if feature_name not in SUPPORTED_CLUSTER_FEATURES
]
if invalid_features:
raise DataDrivenSubdivisionError(
"Invalid clustering features: "
+ ", ".join(sorted(invalid_features))
)
log_context = _build_clustering_log_context(
observations=observations,
selected_features=selected_features,
run=run,
location=location,
)
logger.info(
"Preparing clustering dataset: %s",
_serialize_log_payload(
{
**log_context,
"total_observations": len(observations),
"non_null_counts": _count_non_null_features(observations),
}
),
)
raw_rows: list[list[float | None]] = []
raw_maps: list[dict[str, float | None]] = []
usable_observations: list[AnalysisGridObservation] = []
skipped_cell_codes: list[str] = []
used_cell_codes: list[str] = []
missing_value_counts = {feature_name: 0 for feature_name in selected_features}
skipped_reasons = {"all_features_missing": []}
for observation in observations:
feature_map = {
feature_name: _coerce_float(getattr(observation, feature_name, None))
for feature_name in selected_features
}
for feature_name, value in feature_map.items():
if value is None:
missing_value_counts[feature_name] += 1
if all(value is None for value in feature_map.values()):
logger.debug(
"Skipping observation cell=%s: all clustering features are null | context=%s",
observation.cell.cell_code,
_serialize_log_payload(log_context),
)
skipped_cell_codes.append(observation.cell.cell_code)
skipped_reasons["all_features_missing"].append(observation.cell.cell_code)
continue
usable_observations.append(observation)
used_cell_codes.append(observation.cell.cell_code)
raw_maps.append(feature_map)
raw_rows.append([feature_map[feature_name] for feature_name in selected_features])
logger.info(
"Clustering dataset filtered observations: %s",
_serialize_log_payload(
{
**log_context,
"remaining_observations": len(usable_observations),
"removed_observations": len(observations) - len(usable_observations),
}
)
)
zero_usable_feature_names = [
feature_name
for feature_name, missing_count in missing_value_counts.items()
if missing_count == len(observations)
]
if zero_usable_feature_names and len(zero_usable_feature_names) < len(selected_features):
for feature_name in zero_usable_feature_names:
logger.warning(
"Feature %s has zero usable values in dataset | context=%s",
feature_name,
_serialize_log_payload(log_context),
)
if not usable_observations:
error_context = {
**log_context,
"total_observations": len(observations),
"removed_observations": len(observations),
"null_counts_per_feature": missing_value_counts,
"selected_features": selected_features,
}
logger.error(
"No usable observations available for clustering: %s",
_serialize_log_payload(error_context),
)
raise EmptyObservationDatasetError(
"Upstream processing completed but no usable feature values were persisted."
)
try:
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
except ImportError as exc: # pragma: no cover - runtime dependency guard
raise DataDrivenSubdivisionError(
"scikit-learn and numpy are required for data-driven clustering."
) from exc
raw_matrix = np.array(raw_rows, dtype=float)
imputer = SimpleImputer(strategy="median")
imputed_matrix = imputer.fit_transform(raw_matrix)
scaler = StandardScaler()
scaled_matrix = scaler.fit_transform(imputed_matrix)
return ClusteringDataset(
observations=usable_observations,
selected_features=selected_features,
raw_feature_rows=raw_rows,
raw_feature_maps=raw_maps,
skipped_cell_codes=skipped_cell_codes,
used_cell_codes=used_cell_codes,
imputed_matrix=imputed_matrix.tolist(),
scaled_matrix=scaled_matrix.tolist(),
imputer_statistics={
feature_name: _coerce_float(imputer.statistics_[index])
for index, feature_name in enumerate(selected_features)
},
scaler_means={
feature_name: float(scaler.mean_[index])
for index, feature_name in enumerate(selected_features)
},
scaler_scales={
feature_name: float(scaler.scale_[index] or 1.0)
for index, feature_name in enumerate(selected_features)
},
missing_value_counts=missing_value_counts,
skipped_reasons=skipped_reasons,
)
def choose_cluster_count(
*,
scaled_matrix: list[list[float]],
explicit_k: int | None,
max_k: int,
random_state: int,
) -> tuple[int, list[dict[str, float]]]:
sample_count = len(scaled_matrix)
if sample_count == 0:
raise DataDrivenSubdivisionError("هیچ نمونه‌ای برای خوشه‌بندی وجود ندارد.")
if sample_count == 1:
return 1, [{"k": 1, "sse": 0.0}]
if explicit_k is not None:
if explicit_k <= 0:
raise DataDrivenSubdivisionError("cluster_count باید بزرگ‌تر از صفر باشد.")
return min(explicit_k, sample_count), []
try:
from sklearn.cluster import KMeans
except ImportError as exc: # pragma: no cover
raise DataDrivenSubdivisionError("scikit-learn برای انتخاب تعداد خوشه لازم است.") from exc
max_allowed_k = min(max_k, sample_count)
inertia_curve = []
for k in range(1, max_allowed_k + 1):
model = KMeans(n_clusters=k, n_init=10, random_state=random_state)
model.fit(scaled_matrix)
inertia_curve.append({"k": k, "sse": round(float(model.inertia_), 6)})
return detect_elbow_point(inertia_curve), inertia_curve
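# Illustrative sketch (synthetic values, not produced by this module): with
# explicit_k=None the helper sweeps k = 1..min(max_k, n_samples), fits one
# KMeans per k, and hands the resulting SSE curve to detect_elbow_point:
#
#     optimal_k, curve = choose_cluster_count(
#         scaled_matrix=[[0.0], [0.1], [2.0], [2.1], [4.0], [4.2]],
#         explicit_k=None,
#         max_k=5,
#         random_state=DEFAULT_RANDOM_STATE,
#     )
#     # curve == [{"k": 1, "sse": ...}, ..., {"k": 5, "sse": ...}]
#
# With explicit_k set, no curve is fitted and min(explicit_k, n_samples) is
# returned directly.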
def run_kmeans_labels(
*,
scaled_matrix: list[list[float]],
cluster_count: int,
random_state: int,
) -> list[int]:
if cluster_count <= 0:
raise DataDrivenSubdivisionError("cluster_count باید بزرگ‌تر از صفر باشد.")
if len(scaled_matrix) == 1:
return [0]
try:
from sklearn.cluster import KMeans
except ImportError as exc: # pragma: no cover
raise DataDrivenSubdivisionError("scikit-learn برای اجرای KMeans لازم است.") from exc
model = KMeans(n_clusters=cluster_count, n_init=10, random_state=random_state)
return [int(label) for label in model.fit_predict(scaled_matrix)]
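# Illustrative sketch: the returned labels are plain ints aligned with the
# input rows, so they zip directly against dataset.observations:
#
#     labels = run_kmeans_labels(
#         scaled_matrix=dataset.scaled_matrix,
#         cluster_count=3,
#         random_state=DEFAULT_RANDOM_STATE,
#     )
#     assert len(labels) == len(dataset.scaled_matrix)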
def enforce_spatial_contiguity(
*,
observations: list[AnalysisGridObservation],
labels: list[int],
scaled_matrix: list[list[float]],
) -> tuple[list[int], dict[str, Any]]:
if not observations or len(observations) <= 1:
return labels, {
"applied": False,
"strategy": "shared_edge_component_merge",
"initial_cluster_count": len(set(labels)),
"final_cluster_count": len(set(labels)),
"disconnected_components_merged": 0,
"shared_border_required": True,
}
adjacency_map = _build_shared_border_adjacency(observations)
if not adjacency_map:
return labels, {
"applied": False,
"strategy": "shared_edge_component_merge",
"initial_cluster_count": len(set(labels)),
"final_cluster_count": len(set(labels)),
"disconnected_components_merged": 0,
"shared_border_required": True,
"note": "No shared-border adjacency detected.",
}
working_labels = [int(label) for label in labels]
merged_component_count = 0
max_iterations = max(len(observations), 1)
for _iteration in range(max_iterations):
disconnected_components = _find_disconnected_label_components(
labels=working_labels,
adjacency_map=adjacency_map,
)
if not disconnected_components:
normalized_labels = _normalize_cluster_labels(working_labels)
return normalized_labels, {
"applied": merged_component_count > 0,
"strategy": "shared_edge_component_merge",
"initial_cluster_count": len(set(labels)),
"final_cluster_count": len(set(normalized_labels)),
"disconnected_components_merged": merged_component_count,
"shared_border_required": True,
}
for disconnected_component in disconnected_components:
target_label = _choose_neighbor_label_for_component(
component_indexes=disconnected_component,
labels=working_labels,
adjacency_map=adjacency_map,
scaled_matrix=scaled_matrix,
)
if target_label is None:
continue
for component_index in disconnected_component:
working_labels[component_index] = target_label
merged_component_count += 1
break
else:
raise DataDrivenSubdivisionError(
"The spatial contiguity constraint on the clusters could not be guaranteed; some cells could not be assigned to form a block with a shared border."
)
raise DataDrivenSubdivisionError(
"Enforcing the spatial contiguity constraint on the clusters did not converge within the allowed number of iterations."
)
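# Worked example on a toy 1x3 strip of cells (adjacency 0-1 and 1-2): with
# labels [0, 1, 0], cluster 0 splits into components [0] and [2]. The anchor
# component [0] is kept, the fragment [2] is merged into its only bordering
# label (1), giving the contiguous labelling [0, 1, 1], which is then
# normalized; the metadata reports disconnected_components_merged=1 and
# final_cluster_count=2.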
def build_cluster_summaries(
*,
observations: list[AnalysisGridObservation],
labels: list[int],
) -> list[dict[str, Any]]:
clusters: dict[int, dict[str, Any]] = {}
for observation, label in zip(observations, labels):
cluster = clusters.setdefault(
int(label),
{
"cluster_label": int(label),
"observations": [],
"cell_codes": [],
"centroid_lat_sum": 0.0,
"centroid_lon_sum": 0.0,
"cell_count": 0,
},
)
cluster["observations"].append(observation)
cluster["cell_codes"].append(observation.cell.cell_code)
cluster["centroid_lat_sum"] += float(observation.cell.centroid_lat)
cluster["centroid_lon_sum"] += float(observation.cell.centroid_lon)
cluster["cell_count"] += 1
summaries = []
for cluster_label in sorted(clusters):
cluster = clusters[cluster_label]
cell_count = cluster["cell_count"] or 1
center_payload = _select_cluster_center_observation(
cluster_observations=cluster["observations"],
)
summaries.append(
{
"cluster_label": cluster_label,
"cell_count": cluster["cell_count"],
"centroid_lat": round(cluster["centroid_lat_sum"] / cell_count, 6),
"centroid_lon": round(cluster["centroid_lon_sum"] / cell_count, 6),
"cell_codes": cluster["cell_codes"],
"center_cell_code": center_payload["cell_code"],
"center_cell_lat": center_payload["centroid_lat"],
"center_cell_lon": center_payload["centroid_lon"],
"center_radius": center_payload["radius"],
"center_mean_distance": center_payload["mean_distance"],
}
)
return summaries
def _select_cluster_center_observation(
*,
cluster_observations: list[AnalysisGridObservation],
) -> dict[str, Any]:
if not cluster_observations:
return {
"cell_code": "",
"centroid_lat": None,
"centroid_lon": None,
"radius": 0.0,
"mean_distance": 0.0,
}
candidate_payloads: list[dict[str, Any]] = []
for candidate in cluster_observations:
candidate_lat = float(candidate.cell.centroid_lat)
candidate_lon = float(candidate.cell.centroid_lon)
distances = [
_euclidean_distance(
[candidate_lon, candidate_lat],
[float(member.cell.centroid_lon), float(member.cell.centroid_lat)],
)
for member in cluster_observations
]
radius = max(distances) if distances else 0.0
mean_distance = sum(distances) / len(distances) if distances else 0.0
candidate_payloads.append(
{
"cell_code": candidate.cell.cell_code,
"centroid_lat": round(candidate_lat, 6),
"centroid_lon": round(candidate_lon, 6),
"radius": round(radius, 8),
"mean_distance": round(mean_distance, 8),
}
)
return min(
candidate_payloads,
key=lambda payload: (
float(payload["radius"]),
float(payload["mean_distance"]),
str(payload["cell_code"]),
),
)
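# Worked example of the 1-center rule above: for three collinear cells at
# longitudes 0, 1 and 2 (same latitude), the middle cell has radius 1 while
# the end cells have radius 2, so the middle cell wins; ties on radius fall
# back to mean distance, then to cell_code.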
def build_subdivision_option_payloads(
*,
dataset: ClusteringDataset,
max_k: int,
random_state: int,
) -> list[SubdivisionOptionPayload]:
sample_count = len(dataset.observations)
if sample_count == 0:
return []
max_allowed_k = min(max_k, sample_count)
option_payloads: list[SubdivisionOptionPayload] = []
for requested_k in range(1, max_allowed_k + 1):
labels = run_kmeans_labels(
scaled_matrix=dataset.scaled_matrix,
cluster_count=requested_k,
random_state=random_state,
)
labels, spatial_constraint_metadata = enforce_spatial_contiguity(
observations=dataset.observations,
labels=labels,
scaled_matrix=dataset.scaled_matrix,
)
effective_cluster_count = len(set(labels))
cluster_summaries = build_cluster_summaries(
observations=dataset.observations,
labels=labels,
)
cluster_block_rows = build_cluster_block_rows(
observations=dataset.observations,
labels=labels,
cluster_summaries=cluster_summaries,
)
assignment_rows = [
{
"cell": observation.cell,
"cluster_label": int(labels[index]),
"raw_feature_values": dataset.raw_feature_maps[index],
"scaled_feature_values": {
feature_name: round(dataset.scaled_matrix[index][feature_index], 6)
for feature_index, feature_name in enumerate(dataset.selected_features)
},
}
for index, observation in enumerate(dataset.observations)
]
option_payloads.append(
SubdivisionOptionPayload(
requested_k=requested_k,
effective_cluster_count=effective_cluster_count,
labels=labels,
cluster_summaries=cluster_summaries,
spatial_constraint_metadata=spatial_constraint_metadata,
assignment_rows=assignment_rows,
cluster_block_rows=cluster_block_rows,
)
)
return option_payloads
def build_cluster_block_rows(
*,
observations: list[AnalysisGridObservation],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
) -> list[dict[str, Any]]:
observations_by_label: dict[int, list[AnalysisGridObservation]] = {}
for observation, label in zip(observations, labels):
observations_by_label.setdefault(int(label), []).append(observation)
cluster_block_rows: list[dict[str, Any]] = []
for cluster_summary in cluster_summaries:
cluster_label = int(cluster_summary["cluster_label"])
cluster_observations = observations_by_label.get(cluster_label, [])
cluster_geometry = _build_cluster_geometry(cluster_observations)
cluster_metadata = {
"cell_geometry_type": cluster_geometry.get("type"),
"source": "analysis_grid_cells",
}
cluster_block_rows.append(
{
"cluster_label": cluster_label,
"sub_block_code": f"cluster-{cluster_label}",
"centroid_lat": Decimal(str(cluster_summary["centroid_lat"])),
"centroid_lon": Decimal(str(cluster_summary["centroid_lon"])),
"center_cell_code": str(cluster_summary.get("center_cell_code") or ""),
"center_cell_lat": _to_decimal_or_none(cluster_summary.get("center_cell_lat")),
"center_cell_lon": _to_decimal_or_none(cluster_summary.get("center_cell_lon")),
"geometry": cluster_geometry,
"cell_count": int(cluster_summary["cell_count"]),
"cell_codes": list(cluster_summary["cell_codes"]),
"metadata": {
**cluster_metadata,
"center_selection": {
"strategy": "coordinate_1_center",
"center_cell_code": cluster_summary.get("center_cell_code") or "",
"center_radius": cluster_summary.get("center_radius"),
"center_mean_distance": cluster_summary.get("center_mean_distance"),
},
},
}
)
return cluster_block_rows
def persist_subdivision_options(
*,
result: RemoteSensingSubdivisionResult,
location: SoilLocation,
block_subdivision: BlockSubdivision | None,
option_payloads: list[SubdivisionOptionPayload],
recommended_requested_k: int,
active_requested_k: int,
chunk_size_sqm: int,
) -> dict[int, RemoteSensingSubdivisionOption]:
option_objects: dict[int, RemoteSensingSubdivisionOption] = {}
for option_payload in option_payloads:
option = RemoteSensingSubdivisionOption.objects.create(
result=result,
requested_k=option_payload.requested_k,
effective_cluster_count=option_payload.effective_cluster_count,
is_active=option_payload.requested_k == active_requested_k,
is_recommended=option_payload.requested_k == recommended_requested_k,
selection_source="system",
metadata={
"requested_k": option_payload.requested_k,
"effective_cluster_count": option_payload.effective_cluster_count,
"spatial_constraint": option_payload.spatial_constraint_metadata,
"cluster_summaries": option_payload.cluster_summaries,
},
)
RemoteSensingSubdivisionOptionAssignment.objects.bulk_create(
[
RemoteSensingSubdivisionOptionAssignment(
option=option,
cell=assignment_row["cell"],
cluster_label=assignment_row["cluster_label"],
raw_feature_values=assignment_row["raw_feature_values"],
scaled_feature_values=assignment_row["scaled_feature_values"],
)
for assignment_row in option_payload.assignment_rows
]
)
option_block_objects = []
for block_row in option_payload.cluster_block_rows:
option_block_objects.append(
RemoteSensingSubdivisionOptionBlock(
option=option,
cluster_label=block_row["cluster_label"],
sub_block_code=block_row["sub_block_code"],
chunk_size_sqm=chunk_size_sqm,
centroid_lat=block_row["centroid_lat"],
centroid_lon=block_row["centroid_lon"],
center_cell_code=block_row["center_cell_code"],
center_cell_lat=block_row["center_cell_lat"],
center_cell_lon=block_row["center_cell_lon"],
geometry=block_row["geometry"],
cell_count=block_row["cell_count"],
cell_codes=block_row["cell_codes"],
metadata=block_row["metadata"],
)
)
RemoteSensingSubdivisionOptionBlock.objects.bulk_create(option_block_objects)
option_objects[option.requested_k] = option
return option_objects
def persist_subdivision_option_artifacts(
*,
result: RemoteSensingSubdivisionResult,
option_payloads: list[SubdivisionOptionPayload],
option_objects: dict[int, RemoteSensingSubdivisionOption],
observations: list[AnalysisGridObservation],
selected_features: list[str],
scaled_matrix: list[list[float]],
inertia_curve: list[dict[str, float]],
) -> None:
for option_payload in option_payloads:
option = option_objects.get(option_payload.requested_k)
if option is None:
continue
diagnostic_artifacts = _persist_remote_sensing_diagnostic_artifacts(
result=result,
observations=observations,
labels=option_payload.labels,
cluster_summaries=option_payload.cluster_summaries,
selected_features=selected_features,
scaled_matrix=scaled_matrix,
inertia_curve=inertia_curve,
requested_k=option_payload.requested_k,
effective_cluster_count=option_payload.effective_cluster_count,
)
if not diagnostic_artifacts:
continue
metadata = dict(option.metadata or {})
metadata["diagnostic_artifacts"] = diagnostic_artifacts
option.metadata = metadata
option.save(update_fields=["metadata", "updated_at"])
def activate_subdivision_option(
*,
option: RemoteSensingSubdivisionOption,
selection_source: str,
recommended_requested_k: int | None = None,
) -> RemoteSensingSubdivisionResult:
result = option.result
requested_k = int(option.requested_k)
if recommended_requested_k is None:
recommended_requested_k = (
result.options.filter(is_recommended=True)
.values_list("requested_k", flat=True)
.first()
)
result.options.exclude(pk=option.pk).update(is_active=False)
option.is_active = True
option.selection_source = selection_source
option.save(update_fields=["is_active", "selection_source", "updated_at"])
assignments = list(
option.assignments.select_related("cell").order_by("cell__cell_code")
)
result.assignments.all().delete()
RemoteSensingClusterAssignment.objects.bulk_create(
[
RemoteSensingClusterAssignment(
result=result,
cell=assignment.cell,
cluster_label=assignment.cluster_label,
raw_feature_values=assignment.raw_feature_values,
scaled_feature_values=assignment.scaled_feature_values,
)
for assignment in assignments
]
)
result.cluster_blocks.all().delete()
cluster_block_objects = []
cluster_summaries = []
for option_block in option.cluster_blocks.order_by("cluster_label", "id"):
cluster_block = RemoteSensingClusterBlock.objects.create(
result=result,
soil_location=result.soil_location,
block_subdivision=result.block_subdivision,
block_code=result.block_code,
sub_block_code=option_block.sub_block_code,
cluster_label=option_block.cluster_label,
chunk_size_sqm=option_block.chunk_size_sqm,
centroid_lat=option_block.centroid_lat,
centroid_lon=option_block.centroid_lon,
center_cell_code=option_block.center_cell_code,
center_cell_lat=option_block.center_cell_lat,
center_cell_lon=option_block.center_cell_lon,
geometry=option_block.geometry,
cell_count=option_block.cell_count,
cell_codes=option_block.cell_codes,
metadata=option_block.metadata,
)
cluster_block_objects.append(cluster_block)
cluster_summaries.append(
{
"cluster_uuid": str(cluster_block.uuid),
"cluster_label": option_block.cluster_label,
"sub_block_code": option_block.sub_block_code,
"centroid_lat": float(option_block.centroid_lat),
"centroid_lon": float(option_block.centroid_lon),
"center_cell_code": option_block.center_cell_code,
"center_cell_lat": float(option_block.center_cell_lat) if option_block.center_cell_lat is not None else None,
"center_cell_lon": float(option_block.center_cell_lon) if option_block.center_cell_lon is not None else None,
"center_radius": (option_block.metadata or {}).get("center_selection", {}).get("center_radius"),
"center_mean_distance": (option_block.metadata or {}).get("center_selection", {}).get("center_mean_distance"),
"cell_count": option_block.cell_count,
"cell_codes": list(option_block.cell_codes or []),
"geometry": option_block.geometry,
"metadata": option_block.metadata,
}
)
metadata = dict(result.metadata or {})
kmeans_params = dict(metadata.get("kmeans_params") or {})
kmeans_params["active_requested_k"] = requested_k
kmeans_params["effective_k"] = option.effective_cluster_count
if recommended_requested_k is not None:
kmeans_params["recommended_k"] = recommended_requested_k
metadata["kmeans_params"] = kmeans_params
metadata["active_requested_k"] = requested_k
metadata["recommended_requested_k"] = recommended_requested_k
metadata["cluster_summaries"] = cluster_summaries
metadata["active_option"] = {
"requested_k": requested_k,
"effective_cluster_count": option.effective_cluster_count,
"selection_source": selection_source,
}
metadata["available_k_options"] = [
{
"requested_k": subdivision_option.requested_k,
"effective_cluster_count": subdivision_option.effective_cluster_count,
"is_active": subdivision_option.pk == option.pk,
"is_recommended": subdivision_option.is_recommended,
"selection_source": option.selection_source if subdivision_option.pk == option.pk else subdivision_option.selection_source,
"diagnostic_artifacts": (subdivision_option.metadata or {}).get("diagnostic_artifacts", {}),
}
for subdivision_option in result.options.order_by("requested_k")
]
result.cluster_count = option.effective_cluster_count
result.metadata = metadata
result.save(update_fields=["cluster_count", "metadata", "updated_at"])
if result.block_subdivision is not None:
sync_block_subdivision_with_result(
block_subdivision=result.block_subdivision,
result=result,
observations=assignments,
cluster_summaries=cluster_summaries,
)
sync_location_block_layout_with_result(
location=result.soil_location,
result=result,
cluster_summaries=cluster_summaries,
)
return result
def sync_cluster_blocks_with_result(
*,
result: RemoteSensingSubdivisionResult,
location: SoilLocation,
block_subdivision: BlockSubdivision | None,
observations: list[AnalysisGridObservation],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
) -> list[RemoteSensingClusterBlock]:
observations_by_label: dict[int, list[AnalysisGridObservation]] = {}
for observation, label in zip(observations, labels):
observations_by_label.setdefault(int(label), []).append(observation)
existing_blocks = {
cluster_block.cluster_label: cluster_block
for cluster_block in result.cluster_blocks.all()
}
active_labels: set[int] = set()
synced_blocks: list[RemoteSensingClusterBlock] = []
for cluster_summary in cluster_summaries:
cluster_label = int(cluster_summary["cluster_label"])
active_labels.add(cluster_label)
cluster_observations = observations_by_label.get(cluster_label, [])
cluster_geometry = _build_cluster_geometry(cluster_observations)
cluster_metadata = {
"cell_geometry_type": cluster_geometry.get("type"),
"source": "analysis_grid_cells",
}
cluster_block = existing_blocks.get(cluster_label)
defaults = {
"soil_location": location,
"block_subdivision": block_subdivision,
"block_code": result.block_code,
"sub_block_code": f"cluster-{cluster_label}",
"chunk_size_sqm": result.chunk_size_sqm,
"centroid_lat": Decimal(str(cluster_summary["centroid_lat"])),
"centroid_lon": Decimal(str(cluster_summary["centroid_lon"])),
"center_cell_code": str(cluster_summary.get("center_cell_code") or ""),
"center_cell_lat": _to_decimal_or_none(cluster_summary.get("center_cell_lat")),
"center_cell_lon": _to_decimal_or_none(cluster_summary.get("center_cell_lon")),
"geometry": cluster_geometry,
"cell_count": int(cluster_summary["cell_count"]),
"cell_codes": list(cluster_summary["cell_codes"]),
"metadata": {
**cluster_metadata,
"center_selection": {
"strategy": "coordinate_1_center",
"center_cell_code": cluster_summary.get("center_cell_code") or "",
"center_radius": cluster_summary.get("center_radius"),
"center_mean_distance": cluster_summary.get("center_mean_distance"),
},
},
}
if cluster_block is None:
cluster_block = RemoteSensingClusterBlock.objects.create(
result=result,
cluster_label=cluster_label,
**defaults,
)
else:
for field_name, value in defaults.items():
setattr(cluster_block, field_name, value)
cluster_block.save(
update_fields=[
"soil_location",
"block_subdivision",
"block_code",
"sub_block_code",
"chunk_size_sqm",
"centroid_lat",
"centroid_lon",
"center_cell_code",
"center_cell_lat",
"center_cell_lon",
"geometry",
"cell_count",
"cell_codes",
"metadata",
"updated_at",
]
)
cluster_summary["cluster_uuid"] = str(cluster_block.uuid)
cluster_summary["geometry"] = cluster_block.geometry
cluster_summary["metadata"] = cluster_block.metadata
synced_blocks.append(cluster_block)
stale_labels = set(existing_blocks) - active_labels
if stale_labels:
result.cluster_blocks.filter(cluster_label__in=stale_labels).delete()
return synced_blocks
def _build_cluster_geometry(
observations: list[AnalysisGridObservation],
) -> dict[str, Any]:
rings = _build_cluster_boundary_rings(observations)
if not rings:
return {}
if len(rings) == 1:
return {"type": "Polygon", "coordinates": [rings[0]]}
outer_ring_indexes = []
hole_mapping: dict[int, list[list[list[float]]]] = {}
for ring_index, ring in enumerate(rings):
sample_point = ring[0]
parent_indexes = [
candidate_index
for candidate_index, candidate_ring in enumerate(rings)
if candidate_index != ring_index and point_in_polygon(
(sample_point[0], sample_point[1]),
[(point[0], point[1]) for point in candidate_ring[:-1]],
)
]
if not parent_indexes:
outer_ring_indexes.append(ring_index)
continue
parent_index = min(
parent_indexes,
key=lambda candidate_index: abs(_signed_ring_area(rings[candidate_index])),
)
hole_mapping.setdefault(parent_index, []).append(ring)
if len(outer_ring_indexes) == 1:
outer_index = outer_ring_indexes[0]
return {
"type": "Polygon",
"coordinates": [rings[outer_index], *hole_mapping.get(outer_index, [])],
}
return {
"type": "MultiPolygon",
"coordinates": [
[rings[outer_index], *hole_mapping.get(outer_index, [])]
for outer_index in outer_ring_indexes
],
}
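# Note on the assembly above: each traced ring is classified as an outer
# shell or a hole by testing one of its vertices against every other ring
# with point_in_polygon. One outer ring yields a GeoJSON "Polygon" (shell
# first, holes after it); several outer rings yield a "MultiPolygon" in
# which each shell carries its own holes.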
def _build_shared_border_adjacency(
observations: list[AnalysisGridObservation],
) -> dict[int, set[int]]:
adjacency_map: dict[int, set[int]] = {index: set() for index in range(len(observations))}
shared_edge_map: dict[tuple[tuple[float, float], tuple[float, float]], list[int]] = {}
for index, observation in enumerate(observations):
for edge_key in _extract_cell_shared_edge_keys(observation):
shared_edge_map.setdefault(edge_key, []).append(index)
for cell_indexes in shared_edge_map.values():
if len(cell_indexes) != 2:
continue
left_index, right_index = cell_indexes
adjacency_map[left_index].add(right_index)
adjacency_map[right_index].add(left_index)
return adjacency_map
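# Worked example: two unit squares [(0,0)-(1,1)] and [(1,0)-(2,1)] both emit
# the edge key ((1.0, 0.0), (1.0, 1.0)), so the map becomes {0: {1}, 1: {0}};
# an edge seen by more or fewer than exactly two cells adds no adjacency.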
def _extract_cell_shared_edge_keys(
observation: AnalysisGridObservation,
) -> set[tuple[tuple[float, float], tuple[float, float]]]:
geometry = dict(getattr(observation.cell, "geometry", {}) or {})
coordinates = geometry.get("coordinates") or []
polygons = []
if geometry.get("type") == "Polygon" and coordinates:
polygons = [coordinates]
elif geometry.get("type") == "MultiPolygon" and coordinates:
polygons = coordinates
edge_keys: set[tuple[tuple[float, float], tuple[float, float]]] = set()
for polygon in polygons:
outer_ring = polygon[0] if polygon else []
normalized_ring = [
(float(point[0]), float(point[1]))
for point in outer_ring
if len(point) >= 2
]
if len(normalized_ring) < 4:
continue
if normalized_ring[0] != normalized_ring[-1]:
normalized_ring.append(normalized_ring[0])
for start_point, end_point in zip(normalized_ring, normalized_ring[1:]):
if start_point == end_point:
continue
edge_keys.add(tuple(sorted((start_point, end_point))))
return edge_keys
def _find_disconnected_label_components(
*,
labels: list[int],
adjacency_map: dict[int, set[int]],
) -> list[list[int]]:
components_to_merge: list[list[int]] = []
for cluster_label in sorted(set(labels)):
label_indexes = [index for index, label in enumerate(labels) if int(label) == cluster_label]
if len(label_indexes) <= 1:
continue
label_index_set = set(label_indexes)
visited: set[int] = set()
connected_components: list[list[int]] = []
for start_index in label_indexes:
if start_index in visited:
continue
component = []
queue = [start_index]
visited.add(start_index)
while queue:
current_index = queue.pop()
component.append(current_index)
for neighbor_index in adjacency_map.get(current_index, set()):
if neighbor_index in visited or neighbor_index not in label_index_set:
continue
visited.add(neighbor_index)
queue.append(neighbor_index)
connected_components.append(sorted(component))
if len(connected_components) <= 1:
continue
connected_components.sort(key=lambda component: (-len(component), component[0]))
components_to_merge.extend(connected_components[1:])
return components_to_merge
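# Worked example: labels=[0, 0, 0] with adjacency {0: {1}, 1: {0}, 2: set()}
# splits label 0 into components [0, 1] and [2]; the largest component is
# kept as the anchor and [[2]] is returned for merging.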
def _choose_neighbor_label_for_component(
*,
component_indexes: list[int],
labels: list[int],
adjacency_map: dict[int, set[int]],
scaled_matrix: list[list[float]],
) -> int | None:
component_centroid = _mean_vector([scaled_matrix[index] for index in component_indexes])
candidate_scores: dict[int, float] = {}
for component_index in component_indexes:
for neighbor_index in adjacency_map.get(component_index, set()):
neighbor_label = int(labels[neighbor_index])
if neighbor_label == int(labels[component_index]):
continue
candidate_indexes = [
index
for index, label in enumerate(labels)
if int(label) == neighbor_label
]
if not candidate_indexes:
continue
candidate_centroid = _mean_vector([scaled_matrix[index] for index in candidate_indexes])
distance = _euclidean_distance(component_centroid, candidate_centroid)
best_distance = candidate_scores.get(neighbor_label)
if best_distance is None or distance < best_distance:
candidate_scores[neighbor_label] = distance
if not candidate_scores:
return None
return min(candidate_scores.items(), key=lambda item: (item[1], item[0]))[0]
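# Worked example: for a 3-cell strip with adjacency {0: {1}, 1: {0, 2},
# 2: {1}}, labels=[0, 1, 2] and scaled rows [[0.0], [0.4], [1.0]], the
# component [0] borders only label 1 (centroid [0.4], distance 0.4), so 1 is
# returned; with several candidates the smallest centroid distance wins and
# the lower label breaks ties.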
def _normalize_cluster_labels(labels: list[int]) -> list[int]:
label_mapping: dict[int, int] = {}
normalized_labels: list[int] = []
next_label = 0
for label in labels:
label = int(label)
if label not in label_mapping:
label_mapping[label] = next_label
next_label += 1
normalized_labels.append(label_mapping[label])
return normalized_labels
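# Worked example: _normalize_cluster_labels([3, 3, 7, 3, 1]) returns
# [0, 0, 1, 0, 2]; labels are renumbered in order of first appearance.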
def _mean_vector(vectors: list[list[float]]) -> list[float]:
if not vectors:
return []
dimensions = len(vectors[0])
return [
sum(vector[dimension_index] for vector in vectors) / len(vectors)
for dimension_index in range(dimensions)
]
def _euclidean_distance(left: list[float], right: list[float]) -> float:
return math.sqrt(
sum((float(left[index]) - float(right[index])) ** 2 for index in range(len(left)))
)
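# Worked examples: _mean_vector([[0.0, 2.0], [2.0, 4.0]]) returns [1.0, 3.0];
# _euclidean_distance([0.0, 0.0], [3.0, 4.0]) returns 5.0.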
def _build_cluster_boundary_rings(
observations: list[AnalysisGridObservation],
) -> list[list[list[float]]]:
directed_edges: dict[tuple[tuple[float, float], tuple[float, float]], int] = {}
undirected_counts: dict[tuple[tuple[float, float], tuple[float, float]], int] = {}
point_lookup: dict[tuple[float, float], list[tuple[float, float]]] = {}
for observation in observations:
geometry = dict(getattr(observation.cell, "geometry", {}) or {})
coordinates = geometry.get("coordinates") or []
polygons = []
if geometry.get("type") == "Polygon" and coordinates:
polygons = [coordinates]
elif geometry.get("type") == "MultiPolygon" and coordinates:
polygons = coordinates
for polygon in polygons:
outer_ring = polygon[0] if polygon else []
normalized_ring = [
(float(point[0]), float(point[1]))
for point in outer_ring
if len(point) >= 2
]
if len(normalized_ring) < 4:
continue
if normalized_ring[0] != normalized_ring[-1]:
normalized_ring.append(normalized_ring[0])
for start_point, end_point in zip(normalized_ring, normalized_ring[1:]):
if start_point == end_point:
continue
directed_edges[(start_point, end_point)] = directed_edges.get((start_point, end_point), 0) + 1
undirected_key = tuple(sorted((start_point, end_point)))
undirected_counts[undirected_key] = undirected_counts.get(undirected_key, 0) + 1
boundary_edges = [
(start_point, end_point)
for (start_point, end_point), _count in directed_edges.items()
if undirected_counts.get(tuple(sorted((start_point, end_point))), 0) == 1
]
if not boundary_edges:
return []
for start_point, end_point in boundary_edges:
point_lookup.setdefault(start_point, []).append(end_point)
rings: list[list[list[float]]] = []
while point_lookup:
start_point = next(iter(point_lookup))
current_point = start_point
ring = [start_point]
visited_guard = 0
while visited_guard <= len(boundary_edges) + 1:
next_points = point_lookup.get(current_point) or []
if not next_points:
break
next_point = next_points.pop(0)
if not next_points:
point_lookup.pop(current_point, None)
current_point = next_point
ring.append(current_point)
if current_point == start_point:
break
visited_guard += 1
if len(ring) >= 4 and ring[0] == ring[-1]:
signed_area = _signed_ring_area(ring)
if signed_area < 0:
ring = [ring[0], *list(reversed(ring[1:-1])), ring[0]]
rings.append([[point[0], point[1]] for point in ring])
return rings
def _signed_ring_area(ring: list[list[float]] | list[tuple[float, float]]) -> float:
area = 0.0
for current_point, next_point in zip(ring, ring[1:]):
area += (float(current_point[0]) * float(next_point[1])) - (float(next_point[0]) * float(current_point[1]))
return area / 2.0
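# Worked example of the shoelace formula above: the counter-clockwise unit
# square [(0, 0), (1, 0), (1, 1), (0, 1), (0, 0)] has signed area +1.0;
# _build_cluster_boundary_rings uses the sign to flip clockwise rings into
# counter-clockwise order.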
def sync_location_block_layout_with_result(
*,
location: SoilLocation,
result: RemoteSensingSubdivisionResult,
cluster_summaries: list[dict[str, Any]],
) -> None:
layout = ensure_block_layout_defaults(location.block_layout, block_count=location.input_block_count)
blocks = list(layout.get("blocks") or [])
target_block = None
for block in blocks:
if block.get("block_code") == result.block_code:
target_block = block
break
if target_block is None:
target_block = {
"block_code": result.block_code,
"order": len(blocks) + 1,
"source": "remote_sensing",
"needs_subdivision": None,
"sub_blocks": [],
}
blocks.append(target_block)
target_block["needs_subdivision"] = result.cluster_count > 1
target_block["sub_blocks"] = [
{
"cluster_uuid": cluster.get("cluster_uuid"),
"sub_block_code": f"cluster-{cluster['cluster_label']}",
"cluster_label": cluster["cluster_label"],
"centroid_lat": cluster["centroid_lat"],
"centroid_lon": cluster["centroid_lon"],
"center_cell_code": cluster.get("center_cell_code") or "",
"center_cell_lat": cluster.get("center_cell_lat"),
"center_cell_lon": cluster.get("center_cell_lon"),
"cell_count": cluster["cell_count"],
"geometry": cluster.get("geometry") or {},
"metadata": cluster.get("metadata") or {},
}
for cluster in cluster_summaries
]
if not target_block["sub_blocks"]:
target_block["sub_blocks"] = [
build_default_sub_block(
str(target_block.get("block_code") or "block-1"),
boundary=target_block.get("boundary") or {},
)
]
target_block["subdivision_summary"] = {
"type": "data_driven_remote_sensing",
"cluster_count": result.cluster_count,
"selected_features": result.selected_features,
"used_cell_count": result.metadata.get("used_cell_count", 0),
"skipped_cell_count": result.metadata.get("skipped_cell_count", 0),
"run_id": result.run_id,
}
layout["blocks"] = blocks
layout["algorithm_status"] = "completed"
location.block_layout = layout
location.save(update_fields=["block_layout", "updated_at"])
def sync_block_subdivision_with_result(
*,
block_subdivision: BlockSubdivision,
result: RemoteSensingSubdivisionResult,
observations: list[AnalysisGridObservation],
cluster_summaries: list[dict[str, Any]],
) -> None:
metadata = dict(block_subdivision.metadata or {})
metadata["data_driven_subdivision"] = {
"run_id": result.run_id,
"result_id": result.id,
"cluster_count": result.cluster_count,
"selected_features": result.selected_features,
"used_cell_count": result.metadata.get("used_cell_count", 0),
"skipped_cell_count": result.metadata.get("skipped_cell_count", 0),
"temporal_extent": {
"start_date": result.temporal_start.isoformat() if result.temporal_start else None,
"end_date": result.temporal_end.isoformat() if result.temporal_end else None,
},
"inertia_curve": result.metadata.get("inertia_curve", []),
"diagnostic_artifacts": result.metadata.get("diagnostic_artifacts", {}),
}
block_subdivision.grid_points = [
{
"cell_code": observation.cell.cell_code,
"centroid_lat": round(float(observation.cell.centroid_lat), 6),
"centroid_lon": round(float(observation.cell.centroid_lon), 6),
}
for observation in observations
]
block_subdivision.centroid_points = [
{
"cluster_uuid": cluster.get("cluster_uuid"),
"sub_block_code": f"cluster-{cluster['cluster_label']}",
"cluster_label": cluster["cluster_label"],
"centroid_lat": cluster["centroid_lat"],
"centroid_lon": cluster["centroid_lon"],
"center_cell_code": cluster.get("center_cell_code") or "",
"center_cell_lat": cluster.get("center_cell_lat"),
"center_cell_lon": cluster.get("center_cell_lon"),
"cell_count": cluster["cell_count"],
"cell_codes": cluster["cell_codes"],
"geometry": cluster.get("geometry") or {},
"metadata": cluster.get("metadata") or {},
}
for cluster in cluster_summaries
]
block_subdivision.grid_point_count = len(observations)
block_subdivision.centroid_count = len(cluster_summaries)
block_subdivision.status = "subdivided"
block_subdivision.metadata = metadata
plot_content = render_elbow_plot(
inertia_curve=result.metadata.get("inertia_curve", []),
optimal_k=result.cluster_count,
block_code=result.block_code or block_subdivision.block_code,
)
if plot_content is not None:
block_subdivision.elbow_plot.save(
f"remote-sensing-{result.soil_location_id}-{result.block_code or block_subdivision.block_code}-elbow.png",
plot_content,
save=False,
)
block_subdivision.save(
update_fields=[
"grid_points",
"centroid_points",
"grid_point_count",
"centroid_count",
"status",
"metadata",
"elbow_plot",
"updated_at",
]
)
return
block_subdivision.save(
update_fields=[
"grid_points",
"centroid_points",
"grid_point_count",
"centroid_count",
"status",
"metadata",
"updated_at",
]
)
def _coerce_float(value: Any) -> float | None:
if value is None:
return None
try:
return float(value)
except (TypeError, ValueError):
return None
def _to_decimal_or_none(value: Any) -> Decimal | None:
if value is None:
return None
try:
return Decimal(str(value))
except (ArithmeticError, ValueError):
return None
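# Worked examples: _coerce_float("1.5") returns 1.5, _coerce_float("n/a")
# returns None; _to_decimal_or_none("12.5") returns Decimal("12.5") while
# _to_decimal_or_none("not-a-number") returns None, because Decimal raises
# InvalidOperation, a subclass of ArithmeticError.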
def _count_non_null_features(observations: list[AnalysisGridObservation]) -> dict[str, int]:
counts = {feature_name: 0 for feature_name in DEFAULT_CLUSTER_FEATURES}
for observation in observations:
for feature_name in DEFAULT_CLUSTER_FEATURES:
if _coerce_float(getattr(observation, feature_name, None)) is not None:
counts[feature_name] += 1
return counts
def _persist_remote_sensing_diagnostic_artifacts(
*,
result: RemoteSensingSubdivisionResult,
observations: list[AnalysisGridObservation],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
selected_features: list[str],
scaled_matrix: list[list[float]],
inertia_curve: list[dict[str, float]],
requested_k: int | None = None,
effective_cluster_count: int | None = None,
) -> dict[str, Any]:
try:
artifact_dir = _build_remote_sensing_diagnostic_dir(
result=result,
requested_k=requested_k,
effective_cluster_count=effective_cluster_count,
)
artifact_dir.mkdir(parents=True, exist_ok=True)
specs = [
(
"elbow_plot",
render_elbow_plot(
inertia_curve=inertia_curve,
optimal_k=result.cluster_count,
block_code=result.block_code or "farm",
),
"elbow",
),
(
"cluster_map",
_render_cluster_map_plot(
observations=observations,
labels=labels,
cluster_summaries=cluster_summaries,
block_code=result.block_code or "farm",
),
"cluster-map",
),
(
"cluster_sizes",
_render_cluster_size_plot(
observations=observations,
cluster_summaries=cluster_summaries,
block_code=result.block_code or "farm",
),
"cluster-sizes",
),
(
"feature_pairs",
_render_feature_pair_plot(
observations=observations,
selected_features=selected_features,
scaled_matrix=scaled_matrix,
labels=labels,
cluster_summaries=cluster_summaries,
block_code=result.block_code or "farm",
),
"feature-pairs",
),
(
"feature_projection",
_render_feature_projection_plot(
observations=observations,
selected_features=selected_features,
scaled_matrix=scaled_matrix,
labels=labels,
cluster_summaries=cluster_summaries,
block_code=result.block_code or "farm",
),
"feature-projection",
),
]
files: dict[str, str] = {}
for artifact_key, content, suffix in specs:
if content is None:
continue
target_path = artifact_dir / (
f"{_build_remote_sensing_artifact_stem(result=result, requested_k=requested_k, effective_cluster_count=effective_cluster_count)}"
f"__{suffix}.png"
)
_write_content_file(target_path=target_path, content=content)
files[artifact_key] = _to_project_relative_path(target_path)
return {
"directory": _to_project_relative_path(artifact_dir),
"files": files,
"requested_k": requested_k,
"effective_cluster_count": effective_cluster_count,
}
except (DataDrivenSubdivisionError, OSError) as exc:
logger.warning(
"Failed to persist remote sensing diagnostic artifacts for result_id=%s: %s",
result.id,
exc,
)
return {}
def _build_remote_sensing_diagnostic_dir(
*,
result: RemoteSensingSubdivisionResult,
requested_k: int | None = None,
effective_cluster_count: int | None = None,
) -> Path:
configured_dir = str(
os.environ.get("REMOTE_SENSING_DIAGNOSTIC_DIR", DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR)
).strip()
base_dir = Path(getattr(settings, "BASE_DIR", Path.cwd()))
target_dir = Path(configured_dir)
if not target_dir.is_absolute():
target_dir = base_dir / target_dir
block_component = _sanitize_path_component(result.block_code or "farm")
diagnostic_dir = target_dir / f"location-{result.soil_location_id}" / f"run-{result.run_id}-{block_component}"
if requested_k is not None:
effective_component = effective_cluster_count if effective_cluster_count is not None else requested_k
diagnostic_dir = diagnostic_dir / f"k-{requested_k}-effective-{effective_component}"
return diagnostic_dir
def _build_remote_sensing_artifact_stem(
*,
result: RemoteSensingSubdivisionResult,
requested_k: int | None = None,
effective_cluster_count: int | None = None,
) -> str:
stem = (
f"location-{result.soil_location_id}"
f"__run-{result.run_id}"
f"__{_sanitize_path_component(result.block_code or 'farm')}"
)
if requested_k is not None:
effective_component = effective_cluster_count if effective_cluster_count is not None else requested_k
stem = f"{stem}__k-{requested_k}__effective-{effective_component}"
return stem
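# Illustrative layout (hypothetical ids): for soil_location_id=7, run_id=12,
# block_code="block-a", requested_k=3 and effective_cluster_count=3 the
# directory resolves to
#   <BASE_DIR>/artifacts/remote_sensing_charts/location-7/run-12-block-a/k-3-effective-3
# and the artifact stem to
#   location-7__run-12__block-a__k-3__effective-3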
def _write_content_file(*, target_path: Path, content: ContentFile) -> None:
target_path.parent.mkdir(parents=True, exist_ok=True)
content.open("rb")
try:
target_path.write_bytes(content.read())
finally:
content.close()
def _to_project_relative_path(path: Path) -> str:
base_dir = Path(getattr(settings, "BASE_DIR", Path.cwd()))
try:
return str(path.relative_to(base_dir))
except ValueError:
return str(path)
def _sanitize_path_component(value: str) -> str:
text = str(value or "").strip() or "unknown"
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
return sanitized or "unknown"
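# Worked example: _sanitize_path_component("block A/1") returns "block_A_1";
# anything outside alphanumerics, "-", "_" and "." becomes "_", and empty or
# whitespace-only input collapses to "unknown".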
def _render_cluster_map_plot(
*,
observations: list[AnalysisGridObservation],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
block_code: str,
) -> ContentFile | None:
if not observations:
return None
plt = _import_matplotlib_pyplot()
unique_labels = sorted(set(int(label) for label in labels))
colors = plt.cm.get_cmap("tab10", max(len(unique_labels), 1))
center_indexes_by_label = _build_center_indexes_by_label(
observations=observations,
labels=labels,
cluster_summaries=cluster_summaries,
)
fig, ax = plt.subplots(figsize=(8, 6))
buffer = BytesIO()
try:
for color_index, cluster_label in enumerate(unique_labels):
cluster_points = [
(
float(observation.cell.centroid_lon),
float(observation.cell.centroid_lat),
_build_observation_label(observation=observation, index=index),
)
for index, (observation, label) in enumerate(zip(observations, labels))
if int(label) == cluster_label
]
if not cluster_points:
continue
xs = [point[0] for point in cluster_points]
ys = [point[1] for point in cluster_points]
ax.scatter(
xs,
ys,
s=70,
alpha=0.9,
color=colors(color_index),
edgecolors="white",
linewidths=0.8,
label=f"Cluster {cluster_label}",
)
_annotate_plot_points(
axis=ax,
x_values=xs,
y_values=ys,
point_labels=[point[2] for point in cluster_points],
)
center_index = center_indexes_by_label.get(cluster_label)
if center_index is not None:
_plot_cluster_center_marker(
axis=ax,
x_value=float(observations[center_index].cell.centroid_lon),
y_value=float(observations[center_index].cell.centroid_lat),
point_label=_build_center_label(
observations=observations,
cluster_summaries=cluster_summaries,
cluster_label=cluster_label,
),
color=colors(color_index),
)
ax.set_title(f"KMeans Spatial Cluster Map - {block_code}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
if unique_labels:
ax.legend()
fig.tight_layout()
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
plt.close(fig)
def _render_cluster_size_plot(
*,
observations: list[AnalysisGridObservation],
cluster_summaries: list[dict[str, Any]],
block_code: str,
) -> ContentFile | None:
if not cluster_summaries:
return None
plt = _import_matplotlib_pyplot()
labels = [f"C{int(cluster['cluster_label'])}" for cluster in cluster_summaries]
counts = [int(cluster["cell_count"]) for cluster in cluster_summaries]
fig, ax = plt.subplots(figsize=(8, 5))
buffer = BytesIO()
try:
bars = ax.bar(labels, counts, color="#2f6fed", alpha=0.85)
point_numbers_by_cell_code = {
str(observation.cell.cell_code): index + 1
for index, observation in enumerate(observations)
}
for bar, count in zip(bars, counts):
ax.text(
bar.get_x() + bar.get_width() / 2.0,
bar.get_height(),
str(count),
ha="center",
va="bottom",
fontsize=9,
)
for bar, cluster_summary in zip(bars, cluster_summaries):
center_cell_code = str(cluster_summary.get("center_cell_code") or "").strip()
if center_cell_code:
center_point_number = point_numbers_by_cell_code.get(center_cell_code)
center_text = f"center: {center_point_number}" if center_point_number is not None else "center"
ax.text(
bar.get_x() + bar.get_width() / 2.0,
bar.get_height() / 2.0 if bar.get_height() else 0.05,
center_text,
ha="center",
va="center",
fontsize=8,
color="#16325c",
rotation=90,
)
ax.set_title(f"Cluster Sizes - {block_code}")
ax.set_xlabel("Cluster")
ax.set_ylabel("Cell Count")
ax.grid(True, axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
fig.tight_layout()
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
plt.close(fig)
def _render_feature_pair_plot(
*,
observations: list[AnalysisGridObservation],
selected_features: list[str],
scaled_matrix: list[list[float]],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
block_code: str,
) -> ContentFile | None:
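    """Render scatter subplots for every pairwise combination of the selected
    scaled features, coloring points by cluster and starring cluster centers."""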
if not scaled_matrix or not selected_features:
return None
plt = _import_matplotlib_pyplot()
feature_count = len(selected_features)
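    # Build every unordered pair of feature indexes; a single feature is instead
    # plotted against the observation index (the (0, 0) sentinel below).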
pair_indexes = [(0, 0)] if feature_count == 1 else [
(left_index, right_index)
for left_index in range(feature_count)
for right_index in range(left_index + 1, feature_count)
]
subplot_count = len(pair_indexes)
columns = 2 if subplot_count > 1 else 1
rows = math.ceil(subplot_count / columns)
fig, axes = plt.subplots(rows, columns, figsize=(7 * columns, 5 * rows))
axes_list = axes.flatten().tolist() if hasattr(axes, "flatten") else [axes]
unique_labels = sorted(set(int(label) for label in labels))
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))  # matplotlib.cm.get_cmap was removed in Matplotlib 3.9
observation_labels = [
_build_observation_label(observation=observation, index=index)
for index, observation in enumerate(observations)
]
center_indexes_by_label = _build_center_indexes_by_label(
observations=observations,
labels=labels,
cluster_summaries=cluster_summaries,
)
buffer = BytesIO()
try:
for axis, (left_index, right_index) in zip(axes_list, pair_indexes):
if feature_count == 1:
xs = list(range(1, len(scaled_matrix) + 1))
ys = [row[0] for row in scaled_matrix]
for color_index, cluster_label in enumerate(unique_labels):
filtered = [
(x_value, y_value, point_label)
for x_value, y_value, label, point_label in zip(xs, ys, labels, observation_labels)
if int(label) == cluster_label
]
axis.scatter(
[item[0] for item in filtered],
[item[1] for item in filtered],
s=55,
color=colors(color_index),
alpha=0.85,
label=f"Cluster {cluster_label}",
)
_annotate_plot_points(
axis=axis,
x_values=[item[0] for item in filtered],
y_values=[item[1] for item in filtered],
point_labels=[item[2] for item in filtered],
)
center_index = center_indexes_by_label.get(cluster_label)
if center_index is not None:
_plot_cluster_center_marker(
axis=axis,
x_value=float(center_index + 1),
y_value=float(scaled_matrix[center_index][0]),
point_label=_build_center_label(
observations=observations,
cluster_summaries=cluster_summaries,
cluster_label=cluster_label,
),
color=colors(color_index),
)
axis.set_xlabel("Observation Index")
axis.set_ylabel(f"{selected_features[0]} (scaled)")
axis.set_title(f"{selected_features[0]} distribution")
else:
x_values = [row[left_index] for row in scaled_matrix]
y_values = [row[right_index] for row in scaled_matrix]
for color_index, cluster_label in enumerate(unique_labels):
filtered = [
(x_value, y_value, point_label)
for x_value, y_value, label, point_label in zip(
x_values,
y_values,
labels,
observation_labels,
)
if int(label) == cluster_label
]
axis.scatter(
[item[0] for item in filtered],
[item[1] for item in filtered],
s=55,
color=colors(color_index),
alpha=0.85,
label=f"Cluster {cluster_label}",
)
_annotate_plot_points(
axis=axis,
x_values=[item[0] for item in filtered],
y_values=[item[1] for item in filtered],
point_labels=[item[2] for item in filtered],
)
center_index = center_indexes_by_label.get(cluster_label)
if center_index is not None:
_plot_cluster_center_marker(
axis=axis,
x_value=float(scaled_matrix[center_index][left_index]),
y_value=float(scaled_matrix[center_index][right_index]),
point_label=_build_center_label(
observations=observations,
cluster_summaries=cluster_summaries,
cluster_label=cluster_label,
),
color=colors(color_index),
)
axis.set_xlabel(f"{selected_features[left_index]} (scaled)")
axis.set_ylabel(f"{selected_features[right_index]} (scaled)")
axis.set_title(
f"{selected_features[left_index]} vs {selected_features[right_index]}"
)
axis.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
for axis in axes_list[subplot_count:]:
axis.remove()
if unique_labels and axes_list:
axes_list[0].legend()
fig.suptitle(f"KMeans Feature Diagnostics - {block_code}", fontsize=14)
fig.tight_layout(rect=(0, 0, 1, 0.97))
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
        plt.close(fig)


def _render_feature_projection_plot(
*,
observations: list[AnalysisGridObservation],
selected_features: list[str],
scaled_matrix: list[list[float]],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
block_code: str,
) -> ContentFile | None:
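    """Render a single 2D scatter of all observations after projecting the full
    scaled feature matrix to two axes (PCA when available), colored by cluster."""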
if not scaled_matrix:
return None
plt = _import_matplotlib_pyplot()
projected_points, x_label, y_label = _project_all_features_to_2d(
scaled_matrix=scaled_matrix,
selected_features=selected_features,
)
unique_labels = sorted(set(int(label) for label in labels))
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))  # matplotlib.cm.get_cmap was removed in Matplotlib 3.9
observation_labels = [
_build_observation_label(observation=observation, index=index)
for index, observation in enumerate(observations)
]
center_indexes_by_label = _build_center_indexes_by_label(
observations=observations,
labels=labels,
cluster_summaries=cluster_summaries,
)
fig, ax = plt.subplots(figsize=(8, 6))
buffer = BytesIO()
try:
for color_index, cluster_label in enumerate(unique_labels):
filtered = [
(x_value, y_value, point_label)
for (x_value, y_value), label, point_label in zip(projected_points, labels, observation_labels)
if int(label) == cluster_label
]
if not filtered:
continue
ax.scatter(
[item[0] for item in filtered],
[item[1] for item in filtered],
s=65,
color=colors(color_index),
alpha=0.9,
edgecolors="white",
linewidths=0.8,
label=f"Cluster {cluster_label}",
)
_annotate_plot_points(
axis=ax,
x_values=[item[0] for item in filtered],
y_values=[item[1] for item in filtered],
point_labels=[item[2] for item in filtered],
)
center_index = center_indexes_by_label.get(cluster_label)
if center_index is not None and center_index < len(projected_points):
_plot_cluster_center_marker(
axis=ax,
x_value=float(projected_points[center_index][0]),
y_value=float(projected_points[center_index][1]),
point_label=_build_center_label(
observations=observations,
cluster_summaries=cluster_summaries,
cluster_label=cluster_label,
),
color=colors(color_index),
)
ax.set_title(f"KMeans All-Feature Projection - {block_code}")
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
if unique_labels:
ax.legend()
fig.tight_layout()
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
        plt.close(fig)


def _project_all_features_to_2d(
*,
scaled_matrix: list[list[float]],
selected_features: list[str],
) -> tuple[list[tuple[float, float]], str, str]:
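    """Project the scaled feature matrix onto two plotting axes.

    Uses a 2-component PCA when scikit-learn is available and the matrix has at
    least two rows and columns; otherwise falls back to the raw scaled feature
    columns. Returns the projected points plus x/y axis labels.
    """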
if not scaled_matrix:
return [], "Projection Axis 1", "Projection Axis 2"
matrix = [[float(value) for value in row] for row in scaled_matrix]
row_count = len(matrix)
column_count = len(matrix[0]) if matrix and matrix[0] else 0
if row_count >= 2 and column_count >= 2:
try:
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
projected_matrix = pca.fit_transform(matrix)
x_axis_label = "PC1"
y_axis_label = "PC2"
explained = list(getattr(pca, "explained_variance_ratio_", []) or [])
if len(explained) >= 2:
x_axis_label = f"PC1 ({explained[0] * 100:.1f}%)"
y_axis_label = f"PC2 ({explained[1] * 100:.1f}%)"
return (
[(float(row[0]), float(row[1])) for row in projected_matrix.tolist()],
x_axis_label,
y_axis_label,
)
except ImportError:
logger.warning(
"scikit-learn PCA is unavailable, falling back to the first scaled features for projection."
)
except ValueError as exc:
logger.warning("Failed to calculate PCA projection for remote sensing diagnostics: %s", exc)
x_values = [float(row[0]) if row else 0.0 for row in matrix]
if column_count >= 2:
y_values = [float(row[1]) for row in matrix]
else:
y_values = [0.0 for _ in matrix]
x_axis_label = f"{selected_features[0]} (scaled)" if selected_features else "Feature 1 (scaled)"
y_axis_label = (
f"{selected_features[1]} (scaled)"
if len(selected_features) >= 2
else "Projection Axis 2"
)
    return list(zip(x_values, y_values)), x_axis_label, y_axis_label


def _build_observation_label(*, observation: AnalysisGridObservation, index: int) -> str:
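    """Return the 1-based point number used to label observations on plots."""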
_ = observation
    return str(index + 1)


def _annotate_plot_points(
*,
axis: Any,
x_values: list[float],
y_values: list[float],
point_labels: list[str],
) -> None:
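    """Attach a small text label next to each plotted point."""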
for x_value, y_value, point_label in zip(x_values, y_values, point_labels):
axis.annotate(
point_label,
xy=(x_value, y_value),
xytext=(4, 4),
textcoords="offset points",
fontsize=7,
alpha=0.85,
        )


def _build_center_indexes_by_label(
*,
observations: list[AnalysisGridObservation],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
) -> dict[int, int]:
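    """Map each cluster label to the observation index of its center cell,
    skipping centers whose cell is unknown or assigned to another cluster."""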
cell_code_to_index = {
str(observation.cell.cell_code): index
for index, observation in enumerate(observations)
}
center_indexes_by_label: dict[int, int] = {}
for cluster_summary in cluster_summaries:
cluster_label = int(cluster_summary.get("cluster_label", -1))
center_cell_code = str(cluster_summary.get("center_cell_code") or "").strip()
center_index = cell_code_to_index.get(center_cell_code)
if center_index is None:
continue
if center_index < len(labels) and int(labels[center_index]) == cluster_label:
center_indexes_by_label[cluster_label] = center_index
    return center_indexes_by_label


def _build_center_label(
*,
observations: list[AnalysisGridObservation],
cluster_summaries: list[dict[str, Any]],
cluster_label: int,
) -> str:
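    """Build the annotation text for a cluster's center marker, preferring the
    center cell's 1-based point number when it can be resolved."""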
point_numbers_by_cell_code = {
str(observation.cell.cell_code): index + 1
for index, observation in enumerate(observations)
}
for cluster_summary in cluster_summaries:
if int(cluster_summary.get("cluster_label", -1)) == int(cluster_label):
center_cell_code = str(cluster_summary.get("center_cell_code") or "").strip()
if center_cell_code:
point_number = point_numbers_by_cell_code.get(center_cell_code)
if point_number is not None:
return f"K-center: {point_number}"
return "K-center"
break
return f"K-center C{cluster_label}"
def _plot_cluster_center_marker(
*,
axis: Any,
x_value: float,
y_value: float,
point_label: str,
color: Any,
) -> None:
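    """Draw a star marker at a cluster center and annotate it in bold."""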
axis.scatter(
[x_value],
[y_value],
s=220,
marker="*",
color=color,
edgecolors="black",
linewidths=1.2,
zorder=5,
)
axis.annotate(
point_label,
xy=(x_value, y_value),
xytext=(7, -10),
textcoords="offset points",
fontsize=8,
fontweight="bold",
color="black",
    )


def _import_matplotlib_pyplot():
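    """Import matplotlib lazily with the non-interactive Agg backend."""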
try:
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
except ImportError as exc: # pragma: no cover - runtime dependency guard
raise DataDrivenSubdivisionError("matplotlib برای ذخیره نمودارهای KMeans لازم است.") from exc
    return plt


def _build_clustering_log_context(
*,
observations: list[AnalysisGridObservation],
selected_features: list[str],
run: RemoteSensingRun | None,
location: SoilLocation | None,
) -> dict[str, Any]:
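    """Assemble a structured logging context (run, region, date range, selected
    features) from the observations, tolerating missing related objects."""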
first_observation = observations[0] if observations else None
observation_metadata = dict(getattr(first_observation, "metadata", {}) or {})
resolved_run = run or getattr(first_observation, "run", None)
resolved_location = location or getattr(getattr(first_observation, "cell", None), "soil_location", None)
temporal_start = getattr(resolved_run, "temporal_start", None) or getattr(first_observation, "temporal_start", None)
temporal_end = getattr(resolved_run, "temporal_end", None) or getattr(first_observation, "temporal_end", None)
return {
"run_id": getattr(resolved_run, "id", None),
"job_ref": observation_metadata.get("job_refs", {}),
"region_id": getattr(resolved_location, "id", None),
"date_range": {
"temporal_start": temporal_start.isoformat() if hasattr(temporal_start, "isoformat") else temporal_start,
"temporal_end": temporal_end.isoformat() if hasattr(temporal_end, "isoformat") else temporal_end,
},
"selected_features": selected_features,
    }


def _serialize_log_payload(payload: dict[str, Any]) -> str:
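    """Serialize a log payload as deterministic ASCII JSON with sorted keys."""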
return json.dumps(payload, ensure_ascii=True, default=str, sort_keys=True)