"""Celery tasks for the remote sensing pipeline and data-driven subdivision."""

import logging
from typing import Any

from django.conf import settings
from django.db import transaction
from django.utils import timezone
from django.utils.dateparse import parse_date

from config.celery import app

from .data_driven_subdivision import (
    DEFAULT_CLUSTER_FEATURES,
    DataDrivenSubdivisionError,
    create_remote_sensing_subdivision_result,
)
from .grid_analysis import create_or_get_analysis_grid_cells
from .models import (
    AnalysisGridCell,
    AnalysisGridObservation,
    BlockSubdivision,
    RemoteSensingRun,
    RemoteSensingSubdivisionResult,
    SoilLocation,
)
from .openeo_service import (
    OpenEOAuthenticationError,
    OpenEOExecutionError,
    OpenEOServiceError,
    compute_remote_sensing_metrics,
)

try:
    import requests
except ImportError:  # pragma: no cover - handled in stripped envs
    RequestException = Exception
else:
    RequestException = requests.RequestException

logger = logging.getLogger(__name__)

# Retry policy for the Celery task: exponential backoff capped at 10 minutes.
REMOTE_SENSING_TASK_MAX_RETRIES = 5
REMOTE_SENSING_TASK_RETRY_DELAY_SECONDS = 60
REMOTE_SENSING_TASK_RETRY_BACKOFF_MAX_SECONDS = 600

# Metric columns that are written to AnalysisGridObservation rows.
PERSISTED_OBSERVATION_FEATURES = (
    "ndvi",
    "ndwi",
    "soil_vv",
    "soil_vv_db",
)


def run_remote_sensing_analysis(
    *,
    soil_location_id: int,
    block_code: str = "",
    temporal_start: Any,
    temporal_end: Any,
    force_refresh: bool = False,
    task_id: str = "",
    run_id: int | None = None,
    cluster_count: int | None = None,
    selected_features: list[str] | None = None,
) -> dict[str, Any]:
    """Run the remote sensing analysis synchronously for one location/block.

    This helper is usable from the Celery task and from any other internal
    orchestration. It prepares the analysis grid, fetches remote metrics for
    cells lacking cached observations (or all cells when ``force_refresh``),
    persists observations, runs the data-driven clustering, and records every
    pipeline stage on the :class:`RemoteSensingRun`.

    Returns a summary dict (status, counts, run/result ids). Raises
    ``ValueError`` for invalid dates or an unknown location; any failure is
    recorded on the run before being re-raised.
    """
    start_date = _normalize_temporal_date(temporal_start, "temporal_start")
    end_date = _normalize_temporal_date(temporal_end, "temporal_end")
    if start_date > end_date:
        raise ValueError("temporal_start نمی‌تواند بعد از temporal_end باشد.")

    location = SoilLocation.objects.filter(pk=soil_location_id).first()
    if location is None:
        raise ValueError(f"SoilLocation با id={soil_location_id} پیدا نشد.")

    resolved_block_code = str(block_code or "").strip()
    subdivision = _resolve_block_subdivision(location, resolved_block_code)
    run = _get_or_create_remote_sensing_run(
        run_id=run_id,
        location=location,
        subdivision=subdivision,
        block_code=resolved_block_code,
        temporal_start=start_date,
        temporal_end=end_date,
        task_id=task_id,
        cluster_count=cluster_count,
        selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
    )
    _mark_run_running(run)
    try:
        _record_run_stage(
            run,
            "preparing_analysis_grid",
            {
                "block_code": resolved_block_code,
                "temporal_extent": {
                    "start_date": start_date.isoformat(),
                    "end_date": end_date.isoformat(),
                },
            },
        )
        grid_summary = create_or_get_analysis_grid_cells(
            location,
            block_code=resolved_block_code,
            block_subdivision=subdivision,
        )
        _record_run_stage(run, "analysis_grid_ready", {"grid_summary": grid_summary})

        all_cells = _load_grid_cells(location, resolved_block_code)
        cells_to_process = _select_cells_for_processing(
            all_cells=all_cells,
            temporal_start=start_date,
            temporal_end=end_date,
            force_refresh=force_refresh,
        )
        _record_run_stage(
            run,
            "analysis_cells_selected",
            {
                "cell_selection": {
                    "total_cell_count": len(all_cells),
                    "cell_count_to_process": len(cells_to_process),
                    "existing_cell_count": len(all_cells) - len(cells_to_process),
                    "force_refresh": force_refresh,
                }
            },
        )

        if not cells_to_process:
            # All cells already have observations for this temporal window;
            # reuse the cached rows unless they turn out to be fully null.
            observations = _load_observations(
                location=location,
                block_code=resolved_block_code,
                temporal_start=start_date,
                temporal_end=end_date,
            )
            if not _has_usable_observations(
                observations=observations,
                selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
            ):
                logger.warning(
                    "Cached observations are fully null, refetching remote metrics for run_id=%s",
                    run.id,
                )
                _record_run_stage(
                    run,
                    "using_cached_observations",
                    {"source": "database", "usable": False, "refetching": True},
                )
                # Fall through to the remote fetch path with every cell.
                cells_to_process = all_cells
            else:
                _record_run_stage(
                    run,
                    "using_cached_observations",
                    {"source": "database", "usable": True, "refetching": False},
                )
                subdivision_result = _ensure_subdivision_result(
                    location=location,
                    run=run,
                    subdivision=subdivision,
                    block_code=resolved_block_code,
                    observations=observations,
                    cluster_count=cluster_count,
                    selected_features=selected_features,
                )
                _record_run_stage(
                    run,
                    "clustering_completed",
                    _build_clustering_stage_metadata(subdivision_result),
                )
                summary = {
                    "status": "completed",
                    "source": "database",
                    "run_id": run.id,
                    "processed_cell_count": 0,
                    "created_observation_count": 0,
                    "updated_observation_count": 0,
                    "existing_observation_count": len(all_cells),
                    "failed_metric_count": 0,
                    "chunk_size_sqm": grid_summary["chunk_size_sqm"],
                    "block_code": resolved_block_code,
                    "cell_count": len(all_cells),
                    "subdivision_result_id": getattr(subdivision_result, "id", None),
                    "cluster_count": getattr(subdivision_result, "cluster_count", 0),
                }
                _mark_run_success(run, summary)
                return summary

        # Remote fetch path: query openEO for the selected cells/features.
        _record_run_stage(
            run,
            "fetching_remote_metrics",
            _build_remote_metric_stage_details(
                cells=cells_to_process,
                selected_features=selected_features,
            ),
        )
        progress_callback = _build_remote_metric_progress_callback(
            run=run,
            cells=cells_to_process,
            selected_features=selected_features,
        )
        remote_payload = compute_remote_sensing_metrics(
            cells_to_process,
            temporal_start=start_date,
            temporal_end=end_date,
            selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
            progress_callback=progress_callback,
        )
        _record_run_stage(
            run,
            "remote_metrics_fetched",
            {
                "failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
                "service_metadata": remote_payload["metadata"],
            },
        )
        upsert_summary = _upsert_grid_observations(
            cells=cells_to_process,
            run=run,
            temporal_start=start_date,
            temporal_end=end_date,
            metric_payload=remote_payload,
        )
        _record_run_stage(run, "observations_persisted", upsert_summary)

        # Reload ALL observations (cached + freshly persisted) for clustering.
        observations = _load_observations(
            location=location,
            block_code=resolved_block_code,
            temporal_start=start_date,
            temporal_end=end_date,
        )
        subdivision_result = _ensure_subdivision_result(
            location=location,
            run=run,
            subdivision=subdivision,
            block_code=resolved_block_code,
            observations=observations,
            cluster_count=cluster_count,
            selected_features=selected_features,
        )
        _record_run_stage(
            run,
            "clustering_completed",
            _build_clustering_stage_metadata(subdivision_result),
        )
        summary = {
            "status": "completed",
            "source": "openeo",
            "run_id": run.id,
            "processed_cell_count": len(cells_to_process),
            "created_observation_count": upsert_summary["created_count"],
            "updated_observation_count": upsert_summary["updated_count"],
            "existing_observation_count": len(all_cells) - len(cells_to_process),
            "failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
            "chunk_size_sqm": grid_summary["chunk_size_sqm"],
            "block_code": resolved_block_code,
            "cell_count": len(all_cells),
            "subdivision_result_id": subdivision_result.id,
            "cluster_count": subdivision_result.cluster_count,
        }
        _mark_run_success(run, summary, remote_payload["metadata"])
        logger.info(
            "Remote sensing analysis completed",
            extra={
                "run_id": run.id,
                "soil_location_id": location.id,
                "block_code": resolved_block_code,
                "processed_cell_count": summary["processed_cell_count"],
            },
        )
        return summary
    except Exception as exc:
        # Record the failure on the run, then let the caller/Celery see it.
        _mark_run_failure(run, str(exc))
        raise


@app.task(
    bind=True,
    max_retries=REMOTE_SENSING_TASK_MAX_RETRIES,
    default_retry_delay=REMOTE_SENSING_TASK_RETRY_DELAY_SECONDS,
)
def run_remote_sensing_analysis_task(
    self,
    soil_location_id: int,
    block_code: str = "",
    temporal_start: Any = "",
    temporal_end: Any = "",
    force_refresh: bool = False,
    run_id: int | None = None,
    cluster_count: int | None = None,
    selected_features: list[str] | None = None,
):
    """Run the remote sensing analysis asynchronously and persist results in the DB.

    Auth failures are terminal; transient service/network/subdivision errors
    are retried with exponential backoff (capped), with the run marked as
    retrying so the UI can surface progress.
    """
    logger.info(
        "Starting remote sensing analysis task",
        extra={
            "task_id": self.request.id,
            "soil_location_id": soil_location_id,
            "block_code": block_code,
            "temporal_start": temporal_start,
            "temporal_end": temporal_end,
            "force_refresh": force_refresh,
        },
    )
    try:
        return run_remote_sensing_analysis(
            soil_location_id=soil_location_id,
            block_code=block_code,
            temporal_start=temporal_start,
            temporal_end=temporal_end,
            force_refresh=force_refresh,
            task_id=self.request.id,
            run_id=run_id,
            cluster_count=cluster_count,
            selected_features=selected_features,
        )
    except OpenEOAuthenticationError:
        # Credentials problems won't fix themselves on retry.
        logger.exception(
            "Remote sensing auth failure",
            extra={"task_id": self.request.id, "soil_location_id": soil_location_id},
        )
        raise
    except (OpenEOExecutionError, OpenEOServiceError, RequestException, DataDrivenSubdivisionError) as exc:
        retry_count = self.request.retries + 1
        # Exponential backoff: 60s, 120s, 240s, ... capped at 600s.
        countdown = min(
            REMOTE_SENSING_TASK_RETRY_DELAY_SECONDS * (2 ** self.request.retries),
            REMOTE_SENSING_TASK_RETRY_BACKOFF_MAX_SECONDS,
        )
        _mark_run_retrying(
            run_id=run_id,
            task_id=self.request.id,
            error_message=str(exc),
            retry_count=retry_count,
            retry_delay_seconds=countdown,
        )
        logger.warning(
            "Transient remote sensing failure, retrying task",
            extra={
                "task_id": self.request.id,
                "soil_location_id": soil_location_id,
                "block_code": block_code,
                "retry_count": retry_count,
                "retry_delay_seconds": countdown,
                "error": str(exc),
            },
        )
        raise self.retry(exc=exc, countdown=countdown)


def _normalize_temporal_date(value: Any, field_name: str):
    """Coerce *value* to a date-like object, raising ValueError when invalid.

    Date/datetime-like inputs (anything with ``isoformat`` that is not a str)
    are returned unchanged; strings are parsed as ISO dates.
    """
    if hasattr(value, "isoformat") and not isinstance(value, str):
        return value
    parsed = parse_date(str(value))
    if parsed is None:
        raise ValueError(f"{field_name} نامعتبر است.")
    return parsed


def _resolve_block_subdivision(location: SoilLocation, block_code: str) -> BlockSubdivision | None:
    """Return the most recently updated subdivision for the block, if any."""
    if not block_code:
        return None
    return (
        BlockSubdivision.objects.filter(
            soil_location=location,
            block_code=block_code,
        )
        .order_by("-updated_at", "-id")
        .first()
    )


def _get_or_create_remote_sensing_run(
    *,
    run_id: int | None,
    location: SoilLocation,
    subdivision: BlockSubdivision | None,
    block_code: str,
    temporal_start,
    temporal_end,
    task_id: str,
    cluster_count: int | None,
    selected_features: list[str],
) -> RemoteSensingRun:
    """Reuse an existing run (resetting its queued metadata) or create a new one."""
    queued_at = timezone.now().isoformat()
    if run_id is not None:
        run = RemoteSensingRun.objects.filter(pk=run_id, soil_location=location).first()
        if run is not None:
            metadata = dict(run.metadata or {})
            if task_id:
                metadata["task_id"] = task_id
            metadata.setdefault("status_label", "pending")
            metadata["stage"] = "queued"
            metadata["selected_features"] = selected_features
            metadata["requested_cluster_count"] = cluster_count
            metadata["pipeline"] = {
                "name": "remote_sensing_subdivision",
                "version": 2,
            }
            metadata["timestamps"] = {
                **dict(metadata.get("timestamps") or {}),
                "queued_at": queued_at,
            }
            run.block_subdivision = subdivision
            run.block_code = block_code
            run.chunk_size_sqm = int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900)
            run.temporal_start = temporal_start
            run.temporal_end = temporal_end
            run.metadata = metadata
            run.save(
                update_fields=[
                    "block_subdivision",
                    "block_code",
                    "chunk_size_sqm",
                    "temporal_start",
                    "temporal_end",
                    "metadata",
                    "updated_at",
                ]
            )
            return run
    metadata = {
        "status_label": "pending",
        "stage": "queued",
        "selected_features": selected_features,
        "requested_cluster_count": cluster_count,
        "pipeline": {
            "name": "remote_sensing_subdivision",
            "version": 2,
        },
        "timestamps": {"queued_at": queued_at},
    }
    if task_id:
        metadata["task_id"] = task_id
    return RemoteSensingRun.objects.create(
        soil_location=location,
        block_subdivision=subdivision,
        block_code=block_code,
        chunk_size_sqm=int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900),
        temporal_start=temporal_start,
        temporal_end=temporal_end,
        status=RemoteSensingRun.STATUS_PENDING,
        metadata=metadata,
    )


def _mark_run_running(run: RemoteSensingRun) -> None:
    """Transition the run to RUNNING and stamp its start time."""
    # Capture a single timestamp so started_at and the metadata entry agree.
    now = timezone.now()
    metadata = dict(run.metadata or {})
    metadata["status_label"] = "running"
    metadata["stage"] = "running"
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "started_at": now.isoformat(),
    }
    run.status = RemoteSensingRun.STATUS_RUNNING
    run.started_at = now
    run.metadata = metadata
    run.save(update_fields=["status", "started_at", "metadata", "updated_at"])


def _mark_run_success(
    run: RemoteSensingRun,
    summary: dict[str, Any],
    service_metadata: dict[str, Any] | None = None,
) -> None:
    """Transition the run to SUCCESS, storing the summary (and service metadata)."""
    metadata = dict(run.metadata or {})
    metadata["summary"] = summary
    metadata["status_label"] = "completed"
    metadata["stage"] = "completed"
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "completed_at": timezone.now().isoformat(),
    }
    if service_metadata:
        metadata["service"] = service_metadata
    run.status = RemoteSensingRun.STATUS_SUCCESS
    run.finished_at = timezone.now()
    run.error_message = ""
    run.metadata = metadata
    run.save(
        update_fields=[
            "status",
            "finished_at",
            "error_message",
            "metadata",
            "updated_at",
        ]
    )


def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
    """Transition the run to FAILURE, preserving which stage it failed in."""
    metadata = dict(run.metadata or {})
    # Remember the stage that was active when the failure happened.
    failed_stage = str(metadata.get("stage") or "").strip() or None
    stage_details = dict(metadata.get("stage_details") or {})
    metadata["status_label"] = "failed"
    metadata["stage"] = "failed"
    metadata["failed_stage"] = failed_stage
    metadata["failure_reason"] = error_message[:4000]
    metadata["stage_details"] = {
        **stage_details,
        "failed": {
            "failed_stage": failed_stage,
            "error_message": error_message[:4000],
            "failed_stage_details": stage_details.get(failed_stage, {}) if failed_stage else {},
        },
    }
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "failed_at": timezone.now().isoformat(),
    }
    run.status = RemoteSensingRun.STATUS_FAILURE
    run.finished_at = timezone.now()
    run.error_message = error_message[:4000]
    run.metadata = metadata
    run.save(
        update_fields=[
            "status",
            "finished_at",
            "error_message",
            "metadata",
            "updated_at",
        ]
    )
    logger.exception(
        "Remote sensing analysis failed",
        extra={"run_id": run.id, "soil_location_id": run.soil_location_id, "block_code": run.block_code},
    )


def _mark_run_retrying(
    *,
    run_id: int | None,
    task_id: str,
    error_message: str,
    retry_count: int,
    retry_delay_seconds: int,
) -> None:
    """Mark the run as retrying; no-op when the run cannot be located."""
    run = None
    if run_id is not None:
        run = RemoteSensingRun.objects.filter(pk=run_id).first()
    if run is None and task_id:
        # Fallback lookup by the task id recorded in metadata.
        run = RemoteSensingRun.objects.filter(metadata__task_id=str(task_id)).first()
    if run is None:
        return
    metadata = dict(run.metadata or {})
    stage_details = dict(metadata.get("stage_details") or {})
    failed_stage = (
        str(metadata.get("failed_stage") or metadata.get("stage") or "").strip() or None
    )
    metadata["status_label"] = "retrying"
    metadata["stage"] = "retrying"
    metadata["failed_stage"] = failed_stage
    metadata.pop("failure_reason", None)
    metadata["stage_details"] = {
        **stage_details,
        "retrying": {
            "retry_count": retry_count,
            "retry_delay_seconds": retry_delay_seconds,
            "last_error": error_message[:4000],
            "failed_stage": failed_stage,
            "failed_stage_details": stage_details.get(failed_stage, {}) if failed_stage else {},
        },
    }
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "retrying_at": timezone.now().isoformat(),
    }
    run.status = RemoteSensingRun.STATUS_RUNNING
    run.error_message = ""
    run.metadata = metadata
    run.save(update_fields=["status", "error_message", "metadata", "updated_at"])


def _load_grid_cells(location: SoilLocation, block_code: str) -> list[AnalysisGridCell]:
    """Load all grid cells for the location/block, ordered by cell_code."""
    queryset = AnalysisGridCell.objects.filter(soil_location=location)
    queryset = queryset.filter(block_code=block_code or "")
    return list(queryset.order_by("cell_code"))


def _load_observations(
    *,
    location: SoilLocation,
    block_code: str,
    temporal_start,
    temporal_end,
) -> list[AnalysisGridObservation]:
    """Load observations for the location/block within the temporal window."""
    queryset = (
        AnalysisGridObservation.objects.select_related("cell", "run")
        .filter(
            cell__soil_location=location,
            cell__block_code=block_code or "",
            temporal_start=temporal_start,
            temporal_end=temporal_end,
        )
        .order_by("cell__cell_code")
    )
    return list(queryset)


def _select_cells_for_processing(
    *,
    all_cells: list[AnalysisGridCell],
    temporal_start,
    temporal_end,
    force_refresh: bool,
) -> list[AnalysisGridCell]:
    """Return cells that still need remote metrics (all of them on force_refresh)."""
    if force_refresh:
        return all_cells
    existing_ids = set(
        AnalysisGridObservation.objects.filter(
            cell__in=all_cells,
            temporal_start=temporal_start,
            temporal_end=temporal_end,
        ).values_list("cell_id", flat=True)
    )
    return [cell for cell in all_cells if cell.id not in existing_ids]


def _has_usable_observations(
    *,
    observations: list[AnalysisGridObservation],
    selected_features: list[str],
) -> bool:
    """True when at least one observation has a non-null selected feature."""
    for observation in observations:
        if any(getattr(observation, feature_name, None) is not None for feature_name in selected_features):
            return True
    return False


def _upsert_grid_observations(
    *,
    cells: list[AnalysisGridCell],
    run: RemoteSensingRun,
    temporal_start,
    temporal_end,
    metric_payload: dict[str, Any],
) -> dict[str, int]:
    """Upsert one observation per cell from the remote payload.

    Returns a summary with create/update counts, match diagnostics between
    payload cell codes and DB cell codes, and (when every persisted row is
    empty) diagnostics that are also stored on the run.
    """
    result_by_cell = metric_payload.get("results", {})
    payload_diagnostics = metric_payload["metadata"].get("payload_diagnostics", {})
    payload_cell_codes = sorted(str(cell_code) for cell_code in result_by_cell.keys())
    db_cell_codes = [cell.cell_code for cell in cells]
    matched_cell_codes = sorted(set(db_cell_codes) & set(payload_cell_codes))
    unmatched_db_cell_codes = sorted(set(db_cell_codes) - set(payload_cell_codes))
    unmatched_payload_cell_codes = sorted(set(payload_cell_codes) - set(db_cell_codes))
    available_features = _collect_available_features(
        result_by_cell=result_by_cell,
        payload_diagnostics=payload_diagnostics,
    )
    payload_keys_sample = payload_cell_codes[:5]
    metadata_template = {
        "backend_name": metric_payload["metadata"].get("backend"),
        "backend_url": metric_payload["metadata"].get("backend_url"),
        "collections_used": metric_payload["metadata"].get("collections_used", []),
        "job_refs": metric_payload["metadata"].get("job_refs", {}),
        "failed_metrics": metric_payload["metadata"].get("failed_metrics", []),
        "payload_diagnostics": payload_diagnostics,
        "run_id": run.id,
    }
    logger.info(
        "Remote sensing payload/DB cell comparison: %s",
        {
            "run_id": run.id,
            "db_cell_count": len(db_cell_codes),
            "payload_cell_count": len(payload_cell_codes),
            "matched_cell_count": len(matched_cell_codes),
            "unmatched_db_cell_codes": unmatched_db_cell_codes,
            "unmatched_payload_cell_codes": unmatched_payload_cell_codes,
        },
    )
    if not matched_cell_codes:
        logger.error("No payload cells matched DB cell_codes for run_id=%s", run.id)
    created_count = 0
    updated_count = 0
    usable_observation_count = 0
    fully_null_observation_count = 0
    with transaction.atomic():
        for cell in cells:
            values = result_by_cell.get(cell.cell_code, {})
            defaults = {
                "run": run,
                "ndvi": values.get("ndvi"),
                "ndwi": values.get("ndwi"),
                "soil_vv": values.get("soil_vv"),
                "soil_vv_db": values.get("soil_vv_db"),
                "metadata": metadata_template,
            }
            persisted_values = [defaults[feature_name] for feature_name in PERSISTED_OBSERVATION_FEATURES]
            usable_values = [defaults[feature_name] for feature_name in DEFAULT_CLUSTER_FEATURES]
            if all(value is None for value in persisted_values):
                fully_null_observation_count += 1
                logger.warning(
                    "Persisting empty observation for cell=%s, run_id=%s",
                    cell.cell_code,
                    run.id,
                )
            if any(value is not None for value in usable_values):
                usable_observation_count += 1
            _, created = AnalysisGridObservation.objects.update_or_create(
                cell=cell,
                temporal_start=temporal_start,
                temporal_end=temporal_end,
                defaults=defaults,
            )
            if created:
                created_count += 1
            else:
                updated_count += 1
    summary = {
        "created_count": created_count,
        "updated_count": updated_count,
        "total_observation_count": len(cells),
        "usable_observation_count": usable_observation_count,
        "fully_null_observation_count": fully_null_observation_count,
        "matched_cell_count": len(matched_cell_codes),
        "matched_cell_codes": matched_cell_codes,
        "unmatched_db_cell_codes": unmatched_db_cell_codes,
        "unmatched_payload_cell_codes": unmatched_payload_cell_codes,
        "payload_keys_sample": payload_keys_sample,
        "available_features": available_features,
    }
    logger.info("Grid observation upsert summary: %s", summary)
    if usable_observation_count == 0:
        diagnostics = {
            "job_ref": metadata_template["job_refs"],
            "total_cells": len(cells),
            "matched_cells": len(matched_cell_codes),
            "payload_keys_sample": payload_keys_sample,
            "available_features": available_features,
        }
        logger.error("All persisted observations are empty for run_id=%s", run.id)
        _store_empty_observation_diagnostics(run=run, diagnostics=diagnostics)
        summary["empty_observation_diagnostics"] = diagnostics
    return summary


def _collect_available_features(
    *,
    result_by_cell: dict[str, dict[str, Any]],
    payload_diagnostics: dict[str, Any],
) -> list[str]:
    """Collect the sorted set of feature names with any non-null value."""
    available = {
        feature_name
        for values in result_by_cell.values()
        for feature_name, value in (values or {}).items()
        if value is not None
    }
    for metric_diagnostics in payload_diagnostics.values():
        available.update(metric_diagnostics.get("available_features", []))
    return sorted(str(feature_name) for feature_name in available)


def _store_empty_observation_diagnostics(*, run: RemoteSensingRun, diagnostics: dict[str, Any]) -> None:
    """Persist empty-observation diagnostics into run.metadata."""
    metadata = dict(run.metadata or {})
    metadata["diagnostics"] = {
        **dict(metadata.get("diagnostics") or {}),
        "empty_observations": diagnostics,
    }
    run.metadata = metadata
    run.save(update_fields=["metadata", "updated_at"])


def _build_remote_metric_stage_details(
    *,
    cells: list[AnalysisGridCell],
    selected_features: list[str] | None,
    active_metric: str | None = None,
    completed_metrics: list[str] | None = None,
    failed_metrics: list[dict[str, Any]] | None = None,
    metric_states: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Build the stage-details payload for the 'fetching_remote_metrics' stage."""
    features = list(selected_features or DEFAULT_CLUSTER_FEATURES)
    completed = list(completed_metrics or [])
    failed = list(failed_metrics or [])
    # When no explicit per-metric states are given, derive them.
    states = metric_states or [
        {
            "metric": metric_name,
            "status": (
                "completed"
                if metric_name in completed
                else "failed"
                if any(item.get("metric") == metric_name for item in failed)
                else "running"
                if metric_name == active_metric
                else "pending"
            ),
        }
        for metric_name in features
    ]
    return {
        "requested_cell_count": len(cells),
        "target_cells": [
            {
                "cell_code": cell.cell_code,
                "block_code": cell.block_code,
                "centroid_lat": str(cell.centroid_lat),
                "centroid_lon": str(cell.centroid_lon),
                "chunk_size_sqm": cell.chunk_size_sqm,
            }
            for cell in cells
        ],
        "metric_progress": {
            "total_metrics": len(features),
            "completed_metric_count": len(completed),
            "active_metric": active_metric,
            "completed_metrics": completed,
            "failed_metrics": failed,
            "states": states,
        },
    }


def _normalize_progress_metric_name(metric_name: str, features: list[str]) -> str:
    """Map a service metric name to its selected-feature name when derived."""
    # soil_vv progress maps to the derived soil_vv_db feature if selected.
    derived_metric_map = {
        "soil_vv": "soil_vv_db",
    }
    normalized = derived_metric_map.get(metric_name, metric_name)
    if normalized in features:
        return normalized
    return metric_name


def _resolve_progress_job_ref(candidate: str, job_refs: dict[str, Any]) -> Any:
    """Find the job ref for a metric, following the derived-metric mapping."""
    if candidate in job_refs:
        return job_refs.get(candidate)
    source_metric_map = {
        "soil_vv_db": "soil_vv",
    }
    return job_refs.get(source_metric_map.get(candidate, candidate))


def _build_remote_metric_progress_callback(
    *,
    run: RemoteSensingRun,
    cells: list[AnalysisGridCell],
    selected_features: list[str] | None,
):
    """Build a progress callback that records per-metric state on the run."""
    features = list(selected_features or DEFAULT_CLUSTER_FEATURES)
    # Mutable progress state shared across callback invocations.
    completed_metrics: list[str] = []
    failed_metrics: list[dict[str, Any]] = []

    def callback(*, metric_name: str, state: str, metadata: dict[str, Any], metric_payload=None, error: str = "") -> None:
        progress_metric_name = _normalize_progress_metric_name(metric_name, features)
        if state == "completed" and progress_metric_name not in completed_metrics:
            completed_metrics.append(progress_metric_name)
        if state == "failed":
            failed_entry = {"metric": progress_metric_name, "error": error}
            # De-duplicate identical (metric, error) failure entries.
            if not any(
                item.get("metric") == progress_metric_name and item.get("error") == error
                for item in failed_metrics
            ):
                failed_metrics.append(failed_entry)
        stage_details = _build_remote_metric_stage_details(
            cells=cells,
            selected_features=features,
            active_metric=progress_metric_name if state == "running" else None,
            completed_metrics=completed_metrics,
            failed_metrics=failed_metrics,
            metric_states=[
                {
                    "metric": candidate,
                    "status": (
                        "completed"
                        if candidate in completed_metrics
                        else "failed"
                        if any(item.get("metric") == candidate for item in failed_metrics)
                        else "running"
                        if candidate == progress_metric_name and state == "running"
                        else "pending"
                    ),
                    "job_ref": _resolve_progress_job_ref(candidate, metadata.get("job_refs", {})),
                }
                for candidate in features
            ],
        )
        _record_run_stage(run, "fetching_remote_metrics", stage_details)

    return callback


def _ensure_subdivision_result(
    *,
    location: SoilLocation,
    run: RemoteSensingRun,
    subdivision: BlockSubdivision | None,
    block_code: str,
    observations: list[AnalysisGridObservation],
    cluster_count: int | None,
    selected_features: list[str] | None,
) -> RemoteSensingSubdivisionResult:
    """Create the data-driven subdivision result; requires at least one observation."""
    if not observations:
        raise DataDrivenSubdivisionError("هیچ observation برای ساخت subdivision داده‌محور پیدا نشد.")
    result = create_remote_sensing_subdivision_result(
        location=location,
        run=run,
        observations=observations,
        block_subdivision=subdivision,
        block_code=block_code,
        selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
        explicit_k=cluster_count,
    )
    return result


def _record_run_stage(run: RemoteSensingRun, stage: str, details: dict[str, Any] | None = None) -> None:
    """Record a pipeline stage (with details and timestamp) on the run metadata."""
    metadata = dict(run.metadata or {})
    metadata["stage"] = stage
    metadata["stage_details"] = {
        **dict(metadata.get("stage_details") or {}),
        stage: details or {},
    }
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        f"{stage}_at": timezone.now().isoformat(),
    }
    run.metadata = metadata
    run.save(update_fields=["metadata", "updated_at"])


def _build_clustering_stage_metadata(
    result: RemoteSensingSubdivisionResult,
) -> dict[str, Any]:
    """Build the stage-details payload for the 'clustering_completed' stage."""
    metadata = dict(result.metadata or {})
    return {
        "subdivision_result_id": result.id,
        "cluster_count": result.cluster_count,
        "selected_features": result.selected_features,
        "used_cell_count": metadata.get("used_cell_count", 0),
        "skipped_cell_count": metadata.get("skipped_cell_count", 0),
        "skipped_cell_codes": result.skipped_cell_codes,
        "kmeans_params": metadata.get("kmeans_params", {}),
    }