This commit is contained in:
2026-05-10 22:49:07 +03:30
parent 2d1f7da89e
commit 2a6321a263
15 changed files with 2667 additions and 162 deletions
+374 -44
View File
@@ -42,6 +42,17 @@ else:
logger = logging.getLogger(__name__)
# Retry budget handed to the analysis task decorator as ``max_retries``.
REMOTE_SENSING_TASK_MAX_RETRIES = 5
# Base retry delay in seconds: used as the task's ``default_retry_delay`` and
# as the starting value that the retry handler doubles on each attempt.
REMOTE_SENSING_TASK_RETRY_DELAY_SECONDS = 60
# Upper bound, in seconds, applied to the doubled retry countdown.
REMOTE_SENSING_TASK_RETRY_BACKOFF_MAX_SECONDS = 600
# Observation fields inspected when deciding whether a persisted observation
# is "fully null" (every listed field is None) during the upsert.
PERSISTED_OBSERVATION_FEATURES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv",
"soil_vv_db",
)
def run_remote_sensing_analysis(
*,
@@ -122,58 +133,83 @@ def run_remote_sensing_analysis(
)
if not cells_to_process:
_record_run_stage(
run,
"using_cached_observations",
{"source": "database"},
)
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
if not _has_usable_observations(
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "database",
"run_id": run.id,
"processed_cell_count": 0,
"created_observation_count": 0,
"updated_observation_count": 0,
"existing_observation_count": len(all_cells),
"failed_metric_count": 0,
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": getattr(subdivision_result, "id", None),
"cluster_count": getattr(subdivision_result, "cluster_count", 0),
}
_mark_run_success(run, summary)
return summary
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
):
logger.warning(
"Cached observations are fully null, refetching remote metrics for run_id=%s",
run.id,
)
_record_run_stage(
run,
"using_cached_observations",
{"source": "database", "usable": False, "refetching": True},
)
cells_to_process = all_cells
else:
_record_run_stage(
run,
"using_cached_observations",
{"source": "database", "usable": True, "refetching": False},
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "database",
"run_id": run.id,
"processed_cell_count": 0,
"created_observation_count": 0,
"updated_observation_count": 0,
"existing_observation_count": len(all_cells),
"failed_metric_count": 0,
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": getattr(subdivision_result, "id", None),
"cluster_count": getattr(subdivision_result, "cluster_count", 0),
}
_mark_run_success(run, summary)
return summary
_record_run_stage(
run,
"fetching_remote_metrics",
{"requested_cell_count": len(cells_to_process)},
_build_remote_metric_stage_details(
cells=cells_to_process,
selected_features=selected_features,
),
)
progress_callback = _build_remote_metric_progress_callback(
run=run,
cells=cells_to_process,
selected_features=selected_features,
)
remote_payload = compute_remote_sensing_metrics(
cells_to_process,
temporal_start=start_date,
temporal_end=end_date,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
progress_callback=progress_callback,
)
_record_run_stage(
run,
@@ -242,7 +278,11 @@ def run_remote_sensing_analysis(
raise
@app.task(bind=True, max_retries=3, default_retry_delay=60)
@app.task(
bind=True,
max_retries=REMOTE_SENSING_TASK_MAX_RETRIES,
default_retry_delay=REMOTE_SENSING_TASK_RETRY_DELAY_SECONDS,
)
def run_remote_sensing_analysis_task(
self,
soil_location_id: int,
@@ -287,17 +327,30 @@ def run_remote_sensing_analysis_task(
)
raise
except (OpenEOExecutionError, OpenEOServiceError, RequestException, DataDrivenSubdivisionError) as exc:
retry_count = self.request.retries + 1
countdown = min(
REMOTE_SENSING_TASK_RETRY_DELAY_SECONDS * (2 ** self.request.retries),
REMOTE_SENSING_TASK_RETRY_BACKOFF_MAX_SECONDS,
)
_mark_run_retrying(
run_id=run_id,
task_id=self.request.id,
error_message=str(exc),
retry_count=retry_count,
retry_delay_seconds=countdown,
)
logger.warning(
"Transient remote sensing failure, retrying task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"retry_count": self.request.retries,
"retry_count": retry_count,
"retry_delay_seconds": countdown,
"error": str(exc),
},
)
raise self.retry(exc=exc)
raise self.retry(exc=exc, countdown=countdown)
def _normalize_temporal_date(value: Any, field_name: str):
@@ -442,8 +495,20 @@ def _mark_run_success(
def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
metadata = dict(run.metadata or {})
failed_stage = str(metadata.get("stage") or "").strip() or None
stage_details = dict(metadata.get("stage_details") or {})
metadata["status_label"] = "failed"
metadata["stage"] = "failed"
metadata["failed_stage"] = failed_stage
metadata["failure_reason"] = error_message[:4000]
metadata["stage_details"] = {
**stage_details,
"failed": {
"failed_stage": failed_stage,
"error_message": error_message[:4000],
"failed_stage_details": stage_details.get(failed_stage, {}) if failed_stage else {},
},
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"failed_at": timezone.now().isoformat(),
@@ -467,6 +532,51 @@ def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
)
def _mark_run_retrying(
    *,
    run_id: int | None,
    task_id: str,
    error_message: str,
    retry_count: int,
    retry_delay_seconds: int,
) -> None:
    """Record on the run that the task is about to be retried.

    The run is looked up by primary key first and, failing that, by the
    ``task_id`` stored in its metadata. When no run can be found the call is
    a no-op.

    Args:
        run_id: Primary key of the run, if known.
        task_id: Task identifier used as the fallback lookup key.
        error_message: Text of the error that triggered the retry; stored
            truncated to 4000 characters.
        retry_count: Ordinal of the upcoming retry attempt.
        retry_delay_seconds: Delay before the retry is executed.
    """
    target_run = None
    if run_id is not None:
        target_run = RemoteSensingRun.objects.filter(pk=run_id).first()
    if target_run is None and task_id:
        target_run = RemoteSensingRun.objects.filter(
            metadata__task_id=str(task_id)
        ).first()
    if target_run is None:
        return

    run_metadata = dict(target_run.metadata or {})
    previous_details = dict(run_metadata.get("stage_details") or {})

    # An already recorded failed stage wins over the current stage marker.
    stage_name = str(
        run_metadata.get("failed_stage") or run_metadata.get("stage") or ""
    ).strip()
    failed_stage = stage_name or None
    failed_stage_details = (
        previous_details.get(failed_stage, {}) if failed_stage else {}
    )

    retry_details = {
        "retry_count": retry_count,
        "retry_delay_seconds": retry_delay_seconds,
        "last_error": error_message[:4000],
        "failed_stage": failed_stage,
        "failed_stage_details": failed_stage_details,
    }

    run_metadata["status_label"] = "retrying"
    run_metadata["stage"] = "retrying"
    run_metadata["failed_stage"] = failed_stage
    # A retrying run is not failed, so any earlier failure reason is dropped.
    run_metadata.pop("failure_reason", None)
    run_metadata["stage_details"] = {**previous_details, "retrying": retry_details}

    timestamps = dict(run_metadata.get("timestamps") or {})
    timestamps["retrying_at"] = timezone.now().isoformat()
    run_metadata["timestamps"] = timestamps

    target_run.status = RemoteSensingRun.STATUS_RUNNING
    target_run.error_message = ""
    target_run.metadata = run_metadata
    target_run.save(
        update_fields=["status", "error_message", "metadata", "updated_at"]
    )
def _load_grid_cells(location: SoilLocation, block_code: str) -> list[AnalysisGridCell]:
queryset = AnalysisGridCell.objects.filter(soil_location=location)
queryset = queryset.filter(block_code=block_code or "")
@@ -513,6 +623,17 @@ def _select_cells_for_processing(
return [cell for cell in all_cells if cell.id not in existing_ids]
def _has_usable_observations(
    *,
    observations: list[AnalysisGridObservation],
    selected_features: list[str],
) -> bool:
    """Tell whether any observation holds a non-null selected feature.

    Args:
        observations: Observation objects to inspect; features are read as
            attributes and a missing attribute counts as null.
        selected_features: Attribute names to check on each observation.

    Returns:
        True as soon as one observation has one selected feature that is not
        None; False when there are no observations, no features, or every
        inspected value is None.
    """
    return any(
        getattr(observation, feature_name, None) is not None
        for observation in observations
        for feature_name in selected_features
    )
def _upsert_grid_observations(
*,
cells: list[AnalysisGridCell],
@@ -521,19 +642,47 @@ def _upsert_grid_observations(
temporal_end,
metric_payload: dict[str, Any],
) -> dict[str, int]:
result_by_cell = metric_payload.get("results", {})
payload_diagnostics = metric_payload["metadata"].get("payload_diagnostics", {})
payload_cell_codes = sorted(str(cell_code) for cell_code in result_by_cell.keys())
db_cell_codes = [cell.cell_code for cell in cells]
matched_cell_codes = sorted(set(db_cell_codes) & set(payload_cell_codes))
unmatched_db_cell_codes = sorted(set(db_cell_codes) - set(payload_cell_codes))
unmatched_payload_cell_codes = sorted(set(payload_cell_codes) - set(db_cell_codes))
available_features = _collect_available_features(
result_by_cell=result_by_cell,
payload_diagnostics=payload_diagnostics,
)
payload_keys_sample = payload_cell_codes[:5]
metadata_template = {
"backend_name": metric_payload["metadata"].get("backend"),
"backend_url": metric_payload["metadata"].get("backend_url"),
"collections_used": metric_payload["metadata"].get("collections_used", []),
"slope_supported": metric_payload["metadata"].get("slope_supported", False),
"job_refs": metric_payload["metadata"].get("job_refs", {}),
"failed_metrics": metric_payload["metadata"].get("failed_metrics", []),
"payload_diagnostics": payload_diagnostics,
"run_id": run.id,
}
result_by_cell = metric_payload.get("results", {})
logger.info(
"Remote sensing payload/DB cell comparison: %s",
{
"run_id": run.id,
"db_cell_count": len(db_cell_codes),
"payload_cell_count": len(payload_cell_codes),
"matched_cell_count": len(matched_cell_codes),
"unmatched_db_cell_codes": unmatched_db_cell_codes,
"unmatched_payload_cell_codes": unmatched_payload_cell_codes,
},
)
if not matched_cell_codes:
logger.error("No payload cells matched DB cell_codes for run_id=%s", run.id)
created_count = 0
updated_count = 0
usable_observation_count = 0
fully_null_observation_count = 0
with transaction.atomic():
for cell in cells:
values = result_by_cell.get(cell.cell_code, {})
@@ -544,10 +693,19 @@ def _upsert_grid_observations(
"lst_c": values.get("lst_c"),
"soil_vv": values.get("soil_vv"),
"soil_vv_db": values.get("soil_vv_db"),
"dem_m": values.get("dem_m"),
"slope_deg": values.get("slope_deg"),
"metadata": metadata_template,
}
persisted_values = [defaults[feature_name] for feature_name in PERSISTED_OBSERVATION_FEATURES]
usable_values = [defaults[feature_name] for feature_name in DEFAULT_CLUSTER_FEATURES]
if all(value is None for value in persisted_values):
fully_null_observation_count += 1
logger.warning(
"Persisting empty observation for cell=%s, run_id=%s",
cell.cell_code,
run.id,
)
if any(value is not None for value in usable_values):
usable_observation_count += 1
observation, created = AnalysisGridObservation.objects.update_or_create(
cell=cell,
temporal_start=temporal_start,
@@ -558,7 +716,179 @@ def _upsert_grid_observations(
created_count += 1
else:
updated_count += 1
return {"created_count": created_count, "updated_count": updated_count}
summary = {
"created_count": created_count,
"updated_count": updated_count,
"total_observation_count": len(cells),
"usable_observation_count": usable_observation_count,
"fully_null_observation_count": fully_null_observation_count,
"matched_cell_count": len(matched_cell_codes),
"matched_cell_codes": matched_cell_codes,
"unmatched_db_cell_codes": unmatched_db_cell_codes,
"unmatched_payload_cell_codes": unmatched_payload_cell_codes,
"payload_keys_sample": payload_keys_sample,
"available_features": available_features,
}
logger.info("Grid observation upsert summary: %s", summary)
if usable_observation_count == 0:
diagnostics = {
"job_ref": metadata_template["job_refs"],
"total_cells": len(cells),
"matched_cells": len(matched_cell_codes),
"payload_keys_sample": payload_keys_sample,
"available_features": available_features,
}
logger.error("All persisted observations are empty for run_id=%s", run.id)
_store_empty_observation_diagnostics(run=run, diagnostics=diagnostics)
summary["empty_observation_diagnostics"] = diagnostics
return summary
def _collect_available_features(
    *,
    result_by_cell: dict[str, dict[str, Any]],
    payload_diagnostics: dict[str, Any],
) -> list[str]:
    """List every feature name for which the payload carries data.

    A feature counts as available when at least one cell has a non-None value
    for it, or when a diagnostics entry lists it under
    ``"available_features"``.

    This helper only feeds logging and diagnostics, so malformed input is
    tolerated rather than raised: missing mappings, non-dict diagnostics
    entries and a null ``available_features`` value are all skipped.

    Args:
        result_by_cell: Per-cell metric values keyed by cell code; a cell
            whose value is None is ignored.
        payload_diagnostics: Per-metric diagnostics mappings.

    Returns:
        Sorted, de-duplicated feature names as strings.
    """
    # Names are stringified before de-duplication so that equal names of
    # different types cannot appear twice in the result.
    available = {
        str(feature_name)
        for values in (result_by_cell or {}).values()
        for feature_name, value in (values or {}).items()
        if value is not None
    }
    for metric_diagnostics in (payload_diagnostics or {}).values():
        if not isinstance(metric_diagnostics, dict):
            continue
        reported = metric_diagnostics.get("available_features") or []
        if isinstance(reported, str):
            # A bare string is one feature name, not an iterable of characters.
            reported = [reported]
        available.update(str(feature_name) for feature_name in reported)
    return sorted(available)
def _store_empty_observation_diagnostics(*, run: RemoteSensingRun, diagnostics: dict[str, Any]) -> None:
    """Save diagnostics about empty observations into the run metadata.

    The payload is written to ``metadata["diagnostics"]["empty_observations"]``
    on copies of the existing mappings, keeping any other diagnostics entries,
    and the run is then saved with only ``metadata`` and ``updated_at``.

    Args:
        run: Run whose metadata receives the diagnostics.
        diagnostics: Diagnostics payload to store.
    """
    updated_metadata = dict(run.metadata or {})
    diagnostics_section = dict(updated_metadata.get("diagnostics") or {})
    diagnostics_section["empty_observations"] = diagnostics
    updated_metadata["diagnostics"] = diagnostics_section

    run.metadata = updated_metadata
    run.save(update_fields=["metadata", "updated_at"])
def _build_remote_metric_stage_details(
    *,
    cells: list[AnalysisGridCell],
    selected_features: list[str] | None,
    active_metric: str | None = None,
    completed_metrics: list[str] | None = None,
    failed_metrics: list[dict[str, Any]] | None = None,
    metric_states: list[dict[str, Any]] | None = None,
) -> dict[str, Any]:
    """Assemble the stage-details payload for the remote metric fetch.

    Args:
        cells: Grid cells targeted by the fetch.
        selected_features: Requested metric names; ``DEFAULT_CLUSTER_FEATURES``
            is used when this is empty or None.
        active_metric: Metric currently being fetched, if any.
        completed_metrics: Metrics that have already finished.
        failed_metrics: Failure entries; each is read through its ``"metric"``
            key.
        metric_states: Ready-made per-metric states. When non-empty they are
            used as given instead of being derived from the other arguments.

    Returns:
        A dict with the requested cell count, one descriptor per target cell
        and a ``"metric_progress"`` section.
    """
    features = list(selected_features or DEFAULT_CLUSTER_FEATURES)
    completed = list(completed_metrics or [])
    failed = list(failed_metrics or [])

    def derive_status(metric_name: str) -> str:
        # Precedence: completed, then failed, then running, then pending.
        if metric_name in completed:
            return "completed"
        if any(item.get("metric") == metric_name for item in failed):
            return "failed"
        if metric_name == active_metric:
            return "running"
        return "pending"

    if metric_states:
        states = metric_states
    else:
        states = [
            {"metric": metric_name, "status": derive_status(metric_name)}
            for metric_name in features
        ]

    target_cells = []
    for cell in cells:
        target_cells.append(
            {
                "cell_code": cell.cell_code,
                "block_code": cell.block_code,
                "centroid_lat": str(cell.centroid_lat),
                "centroid_lon": str(cell.centroid_lon),
                "chunk_size_sqm": cell.chunk_size_sqm,
            }
        )

    metric_progress = {
        "total_metrics": len(features),
        "completed_metric_count": len(completed),
        "active_metric": active_metric,
        "completed_metrics": completed,
        "failed_metrics": failed,
        "states": states,
    }
    return {
        "requested_cell_count": len(cells),
        "target_cells": target_cells,
        "metric_progress": metric_progress,
    }
def _normalize_progress_metric_name(metric_name: str, features: list[str]) -> str:
    """Map a reported metric name onto the feature name used for progress.

    ``"soil_vv"`` is reported as ``"soil_vv_db"`` when that derived feature is
    among ``features``. In every other case the name is returned unchanged.

    Args:
        metric_name: Metric name reported by the progress source.
        features: Feature names tracked for progress.

    Returns:
        The progress feature name, or ``metric_name`` itself.
    """
    derived_names = {"soil_vv": "soil_vv_db"}
    candidate = derived_names.get(metric_name, metric_name)
    return candidate if candidate in features else metric_name
def _resolve_progress_job_ref(candidate: str, job_refs: dict[str, Any]) -> Any:
    """Find the job reference that belongs to a progress feature.

    A reference stored under the feature's own name is preferred. Otherwise
    ``"soil_vv_db"`` falls back to the reference stored for ``"soil_vv"``.

    Args:
        candidate: Feature name shown in the progress states.
        job_refs: Job references keyed by metric name.

    Returns:
        The matching job reference, or None when none is recorded.
    """
    if candidate in job_refs:
        return job_refs[candidate]
    source_names = {"soil_vv_db": "soil_vv"}
    fallback_key = source_names.get(candidate, candidate)
    return job_refs.get(fallback_key)
def _build_remote_metric_progress_callback(
    *,
    run: RemoteSensingRun,
    cells: list[AnalysisGridCell],
    selected_features: list[str] | None,
):
    """Create the progress callback used while remote metrics are fetched.

    The returned callable keeps the completed and failed metrics in closure
    state and, on every invocation, records a fresh ``fetching_remote_metrics``
    stage on ``run``.

    Args:
        run: Run on which progress is recorded.
        cells: Grid cells targeted by the fetch.
        selected_features: Requested metric names; ``DEFAULT_CLUSTER_FEATURES``
            is used when this is empty or None.

    Returns:
        A keyword-only callable taking ``metric_name``, ``state``, ``metadata``,
        ``metric_payload`` and ``error``.
    """
    tracked_features = list(selected_features or DEFAULT_CLUSTER_FEATURES)
    finished: list[str] = []
    failures: list[dict[str, Any]] = []

    def callback(*, metric_name: str, state: str, metadata: dict[str, Any], metric_payload=None, error: str = "") -> None:
        current_metric = _normalize_progress_metric_name(metric_name, tracked_features)
        is_running = state == "running"

        if state == "completed" and current_metric not in finished:
            finished.append(current_metric)

        if state == "failed":
            # The same (metric, error) pair is stored only once.
            already_recorded = any(
                item.get("metric") == current_metric and item.get("error") == error
                for item in failures
            )
            if not already_recorded:
                failures.append({"metric": current_metric, "error": error})

        def status_of(candidate: str) -> str:
            # Precedence: completed, then failed, then running, then pending.
            if candidate in finished:
                return "completed"
            if any(item.get("metric") == candidate for item in failures):
                return "failed"
            if candidate == current_metric and is_running:
                return "running"
            return "pending"

        per_metric_states = [
            {
                "metric": candidate,
                "status": status_of(candidate),
                "job_ref": _resolve_progress_job_ref(candidate, metadata.get("job_refs", {})),
            }
            for candidate in tracked_features
        ]
        stage_details = _build_remote_metric_stage_details(
            cells=cells,
            selected_features=tracked_features,
            active_metric=current_metric if is_running else None,
            completed_metrics=finished,
            failed_metrics=failures,
            metric_states=per_metric_states,
        )
        _record_run_stage(run, "fetching_remote_metrics", stage_details)

    return callback
def _ensure_subdivision_result(