Files
Ai/location_data/tasks.py
T

616 lines
20 KiB
Python
Raw Normal View History

"""
2026-05-09 16:55:06 +03:30
تسک‌های Celery برای pipeline سنجش‌ازدور و subdivision داده‌محور.
"""
2026-05-09 16:55:06 +03:30
import logging
from typing import Any
from config.celery import app
2026-05-09 16:55:06 +03:30
from django.conf import settings
from django.db import transaction
2026-05-09 16:55:06 +03:30
from django.utils import timezone
from django.utils.dateparse import parse_date
2026-05-09 16:55:06 +03:30
from .data_driven_subdivision import (
DEFAULT_CLUSTER_FEATURES,
DataDrivenSubdivisionError,
create_remote_sensing_subdivision_result,
)
from .grid_analysis import create_or_get_analysis_grid_cells
from .models import (
AnalysisGridCell,
AnalysisGridObservation,
BlockSubdivision,
RemoteSensingRun,
RemoteSensingSubdivisionResult,
SoilLocation,
)
from .openeo_service import (
OpenEOAuthenticationError,
OpenEOExecutionError,
OpenEOServiceError,
compute_remote_sensing_metrics,
)
2026-04-29 01:27:29 +03:30
try:
import requests
except ImportError: # pragma: no cover - handled in stripped envs
RequestException = Exception
else:
RequestException = requests.RequestException
2026-05-09 16:55:06 +03:30
logger = logging.getLogger(__name__)
def run_remote_sensing_analysis(
*,
soil_location_id: int,
block_code: str = "",
temporal_start: Any,
temporal_end: Any,
force_refresh: bool = False,
2026-04-07 01:08:41 +03:30
task_id: str = "",
2026-05-09 16:55:06 +03:30
run_id: int | None = None,
cluster_count: int | None = None,
selected_features: list[str] | None = None,
) -> dict[str, Any]:
"""
2026-05-09 16:55:06 +03:30
اجرای سنکرون تحلیل سنجش‌ازدور برای یک location/block.
این helper برای Celery task و هر orchestration داخلی دیگر قابل استفاده است.
"""
2026-05-09 16:55:06 +03:30
start_date = _normalize_temporal_date(temporal_start, "temporal_start")
end_date = _normalize_temporal_date(temporal_end, "temporal_end")
if start_date > end_date:
raise ValueError("temporal_start نمی‌تواند بعد از temporal_end باشد.")
2026-05-09 16:55:06 +03:30
location = SoilLocation.objects.filter(pk=soil_location_id).first()
if location is None:
raise ValueError(f"SoilLocation با id={soil_location_id} پیدا نشد.")
resolved_block_code = str(block_code or "").strip()
subdivision = _resolve_block_subdivision(location, resolved_block_code)
run = _get_or_create_remote_sensing_run(
run_id=run_id,
location=location,
subdivision=subdivision,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
task_id=task_id,
cluster_count=cluster_count,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
)
_mark_run_running(run)
try:
_record_run_stage(
run,
"preparing_analysis_grid",
{
"block_code": resolved_block_code,
"temporal_extent": {
"start_date": start_date.isoformat(),
"end_date": end_date.isoformat(),
2026-04-07 01:08:41 +03:30
},
2026-05-09 16:55:06 +03:30
},
)
grid_summary = create_or_get_analysis_grid_cells(
location,
block_code=resolved_block_code,
block_subdivision=subdivision,
)
_record_run_stage(run, "analysis_grid_ready", {"grid_summary": grid_summary})
all_cells = _load_grid_cells(location, resolved_block_code)
cells_to_process = _select_cells_for_processing(
all_cells=all_cells,
temporal_start=start_date,
temporal_end=end_date,
force_refresh=force_refresh,
)
_record_run_stage(
run,
"analysis_cells_selected",
{
"cell_selection": {
"total_cell_count": len(all_cells),
"cell_count_to_process": len(cells_to_process),
"existing_cell_count": len(all_cells) - len(cells_to_process),
"force_refresh": force_refresh,
}
},
)
if not cells_to_process:
_record_run_stage(
run,
"using_cached_observations",
{"source": "database"},
2026-04-07 01:08:41 +03:30
)
2026-05-09 16:55:06 +03:30
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
2026-05-09 16:55:06 +03:30
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "database",
"run_id": run.id,
"processed_cell_count": 0,
"created_observation_count": 0,
"updated_observation_count": 0,
"existing_observation_count": len(all_cells),
"failed_metric_count": 0,
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": getattr(subdivision_result, "id", None),
"cluster_count": getattr(subdivision_result, "cluster_count", 0),
}
_mark_run_success(run, summary)
return summary
2026-05-09 16:55:06 +03:30
_record_run_stage(
run,
"fetching_remote_metrics",
{"requested_cell_count": len(cells_to_process)},
)
remote_payload = compute_remote_sensing_metrics(
cells_to_process,
temporal_start=start_date,
temporal_end=end_date,
)
_record_run_stage(
run,
"remote_metrics_fetched",
{
"failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
"service_metadata": remote_payload["metadata"],
},
)
upsert_summary = _upsert_grid_observations(
cells=cells_to_process,
run=run,
temporal_start=start_date,
temporal_end=end_date,
metric_payload=remote_payload,
)
_record_run_stage(run, "observations_persisted", upsert_summary)
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "openeo",
"run_id": run.id,
"processed_cell_count": len(cells_to_process),
"created_observation_count": upsert_summary["created_count"],
"updated_observation_count": upsert_summary["updated_count"],
"existing_observation_count": len(all_cells) - len(cells_to_process),
"failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": subdivision_result.id,
"cluster_count": subdivision_result.cluster_count,
}
_mark_run_success(run, summary, remote_payload["metadata"])
logger.info(
"Remote sensing analysis completed",
extra={
"run_id": run.id,
"soil_location_id": location.id,
"block_code": resolved_block_code,
"processed_cell_count": summary["processed_cell_count"],
},
)
return summary
except Exception as exc:
_mark_run_failure(run, str(exc))
raise
2026-04-07 01:08:41 +03:30
2026-05-09 16:55:06 +03:30
@app.task(bind=True, max_retries=3, default_retry_delay=60)
def run_remote_sensing_analysis_task(
self,
soil_location_id: int,
block_code: str = "",
temporal_start: Any = "",
temporal_end: Any = "",
force_refresh: bool = False,
run_id: int | None = None,
cluster_count: int | None = None,
selected_features: list[str] | None = None,
):
2026-04-07 01:08:41 +03:30
"""
2026-05-09 16:55:06 +03:30
اجرای async تحلیل سنجش‌ازدور برای location/block و ذخیره نتایج در DB.
2026-04-07 01:08:41 +03:30
"""
2026-05-09 16:55:06 +03:30
logger.info(
"Starting remote sensing analysis task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"temporal_start": temporal_start,
"temporal_end": temporal_end,
"force_refresh": force_refresh,
},
)
2026-04-07 01:08:41 +03:30
try:
2026-05-09 16:55:06 +03:30
return run_remote_sensing_analysis(
soil_location_id=soil_location_id,
block_code=block_code,
temporal_start=temporal_start,
temporal_end=temporal_end,
force_refresh=force_refresh,
2026-04-07 01:08:41 +03:30
task_id=self.request.id,
2026-05-09 16:55:06 +03:30
run_id=run_id,
cluster_count=cluster_count,
selected_features=selected_features,
)
except OpenEOAuthenticationError:
logger.exception(
"Remote sensing auth failure",
extra={"task_id": self.request.id, "soil_location_id": soil_location_id},
)
raise
except (OpenEOExecutionError, OpenEOServiceError, RequestException, DataDrivenSubdivisionError) as exc:
logger.warning(
"Transient remote sensing failure, retrying task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"retry_count": self.request.retries,
"error": str(exc),
},
)
raise self.retry(exc=exc)
def _normalize_temporal_date(value: Any, field_name: str):
if hasattr(value, "isoformat") and not isinstance(value, str):
return value
parsed = parse_date(str(value))
if parsed is None:
raise ValueError(f"{field_name} نامعتبر است.")
return parsed
def _resolve_block_subdivision(location: SoilLocation, block_code: str) -> BlockSubdivision | None:
if not block_code:
return None
return (
BlockSubdivision.objects.filter(
soil_location=location,
block_code=block_code,
)
.order_by("-updated_at", "-id")
.first()
)
def _get_or_create_remote_sensing_run(
*,
run_id: int | None,
location: SoilLocation,
subdivision: BlockSubdivision | None,
block_code: str,
temporal_start,
temporal_end,
task_id: str,
cluster_count: int | None,
selected_features: list[str],
) -> RemoteSensingRun:
queued_at = timezone.now().isoformat()
if run_id is not None:
run = RemoteSensingRun.objects.filter(pk=run_id, soil_location=location).first()
if run is not None:
metadata = dict(run.metadata or {})
if task_id:
metadata["task_id"] = task_id
metadata.setdefault("status_label", "pending")
metadata["stage"] = "queued"
metadata["selected_features"] = selected_features
metadata["requested_cluster_count"] = cluster_count
metadata["pipeline"] = {
"name": "remote_sensing_subdivision",
"version": 2,
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"queued_at": queued_at,
}
run.block_subdivision = subdivision
run.block_code = block_code
run.chunk_size_sqm = int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900)
run.temporal_start = temporal_start
run.temporal_end = temporal_end
run.metadata = metadata
run.save(
update_fields=[
"block_subdivision",
"block_code",
"chunk_size_sqm",
"temporal_start",
"temporal_end",
"metadata",
"updated_at",
]
)
return run
metadata = {
"status_label": "pending",
"stage": "queued",
"selected_features": selected_features,
"requested_cluster_count": cluster_count,
"pipeline": {
"name": "remote_sensing_subdivision",
"version": 2,
},
"timestamps": {"queued_at": queued_at},
}
if task_id:
metadata["task_id"] = task_id
return RemoteSensingRun.objects.create(
soil_location=location,
block_subdivision=subdivision,
block_code=block_code,
chunk_size_sqm=int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900),
temporal_start=temporal_start,
temporal_end=temporal_end,
status=RemoteSensingRun.STATUS_PENDING,
metadata=metadata,
)
def _mark_run_running(run: RemoteSensingRun) -> None:
metadata = dict(run.metadata or {})
metadata["status_label"] = "running"
metadata["stage"] = "running"
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"started_at": timezone.now().isoformat(),
}
run.status = RemoteSensingRun.STATUS_RUNNING
run.started_at = timezone.now()
run.metadata = metadata
run.save(update_fields=["status", "started_at", "metadata", "updated_at"])
def _mark_run_success(
run: RemoteSensingRun,
summary: dict[str, Any],
service_metadata: dict[str, Any] | None = None,
) -> None:
metadata = dict(run.metadata or {})
metadata["summary"] = summary
metadata["status_label"] = "completed"
metadata["stage"] = "completed"
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"completed_at": timezone.now().isoformat(),
}
if service_metadata:
metadata["service"] = service_metadata
run.status = RemoteSensingRun.STATUS_SUCCESS
run.finished_at = timezone.now()
run.error_message = ""
run.metadata = metadata
run.save(
update_fields=[
"status",
"finished_at",
"error_message",
"metadata",
"updated_at",
]
)
def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
metadata = dict(run.metadata or {})
metadata["status_label"] = "failed"
metadata["failure_reason"] = error_message[:4000]
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"failed_at": timezone.now().isoformat(),
}
run.status = RemoteSensingRun.STATUS_FAILURE
run.finished_at = timezone.now()
run.error_message = error_message[:4000]
run.metadata = metadata
run.save(
update_fields=[
"status",
"finished_at",
"error_message",
"metadata",
"updated_at",
]
)
logger.exception(
"Remote sensing analysis failed",
extra={"run_id": run.id, "soil_location_id": run.soil_location_id, "block_code": run.block_code},
)
def _load_grid_cells(location: SoilLocation, block_code: str) -> list[AnalysisGridCell]:
queryset = AnalysisGridCell.objects.filter(soil_location=location)
queryset = queryset.filter(block_code=block_code or "")
return list(queryset.order_by("cell_code"))
def _load_observations(
*,
location: SoilLocation,
block_code: str,
temporal_start,
temporal_end,
) -> list[AnalysisGridObservation]:
queryset = (
AnalysisGridObservation.objects.select_related("cell", "run")
.filter(
cell__soil_location=location,
cell__block_code=block_code or "",
temporal_start=temporal_start,
temporal_end=temporal_end,
)
.order_by("cell__cell_code")
)
return list(queryset)
def _select_cells_for_processing(
*,
all_cells: list[AnalysisGridCell],
temporal_start,
temporal_end,
force_refresh: bool,
) -> list[AnalysisGridCell]:
if force_refresh:
return all_cells
existing_ids = set(
AnalysisGridObservation.objects.filter(
cell__in=all_cells,
temporal_start=temporal_start,
temporal_end=temporal_end,
).values_list("cell_id", flat=True)
)
return [cell for cell in all_cells if cell.id not in existing_ids]
def _upsert_grid_observations(
*,
cells: list[AnalysisGridCell],
run: RemoteSensingRun,
temporal_start,
temporal_end,
metric_payload: dict[str, Any],
) -> dict[str, int]:
metadata_template = {
"backend_name": metric_payload["metadata"].get("backend"),
"backend_url": metric_payload["metadata"].get("backend_url"),
"collections_used": metric_payload["metadata"].get("collections_used", []),
"slope_supported": metric_payload["metadata"].get("slope_supported", False),
"job_refs": metric_payload["metadata"].get("job_refs", {}),
"failed_metrics": metric_payload["metadata"].get("failed_metrics", []),
"run_id": run.id,
}
result_by_cell = metric_payload.get("results", {})
created_count = 0
updated_count = 0
with transaction.atomic():
for cell in cells:
values = result_by_cell.get(cell.cell_code, {})
defaults = {
"run": run,
"ndvi": values.get("ndvi"),
"ndwi": values.get("ndwi"),
"lst_c": values.get("lst_c"),
"soil_vv": values.get("soil_vv"),
"soil_vv_db": values.get("soil_vv_db"),
"dem_m": values.get("dem_m"),
"slope_deg": values.get("slope_deg"),
"metadata": metadata_template,
}
observation, created = AnalysisGridObservation.objects.update_or_create(
cell=cell,
temporal_start=temporal_start,
temporal_end=temporal_end,
defaults=defaults,
)
if created:
created_count += 1
else:
updated_count += 1
return {"created_count": created_count, "updated_count": updated_count}
def _ensure_subdivision_result(
*,
location: SoilLocation,
run: RemoteSensingRun,
subdivision: BlockSubdivision | None,
block_code: str,
observations: list[AnalysisGridObservation],
cluster_count: int | None,
selected_features: list[str] | None,
) -> RemoteSensingSubdivisionResult:
if not observations:
raise DataDrivenSubdivisionError("هیچ observation برای ساخت subdivision داده‌محور پیدا نشد.")
result = create_remote_sensing_subdivision_result(
location=location,
run=run,
observations=observations,
block_subdivision=subdivision,
block_code=block_code,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
explicit_k=cluster_count,
)
return result
def _record_run_stage(run: RemoteSensingRun, stage: str, details: dict[str, Any] | None = None) -> None:
metadata = dict(run.metadata or {})
metadata["stage"] = stage
metadata["stage_details"] = {
**dict(metadata.get("stage_details") or {}),
stage: details or {},
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
f"{stage}_at": timezone.now().isoformat(),
}
run.metadata = metadata
run.save(update_fields=["metadata", "updated_at"])
def _build_clustering_stage_metadata(
result: RemoteSensingSubdivisionResult,
) -> dict[str, Any]:
metadata = dict(result.metadata or {})
return {
"subdivision_result_id": result.id,
"cluster_count": result.cluster_count,
"selected_features": result.selected_features,
"used_cell_count": metadata.get("used_cell_count", 0),
"skipped_cell_count": metadata.get("skipped_cell_count", 0),
"skipped_cell_codes": result.skipped_cell_codes,
"kmeans_params": metadata.get("kmeans_params", {}),
}