2026-05-09 16:55:06 +03:30
parent 1679825ae2
commit cead7dafe2
51 changed files with 7514 additions and 1221 deletions
+592 -74
@@ -1,15 +1,36 @@
"""
تسک‌های Celery برای واکشی داده‌های خاک.
تسک‌های Celery برای pipeline سنجش‌ازدور و subdivision داده‌محور.
"""
from decimal import Decimal
import logging
from typing import Any
from config.celery import app
from django.apps import apps
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from django.utils.dateparse import parse_date
from .data_driven_subdivision import (
DEFAULT_CLUSTER_FEATURES,
DataDrivenSubdivisionError,
create_remote_sensing_subdivision_result,
)
from .grid_analysis import create_or_get_analysis_grid_cells
from .models import (
AnalysisGridCell,
AnalysisGridObservation,
BlockSubdivision,
RemoteSensingRun,
RemoteSensingSubdivisionResult,
SoilLocation,
)
from .openeo_service import (
OpenEOAuthenticationError,
OpenEOExecutionError,
OpenEOServiceError,
compute_remote_sensing_metrics,
)
try:
import requests
@@ -19,79 +40,576 @@ else:
RequestException = requests.RequestException
logger = logging.getLogger(__name__)
def run_remote_sensing_analysis(
*,
soil_location_id: int,
block_code: str = "",
temporal_start: Any,
temporal_end: Any,
force_refresh: bool = False,
task_id: str = "",
progress_callback=None,
run_id: int | None = None,
cluster_count: int | None = None,
selected_features: list[str] | None = None,
) -> dict[str, Any]:
"""
اجرای سنکرون تحلیل سنجش‌ازدور برای یک location/block.
این helper برای Celery task و هر orchestration داخلی دیگر قابل استفاده است.
"""
start_date = _normalize_temporal_date(temporal_start, "temporal_start")
end_date = _normalize_temporal_date(temporal_end, "temporal_end")
if start_date > end_date:
raise ValueError("temporal_start نمی‌تواند بعد از temporal_end باشد.")
location = SoilLocation.objects.filter(pk=soil_location_id).first()
if location is None:
raise ValueError(f"SoilLocation با id={soil_location_id} پیدا نشد.")
resolved_block_code = str(block_code or "").strip()
subdivision = _resolve_block_subdivision(location, resolved_block_code)
run = _get_or_create_remote_sensing_run(
run_id=run_id,
location=location,
subdivision=subdivision,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
task_id=task_id,
cluster_count=cluster_count,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
)
_mark_run_running(run)
try:
_record_run_stage(
run,
"preparing_analysis_grid",
{
"block_code": resolved_block_code,
"temporal_extent": {
"start_date": start_date.isoformat(),
"end_date": end_date.isoformat(),
},
},
)
grid_summary = create_or_get_analysis_grid_cells(
location,
block_code=resolved_block_code,
block_subdivision=subdivision,
)
_record_run_stage(run, "analysis_grid_ready", {"grid_summary": grid_summary})
all_cells = _load_grid_cells(location, resolved_block_code)
cells_to_process = _select_cells_for_processing(
all_cells=all_cells,
temporal_start=start_date,
temporal_end=end_date,
force_refresh=force_refresh,
)
_record_run_stage(
run,
"analysis_cells_selected",
{
"cell_selection": {
"total_cell_count": len(all_cells),
"cell_count_to_process": len(cells_to_process),
"existing_cell_count": len(all_cells) - len(cells_to_process),
"force_refresh": force_refresh,
}
},
)
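        # Fast path: every cell already has an observation for this exact window,
        # so clustering can run straight from the database without touching openEO.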
if not cells_to_process:
_record_run_stage(
run,
"using_cached_observations",
{"source": "database"},
)
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "database",
"run_id": run.id,
"processed_cell_count": 0,
"created_observation_count": 0,
"updated_observation_count": 0,
"existing_observation_count": len(all_cells),
"failed_metric_count": 0,
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": getattr(subdivision_result, "id", None),
"cluster_count": getattr(subdivision_result, "cluster_count", 0),
}
_mark_run_success(run, summary)
return summary
_record_run_stage(
run,
"fetching_remote_metrics",
{"requested_cell_count": len(cells_to_process)},
)
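        # Only the cells missing observations for this window go to openEO; cells
        # that already have data keep their stored observations untouched.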
remote_payload = compute_remote_sensing_metrics(
cells_to_process,
temporal_start=start_date,
temporal_end=end_date,
)
_record_run_stage(
run,
"remote_metrics_fetched",
{
"failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
"service_metadata": remote_payload["metadata"],
},
)
upsert_summary = _upsert_grid_observations(
cells=cells_to_process,
run=run,
temporal_start=start_date,
temporal_end=end_date,
metric_payload=remote_payload,
)
_record_run_stage(run, "observations_persisted", upsert_summary)
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "openeo",
"run_id": run.id,
"processed_cell_count": len(cells_to_process),
"created_observation_count": upsert_summary["created_count"],
"updated_observation_count": upsert_summary["updated_count"],
"existing_observation_count": len(all_cells) - len(cells_to_process),
"failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": subdivision_result.id,
"cluster_count": subdivision_result.cluster_count,
}
_mark_run_success(run, summary, remote_payload["metadata"])
logger.info(
"Remote sensing analysis completed",
extra={
"run_id": run.id,
"soil_location_id": location.id,
"block_code": resolved_block_code,
"processed_cell_count": summary["processed_cell_count"],
},
)
return summary
except Exception as exc:
_mark_run_failure(run, str(exc))
raise
@app.task(bind=True, max_retries=3, default_retry_delay=60)
def run_remote_sensing_analysis_task(
self,
soil_location_id: int,
block_code: str = "",
temporal_start: Any = "",
temporal_end: Any = "",
force_refresh: bool = False,
run_id: int | None = None,
cluster_count: int | None = None,
selected_features: list[str] | None = None,
):
"""
واکشی سنکرون داده خاک برای مختصات داده‌شده و ذخیره در DB.
این helper هم توسط Celery task و هم توسط endpointهای sync استفاده می‌شود.
"""
lat = Decimal(str(round(float(latitude), 6)))
lon = Decimal(str(round(float(longitude), 6)))
adapter = apps.get_app_config("location_data").get_soil_data_adapter()
with transaction.atomic():
location, created = SoilLocation.objects.select_for_update().get_or_create(
latitude=lat,
longitude=lon,
defaults={"task_id": task_id},
)
if not created and task_id:
location.task_id = task_id
location.save(update_fields=["task_id"])
for index, depth in enumerate(DEPTHS):
if progress_callback is not None:
progress_callback(
state="PROGRESS",
meta={
"current": index + 1,
"total": len(DEPTHS),
"message": f"در حال واکشی عمق {depth}...",
},
)
fields = adapter.fetch_depth_fields(float(lon), float(lat), depth)
with transaction.atomic():
SoilDepthData.objects.update_or_create(
soil_location=location,
depth_label=depth,
defaults=fields,
)
if task_id:
with transaction.atomic():
location.task_id = ""
location.save(update_fields=["task_id"])
return {
"status": "completed",
"location_id": location.id,
"depths": DEPTHS,
}
@app.task(bind=True)
def fetch_soil_data_task(self, latitude: float, longitude: float):
"""
واکشی داده‌های خاک برای مختصات داده‌شده و ذخیره در DB.
برای هر عمق (0-5cm, 5-15cm, 15-30cm) یک ریکوئست/شبیه‌سازی جدا انجام می‌شود.
اجرای async تحلیل سنجش‌ازدور برای location/block و ذخیره نتایج در DB.
"""
logger.info(
"Starting remote sensing analysis task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"temporal_start": temporal_start,
"temporal_end": temporal_end,
"force_refresh": force_refresh,
},
)
try:
return run_remote_sensing_analysis(
soil_location_id=soil_location_id,
block_code=block_code,
temporal_start=temporal_start,
temporal_end=temporal_end,
force_refresh=force_refresh,
task_id=self.request.id,
progress_callback=self.update_state,
run_id=run_id,
cluster_count=cluster_count,
selected_features=selected_features,
)
except OpenEOAuthenticationError:
logger.exception(
"Remote sensing auth failure",
extra={"task_id": self.request.id, "soil_location_id": soil_location_id},
)
raise
except (OpenEOExecutionError, OpenEOServiceError, RequestException, DataDrivenSubdivisionError) as exc:
logger.warning(
"Transient remote sensing failure, retrying task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"retry_count": self.request.retries,
"error": str(exc),
},
)
raise self.retry(exc=exc)
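# A minimal usage sketch (illustrative only; the id, block code, and dates below
# are made-up values, not part of this module):
#
#     run_remote_sensing_analysis_task.delay(
#         soil_location_id=42,
#         block_code="B1",
#         temporal_start="2024-01-01",
#         temporal_end="2024-03-31",
#     )
#
# temporal_* accept either date objects or ISO "YYYY-MM-DD" strings, which
# _normalize_temporal_date below converts before validation.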
def _normalize_temporal_date(value: Any, field_name: str):
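    # Accept date/datetime objects as-is (anything with isoformat, excluding str);
    # otherwise parse an ISO "YYYY-MM-DD" string via Django's parse_date.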
if hasattr(value, "isoformat") and not isinstance(value, str):
return value
parsed = parse_date(str(value))
if parsed is None:
raise ValueError(f"{field_name} نامعتبر است.")
return parsed
def _resolve_block_subdivision(location: SoilLocation, block_code: str) -> BlockSubdivision | None:
if not block_code:
return None
return (
BlockSubdivision.objects.filter(
soil_location=location,
block_code=block_code,
)
.order_by("-updated_at", "-id")
.first()
)
def _get_or_create_remote_sensing_run(
*,
run_id: int | None,
location: SoilLocation,
subdivision: BlockSubdivision | None,
block_code: str,
temporal_start,
temporal_end,
task_id: str,
cluster_count: int | None,
selected_features: list[str],
) -> RemoteSensingRun:
queued_at = timezone.now().isoformat()
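    # When the caller pre-created a run (passing run_id, e.g. whatever enqueued
    # the task), reuse and re-stamp it; otherwise create a fresh pending run.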
if run_id is not None:
run = RemoteSensingRun.objects.filter(pk=run_id, soil_location=location).first()
if run is not None:
metadata = dict(run.metadata or {})
if task_id:
metadata["task_id"] = task_id
metadata.setdefault("status_label", "pending")
metadata["stage"] = "queued"
metadata["selected_features"] = selected_features
metadata["requested_cluster_count"] = cluster_count
metadata["pipeline"] = {
"name": "remote_sensing_subdivision",
"version": 2,
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"queued_at": queued_at,
}
run.block_subdivision = subdivision
run.block_code = block_code
run.chunk_size_sqm = int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900)
run.temporal_start = temporal_start
run.temporal_end = temporal_end
run.metadata = metadata
run.save(
update_fields=[
"block_subdivision",
"block_code",
"chunk_size_sqm",
"temporal_start",
"temporal_end",
"metadata",
"updated_at",
]
)
return run
metadata = {
"status_label": "pending",
"stage": "queued",
"selected_features": selected_features,
"requested_cluster_count": cluster_count,
"pipeline": {
"name": "remote_sensing_subdivision",
"version": 2,
},
"timestamps": {"queued_at": queued_at},
}
if task_id:
metadata["task_id"] = task_id
return RemoteSensingRun.objects.create(
soil_location=location,
block_subdivision=subdivision,
block_code=block_code,
chunk_size_sqm=int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900),
temporal_start=temporal_start,
temporal_end=temporal_end,
status=RemoteSensingRun.STATUS_PENDING,
metadata=metadata,
)
def _mark_run_running(run: RemoteSensingRun) -> None:
metadata = dict(run.metadata or {})
metadata["status_label"] = "running"
metadata["stage"] = "running"
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"started_at": timezone.now().isoformat(),
}
run.status = RemoteSensingRun.STATUS_RUNNING
run.started_at = timezone.now()
run.metadata = metadata
run.save(update_fields=["status", "started_at", "metadata", "updated_at"])
def _mark_run_success(
run: RemoteSensingRun,
summary: dict[str, Any],
service_metadata: dict[str, Any] | None = None,
) -> None:
metadata = dict(run.metadata or {})
metadata["summary"] = summary
metadata["status_label"] = "completed"
metadata["stage"] = "completed"
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"completed_at": timezone.now().isoformat(),
}
if service_metadata:
metadata["service"] = service_metadata
run.status = RemoteSensingRun.STATUS_SUCCESS
run.finished_at = timezone.now()
run.error_message = ""
run.metadata = metadata
run.save(
update_fields=[
"status",
"finished_at",
"error_message",
"metadata",
"updated_at",
]
)
def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
metadata = dict(run.metadata or {})
metadata["status_label"] = "failed"
metadata["failure_reason"] = error_message[:4000]
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"failed_at": timezone.now().isoformat(),
}
run.status = RemoteSensingRun.STATUS_FAILURE
run.finished_at = timezone.now()
run.error_message = error_message[:4000]
run.metadata = metadata
run.save(
update_fields=[
"status",
"finished_at",
"error_message",
"metadata",
"updated_at",
]
)
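    # This helper is invoked from the caller's except block, so logger.exception
    # can still attach the active traceback.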
logger.exception(
"Remote sensing analysis failed",
extra={"run_id": run.id, "soil_location_id": run.soil_location_id, "block_code": run.block_code},
)
def _load_grid_cells(location: SoilLocation, block_code: str) -> list[AnalysisGridCell]:
queryset = AnalysisGridCell.objects.filter(soil_location=location)
queryset = queryset.filter(block_code=block_code or "")
return list(queryset.order_by("cell_code"))
def _load_observations(
*,
location: SoilLocation,
block_code: str,
temporal_start,
temporal_end,
) -> list[AnalysisGridObservation]:
queryset = (
AnalysisGridObservation.objects.select_related("cell", "run")
.filter(
cell__soil_location=location,
cell__block_code=block_code or "",
temporal_start=temporal_start,
temporal_end=temporal_end,
)
.order_by("cell__cell_code")
)
return list(queryset)
def _select_cells_for_processing(
*,
all_cells: list[AnalysisGridCell],
temporal_start,
temporal_end,
force_refresh: bool,
) -> list[AnalysisGridCell]:
if force_refresh:
return all_cells
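    # Cells that already have an observation for this exact temporal window count
    # as cached and are skipped; force_refresh above bypasses the check entirely.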
existing_ids = set(
AnalysisGridObservation.objects.filter(
cell__in=all_cells,
temporal_start=temporal_start,
temporal_end=temporal_end,
).values_list("cell_id", flat=True)
)
return [cell for cell in all_cells if cell.id not in existing_ids]
def _upsert_grid_observations(
*,
cells: list[AnalysisGridCell],
run: RemoteSensingRun,
temporal_start,
temporal_end,
metric_payload: dict[str, Any],
) -> dict[str, int]:
metadata_template = {
"backend_name": metric_payload["metadata"].get("backend"),
"backend_url": metric_payload["metadata"].get("backend_url"),
"collections_used": metric_payload["metadata"].get("collections_used", []),
"slope_supported": metric_payload["metadata"].get("slope_supported", False),
"job_refs": metric_payload["metadata"].get("job_refs", {}),
"failed_metrics": metric_payload["metadata"].get("failed_metrics", []),
"run_id": run.id,
}
result_by_cell = metric_payload.get("results", {})
created_count = 0
updated_count = 0
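    # One transaction for the whole batch. (cell, temporal_start, temporal_end)
    # is the lookup key, so re-running the same window updates rows in place
    # instead of creating duplicates.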
with transaction.atomic():
for cell in cells:
values = result_by_cell.get(cell.cell_code, {})
defaults = {
"run": run,
"ndvi": values.get("ndvi"),
"ndwi": values.get("ndwi"),
"lst_c": values.get("lst_c"),
"soil_vv": values.get("soil_vv"),
"soil_vv_db": values.get("soil_vv_db"),
"dem_m": values.get("dem_m"),
"slope_deg": values.get("slope_deg"),
"metadata": metadata_template,
}
observation, created = AnalysisGridObservation.objects.update_or_create(
cell=cell,
temporal_start=temporal_start,
temporal_end=temporal_end,
defaults=defaults,
)
if created:
created_count += 1
else:
updated_count += 1
return {"created_count": created_count, "updated_count": updated_count}
def _ensure_subdivision_result(
*,
location: SoilLocation,
run: RemoteSensingRun,
subdivision: BlockSubdivision | None,
block_code: str,
observations: list[AnalysisGridObservation],
cluster_count: int | None,
selected_features: list[str] | None,
) -> RemoteSensingSubdivisionResult:
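    # Clustering runs on whatever observations exist for the window, whether they
    # came from the cached branch or from a fresh openEO fetch.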
if not observations:
        raise DataDrivenSubdivisionError("No observations were found to build the data-driven subdivision.")
result = create_remote_sensing_subdivision_result(
location=location,
run=run,
observations=observations,
block_subdivision=subdivision,
block_code=block_code,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
explicit_k=cluster_count,
)
return result
def _record_run_stage(run: RemoteSensingRun, stage: str, details: dict[str, Any] | None = None) -> None:
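    # Every stage call merges into stage_details and stamps "<stage>_at" in
    # timestamps, leaving an audit trail of the pipeline inside run.metadata.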
metadata = dict(run.metadata or {})
metadata["stage"] = stage
metadata["stage_details"] = {
**dict(metadata.get("stage_details") or {}),
stage: details or {},
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
f"{stage}_at": timezone.now().isoformat(),
}
run.metadata = metadata
run.save(update_fields=["metadata", "updated_at"])
def _build_clustering_stage_metadata(
result: RemoteSensingSubdivisionResult,
) -> dict[str, Any]:
metadata = dict(result.metadata or {})
return {
"subdivision_result_id": result.id,
"cluster_count": result.cluster_count,
"selected_features": result.selected_features,
"used_cell_count": metadata.get("used_cell_count", 0),
"skipped_cell_count": metadata.get("skipped_cell_count", 0),
"skipped_cell_codes": result.skipped_cell_codes,
"kmeans_params": metadata.get("kmeans_params", {}),
}