2026-05-09 16:55:06 +03:30
parent 1679825ae2
commit cead7dafe2
51 changed files with 7514 additions and 1221 deletions
+592 -74
@@ -1,15 +1,36 @@
"""
تسک‌های Celery برای واکشی داده‌های خاک.
تسک‌های Celery برای pipeline سنجش‌ازدور و subdivision داده‌محور.
"""
from decimal import Decimal
import logging
from typing import Any
from config.celery import app
from django.apps import apps
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from django.utils.dateparse import parse_date
from .data_driven_subdivision import (
DEFAULT_CLUSTER_FEATURES,
DataDrivenSubdivisionError,
create_remote_sensing_subdivision_result,
)
from .grid_analysis import create_or_get_analysis_grid_cells
from .models import (
AnalysisGridCell,
AnalysisGridObservation,
BlockSubdivision,
RemoteSensingRun,
RemoteSensingSubdivisionResult,
SoilLocation,
)
from .openeo_service import (
OpenEOAuthenticationError,
OpenEOExecutionError,
OpenEOServiceError,
compute_remote_sensing_metrics,
)
try:
import requests
@@ -19,79 +40,576 @@ else:
RequestException = requests.RequestException
logger = logging.getLogger(__name__)
def run_remote_sensing_analysis(
*,
soil_location_id: int,
block_code: str = "",
temporal_start: Any,
temporal_end: Any,
force_refresh: bool = False,
task_id: str = "",
progress_callback=None,
run_id: int | None = None,
cluster_count: int | None = None,
selected_features: list[str] | None = None,
) -> dict[str, Any]:
"""
اجرای سنکرون تحلیل سنجش‌ازدور برای یک location/block.
این helper برای Celery task و هر orchestration داخلی دیگر قابل استفاده است.
"""
start_date = _normalize_temporal_date(temporal_start, "temporal_start")
end_date = _normalize_temporal_date(temporal_end, "temporal_end")
if start_date > end_date:
raise ValueError("temporal_start نمی‌تواند بعد از temporal_end باشد.")
location = SoilLocation.objects.filter(pk=soil_location_id).first()
if location is None:
raise ValueError(f"SoilLocation با id={soil_location_id} پیدا نشد.")
resolved_block_code = str(block_code or "").strip()
subdivision = _resolve_block_subdivision(location, resolved_block_code)
run = _get_or_create_remote_sensing_run(
run_id=run_id,
location=location,
subdivision=subdivision,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
task_id=task_id,
cluster_count=cluster_count,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
)
_mark_run_running(run)
try:
_record_run_stage(
run,
"preparing_analysis_grid",
{
"block_code": resolved_block_code,
"temporal_extent": {
"start_date": start_date.isoformat(),
"end_date": end_date.isoformat(),
},
},
)
grid_summary = create_or_get_analysis_grid_cells(
location,
block_code=resolved_block_code,
block_subdivision=subdivision,
)
_record_run_stage(run, "analysis_grid_ready", {"grid_summary": grid_summary})
all_cells = _load_grid_cells(location, resolved_block_code)
cells_to_process = _select_cells_for_processing(
all_cells=all_cells,
temporal_start=start_date,
temporal_end=end_date,
force_refresh=force_refresh,
)
_record_run_stage(
run,
"analysis_cells_selected",
{
"cell_selection": {
"total_cell_count": len(all_cells),
"cell_count_to_process": len(cells_to_process),
"existing_cell_count": len(all_cells) - len(cells_to_process),
"force_refresh": force_refresh,
}
},
)
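        # Fast path: every cell already has an observation for this exact window,
        # so clustering can run straight from the database without touching openEO.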
if not cells_to_process:
_record_run_stage(
run,
"using_cached_observations",
{"source": "database"},
)
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "database",
"run_id": run.id,
"processed_cell_count": 0,
"created_observation_count": 0,
"updated_observation_count": 0,
"existing_observation_count": len(all_cells),
"failed_metric_count": 0,
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": getattr(subdivision_result, "id", None),
"cluster_count": getattr(subdivision_result, "cluster_count", 0),
}
_mark_run_success(run, summary)
return summary
_record_run_stage(
run,
"fetching_remote_metrics",
{"requested_cell_count": len(cells_to_process)},
)
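        # Only the cells missing observations for this window go to openEO; cells
        # that already have data keep their stored observations untouched.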
remote_payload = compute_remote_sensing_metrics(
cells_to_process,
temporal_start=start_date,
temporal_end=end_date,
)
_record_run_stage(
run,
"remote_metrics_fetched",
{
"failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
"service_metadata": remote_payload["metadata"],
},
)
upsert_summary = _upsert_grid_observations(
cells=cells_to_process,
run=run,
temporal_start=start_date,
temporal_end=end_date,
metric_payload=remote_payload,
)
_record_run_stage(run, "observations_persisted", upsert_summary)
observations = _load_observations(
location=location,
block_code=resolved_block_code,
temporal_start=start_date,
temporal_end=end_date,
)
subdivision_result = _ensure_subdivision_result(
location=location,
run=run,
subdivision=subdivision,
block_code=resolved_block_code,
observations=observations,
cluster_count=cluster_count,
selected_features=selected_features,
)
_record_run_stage(
run,
"clustering_completed",
_build_clustering_stage_metadata(subdivision_result),
)
summary = {
"status": "completed",
"source": "openeo",
"run_id": run.id,
"processed_cell_count": len(cells_to_process),
"created_observation_count": upsert_summary["created_count"],
"updated_observation_count": upsert_summary["updated_count"],
"existing_observation_count": len(all_cells) - len(cells_to_process),
"failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
"chunk_size_sqm": grid_summary["chunk_size_sqm"],
"block_code": resolved_block_code,
"cell_count": len(all_cells),
"subdivision_result_id": subdivision_result.id,
"cluster_count": subdivision_result.cluster_count,
}
_mark_run_success(run, summary, remote_payload["metadata"])
logger.info(
"Remote sensing analysis completed",
extra={
"run_id": run.id,
"soil_location_id": location.id,
"block_code": resolved_block_code,
"processed_cell_count": summary["processed_cell_count"],
},
)
return summary
except Exception as exc:
_mark_run_failure(run, str(exc))
raise
@app.task(bind=True, max_retries=3, default_retry_delay=60)
def run_remote_sensing_analysis_task(
self,
soil_location_id: int,
block_code: str = "",
temporal_start: Any = "",
temporal_end: Any = "",
force_refresh: bool = False,
run_id: int | None = None,
cluster_count: int | None = None,
selected_features: list[str] | None = None,
):
"""
واکشی سنکرون داده خاک برای مختصات داده‌شده و ذخیره در DB.
این helper هم توسط Celery task و هم توسط endpointهای sync استفاده می‌شود.
"""
lat = Decimal(str(round(float(latitude), 6)))
lon = Decimal(str(round(float(longitude), 6)))
adapter = apps.get_app_config("location_data").get_soil_data_adapter()
with transaction.atomic():
location, created = SoilLocation.objects.select_for_update().get_or_create(
latitude=lat,
longitude=lon,
defaults={"task_id": task_id},
)
if not created and task_id:
location.task_id = task_id
location.save(update_fields=["task_id"])
for index, depth in enumerate(DEPTHS):
if progress_callback is not None:
progress_callback(
state="PROGRESS",
meta={
"current": index + 1,
"total": len(DEPTHS),
"message": f"در حال واکشی عمق {depth}...",
},
)
fields = adapter.fetch_depth_fields(float(lon), float(lat), depth)
with transaction.atomic():
SoilDepthData.objects.update_or_create(
soil_location=location,
depth_label=depth,
defaults=fields,
)
if task_id:
with transaction.atomic():
location.task_id = ""
location.save(update_fields=["task_id"])
return {
"status": "completed",
"location_id": location.id,
"depths": DEPTHS,
}
@app.task(bind=True)
def fetch_soil_data_task(self, latitude: float, longitude: float):
"""
واکشی داده‌های خاک برای مختصات داده‌شده و ذخیره در DB.
برای هر عمق (0-5cm, 5-15cm, 15-30cm) یک ریکوئست/شبیه‌سازی جدا انجام می‌شود.
اجرای async تحلیل سنجش‌ازدور برای location/block و ذخیره نتایج در DB.
"""
logger.info(
"Starting remote sensing analysis task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"temporal_start": temporal_start,
"temporal_end": temporal_end,
"force_refresh": force_refresh,
},
)
try:
return run_remote_sensing_analysis(
soil_location_id=soil_location_id,
block_code=block_code,
temporal_start=temporal_start,
temporal_end=temporal_end,
force_refresh=force_refresh,
task_id=self.request.id,
progress_callback=self.update_state,
run_id=run_id,
cluster_count=cluster_count,
selected_features=selected_features,
)
except OpenEOAuthenticationError:
logger.exception(
"Remote sensing auth failure",
extra={"task_id": self.request.id, "soil_location_id": soil_location_id},
)
raise
except (OpenEOExecutionError, OpenEOServiceError, RequestException, DataDrivenSubdivisionError) as exc:
logger.warning(
"Transient remote sensing failure, retrying task",
extra={
"task_id": self.request.id,
"soil_location_id": soil_location_id,
"block_code": block_code,
"retry_count": self.request.retries,
"error": str(exc),
},
)
raise self.retry(exc=exc)
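# A minimal usage sketch (illustrative only; the id, block code, and dates below
# are made-up values, not part of this module):
#
#     run_remote_sensing_analysis_task.delay(
#         soil_location_id=42,
#         block_code="B1",
#         temporal_start="2024-01-01",
#         temporal_end="2024-03-31",
#     )
#
# temporal_* accept either date objects or ISO "YYYY-MM-DD" strings, which
# _normalize_temporal_date below converts before validation.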
def _normalize_temporal_date(value: Any, field_name: str):
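    # Accept date/datetime objects as-is (anything with isoformat, excluding str);
    # otherwise parse an ISO "YYYY-MM-DD" string via Django's parse_date.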
if hasattr(value, "isoformat") and not isinstance(value, str):
return value
parsed = parse_date(str(value))
if parsed is None:
raise ValueError(f"{field_name} نامعتبر است.")
return parsed
def _resolve_block_subdivision(location: SoilLocation, block_code: str) -> BlockSubdivision | None:
if not block_code:
return None
return (
BlockSubdivision.objects.filter(
soil_location=location,
block_code=block_code,
)
.order_by("-updated_at", "-id")
.first()
)
def _get_or_create_remote_sensing_run(
*,
run_id: int | None,
location: SoilLocation,
subdivision: BlockSubdivision | None,
block_code: str,
temporal_start,
temporal_end,
task_id: str,
cluster_count: int | None,
selected_features: list[str],
) -> RemoteSensingRun:
queued_at = timezone.now().isoformat()
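    # When the caller pre-created a run (passing run_id, e.g. whatever enqueued
    # the task), reuse and re-stamp it; otherwise create a fresh pending run.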
if run_id is not None:
run = RemoteSensingRun.objects.filter(pk=run_id, soil_location=location).first()
if run is not None:
metadata = dict(run.metadata or {})
if task_id:
metadata["task_id"] = task_id
metadata.setdefault("status_label", "pending")
metadata["stage"] = "queued"
metadata["selected_features"] = selected_features
metadata["requested_cluster_count"] = cluster_count
metadata["pipeline"] = {
"name": "remote_sensing_subdivision",
"version": 2,
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"queued_at": queued_at,
}
run.block_subdivision = subdivision
run.block_code = block_code
run.chunk_size_sqm = int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900)
run.temporal_start = temporal_start
run.temporal_end = temporal_end
run.metadata = metadata
run.save(
update_fields=[
"block_subdivision",
"block_code",
"chunk_size_sqm",
"temporal_start",
"temporal_end",
"metadata",
"updated_at",
]
)
return run
metadata = {
"status_label": "pending",
"stage": "queued",
"selected_features": selected_features,
"requested_cluster_count": cluster_count,
"pipeline": {
"name": "remote_sensing_subdivision",
"version": 2,
},
"timestamps": {"queued_at": queued_at},
}
if task_id:
metadata["task_id"] = task_id
return RemoteSensingRun.objects.create(
soil_location=location,
block_subdivision=subdivision,
block_code=block_code,
chunk_size_sqm=int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900),
temporal_start=temporal_start,
temporal_end=temporal_end,
status=RemoteSensingRun.STATUS_PENDING,
metadata=metadata,
)
def _mark_run_running(run: RemoteSensingRun) -> None:
metadata = dict(run.metadata or {})
metadata["status_label"] = "running"
metadata["stage"] = "running"
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"started_at": timezone.now().isoformat(),
}
run.status = RemoteSensingRun.STATUS_RUNNING
run.started_at = timezone.now()
run.metadata = metadata
run.save(update_fields=["status", "started_at", "metadata", "updated_at"])
def _mark_run_success(
run: RemoteSensingRun,
summary: dict[str, Any],
service_metadata: dict[str, Any] | None = None,
) -> None:
metadata = dict(run.metadata or {})
metadata["summary"] = summary
metadata["status_label"] = "completed"
metadata["stage"] = "completed"
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"completed_at": timezone.now().isoformat(),
}
if service_metadata:
metadata["service"] = service_metadata
run.status = RemoteSensingRun.STATUS_SUCCESS
run.finished_at = timezone.now()
run.error_message = ""
run.metadata = metadata
run.save(
update_fields=[
"status",
"finished_at",
"error_message",
"metadata",
"updated_at",
]
)
def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
metadata = dict(run.metadata or {})
metadata["status_label"] = "failed"
metadata["failure_reason"] = error_message[:4000]
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
"failed_at": timezone.now().isoformat(),
}
run.status = RemoteSensingRun.STATUS_FAILURE
run.finished_at = timezone.now()
run.error_message = error_message[:4000]
run.metadata = metadata
run.save(
update_fields=[
"status",
"finished_at",
"error_message",
"metadata",
"updated_at",
]
)
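    # This helper is invoked from the caller's except block, so logger.exception
    # can still attach the active traceback.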
logger.exception(
"Remote sensing analysis failed",
extra={"run_id": run.id, "soil_location_id": run.soil_location_id, "block_code": run.block_code},
)
def _load_grid_cells(location: SoilLocation, block_code: str) -> list[AnalysisGridCell]:
queryset = AnalysisGridCell.objects.filter(soil_location=location)
queryset = queryset.filter(block_code=block_code or "")
return list(queryset.order_by("cell_code"))
def _load_observations(
*,
location: SoilLocation,
block_code: str,
temporal_start,
temporal_end,
) -> list[AnalysisGridObservation]:
queryset = (
AnalysisGridObservation.objects.select_related("cell", "run")
.filter(
cell__soil_location=location,
cell__block_code=block_code or "",
temporal_start=temporal_start,
temporal_end=temporal_end,
)
.order_by("cell__cell_code")
)
return list(queryset)
def _select_cells_for_processing(
*,
all_cells: list[AnalysisGridCell],
temporal_start,
temporal_end,
force_refresh: bool,
) -> list[AnalysisGridCell]:
if force_refresh:
return all_cells
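    # Cells that already have an observation for this exact temporal window count
    # as cached and are skipped; force_refresh above bypasses the check entirely.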
existing_ids = set(
AnalysisGridObservation.objects.filter(
cell__in=all_cells,
temporal_start=temporal_start,
temporal_end=temporal_end,
).values_list("cell_id", flat=True)
)
return [cell for cell in all_cells if cell.id not in existing_ids]
def _upsert_grid_observations(
*,
cells: list[AnalysisGridCell],
run: RemoteSensingRun,
temporal_start,
temporal_end,
metric_payload: dict[str, Any],
) -> dict[str, int]:
metadata_template = {
"backend_name": metric_payload["metadata"].get("backend"),
"backend_url": metric_payload["metadata"].get("backend_url"),
"collections_used": metric_payload["metadata"].get("collections_used", []),
"slope_supported": metric_payload["metadata"].get("slope_supported", False),
"job_refs": metric_payload["metadata"].get("job_refs", {}),
"failed_metrics": metric_payload["metadata"].get("failed_metrics", []),
"run_id": run.id,
}
result_by_cell = metric_payload.get("results", {})
created_count = 0
updated_count = 0
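    # One transaction for the whole batch. (cell, temporal_start, temporal_end)
    # is the lookup key, so re-running the same window updates rows in place
    # instead of creating duplicates.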
with transaction.atomic():
for cell in cells:
values = result_by_cell.get(cell.cell_code, {})
defaults = {
"run": run,
"ndvi": values.get("ndvi"),
"ndwi": values.get("ndwi"),
"lst_c": values.get("lst_c"),
"soil_vv": values.get("soil_vv"),
"soil_vv_db": values.get("soil_vv_db"),
"dem_m": values.get("dem_m"),
"slope_deg": values.get("slope_deg"),
"metadata": metadata_template,
}
observation, created = AnalysisGridObservation.objects.update_or_create(
cell=cell,
temporal_start=temporal_start,
temporal_end=temporal_end,
defaults=defaults,
)
if created:
created_count += 1
else:
updated_count += 1
return {"created_count": created_count, "updated_count": updated_count}
def _ensure_subdivision_result(
*,
location: SoilLocation,
run: RemoteSensingRun,
subdivision: BlockSubdivision | None,
block_code: str,
observations: list[AnalysisGridObservation],
cluster_count: int | None,
selected_features: list[str] | None,
) -> RemoteSensingSubdivisionResult:
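    # Clustering runs on whatever observations exist for the window, whether they
    # came from the cached branch or from a fresh openEO fetch.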
if not observations:
        raise DataDrivenSubdivisionError("No observations were found to build the data-driven subdivision.")
result = create_remote_sensing_subdivision_result(
location=location,
run=run,
observations=observations,
block_subdivision=subdivision,
block_code=block_code,
selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
explicit_k=cluster_count,
)
return result
def _record_run_stage(run: RemoteSensingRun, stage: str, details: dict[str, Any] | None = None) -> None:
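    # Every stage call merges into stage_details and stamps "<stage>_at" in
    # timestamps, leaving an audit trail of the pipeline inside run.metadata.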
metadata = dict(run.metadata or {})
metadata["stage"] = stage
metadata["stage_details"] = {
**dict(metadata.get("stage_details") or {}),
stage: details or {},
}
metadata["timestamps"] = {
**dict(metadata.get("timestamps") or {}),
f"{stage}_at": timezone.now().isoformat(),
}
run.metadata = metadata
run.save(update_fields=["metadata", "updated_at"])
def _build_clustering_stage_metadata(
result: RemoteSensingSubdivisionResult,
) -> dict[str, Any]:
metadata = dict(result.metadata or {})
return {
"subdivision_result_id": result.id,
"cluster_count": result.cluster_count,
"selected_features": result.selected_features,
"used_cell_count": metadata.get("used_cell_count", 0),
"skipped_cell_count": metadata.get("skipped_cell_count", 0),
"skipped_cell_codes": result.skipped_cell_codes,
"kmeans_params": metadata.get("kmeans_params", {}),
}