"""
تسک‌های Celery برای pipeline سنجش‌ازدور و subdivision داده‌محور.
"""

import logging
from typing import Any

from config.celery import app
from django.conf import settings
from django.db import transaction
from django.utils import timezone
from django.utils.dateparse import parse_date

from .data_driven_subdivision import (
    DEFAULT_CLUSTER_FEATURES,
    DataDrivenSubdivisionError,
    create_remote_sensing_subdivision_result,
)
from .grid_analysis import create_or_get_analysis_grid_cells
from .models import (
    AnalysisGridCell,
    AnalysisGridObservation,
    BlockSubdivision,
    RemoteSensingRun,
    RemoteSensingSubdivisionResult,
    SoilLocation,
)
from .openeo_service import (
    OpenEOAuthenticationError,
    OpenEOExecutionError,
    OpenEOServiceError,
    compute_remote_sensing_metrics,
)

try:
    import requests
except ImportError:  # pragma: no cover - handled in stripped envs
    RequestException = Exception
else:
    RequestException = requests.RequestException


logger = logging.getLogger(__name__)


def run_remote_sensing_analysis(
    *,
    soil_location_id: int,
    block_code: str = "",
    temporal_start: Any,
    temporal_end: Any,
    force_refresh: bool = False,
    task_id: str = "",
    run_id: int | None = None,
    cluster_count: int | None = None,
    selected_features: list[str] | None = None,
) -> dict[str, Any]:
    """
    اجرای سنکرون تحلیل سنجش‌ازدور برای یک location/block.
    این helper برای Celery task و هر orchestration داخلی دیگر قابل استفاده است.
    """
    start_date = _normalize_temporal_date(temporal_start, "temporal_start")
    end_date = _normalize_temporal_date(temporal_end, "temporal_end")
    if start_date > end_date:
        raise ValueError("temporal_start نمی‌تواند بعد از temporal_end باشد.")

    location = SoilLocation.objects.filter(pk=soil_location_id).first()
    if location is None:
        raise ValueError(f"SoilLocation با id={soil_location_id} پیدا نشد.")

    resolved_block_code = str(block_code or "").strip()
    subdivision = _resolve_block_subdivision(location, resolved_block_code)
    run = _get_or_create_remote_sensing_run(
        run_id=run_id,
        location=location,
        subdivision=subdivision,
        block_code=resolved_block_code,
        temporal_start=start_date,
        temporal_end=end_date,
        task_id=task_id,
        cluster_count=cluster_count,
        selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
    )
    _mark_run_running(run)

    try:
        _record_run_stage(
            run,
            "preparing_analysis_grid",
            {
                "block_code": resolved_block_code,
                "temporal_extent": {
                    "start_date": start_date.isoformat(),
                    "end_date": end_date.isoformat(),
                },
            },
        )
        grid_summary = create_or_get_analysis_grid_cells(
            location,
            block_code=resolved_block_code,
            block_subdivision=subdivision,
        )
        _record_run_stage(run, "analysis_grid_ready", {"grid_summary": grid_summary})
        all_cells = _load_grid_cells(location, resolved_block_code)
        cells_to_process = _select_cells_for_processing(
            all_cells=all_cells,
            temporal_start=start_date,
            temporal_end=end_date,
            force_refresh=force_refresh,
        )
        _record_run_stage(
            run,
            "analysis_cells_selected",
            {
                "cell_selection": {
                    "total_cell_count": len(all_cells),
                    "cell_count_to_process": len(cells_to_process),
                    "existing_cell_count": len(all_cells) - len(cells_to_process),
                    "force_refresh": force_refresh,
                }
            },
        )

        if not cells_to_process:
            _record_run_stage(
                run,
                "using_cached_observations",
                {"source": "database"},
            )
            observations = _load_observations(
                location=location,
                block_code=resolved_block_code,
                temporal_start=start_date,
                temporal_end=end_date,
            )
            subdivision_result = _ensure_subdivision_result(
                location=location,
                run=run,
                subdivision=subdivision,
                block_code=resolved_block_code,
                observations=observations,
                cluster_count=cluster_count,
                selected_features=selected_features,
            )
            _record_run_stage(
                run,
                "clustering_completed",
                _build_clustering_stage_metadata(subdivision_result),
            )
            summary = {
                "status": "completed",
                "source": "database",
                "run_id": run.id,
                "processed_cell_count": 0,
                "created_observation_count": 0,
                "updated_observation_count": 0,
                "existing_observation_count": len(all_cells),
                "failed_metric_count": 0,
                "chunk_size_sqm": grid_summary["chunk_size_sqm"],
                "block_code": resolved_block_code,
                "cell_count": len(all_cells),
                "subdivision_result_id": getattr(subdivision_result, "id", None),
                "cluster_count": getattr(subdivision_result, "cluster_count", 0),
            }
            _mark_run_success(run, summary)
            return summary

        _record_run_stage(
            run,
            "fetching_remote_metrics",
            {"requested_cell_count": len(cells_to_process)},
        )
        remote_payload = compute_remote_sensing_metrics(
            cells_to_process,
            temporal_start=start_date,
            temporal_end=end_date,
        )
        _record_run_stage(
            run,
            "remote_metrics_fetched",
            {
                "failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
                "service_metadata": remote_payload["metadata"],
            },
        )
        upsert_summary = _upsert_grid_observations(
            cells=cells_to_process,
            run=run,
            temporal_start=start_date,
            temporal_end=end_date,
            metric_payload=remote_payload,
        )
        _record_run_stage(run, "observations_persisted", upsert_summary)
        observations = _load_observations(
            location=location,
            block_code=resolved_block_code,
            temporal_start=start_date,
            temporal_end=end_date,
        )
        subdivision_result = _ensure_subdivision_result(
            location=location,
            run=run,
            subdivision=subdivision,
            block_code=resolved_block_code,
            observations=observations,
            cluster_count=cluster_count,
            selected_features=selected_features,
        )
        _record_run_stage(
            run,
            "clustering_completed",
            _build_clustering_stage_metadata(subdivision_result),
        )
        summary = {
            "status": "completed",
            "source": "openeo",
            "run_id": run.id,
            "processed_cell_count": len(cells_to_process),
            "created_observation_count": upsert_summary["created_count"],
            "updated_observation_count": upsert_summary["updated_count"],
            "existing_observation_count": len(all_cells) - len(cells_to_process),
            "failed_metric_count": len(remote_payload["metadata"].get("failed_metrics", [])),
            "chunk_size_sqm": grid_summary["chunk_size_sqm"],
            "block_code": resolved_block_code,
            "cell_count": len(all_cells),
            "subdivision_result_id": subdivision_result.id,
            "cluster_count": subdivision_result.cluster_count,
        }
        _mark_run_success(run, summary, remote_payload["metadata"])
        logger.info(
            "Remote sensing analysis completed",
            extra={
                "run_id": run.id,
                "soil_location_id": location.id,
                "block_code": resolved_block_code,
                "processed_cell_count": summary["processed_cell_count"],
            },
        )
        return summary
    except Exception as exc:
        _mark_run_failure(run, str(exc))
        raise


@app.task(bind=True, max_retries=3, default_retry_delay=60)
def run_remote_sensing_analysis_task(
    self,
    soil_location_id: int,
    block_code: str = "",
    temporal_start: Any = "",
    temporal_end: Any = "",
    force_refresh: bool = False,
    run_id: int | None = None,
    cluster_count: int | None = None,
    selected_features: list[str] | None = None,
):
    """
    اجرای async تحلیل سنجش‌ازدور برای location/block و ذخیره نتایج در DB.
    """
    logger.info(
        "Starting remote sensing analysis task",
        extra={
            "task_id": self.request.id,
            "soil_location_id": soil_location_id,
            "block_code": block_code,
            "temporal_start": temporal_start,
            "temporal_end": temporal_end,
            "force_refresh": force_refresh,
        },
    )
    try:
        return run_remote_sensing_analysis(
            soil_location_id=soil_location_id,
            block_code=block_code,
            temporal_start=temporal_start,
            temporal_end=temporal_end,
            force_refresh=force_refresh,
            task_id=self.request.id,
            run_id=run_id,
            cluster_count=cluster_count,
            selected_features=selected_features,
        )
    except OpenEOAuthenticationError:
        logger.exception(
            "Remote sensing auth failure",
            extra={"task_id": self.request.id, "soil_location_id": soil_location_id},
        )
        raise
    except (OpenEOExecutionError, OpenEOServiceError, RequestException, DataDrivenSubdivisionError) as exc:
        logger.warning(
            "Transient remote sensing failure, retrying task",
            extra={
                "task_id": self.request.id,
                "soil_location_id": soil_location_id,
                "block_code": block_code,
                "retry_count": self.request.retries,
                "error": str(exc),
            },
        )
        raise self.retry(exc=exc)


def _normalize_temporal_date(value: Any, field_name: str):
    if hasattr(value, "isoformat") and not isinstance(value, str):
        return value
    parsed = parse_date(str(value))
    if parsed is None:
        raise ValueError(f"{field_name} نامعتبر است.")
    return parsed


def _resolve_block_subdivision(location: SoilLocation, block_code: str) -> BlockSubdivision | None:
    if not block_code:
        return None
    return (
        BlockSubdivision.objects.filter(
            soil_location=location,
            block_code=block_code,
        )
        .order_by("-updated_at", "-id")
        .first()
    )


def _get_or_create_remote_sensing_run(
    *,
    run_id: int | None,
    location: SoilLocation,
    subdivision: BlockSubdivision | None,
    block_code: str,
    temporal_start,
    temporal_end,
    task_id: str,
    cluster_count: int | None,
    selected_features: list[str],
) -> RemoteSensingRun:
    queued_at = timezone.now().isoformat()
    if run_id is not None:
        run = RemoteSensingRun.objects.filter(pk=run_id, soil_location=location).first()
        if run is not None:
            metadata = dict(run.metadata or {})
            if task_id:
                metadata["task_id"] = task_id
            metadata.setdefault("status_label", "pending")
            metadata["stage"] = "queued"
            metadata["selected_features"] = selected_features
            metadata["requested_cluster_count"] = cluster_count
            metadata["pipeline"] = {
                "name": "remote_sensing_subdivision",
                "version": 2,
            }
            metadata["timestamps"] = {
                **dict(metadata.get("timestamps") or {}),
                "queued_at": queued_at,
            }
            run.block_subdivision = subdivision
            run.block_code = block_code
            run.chunk_size_sqm = int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900)
            run.temporal_start = temporal_start
            run.temporal_end = temporal_end
            run.metadata = metadata
            run.save(
                update_fields=[
                    "block_subdivision",
                    "block_code",
                    "chunk_size_sqm",
                    "temporal_start",
                    "temporal_end",
                    "metadata",
                    "updated_at",
                ]
            )
            return run
    metadata = {
        "status_label": "pending",
        "stage": "queued",
        "selected_features": selected_features,
        "requested_cluster_count": cluster_count,
        "pipeline": {
            "name": "remote_sensing_subdivision",
            "version": 2,
        },
        "timestamps": {"queued_at": queued_at},
    }
    if task_id:
        metadata["task_id"] = task_id
    return RemoteSensingRun.objects.create(
        soil_location=location,
        block_subdivision=subdivision,
        block_code=block_code,
        chunk_size_sqm=int(getattr(settings, "SUBDIVISION_CHUNK_SQM", 900) or 900),
        temporal_start=temporal_start,
        temporal_end=temporal_end,
        status=RemoteSensingRun.STATUS_PENDING,
        metadata=metadata,
    )


def _mark_run_running(run: RemoteSensingRun) -> None:
    metadata = dict(run.metadata or {})
    metadata["status_label"] = "running"
    metadata["stage"] = "running"
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "started_at": timezone.now().isoformat(),
    }
    run.status = RemoteSensingRun.STATUS_RUNNING
    run.started_at = timezone.now()
    run.metadata = metadata
    run.save(update_fields=["status", "started_at", "metadata", "updated_at"])


def _mark_run_success(
    run: RemoteSensingRun,
    summary: dict[str, Any],
    service_metadata: dict[str, Any] | None = None,
) -> None:
    metadata = dict(run.metadata or {})
    metadata["summary"] = summary
    metadata["status_label"] = "completed"
    metadata["stage"] = "completed"
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "completed_at": timezone.now().isoformat(),
    }
    if service_metadata:
        metadata["service"] = service_metadata
    run.status = RemoteSensingRun.STATUS_SUCCESS
    run.finished_at = timezone.now()
    run.error_message = ""
    run.metadata = metadata
    run.save(
        update_fields=[
            "status",
            "finished_at",
            "error_message",
            "metadata",
            "updated_at",
        ]
    )


def _mark_run_failure(run: RemoteSensingRun, error_message: str) -> None:
    metadata = dict(run.metadata or {})
    metadata["status_label"] = "failed"
    metadata["failure_reason"] = error_message[:4000]
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        "failed_at": timezone.now().isoformat(),
    }
    run.status = RemoteSensingRun.STATUS_FAILURE
    run.finished_at = timezone.now()
    run.error_message = error_message[:4000]
    run.metadata = metadata
    run.save(
        update_fields=[
            "status",
            "finished_at",
            "error_message",
            "metadata",
            "updated_at",
        ]
    )
    logger.exception(
        "Remote sensing analysis failed",
        extra={"run_id": run.id, "soil_location_id": run.soil_location_id, "block_code": run.block_code},
    )


def _load_grid_cells(location: SoilLocation, block_code: str) -> list[AnalysisGridCell]:
    queryset = AnalysisGridCell.objects.filter(soil_location=location)
    queryset = queryset.filter(block_code=block_code or "")
    return list(queryset.order_by("cell_code"))


def _load_observations(
    *,
    location: SoilLocation,
    block_code: str,
    temporal_start,
    temporal_end,
) -> list[AnalysisGridObservation]:
    queryset = (
        AnalysisGridObservation.objects.select_related("cell", "run")
        .filter(
            cell__soil_location=location,
            cell__block_code=block_code or "",
            temporal_start=temporal_start,
            temporal_end=temporal_end,
        )
        .order_by("cell__cell_code")
    )
    return list(queryset)


def _select_cells_for_processing(
    *,
    all_cells: list[AnalysisGridCell],
    temporal_start,
    temporal_end,
    force_refresh: bool,
) -> list[AnalysisGridCell]:
    if force_refresh:
        return all_cells

    existing_ids = set(
        AnalysisGridObservation.objects.filter(
            cell__in=all_cells,
            temporal_start=temporal_start,
            temporal_end=temporal_end,
        ).values_list("cell_id", flat=True)
    )
    return [cell for cell in all_cells if cell.id not in existing_ids]


def _upsert_grid_observations(
    *,
    cells: list[AnalysisGridCell],
    run: RemoteSensingRun,
    temporal_start,
    temporal_end,
    metric_payload: dict[str, Any],
) -> dict[str, int]:
    metadata_template = {
        "backend_name": metric_payload["metadata"].get("backend"),
        "backend_url": metric_payload["metadata"].get("backend_url"),
        "collections_used": metric_payload["metadata"].get("collections_used", []),
        "slope_supported": metric_payload["metadata"].get("slope_supported", False),
        "job_refs": metric_payload["metadata"].get("job_refs", {}),
        "failed_metrics": metric_payload["metadata"].get("failed_metrics", []),
        "run_id": run.id,
    }
    result_by_cell = metric_payload.get("results", {})

    created_count = 0
    updated_count = 0
    with transaction.atomic():
        for cell in cells:
            values = result_by_cell.get(cell.cell_code, {})
            defaults = {
                "run": run,
                "ndvi": values.get("ndvi"),
                "ndwi": values.get("ndwi"),
                "lst_c": values.get("lst_c"),
                "soil_vv": values.get("soil_vv"),
                "soil_vv_db": values.get("soil_vv_db"),
                "dem_m": values.get("dem_m"),
                "slope_deg": values.get("slope_deg"),
                "metadata": metadata_template,
            }
            observation, created = AnalysisGridObservation.objects.update_or_create(
                cell=cell,
                temporal_start=temporal_start,
                temporal_end=temporal_end,
                defaults=defaults,
            )
            if created:
                created_count += 1
            else:
                updated_count += 1
    return {"created_count": created_count, "updated_count": updated_count}


def _ensure_subdivision_result(
    *,
    location: SoilLocation,
    run: RemoteSensingRun,
    subdivision: BlockSubdivision | None,
    block_code: str,
    observations: list[AnalysisGridObservation],
    cluster_count: int | None,
    selected_features: list[str] | None,
) -> RemoteSensingSubdivisionResult:
    if not observations:
        raise DataDrivenSubdivisionError("هیچ observation برای ساخت subdivision داده‌محور پیدا نشد.")
    result = create_remote_sensing_subdivision_result(
        location=location,
        run=run,
        observations=observations,
        block_subdivision=subdivision,
        block_code=block_code,
        selected_features=selected_features or list(DEFAULT_CLUSTER_FEATURES),
        explicit_k=cluster_count,
    )
    return result


def _record_run_stage(run: RemoteSensingRun, stage: str, details: dict[str, Any] | None = None) -> None:
    metadata = dict(run.metadata or {})
    metadata["stage"] = stage
    metadata["stage_details"] = {
        **dict(metadata.get("stage_details") or {}),
        stage: details or {},
    }
    metadata["timestamps"] = {
        **dict(metadata.get("timestamps") or {}),
        f"{stage}_at": timezone.now().isoformat(),
    }
    run.metadata = metadata
    run.save(update_fields=["metadata", "updated_at"])


def _build_clustering_stage_metadata(
    result: RemoteSensingSubdivisionResult,
) -> dict[str, Any]:
    metadata = dict(result.metadata or {})
    return {
        "subdivision_result_id": result.id,
        "cluster_count": result.cluster_count,
        "selected_features": result.selected_features,
        "used_cell_count": metadata.get("used_cell_count", 0),
        "skipped_cell_count": metadata.get("skipped_cell_count", 0),
        "skipped_cell_codes": result.skipped_cell_codes,
        "kmeans_params": metadata.get("kmeans_params", {}),
    }