UPDATE
This commit is contained in:
@@ -1,10 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import BytesIO
|
||||
import math
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.files.base import ContentFile
|
||||
from django.db import transaction
|
||||
|
||||
from .block_subdivision import detect_elbow_point, render_elbow_plot
|
||||
@@ -21,12 +27,12 @@ from .models import (
|
||||
# Remote-sensing features used for clustering when the caller does not
# request a specific subset.
DEFAULT_CLUSTER_FEATURES = [
    "ndvi",
    "ndwi",
    "lst_c",
    "soil_vv_db",
]
# Immutable view of every supported feature name (currently the full default set).
SUPPORTED_CLUSTER_FEATURES = tuple(DEFAULT_CLUSTER_FEATURES)
# Fixed seed so clustering runs are reproducible (name suggests a
# scikit-learn ``random_state`` — confirm at the call site).
DEFAULT_RANDOM_STATE = 42
# Upper bound on candidate cluster counts explored (e.g. for the elbow curve).
DEFAULT_MAX_K = 10
# Project-relative fallback directory for diagnostic chart artifacts; can be
# overridden via the REMOTE_SENSING_DIAGNOSTIC_DIR environment variable.
DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR = "artifacts/remote_sensing_charts"


logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -153,6 +159,20 @@ def create_remote_sensing_subdivision_result(
|
||||
)
|
||||
)
|
||||
RemoteSensingClusterAssignment.objects.bulk_create(assignment_rows)
|
||||
diagnostic_artifacts = _persist_remote_sensing_diagnostic_artifacts(
|
||||
result=result,
|
||||
observations=dataset.observations,
|
||||
labels=labels,
|
||||
cluster_summaries=cluster_summaries,
|
||||
selected_features=dataset.selected_features,
|
||||
scaled_matrix=dataset.scaled_matrix,
|
||||
inertia_curve=inertia_curve,
|
||||
)
|
||||
if diagnostic_artifacts:
|
||||
metadata = dict(result.metadata or {})
|
||||
metadata["diagnostic_artifacts"] = diagnostic_artifacts
|
||||
result.metadata = metadata
|
||||
result.save(update_fields=["metadata", "updated_at"])
|
||||
if block_subdivision is not None:
|
||||
sync_block_subdivision_with_result(
|
||||
block_subdivision=block_subdivision,
|
||||
@@ -468,6 +488,7 @@ def sync_block_subdivision_with_result(
|
||||
"end_date": result.temporal_end.isoformat() if result.temporal_end else None,
|
||||
},
|
||||
"inertia_curve": result.metadata.get("inertia_curve", []),
|
||||
"diagnostic_artifacts": result.metadata.get("diagnostic_artifacts", {}),
|
||||
}
|
||||
|
||||
block_subdivision.grid_points = [
|
||||
@@ -550,6 +571,304 @@ def _count_non_null_features(observations: list[AnalysisGridObservation]) -> dic
|
||||
return counts
|
||||
|
||||
|
||||
def _persist_remote_sensing_diagnostic_artifacts(
    *,
    result: RemoteSensingSubdivisionResult,
    observations: list[AnalysisGridObservation],
    labels: list[int],
    cluster_summaries: list[dict[str, Any]],
    selected_features: list[str],
    scaled_matrix: list[list[float]],
    inertia_curve: list[dict[str, float]],
) -> dict[str, Any]:
    """Render and write the KMeans diagnostic charts for *result* to disk.

    Returns a mapping with the artifact ``directory`` and per-chart ``files``
    (project-relative paths when possible). Persistence is best-effort: any
    rendering or filesystem failure is logged as a warning and an empty dict
    is returned so the main subdivision workflow is never interrupted.
    """
    try:
        artifact_dir = _build_remote_sensing_diagnostic_dir(result=result)
        artifact_dir.mkdir(parents=True, exist_ok=True)

        # Hoisted: the fallback block code was previously recomputed for
        # every renderer call below.
        block_code = result.block_code or "farm"
        # (metadata key, rendered PNG content or None, filename suffix)
        specs = [
            (
                "elbow_plot",
                render_elbow_plot(
                    inertia_curve=inertia_curve,
                    optimal_k=result.cluster_count,
                    block_code=block_code,
                ),
                "elbow",
            ),
            (
                "cluster_map",
                _render_cluster_map_plot(
                    observations=observations,
                    labels=labels,
                    block_code=block_code,
                ),
                "cluster-map",
            ),
            (
                "cluster_sizes",
                _render_cluster_size_plot(
                    cluster_summaries=cluster_summaries,
                    block_code=block_code,
                ),
                "cluster-sizes",
            ),
            (
                "feature_pairs",
                _render_feature_pair_plot(
                    selected_features=selected_features,
                    scaled_matrix=scaled_matrix,
                    labels=labels,
                    block_code=block_code,
                ),
                "feature-pairs",
            ),
        ]

        # The stem depends only on `result`; compute it once instead of
        # rebuilding it on every loop iteration.
        artifact_stem = _build_remote_sensing_artifact_stem(result=result)
        files: dict[str, str] = {}
        for artifact_key, content, suffix in specs:
            if content is None:
                # Renderers return None when they have nothing to draw.
                continue
            target_path = artifact_dir / f"{artifact_stem}__{suffix}.png"
            _write_content_file(target_path=target_path, content=content)
            files[artifact_key] = _to_project_relative_path(target_path)

        return {
            "directory": _to_project_relative_path(artifact_dir),
            "files": files,
        }
    except (DataDrivenSubdivisionError, OSError) as exc:
        logger.warning(
            "Failed to persist remote sensing diagnostic artifacts for result_id=%s: %s",
            result.id,
            exc,
        )
        return {}
|
||||
|
||||
|
||||
def _build_remote_sensing_diagnostic_dir(*, result: RemoteSensingSubdivisionResult) -> Path:
    """Resolve the directory where this result's diagnostic charts are written.

    The location can be overridden with the REMOTE_SENSING_DIAGNOSTIC_DIR
    environment variable; a relative path is anchored at the Django
    ``BASE_DIR`` (falling back to the current working directory).
    """
    raw_setting = os.environ.get(
        "REMOTE_SENSING_DIAGNOSTIC_DIR", DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR
    )
    project_root = Path(getattr(settings, "BASE_DIR", Path.cwd()))
    configured = Path(str(raw_setting).strip())
    if not configured.is_absolute():
        configured = project_root / configured
    safe_block = _sanitize_path_component(result.block_code or "farm")
    location_part = f"location-{result.soil_location_id}"
    run_part = f"run-{result.run_id}-{safe_block}"
    return configured / location_part / run_part
|
||||
|
||||
|
||||
def _build_remote_sensing_artifact_stem(*, result: RemoteSensingSubdivisionResult) -> str:
    """Build the shared filename stem for this result's chart artifacts."""
    block_part = _sanitize_path_component(result.block_code or "farm")
    stem_parts = (
        f"location-{result.soil_location_id}",
        f"run-{result.run_id}",
        block_part,
    )
    return "__".join(stem_parts)
|
||||
|
||||
|
||||
def _write_content_file(*, target_path: Path, content: ContentFile) -> None:
    """Copy *content*'s bytes to *target_path*, creating parent directories."""
    target_path.parent.mkdir(parents=True, exist_ok=True)
    # Django File objects are context managers: open() rewinds and returns
    # the file, and __exit__ closes it — equivalent to the manual
    # open/try/finally-close sequence.
    with content.open("rb") as handle:
        target_path.write_bytes(handle.read())
|
||||
|
||||
|
||||
def _to_project_relative_path(path: Path) -> str:
    """Return *path* relative to the project root when possible, else unchanged."""
    project_root = Path(getattr(settings, "BASE_DIR", Path.cwd()))
    try:
        relative = path.relative_to(project_root)
    except ValueError:
        # Path lives outside the project tree; keep the original form.
        return str(path)
    return str(relative)
|
||||
|
||||
|
||||
def _sanitize_path_component(value: str) -> str:
|
||||
text = str(value or "").strip() or "unknown"
|
||||
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
|
||||
return sanitized or "unknown"
|
||||
|
||||
|
||||
def _render_cluster_map_plot(
    *,
    observations: list[AnalysisGridObservation],
    labels: list[int],
    block_code: str,
) -> ContentFile | None:
    """Render a scatter map of grid-cell centroids colored by cluster.

    Returns the chart PNG as a ContentFile, or None when there are no
    observations to draw.
    """
    if not observations:
        return None
    plt = _import_matplotlib_pyplot()
    # Group centroid coordinates by cluster in one pass instead of
    # rescanning the full observation list once per cluster label (O(k*n)).
    points_by_cluster: dict[int, list[tuple[float, float]]] = {}
    for observation, label in zip(observations, labels):
        points_by_cluster.setdefault(int(label), []).append(
            (float(observation.cell.centroid_lon), float(observation.cell.centroid_lat))
        )
    unique_labels = sorted(set(int(label) for label in labels))
    # Bug fix: plt.cm.get_cmap was removed in matplotlib 3.9 (AttributeError
    # on current releases); plt.get_cmap is the long-standing public API.
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))
    fig, ax = plt.subplots(figsize=(8, 6))
    buffer = BytesIO()
    try:
        for color_index, cluster_label in enumerate(unique_labels):
            cluster_points = points_by_cluster.get(cluster_label, [])
            if not cluster_points:
                continue
            xs = [point[0] for point in cluster_points]
            ys = [point[1] for point in cluster_points]
            ax.scatter(
                xs,
                ys,
                s=70,
                alpha=0.9,
                color=colors(color_index),
                edgecolors="white",
                linewidths=0.8,
                label=f"Cluster {cluster_label}",
            )
        ax.set_title(f"KMeans Spatial Cluster Map - {block_code}")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
        if unique_labels:
            ax.legend()
        fig.tight_layout()
        fig.savefig(buffer, format="png", dpi=150)
        buffer.seek(0)
        return ContentFile(buffer.getvalue())
    finally:
        buffer.close()
        plt.close(fig)
|
||||
|
||||
|
||||
def _render_cluster_size_plot(
    *,
    cluster_summaries: list[dict[str, Any]],
    block_code: str,
) -> ContentFile | None:
    """Render a bar chart of per-cluster cell counts as a PNG ContentFile.

    Returns None when there are no cluster summaries to plot.
    """
    if not cluster_summaries:
        return None
    plt = _import_matplotlib_pyplot()
    bar_names: list[str] = []
    bar_values: list[int] = []
    for summary in cluster_summaries:
        bar_names.append(f"C{int(summary['cluster_label'])}")
        bar_values.append(int(summary["cell_count"]))
    fig, ax = plt.subplots(figsize=(8, 5))
    png_buffer = BytesIO()
    try:
        rendered_bars = ax.bar(bar_names, bar_values, color="#2f6fed", alpha=0.85)
        # Annotate every bar with its exact count just above the bar top.
        for rendered_bar, value in zip(rendered_bars, bar_values):
            x_center = rendered_bar.get_x() + rendered_bar.get_width() / 2.0
            ax.text(
                x_center,
                rendered_bar.get_height(),
                str(value),
                ha="center",
                va="bottom",
                fontsize=9,
            )
        ax.set_title(f"Cluster Sizes - {block_code}")
        ax.set_xlabel("Cluster")
        ax.set_ylabel("Cell Count")
        ax.grid(True, axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
        fig.tight_layout()
        fig.savefig(png_buffer, format="png", dpi=150)
        png_buffer.seek(0)
        return ContentFile(png_buffer.getvalue())
    finally:
        png_buffer.close()
        plt.close(fig)
|
||||
|
||||
|
||||
def _scatter_clustered_points(
    axis: Any,
    x_values: list[float],
    y_values: list[float],
    labels: list[int],
    unique_labels: list[int],
    colors: Any,
) -> None:
    """Scatter (x, y) pairs on *axis*, one colored series per cluster label."""
    for color_index, cluster_label in enumerate(unique_labels):
        filtered = [
            (x_value, y_value)
            for x_value, y_value, label in zip(x_values, y_values, labels)
            if int(label) == cluster_label
        ]
        axis.scatter(
            [item[0] for item in filtered],
            [item[1] for item in filtered],
            s=55,
            color=colors(color_index),
            alpha=0.85,
            label=f"Cluster {cluster_label}",
        )


def _render_feature_pair_plot(
    *,
    selected_features: list[str],
    scaled_matrix: list[list[float]],
    labels: list[int],
    block_code: str,
) -> ContentFile | None:
    """Render pairwise scatter plots of the scaled clustering features.

    With a single feature, the plot degrades to feature value versus
    observation index. Returns the PNG as a ContentFile, or None when
    there is no data to draw.
    """
    if not scaled_matrix or not selected_features:
        return None
    plt = _import_matplotlib_pyplot()
    feature_count = len(selected_features)
    # With one feature there is no pair; use a sentinel (0, 0) so a single
    # subplot is still produced.
    pair_indexes = [(0, 0)] if feature_count == 1 else [
        (left_index, right_index)
        for left_index in range(feature_count)
        for right_index in range(left_index + 1, feature_count)
    ]
    subplot_count = len(pair_indexes)
    columns = 2 if subplot_count > 1 else 1
    rows = math.ceil(subplot_count / columns)
    fig, axes = plt.subplots(rows, columns, figsize=(7 * columns, 5 * rows))
    # plt.subplots returns a bare Axes for a 1x1 grid, an ndarray otherwise.
    axes_list = axes.flatten().tolist() if hasattr(axes, "flatten") else [axes]
    unique_labels = sorted(set(int(label) for label in labels))
    # Bug fix: plt.cm.get_cmap was removed in matplotlib 3.9 (AttributeError
    # on current releases); plt.get_cmap is the long-standing public API.
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))
    buffer = BytesIO()
    try:
        for axis, (left_index, right_index) in zip(axes_list, pair_indexes):
            if feature_count == 1:
                xs = list(range(1, len(scaled_matrix) + 1))
                ys = [row[0] for row in scaled_matrix]
                _scatter_clustered_points(axis, xs, ys, labels, unique_labels, colors)
                axis.set_xlabel("Observation Index")
                axis.set_ylabel(f"{selected_features[0]} (scaled)")
                axis.set_title(f"{selected_features[0]} distribution")
            else:
                x_values = [row[left_index] for row in scaled_matrix]
                y_values = [row[right_index] for row in scaled_matrix]
                _scatter_clustered_points(
                    axis, x_values, y_values, labels, unique_labels, colors
                )
                axis.set_xlabel(f"{selected_features[left_index]} (scaled)")
                axis.set_ylabel(f"{selected_features[right_index]} (scaled)")
                axis.set_title(
                    f"{selected_features[left_index]} vs {selected_features[right_index]}"
                )
            axis.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)

        # Remove trailing axes the grid allocated but the pairs did not use.
        for axis in axes_list[subplot_count:]:
            axis.remove()

        if unique_labels and axes_list:
            axes_list[0].legend()
        fig.suptitle(f"KMeans Feature Diagnostics - {block_code}", fontsize=14)
        fig.tight_layout(rect=(0, 0, 1, 0.97))
        fig.savefig(buffer, format="png", dpi=150)
        buffer.seek(0)
        return ContentFile(buffer.getvalue())
    finally:
        buffer.close()
        plt.close(fig)
|
||||
|
||||
|
||||
def _import_matplotlib_pyplot():
    """Import and return matplotlib.pyplot on the headless Agg backend.

    Raises DataDrivenSubdivisionError when matplotlib is not installed.
    """
    try:
        import matplotlib

        # Charts are rendered server-side; force the non-interactive backend
        # before pyplot is imported.
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        return plt
    except ImportError as exc:  # pragma: no cover - runtime dependency guard
        raise DataDrivenSubdivisionError("matplotlib برای ذخیره نمودارهای KMeans لازم است.") from exc
|
||||
|
||||
|
||||
def _build_clustering_log_context(
|
||||
*,
|
||||
observations: list[AnalysisGridObservation],
|
||||
|
||||
Reference in New Issue
Block a user