This commit is contained in:
2026-05-11 00:36:02 +03:30
parent 2a6321a263
commit 1740c20ddb
23 changed files with 1214 additions and 89 deletions
+320 -1
View File
@@ -1,10 +1,16 @@
from __future__ import annotations
from io import BytesIO
import math
import os
from pathlib import Path
from dataclasses import dataclass
import json
import logging
from typing import Any
from django.conf import settings
from django.core.files.base import ContentFile
from django.db import transaction
from .block_subdivision import detect_elbow_point, render_elbow_plot
@@ -21,12 +27,12 @@ from .models import (
# Default feature names; SUPPORTED_CLUSTER_FEATURES is an immutable copy of the same list.
DEFAULT_CLUSTER_FEATURES = ["ndvi", "ndwi", "lst_c", "soil_vv_db"]
SUPPORTED_CLUSTER_FEATURES = tuple(DEFAULT_CLUSTER_FEATURES)

DEFAULT_RANDOM_STATE = 42
DEFAULT_MAX_K = 10

# Used by _build_remote_sensing_diagnostic_dir when the REMOTE_SENSING_DIAGNOSTIC_DIR
# environment variable is not set; a relative value is resolved against settings.BASE_DIR.
DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR = "artifacts/remote_sensing_charts"

logger = logging.getLogger(__name__)
@@ -153,6 +159,20 @@ def create_remote_sensing_subdivision_result(
)
)
RemoteSensingClusterAssignment.objects.bulk_create(assignment_rows)
diagnostic_artifacts = _persist_remote_sensing_diagnostic_artifacts(
result=result,
observations=dataset.observations,
labels=labels,
cluster_summaries=cluster_summaries,
selected_features=dataset.selected_features,
scaled_matrix=dataset.scaled_matrix,
inertia_curve=inertia_curve,
)
if diagnostic_artifacts:
metadata = dict(result.metadata or {})
metadata["diagnostic_artifacts"] = diagnostic_artifacts
result.metadata = metadata
result.save(update_fields=["metadata", "updated_at"])
if block_subdivision is not None:
sync_block_subdivision_with_result(
block_subdivision=block_subdivision,
@@ -468,6 +488,7 @@ def sync_block_subdivision_with_result(
"end_date": result.temporal_end.isoformat() if result.temporal_end else None,
},
"inertia_curve": result.metadata.get("inertia_curve", []),
"diagnostic_artifacts": result.metadata.get("diagnostic_artifacts", {}),
}
block_subdivision.grid_points = [
@@ -550,6 +571,304 @@ def _count_non_null_features(observations: list[AnalysisGridObservation]) -> dic
return counts
def _persist_remote_sensing_diagnostic_artifacts(
    *,
    result: RemoteSensingSubdivisionResult,
    observations: list[AnalysisGridObservation],
    labels: list[int],
    cluster_summaries: list[dict[str, Any]],
    selected_features: list[str],
    scaled_matrix: list[list[float]],
    inertia_curve: list[dict[str, float]],
) -> dict[str, Any]:
    """Render the diagnostic charts for ``result`` and write them to disk as PNG files.

    Four charts are attempted: the elbow plot, the spatial cluster map, the
    cluster-size bar chart and the feature-pair scatter plots. Each chart is
    rendered and written independently, so a failure on one chart does not
    discard charts that were already written.

    Returns:
        ``{"directory": <path>, "files": {<artifact_key>: <path>, ...}}`` with
        paths produced by ``_to_project_relative_path``. Returns ``{}`` when the
        artifact directory cannot be prepared, or when at least one chart failed
        and none could be written.

    ``DataDrivenSubdivisionError`` and ``OSError`` are logged as warnings and
    never propagated; any other exception type still propagates to the caller.
    """
    block_label = result.block_code or "farm"

    try:
        artifact_dir = _build_remote_sensing_diagnostic_dir(result=result)
        artifact_dir.mkdir(parents=True, exist_ok=True)
    except (DataDrivenSubdivisionError, OSError) as exc:
        logger.warning(
            "Failed to persist remote sensing diagnostic artifacts for result_id=%s: %s",
            result.id,
            exc,
        )
        return {}

    # The stem does not depend on the artifact, so build it once.
    artifact_stem = _build_remote_sensing_artifact_stem(result=result)

    # (metadata key, filename suffix, lazy renderer). Rendering is deferred so
    # that each chart can fail on its own inside the loop below.
    renderers = [
        (
            "elbow_plot",
            "elbow",
            lambda: render_elbow_plot(
                inertia_curve=inertia_curve,
                optimal_k=result.cluster_count,
                block_code=block_label,
            ),
        ),
        (
            "cluster_map",
            "cluster-map",
            lambda: _render_cluster_map_plot(
                observations=observations,
                labels=labels,
                block_code=block_label,
            ),
        ),
        (
            "cluster_sizes",
            "cluster-sizes",
            lambda: _render_cluster_size_plot(
                cluster_summaries=cluster_summaries,
                block_code=block_label,
            ),
        ),
        (
            "feature_pairs",
            "feature-pairs",
            lambda: _render_feature_pair_plot(
                selected_features=selected_features,
                scaled_matrix=scaled_matrix,
                labels=labels,
                block_code=block_label,
            ),
        ),
    ]

    files: dict[str, str] = {}
    any_failure = False
    for artifact_key, suffix, render in renderers:
        try:
            content = render()
            if content is None:
                # The renderer had nothing to draw; this is not a failure.
                continue
            target_path = artifact_dir / f"{artifact_stem}__{suffix}.png"
            _write_content_file(target_path=target_path, content=content)
        except (DataDrivenSubdivisionError, OSError) as exc:
            any_failure = True
            logger.warning(
                "Failed to persist remote sensing diagnostic artifact %s for result_id=%s: %s",
                artifact_key,
                result.id,
                exc,
            )
            continue
        files[artifact_key] = _to_project_relative_path(target_path)

    if any_failure and not files:
        # Nothing usable was produced; report "no artifacts" as before.
        return {}
    return {
        "directory": _to_project_relative_path(artifact_dir),
        "files": files,
    }
def _build_remote_sensing_diagnostic_dir(*, result: RemoteSensingSubdivisionResult) -> Path:
    """Return the directory in which the diagnostic charts of ``result`` are stored.

    The root comes from the ``REMOTE_SENSING_DIAGNOSTIC_DIR`` environment
    variable. When the variable is unset, empty or whitespace-only,
    ``DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR`` is used instead. A relative root
    is resolved against ``settings.BASE_DIR`` (or the current working directory
    when that setting is missing).

    The returned path is ``<root>/location-<soil_location_id>/run-<run_id>-<block>``,
    where ``<block>`` is the sanitized block code (``"farm"`` when the result
    has no block code). The directory is not created here.
    """
    configured_dir = os.environ.get("REMOTE_SENSING_DIAGNOSTIC_DIR", "").strip()
    if not configured_dir:
        # An empty value would resolve to BASE_DIR itself and scatter
        # artifacts across the project root, so use the default instead.
        configured_dir = DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR

    base_dir = Path(getattr(settings, "BASE_DIR", Path.cwd()))
    target_dir = Path(configured_dir)
    if not target_dir.is_absolute():
        target_dir = base_dir / target_dir

    block_component = _sanitize_path_component(result.block_code or "farm")
    return target_dir / f"location-{result.soil_location_id}" / f"run-{result.run_id}-{block_component}"
def _build_remote_sensing_artifact_stem(*, result: RemoteSensingSubdivisionResult) -> str:
    """Return the filename stem shared by all diagnostic charts of ``result``.

    The stem has the form ``location-<soil_location_id>__run-<run_id>__<block>``,
    where ``<block>`` is the sanitized block code (``"farm"`` when the result
    has no block code).
    """
    stem_parts = (
        f"location-{result.soil_location_id}",
        f"run-{result.run_id}",
        _sanitize_path_component(result.block_code or "farm"),
    )
    return "__".join(stem_parts)
def _write_content_file(*, target_path: Path, content: ContentFile) -> None:
    """Write the bytes held by ``content`` to ``target_path``.

    Missing parent directories are created first. ``content`` is opened in
    binary mode for reading and is always closed afterwards, even when the
    read or the write fails.
    """
    parent_dir = target_path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)

    content.open("rb")
    try:
        payload = content.read()
        target_path.write_bytes(payload)
    finally:
        content.close()
def _to_project_relative_path(path: Path) -> str:
    """Return ``path`` as a string relative to the project root when possible.

    The project root is ``settings.BASE_DIR`` (or the current working
    directory when that setting is missing). A path that does not lie under
    the root is returned unchanged, as a string.
    """
    project_root = Path(getattr(settings, "BASE_DIR", Path.cwd()))
    try:
        relative_path = path.relative_to(project_root)
    except ValueError:
        # Raised by Path.relative_to when ``path`` is not under the root.
        return str(path)
    return str(relative_path)
def _sanitize_path_component(value: str) -> str:
text = str(value or "").strip() or "unknown"
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
return sanitized or "unknown"
def _render_cluster_map_plot(
    *,
    observations: list[AnalysisGridObservation],
    labels: list[int],
    block_code: str,
) -> ContentFile | None:
    """Render a longitude/latitude scatter plot of the clustered cells as a PNG.

    Each observation is drawn at its cell centroid
    (``observation.cell.centroid_lon`` / ``centroid_lat``) and coloured by the
    label at the same position in ``labels``. Observations and labels are
    paired with ``zip``, so extra items on either side are ignored.

    Returns ``None`` when ``observations`` is empty; otherwise a
    ``ContentFile`` holding the PNG bytes.
    """
    if not observations:
        return None
    plt = _import_matplotlib_pyplot()

    unique_labels = sorted({int(label) for label in labels})

    # Group the centroids once instead of re-scanning every observation per cluster.
    points_by_cluster: dict[int, list[tuple[float, float]]] = {}
    for observation, label in zip(observations, labels):
        points_by_cluster.setdefault(int(label), []).append(
            (float(observation.cell.centroid_lon), float(observation.cell.centroid_lat))
        )

    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        for color_index, cluster_label in enumerate(unique_labels):
            cluster_points = points_by_cluster.get(cluster_label)
            if not cluster_points:
                continue
            ax.scatter(
                [point[0] for point in cluster_points],
                [point[1] for point in cluster_points],
                s=70,
                alpha=0.9,
                color=colors(color_index),
                edgecolors="white",
                linewidths=0.8,
                label=f"Cluster {cluster_label}",
            )
        ax.set_title(f"KMeans Spatial Cluster Map - {block_code}")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
        if unique_labels:
            ax.legend()
        fig.tight_layout()
        with BytesIO() as buffer:
            fig.savefig(buffer, format="png", dpi=150)
            return ContentFile(buffer.getvalue())
    finally:
        # Always release the figure so repeated runs do not accumulate open figures.
        plt.close(fig)
def _render_cluster_size_plot(
    *,
    cluster_summaries: list[dict[str, Any]],
    block_code: str,
) -> ContentFile | None:
    """Render a bar chart of the cell count of every cluster as a PNG.

    Each summary is read through its ``"cluster_label"`` and ``"cell_count"``
    keys; the count is also printed above its bar.

    Returns ``None`` when ``cluster_summaries`` is empty; otherwise a
    ``ContentFile`` holding the PNG bytes.
    """
    if not cluster_summaries:
        return None
    plt = _import_matplotlib_pyplot()

    tick_names = [f"C{int(summary['cluster_label'])}" for summary in cluster_summaries]
    cell_totals = [int(summary["cell_count"]) for summary in cluster_summaries]

    figure, axis = plt.subplots(figsize=(8, 5))
    try:
        with BytesIO() as png_stream:
            rectangles = axis.bar(tick_names, cell_totals, color="#2f6fed", alpha=0.85)
            for rectangle, total in zip(rectangles, cell_totals):
                # Centre the count horizontally on its bar, just above the top edge.
                center_x = rectangle.get_x() + rectangle.get_width() / 2.0
                axis.text(
                    center_x,
                    rectangle.get_height(),
                    str(total),
                    ha="center",
                    va="bottom",
                    fontsize=9,
                )
            axis.set_title(f"Cluster Sizes - {block_code}")
            axis.set_xlabel("Cluster")
            axis.set_ylabel("Cell Count")
            axis.grid(True, axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
            figure.tight_layout()
            figure.savefig(png_stream, format="png", dpi=150)
            return ContentFile(png_stream.getvalue())
    finally:
        plt.close(figure)
def _scatter_clusters_on_axis(*, axis, x_values, y_values, labels, unique_labels, colors) -> None:
    """Draw one scatter series per cluster label on ``axis``."""
    # Bucket the points in a single pass; the values are paired with ``zip``.
    points_by_cluster: dict[int, tuple[list, list]] = {}
    for x_value, y_value, label in zip(x_values, y_values, labels):
        xs, ys = points_by_cluster.setdefault(int(label), ([], []))
        xs.append(x_value)
        ys.append(y_value)

    for color_index, cluster_label in enumerate(unique_labels):
        xs, ys = points_by_cluster.get(cluster_label, ([], []))
        axis.scatter(
            xs,
            ys,
            s=55,
            color=colors(color_index),
            alpha=0.85,
            label=f"Cluster {cluster_label}",
        )


def _render_feature_pair_plot(
    *,
    selected_features: list[str],
    scaled_matrix: list[list[float]],
    labels: list[int],
    block_code: str,
) -> ContentFile | None:
    """Render scatter plots of the scaled features, coloured by cluster, as a PNG.

    With a single feature, one subplot shows the feature value against the
    1-based row index. With several features, one subplot is drawn for every
    unordered pair of features, laid out in two columns (a single pair uses
    one column). Column ``i`` of ``scaled_matrix`` is read as the values of
    ``selected_features[i]``.

    Returns ``None`` when ``scaled_matrix`` or ``selected_features`` is empty;
    otherwise a ``ContentFile`` holding the PNG bytes.
    """
    if not scaled_matrix or not selected_features:
        return None
    plt = _import_matplotlib_pyplot()

    feature_count = len(selected_features)
    if feature_count == 1:
        pair_indexes = [(0, 0)]
    else:
        pair_indexes = [
            (left_index, right_index)
            for left_index in range(feature_count)
            for right_index in range(left_index + 1, feature_count)
        ]
    subplot_count = len(pair_indexes)
    columns = 2 if subplot_count > 1 else 1
    rows = math.ceil(subplot_count / columns)

    unique_labels = sorted({int(label) for label in labels})
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))

    fig, axes = plt.subplots(rows, columns, figsize=(7 * columns, 5 * rows))
    # Everything after figure creation runs inside ``try`` so the figure is
    # closed even when plotting fails.
    try:
        # ``plt.subplots`` returns a single Axes for a 1x1 grid and an array otherwise.
        axes_list = axes.flatten().tolist() if hasattr(axes, "flatten") else [axes]

        for axis, (left_index, right_index) in zip(axes_list, pair_indexes):
            if feature_count == 1:
                x_values = list(range(1, len(scaled_matrix) + 1))
                y_values = [row[0] for row in scaled_matrix]
                x_label = "Observation Index"
                y_label = f"{selected_features[0]} (scaled)"
                title = f"{selected_features[0]} distribution"
            else:
                x_values = [row[left_index] for row in scaled_matrix]
                y_values = [row[right_index] for row in scaled_matrix]
                x_label = f"{selected_features[left_index]} (scaled)"
                y_label = f"{selected_features[right_index]} (scaled)"
                title = f"{selected_features[left_index]} vs {selected_features[right_index]}"

            _scatter_clusters_on_axis(
                axis=axis,
                x_values=x_values,
                y_values=y_values,
                labels=labels,
                unique_labels=unique_labels,
                colors=colors,
            )
            axis.set_xlabel(x_label)
            axis.set_ylabel(y_label)
            axis.set_title(title)
            axis.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)

        # An odd number of pairs leaves one unused cell in the two-column grid.
        for axis in axes_list[subplot_count:]:
            axis.remove()
        if unique_labels and axes_list:
            axes_list[0].legend()
        fig.suptitle(f"KMeans Feature Diagnostics - {block_code}", fontsize=14)
        # Reserve the top 3% of the figure for the suptitle.
        fig.tight_layout(rect=(0, 0, 1, 0.97))
        with BytesIO() as buffer:
            fig.savefig(buffer, format="png", dpi=150)
            return ContentFile(buffer.getvalue())
    finally:
        plt.close(fig)
def _import_matplotlib_pyplot():
    """Import and return ``matplotlib.pyplot`` with the ``Agg`` backend selected.

    The import is done lazily, at call time. ``matplotlib.use("Agg")`` is
    called before ``pyplot`` is imported.

    Raises:
        DataDrivenSubdivisionError: when matplotlib cannot be imported. The
            message is in Persian and says that matplotlib is required to
            save the KMeans charts.
    """
    try:
        import matplotlib

        matplotlib.use("Agg")
        import matplotlib.pyplot as pyplot
    except ImportError as exc:  # pragma: no cover - runtime dependency guard
        raise DataDrivenSubdivisionError("matplotlib برای ذخیره نمودارهای KMeans لازم است.") from exc
    else:
        return pyplot
def _build_clustering_log_context(
*,
observations: list[AnalysisGridObservation],