UPDATE
This commit is contained in:
@@ -1,10 +1,16 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from io import BytesIO
|
||||
import math
|
||||
import os
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass
|
||||
import json
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
from django.conf import settings
|
||||
from django.core.files.base import ContentFile
|
||||
from django.db import transaction
|
||||
|
||||
from .block_subdivision import detect_elbow_point, render_elbow_plot
|
||||
@@ -21,12 +27,12 @@ from .models import (
|
||||
# Remote-sensing features used for clustering when the caller does not
# request a specific subset.
DEFAULT_CLUSTER_FEATURES = [
    "ndvi",
    "ndwi",
    "lst_c",
    "soil_vv_db",
]
# Immutable view of every supported feature name (currently the full default set).
SUPPORTED_CLUSTER_FEATURES = tuple(DEFAULT_CLUSTER_FEATURES)
# Fixed seed so clustering runs are reproducible (name suggests a
# scikit-learn ``random_state`` — confirm at the call site).
DEFAULT_RANDOM_STATE = 42
# Upper bound on candidate cluster counts explored (e.g. for the elbow curve).
DEFAULT_MAX_K = 10
# Project-relative fallback directory for diagnostic chart artifacts; can be
# overridden via the REMOTE_SENSING_DIAGNOSTIC_DIR environment variable.
DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR = "artifacts/remote_sensing_charts"


logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -153,6 +159,20 @@ def create_remote_sensing_subdivision_result(
|
||||
)
|
||||
)
|
||||
RemoteSensingClusterAssignment.objects.bulk_create(assignment_rows)
|
||||
diagnostic_artifacts = _persist_remote_sensing_diagnostic_artifacts(
|
||||
result=result,
|
||||
observations=dataset.observations,
|
||||
labels=labels,
|
||||
cluster_summaries=cluster_summaries,
|
||||
selected_features=dataset.selected_features,
|
||||
scaled_matrix=dataset.scaled_matrix,
|
||||
inertia_curve=inertia_curve,
|
||||
)
|
||||
if diagnostic_artifacts:
|
||||
metadata = dict(result.metadata or {})
|
||||
metadata["diagnostic_artifacts"] = diagnostic_artifacts
|
||||
result.metadata = metadata
|
||||
result.save(update_fields=["metadata", "updated_at"])
|
||||
if block_subdivision is not None:
|
||||
sync_block_subdivision_with_result(
|
||||
block_subdivision=block_subdivision,
|
||||
@@ -468,6 +488,7 @@ def sync_block_subdivision_with_result(
|
||||
"end_date": result.temporal_end.isoformat() if result.temporal_end else None,
|
||||
},
|
||||
"inertia_curve": result.metadata.get("inertia_curve", []),
|
||||
"diagnostic_artifacts": result.metadata.get("diagnostic_artifacts", {}),
|
||||
}
|
||||
|
||||
block_subdivision.grid_points = [
|
||||
@@ -550,6 +571,304 @@ def _count_non_null_features(observations: list[AnalysisGridObservation]) -> dic
|
||||
return counts
|
||||
|
||||
|
||||
def _persist_remote_sensing_diagnostic_artifacts(
    *,
    result: RemoteSensingSubdivisionResult,
    observations: list[AnalysisGridObservation],
    labels: list[int],
    cluster_summaries: list[dict[str, Any]],
    selected_features: list[str],
    scaled_matrix: list[list[float]],
    inertia_curve: list[dict[str, float]],
) -> dict[str, Any]:
    """Render and write the KMeans diagnostic charts for *result* to disk.

    Returns a mapping with the artifact ``directory`` and per-chart ``files``
    (project-relative paths when possible). Persistence is best-effort: any
    rendering or filesystem failure is logged as a warning and an empty dict
    is returned so the main subdivision workflow is never interrupted.
    """
    try:
        artifact_dir = _build_remote_sensing_diagnostic_dir(result=result)
        artifact_dir.mkdir(parents=True, exist_ok=True)

        # Hoisted: the fallback block code was previously recomputed for
        # every renderer call below.
        block_code = result.block_code or "farm"
        # (metadata key, rendered PNG content or None, filename suffix)
        specs = [
            (
                "elbow_plot",
                render_elbow_plot(
                    inertia_curve=inertia_curve,
                    optimal_k=result.cluster_count,
                    block_code=block_code,
                ),
                "elbow",
            ),
            (
                "cluster_map",
                _render_cluster_map_plot(
                    observations=observations,
                    labels=labels,
                    block_code=block_code,
                ),
                "cluster-map",
            ),
            (
                "cluster_sizes",
                _render_cluster_size_plot(
                    cluster_summaries=cluster_summaries,
                    block_code=block_code,
                ),
                "cluster-sizes",
            ),
            (
                "feature_pairs",
                _render_feature_pair_plot(
                    selected_features=selected_features,
                    scaled_matrix=scaled_matrix,
                    labels=labels,
                    block_code=block_code,
                ),
                "feature-pairs",
            ),
        ]

        # The stem depends only on `result`; compute it once instead of
        # rebuilding it on every loop iteration.
        artifact_stem = _build_remote_sensing_artifact_stem(result=result)
        files: dict[str, str] = {}
        for artifact_key, content, suffix in specs:
            if content is None:
                # Renderers return None when they have nothing to draw.
                continue
            target_path = artifact_dir / f"{artifact_stem}__{suffix}.png"
            _write_content_file(target_path=target_path, content=content)
            files[artifact_key] = _to_project_relative_path(target_path)

        return {
            "directory": _to_project_relative_path(artifact_dir),
            "files": files,
        }
    except (DataDrivenSubdivisionError, OSError) as exc:
        logger.warning(
            "Failed to persist remote sensing diagnostic artifacts for result_id=%s: %s",
            result.id,
            exc,
        )
        return {}
|
||||
|
||||
|
||||
def _build_remote_sensing_diagnostic_dir(*, result: RemoteSensingSubdivisionResult) -> Path:
    """Resolve the directory where this result's diagnostic charts are written.

    The location can be overridden with the REMOTE_SENSING_DIAGNOSTIC_DIR
    environment variable; a relative path is anchored at the Django
    ``BASE_DIR`` (falling back to the current working directory).
    """
    raw_setting = os.environ.get(
        "REMOTE_SENSING_DIAGNOSTIC_DIR", DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR
    )
    project_root = Path(getattr(settings, "BASE_DIR", Path.cwd()))
    configured = Path(str(raw_setting).strip())
    if not configured.is_absolute():
        configured = project_root / configured
    safe_block = _sanitize_path_component(result.block_code or "farm")
    location_part = f"location-{result.soil_location_id}"
    run_part = f"run-{result.run_id}-{safe_block}"
    return configured / location_part / run_part
|
||||
|
||||
|
||||
def _build_remote_sensing_artifact_stem(*, result: RemoteSensingSubdivisionResult) -> str:
    """Build the shared filename stem for this result's chart artifacts."""
    block_part = _sanitize_path_component(result.block_code or "farm")
    stem_parts = (
        f"location-{result.soil_location_id}",
        f"run-{result.run_id}",
        block_part,
    )
    return "__".join(stem_parts)
|
||||
|
||||
|
||||
def _write_content_file(*, target_path: Path, content: ContentFile) -> None:
    """Copy *content*'s bytes to *target_path*, creating parent directories."""
    target_path.parent.mkdir(parents=True, exist_ok=True)
    # Django File objects are context managers: open() rewinds and returns
    # the file, and __exit__ closes it — equivalent to the manual
    # open/try/finally-close sequence.
    with content.open("rb") as handle:
        target_path.write_bytes(handle.read())
|
||||
|
||||
|
||||
def _to_project_relative_path(path: Path) -> str:
    """Return *path* relative to the project root when possible, else unchanged."""
    project_root = Path(getattr(settings, "BASE_DIR", Path.cwd()))
    try:
        relative = path.relative_to(project_root)
    except ValueError:
        # Path lives outside the project tree; keep the original form.
        return str(path)
    return str(relative)
|
||||
|
||||
|
||||
def _sanitize_path_component(value: str) -> str:
|
||||
text = str(value or "").strip() or "unknown"
|
||||
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
|
||||
return sanitized or "unknown"
|
||||
|
||||
|
||||
def _render_cluster_map_plot(
    *,
    observations: list[AnalysisGridObservation],
    labels: list[int],
    block_code: str,
) -> ContentFile | None:
    """Render a scatter map of grid-cell centroids colored by cluster.

    Returns the chart PNG as a ContentFile, or None when there are no
    observations to draw.
    """
    if not observations:
        return None
    plt = _import_matplotlib_pyplot()
    # Group centroid coordinates by cluster in one pass instead of
    # rescanning the full observation list once per cluster label (O(k*n)).
    points_by_cluster: dict[int, list[tuple[float, float]]] = {}
    for observation, label in zip(observations, labels):
        points_by_cluster.setdefault(int(label), []).append(
            (float(observation.cell.centroid_lon), float(observation.cell.centroid_lat))
        )
    unique_labels = sorted(set(int(label) for label in labels))
    # Bug fix: plt.cm.get_cmap was removed in matplotlib 3.9 (AttributeError
    # on current releases); plt.get_cmap is the long-standing public API.
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))
    fig, ax = plt.subplots(figsize=(8, 6))
    buffer = BytesIO()
    try:
        for color_index, cluster_label in enumerate(unique_labels):
            cluster_points = points_by_cluster.get(cluster_label, [])
            if not cluster_points:
                continue
            xs = [point[0] for point in cluster_points]
            ys = [point[1] for point in cluster_points]
            ax.scatter(
                xs,
                ys,
                s=70,
                alpha=0.9,
                color=colors(color_index),
                edgecolors="white",
                linewidths=0.8,
                label=f"Cluster {cluster_label}",
            )
        ax.set_title(f"KMeans Spatial Cluster Map - {block_code}")
        ax.set_xlabel("Longitude")
        ax.set_ylabel("Latitude")
        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
        if unique_labels:
            ax.legend()
        fig.tight_layout()
        fig.savefig(buffer, format="png", dpi=150)
        buffer.seek(0)
        return ContentFile(buffer.getvalue())
    finally:
        buffer.close()
        plt.close(fig)
|
||||
|
||||
|
||||
def _render_cluster_size_plot(
    *,
    cluster_summaries: list[dict[str, Any]],
    block_code: str,
) -> ContentFile | None:
    """Render a bar chart of per-cluster cell counts as a PNG ContentFile.

    Returns None when there are no cluster summaries to plot.
    """
    if not cluster_summaries:
        return None
    plt = _import_matplotlib_pyplot()
    bar_names: list[str] = []
    bar_values: list[int] = []
    for summary in cluster_summaries:
        bar_names.append(f"C{int(summary['cluster_label'])}")
        bar_values.append(int(summary["cell_count"]))
    fig, ax = plt.subplots(figsize=(8, 5))
    png_buffer = BytesIO()
    try:
        rendered_bars = ax.bar(bar_names, bar_values, color="#2f6fed", alpha=0.85)
        # Annotate every bar with its exact count just above the bar top.
        for rendered_bar, value in zip(rendered_bars, bar_values):
            x_center = rendered_bar.get_x() + rendered_bar.get_width() / 2.0
            ax.text(
                x_center,
                rendered_bar.get_height(),
                str(value),
                ha="center",
                va="bottom",
                fontsize=9,
            )
        ax.set_title(f"Cluster Sizes - {block_code}")
        ax.set_xlabel("Cluster")
        ax.set_ylabel("Cell Count")
        ax.grid(True, axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
        fig.tight_layout()
        fig.savefig(png_buffer, format="png", dpi=150)
        png_buffer.seek(0)
        return ContentFile(png_buffer.getvalue())
    finally:
        png_buffer.close()
        plt.close(fig)
|
||||
|
||||
|
||||
def _scatter_clustered_points(
    axis: Any,
    x_values: list[float],
    y_values: list[float],
    labels: list[int],
    unique_labels: list[int],
    colors: Any,
) -> None:
    """Scatter (x, y) pairs on *axis*, one colored series per cluster label."""
    for color_index, cluster_label in enumerate(unique_labels):
        filtered = [
            (x_value, y_value)
            for x_value, y_value, label in zip(x_values, y_values, labels)
            if int(label) == cluster_label
        ]
        axis.scatter(
            [item[0] for item in filtered],
            [item[1] for item in filtered],
            s=55,
            color=colors(color_index),
            alpha=0.85,
            label=f"Cluster {cluster_label}",
        )


def _render_feature_pair_plot(
    *,
    selected_features: list[str],
    scaled_matrix: list[list[float]],
    labels: list[int],
    block_code: str,
) -> ContentFile | None:
    """Render pairwise scatter plots of the scaled clustering features.

    With a single feature, the plot degrades to feature value versus
    observation index. Returns the PNG as a ContentFile, or None when
    there is no data to draw.
    """
    if not scaled_matrix or not selected_features:
        return None
    plt = _import_matplotlib_pyplot()
    feature_count = len(selected_features)
    # With one feature there is no pair; use a sentinel (0, 0) so a single
    # subplot is still produced.
    pair_indexes = [(0, 0)] if feature_count == 1 else [
        (left_index, right_index)
        for left_index in range(feature_count)
        for right_index in range(left_index + 1, feature_count)
    ]
    subplot_count = len(pair_indexes)
    columns = 2 if subplot_count > 1 else 1
    rows = math.ceil(subplot_count / columns)
    fig, axes = plt.subplots(rows, columns, figsize=(7 * columns, 5 * rows))
    # plt.subplots returns a bare Axes for a 1x1 grid, an ndarray otherwise.
    axes_list = axes.flatten().tolist() if hasattr(axes, "flatten") else [axes]
    unique_labels = sorted(set(int(label) for label in labels))
    # Bug fix: plt.cm.get_cmap was removed in matplotlib 3.9 (AttributeError
    # on current releases); plt.get_cmap is the long-standing public API.
    colors = plt.get_cmap("tab10", max(len(unique_labels), 1))
    buffer = BytesIO()
    try:
        for axis, (left_index, right_index) in zip(axes_list, pair_indexes):
            if feature_count == 1:
                xs = list(range(1, len(scaled_matrix) + 1))
                ys = [row[0] for row in scaled_matrix]
                _scatter_clustered_points(axis, xs, ys, labels, unique_labels, colors)
                axis.set_xlabel("Observation Index")
                axis.set_ylabel(f"{selected_features[0]} (scaled)")
                axis.set_title(f"{selected_features[0]} distribution")
            else:
                x_values = [row[left_index] for row in scaled_matrix]
                y_values = [row[right_index] for row in scaled_matrix]
                _scatter_clustered_points(
                    axis, x_values, y_values, labels, unique_labels, colors
                )
                axis.set_xlabel(f"{selected_features[left_index]} (scaled)")
                axis.set_ylabel(f"{selected_features[right_index]} (scaled)")
                axis.set_title(
                    f"{selected_features[left_index]} vs {selected_features[right_index]}"
                )
            axis.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)

        # Remove trailing axes the grid allocated but the pairs did not use.
        for axis in axes_list[subplot_count:]:
            axis.remove()

        if unique_labels and axes_list:
            axes_list[0].legend()
        fig.suptitle(f"KMeans Feature Diagnostics - {block_code}", fontsize=14)
        fig.tight_layout(rect=(0, 0, 1, 0.97))
        fig.savefig(buffer, format="png", dpi=150)
        buffer.seek(0)
        return ContentFile(buffer.getvalue())
    finally:
        buffer.close()
        plt.close(fig)
|
||||
|
||||
|
||||
def _import_matplotlib_pyplot():
    """Import and return matplotlib.pyplot on the headless Agg backend.

    Raises DataDrivenSubdivisionError when matplotlib is not installed.
    """
    try:
        import matplotlib

        # Charts are rendered server-side; force the non-interactive backend
        # before pyplot is imported.
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt

        return plt
    except ImportError as exc:  # pragma: no cover - runtime dependency guard
        raise DataDrivenSubdivisionError("matplotlib برای ذخیره نمودارهای KMeans لازم است.") from exc
|
||||
|
||||
|
||||
def _build_clustering_log_context(
|
||||
*,
|
||||
observations: list[AnalysisGridObservation],
|
||||
|
||||
Reference in New Issue
Block a user