2026-05-11 00:36:02 +03:30
parent 2a6321a263
commit 1740c20ddb
23 changed files with 1214 additions and 89 deletions
+1 -1
@@ -3,7 +3,7 @@
- The input to this app is the corner coordinates of the whole field plus the boundary of each block defined by the farmer.
- Each block is independently converted into `30×30` m grid cells, which are stored in `AnalysisGridCell`.
- For all grid cells of a block, satellite data for a time window is fetched from `openEO`, and the mean over that window is stored as the final state of each cell in `AnalysisGridObservation`.
- Current core features: `ndvi`, `ndwi`, `lst_c`, `soil_vv`, `soil_vv_db`, `dem_m`, `slope_deg`.
- Current core features: `ndvi`, `ndwi`, `soil_vv`, `soil_vv_db`, `dem_m`, `slope_deg`.
- Then, for each block, `KMeans` is run on the grid-cell features; the `SSE / Inertia` value is recorded for each `K`, and a `K-SSE` chart is rendered as well (a minimal sketch follows this list).
- The elbow point gives the appropriate number of sub-blocks, and the result is stored in `RemoteSensingSubdivisionResult` and on the `BlockSubdivision` itself.
- The old depth-based soil data flow and `soil_adapters.py` no longer play any part in this app's workflow.
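A minimal sketch of the per-block clustering step, assuming `scikit-learn` and `numpy` are available; the synthetic feature matrix, the helper names, and the chord-distance elbow heuristic are illustrative assumptions, not this app's actual `build_clustering_dataset` / `detect_elbow_point` internals:

```python
# Illustrative sketch only: scale per-cell features, record SSE for each K,
# then pick the elbow as the point farthest from the line joining the curve's
# endpoints. Helper names and the heuristic are assumptions, not the app's code.
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler


def build_inertia_curve(features: np.ndarray, max_k: int = 10, random_state: int = 42) -> list[dict[str, float]]:
    """Fit KMeans for k = 1..max_k on scaled features and record SSE (inertia) per k."""
    scaled = StandardScaler().fit_transform(features)
    upper = min(max_k, len(scaled))  # cannot ask for more clusters than cells
    return [
        {"k": float(k), "sse": float(KMeans(n_clusters=k, random_state=random_state, n_init=10).fit(scaled).inertia_)}
        for k in range(1, upper + 1)
    ]


def pick_elbow_k(curve: list[dict[str, float]]) -> int:
    """Return the k whose (k, sse) point lies farthest from the first-to-last chord."""
    ks = np.array([point["k"] for point in curve])
    sses = np.array([point["sse"] for point in curve])
    # Normalize both axes so the distance is scale-independent.
    x = (ks - ks.min()) / max(ks.max() - ks.min(), 1e-9)
    y = (sses - sses.min()) / max(sses.max() - sses.min(), 1e-9)
    chord = np.array([x[-1] - x[0], y[-1] - y[0]])
    chord /= max(np.linalg.norm(chord), 1e-9)
    deltas = np.stack([x - x[0], y - y[0]], axis=1)
    # Perpendicular distance = |2D cross product| against the unit chord vector.
    distances = np.abs(deltas[:, 0] * chord[1] - deltas[:, 1] * chord[0])
    return int(ks[int(np.argmax(distances))])


# e.g. one row per 30x30 m cell with (ndvi, ndwi, soil_vv_db) values
cells = np.random.default_rng(0).normal(size=(40, 3))
print(pick_elbow_k(build_inertia_curve(cells)))
```

On a typical monotonically decreasing SSE curve this chord-distance rule lands on the same `K` a visual elbow reading would, which is why the app persists the full `inertia_curve` alongside the chosen `cluster_count`.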
-1
@@ -96,7 +96,6 @@ class AnalysisGridObservationAdmin(admin.ModelAdmin):
"temporal_end",
"ndvi",
"ndwi",
"lst_c",
"created_at",
)
list_filter = ("temporal_start", "temporal_end", "created_at")
+320 -1
@@ -1,10 +1,16 @@
from __future__ import annotations
from io import BytesIO
import math
import os
from pathlib import Path
from dataclasses import dataclass
import json
import logging
from typing import Any
from django.conf import settings
from django.core.files.base import ContentFile
from django.db import transaction
from .block_subdivision import detect_elbow_point, render_elbow_plot
@@ -21,12 +27,12 @@ from .models import (
DEFAULT_CLUSTER_FEATURES = [
"ndvi",
"ndwi",
"lst_c",
"soil_vv_db",
]
SUPPORTED_CLUSTER_FEATURES = tuple(DEFAULT_CLUSTER_FEATURES)
DEFAULT_RANDOM_STATE = 42
DEFAULT_MAX_K = 10
DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR = "artifacts/remote_sensing_charts"
logger = logging.getLogger(__name__)
@@ -153,6 +159,20 @@ def create_remote_sensing_subdivision_result(
)
)
RemoteSensingClusterAssignment.objects.bulk_create(assignment_rows)
diagnostic_artifacts = _persist_remote_sensing_diagnostic_artifacts(
result=result,
observations=dataset.observations,
labels=labels,
cluster_summaries=cluster_summaries,
selected_features=dataset.selected_features,
scaled_matrix=dataset.scaled_matrix,
inertia_curve=inertia_curve,
)
if diagnostic_artifacts:
metadata = dict(result.metadata or {})
metadata["diagnostic_artifacts"] = diagnostic_artifacts
result.metadata = metadata
result.save(update_fields=["metadata", "updated_at"])
if block_subdivision is not None:
sync_block_subdivision_with_result(
block_subdivision=block_subdivision,
@@ -468,6 +488,7 @@ def sync_block_subdivision_with_result(
"end_date": result.temporal_end.isoformat() if result.temporal_end else None,
},
"inertia_curve": result.metadata.get("inertia_curve", []),
"diagnostic_artifacts": result.metadata.get("diagnostic_artifacts", {}),
}
block_subdivision.grid_points = [
@@ -550,6 +571,304 @@ def _count_non_null_features(observations: list[AnalysisGridObservation]) -> dic
return counts
def _persist_remote_sensing_diagnostic_artifacts(
*,
result: RemoteSensingSubdivisionResult,
observations: list[AnalysisGridObservation],
labels: list[int],
cluster_summaries: list[dict[str, Any]],
selected_features: list[str],
scaled_matrix: list[list[float]],
inertia_curve: list[dict[str, float]],
) -> dict[str, Any]:
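    """Render the KMeans diagnostic charts for this result and write them to the diagnostic dir; returns an artifact manifest, or {} when rendering or IO fails."""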
try:
artifact_dir = _build_remote_sensing_diagnostic_dir(result=result)
artifact_dir.mkdir(parents=True, exist_ok=True)
specs = [
(
"elbow_plot",
render_elbow_plot(
inertia_curve=inertia_curve,
optimal_k=result.cluster_count,
block_code=result.block_code or "farm",
),
"elbow",
),
(
"cluster_map",
_render_cluster_map_plot(
observations=observations,
labels=labels,
block_code=result.block_code or "farm",
),
"cluster-map",
),
(
"cluster_sizes",
_render_cluster_size_plot(
cluster_summaries=cluster_summaries,
block_code=result.block_code or "farm",
),
"cluster-sizes",
),
(
"feature_pairs",
_render_feature_pair_plot(
selected_features=selected_features,
scaled_matrix=scaled_matrix,
labels=labels,
block_code=result.block_code or "farm",
),
"feature-pairs",
),
]
files: dict[str, str] = {}
for artifact_key, content, suffix in specs:
if content is None:
continue
target_path = artifact_dir / f"{_build_remote_sensing_artifact_stem(result=result)}__{suffix}.png"
_write_content_file(target_path=target_path, content=content)
files[artifact_key] = _to_project_relative_path(target_path)
return {
"directory": _to_project_relative_path(artifact_dir),
"files": files,
}
except (DataDrivenSubdivisionError, OSError) as exc:
logger.warning(
"Failed to persist remote sensing diagnostic artifacts for result_id=%s: %s",
result.id,
exc,
)
return {}
def _build_remote_sensing_diagnostic_dir(*, result: RemoteSensingSubdivisionResult) -> Path:
configured_dir = str(
os.environ.get("REMOTE_SENSING_DIAGNOSTIC_DIR", DEFAULT_REMOTE_SENSING_DIAGNOSTIC_DIR)
).strip()
base_dir = Path(getattr(settings, "BASE_DIR", Path.cwd()))
target_dir = Path(configured_dir)
if not target_dir.is_absolute():
target_dir = base_dir / target_dir
block_component = _sanitize_path_component(result.block_code or "farm")
return target_dir / f"location-{result.soil_location_id}" / f"run-{result.run_id}-{block_component}"
def _build_remote_sensing_artifact_stem(*, result: RemoteSensingSubdivisionResult) -> str:
return (
f"location-{result.soil_location_id}"
f"__run-{result.run_id}"
f"__{_sanitize_path_component(result.block_code or 'farm')}"
)
def _write_content_file(*, target_path: Path, content: ContentFile) -> None:
target_path.parent.mkdir(parents=True, exist_ok=True)
content.open("rb")
try:
target_path.write_bytes(content.read())
finally:
content.close()
def _to_project_relative_path(path: Path) -> str:
base_dir = Path(getattr(settings, "BASE_DIR", Path.cwd()))
try:
return str(path.relative_to(base_dir))
except ValueError:
return str(path)
def _sanitize_path_component(value: str) -> str:
text = str(value or "").strip() or "unknown"
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
return sanitized or "unknown"
def _render_cluster_map_plot(
*,
observations: list[AnalysisGridObservation],
labels: list[int],
block_code: str,
) -> ContentFile | None:
if not observations:
return None
plt = _import_matplotlib_pyplot()
unique_labels = sorted(set(int(label) for label in labels))
colors = plt.cm.get_cmap("tab10", max(len(unique_labels), 1))
fig, ax = plt.subplots(figsize=(8, 6))
buffer = BytesIO()
try:
for color_index, cluster_label in enumerate(unique_labels):
cluster_points = [
(float(observation.cell.centroid_lon), float(observation.cell.centroid_lat))
for observation, label in zip(observations, labels)
if int(label) == cluster_label
]
if not cluster_points:
continue
xs = [point[0] for point in cluster_points]
ys = [point[1] for point in cluster_points]
ax.scatter(
xs,
ys,
s=70,
alpha=0.9,
color=colors(color_index),
edgecolors="white",
linewidths=0.8,
label=f"Cluster {cluster_label}",
)
ax.set_title(f"KMeans Spatial Cluster Map - {block_code}")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
if unique_labels:
ax.legend()
fig.tight_layout()
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
plt.close(fig)
def _render_cluster_size_plot(
*,
cluster_summaries: list[dict[str, Any]],
block_code: str,
) -> ContentFile | None:
if not cluster_summaries:
return None
plt = _import_matplotlib_pyplot()
labels = [f"C{int(cluster['cluster_label'])}" for cluster in cluster_summaries]
counts = [int(cluster["cell_count"]) for cluster in cluster_summaries]
fig, ax = plt.subplots(figsize=(8, 5))
buffer = BytesIO()
try:
bars = ax.bar(labels, counts, color="#2f6fed", alpha=0.85)
for bar, count in zip(bars, counts):
ax.text(
bar.get_x() + bar.get_width() / 2.0,
bar.get_height(),
str(count),
ha="center",
va="bottom",
fontsize=9,
)
ax.set_title(f"Cluster Sizes - {block_code}")
ax.set_xlabel("Cluster")
ax.set_ylabel("Cell Count")
ax.grid(True, axis="y", linestyle="--", linewidth=0.5, alpha=0.4)
fig.tight_layout()
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
plt.close(fig)
def _render_feature_pair_plot(
*,
selected_features: list[str],
scaled_matrix: list[list[float]],
labels: list[int],
block_code: str,
) -> ContentFile | None:
if not scaled_matrix or not selected_features:
return None
plt = _import_matplotlib_pyplot()
feature_count = len(selected_features)
if feature_count == 1:
    pair_indexes = [(0, 0)]
else:
    pair_indexes = [
        (left_index, right_index)
        for left_index in range(feature_count)
        for right_index in range(left_index + 1, feature_count)
    ]
subplot_count = len(pair_indexes)
columns = 2 if subplot_count > 1 else 1
rows = math.ceil(subplot_count / columns)
fig, axes = plt.subplots(rows, columns, figsize=(7 * columns, 5 * rows))
axes_list = axes.flatten().tolist() if hasattr(axes, "flatten") else [axes]
unique_labels = sorted(set(int(label) for label in labels))
colors = plt.cm.get_cmap("tab10", max(len(unique_labels), 1))
buffer = BytesIO()
try:
for axis, (left_index, right_index) in zip(axes_list, pair_indexes):
if feature_count == 1:
xs = list(range(1, len(scaled_matrix) + 1))
ys = [row[0] for row in scaled_matrix]
for color_index, cluster_label in enumerate(unique_labels):
filtered = [
(x_value, y_value)
for x_value, y_value, label in zip(xs, ys, labels)
if int(label) == cluster_label
]
axis.scatter(
[item[0] for item in filtered],
[item[1] for item in filtered],
s=55,
color=colors(color_index),
alpha=0.85,
label=f"Cluster {cluster_label}",
)
axis.set_xlabel("Observation Index")
axis.set_ylabel(f"{selected_features[0]} (scaled)")
axis.set_title(f"{selected_features[0]} distribution")
else:
x_values = [row[left_index] for row in scaled_matrix]
y_values = [row[right_index] for row in scaled_matrix]
for color_index, cluster_label in enumerate(unique_labels):
filtered = [
(x_value, y_value)
for x_value, y_value, label in zip(x_values, y_values, labels)
if int(label) == cluster_label
]
axis.scatter(
[item[0] for item in filtered],
[item[1] for item in filtered],
s=55,
color=colors(color_index),
alpha=0.85,
label=f"Cluster {cluster_label}",
)
axis.set_xlabel(f"{selected_features[left_index]} (scaled)")
axis.set_ylabel(f"{selected_features[right_index]} (scaled)")
axis.set_title(
f"{selected_features[left_index]} vs {selected_features[right_index]}"
)
axis.grid(True, linestyle="--", linewidth=0.5, alpha=0.4)
for axis in axes_list[subplot_count:]:
axis.remove()
if unique_labels and axes_list:
axes_list[0].legend()
fig.suptitle(f"KMeans Feature Diagnostics - {block_code}", fontsize=14)
fig.tight_layout(rect=(0, 0, 1, 0.97))
fig.savefig(buffer, format="png", dpi=150)
buffer.seek(0)
return ContentFile(buffer.getvalue())
finally:
buffer.close()
plt.close(fig)
def _import_matplotlib_pyplot():
try:
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
except ImportError as exc: # pragma: no cover - runtime dependency guard
raise DataDrivenSubdivisionError("matplotlib is required to save the KMeans diagnostic charts.") from exc
return plt
def _build_clustering_log_context(
*,
observations: list[AnalysisGridObservation],
@@ -0,0 +1,14 @@
from django.db import migrations
class Migration(migrations.Migration):
dependencies = [
("location_data", "0015_merge_20260509_1418"),
]
operations = [
migrations.RemoveField(
model_name="analysisgridobservation",
name="lst_c",
),
]
-1
@@ -363,7 +363,6 @@ class AnalysisGridObservation(models.Model):
temporal_end = models.DateField(db_index=True)
ndvi = models.FloatField(null=True, blank=True)
ndwi = models.FloatField(null=True, blank=True)
lst_c = models.FloatField(null=True, blank=True)
soil_vv = models.FloatField(null=True, blank=True)
soil_vv_db = models.FloatField(null=True, blank=True)
dem_m = models.FloatField(null=True, blank=True)
+115 -52
@@ -29,23 +29,21 @@ DEFAULT_OPENEO_PROXY_URL = "socks5h://host.docker.internal:10808"
DEFAULT_OPENEO_TIMEOUT_SECONDS = 600.0
DEFAULT_OPENEO_HTTP_RETRY_TOTAL = 5
DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR = 2.0
DEFAULT_OPENEO_PAYLOAD_ARCHIVE_DIR = "logs/openeo_payloads"
SENTINEL2_COLLECTION = "SENTINEL2_L2A"
SENTINEL3_LST_COLLECTION = "SENTINEL3_SLSTR_L2_LST"
SENTINEL1_COLLECTION = "SENTINEL1_GRD"
VALID_SCL_CLASSES = (4, 5, 6)
METRIC_NAMES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv",
"soil_vv_db",
)
CLUSTER_METRIC_NAMES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv_db",
)
@@ -479,7 +477,6 @@ def compute_remote_sensing_metrics(
"backend_url": DEFAULT_OPENEO_BACKEND_URL,
"collections_used": [
SENTINEL2_COLLECTION,
SENTINEL3_LST_COLLECTION,
SENTINEL1_COLLECTION,
],
"job_refs": {},
@@ -490,7 +487,6 @@ def compute_remote_sensing_metrics(
metric_runners = [
("ndvi", compute_ndvi),
("ndwi", compute_ndwi),
("lst_c", compute_lst_c),
("soil_vv", compute_soil_vv),
]
for metric_name, runner in metric_runners:
@@ -640,40 +636,6 @@ def compute_ndwi(
}
def compute_lst_c(
*,
connection,
feature_collection,
spatial_extent,
temporal_start,
temporal_end,
expected_feature_ids: list[str] | None = None,
) -> dict[str, Any]:
cube = connection.load_collection(
SENTINEL3_LST_COLLECTION,
spatial_extent=spatial_extent,
temporal_extent=[_normalize_date(temporal_start), _normalize_date(temporal_end)],
)
band_name = infer_band_name(cube, preferred=("LST", "LST_in", "LST", "band_0"))
lst_k = cube.band(band_name) if band_name else cube
lst_c = lst_k - 273.15
aggregated, job_ref = _run_aggregate_spatial_job(
lst_c.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
metric_name="lst_c",
)
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="lst_c", job_ref=job_ref)
return {
"results": parse_aggregate_spatial_response(
aggregated,
"lst_c",
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
),
"job_ref": job_ref,
"payload_diagnostics": payload_diagnostics,
}
def compute_soil_vv(
*,
connection,
@@ -739,14 +701,15 @@ def _run_aggregate_spatial_job(process: Any, *, metric_name: str) -> tuple[Any,
"openEO batch job finished: %s",
_serialize_for_log({"metric_name": metric_name, "job_ref": _extract_job_ref(job)}),
)
return _load_job_result_payload(job), _extract_job_ref(job)
return _load_job_result_payload(job, metric_name=metric_name), _extract_job_ref(job)
logger.info("openEO process uses synchronous execution fallback for metric `%s`.", metric_name)
return process.execute(), None
def _load_job_result_payload(job: Any) -> Any:
def _load_job_result_payload(job: Any, *, metric_name: str) -> Any:
results = job.get_results()
job_ref = _extract_job_ref(job)
if hasattr(results, "download_files"):
with TemporaryDirectory(prefix="openeo-job-") as temp_dir:
@@ -754,26 +717,46 @@ def _load_job_result_payload(job: Any) -> Any:
downloaded_files = sorted(str(path.relative_to(temp_dir)) for path in Path(temp_dir).rglob("*") if path.is_file())
logger.info(
"openEO batch job files downloaded: %s",
_serialize_for_log({"job_ref": _extract_job_ref(job), "files": downloaded_files}),
_serialize_for_log({"job_ref": job_ref, "files": downloaded_files}),
)
payload = _load_first_json_payload(Path(temp_dir), job_ref=_extract_job_ref(job))
payload, payload_path = _load_first_json_payload_with_source(Path(temp_dir), job_ref=job_ref)
if payload is not None:
if payload_path is not None:
_persist_raw_payload_file(
source_path=payload_path,
metric_name=metric_name,
job_ref=job_ref,
)
return payload
if hasattr(results, "get_metadata"):
metadata = results.get_metadata()
if isinstance(metadata, dict) and metadata.get("data") is not None:
_persist_raw_payload_value(
payload=metadata["data"],
metric_name=metric_name,
job_ref=job_ref,
)
return metadata["data"]
raise OpenEOExecutionError(
f"openEO batch job `{_extract_job_ref(job) or 'unknown'}` completed but no JSON result payload could be loaded."
f"openEO batch job `{job_ref or 'unknown'}` completed but no JSON result payload could be loaded."
)
def _load_first_json_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
asset_payload = _load_stac_asset_payload(directory, job_ref=job_ref)
payload, _source_path = _load_first_json_payload_with_source(directory, job_ref=job_ref)
return payload
def _load_first_json_payload_with_source(
directory: Path,
*,
job_ref: str | None = None,
) -> tuple[Any | None, Path | None]:
asset_payload, asset_path = _load_stac_asset_payload(directory, job_ref=job_ref)
if asset_payload is not None:
return asset_payload
return asset_payload, asset_path
for candidate in sorted(directory.rglob("*.json")):
payload = _read_json_file(candidate, job_ref=job_ref)
@@ -781,11 +764,11 @@ def _load_first_json_payload(directory: Path, *, job_ref: str | None = None) ->
continue
if _looks_like_stac_metadata_payload(payload):
continue
return payload
return None
return payload, candidate
return None, None
def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) -> tuple[Any | None, Path | None]:
for candidate in sorted(directory.rglob("*.json")):
payload = _read_json_file(candidate, job_ref=job_ref)
if not _looks_like_stac_metadata_payload(payload):
@@ -817,8 +800,88 @@ def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) ->
}
),
)
return _read_json_file(asset_path, job_ref=job_ref)
return None
return _read_json_file(asset_path, job_ref=job_ref), asset_path
return None, None
def _persist_raw_payload_file(
*,
source_path: Path,
metric_name: str,
job_ref: str | None,
) -> None:
archive_path = _build_payload_archive_path(
metric_name=metric_name,
job_ref=job_ref,
source_name=source_path.name,
)
if archive_path is None:
return
raw_bytes = source_path.read_bytes()
archive_path.parent.mkdir(parents=True, exist_ok=True)
archive_path.write_bytes(raw_bytes)
logger.info(
"openEO raw payload archived: %s",
_serialize_for_log(
{
"job_ref": job_ref,
"metric_name": metric_name,
"source_path": str(source_path),
"archive_path": str(archive_path),
}
),
)
def _persist_raw_payload_value(
*,
payload: Any,
metric_name: str,
job_ref: str | None,
) -> None:
archive_path = _build_payload_archive_path(
metric_name=metric_name,
job_ref=job_ref,
source_name="metadata.json",
)
if archive_path is None:
return
archive_path.parent.mkdir(parents=True, exist_ok=True)
archive_path.write_text(
json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=False, default=str),
encoding="utf-8",
)
logger.info(
"openEO raw payload archived from metadata: %s",
_serialize_for_log(
{
"job_ref": job_ref,
"metric_name": metric_name,
"archive_path": str(archive_path),
}
),
)
def _build_payload_archive_path(
*,
metric_name: str,
job_ref: str | None,
source_name: str,
) -> Path | None:
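    """Build the archive path <job_ref>__<metric_name>__<source_name> under OPENEO_PAYLOAD_ARCHIVE_DIR; returns None when the directory is configured empty (archiving disabled)."""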
archive_dir = str(os.environ.get("OPENEO_PAYLOAD_ARCHIVE_DIR", DEFAULT_OPENEO_PAYLOAD_ARCHIVE_DIR)).strip()
if not archive_dir:
return None
safe_job_ref = _sanitize_filename_component(job_ref or "unknown-job")
safe_metric_name = _sanitize_filename_component(metric_name or "unknown-metric")
safe_source_name = _sanitize_filename_component(source_name or "payload.json")
return Path(archive_dir) / f"{safe_job_ref}__{safe_metric_name}__{safe_source_name}"
def _sanitize_filename_component(value: str) -> str:
text = str(value or "").strip() or "unknown"
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
return sanitized or "unknown"
def _iter_stac_asset_paths(payload: Any, directory: Path) -> list[tuple[str, Path]]:
@@ -997,7 +1060,7 @@ def _parse_list_results(
value = _extract_aggregate_value(item)
else:
feature_id = str(_normalize_feature_id(index, expected_feature_ids=expected_feature_ids))
value = item
value = _extract_aggregate_value(item)
results[feature_id] = {metric_name: _coerce_float(value)}
return results
-1
@@ -10,7 +10,6 @@ from .models import AnalysisGridObservation, RemoteSensingRun, SoilLocation
SATELLITE_METRIC_FIELDS = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv_db",
"dem_m",
"slope_deg",
-2
@@ -162,7 +162,6 @@ class RemoteSensingCellObservationSerializer(serializers.ModelSerializer):
"temporal_end",
"ndvi",
"ndwi",
"lst_c",
"soil_vv",
"soil_vv_db",
"metadata",
@@ -173,7 +172,6 @@ class RemoteSensingSummarySerializer(serializers.Serializer):
cell_count = serializers.IntegerField()
ndvi_mean = serializers.FloatField(allow_null=True)
ndwi_mean = serializers.FloatField(allow_null=True)
lst_c_mean = serializers.FloatField(allow_null=True)
soil_vv_db_mean = serializers.FloatField(allow_null=True)
-2
@@ -48,7 +48,6 @@ REMOTE_SENSING_TASK_RETRY_BACKOFF_MAX_SECONDS = 600
PERSISTED_OBSERVATION_FEATURES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv",
"soil_vv_db",
)
@@ -690,7 +689,6 @@ def _upsert_grid_observations(
"run": run,
"ndvi": values.get("ndvi"),
"ndwi": values.get("ndwi"),
"lst_c": values.get("lst_c"),
"soil_vv": values.get("soil_vv"),
"soil_vv_db": values.get("soil_vv_db"),
"metadata": metadata_template,
+78 -1
@@ -1,9 +1,14 @@
from datetime import date
import os
from tempfile import TemporaryDirectory
from unittest.mock import patch
from django.core.files.base import ContentFile
from django.test import TestCase
from location_data.data_driven_subdivision import (
EmptyObservationDatasetError,
_persist_remote_sensing_diagnostic_artifacts,
build_clustering_dataset,
sync_block_subdivision_with_result,
)
@@ -137,6 +142,78 @@ class DataDrivenSubdivisionSyncTests(TestCase):
self.subdivision.metadata["data_driven_subdivision"]["cluster_count"],
2,
)
self.assertIn("diagnostic_artifacts", self.subdivision.metadata["data_driven_subdivision"])
def test_persist_remote_sensing_diagnostic_artifacts_saves_expected_images(self):
cell = AnalysisGridCell.objects.create(
soil_location=self.location,
block_subdivision=self.subdivision,
block_code="block-1",
cell_code="cell-1",
chunk_size_sqm=900,
geometry=self.boundary,
centroid_lat="35.689200",
centroid_lon="51.389200",
)
observation = AnalysisGridObservation.objects.create(
cell=cell,
run=self.run,
temporal_start=date(2025, 1, 1),
temporal_end=date(2025, 1, 31),
ndvi=0.5,
ndwi=0.2,
soil_vv_db=-8.0,
)
result = RemoteSensingSubdivisionResult.objects.create(
soil_location=self.location,
run=self.run,
block_subdivision=self.subdivision,
block_code="block-1",
chunk_size_sqm=900,
temporal_start=date(2025, 1, 1),
temporal_end=date(2025, 1, 31),
cluster_count=1,
selected_features=["ndvi", "ndwi", "soil_vv_db"],
metadata={"inertia_curve": [{"k": 1, "sse": 0.0}]},
)
with TemporaryDirectory() as temp_dir:
with patch.dict(os.environ, {"REMOTE_SENSING_DIAGNOSTIC_DIR": temp_dir}, clear=False), patch(
"location_data.data_driven_subdivision.render_elbow_plot",
return_value=ContentFile(b"elbow"),
), patch(
"location_data.data_driven_subdivision._render_cluster_map_plot",
return_value=ContentFile(b"map"),
), patch(
"location_data.data_driven_subdivision._render_cluster_size_plot",
return_value=ContentFile(b"sizes"),
), patch(
"location_data.data_driven_subdivision._render_feature_pair_plot",
return_value=ContentFile(b"pairs"),
):
artifacts = _persist_remote_sensing_diagnostic_artifacts(
result=result,
observations=[observation],
labels=[0],
cluster_summaries=[
{
"cluster_label": 0,
"cell_count": 1,
"centroid_lat": 35.6892,
"centroid_lon": 51.3892,
"cell_codes": ["cell-1"],
}
],
selected_features=["ndvi", "ndwi", "soil_vv_db"],
scaled_matrix=[[0.0, 0.0, 0.0]],
inertia_curve=[{"k": 1, "sse": 0.0}],
)
self.assertEqual(
sorted(artifacts["files"].keys()),
["cluster_map", "cluster_sizes", "elbow_plot", "feature_pairs"],
)
for path in artifacts["files"].values():
self.assertTrue(os.path.exists(path))
def test_build_clustering_dataset_raises_clear_error_when_all_selected_features_are_null(self):
cell = AnalysisGridCell.objects.create(
@@ -164,7 +241,7 @@ class DataDrivenSubdivisionSyncTests(TestCase):
):
build_clustering_dataset(
observations=[observation],
selected_features=["ndvi", "ndwi", "lst_c", "soil_vv_db"],
selected_features=["ndvi", "ndwi", "soil_vv_db"],
run=self.run,
location=self.location,
)
+41 -8
@@ -16,6 +16,7 @@ from location_data.openeo_service import (
OpenEOExecutionError,
_log_raw_payload_summary,
_load_first_json_payload,
_load_job_result_payload,
_resolve_openeo_proxy_url_from_env,
_run_aggregate_spatial_job,
log_openeo_request_summary,
@@ -58,10 +59,10 @@ class OpenEOServiceParsingTests(SimpleTestCase):
"cell-2": {"mean": 15.1},
}
result = parse_aggregate_spatial_response(payload, "lst_c")
result = parse_aggregate_spatial_response(payload, "ndwi")
self.assertEqual(result["cell-1"]["lst_c"], 12.4)
self.assertEqual(result["cell-2"]["lst_c"], 15.1)
self.assertEqual(result["cell-1"]["ndwi"], 12.4)
self.assertEqual(result["cell-2"]["ndwi"], 15.1)
def test_parse_mapping_results_maps_numeric_keys_to_expected_feature_ids(self):
payload = {
@@ -71,12 +72,12 @@ class OpenEOServiceParsingTests(SimpleTestCase):
result = parse_aggregate_spatial_response(
payload,
"lst_c",
"ndwi",
expected_feature_ids=["cell-1", "cell-2"],
)
self.assertEqual(result["cell-1"]["lst_c"], 12.4)
self.assertEqual(result["cell-2"]["lst_c"], 15.1)
self.assertEqual(result["cell-1"]["ndwi"], 12.4)
self.assertEqual(result["cell-2"]["ndwi"], 15.1)
def test_parse_list_results_maps_positional_payload_to_expected_feature_ids(self):
payload = [{"mean": 0.61}, {"mean": 0.47}]
@@ -90,6 +91,18 @@ class OpenEOServiceParsingTests(SimpleTestCase):
self.assertEqual(result["cell-1"]["ndvi"], 0.61)
self.assertEqual(result["cell-2"]["ndvi"], 0.47)
def test_parse_list_results_extracts_scalar_from_nested_list_payloads(self):
payload = [[0.61], [0.47]]
result = parse_aggregate_spatial_response(
payload,
"ndvi",
expected_feature_ids=["cell-1", "cell-2"],
)
self.assertEqual(result["cell-1"]["ndvi"], 0.61)
self.assertEqual(result["cell-2"]["ndvi"], 0.47)
def test_log_raw_payload_summary_warns_for_empty_payload(self):
with self.assertLogs("location_data.openeo_service", level="WARNING") as captured:
summary = _log_raw_payload_summary({}, metric_name="ndvi", job_ref="job-1")
@@ -101,9 +114,9 @@ class OpenEOServiceParsingTests(SimpleTestCase):
payload = {"cell-1": {"foo": 12.4}}
with self.assertLogs("location_data.openeo_service", level="WARNING") as captured:
result = parse_aggregate_spatial_response(payload, "lst_c", job_ref="job-2")
result = parse_aggregate_spatial_response(payload, "ndwi", job_ref="job-2")
self.assertEqual(result["cell-1"]["lst_c"], 12.4)
self.assertEqual(result["cell-1"]["ndwi"], 12.4)
self.assertIn("Feature mismatch for cell=cell-1, available_keys=['foo']", "\n".join(captured.output))
def test_linear_to_db(self):
@@ -347,6 +360,26 @@ class OpenEOConnectionTests(SimpleTestCase):
self.assertEqual(job_ref, "job-123")
process.execute.assert_not_called()
def test_load_job_result_payload_archives_exact_raw_json_file(self):
job = Mock(job_id="job-123")
results = Mock()
job.get_results.return_value = results
raw_json = '{\n "cell-1": {"mean": 0.5}\n}\n'
def write_json(target_dir):
Path(target_dir, "timeseries.json").write_text(raw_json, encoding="utf-8")
results.download_files.side_effect = write_json
with TemporaryDirectory() as archive_dir:
with patch.dict(os.environ, {"OPENEO_PAYLOAD_ARCHIVE_DIR": archive_dir}, clear=False):
payload = _load_job_result_payload(job, metric_name="ndvi")
archive_path = Path(archive_dir) / "job-123__ndvi__timeseries.json"
self.assertTrue(archive_path.exists())
self.assertEqual(archive_path.read_text(encoding="utf-8"), raw_json)
self.assertEqual(payload, {"cell-1": {"mean": 0.5}})
def test_load_first_json_payload_prefers_stac_asset_data_over_metadata(self):
with TemporaryDirectory() as temp_dir:
Path(temp_dir, "item.json").write_text(
-2
@@ -151,7 +151,6 @@ class RemoteSensingApiTests(TestCase):
temporal_end=self.temporal_end,
ndvi=0.61,
ndwi=0.22,
lst_c=24.5,
soil_vv=0.13,
soil_vv_db=-8.860566,
dem_m=1550.0,
@@ -433,7 +432,6 @@ class RemoteSensingApiTests(TestCase):
temporal_end=self.temporal_end,
ndvi=0.61,
ndwi=0.22,
lst_c=24.5,
soil_vv=0.13,
soil_vv_db=-8.860566,
dem_m=1550.0,
+1 -3
@@ -88,7 +88,6 @@ class RemoteSensingTaskDiagnosticsTests(TestCase):
observation = AnalysisGridObservation.objects.get(cell=self.cell)
self.assertIsNone(observation.ndvi)
self.assertIsNone(observation.ndwi)
self.assertIsNone(observation.lst_c)
self.assertIsNone(observation.soil_vv)
self.assertIsNone(observation.soil_vv_db)
@@ -111,7 +110,7 @@ class RemoteSensingTaskDiagnosticsTests(TestCase):
subdivision_result = Mock(
id=99,
cluster_count=1,
selected_features=["ndvi", "ndwi", "lst_c", "soil_vv_db"],
selected_features=["ndvi", "ndwi", "soil_vv_db"],
metadata={"used_cell_count": 1, "skipped_cell_count": 0, "kmeans_params": {}},
skipped_cell_codes=[],
)
@@ -120,7 +119,6 @@ class RemoteSensingTaskDiagnosticsTests(TestCase):
"cell-1": {
"ndvi": 0.52,
"ndwi": 0.21,
"lst_c": None,
"soil_vv": 10.0,
"soil_vv_db": 10.0,
}
-3
@@ -1085,14 +1085,12 @@ def _build_remote_sensing_summary(observations):
cell_count=Avg("cell_id"),
ndvi_mean=Avg("ndvi"),
ndwi_mean=Avg("ndwi"),
lst_c_mean=Avg("lst_c"),
soil_vv_db_mean=Avg("soil_vv_db"),
)
summary = {
"cell_count": observations.count(),
"ndvi_mean": _round_or_none(aggregates.get("ndvi_mean")),
"ndwi_mean": _round_or_none(aggregates.get("ndwi_mean")),
"lst_c_mean": _round_or_none(aggregates.get("lst_c_mean")),
"soil_vv_db_mean": _round_or_none(aggregates.get("soil_vv_db_mean")),
}
return summary
@@ -1103,7 +1101,6 @@ def _empty_remote_sensing_summary():
"cell_count": 0,
"ndvi_mean": None,
"ndwi_mean": None,
"lst_c_mean": None,
"soil_vv_db_mean": None,
}