UPDATE
This commit is contained in:
+666
-75
@@ -1,27 +1,38 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import math
|
||||
import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from datetime import date
|
||||
from decimal import Decimal
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
|
||||
from config.proxy import apply_requests_proxy, build_proxy_url_from_proxychains_env
|
||||
|
||||
from .models import AnalysisGridCell
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_OPENEO_BACKEND_URL = "https://openeofed.dataspace.copernicus.eu"
|
||||
DEFAULT_OPENEO_PROVIDER = "openeo"
|
||||
DEFAULT_OPENEO_PROXY_URL = "socks5h://host.docker.internal:10808"
|
||||
DEFAULT_OPENEO_TIMEOUT_SECONDS = 600.0
|
||||
DEFAULT_OPENEO_HTTP_RETRY_TOTAL = 5
|
||||
DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR = 2.0
|
||||
|
||||
SENTINEL2_COLLECTION = "SENTINEL2_L2A"
|
||||
SENTINEL3_LST_COLLECTION = "SENTINEL3_SLSTR_L2_LST"
|
||||
SENTINEL1_COLLECTION = "SENTINEL1_GRD"
|
||||
COPERNICUS_DEM_COLLECTION = "COPERNICUS_30"
|
||||
|
||||
VALID_SCL_CLASSES = (4, 5, 6)
|
||||
METRIC_NAMES = (
|
||||
@@ -30,8 +41,12 @@ METRIC_NAMES = (
|
||||
"lst_c",
|
||||
"soil_vv",
|
||||
"soil_vv_db",
|
||||
"dem_m",
|
||||
"slope_deg",
|
||||
)
|
||||
CLUSTER_METRIC_NAMES = (
|
||||
"ndvi",
|
||||
"ndwi",
|
||||
"lst_c",
|
||||
"soil_vv_db",
|
||||
)
|
||||
|
||||
|
||||
@@ -53,19 +68,67 @@ class TimeoutOverrideSession(requests.Session):
|
||||
def __init__(self, timeout_seconds: float):
|
||||
super().__init__()
|
||||
self.timeout_seconds = timeout_seconds
|
||||
self.last_response_preview = ""
|
||||
self.last_response_content_type = ""
|
||||
self.last_response_url = ""
|
||||
|
||||
def request(self, method, url, **kwargs):
|
||||
timeout = kwargs.get("timeout")
|
||||
if timeout is None or timeout < self.timeout_seconds:
|
||||
kwargs["timeout"] = self.timeout_seconds
|
||||
return super().request(method, url, **kwargs)
|
||||
|
||||
request_log = {
|
||||
"method": str(method).upper(),
|
||||
"url": url,
|
||||
"timeout": kwargs.get("timeout"),
|
||||
"params": kwargs.get("params"),
|
||||
"json": kwargs.get("json"),
|
||||
"data": kwargs.get("data"),
|
||||
"headers": _sanitize_headers(kwargs.get("headers")),
|
||||
"proxy_url": _sanitize_proxy_url(self.proxies.get("https") or self.proxies.get("http")),
|
||||
}
|
||||
logger.info("openEO request payload: %s", _serialize_for_log(request_log))
|
||||
|
||||
started_at = time.monotonic()
|
||||
try:
|
||||
response = super().request(method, url, **kwargs)
|
||||
except Exception as exc:
|
||||
logger.exception(
|
||||
"openEO request failed after %.3fs: %s",
|
||||
time.monotonic() - started_at,
|
||||
_serialize_for_log(
|
||||
{
|
||||
"method": str(method).upper(),
|
||||
"url": url,
|
||||
"error": repr(exc),
|
||||
}
|
||||
),
|
||||
)
|
||||
raise
|
||||
|
||||
logger.info(
|
||||
"openEO response received after %.3fs: %s",
|
||||
time.monotonic() - started_at,
|
||||
_serialize_for_log(
|
||||
{
|
||||
"method": str(method).upper(),
|
||||
"url": url,
|
||||
"status_code": response.status_code,
|
||||
"headers": _sanitize_headers(dict(response.headers)),
|
||||
}
|
||||
),
|
||||
)
|
||||
self.last_response_url = str(response.url)
|
||||
self.last_response_content_type = str(response.headers.get("Content-Type", ""))
|
||||
self.last_response_preview = response.text[:1000] if response.text else ""
|
||||
return response
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class OpenEOConnectionSettings:
|
||||
backend_url: str = DEFAULT_OPENEO_BACKEND_URL
|
||||
auth_method: str = "client_credentials"
|
||||
timeout_seconds: float = 60.0
|
||||
timeout_seconds: float = DEFAULT_OPENEO_TIMEOUT_SECONDS
|
||||
client_id: str = ""
|
||||
client_secret: str = ""
|
||||
provider_id: str = ""
|
||||
@@ -73,13 +136,18 @@ class OpenEOConnectionSettings:
|
||||
password: str = ""
|
||||
allow_interactive_oidc: bool = False
|
||||
proxy_url: str = ""
|
||||
http_retry_total: int = DEFAULT_OPENEO_HTTP_RETRY_TOTAL
|
||||
http_retry_backoff_factor: float = DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR
|
||||
|
||||
@classmethod
|
||||
def from_env(cls) -> "OpenEOConnectionSettings":
|
||||
return cls(
|
||||
backend_url=os.environ.get("OPENEO_BACKEND_URL", DEFAULT_OPENEO_BACKEND_URL).strip(),
|
||||
auth_method=os.environ.get("OPENEO_AUTH_METHOD", "client_credentials").strip().lower(),
|
||||
timeout_seconds=float(os.environ.get("OPENEO_TIMEOUT_SECONDS", "60").strip() or "60"),
|
||||
timeout_seconds=float(
|
||||
os.environ.get("OPENEO_TIMEOUT_SECONDS", str(int(DEFAULT_OPENEO_TIMEOUT_SECONDS))).strip()
|
||||
or str(int(DEFAULT_OPENEO_TIMEOUT_SECONDS))
|
||||
),
|
||||
client_id=os.environ.get("OPENEO_AUTH_CLIENT_ID", "").strip(),
|
||||
client_secret=os.environ.get("OPENEO_AUTH_CLIENT_SECRET", "").strip(),
|
||||
provider_id=os.environ.get("OPENEO_AUTH_PROVIDER_ID", "").strip(),
|
||||
@@ -88,6 +156,17 @@ class OpenEOConnectionSettings:
|
||||
allow_interactive_oidc=os.environ.get("OPENEO_ALLOW_INTERACTIVE_OIDC", "0").strip().lower()
|
||||
in {"1", "true", "yes", "on"},
|
||||
proxy_url=_resolve_openeo_proxy_url_from_env(),
|
||||
http_retry_total=int(
|
||||
os.environ.get("OPENEO_HTTP_RETRY_TOTAL", str(DEFAULT_OPENEO_HTTP_RETRY_TOTAL)).strip()
|
||||
or str(DEFAULT_OPENEO_HTTP_RETRY_TOTAL)
|
||||
),
|
||||
http_retry_backoff_factor=float(
|
||||
os.environ.get(
|
||||
"OPENEO_HTTP_RETRY_BACKOFF_FACTOR",
|
||||
str(DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR),
|
||||
).strip()
|
||||
or str(DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR)
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
@@ -104,6 +183,46 @@ def _resolve_openeo_proxy_url_from_env() -> str:
|
||||
return configured_proxy_url
|
||||
|
||||
|
||||
def _sanitize_headers(headers: dict[str, Any] | None) -> dict[str, Any] | None:
|
||||
if not headers:
|
||||
return headers
|
||||
return {key: _mask_sensitive_value(key, value) for key, value in headers.items()}
|
||||
|
||||
|
||||
def _sanitize_proxy_url(proxy_url: str | None) -> str | None:
|
||||
if not proxy_url:
|
||||
return proxy_url
|
||||
return proxy_url
|
||||
|
||||
|
||||
def _serialize_for_log(payload: Any) -> str:
|
||||
return json.dumps(_mask_sensitive_payload(payload), ensure_ascii=True, default=str, sort_keys=True)
|
||||
|
||||
|
||||
def _mask_sensitive_payload(value: Any, parent_key: str = "") -> Any:
|
||||
if isinstance(value, dict):
|
||||
return {str(key): _mask_sensitive_payload(item, str(key)) for key, item in value.items()}
|
||||
if isinstance(value, list):
|
||||
return [_mask_sensitive_payload(item, parent_key) for item in value]
|
||||
if isinstance(value, tuple):
|
||||
return [_mask_sensitive_payload(item, parent_key) for item in value]
|
||||
return _mask_sensitive_value(parent_key, value)
|
||||
|
||||
|
||||
def _mask_sensitive_value(key: str, value: Any) -> Any:
|
||||
normalized_key = (key or "").lower()
|
||||
if normalized_key in {
|
||||
"authorization",
|
||||
"access_token",
|
||||
"refresh_token",
|
||||
"id_token",
|
||||
"client_secret",
|
||||
"password",
|
||||
}:
|
||||
return "***redacted***"
|
||||
return value
|
||||
|
||||
|
||||
def is_openeo_auth_configured(settings: OpenEOConnectionSettings | None = None) -> bool:
|
||||
settings = settings or OpenEOConnectionSettings.from_env()
|
||||
|
||||
@@ -118,9 +237,26 @@ def is_openeo_auth_configured(settings: OpenEOConnectionSettings | None = None)
|
||||
|
||||
def build_openeo_requests_session(settings: OpenEOConnectionSettings) -> requests.Session:
|
||||
session = TimeoutOverrideSession(settings.timeout_seconds)
|
||||
session.headers.setdefault("Accept", "application/json")
|
||||
adapter = HTTPAdapter(max_retries=_build_openeo_http_retry(settings))
|
||||
session.mount("http://", adapter)
|
||||
session.mount("https://", adapter)
|
||||
return apply_requests_proxy(session, settings.proxy_url)
|
||||
|
||||
|
||||
def _build_openeo_http_retry(settings: OpenEOConnectionSettings) -> Retry:
|
||||
return Retry(
|
||||
total=settings.http_retry_total,
|
||||
connect=settings.http_retry_total,
|
||||
read=settings.http_retry_total,
|
||||
status=settings.http_retry_total,
|
||||
backoff_factor=settings.http_retry_backoff_factor,
|
||||
allowed_methods=None,
|
||||
status_forcelist=(429, 500, 502, 503, 504),
|
||||
raise_on_status=False,
|
||||
)
|
||||
|
||||
|
||||
def connect_openeo(settings: OpenEOConnectionSettings | None = None):
|
||||
"""
|
||||
Build an authenticated openEO connection using environment-driven configuration.
|
||||
@@ -140,11 +276,21 @@ def connect_openeo(settings: OpenEOConnectionSettings | None = None):
|
||||
raise OpenEOServiceError("The `openeo` Python client is required for remote sensing jobs.") from exc
|
||||
|
||||
session = build_openeo_requests_session(settings)
|
||||
connection = openeo.connect(
|
||||
settings.backend_url,
|
||||
session=session,
|
||||
default_timeout=settings.timeout_seconds,
|
||||
)
|
||||
try:
|
||||
connection = openeo.connect(
|
||||
settings.backend_url,
|
||||
session=session,
|
||||
default_timeout=settings.timeout_seconds,
|
||||
)
|
||||
except requests.exceptions.JSONDecodeError as exc:
|
||||
preview = (session.last_response_preview or "").strip()
|
||||
content_type = session.last_response_content_type or "unknown"
|
||||
response_url = session.last_response_url or settings.backend_url
|
||||
raise OpenEOServiceError(
|
||||
"openEO endpoint returned a non-JSON response while loading capabilities. "
|
||||
f"url={response_url!r} content_type={content_type!r} preview={preview[:300]!r}. "
|
||||
"This usually means the proxy returned an HTML page instead of the API response."
|
||||
) from exc
|
||||
|
||||
def resolve_oidc_context(
|
||||
provider_id: str | None,
|
||||
@@ -295,6 +441,8 @@ def compute_remote_sensing_metrics(
|
||||
*,
|
||||
temporal_start: date | str,
|
||||
temporal_end: date | str,
|
||||
selected_features: list[str] | None = None,
|
||||
progress_callback=None,
|
||||
connection=None,
|
||||
) -> dict[str, Any]:
|
||||
"""
|
||||
@@ -309,7 +457,6 @@ def compute_remote_sensing_metrics(
|
||||
"metadata": {
|
||||
"backend": DEFAULT_OPENEO_PROVIDER,
|
||||
"collections_used": [],
|
||||
"slope_supported": False,
|
||||
"job_refs": {},
|
||||
"failed_metrics": [],
|
||||
},
|
||||
@@ -318,6 +465,14 @@ def compute_remote_sensing_metrics(
|
||||
connection = connection or connect_openeo()
|
||||
feature_collection = build_feature_collection(cells)
|
||||
spatial_extent = build_spatial_extent(cells)
|
||||
log_openeo_request_summary(
|
||||
cells=cells,
|
||||
temporal_start=temporal_start,
|
||||
temporal_end=temporal_end,
|
||||
spatial_extent=spatial_extent,
|
||||
selected_features=selected_features or list(METRIC_NAMES),
|
||||
)
|
||||
expected_feature_ids = [cell.cell_code for cell in cells]
|
||||
results = initialize_metric_result_map(cells)
|
||||
metadata = {
|
||||
"backend": DEFAULT_OPENEO_PROVIDER,
|
||||
@@ -326,11 +481,10 @@ def compute_remote_sensing_metrics(
|
||||
SENTINEL2_COLLECTION,
|
||||
SENTINEL3_LST_COLLECTION,
|
||||
SENTINEL1_COLLECTION,
|
||||
COPERNICUS_DEM_COLLECTION,
|
||||
],
|
||||
"slope_supported": True,
|
||||
"job_refs": {},
|
||||
"failed_metrics": [],
|
||||
"payload_diagnostics": {},
|
||||
}
|
||||
|
||||
metric_runners = [
|
||||
@@ -338,29 +492,32 @@ def compute_remote_sensing_metrics(
|
||||
("ndwi", compute_ndwi),
|
||||
("lst_c", compute_lst_c),
|
||||
("soil_vv", compute_soil_vv),
|
||||
("dem_m", compute_dem_m),
|
||||
("slope_deg", compute_slope_deg),
|
||||
]
|
||||
for metric_name, runner in metric_runners:
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback(metric_name=metric_name, state="running", metadata=metadata)
|
||||
metric_payload = runner(
|
||||
connection=connection,
|
||||
feature_collection=feature_collection,
|
||||
spatial_extent=spatial_extent,
|
||||
temporal_start=temporal_start,
|
||||
temporal_end=temporal_end,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
)
|
||||
merge_metric_results(results, metric_payload["results"])
|
||||
metadata["job_refs"][metric_name] = metric_payload.get("job_ref")
|
||||
if metric_name == "slope_deg" and not metric_payload.get("supported", True):
|
||||
metadata["slope_supported"] = False
|
||||
except Exception as exc:
|
||||
if metric_name == "slope_deg":
|
||||
metadata["slope_supported"] = False
|
||||
metadata["failed_metrics"].append(
|
||||
{"metric": metric_name, "error": str(exc), "non_fatal": True}
|
||||
metadata["payload_diagnostics"][metric_name] = metric_payload.get("payload_diagnostics", {})
|
||||
if progress_callback is not None:
|
||||
progress_callback(
|
||||
metric_name=metric_name,
|
||||
state="completed",
|
||||
metadata=metadata,
|
||||
metric_payload=metric_payload,
|
||||
)
|
||||
continue
|
||||
except Exception as exc:
|
||||
if progress_callback is not None:
|
||||
progress_callback(metric_name=metric_name, state="failed", metadata=metadata, error=str(exc))
|
||||
raise OpenEOExecutionError(f"Failed to compute metric `{metric_name}`: {exc}") from exc
|
||||
|
||||
for cell_code, payload in results.items():
|
||||
@@ -370,7 +527,54 @@ def compute_remote_sensing_metrics(
|
||||
return {"results": results, "metadata": metadata}
|
||||
|
||||
|
||||
def compute_ndvi(*, connection, feature_collection, spatial_extent, temporal_start, temporal_end) -> dict[str, Any]:
|
||||
def log_openeo_request_summary(
|
||||
*,
|
||||
cells: list[AnalysisGridCell],
|
||||
temporal_start: date | str,
|
||||
temporal_end: date | str,
|
||||
spatial_extent: dict[str, float],
|
||||
selected_features: list[str],
|
||||
) -> None:
|
||||
start_date = _parse_date_value(temporal_start)
|
||||
end_date = _parse_date_value(temporal_end)
|
||||
logger.info(
|
||||
"openEO request summary: %s",
|
||||
_serialize_for_log(
|
||||
{
|
||||
"cell_count": len(cells),
|
||||
"date_range_days": max((end_date - start_date).days, 0) + 1,
|
||||
"area_m2": round(_estimate_extent_area_m2(spatial_extent), 2),
|
||||
"metrics": selected_features,
|
||||
"spatial_extent": spatial_extent,
|
||||
"temporal_start": start_date.isoformat(),
|
||||
"temporal_end": end_date.isoformat(),
|
||||
}
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
def _estimate_extent_area_m2(spatial_extent: dict[str, float]) -> float:
|
||||
west = float(spatial_extent["west"])
|
||||
east = float(spatial_extent["east"])
|
||||
south = float(spatial_extent["south"])
|
||||
north = float(spatial_extent["north"])
|
||||
mean_lat_rad = math.radians((south + north) / 2.0)
|
||||
meters_per_degree_lat = 111_320.0
|
||||
meters_per_degree_lon = 111_320.0 * math.cos(mean_lat_rad)
|
||||
width_m = max(east - west, 0.0) * meters_per_degree_lon
|
||||
height_m = max(north - south, 0.0) * meters_per_degree_lat
|
||||
return max(width_m, 0.0) * max(height_m, 0.0)
|
||||
|
||||
|
||||
def compute_ndvi(
|
||||
*,
|
||||
connection,
|
||||
feature_collection,
|
||||
spatial_extent,
|
||||
temporal_start,
|
||||
temporal_end,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
cube = connection.load_collection(
|
||||
SENTINEL2_COLLECTION,
|
||||
spatial_extent=spatial_extent,
|
||||
@@ -382,11 +586,32 @@ def compute_ndvi(*, connection, feature_collection, spatial_extent, temporal_sta
|
||||
red = cube.band("B04") * 0.0001
|
||||
nir = cube.band("B08") * 0.0001
|
||||
ndvi = ((nir - red) / (nir + red)).mask(invalid_mask.resample_cube_spatial(red))
|
||||
aggregated = ndvi.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean").execute()
|
||||
return {"results": parse_aggregate_spatial_response(aggregated, "ndvi")}
|
||||
aggregated, job_ref = _run_aggregate_spatial_job(
|
||||
ndvi.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
|
||||
metric_name="ndvi",
|
||||
)
|
||||
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="ndvi", job_ref=job_ref)
|
||||
return {
|
||||
"results": parse_aggregate_spatial_response(
|
||||
aggregated,
|
||||
"ndvi",
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
),
|
||||
"job_ref": job_ref,
|
||||
"payload_diagnostics": payload_diagnostics,
|
||||
}
|
||||
|
||||
|
||||
def compute_ndwi(*, connection, feature_collection, spatial_extent, temporal_start, temporal_end) -> dict[str, Any]:
|
||||
def compute_ndwi(
|
||||
*,
|
||||
connection,
|
||||
feature_collection,
|
||||
spatial_extent,
|
||||
temporal_start,
|
||||
temporal_end,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
cube = connection.load_collection(
|
||||
SENTINEL2_COLLECTION,
|
||||
spatial_extent=spatial_extent,
|
||||
@@ -398,11 +623,32 @@ def compute_ndwi(*, connection, feature_collection, spatial_extent, temporal_sta
|
||||
green = cube.band("B03") * 0.0001
|
||||
nir = cube.band("B08") * 0.0001
|
||||
ndwi = ((green - nir) / (green + nir)).mask(invalid_mask.resample_cube_spatial(green))
|
||||
aggregated = ndwi.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean").execute()
|
||||
return {"results": parse_aggregate_spatial_response(aggregated, "ndwi")}
|
||||
aggregated, job_ref = _run_aggregate_spatial_job(
|
||||
ndwi.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
|
||||
metric_name="ndwi",
|
||||
)
|
||||
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="ndwi", job_ref=job_ref)
|
||||
return {
|
||||
"results": parse_aggregate_spatial_response(
|
||||
aggregated,
|
||||
"ndwi",
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
),
|
||||
"job_ref": job_ref,
|
||||
"payload_diagnostics": payload_diagnostics,
|
||||
}
|
||||
|
||||
|
||||
def compute_lst_c(*, connection, feature_collection, spatial_extent, temporal_start, temporal_end) -> dict[str, Any]:
|
||||
def compute_lst_c(
|
||||
*,
|
||||
connection,
|
||||
feature_collection,
|
||||
spatial_extent,
|
||||
temporal_start,
|
||||
temporal_end,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
cube = connection.load_collection(
|
||||
SENTINEL3_LST_COLLECTION,
|
||||
spatial_extent=spatial_extent,
|
||||
@@ -411,11 +657,32 @@ def compute_lst_c(*, connection, feature_collection, spatial_extent, temporal_st
|
||||
band_name = infer_band_name(cube, preferred=("LST", "LST_in", "LST", "band_0"))
|
||||
lst_k = cube.band(band_name) if band_name else cube
|
||||
lst_c = lst_k - 273.15
|
||||
aggregated = lst_c.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean").execute()
|
||||
return {"results": parse_aggregate_spatial_response(aggregated, "lst_c")}
|
||||
aggregated, job_ref = _run_aggregate_spatial_job(
|
||||
lst_c.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
|
||||
metric_name="lst_c",
|
||||
)
|
||||
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="lst_c", job_ref=job_ref)
|
||||
return {
|
||||
"results": parse_aggregate_spatial_response(
|
||||
aggregated,
|
||||
"lst_c",
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
),
|
||||
"job_ref": job_ref,
|
||||
"payload_diagnostics": payload_diagnostics,
|
||||
}
|
||||
|
||||
|
||||
def compute_soil_vv(*, connection, feature_collection, spatial_extent, temporal_start, temporal_end) -> dict[str, Any]:
|
||||
def compute_soil_vv(
|
||||
*,
|
||||
connection,
|
||||
feature_collection,
|
||||
spatial_extent,
|
||||
temporal_start,
|
||||
temporal_end,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> dict[str, Any]:
|
||||
cube = connection.load_collection(
|
||||
SENTINEL1_COLLECTION,
|
||||
spatial_extent=spatial_extent,
|
||||
@@ -423,46 +690,216 @@ def compute_soil_vv(*, connection, feature_collection, spatial_extent, temporal_
|
||||
bands=["VV"],
|
||||
)
|
||||
vv = cube.band("VV")
|
||||
aggregated = vv.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean").execute()
|
||||
return {"results": parse_aggregate_spatial_response(aggregated, "soil_vv")}
|
||||
|
||||
|
||||
def compute_dem_m(*, connection, feature_collection, spatial_extent, temporal_start, temporal_end) -> dict[str, Any]:
|
||||
cube = connection.load_collection(
|
||||
COPERNICUS_DEM_COLLECTION,
|
||||
spatial_extent=spatial_extent,
|
||||
temporal_extent=[_normalize_date(temporal_start), _normalize_date(temporal_end)],
|
||||
aggregated, job_ref = _run_aggregate_spatial_job(
|
||||
vv.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
|
||||
metric_name="soil_vv",
|
||||
)
|
||||
band_name = infer_band_name(cube, preferred=("DEM", "elevation", "band_0"))
|
||||
dem = cube.band(band_name) if band_name else cube
|
||||
aggregated = dem.aggregate_spatial(geometries=feature_collection, reducer="mean").execute()
|
||||
return {"results": parse_aggregate_spatial_response(aggregated, "dem_m")}
|
||||
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="soil_vv", job_ref=job_ref)
|
||||
return {
|
||||
"results": parse_aggregate_spatial_response(
|
||||
aggregated,
|
||||
"soil_vv",
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
),
|
||||
"job_ref": job_ref,
|
||||
"payload_diagnostics": payload_diagnostics,
|
||||
}
|
||||
|
||||
|
||||
def compute_slope_deg(*, connection, feature_collection, spatial_extent, temporal_start, temporal_end) -> dict[str, Any]:
|
||||
cube = connection.load_collection(
|
||||
COPERNICUS_DEM_COLLECTION,
|
||||
spatial_extent=spatial_extent,
|
||||
temporal_extent=[_normalize_date(temporal_start), _normalize_date(temporal_end)],
|
||||
def _run_aggregate_spatial_job(process: Any, *, metric_name: str) -> tuple[Any, str | None]:
|
||||
title = f"crop-logic-{metric_name}"
|
||||
description = f"Remote sensing aggregate_spatial execution for metric `{metric_name}`."
|
||||
logger.info(
|
||||
"openEO process graph prepared: %s",
|
||||
_serialize_for_log(
|
||||
{
|
||||
"metric_name": metric_name,
|
||||
"title": title,
|
||||
"description": description,
|
||||
"process_graph": process.flat_graph() if hasattr(process, "flat_graph") else None,
|
||||
}
|
||||
),
|
||||
)
|
||||
band_name = infer_band_name(cube, preferred=("DEM", "elevation", "band_0"))
|
||||
dem = cube.band(band_name) if band_name else cube
|
||||
|
||||
if hasattr(process, "create_job"):
|
||||
job = process.create_job(
|
||||
title=title,
|
||||
description=description,
|
||||
out_format="JSON",
|
||||
)
|
||||
logger.info(
|
||||
"openEO batch job created: %s",
|
||||
_serialize_for_log({"metric_name": metric_name, "job_ref": _extract_job_ref(job)}),
|
||||
)
|
||||
started_job = job.start_and_wait()
|
||||
if started_job is not None:
|
||||
job = started_job
|
||||
logger.info(
|
||||
"openEO batch job finished: %s",
|
||||
_serialize_for_log({"metric_name": metric_name, "job_ref": _extract_job_ref(job)}),
|
||||
)
|
||||
return _load_job_result_payload(job), _extract_job_ref(job)
|
||||
|
||||
logger.info("openEO process uses synchronous execution fallback for metric `%s`.", metric_name)
|
||||
return process.execute(), None
|
||||
|
||||
|
||||
def _load_job_result_payload(job: Any) -> Any:
|
||||
results = job.get_results()
|
||||
|
||||
if hasattr(results, "download_files"):
|
||||
with TemporaryDirectory(prefix="openeo-job-") as temp_dir:
|
||||
results.download_files(temp_dir)
|
||||
downloaded_files = sorted(str(path.relative_to(temp_dir)) for path in Path(temp_dir).rglob("*") if path.is_file())
|
||||
logger.info(
|
||||
"openEO batch job files downloaded: %s",
|
||||
_serialize_for_log({"job_ref": _extract_job_ref(job), "files": downloaded_files}),
|
||||
)
|
||||
payload = _load_first_json_payload(Path(temp_dir), job_ref=_extract_job_ref(job))
|
||||
if payload is not None:
|
||||
return payload
|
||||
|
||||
if hasattr(results, "get_metadata"):
|
||||
metadata = results.get_metadata()
|
||||
if isinstance(metadata, dict) and metadata.get("data") is not None:
|
||||
return metadata["data"]
|
||||
|
||||
raise OpenEOExecutionError(
|
||||
f"openEO batch job `{_extract_job_ref(job) or 'unknown'}` completed but no JSON result payload could be loaded."
|
||||
)
|
||||
|
||||
|
||||
def _load_first_json_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
|
||||
asset_payload = _load_stac_asset_payload(directory, job_ref=job_ref)
|
||||
if asset_payload is not None:
|
||||
return asset_payload
|
||||
|
||||
for candidate in sorted(directory.rglob("*.json")):
|
||||
payload = _read_json_file(candidate, job_ref=job_ref)
|
||||
if payload is None:
|
||||
continue
|
||||
if _looks_like_stac_metadata_payload(payload):
|
||||
continue
|
||||
return payload
|
||||
return None
|
||||
|
||||
|
||||
def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
|
||||
for candidate in sorted(directory.rglob("*.json")):
|
||||
payload = _read_json_file(candidate, job_ref=job_ref)
|
||||
if not _looks_like_stac_metadata_payload(payload):
|
||||
continue
|
||||
for asset_name, asset_path in _iter_stac_asset_paths(payload, directory):
|
||||
if asset_path.suffix.lower() != ".json":
|
||||
continue
|
||||
if not asset_path.exists():
|
||||
logger.warning(
|
||||
"openEO STAC asset file is missing: %s",
|
||||
_serialize_for_log(
|
||||
{
|
||||
"job_ref": job_ref,
|
||||
"stac_path": str(candidate),
|
||||
"asset_name": asset_name,
|
||||
"asset_path": str(asset_path),
|
||||
}
|
||||
),
|
||||
)
|
||||
continue
|
||||
logger.info(
|
||||
"openEO batch job selecting STAC asset payload: %s",
|
||||
_serialize_for_log(
|
||||
{
|
||||
"job_ref": job_ref,
|
||||
"stac_path": str(candidate),
|
||||
"asset_name": asset_name,
|
||||
"asset_path": str(asset_path),
|
||||
}
|
||||
),
|
||||
)
|
||||
return _read_json_file(asset_path, job_ref=job_ref)
|
||||
return None
|
||||
|
||||
|
||||
def _iter_stac_asset_paths(payload: Any, directory: Path) -> list[tuple[str, Path]]:
|
||||
if not isinstance(payload, dict):
|
||||
return []
|
||||
assets = payload.get("assets")
|
||||
if not isinstance(assets, dict):
|
||||
return []
|
||||
resolved_paths: list[tuple[str, Path]] = []
|
||||
for asset_name, asset_details in assets.items():
|
||||
if not isinstance(asset_details, dict):
|
||||
continue
|
||||
href = asset_details.get("href")
|
||||
if not href:
|
||||
continue
|
||||
raw_path = Path(str(href))
|
||||
if raw_path.is_absolute():
|
||||
resolved = directory / raw_path.name
|
||||
else:
|
||||
resolved = directory / raw_path
|
||||
resolved_paths.append((str(asset_name), resolved))
|
||||
return resolved_paths
|
||||
|
||||
|
||||
def _looks_like_stac_metadata_path(path: Path) -> bool:
|
||||
name = path.name.lower()
|
||||
return name in {"item.json", "collection.json"} or name.endswith(".stac-item.json")
|
||||
|
||||
|
||||
def _looks_like_stac_metadata_payload(payload: Any) -> bool:
|
||||
return isinstance(payload, dict) and "assets" in payload and any(
|
||||
key in payload for key in ("stac_version", "stac_extensions", "extent", "summaries")
|
||||
)
|
||||
|
||||
|
||||
def _read_json_file(path: Path, *, job_ref: str | None = None) -> Any:
|
||||
raw_text = path.read_text(encoding="utf-8", errors="replace")
|
||||
if not raw_text.strip():
|
||||
logger.warning(
|
||||
"openEO batch job JSON file is empty: %s",
|
||||
_serialize_for_log({"job_ref": job_ref, "path": str(path), "preview": raw_text[:500]}),
|
||||
)
|
||||
return None
|
||||
try:
|
||||
slope_rad = dem.slope()
|
||||
slope_deg = slope_rad * (180.0 / math.pi)
|
||||
aggregated = slope_deg.aggregate_spatial(geometries=feature_collection, reducer="mean").execute()
|
||||
return {
|
||||
"results": parse_aggregate_spatial_response(aggregated, "slope_deg"),
|
||||
"supported": True,
|
||||
}
|
||||
except Exception:
|
||||
return {
|
||||
"results": {feature["id"]: {"slope_deg": None} for feature in feature_collection.get("features", [])},
|
||||
"supported": False,
|
||||
}
|
||||
return json.loads(raw_text)
|
||||
except json.JSONDecodeError as exc:
|
||||
logger.exception(
|
||||
"openEO batch job JSON parsing failed: %s",
|
||||
_serialize_for_log(
|
||||
{
|
||||
"job_ref": job_ref,
|
||||
"path": str(path),
|
||||
"error": str(exc),
|
||||
"preview": raw_text[:1000],
|
||||
}
|
||||
),
|
||||
)
|
||||
raise OpenEOExecutionError(
|
||||
f"Failed to parse openEO batch result file `{path.name}` for job `{job_ref or 'unknown'}`: {exc}"
|
||||
) from exc
|
||||
|
||||
|
||||
def parse_aggregate_spatial_response(payload: Any, metric_name: str) -> dict[str, dict[str, Any]]:
|
||||
def _extract_job_ref(job: Any) -> str | None:
|
||||
for attribute_name in ("job_id", "id"):
|
||||
value = getattr(job, attribute_name, None)
|
||||
if value:
|
||||
return str(value)
|
||||
if hasattr(job, "describe_job"):
|
||||
metadata = job.describe_job()
|
||||
if isinstance(metadata, dict) and metadata.get("id"):
|
||||
return str(metadata["id"])
|
||||
return None
|
||||
|
||||
|
||||
def parse_aggregate_spatial_response(
|
||||
payload: Any,
|
||||
metric_name: str,
|
||||
*,
|
||||
job_ref: str | None = None,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> dict[str, dict[str, Any]]:
|
||||
"""
|
||||
Parse different JSON shapes returned by openEO aggregate_spatial executions.
|
||||
"""
|
||||
@@ -476,10 +913,20 @@ def parse_aggregate_spatial_response(payload: Any, metric_name: str) -> dict[str
|
||||
return _parse_feature_collection_results(payload, metric_name)
|
||||
|
||||
if isinstance(payload, dict):
|
||||
return _parse_mapping_results(payload, metric_name)
|
||||
return _parse_mapping_results(
|
||||
payload,
|
||||
metric_name,
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
)
|
||||
|
||||
if isinstance(payload, list):
|
||||
return _parse_list_results(payload, metric_name)
|
||||
return _parse_list_results(
|
||||
payload,
|
||||
metric_name,
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
)
|
||||
|
||||
raise OpenEOExecutionError(f"Unsupported openEO aggregate_spatial response type: {type(payload)!r}")
|
||||
|
||||
@@ -495,36 +942,174 @@ def _parse_feature_collection_results(payload: dict[str, Any], metric_name: str)
|
||||
if not feature_id:
|
||||
continue
|
||||
properties = feature.get("properties") or {}
|
||||
_log_feature_mismatch(feature_id, properties, metric_name)
|
||||
value = _extract_aggregate_value(properties)
|
||||
results[feature_id] = {metric_name: _coerce_float(value)}
|
||||
return results
|
||||
|
||||
|
||||
def _parse_mapping_results(payload: dict[str, Any], metric_name: str) -> dict[str, dict[str, Any]]:
|
||||
def _parse_mapping_results(
|
||||
payload: dict[str, Any],
|
||||
metric_name: str,
|
||||
*,
|
||||
job_ref: str | None = None,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> dict[str, dict[str, Any]]:
|
||||
if "data" in payload and isinstance(payload["data"], (dict, list)):
|
||||
return parse_aggregate_spatial_response(payload["data"], metric_name)
|
||||
return parse_aggregate_spatial_response(
|
||||
payload["data"],
|
||||
metric_name,
|
||||
job_ref=job_ref,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
)
|
||||
|
||||
results: dict[str, dict[str, Any]] = {}
|
||||
for feature_id, value in payload.items():
|
||||
if feature_id in {"type", "links", "meta"}:
|
||||
continue
|
||||
results[str(feature_id)] = {metric_name: _coerce_float(_extract_aggregate_value(value))}
|
||||
normalized_feature_id = _normalize_feature_id(
|
||||
feature_id,
|
||||
expected_feature_ids=expected_feature_ids,
|
||||
)
|
||||
if isinstance(value, dict):
|
||||
_log_feature_mismatch(str(normalized_feature_id), value, metric_name)
|
||||
results[str(normalized_feature_id)] = {metric_name: _coerce_float(_extract_aggregate_value(value))}
|
||||
return results
|
||||
|
||||
|
||||
def _parse_list_results(
    payload: list[Any],
    metric_name: str,
    *,
    job_ref: str | None = None,
    expected_feature_ids: list[str] | None = None,
) -> dict[str, dict[str, Any]]:
    """Parse an aggregate_spatial result shaped as a positional list.

    Args:
        payload: List of per-feature results; items may be dicts carrying an
            explicit identifier or bare scalar values.
        metric_name: Key under which the extracted value is stored per feature.
        job_ref: Job reference, accepted for signature symmetry with the other
            ``_parse_*_results`` helpers (not used here).
        expected_feature_ids: Ordered cell codes used to translate list
            positions back into cell codes.

    Returns:
        Mapping of feature id to ``{metric_name: float_or_none}``.
    """
    results: dict[str, dict[str, Any]] = {}
    for index, item in enumerate(payload):
        if isinstance(item, dict):
            # Prefer an explicit identifier carried by the item; fall back to
            # mapping the list position onto the expected cell codes.
            feature_id = str(
                item.get("id")
                or item.get("cell_code")
                or item.get("feature_id")
                or _normalize_feature_id(index, expected_feature_ids=expected_feature_ids)
            )
            _log_feature_mismatch(feature_id, item, metric_name)
            value = _extract_aggregate_value(item)
        else:
            # Bare scalar: the only identity available is the position.
            feature_id = str(_normalize_feature_id(index, expected_feature_ids=expected_feature_ids))
            value = item
        results[feature_id] = {metric_name: _coerce_float(value)}
    return results
|
||||
|
||||
|
||||
def _normalize_feature_id(
|
||||
raw_feature_id: Any,
|
||||
*,
|
||||
expected_feature_ids: list[str] | None = None,
|
||||
) -> str:
|
||||
feature_id = str(raw_feature_id)
|
||||
if not expected_feature_ids:
|
||||
return feature_id
|
||||
try:
|
||||
index = int(feature_id)
|
||||
except (TypeError, ValueError):
|
||||
return feature_id
|
||||
if index < 0 or index >= len(expected_feature_ids):
|
||||
return feature_id
|
||||
return str(expected_feature_ids[index])
|
||||
|
||||
|
||||
def _log_raw_payload_summary(payload: Any, *, metric_name: str, job_ref: str | None = None) -> dict[str, Any]:
    """Log a compact summary of a raw openEO payload and return the summary stats.

    Emits a warning when the payload contains no cells, then logs (and
    returns) the cell count, a sample of up to five cell codes, and the
    sorted set of feature keys seen across cells.
    """
    cells = _extract_payload_cells(payload)
    summary: dict[str, Any] = {
        "returned_cell_count": len(cells),
        "payload_keys_sample": [code for code, _ in cells[:5]],
        "available_features": sorted(_collect_payload_feature_names(payload)),
    }

    if not cells:
        logger.warning("openEO payload is empty for job_ref=%s", job_ref)

    logger.info(
        "openEO payload summary: %s",
        _serialize_for_log({"metric_name": metric_name, "job_ref": job_ref, **summary}),
    )
    return summary
|
||||
|
||||
|
||||
def _extract_payload_cells(payload: Any) -> list[tuple[str, Any]]:
|
||||
if payload is None:
|
||||
return []
|
||||
if isinstance(payload, dict) and payload.get("type") == "FeatureCollection":
|
||||
cells = []
|
||||
for feature in payload.get("features", []):
|
||||
feature_id = str(
|
||||
feature.get("id")
|
||||
or (feature.get("properties") or {}).get("cell_code")
|
||||
or (feature.get("properties") or {}).get("id")
|
||||
or ""
|
||||
)
|
||||
if feature_id:
|
||||
cells.append((feature_id, feature.get("properties") or {}))
|
||||
return cells
|
||||
if isinstance(payload, dict) and "features" in payload and isinstance(payload["features"], list):
|
||||
return _extract_payload_cells({"type": "FeatureCollection", "features": payload["features"]})
|
||||
if isinstance(payload, dict) and "data" in payload and isinstance(payload["data"], (dict, list)):
|
||||
return _extract_payload_cells(payload["data"])
|
||||
if isinstance(payload, dict):
|
||||
return [
|
||||
(str(feature_id), value)
|
||||
for feature_id, value in payload.items()
|
||||
if feature_id not in {"type", "links", "meta", "data"}
|
||||
]
|
||||
if isinstance(payload, list):
|
||||
cells = []
|
||||
for index, item in enumerate(payload):
|
||||
if isinstance(item, dict):
|
||||
feature_id = str(item.get("id") or item.get("cell_code") or item.get("feature_id") or index)
|
||||
else:
|
||||
feature_id = str(index)
|
||||
cells.append((feature_id, item))
|
||||
return cells
|
||||
return []
|
||||
|
||||
|
||||
def _collect_payload_feature_names(payload: Any) -> set[str]:
    """Collect every key present across all dict-valued cells of the payload."""
    return {
        str(key)
        for _code, raw_value in _extract_payload_cells(payload)
        if isinstance(raw_value, dict)
        for key in raw_value
    }
|
||||
|
||||
|
||||
def _log_feature_mismatch(cell_code: str, raw_value: dict[str, Any], metric_name: str) -> None:
    """Warn when a cell's raw value carries none of the keys we know how to read.

    Recognized keys are the cluster metric names, the requested metric, the
    common aggregate keys, and identity keys; an empty value is ignored.
    """
    keys = sorted(str(key) for key in raw_value)
    if not keys:
        return
    known = {metric_name, "mean", "value", "result", "average", "id", "cell_code"}
    known.update(CLUSTER_METRIC_NAMES)
    if known.isdisjoint(keys):
        logger.warning("Feature mismatch for cell=%s, available_keys=%s", cell_code, keys)
|
||||
|
||||
|
||||
def _extract_aggregate_value(value: Any) -> Any:
|
||||
if isinstance(value, dict):
|
||||
for key in ("mean", "value", "result", "average"):
|
||||
@@ -589,3 +1174,9 @@ def _normalize_date(value: date | str) -> str:
|
||||
if isinstance(value, date):
|
||||
return value.isoformat()
|
||||
return str(value)
|
||||
|
||||
|
||||
def _parse_date_value(value: date | str) -> date:
|
||||
if isinstance(value, date):
|
||||
return value
|
||||
return date.fromisoformat(str(value))
|
||||
|
||||
Reference in New Issue
Block a user