2026-05-10 22:49:07 +03:30
parent 2d1f7da89e
commit 2a6321a263
15 changed files with 2667 additions and 162 deletions
+666 -75
@@ -1,27 +1,38 @@
from __future__ import annotations
import json
import logging
import math
import os
import time
from dataclasses import dataclass
from datetime import date
from decimal import Decimal
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any
from urllib.parse import urlsplit, urlunsplit
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from config.proxy import apply_requests_proxy, build_proxy_url_from_proxychains_env
from .models import AnalysisGridCell
logger = logging.getLogger(__name__)
DEFAULT_OPENEO_BACKEND_URL = "https://openeofed.dataspace.copernicus.eu"
DEFAULT_OPENEO_PROVIDER = "openeo"
DEFAULT_OPENEO_PROXY_URL = "socks5h://host.docker.internal:10808"
DEFAULT_OPENEO_TIMEOUT_SECONDS = 600.0
DEFAULT_OPENEO_HTTP_RETRY_TOTAL = 5
DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR = 2.0
SENTINEL2_COLLECTION = "SENTINEL2_L2A"
SENTINEL3_LST_COLLECTION = "SENTINEL3_SLSTR_L2_LST"
SENTINEL1_COLLECTION = "SENTINEL1_GRD"
COPERNICUS_DEM_COLLECTION = "COPERNICUS_30"
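# Sentinel-2 scene classification (SCL) classes treated as valid clear-sky pixels: 4 = vegetation, 5 = not vegetated, 6 = water.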
VALID_SCL_CLASSES = (4, 5, 6)
METRIC_NAMES = (
@@ -30,8 +41,12 @@ METRIC_NAMES = (
"lst_c",
"soil_vv",
"soil_vv_db",
"dem_m",
"slope_deg",
)
CLUSTER_METRIC_NAMES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv_db",
)
@@ -53,19 +68,67 @@ class TimeoutOverrideSession(requests.Session):
def __init__(self, timeout_seconds: float):
super().__init__()
self.timeout_seconds = timeout_seconds
self.last_response_preview = ""
self.last_response_content_type = ""
self.last_response_url = ""
def request(self, method, url, **kwargs):
timeout = kwargs.get("timeout")
# Only raise scalar timeouts to the floor; a (connect, read) tuple is not orderable against a float.
if timeout is None or (isinstance(timeout, (int, float)) and timeout < self.timeout_seconds):
kwargs["timeout"] = self.timeout_seconds
request_log = {
"method": str(method).upper(),
"url": url,
"timeout": kwargs.get("timeout"),
"params": kwargs.get("params"),
"json": kwargs.get("json"),
"data": kwargs.get("data"),
"headers": _sanitize_headers(kwargs.get("headers")),
"proxy_url": _sanitize_proxy_url(self.proxies.get("https") or self.proxies.get("http")),
}
logger.info("openEO request payload: %s", _serialize_for_log(request_log))
started_at = time.monotonic()
try:
response = super().request(method, url, **kwargs)
except Exception as exc:
logger.exception(
"openEO request failed after %.3fs: %s",
time.monotonic() - started_at,
_serialize_for_log(
{
"method": str(method).upper(),
"url": url,
"error": repr(exc),
}
),
)
raise
logger.info(
"openEO response received after %.3fs: %s",
time.monotonic() - started_at,
_serialize_for_log(
{
"method": str(method).upper(),
"url": url,
"status_code": response.status_code,
"headers": _sanitize_headers(dict(response.headers)),
}
),
)
self.last_response_url = str(response.url)
self.last_response_content_type = str(response.headers.get("Content-Type", ""))
self.last_response_preview = response.text[:1000] if response.text else ""
return response
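A minimal behaviour sketch of the timeout floor (illustrative; constructing the session sends no request):

    session = TimeoutOverrideSession(timeout_seconds=600.0)
    # session.get(url, timeout=30)   -> sent with timeout=600.0 (raised to the floor)
    # session.get(url, timeout=1200) -> kept at timeout=1200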
@dataclass(frozen=True)
class OpenEOConnectionSettings:
backend_url: str = DEFAULT_OPENEO_BACKEND_URL
auth_method: str = "client_credentials"
timeout_seconds: float = DEFAULT_OPENEO_TIMEOUT_SECONDS
client_id: str = ""
client_secret: str = ""
provider_id: str = ""
@@ -73,13 +136,18 @@ class OpenEOConnectionSettings:
password: str = ""
allow_interactive_oidc: bool = False
proxy_url: str = ""
http_retry_total: int = DEFAULT_OPENEO_HTTP_RETRY_TOTAL
http_retry_backoff_factor: float = DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR
@classmethod
def from_env(cls) -> "OpenEOConnectionSettings":
return cls(
backend_url=os.environ.get("OPENEO_BACKEND_URL", DEFAULT_OPENEO_BACKEND_URL).strip(),
auth_method=os.environ.get("OPENEO_AUTH_METHOD", "client_credentials").strip().lower(),
timeout_seconds=float(
os.environ.get("OPENEO_TIMEOUT_SECONDS", str(int(DEFAULT_OPENEO_TIMEOUT_SECONDS))).strip()
or str(int(DEFAULT_OPENEO_TIMEOUT_SECONDS))
),
client_id=os.environ.get("OPENEO_AUTH_CLIENT_ID", "").strip(),
client_secret=os.environ.get("OPENEO_AUTH_CLIENT_SECRET", "").strip(),
provider_id=os.environ.get("OPENEO_AUTH_PROVIDER_ID", "").strip(),
@@ -88,6 +156,17 @@ class OpenEOConnectionSettings:
allow_interactive_oidc=os.environ.get("OPENEO_ALLOW_INTERACTIVE_OIDC", "0").strip().lower()
in {"1", "true", "yes", "on"},
proxy_url=_resolve_openeo_proxy_url_from_env(),
http_retry_total=int(
os.environ.get("OPENEO_HTTP_RETRY_TOTAL", str(DEFAULT_OPENEO_HTTP_RETRY_TOTAL)).strip()
or str(DEFAULT_OPENEO_HTTP_RETRY_TOTAL)
),
http_retry_backoff_factor=float(
os.environ.get(
"OPENEO_HTTP_RETRY_BACKOFF_FACTOR",
str(DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR),
).strip()
or str(DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR)
),
)
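A hedged configuration sketch: with the module imported, `from_env` picks these up (values are illustrative; the variable names are the ones read above):

    import os
    os.environ["OPENEO_TIMEOUT_SECONDS"] = "900"
    os.environ["OPENEO_HTTP_RETRY_TOTAL"] = "3"
    settings = OpenEOConnectionSettings.from_env()
    assert settings.timeout_seconds == 900.0
    assert settings.http_retry_total == 3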
@@ -104,6 +183,46 @@ def _resolve_openeo_proxy_url_from_env() -> str:
return configured_proxy_url
def _sanitize_headers(headers: dict[str, Any] | None) -> dict[str, Any] | None:
if not headers:
return headers
return {key: _mask_sensitive_value(key, value) for key, value in headers.items()}
def _sanitize_proxy_url(proxy_url: str | None) -> str | None:
if not proxy_url:
return proxy_url
# Mask embedded credentials (user:password@host) so proxy URLs are safe to log.
parsed = urlsplit(proxy_url)
if not (parsed.username or parsed.password):
return proxy_url
host = parsed.hostname or "" if not parsed.port else f"{parsed.hostname}:{parsed.port}"
return urlunsplit(parsed._replace(netloc=f"***redacted***@{host}"))
def _serialize_for_log(payload: Any) -> str:
return json.dumps(_mask_sensitive_payload(payload), ensure_ascii=True, default=str, sort_keys=True)
def _mask_sensitive_payload(value: Any, parent_key: str = "") -> Any:
if isinstance(value, dict):
return {str(key): _mask_sensitive_payload(item, str(key)) for key, item in value.items()}
if isinstance(value, list):
return [_mask_sensitive_payload(item, parent_key) for item in value]
if isinstance(value, tuple):
return [_mask_sensitive_payload(item, parent_key) for item in value]
return _mask_sensitive_value(parent_key, value)
def _mask_sensitive_value(key: str, value: Any) -> Any:
normalized_key = (key or "").lower()
if normalized_key in {
"authorization",
"access_token",
"refresh_token",
"id_token",
"client_secret",
"password",
}:
return "***redacted***"
return value
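A redaction example: secrets are masked by key at any nesting depth, so request payloads can be logged verbatim:

    log_line = _serialize_for_log({
        "headers": {"Authorization": "Bearer abc123"},
        "json": {"client_secret": "s3cr3t", "scope": "openid"},
    })
    assert "abc123" not in log_line and "s3cr3t" not in log_line
    assert log_line.count("***redacted***") == 2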
def is_openeo_auth_configured(settings: OpenEOConnectionSettings | None = None) -> bool:
settings = settings or OpenEOConnectionSettings.from_env()
@@ -118,9 +237,26 @@ def is_openeo_auth_configured(settings: OpenEOConnectionSettings | None = None)
def build_openeo_requests_session(settings: OpenEOConnectionSettings) -> requests.Session:
session = TimeoutOverrideSession(settings.timeout_seconds)
session.headers.setdefault("Accept", "application/json")
adapter = HTTPAdapter(max_retries=_build_openeo_http_retry(settings))
session.mount("http://", adapter)
session.mount("https://", adapter)
return apply_requests_proxy(session, settings.proxy_url)
def _build_openeo_http_retry(settings: OpenEOConnectionSettings) -> Retry:
return Retry(
total=settings.http_retry_total,
connect=settings.http_retry_total,
read=settings.http_retry_total,
status=settings.http_retry_total,
backoff_factor=settings.http_retry_backoff_factor,
allowed_methods=None,
status_forcelist=(429, 500, 502, 503, 504),
raise_on_status=False,
)
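Note: `allowed_methods=None` tells urllib3 to retry every HTTP method (including POST), and `backoff_factor` drives an exponential sleep of roughly `backoff_factor * 2 ** (retries - 1)` seconds between attempts, capped at 120s by default; whether the first retry sleeps depends on the urllib3 version. With the defaults above:

    backoff_factor = 2.0
    print([backoff_factor * 2 ** (n - 1) for n in range(1, 6)])  # [2.0, 4.0, 8.0, 16.0, 32.0]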
def connect_openeo(settings: OpenEOConnectionSettings | None = None):
"""
Build an authenticated openEO connection using environment-driven configuration.
@@ -140,11 +276,21 @@ def connect_openeo(settings: OpenEOConnectionSettings | None = None):
raise OpenEOServiceError("The `openeo` Python client is required for remote sensing jobs.") from exc
session = build_openeo_requests_session(settings)
try:
connection = openeo.connect(
settings.backend_url,
session=session,
default_timeout=settings.timeout_seconds,
)
except requests.exceptions.JSONDecodeError as exc:
preview = (session.last_response_preview or "").strip()
content_type = session.last_response_content_type or "unknown"
response_url = session.last_response_url or settings.backend_url
raise OpenEOServiceError(
"openEO endpoint returned a non-JSON response while loading capabilities. "
f"url={response_url!r} content_type={content_type!r} preview={preview[:300]!r}. "
"This usually means the proxy returned an HTML page instead of the API response."
) from exc
def resolve_oidc_context(
provider_id: str | None,
@@ -295,6 +441,8 @@ def compute_remote_sensing_metrics(
*,
temporal_start: date | str,
temporal_end: date | str,
selected_features: list[str] | None = None,
progress_callback=None,
connection=None,
) -> dict[str, Any]:
"""
@@ -309,7 +457,6 @@ def compute_remote_sensing_metrics(
"metadata": {
"backend": DEFAULT_OPENEO_PROVIDER,
"collections_used": [],
"slope_supported": False,
"job_refs": {},
"failed_metrics": [],
},
@@ -318,6 +465,14 @@ def compute_remote_sensing_metrics(
connection = connection or connect_openeo()
feature_collection = build_feature_collection(cells)
spatial_extent = build_spatial_extent(cells)
log_openeo_request_summary(
cells=cells,
temporal_start=temporal_start,
temporal_end=temporal_end,
spatial_extent=spatial_extent,
selected_features=selected_features or list(METRIC_NAMES),
)
expected_feature_ids = [cell.cell_code for cell in cells]
results = initialize_metric_result_map(cells)
metadata = {
"backend": DEFAULT_OPENEO_PROVIDER,
@@ -326,11 +481,10 @@ def compute_remote_sensing_metrics(
SENTINEL2_COLLECTION,
SENTINEL3_LST_COLLECTION,
SENTINEL1_COLLECTION,
],
"slope_supported": True,
"job_refs": {},
"failed_metrics": [],
"payload_diagnostics": {},
}
metric_runners = [
@@ -338,29 +492,32 @@ def compute_remote_sensing_metrics(
("ndwi", compute_ndwi),
("lst_c", compute_lst_c),
("soil_vv", compute_soil_vv),
]
for metric_name, runner in metric_runners:
try:
if progress_callback is not None:
progress_callback(metric_name=metric_name, state="running", metadata=metadata)
metric_payload = runner(
connection=connection,
feature_collection=feature_collection,
spatial_extent=spatial_extent,
temporal_start=temporal_start,
temporal_end=temporal_end,
expected_feature_ids=expected_feature_ids,
)
merge_metric_results(results, metric_payload["results"])
metadata["job_refs"][metric_name] = metric_payload.get("job_ref")
metadata["payload_diagnostics"][metric_name] = metric_payload.get("payload_diagnostics", {})
if progress_callback is not None:
progress_callback(
metric_name=metric_name,
state="completed",
metadata=metadata,
metric_payload=metric_payload,
)
except Exception as exc:
if progress_callback is not None:
progress_callback(metric_name=metric_name, state="failed", metadata=metadata, error=str(exc))
raise OpenEOExecutionError(f"Failed to compute metric `{metric_name}`: {exc}") from exc
for cell_code, payload in results.items():
@@ -370,7 +527,54 @@ def compute_remote_sensing_metrics(
return {"results": results, "metadata": metadata}
def log_openeo_request_summary(
*,
cells: list[AnalysisGridCell],
temporal_start: date | str,
temporal_end: date | str,
spatial_extent: dict[str, float],
selected_features: list[str],
) -> None:
start_date = _parse_date_value(temporal_start)
end_date = _parse_date_value(temporal_end)
logger.info(
"openEO request summary: %s",
_serialize_for_log(
{
"cell_count": len(cells),
"date_range_days": max((end_date - start_date).days, 0) + 1,
"area_m2": round(_estimate_extent_area_m2(spatial_extent), 2),
"metrics": selected_features,
"spatial_extent": spatial_extent,
"temporal_start": start_date.isoformat(),
"temporal_end": end_date.isoformat(),
}
),
)
def _estimate_extent_area_m2(spatial_extent: dict[str, float]) -> float:
west = float(spatial_extent["west"])
east = float(spatial_extent["east"])
south = float(spatial_extent["south"])
north = float(spatial_extent["north"])
mean_lat_rad = math.radians((south + north) / 2.0)
meters_per_degree_lat = 111_320.0
meters_per_degree_lon = 111_320.0 * math.cos(mean_lat_rad)
width_m = max(east - west, 0.0) * meters_per_degree_lon
height_m = max(north - south, 0.0) * meters_per_degree_lat
return max(width_m, 0.0) * max(height_m, 0.0)
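A worked example of the equirectangular area estimate (illustrative extent near 35°N):

    import math
    extent = {"west": 51.00, "east": 51.01, "south": 35.00, "north": 35.01}
    mean_lat = math.radians((extent["south"] + extent["north"]) / 2.0)
    width_m = (extent["east"] - extent["west"]) * 111_320.0 * math.cos(mean_lat)
    height_m = (extent["north"] - extent["south"]) * 111_320.0
    print(round(width_m * height_m))  # ~1_015_000 m^2, i.e. roughly 1 km^2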
def compute_ndvi(
*,
connection,
feature_collection,
spatial_extent,
temporal_start,
temporal_end,
expected_feature_ids: list[str] | None = None,
) -> dict[str, Any]:
cube = connection.load_collection(
SENTINEL2_COLLECTION,
spatial_extent=spatial_extent,
@@ -382,11 +586,32 @@ def compute_ndvi(*, connection, feature_collection, spatial_extent, temporal_sta
red = cube.band("B04") * 0.0001
nir = cube.band("B08") * 0.0001
ndvi = ((nir - red) / (nir + red)).mask(invalid_mask.resample_cube_spatial(red))
aggregated, job_ref = _run_aggregate_spatial_job(
ndvi.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
metric_name="ndvi",
)
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="ndvi", job_ref=job_ref)
return {
"results": parse_aggregate_spatial_response(
aggregated,
"ndvi",
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
),
"job_ref": job_ref,
"payload_diagnostics": payload_diagnostics,
}
def compute_ndwi(
*,
connection,
feature_collection,
spatial_extent,
temporal_start,
temporal_end,
expected_feature_ids: list[str] | None = None,
) -> dict[str, Any]:
cube = connection.load_collection(
SENTINEL2_COLLECTION,
spatial_extent=spatial_extent,
@@ -398,11 +623,32 @@ def compute_ndwi(*, connection, feature_collection, spatial_extent, temporal_sta
green = cube.band("B03") * 0.0001
nir = cube.band("B08") * 0.0001
ndwi = ((green - nir) / (green + nir)).mask(invalid_mask.resample_cube_spatial(green))
aggregated, job_ref = _run_aggregate_spatial_job(
ndwi.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
metric_name="ndwi",
)
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="ndwi", job_ref=job_ref)
return {
"results": parse_aggregate_spatial_response(
aggregated,
"ndwi",
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
),
"job_ref": job_ref,
"payload_diagnostics": payload_diagnostics,
}
def compute_lst_c(
*,
connection,
feature_collection,
spatial_extent,
temporal_start,
temporal_end,
expected_feature_ids: list[str] | None = None,
) -> dict[str, Any]:
cube = connection.load_collection(
SENTINEL3_LST_COLLECTION,
spatial_extent=spatial_extent,
@@ -411,11 +657,32 @@ def compute_lst_c(*, connection, feature_collection, spatial_extent, temporal_st
band_name = infer_band_name(cube, preferred=("LST", "LST_in", "band_0"))
lst_k = cube.band(band_name) if band_name else cube
lst_c = lst_k - 273.15
aggregated, job_ref = _run_aggregate_spatial_job(
lst_c.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
metric_name="lst_c",
)
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="lst_c", job_ref=job_ref)
return {
"results": parse_aggregate_spatial_response(
aggregated,
"lst_c",
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
),
"job_ref": job_ref,
"payload_diagnostics": payload_diagnostics,
}
def compute_soil_vv(
*,
connection,
feature_collection,
spatial_extent,
temporal_start,
temporal_end,
expected_feature_ids: list[str] | None = None,
) -> dict[str, Any]:
cube = connection.load_collection(
SENTINEL1_COLLECTION,
spatial_extent=spatial_extent,
@@ -423,46 +690,216 @@ def compute_soil_vv(*, connection, feature_collection, spatial_extent, temporal_
bands=["VV"],
)
vv = cube.band("VV")
aggregated, job_ref = _run_aggregate_spatial_job(
vv.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
metric_name="soil_vv",
)
payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="soil_vv", job_ref=job_ref)
return {
"results": parse_aggregate_spatial_response(
aggregated,
"soil_vv",
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
),
"job_ref": job_ref,
"payload_diagnostics": payload_diagnostics,
}
def _run_aggregate_spatial_job(process: Any, *, metric_name: str) -> tuple[Any, str | None]:
title = f"crop-logic-{metric_name}"
description = f"Remote sensing aggregate_spatial execution for metric `{metric_name}`."
logger.info(
"openEO process graph prepared: %s",
_serialize_for_log(
{
"metric_name": metric_name,
"title": title,
"description": description,
"process_graph": process.flat_graph() if hasattr(process, "flat_graph") else None,
}
),
)
if hasattr(process, "create_job"):
job = process.create_job(
title=title,
description=description,
out_format="JSON",
)
logger.info(
"openEO batch job created: %s",
_serialize_for_log({"metric_name": metric_name, "job_ref": _extract_job_ref(job)}),
)
started_job = job.start_and_wait()
if started_job is not None:
job = started_job
logger.info(
"openEO batch job finished: %s",
_serialize_for_log({"metric_name": metric_name, "job_ref": _extract_job_ref(job)}),
)
return _load_job_result_payload(job), _extract_job_ref(job)
logger.info("openEO process uses synchronous execution fallback for metric `%s`.", metric_name)
return process.execute(), None
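The batch path is preferred so a job id is available for tracing; the synchronous `execute()` path exists for objects without `create_job`, e.g. test doubles. A hedged sketch of such a double (names invented):

    class _FakeProcess:
        def flat_graph(self):
            return {"agg": {"process_id": "aggregate_spatial"}}

        def execute(self):
            return {"cell-001": {"mean": 0.42}}

    payload, job_ref = _run_aggregate_spatial_job(_FakeProcess(), metric_name="ndvi")
    assert job_ref is None and payload == {"cell-001": {"mean": 0.42}}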
def _load_job_result_payload(job: Any) -> Any:
results = job.get_results()
if hasattr(results, "download_files"):
with TemporaryDirectory(prefix="openeo-job-") as temp_dir:
results.download_files(temp_dir)
downloaded_files = sorted(str(path.relative_to(temp_dir)) for path in Path(temp_dir).rglob("*") if path.is_file())
logger.info(
"openEO batch job files downloaded: %s",
_serialize_for_log({"job_ref": _extract_job_ref(job), "files": downloaded_files}),
)
payload = _load_first_json_payload(Path(temp_dir), job_ref=_extract_job_ref(job))
if payload is not None:
return payload
if hasattr(results, "get_metadata"):
metadata = results.get_metadata()
if isinstance(metadata, dict) and metadata.get("data") is not None:
return metadata["data"]
raise OpenEOExecutionError(
f"openEO batch job `{_extract_job_ref(job) or 'unknown'}` completed but no JSON result payload could be loaded."
)
def _load_first_json_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
asset_payload = _load_stac_asset_payload(directory, job_ref=job_ref)
if asset_payload is not None:
return asset_payload
for candidate in sorted(directory.rglob("*.json")):
payload = _read_json_file(candidate, job_ref=job_ref)
if payload is None:
continue
if _looks_like_stac_metadata_payload(payload):
continue
return payload
return None
def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
for candidate in sorted(directory.rglob("*.json")):
payload = _read_json_file(candidate, job_ref=job_ref)
if not _looks_like_stac_metadata_payload(payload):
continue
for asset_name, asset_path in _iter_stac_asset_paths(payload, directory):
if asset_path.suffix.lower() != ".json":
continue
if not asset_path.exists():
logger.warning(
"openEO STAC asset file is missing: %s",
_serialize_for_log(
{
"job_ref": job_ref,
"stac_path": str(candidate),
"asset_name": asset_name,
"asset_path": str(asset_path),
}
),
)
continue
logger.info(
"openEO batch job selecting STAC asset payload: %s",
_serialize_for_log(
{
"job_ref": job_ref,
"stac_path": str(candidate),
"asset_name": asset_name,
"asset_path": str(asset_path),
}
),
)
return _read_json_file(asset_path, job_ref=job_ref)
return None
def _iter_stac_asset_paths(payload: Any, directory: Path) -> list[tuple[str, Path]]:
if not isinstance(payload, dict):
return []
assets = payload.get("assets")
if not isinstance(assets, dict):
return []
resolved_paths: list[tuple[str, Path]] = []
for asset_name, asset_details in assets.items():
if not isinstance(asset_details, dict):
continue
href = asset_details.get("href")
if not href:
continue
raw_path = Path(str(href))
if raw_path.is_absolute():
resolved = directory / raw_path.name
else:
resolved = directory / raw_path
resolved_paths.append((str(asset_name), resolved))
return resolved_paths
def _looks_like_stac_metadata_path(path: Path) -> bool:
name = path.name.lower()
return name in {"item.json", "collection.json"} or name.endswith(".stac-item.json")
def _looks_like_stac_metadata_payload(payload: Any) -> bool:
return isinstance(payload, dict) and "assets" in payload and any(
key in payload for key in ("stac_version", "stac_extensions", "extent", "summaries")
)
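Illustrative payloads for the predicate: a STAC item carries `assets` plus a STAC marker key, while a plain result mapping carries neither:

    stac_item = {"stac_version": "1.0.0", "assets": {"timeseries.json": {"href": "timeseries.json"}}}
    assert _looks_like_stac_metadata_payload(stac_item)
    assert not _looks_like_stac_metadata_payload({"cell-001": {"mean": 0.4}})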
def _read_json_file(path: Path, *, job_ref: str | None = None) -> Any:
raw_text = path.read_text(encoding="utf-8", errors="replace")
if not raw_text.strip():
logger.warning(
"openEO batch job JSON file is empty: %s",
_serialize_for_log({"job_ref": job_ref, "path": str(path), "preview": raw_text[:500]}),
)
return None
try:
return json.loads(raw_text)
except json.JSONDecodeError as exc:
logger.exception(
"openEO batch job JSON parsing failed: %s",
_serialize_for_log(
{
"job_ref": job_ref,
"path": str(path),
"error": str(exc),
"preview": raw_text[:1000],
}
),
)
raise OpenEOExecutionError(
f"Failed to parse openEO batch result file `{path.name}` for job `{job_ref or 'unknown'}`: {exc}"
) from exc
def _extract_job_ref(job: Any) -> str | None:
for attribute_name in ("job_id", "id"):
value = getattr(job, attribute_name, None)
if value:
return str(value)
if hasattr(job, "describe_job"):
metadata = job.describe_job()
if isinstance(metadata, dict) and metadata.get("id"):
return str(metadata["id"])
return None
def parse_aggregate_spatial_response(
payload: Any,
metric_name: str,
*,
job_ref: str | None = None,
expected_feature_ids: list[str] | None = None,
) -> dict[str, dict[str, Any]]:
"""
Parse different JSON shapes returned by openEO aggregate_spatial executions.
"""
@@ -476,10 +913,20 @@ def parse_aggregate_spatial_response(payload: Any, metric_name: str) -> dict[str
return _parse_feature_collection_results(payload, metric_name)
if isinstance(payload, dict):
return _parse_mapping_results(
payload,
metric_name,
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
)
if isinstance(payload, list):
return _parse_list_results(
payload,
metric_name,
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
)
raise OpenEOExecutionError(f"Unsupported openEO aggregate_spatial response type: {type(payload)!r}")
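For orientation, the three accepted shapes side by side (values invented); each normalizes to `{cell_code: {metric: value}}`:

    fc = {"type": "FeatureCollection", "features": [{"id": "cell-001", "properties": {"mean": 0.41}}]}
    mapping = {"cell-001": {"mean": 0.41}}
    listed = [{"id": "cell-001", "mean": 0.41}]
    for shape in (fc, mapping, listed):
        assert parse_aggregate_spatial_response(shape, "ndvi") == {"cell-001": {"ndvi": 0.41}}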
@@ -495,36 +942,174 @@ def _parse_feature_collection_results(payload: dict[str, Any], metric_name: str)
if not feature_id:
continue
properties = feature.get("properties") or {}
_log_feature_mismatch(feature_id, properties, metric_name)
value = _extract_aggregate_value(properties)
results[feature_id] = {metric_name: _coerce_float(value)}
return results
def _parse_mapping_results(
payload: dict[str, Any],
metric_name: str,
*,
job_ref: str | None = None,
expected_feature_ids: list[str] | None = None,
) -> dict[str, dict[str, Any]]:
if "data" in payload and isinstance(payload["data"], (dict, list)):
return parse_aggregate_spatial_response(
payload["data"],
metric_name,
job_ref=job_ref,
expected_feature_ids=expected_feature_ids,
)
results: dict[str, dict[str, Any]] = {}
for feature_id, value in payload.items():
if feature_id in {"type", "links", "meta"}:
continue
normalized_feature_id = _normalize_feature_id(
feature_id,
expected_feature_ids=expected_feature_ids,
)
if isinstance(value, dict):
_log_feature_mismatch(str(normalized_feature_id), value, metric_name)
results[str(normalized_feature_id)] = {metric_name: _coerce_float(_extract_aggregate_value(value))}
return results
def _parse_list_results(
payload: list[Any],
metric_name: str,
*,
job_ref: str | None = None,
expected_feature_ids: list[str] | None = None,
) -> dict[str, dict[str, Any]]:
results: dict[str, dict[str, Any]] = {}
for index, item in enumerate(payload):
if isinstance(item, dict):
feature_id = str(
item.get("id")
or item.get("cell_code")
or item.get("feature_id")
or _normalize_feature_id(index, expected_feature_ids=expected_feature_ids)
)
_log_feature_mismatch(feature_id, item, metric_name)
value = _extract_aggregate_value(item)
else:
feature_id = str(_normalize_feature_id(index, expected_feature_ids=expected_feature_ids))
value = item
results[feature_id] = {metric_name: _coerce_float(value)}
return results
def _normalize_feature_id(
raw_feature_id: Any,
*,
expected_feature_ids: list[str] | None = None,
) -> str:
feature_id = str(raw_feature_id)
if not expected_feature_ids:
return feature_id
try:
index = int(feature_id)
except (TypeError, ValueError):
return feature_id
if index < 0 or index >= len(expected_feature_ids):
return feature_id
return str(expected_feature_ids[index])
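Example: some backends key `aggregate_spatial` results by positional index; with the expected ids available, index keys map back to cell codes while real ids and out-of-range indexes pass through unchanged:

    expected = ["A1", "A2"]
    assert _normalize_feature_id("1", expected_feature_ids=expected) == "A2"
    assert _normalize_feature_id("A1", expected_feature_ids=expected) == "A1"
    assert _normalize_feature_id("7", expected_feature_ids=expected) == "7"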
def _log_raw_payload_summary(payload: Any, *, metric_name: str, job_ref: str | None = None) -> dict[str, Any]:
payload_cells = _extract_payload_cells(payload)
payload_keys_sample = [cell_code for cell_code, _raw in payload_cells[:5]]
available_features = sorted(_collect_payload_feature_names(payload))
returned_cell_count = len(payload_cells)
is_empty = returned_cell_count == 0
if is_empty:
logger.warning("openEO payload is empty for job_ref=%s", job_ref)
logger.info(
"openEO payload summary: %s",
_serialize_for_log(
{
"metric_name": metric_name,
"job_ref": job_ref,
"returned_cell_count": returned_cell_count,
"payload_keys_sample": payload_keys_sample,
"available_features": available_features,
}
),
)
return {
"returned_cell_count": returned_cell_count,
"payload_keys_sample": payload_keys_sample,
"available_features": available_features,
}
def _extract_payload_cells(payload: Any) -> list[tuple[str, Any]]:
if payload is None:
return []
if isinstance(payload, dict) and payload.get("type") == "FeatureCollection":
cells = []
for feature in payload.get("features", []):
feature_id = str(
feature.get("id")
or (feature.get("properties") or {}).get("cell_code")
or (feature.get("properties") or {}).get("id")
or ""
)
if feature_id:
cells.append((feature_id, feature.get("properties") or {}))
return cells
if isinstance(payload, dict) and "features" in payload and isinstance(payload["features"], list):
return _extract_payload_cells({"type": "FeatureCollection", "features": payload["features"]})
if isinstance(payload, dict) and "data" in payload and isinstance(payload["data"], (dict, list)):
return _extract_payload_cells(payload["data"])
if isinstance(payload, dict):
return [
(str(feature_id), value)
for feature_id, value in payload.items()
if feature_id not in {"type", "links", "meta", "data"}
]
if isinstance(payload, list):
cells = []
for index, item in enumerate(payload):
if isinstance(item, dict):
feature_id = str(item.get("id") or item.get("cell_code") or item.get("feature_id") or index)
else:
feature_id = str(index)
cells.append((feature_id, item))
return cells
return []
def _collect_payload_feature_names(payload: Any) -> set[str]:
names: set[str] = set()
for _cell_code, raw_value in _extract_payload_cells(payload):
if isinstance(raw_value, dict):
names.update(str(key) for key in raw_value.keys())
return names
def _log_feature_mismatch(cell_code: str, raw_value: dict[str, Any], metric_name: str) -> None:
available_keys = sorted(str(key) for key in raw_value.keys())
if not available_keys:
return
recognized_keys = set(CLUSTER_METRIC_NAMES) | {
metric_name,
"mean",
"value",
"result",
"average",
"id",
"cell_code",
}
if not any(key in recognized_keys for key in available_keys):
logger.warning("Feature mismatch for cell=%s, available_keys=%s", cell_code, available_keys)
def _extract_aggregate_value(value: Any) -> Any:
if isinstance(value, dict):
for key in ("mean", "value", "result", "average"):
@@ -589,3 +1174,9 @@ def _normalize_date(value: date | str) -> str:
if isinstance(value, date):
return value.isoformat()
return str(value)
def _parse_date_value(value: date | str) -> date:
if isinstance(value, date):
return value
return date.fromisoformat(str(value))