This commit is contained in:
2026-05-11 00:36:02 +03:30
parent 2a6321a263
commit 1740c20ddb
23 changed files with 1214 additions and 89 deletions
+115 -52
View File
@@ -29,23 +29,21 @@ DEFAULT_OPENEO_PROXY_URL = "socks5h://host.docker.internal:10808"
DEFAULT_OPENEO_TIMEOUT_SECONDS = 600.0
DEFAULT_OPENEO_HTTP_RETRY_TOTAL = 5
DEFAULT_OPENEO_HTTP_RETRY_BACKOFF_FACTOR = 2.0
DEFAULT_OPENEO_PAYLOAD_ARCHIVE_DIR = "logs/openeo_payloads"
SENTINEL2_COLLECTION = "SENTINEL2_L2A"
SENTINEL3_LST_COLLECTION = "SENTINEL3_SLSTR_L2_LST"
SENTINEL1_COLLECTION = "SENTINEL1_GRD"
VALID_SCL_CLASSES = (4, 5, 6)
METRIC_NAMES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv",
"soil_vv_db",
)
CLUSTER_METRIC_NAMES = (
"ndvi",
"ndwi",
"lst_c",
"soil_vv_db",
)
@@ -479,7 +477,6 @@ def compute_remote_sensing_metrics(
"backend_url": DEFAULT_OPENEO_BACKEND_URL,
"collections_used": [
SENTINEL2_COLLECTION,
SENTINEL3_LST_COLLECTION,
SENTINEL1_COLLECTION,
],
"job_refs": {},
@@ -490,7 +487,6 @@ def compute_remote_sensing_metrics(
metric_runners = [
("ndvi", compute_ndvi),
("ndwi", compute_ndwi),
("lst_c", compute_lst_c),
("soil_vv", compute_soil_vv),
]
for metric_name, runner in metric_runners:
@@ -640,40 +636,6 @@ def compute_ndwi(
}
def compute_lst_c(
    *,
    connection,
    feature_collection,
    spatial_extent,
    temporal_start,
    temporal_end,
    expected_feature_ids: list[str] | None = None,
) -> dict[str, Any]:
    """Compute the per-feature mean land-surface temperature in Celsius.

    Loads the Sentinel-3 SLSTR LST collection for the given spatial extent and
    normalized temporal window, converts the LST band from Kelvin to Celsius,
    reduces over time (mean) and aggregates spatially over the features.

    Args:
        connection: Open openEO backend connection.
        feature_collection: GeoJSON-like feature collection used as the
            ``aggregate_spatial`` geometries.
        spatial_extent: Bounding extent passed to ``load_collection``.
        temporal_start: Start of the temporal window (normalized via
            ``_normalize_date``).
        temporal_end: End of the temporal window.
        expected_feature_ids: Optional ordered feature ids used when parsing
            the aggregate response.

    Returns:
        Dict with ``results`` (parsed per-feature values), ``job_ref`` (batch
        job reference or ``None``), and ``payload_diagnostics``.
    """
    cube = connection.load_collection(
        SENTINEL3_LST_COLLECTION,
        spatial_extent=spatial_extent,
        temporal_extent=[_normalize_date(temporal_start), _normalize_date(temporal_end)],
    )
    # Fix: the original preferred tuple listed "LST" twice; the duplicate entry
    # after "LST_in" could never be selected, so it is removed.
    band_name = infer_band_name(cube, preferred=("LST", "LST_in", "band_0"))
    lst_k = cube.band(band_name) if band_name else cube
    lst_c = lst_k - 273.15  # Kelvin -> Celsius
    aggregated, job_ref = _run_aggregate_spatial_job(
        lst_c.mean_time().aggregate_spatial(geometries=feature_collection, reducer="mean"),
        metric_name="lst_c",
    )
    payload_diagnostics = _log_raw_payload_summary(aggregated, metric_name="lst_c", job_ref=job_ref)
    return {
        "results": parse_aggregate_spatial_response(
            aggregated,
            "lst_c",
            job_ref=job_ref,
            expected_feature_ids=expected_feature_ids,
        ),
        "job_ref": job_ref,
        "payload_diagnostics": payload_diagnostics,
    }
def compute_soil_vv(
*,
connection,
@@ -739,14 +701,15 @@ def _run_aggregate_spatial_job(process: Any, *, metric_name: str) -> tuple[Any,
"openEO batch job finished: %s",
_serialize_for_log({"metric_name": metric_name, "job_ref": _extract_job_ref(job)}),
)
return _load_job_result_payload(job), _extract_job_ref(job)
return _load_job_result_payload(job, metric_name=metric_name), _extract_job_ref(job)
logger.info("openEO process uses synchronous execution fallback for metric `%s`.", metric_name)
return process.execute(), None
def _load_job_result_payload(job: Any) -> Any:
def _load_job_result_payload(job: Any, *, metric_name: str) -> Any:
results = job.get_results()
job_ref = _extract_job_ref(job)
if hasattr(results, "download_files"):
with TemporaryDirectory(prefix="openeo-job-") as temp_dir:
@@ -754,26 +717,46 @@ def _load_job_result_payload(job: Any) -> Any:
downloaded_files = sorted(str(path.relative_to(temp_dir)) for path in Path(temp_dir).rglob("*") if path.is_file())
logger.info(
"openEO batch job files downloaded: %s",
_serialize_for_log({"job_ref": _extract_job_ref(job), "files": downloaded_files}),
_serialize_for_log({"job_ref": job_ref, "files": downloaded_files}),
)
payload = _load_first_json_payload(Path(temp_dir), job_ref=_extract_job_ref(job))
payload, payload_path = _load_first_json_payload_with_source(Path(temp_dir), job_ref=job_ref)
if payload is not None:
if payload_path is not None:
_persist_raw_payload_file(
source_path=payload_path,
metric_name=metric_name,
job_ref=job_ref,
)
return payload
if hasattr(results, "get_metadata"):
metadata = results.get_metadata()
if isinstance(metadata, dict) and metadata.get("data") is not None:
_persist_raw_payload_value(
payload=metadata["data"],
metric_name=metric_name,
job_ref=job_ref,
)
return metadata["data"]
raise OpenEOExecutionError(
f"openEO batch job `{_extract_job_ref(job) or 'unknown'}` completed but no JSON result payload could be loaded."
f"openEO batch job `{job_ref or 'unknown'}` completed but no JSON result payload could be loaded."
)
def _load_first_json_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
asset_payload = _load_stac_asset_payload(directory, job_ref=job_ref)
payload, _source_path = _load_first_json_payload_with_source(directory, job_ref=job_ref)
return payload
def _load_first_json_payload_with_source(
directory: Path,
*,
job_ref: str | None = None,
) -> tuple[Any | None, Path | None]:
asset_payload, asset_path = _load_stac_asset_payload(directory, job_ref=job_ref)
if asset_payload is not None:
return asset_payload
return asset_payload, asset_path
for candidate in sorted(directory.rglob("*.json")):
payload = _read_json_file(candidate, job_ref=job_ref)
@@ -781,11 +764,11 @@ def _load_first_json_payload(directory: Path, *, job_ref: str | None = None) ->
continue
if _looks_like_stac_metadata_payload(payload):
continue
return payload
return None
return payload, candidate
return None, None
def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) -> Any | None:
def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) -> tuple[Any | None, Path | None]:
for candidate in sorted(directory.rglob("*.json")):
payload = _read_json_file(candidate, job_ref=job_ref)
if not _looks_like_stac_metadata_payload(payload):
@@ -817,8 +800,88 @@ def _load_stac_asset_payload(directory: Path, *, job_ref: str | None = None) ->
}
),
)
return _read_json_file(asset_path, job_ref=job_ref)
return None
return _read_json_file(asset_path, job_ref=job_ref), asset_path
return None, None
def _persist_raw_payload_file(
    *,
    source_path: Path,
    metric_name: str,
    job_ref: str | None,
) -> None:
    """Copy a downloaded payload file into the archive directory.

    A no-op when archiving is disabled (``_build_payload_archive_path``
    returns ``None``). Otherwise the source file's bytes are copied verbatim
    to the archive location and the action is logged.
    """
    destination = _build_payload_archive_path(
        metric_name=metric_name,
        job_ref=job_ref,
        source_name=source_path.name,
    )
    if destination is None:
        # Archiving disabled via empty OPENEO_PAYLOAD_ARCHIVE_DIR.
        return
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_bytes(source_path.read_bytes())
    logger.info(
        "openEO raw payload archived: %s",
        _serialize_for_log(
            {
                "job_ref": job_ref,
                "metric_name": metric_name,
                "source_path": str(source_path),
                "archive_path": str(destination),
            }
        ),
    )
def _persist_raw_payload_value(
    *,
    payload: Any,
    metric_name: str,
    job_ref: str | None,
) -> None:
    """Serialize an in-memory payload to JSON and store it in the archive.

    Used when the payload came from job metadata rather than a downloaded
    file; a no-op when archiving is disabled.
    """
    destination = _build_payload_archive_path(
        metric_name=metric_name,
        job_ref=job_ref,
        source_name="metadata.json",
    )
    if destination is None:
        # Archiving disabled via empty OPENEO_PAYLOAD_ARCHIVE_DIR.
        return
    serialized = json.dumps(payload, ensure_ascii=True, indent=2, sort_keys=False, default=str)
    destination.parent.mkdir(parents=True, exist_ok=True)
    destination.write_text(serialized, encoding="utf-8")
    logger.info(
        "openEO raw payload archived from metadata: %s",
        _serialize_for_log(
            {
                "job_ref": job_ref,
                "metric_name": metric_name,
                "archive_path": str(destination),
            }
        ),
    )
def _build_payload_archive_path(
    *,
    metric_name: str,
    job_ref: str | None,
    source_name: str,
) -> Path | None:
    """Build the archive file path for a payload, or ``None`` when disabled.

    The directory comes from ``OPENEO_PAYLOAD_ARCHIVE_DIR`` (falling back to
    ``DEFAULT_OPENEO_PAYLOAD_ARCHIVE_DIR``); an empty/blank value disables
    archiving. The filename joins sanitized job-ref, metric and source names.
    """
    configured = os.environ.get("OPENEO_PAYLOAD_ARCHIVE_DIR", DEFAULT_OPENEO_PAYLOAD_ARCHIVE_DIR)
    archive_dir = str(configured).strip()
    if not archive_dir:
        return None
    components = (
        _sanitize_filename_component(job_ref or "unknown-job"),
        _sanitize_filename_component(metric_name or "unknown-metric"),
        _sanitize_filename_component(source_name or "payload.json"),
    )
    return Path(archive_dir) / "__".join(components)
def _sanitize_filename_component(value: str) -> str:
text = str(value or "").strip() or "unknown"
sanitized = "".join(character if character.isalnum() or character in {"-", "_", "."} else "_" for character in text)
return sanitized or "unknown"
def _iter_stac_asset_paths(payload: Any, directory: Path) -> list[tuple[str, Path]]:
@@ -997,7 +1060,7 @@ def _parse_list_results(
value = _extract_aggregate_value(item)
else:
feature_id = str(_normalize_feature_id(index, expected_feature_ids=expected_feature_ids))
value = item
value = _extract_aggregate_value(item)
results[feature_id] = {metric_name: _coerce_float(value)}
return results