From cc2aebd9854e169b4d80e92c55a8e7042cca90ae Mon Sep 17 00:00:00 2001 From: Darius Couchard Date: Tue, 19 Mar 2024 11:17:10 +0100 Subject: [PATCH 1/2] Added additional BAP distance to clouds + fixed to_scl_dilation mask inclusion --- .../S2_extraction_example.ipynb | 36 ++++++++++------- src/openeo_gfmap/fetching/commons.py | 10 ++++- .../preprocessing/cloudmasking.py | 27 +++++++++++++ src/openeo_gfmap/preprocessing/udf_cldmask.py | 39 +++++++++++++++++++ 4 files changed, 96 insertions(+), 16 deletions(-) create mode 100644 src/openeo_gfmap/preprocessing/udf_cldmask.py diff --git a/examples/extraction_pipelines/S2_extraction_example.ipynb b/examples/extraction_pipelines/S2_extraction_example.ipynb index 00a3458..784de2a 100644 --- a/examples/extraction_pipelines/S2_extraction_example.ipynb +++ b/examples/extraction_pipelines/S2_extraction_example.ipynb @@ -555,6 +555,27 @@ " fetch_type = FetchType.POLYGON\n", " bands_to_download = ['S2-L2A-B01', 'S2-L2A-B02', 'S2-L2A-B03', 'S2-L2A-B04', 'S2-L2A-B05', 'S2-L2A-B06', 'S2-L2A-B07', 'S2-L2A-B08', 'S2-L2A-B8A', 'S2-L2A-B09', 'S2-L2A-B11', 'S2-L2A-B12', 'S2-L2A-SCL']\n", "\n", + " # Compute the SCL dilation and add it to the cube\n", + " sub_collection = connection.load_collection(\n", + " collection_id=\"SENTINEL2_L2A\",\n", + " bands=[\"SCL\"],\n", + " temporal_extent=[start_date, end_date],\n", + " properties={\n", + " \"eo:cloud_cover\": lambda val: val <= 95.0,\n", + " \"tileId\": lambda val: val == row.s2_tile\n", + " }\n", + " )\n", + " scl_dilated_mask = sub_collection.process(\n", + " \"to_scl_dilation_mask\",\n", + " data=sub_collection,\n", + " scl_band_name=\"SCL\",\n", + " kernel1_size=17, # 17px dilation on a 20m layer\n", + " kernel2_size=77, # 77px dilation on a 20m layer\n", + " mask1_values=[2, 4, 5, 6, 7],\n", + " mask2_values=[3, 8, 9, 10, 11],\n", + " erosion_kernel_size=3\n", + " ).rename_labels(\"bands\", [\"S2-L2A-SCL_DILATED_MASK\"])\n", + "\n", " # Create the job to extract S2\n", " extraction_parameters = {\n", " \"target_resolution\": 10,\n", @@ -562,6 +583,7 @@ " \"eo:cloud_cover\": lambda val: val <= 95.0,\n", " \"tileId\": lambda val: val == row.s2_tile\n", " },\n", + " \"additional_masks\": scl_dilated_mask # Add an additional mask computed from the SCL layer\n", " }\n", " extractor = build_sentinel2_l2a_extractor(\n", " backend_context, bands=bands_to_download, fetch_type=fetch_type.POLYGON, **extraction_parameters \n", @@ -569,20 +591,6 @@ "\n", " cube = extractor.get_cube(connection, spatial_extent_url, temporal_context)\n", "\n", - " # Compute the SCL dilation and add it to the cube\n", - " scl_dilated_mask = cube.process(\n", - " \"to_scl_dilation_mask\",\n", - " data=cube,\n", - " scl_band_name=\"S2-L2A-SCL\",\n", - " kernel1_size=17, # 17px dilation on a 20m layer\n", - " kernel2_size=77, # 77px dilation on a 20m layer\n", - " mask1_values=[2, 4, 5, 6, 7],\n", - " mask2_values=[3, 8, 9, 10, 11],\n", - " erosion_kernel_size=3\n", - " ).rename_labels(\"bands\", [\"S2-L2A-SCL_DILATED_MASK\"])\n", - "\n", - " cube = cube.merge_cubes(scl_dilated_mask)\n", - "\n", " # Compute the distance to cloud and add it to the cube\n", " scl = cube.filter_bands(['S2-L2A-SCL'])\n", " distance_to_cloud = scl.apply_neighborhood(\n", diff --git a/src/openeo_gfmap/fetching/commons.py b/src/openeo_gfmap/fetching/commons.py index 69dc65f..f59f606 100644 --- a/src/openeo_gfmap/fetching/commons.py +++ b/src/openeo_gfmap/fetching/commons.py @@ -58,8 +58,6 @@ def resample_reproject( def rename_bands(datacube: openeo.DataCube, mapping: dict) -> openeo.DataCube: """Rename the bands from the given mapping scheme""" # Filter out bands that are not part of the datacube - print(datacube.dimension_labels("bands")) - def filter_condition(band_name, _): return band_name in datacube.metadata.band_names @@ -136,6 +134,14 @@ def load_collection( ) cube = cube.mask(pre_mask.resample_cube_spatial(cube)) + # Include a band containing the SCL dilated band + additional_mask = params.get("additional_mask", None) + if additional_mask is not None: + assert isinstance(additional_mask, openeo.DataCube), ( + f"The 'include_scl_dilation' parameter must be an openeo datacube, " f"got {additional_mask}." + ) + cube = cube.merge_cubes(additional_mask.resample_cube_spatial(cube)) + if fetch_type == FetchType.POLYGON: if isinstance(spatial_extent, str): geometry = connection.load_url( diff --git a/src/openeo_gfmap/preprocessing/cloudmasking.py b/src/openeo_gfmap/preprocessing/cloudmasking.py index 93289d3..88eb9ee 100644 --- a/src/openeo_gfmap/preprocessing/cloudmasking.py +++ b/src/openeo_gfmap/preprocessing/cloudmasking.py @@ -234,3 +234,30 @@ def bap_masking(cube: openeo.DataCube, period: Union[str, list], **params: dict) return optical_cube return optical_cube.merge_cubes(nonoptical_cube) + + +def cldmask_percentage(cube: openeo.DataCube, percentage: float = 0.95) -> openeo.DataCube: + """Compute a cloud mask array, that either fully covers an observation or is empty. + It computes the percentage of HIGH_CLOUD_PROBABILITY pixels in the SCL mask. If the percentage + is higher than the given threshold, the mask will be covering the observation, otherwise False. + """ + non_scl_cube = cube.filter_bands( + bands=list(filter(lambda band: "SCL" not in band, cube.metadata.band_names)) + ) + + scl_cube = cube.filter_bands(["SCL"]) + + cld_mask = scl_cube.apply_neighborhood( + process=openeo.UDF.from_file("udf_mask.py", context={}), + size=[ + {"dimension": "x", "unit": "px", "value": 1024}, + {"dimension": "y", "unit": "px", "value": 1024}, + {"dimension": "t", "value": 1} + ], + overlap=[], + ) + + non_scl_cube = non_scl_cube.mask(cld_mask.resample_cube_spatial(cube)) + + return non_scl_cube.merge_cubes(scl_cube) + diff --git a/src/openeo_gfmap/preprocessing/udf_cldmask.py b/src/openeo_gfmap/preprocessing/udf_cldmask.py new file mode 100644 index 0000000..f58befb --- /dev/null +++ b/src/openeo_gfmap/preprocessing/udf_cldmask.py @@ -0,0 +1,39 @@ +import numpy as np +import xarray as xr +from openeo.udf import XarrayDataCube + + +def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube: + """ + Computes a cloud mask covering a full observation or nothing depending on the percentage of + high probability cloud pixels. If the amount of before mentioned pixels is higher than 95%, + then returns a mask covering the whole observation, otherwise returns an empty mask. + """ + array = cube.get_array().transpose("t", "bands", "y", "x") + + output_array = np.zeros( + shape=(array.shape[0], 1, array.shape[2], array.shape[3]), + dtype=np.uint8 + ) + + for i in range(array.shape[0]): + high_proba_count = ( + (array[i] == 9) * 1 + ).sum() + high_proba_percentage = high_proba_count / (array.shape[2] * array.shape[3]) + + if high_proba_percentage > 0.95: + output_array[i] = 1 + + output_array = xr.DataArray( + output_array, + dims=["t", "bands", "y", "x"], + coords={ + "t": array.t, + "bands": ["mask"], + "y": array.y, + "x": array.x, + }, + ) + + return XarrayDataCube(output_array) From 1fd09e5c219133c348b38e839ceae032c83b74da Mon Sep 17 00:00:00 2001 From: Darius Couchard Date: Wed, 20 Mar 2024 10:11:04 +0100 Subject: [PATCH 2/2] Fix! ruff --- .gitignore | 3 ++- src/openeo_gfmap/fetching/commons.py | 4 +++- src/openeo_gfmap/preprocessing/cloudmasking.py | 3 +-- src/openeo_gfmap/preprocessing/udf_cldmask.py | 9 +++------ 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index b3b05df..6d012c8 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,5 @@ cython_debug/ #.idea/ tests/test_openeo_gfmap/results/ -notebook/ \ No newline at end of file +notebook/ +data/ \ No newline at end of file diff --git a/src/openeo_gfmap/fetching/commons.py b/src/openeo_gfmap/fetching/commons.py index f59f606..b5b8d65 100644 --- a/src/openeo_gfmap/fetching/commons.py +++ b/src/openeo_gfmap/fetching/commons.py @@ -57,6 +57,7 @@ def resample_reproject( def rename_bands(datacube: openeo.DataCube, mapping: dict) -> openeo.DataCube: """Rename the bands from the given mapping scheme""" + # Filter out bands that are not part of the datacube def filter_condition(band_name, _): return band_name in datacube.metadata.band_names @@ -138,7 +139,8 @@ def load_collection( additional_mask = params.get("additional_mask", None) if additional_mask is not None: assert isinstance(additional_mask, openeo.DataCube), ( - f"The 'include_scl_dilation' parameter must be an openeo datacube, " f"got {additional_mask}." + f"The 'include_scl_dilation' parameter must be an openeo datacube, " + f"got {additional_mask}." ) cube = cube.merge_cubes(additional_mask.resample_cube_spatial(cube)) diff --git a/src/openeo_gfmap/preprocessing/cloudmasking.py b/src/openeo_gfmap/preprocessing/cloudmasking.py index 88eb9ee..374e3cb 100644 --- a/src/openeo_gfmap/preprocessing/cloudmasking.py +++ b/src/openeo_gfmap/preprocessing/cloudmasking.py @@ -252,7 +252,7 @@ def cldmask_percentage(cube: openeo.DataCube, percentage: float = 0.95) -> opene size=[ {"dimension": "x", "unit": "px", "value": 1024}, {"dimension": "y", "unit": "px", "value": 1024}, - {"dimension": "t", "value": 1} + {"dimension": "t", "value": 1}, ], overlap=[], ) @@ -260,4 +260,3 @@ def cldmask_percentage(cube: openeo.DataCube, percentage: float = 0.95) -> opene non_scl_cube = non_scl_cube.mask(cld_mask.resample_cube_spatial(cube)) return non_scl_cube.merge_cubes(scl_cube) - diff --git a/src/openeo_gfmap/preprocessing/udf_cldmask.py b/src/openeo_gfmap/preprocessing/udf_cldmask.py index f58befb..757e1a9 100644 --- a/src/openeo_gfmap/preprocessing/udf_cldmask.py +++ b/src/openeo_gfmap/preprocessing/udf_cldmask.py @@ -12,19 +12,16 @@ def apply_datacube(cube: XarrayDataCube, context: dict) -> XarrayDataCube: array = cube.get_array().transpose("t", "bands", "y", "x") output_array = np.zeros( - shape=(array.shape[0], 1, array.shape[2], array.shape[3]), - dtype=np.uint8 + shape=(array.shape[0], 1, array.shape[2], array.shape[3]), dtype=np.uint8 ) for i in range(array.shape[0]): - high_proba_count = ( - (array[i] == 9) * 1 - ).sum() + high_proba_count = ((array[i] == 9) * 1).sum() high_proba_percentage = high_proba_count / (array.shape[2] * array.shape[3]) if high_proba_percentage > 0.95: output_array[i] = 1 - + output_array = xr.DataArray( output_array, dims=["t", "bands", "y", "x"],