Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

IM-1787: Improve memory efficiency of threshold plugin #1913

Merged
merged 35 commits into from
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
8b68ae7
Making threshold more efficient.
bayliffe Jan 3, 2023
83513a4
Getting the contributions right when using masked data.
bayliffe Jan 4, 2023
589ae1a
Correct unit setting bug.
bayliffe Jan 10, 2023
c3b9670
Beginning to restructure.
bayliffe Jan 11, 2023
8e2dc97
Changing interfaces, simplifying CLI, and updating unit tests. Incomp…
bayliffe Jan 11, 2023
9689768
Break up the occurence within vicinity plugin to allow use of compone…
bayliffe Jan 12, 2023
174258f
Figuring out where the vicinity processing differs.
bayliffe Jan 16, 2023
037fdc5
Add new exception. May be temporary.
bayliffe Jan 17, 2023
5005977
more incremental changes.
bayliffe Jan 19, 2023
6910855
Final fixes to vicinity processing from within the threshold plugin.
bayliffe Feb 1, 2023
3a9a669
Format changes.
bayliffe Feb 1, 2023
c9934f3
Remove defunct test. Add array slicing to avoid swelling memory when …
bayliffe Feb 2, 2023
1d87141
Use a slice iterator rather than list to reduce memory usage. As sugg…
bayliffe Jun 16, 2023
5e371f8
Set self.fill_masked which was lost in rebase.
bayliffe Jun 16, 2023
193bca2
Remove assumption of realization coordinate added when changing to us…
bayliffe Jun 16, 2023
ba3e291
Modify threshold such that it can once again accept a single threshol…
bayliffe Jun 16, 2023
aff1499
Remove doc-strings relating to fuzzy-bounds, the setting of which usi…
bayliffe Jun 16, 2023
33c3529
Add unit tests to cover the vicinity processing applied within the th…
bayliffe Jun 16, 2023
80f96c8
Format fixes.
bayliffe Jun 16, 2023
4fa3df7
Merge remote-tracking branch 'upstream/master' into im1787
bayliffe Jul 27, 2023
19083bf
Replicate percentile collapse functionality.
bayliffe Jul 28, 2023
9cbfccb
Added back land mask without vicinity exception.
bayliffe Aug 2, 2023
1bfc213
Rewriting threshold unit tests using pytest and extending coverage. I…
bayliffe Aug 2, 2023
94a7885
Unit test replacement complete. Doc-strings and formatting changes to…
bayliffe Aug 3, 2023
e63df9f
Add docstrings and typing to the conftest functions.
bayliffe Aug 3, 2023
5ae1450
Formatting corrections.
bayliffe Aug 3, 2023
f926509
Remove broken new test added to BasicThreshold tests which is not nee…
bayliffe Aug 4, 2023
24f24ee
Add vicinity tests using a landmask.
bayliffe Aug 4, 2023
5f54444
Partial review response.
bayliffe Aug 25, 2023
7d84c96
Further review changes.
bayliffe Aug 30, 2023
2e6bb36
Further review tweaks.
bayliffe Sep 12, 2023
8767a61
Tweak to type checking.
bayliffe Sep 14, 2023
6cdcb10
Isort reorder
bayliffe Sep 14, 2023
ab7edfa
Review changes.
bayliffe Oct 17, 2023
ffd3fb9
Fix my test for bounds set to None.
bayliffe Oct 17, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 1 addition & 12 deletions improver/cli/threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def process(
threshold_config (dict):
Threshold configuration containing threshold values and
(optionally) fuzzy bounds. Best used in combination with
'threshold_units' It should contain a dictionary of strings that
'threshold_units'. It should contain a dictionary of strings that
can be interpreted as floats with the structure:
"THRESHOLD_VALUE": [LOWER_BOUND, UPPER_BOUND]
e.g: {"280.0": [278.0, 282.0], "290.0": [288.0, 292.0]},
Expand Down Expand Up @@ -115,20 +115,9 @@ def process(
Returns:
iris.cube.Cube:
Cube of probabilities relative to the given thresholds

Raises:
ValueError: If threshold_config and threshold_values are both set
ValueError: If threshold_config is used for fuzzy thresholding
"""
from improver.threshold import Threshold

if threshold_config and threshold_values:
raise ValueError(
"--threshold-config and --threshold-values are mutually exclusive "
"- please set one or the other, not both"
)
if threshold_config and fuzzy_factor:
raise ValueError("--threshold-config cannot be used for fuzzy thresholding")
if fill_masked is not None:
fill_masked = float(fill_masked)

Expand Down
8 changes: 4 additions & 4 deletions improver/precipitation_type/shower_condition_probability.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,11 +172,11 @@ def process(self, cubes: CubeList) -> Cube:

# Threshold cubes
cloud_thresholded = Threshold(
self.cloud_threshold, comparison_operator="<="
threshold_values=self.cloud_threshold, comparison_operator="<="
).process(cloud)
convection_thresholded = Threshold(self.convection_threshold).process(
convection
)
convection_thresholded = Threshold(
threshold_values=self.convection_threshold
).process(convection)

# Fill any missing data in the convective ratio field with zeroes.
if np.ma.is_masked(convection_thresholded.data):
Expand Down
4 changes: 2 additions & 2 deletions improver/regrid/landsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,9 @@ def _get_matches(
# Identify nearby points on regridded input_land that match the
# selector_value
if selector_val > 0.5:
thresholder = Threshold(0.5)
thresholder = Threshold(threshold_values=0.5)
else:
thresholder = Threshold(0.5, comparison_operator="<=")
thresholder = Threshold(threshold_values=0.5, comparison_operator="<=")
in_vicinity = self.vicinity(thresholder(self.input_land))

# Identify those points sourced from the opposite mask that are
Expand Down
17 changes: 12 additions & 5 deletions improver/threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def __init__(
threshold_units: Optional[str] = None,
comparison_operator: str = ">",
collapse_coord: str = None,
vicinity: List[float] = None,
vicinity: Optional[Union[float, List[float]]] = None,
fill_masked: Optional[float] = None,
) -> None:
"""
Expand Down Expand Up @@ -162,7 +162,14 @@ def __init__(
ValueError: If both fuzzy_factor and fuzzy_bounds are set.
ValueError: Can only collapse over a realization coordinate or a percentile
gavinevans marked this conversation as resolved.
Show resolved Hide resolved
coordinate that has been rebadged as a realization coordinate.
ValueError: If threshold_config and threshold_values are both set
"""
if threshold_config and threshold_values:
raise ValueError(
"threshold_config and threshold_values are mutually exclusive "
"arguments - please provide one or the other, not both"
)

thresholds, fuzzy_bounds = self._set_thresholds(
threshold_values, threshold_config
)
Expand Down Expand Up @@ -211,14 +218,14 @@ def __init__(
"Can only collapse over a realization coordinate or a percentile "
"coordinate that has been rebadged as a realization coordinate."
)
else:
self.collapse_coord = collapse_coord
self.collapse_coord = collapse_coord

self.vicinity = None
if vicinity is not None:
if isinstance(vicinity, numbers.Number):
vicinity = [vicinity]
self.vicinity = [float(x) for x in vicinity]
if vicinity: # To handle an empty list being provided.
self.vicinity = [float(x) for x in vicinity]
MoseleyS marked this conversation as resolved.
Show resolved Hide resolved

self.fill_masked = fill_masked

Expand Down Expand Up @@ -654,7 +661,7 @@ def process(self, input_cube: Cube, landmask: Cube = None) -> Cube:
result = np.divide(dslice[valid], contribution_total[valid])
thresholded_cube.data[i, valid] = result

if (contribution_total == 0).any():
if not valid.all():
thresholded_cube.data = np.ma.masked_array(
thresholded_cube.data,
mask=np.broadcast_to(~valid, thresholded_cube.shape),
Expand Down
9 changes: 5 additions & 4 deletions improver/utilities/spatial.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,10 +474,11 @@ def create_vicinity_coord(
radius: Union[float, int], native_grid_point_radius: bool = False
) -> AuxCoord:
"""
Add a coordinate to the cube that records the vicinity radius that
has been applied to the data. This radius may be a distance in
physical units, or if native_grid_point_radius is True, it will be
a number of grid cells.
Create a coordinate that records the vicinity radius passed in.
This radius may be a distance in physical units, or if
native_grid_point_radius is True, it will be a number of grid cells.
If the latter an attribute comment is added to note that the radius
is in grid cells.

Args:
radius:
Expand Down
4 changes: 3 additions & 1 deletion improver/utilities/textural.py
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,9 @@ def process(self, input_cube: Cube) -> Cube:
ratios.append(self._calculate_ratio(cslice, cube_name, self.nbhood_radius))

ratios = ratios.merge_cube()
thresholded = Threshold(self.textural_threshold).process(ratios)
thresholded = Threshold(threshold_values=self.textural_threshold).process(
ratios
)

# Squeeze scalar threshold coordinate.
try:
Expand Down
20 changes: 10 additions & 10 deletions improver_tests/threshold/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@


def diagnostic_cube(n_realizations: int = 1, data: Optional[np.ndarray] = None) -> Cube:
"""Return a cube of preciptiation rate in mm/hr with n_realizations
"""Return a cube of precipitation rate in mm/hr with n_realizations
containing the data provided by the data argument.

Args:
Expand Down Expand Up @@ -136,7 +136,7 @@ def custom_cube(n_realizations: int, data: np.ndarray) -> Cube:
@pytest.fixture
def expected_cube_name(
comparison_operator: str, vicinity: Optional[Union[float, List[float]]]
) -> Cube:
) -> str:
"""Return a template for the name of a thresholded cube taking into
account the comparison operator and the application of vicinity
processing.
Expand All @@ -151,7 +151,7 @@ def expected_cube_name(
The expected diagnostic name after thresholding.
"""

vicinity_name = "_in_vicinity" if vicinity is not None else ""
vicinity_name = "_in_vicinity" if vicinity else ""

if "g" in comparison_operator.lower() or ">" in comparison_operator:
return f"probability_of_{{cube_name}}{vicinity_name}_above_threshold"
Expand All @@ -169,20 +169,20 @@ def expected_result(
) -> np.ndarray:
"""Return the expected values following thresholding.
The default cube has a 0.5 value at location (2, 2), with all other
values set to 0. The expected result, for a ">" or ">=" threshold
values set to 0. The expected result, for a "gt" or "ge" threshold
comparator, is achieved by placing the expected value, or values for
multi-realization data without coordinate collapsing, at the (..., 2, 2)
location in an array full of zeros. This array matches the size of
the input cube.

If the comparator is changed to be be "<" or "<=" then the array is
If the comparator is changed to be be "lt" or "le" then the array is
bayliffe marked this conversation as resolved.
Show resolved Hide resolved
filled with ones, and the expected value(s) is subtracted from 1 prior
to being places at the (..., 2, 2) location.
to being placed at the (..., 2, 2) location.

This function does no more than this to avoid making the tests harder
to follow. Cases for which the threshold value is equal to any of
the data values (without fuzziness) meaning that the ">" and ">=", or
"<" and "<=" comparators would give different results are handled
the data values (without fuzziness) meaning that the "gt" and "ge", or
"lt" and "le" comparators would give different results are handled
separately.

Args:
Expand All @@ -197,8 +197,8 @@ def expected_result(
Whether the test includes the collapsing of the realization
coordinate to calculate the final result.
comparator:
The comapartor being applied in the thresholding, either ">",
">=", "<", or "<=".
The comparator being applied in the thresholding, either "gt",
"ge", "lt", or "le".
default_cube:
The input cube to the test.
Returns:
Expand Down
78 changes: 60 additions & 18 deletions improver_tests/threshold/test_ThresholdPytest.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,15 @@
{"threshold_values": 0.6, "collapse_coord": "kittens"},
"Can only collapse over a realization coordinate or a percentile",
),
# threshold values provided as argument and via config
(
{"threshold_values": 0.6, "threshold_config": {"0.6"}},
"threshold_config and threshold_values are mutually exclusive arguments",
),
],
)
def test_init(kwargs, exception):
bayliffe marked this conversation as resolved.
Show resolved Hide resolved
"""Test the exceptions raised by the __init__ method."""
with pytest.raises(ValueError, match=exception):
Threshold(**kwargs)

Expand Down Expand Up @@ -144,11 +150,11 @@ def test__add_threshold_coord(default_cube, diagnostic, units):
"n_realizations,data",
[
# A typical case with float inputs
(1, np.zeros((25), dtype=np.float32).reshape(5, 5)),
(1, np.zeros(25, dtype=np.float32).reshape(5, 5)),
# A case with integer inputs, where the data is converted to
# float32 type, allowing for non-integer thresholded values,
# i.e. due to the application of fuzzy thresholds.
(1, np.zeros((25), dtype=np.int8).reshape(5, 5)),
(1, np.zeros(25, dtype=np.int8).reshape(5, 5)),
],
)
def test_attributes_and_types(custom_cube):
Expand Down Expand Up @@ -207,13 +213,25 @@ def test_threshold_metadata(
assert result.name() == expected_cube_name.format(cube_name=ref_cube_name)
assert result.coord(var_name="threshold") == threshold_coord

if vicinity is not None:
if vicinity is not None and vicinity:
MoseleyS marked this conversation as resolved.
Show resolved Hide resolved
expected_vicinity = DimCoord(
vicinity, long_name="radius_of_vicinity", units="m"
)
assert result.coord("radius_of_vicinity") == expected_vicinity


def test_vicinity_as_empty_list(default_cube):
"""Test that a vicinity set as an empty list does not result in
a vicinity diagnostic or any exceptions. Tested for a deterministic,
single realization, and multi-realization cube."""

expected_cube_name = "probability_of_precipitation_rate_above_threshold"
plugin = Threshold(threshold_values=0.5, vicinity=list())
result = plugin(default_cube)

assert result.name() == expected_cube_name


@pytest.mark.parametrize("collapse", (False, True))
@pytest.mark.parametrize("comparator", ("gt", "lt", "le", "ge"))
@pytest.mark.parametrize(
Expand Down Expand Up @@ -310,7 +328,7 @@ def test_expected_values(default_cube, kwargs, collapse, comparator, expected_re
{"threshold_config": {"0.": [-1, 1]}, "comparison_operator": "gt"},
1,
np.zeros((3, 3)),
np.ones((3, 3), dtype=np.float32) * 0.5,
np.full((3, 3), 0.5, dtype=np.float32),
),
# negative diagnostic value above negative threshold with fuzziness.
(
Expand Down Expand Up @@ -375,7 +393,7 @@ def test_expected_values(default_cube, kwargs, collapse, comparator, expected_re
np.ones((3, 3), dtype=np.float32),
),
# Vicinity processing, with one corner value exceeding the threshold
# resulting in neighbourhing cells probabilities of 1 within the limit
# resulting in neighbouring cells probabilities of 1 within the limit
# of the defined vicinity radius (2x2km grid cells)
(
{"threshold_values": 0.5, "vicinity": 3000},
Expand Down Expand Up @@ -413,22 +431,15 @@ def test_expected_values(default_cube, kwargs, collapse, comparator, expected_re
[
np.stack(
[
np.r_[[1] * 2, [0] * 3, [1] * 2, [0] * 18]
.reshape((5, 5))
.astype(np.float32),
np.r_[[1] * 3, [0] * 2, [1] * 3, [0] * 2, [1] * 3, [0] * 12]
.reshape((5, 5))
.astype(np.float32),
]
),
np.stack(
[
np.zeros((5, 5), dtype=np.float32),
np.zeros((5, 5), dtype=np.float32),
np.r_[[1] * 2, [0] * 3, [1] * 2, [0] * 18].reshape((5, 5)),
np.r_[
[1] * 3, [0] * 2, [1] * 3, [0] * 2, [1] * 3, [0] * 12
].reshape((5, 5)),
]
),
np.zeros((2, 5, 5)),
]
),
).astype(np.float32),
),
# Multiple thresholds applied.
(
Expand All @@ -448,6 +459,26 @@ def test_expected_values(default_cube, kwargs, collapse, comparator, expected_re
np.ones((3, 3)),
np.zeros((3, 3), dtype=np.float32),
),
# value of np.inf tested with an above and below comparator, as well as an
# equivalence test with a threshold set to np.inf.
(
{"threshold_values": 1.0e6, "comparison_operator": "gt"},
1,
np.array([[np.inf, 0], [0, 0]]),
np.array([[1.0, 0], [0, 0]], dtype=np.float32),
),
(
{"threshold_values": 1.0e6, "comparison_operator": "lt"},
1,
np.array([[np.inf, 0], [0, 0]]),
np.array([[0, 1.0], [1.0, 1.0]], dtype=np.float32),
),
(
{"threshold_values": np.inf, "comparison_operator": "ge"},
1,
np.array([[np.inf, 0], [0, 0]]),
np.array([[1.0, 0], [0, 0]], dtype=np.float32),
),
],
)
def test_bespoke_expected_values(custom_cube, kwargs, expected_result):
Expand Down Expand Up @@ -532,6 +563,16 @@ def test_nan_handling(custom_cube):
plugin(custom_cube)


@pytest.mark.parametrize("n_realizations,data", [(1, np.array([[0, np.inf], [1, 1]]))])
def test_inf_handling(custom_cube):
"""Test that an exception is raised if the input data contains an
unmasked NaN."""
gavinevans marked this conversation as resolved.
Show resolved Hide resolved

plugin = Threshold(threshold_values=1.0e6)
result = plugin(custom_cube)
print(result.data)
gavinevans marked this conversation as resolved.
Show resolved Hide resolved


@pytest.mark.parametrize(
"n_realizations,data,mask", [(1, np.zeros((2, 2)), np.zeros((2, 2)))]
)
Expand Down Expand Up @@ -597,3 +638,4 @@ def test_threshold_unit_conversion(default_cube):
result.coord(var_name="threshold").points
== np.array([3e-5, 9.0e-05, 1e-4], dtype="float32")
).all()
assert result.coord(var_name="threshold").units == "mm hr-1"
Loading