Merge branch 'master' into mobt_161_wxcode_tree_check

* master:
  remove cycle (metoppv#1657)
  Minor edits to remove raising unnecessary warnings. (metoppv#1646)
  Adds handling of a model-id-attr to wxcode-modal (metoppv#1634)

# Conflicts:
# improver_tests/acceptance/SHA256SUMS
MoseleyS · Feb 4, 2022 · c6a5923 · c6a5923
2 parents 38b253e + 6e894f1
commit c6a5923
Show file tree

Hide file tree

Showing 9 changed files with 72 additions and 27 deletions.
diff --git a/doc/source/extended_documentation/calibration/reliability_calibration/reliability_calibration_examples.rst b/doc/source/extended_documentation/calibration/reliability_calibration/reliability_calibration_examples.rst
@@ -12,7 +12,7 @@ The reliability calibration tables returned by this plugin are structured as sho
        Auxiliary coordinates:
             table_row_name                            -                   x                   -                           -                             -
        Scalar coordinates:
-            cycle_hour: 22
+            forecast_reference_time: 2017-11-11 00:00:00, bound=(2017-11-10 00:00:00, 2017-11-11 00:00:00)
             forecast_period: 68400 seconds
        Attributes:
             institution: Met Office

diff --git a/improver/calibration/dataframe_utilities.py b/improver/calibration/dataframe_utilities.py
@@ -326,17 +326,15 @@ def _prepare_dataframes(
         keep="last",
     )
     # Sort to ensure a consistent ordering after removing duplicates.
-    forecast_df.sort_values(
-        by=["blend_time", "percentile", "wmo_id"], inplace=True, ignore_index=True,
+    forecast_df = forecast_df.sort_values(
+        by=["blend_time", "percentile", "wmo_id"], ignore_index=True,
     )
 
     # Remove truth duplicates.
     truth_cols = ["diagnostic", "time", "wmo_id"]
     truth_df = truth_df.drop_duplicates(subset=truth_cols, keep="last",)
     # Sort to ensure a consistent ordering after removing duplicates.
-    truth_df.sort_values(
-        by=truth_cols, inplace=True, ignore_index=True,
-    )
+    truth_df = truth_df.sort_values(by=truth_cols, ignore_index=True)
 
     # Find the common set of WMO IDs.
     common_wmo_ids = sorted(

diff --git a/improver/calibration/ensemble_calibration.py b/improver/calibration/ensemble_calibration.py
@@ -1130,7 +1130,7 @@ def mask_cube(cube: Cube, landsea_mask: Cube) -> None:
             IndexError: if the cube and landsea_mask shapes are not compatible.
         """
         try:
-            cube.data[..., ~landsea_mask.data.astype(np.bool)] = np.nan
+            cube.data[..., ~landsea_mask.data.astype(bool)] = np.nan
         except IndexError as err:
             msg = "Cube and landsea_mask shapes are not compatible. {}".format(err)
             raise IndexError(msg)
@@ -1380,7 +1380,6 @@ def process(
             forecast_var,
             number_of_realizations,
         )
-
         return coefficients_cubelist
 
 

diff --git a/improver/calibration/reliability_calibration.py b/improver/calibration/reliability_calibration.py
@@ -233,10 +233,11 @@ def _create_reliability_table_cube(
     ) -> Cube:
         """
         Construct a reliability table cube and populate it with the provided
-        data. The returned cube will include a cycle hour coordinate, which
-        describes the model cycle hour at which the forecast data was produced.
-        It will further include the forecast period, threshold coordinate,
-        and spatial coordinates from the forecast cube.
+        data. The returned cube will include a forecast_reference_time
+        coordinate, which will be the maximum range of bounds of the input
+        forecast reference times, with the point value set to the latest
+        of those in the inputs. It will further include the forecast period,
+        threshold coordinate, and spatial coordinates from the forecast cube.
 
         Args:
             forecast:
@@ -443,11 +444,11 @@ def process(self, historic_forecasts: Cube, truths: Cube) -> Cube:
         whether the data is thresholded below or above a given diagnostic
         threshold.
 
+        `historic_forecasts` and `truths` should have matching validity times.
+
         Args:
             historic_forecasts:
                 A cube containing the historical forecasts used in calibration.
-                These are expected to all have a consistent cycle hour, that is
-                the hour in the forecast reference time.
             truths:
                 A cube containing the thresholded gridded truths used in
                 calibration.

diff --git a/improver/cli/wxcode_modal.py b/improver/cli/wxcode_modal.py
@@ -36,7 +36,7 @@
 
 @cli.clizefy
 @cli.with_output
-def process(*cubes: cli.inputcube):
+def process(*cubes: cli.inputcube, model_id_attr: str = None):
     """Generates a modal weather symbol for the period covered by the input
     weather symbol cubes. Where there are different weather codes available
     for night and day, the modal code returned is always a day code, regardless
@@ -46,6 +46,10 @@ def process(*cubes: cli.inputcube):
         cubes (iris.cube.CubeList):
             A cubelist containing weather symbols cubes that cover the period
             over which a modal symbol is desired.
+        model_id_attr (str):
+            Name of attribute recording source models that should be
+            inherited by the output cube. The source models are expected as
+            a space-separated string.
 
     Returns:
         iris.cube.Cube:
@@ -56,4 +60,4 @@ def process(*cubes: cli.inputcube):
     if not cubes:
         raise RuntimeError("Not enough input arguments. See help for more information.")
 
-    return ModalWeatherCode()(cubes)
+    return ModalWeatherCode(model_id_attr=model_id_attr)(cubes)
diff --git a/improver/synthetic_data/set_up_test_cubes.py b/improver/synthetic_data/set_up_test_cubes.py
@@ -260,7 +260,7 @@ def _create_dimension_coord(
 
         coord_array = np.array(coord_array)
 
-        if issubclass(coord_array.dtype.type, np.float):
+        if issubclass(coord_array.dtype.type, float):
             # option needed for realizations percentile & probability cube setup
             # and heights coordinate
             coord_array = coord_array.astype(np.float32)

diff --git a/improver/wxcode/modal_code.py b/improver/wxcode/modal_code.py
@@ -73,10 +73,20 @@ class ModalWeatherCode(BasePlugin):
     covered by the input files.
     """
 
-    def __init__(self):
-        """Create an aggregator instance for reuse"""
+    def __init__(self, model_id_attr: str = None):
+        """
+        Set up plugin and create an aggregator instance for reuse
+
+        Args:
+            model_id_attr:
+                Name of attribute recording source models that should be
+                inherited by the output cube. The source models are expected as
+                a space-separated string.
+        """
         self.aggregator_instance = Aggregator("mode", self.mode_aggregator)
 
+        self.model_id_attr = model_id_attr
+
         # Create the expected cell method for use with single cube inputs
         # that do not pass through the aggregator.
         self.mode_cell_method = iris.coords.CellMethod("mode", coords="time")
@@ -205,6 +215,16 @@ def process(self, cubes: CubeList) -> Cube:
             result = cube.collapsed("time", self.aggregator_instance)
         self._set_blended_times(result)
 
+        if self.model_id_attr:
+            # Update contributing models
+            contributing_models = set()
+            for source_cube in cubes:
+                for model in source_cube.attributes[self.model_id_attr].split(" "):
+                    contributing_models.update([model])
+            result.attributes[self.model_id_attr] = " ".join(
+                sorted(list(contributing_models))
+            )
+
         # Handle any unset points where it was hard to determine a suitable mode
         if (result.data == UNSET_CODE_INDICATOR).any():
             self._group_codes(result, cube)

diff --git a/improver_tests/acceptance/test_wxcode_modal.py b/improver_tests/acceptance/test_wxcode_modal.py
@@ -68,6 +68,8 @@ def test_expected(tmp_path, test_path):
     output_path = tmp_path / "output.nc"
     args = [
         *input_paths,
+        "--model-id-attr",
+        "mosg__model_configuration",
         "--output",
         output_path,
     ]

diff --git a/improver_tests/wxcode/wxcode/test_ModalCode.py b/improver_tests/wxcode/wxcode/test_ModalCode.py
@@ -33,30 +33,34 @@
 from calendar import timegm
 from datetime import datetime as dt
 from datetime import timedelta
+from typing import Tuple
 
 import numpy as np
 import pytest
-from iris.cube import Cube
+from iris.cube import CubeList
 
 from improver.spotdata.build_spotdata_cube import build_spotdata_cube
 from improver.synthetic_data.set_up_test_cubes import construct_scalar_time_coords
 from improver.wxcode.modal_code import ModalWeatherCode
 
 from . import set_up_wxcube
 
+MODEL_ID_ATTR = "mosg__model_configuration"
 TARGET_TIME = dt(2020, 6, 15, 18)
 
 
 @pytest.fixture(name="wxcode_series")
-def wxcode_series_fixture(data, cube_type, offset_reference_times: bool) -> Cube:
+def wxcode_series_fixture(
+    data, cube_type, offset_reference_times: bool, model_id_attr: bool,
+) -> Tuple[bool, CubeList]:
     """Generate a time series of weather code cubes for combination to create
     a period representative code. When offset_reference_times is set, each
     successive cube will have a reference time one hour older."""
 
     time = TARGET_TIME
 
     ntimes = len(data)
-    wxcubes = []
+    wxcubes = CubeList()
 
     for i in range(ntimes):
         wxtime = time - timedelta(hours=i)
@@ -93,9 +97,13 @@ def wxcode_series_fixture(data, cube_type, offset_reference_times: bool) -> Cube
                     scalar_coords=time_coords,
                 )
             )
-    return wxcubes
+        if model_id_attr:
+            [c.attributes.update({MODEL_ID_ATTR: "uk_ens"}) for c in wxcubes]
+            wxcubes[0].attributes.update({MODEL_ID_ATTR: "uk_det uk_ens"})
+    return model_id_attr, wxcubes
 
 
+@pytest.mark.parametrize("model_id_attr", [False, True])
 @pytest.mark.parametrize("offset_reference_times", [False, True])
 @pytest.mark.parametrize("cube_type", ["gridded", "spot"])
 @pytest.mark.parametrize(
@@ -133,13 +141,15 @@ def wxcode_series_fixture(data, cube_type, offset_reference_times: bool) -> Cube
 )
 def test_expected_values(wxcode_series, expected):
     """Test that the expected period representative symbol is returned."""
-    result = ModalWeatherCode()(wxcode_series)
+    _, wxcode_cubes = wxcode_series
+    result = ModalWeatherCode()(wxcode_cubes)
     assert result.data.flatten()[0] == expected
 
 
+@pytest.mark.parametrize("model_id_attr", [False, True])
 @pytest.mark.parametrize("offset_reference_times", [False, True])
 @pytest.mark.parametrize("cube_type", ["gridded", "spot"])
-@pytest.mark.parametrize("data", [np.ones((12)), np.ones((1))])
+@pytest.mark.parametrize("data", [np.ones(12), np.ones(1)])
 def test_metadata(wxcode_series):
     """Check that the returned metadata is correct. In this case we expect a
     time coordinate with bounds that describe the full period over which the
@@ -154,9 +164,16 @@ def test_metadata(wxcode_series):
     def as_utc_timestamp(time):
         return timegm(time.utctimetuple())
 
-    result = ModalWeatherCode()(wxcode_series)
+    model_id_attr, wxcode_cubes = wxcode_series
 
-    n_times = len(wxcode_series)
+    if model_id_attr:
+        kwargs = {"model_id_attr": MODEL_ID_ATTR}
+    else:
+        kwargs = {}
+
+    result = ModalWeatherCode(**kwargs)(wxcode_cubes)
+
+    n_times = len(wxcode_cubes)
     expected_time = TARGET_TIME
     expected_bounds = [TARGET_TIME - timedelta(hours=n_times), TARGET_TIME]
     expected_reference_time = TARGET_TIME - timedelta(hours=18)
@@ -180,3 +197,7 @@ def as_utc_timestamp(time):
     )
     assert result.cell_methods[0].method == expected_cell_method[0]
     assert result.cell_methods[0].coord_names[0] == expected_cell_method[1]
+    if model_id_attr:
+        assert result.attributes[MODEL_ID_ATTR] == "uk_det uk_ens"
+    else:
+        assert MODEL_ID_ATTR not in result.attributes.keys()