Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Possible EMOS amendments #11

Open
wants to merge 21 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
0fb1e0b
Modifications to support local standardisation of EMOS coefficients.
gavinevans May 13, 2021
de464dc
Edit to temporarily ignore metadata differences in the forecast perio…
gavinevans May 14, 2021
ef19a53
Initial edits to explore ways of incorporating additional predictors.
gavinevans May 28, 2021
e7a0ee2
Initial rough version that seems to work for global EMOS corrections.
gavinevans Jun 3, 2021
c1fb1e6
Modifications to support point-by-point and using default initial guess.
gavinevans Jun 3, 2021
801cf8d
Modifications to add global standardisation capability to this branch…
gavinevans Jun 7, 2021
36896bd
Adding filtering of truth to ensure a consistent altitude, latitude a…
gavinevans Jun 7, 2021
dcc5fc6
Add computation of a skew-logistic distribution to this branch.
gavinevans Jun 7, 2021
145cd36
Add masking of the forecast when the truth becomes masked by the loca…
gavinevans Jun 8, 2021
f9357f0
Amendments related to the skew logistic distribution to correct the u…
gavinevans Jun 9, 2021
d721233
Fixes to errors in code.
gavinevans Jun 15, 2021
99855ca
Modifications to raise errors in the standardisation, rather than con…
gavinevans Jun 16, 2021
36d41ea
Alterations to support standardising using only the forecast climatol…
gavinevans Jun 16, 2021
cd819c7
Minor updates to support local standardisation using forecasts.
gavinevans Jun 17, 2021
e5f53d3
Corrections following further testing.
gavinevans Jun 29, 2021
de17321
Minor edits following review comments.
gavinevans Jun 29, 2021
4aebecf
Correction to code.
gavinevans Jun 29, 2021
6691653
Further corrections.
gavinevans Jun 29, 2021
149a788
Update to only check the points of the additional predictors fields.
gavinevans Jun 30, 2021
8108f91
Minor edits including fix for when applying coefficients to forecasts…
gavinevans Jul 6, 2021
0ca37f0
Update to provide handling for a NaN scale parameter for the local st…
gavinevans Jul 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions bin/improver
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,9 @@ set -eu
export IMPROVER_DIR="$(cd $(dirname $0)/../ && pwd -P)"

# Apply site-specific setup if necessary.
if [[ -f "${IMPROVER_SITE_INIT:=$IMPROVER_DIR/etc/site-init}" ]]; then
. "$IMPROVER_SITE_INIT"
fi
# if [[ -f "${IMPROVER_SITE_INIT:=$IMPROVER_DIR/etc/site-init}" ]]; then
# . "$IMPROVER_SITE_INIT"
# fi

# Put our library and scripts in the paths.
export PATH="$IMPROVER_DIR/bin/:$PATH"
Expand Down
193 changes: 157 additions & 36 deletions improver/calibration/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@
# POSSIBILITY OF SUCH DAMAGE.
"""init for calibration"""

from collections import OrderedDict
from typing import List, Optional, Tuple

from iris.cube import Cube
from iris.cube import Cube, CubeList
import numpy as np
import scipy

from improver.metadata.probabilistic import (
get_diagnostic_cube_name_from_probability_name,
Expand All @@ -42,8 +43,8 @@


def split_forecasts_and_truth(
cubes: List[Cube], truth_attribute: str
) -> Tuple[Cube, Cube, Optional[Cube]]:
cubes: List[Cube], truth_attribute: str, land_sea_mask_name: bool
) -> Tuple[Cube, Cube, Optional[CubeList], Optional[Cube]]:
"""
A common utility for splitting the various inputs cubes required for
calibration CLIs. These are generally the forecast cubes, historic truths,
Expand Down Expand Up @@ -72,47 +73,167 @@ def split_forecasts_and_truth(
IOError:
Missing truth or historical forecast in input cubes.
"""
grouped_cubes = {}
cubes_dict = {"truth": {}, "land_sea_mask": {}, "other": {}, "historic_forecasts": {}, "additional_fields": {}}
# split non-land_sea_mask cubes on forecast vs truth
truth_key, truth_value = truth_attribute.split("=")
for cube in cubes:
try:
cube_name = get_diagnostic_cube_name_from_probability_name(cube.name())
except ValueError:
cube_name = cube.name()
grouped_cubes.setdefault(cube_name, []).append(cube)
if len(grouped_cubes) == 1:
# Only one group - all forecast/truth cubes
land_sea_mask = None
diag_name = list(grouped_cubes.keys())[0]
elif len(grouped_cubes) == 2:
# Two groups - the one with exactly one cube matching a name should
# be the land_sea_mask, since we require more than 2 cubes in
# the forecast/truth group
grouped_cubes = OrderedDict(
sorted(grouped_cubes.items(), key=lambda kv: len(kv[1]))
)
# landsea name should be the key with the lowest number of cubes (1)
landsea_name, diag_name = list(grouped_cubes.keys())
land_sea_mask = grouped_cubes[landsea_name][0]
if len(grouped_cubes[landsea_name]) != 1:
raise IOError("Expected one cube for land-sea mask.")
else:
raise ValueError("Must have cubes with 1 or 2 distinct names.")

# split non-land_sea_mask cubes on forecast vs truth
truth_key, truth_value = truth_attribute.split("=")
input_cubes = grouped_cubes[diag_name]
grouped_cubes = {"truth": [], "historical forecast": []}
for cube in input_cubes:
if cube.attributes.get(truth_key) == truth_value:
grouped_cubes["truth"].append(cube)
cubes_dict["truth"].setdefault(cube_name, []).append(cube)
elif cube_name == land_sea_mask_name:
cubes_dict["land_sea_mask"].setdefault(cube_name, []).append(cube)
else:
grouped_cubes["historical forecast"].append(cube)
blend_time_list = [c for c in cube.coords() if c.name() == "blend_time"]
if len(blend_time_list):
cube.remove_coord("blend_time")
if cube.coords("forecast_period"):
cube.coord("forecast_period").attributes = {}
if cube.coords("forecast_reference_time"):
cube.coord("forecast_reference_time").attributes = {}
cubes_dict["other"].setdefault(cube_name, []).append(cube)

if len(cubes_dict["truth"]) > 1:
msg = (f"Truth supplied for multiple diagnostics {list(cubes_dict['truth'].keys())}. "
"The truth should only exist for one diagnostic.")
raise ValueError(msg)

if land_sea_mask_name and not cubes_dict["land_sea_mask"]:
raise IOError("Expected one cube for land-sea mask with "
f"the name {land_sea_mask_name}.")

missing_inputs = " and ".join(k for k, v in grouped_cubes.items() if not v)
diag_name = list(cubes_dict["truth"].keys())[0]
cubes_dict["historic_forecasts"] = cubes_dict["other"][diag_name]
for k, v in cubes_dict["other"].items():
if k != diag_name:
cubes_dict["additional_fields"].setdefault(k, []).extend(v)

missing_inputs = " and ".join(k for k, v in cubes_dict.items() if k in ["truth", "historic_forecasts"] and not v)
if missing_inputs:
raise IOError(f"Missing {missing_inputs} input.")

truth = MergeCubes()(grouped_cubes["truth"])
forecast = MergeCubes()(grouped_cubes["historical forecast"])
truth = MergeCubes()(filter_obs(cubes_dict["truth"][diag_name]))
forecast = MergeCubes()(cubes_dict["historic_forecasts"])
additional_fields = CubeList([MergeCubes()(cubes_dict["additional_fields"][k]) for k in cubes_dict["additional_fields"]])
return forecast, truth, additional_fields, cubes_dict["land_sea_mask"]


def filter_obs(spot_truths_cubelist: CubeList) -> CubeList:
"""Deal with observation sites that failed to provide an altitude, latitude
and longitude. As we've ensured that each observation site has an entry
for each timestep, even if no observation was available at that timestep,
some observations will not have an altitude, latitudes or longitudes.
There is also the potential for some sites to report different altitudes,
latitudes and longitudes at different times. For simplicity, the altitude,
latitude and longitude from the first timestep is used as the altitude,
latitude and longitude throughout the input CubeList.

Args:
spot_truths_cubelist:
Cubelist of spot truths

Returns:
CubeList of spot truths with consistent values for the altitude,
latitude and longitude at each timestep.
"""
if not all([c if c.coords("spot_index") else False for c in spot_truths_cubelist]):
return spot_truths_cubelist
altitudes = np.squeeze(
scipy.stats.mode(
np.stack([c.coord("altitude").points for c in spot_truths_cubelist]), axis=0
)[0]
)
latitudes = np.squeeze(
scipy.stats.mode(
np.stack([c.coord(axis="y").points for c in spot_truths_cubelist]), axis=0
)[0]
)
longitudes = np.squeeze(
scipy.stats.mode(
np.stack([c.coord(axis="x").points for c in spot_truths_cubelist]), axis=0
)[0]
)

altitudes = np.nan_to_num(altitudes)
latitudes = np.nan_to_num(latitudes)
longitudes = np.nan_to_num(longitudes)

for index, _ in enumerate(spot_truths_cubelist):

spot_truths_cubelist[index].coord("altitude").points = altitudes
spot_truths_cubelist[index].coord(axis="y").points = latitudes
spot_truths_cubelist[index].coord(axis="x").points = longitudes

return spot_truths_cubelist


def split_forecasts_and_coeffs(
cubes: List[Cube], land_sea_mask_name: bool
) -> Tuple[Cube, Optional[Cube], CubeList, Optional[Cube]]:
"""
A common utility for splitting the various inputs cubes required for
calibration CLIs. These are generally the forecast cubes, historic truths,
and in some instances a land-sea mask is also required.

Args:
cubes:
A list of input cubes which will be split into relevant groups.
These include the historical forecasts, in the format supported by
the calibration CLIs, and the truth cubes.
truth_attribute:
An attribute and its value in the format of "attribute=value",
which must be present on truth cubes.

Returns:
- A cube containing all the historic forecasts.
- A cube containing all the truth data.
- If found within the input cubes list a land-sea mask will be
returned, else None is returned.

Raises:
ValueError:
An unexpected number of distinct cube names were passed in.
IOError:
More than one cube was identified as a land-sea mask.
IOError:
Missing truth or historical forecast in input cubes.
"""

cubes_dict = {"current_forecast": {}, "coefficients": {}, "land_sea_mask": {}, "additional_fields": {}, "other": {}}
# split non-land_sea_mask cubes on forecast vs truth
for cubelist in cubes:
for cube in cubelist:
try:
cube_name = get_diagnostic_cube_name_from_probability_name(cube.name())
except ValueError:
cube_name = cube.name()
if "emos_coefficient" in cube_name or "shape_parameters" == cube_name:
cubes_dict["coefficients"].setdefault(cube_name, []).append(cube)
elif cube_name == land_sea_mask_name:
cubes_dict["land_sea_mask"] = cube
else:
cubes_dict["other"].setdefault(cube_name, []).append(cube)

if land_sea_mask_name and not cubes_dict["land_sea_mask"]:
raise IOError("Expected one cube for land-sea mask with "
f"the name {land_sea_mask_name}.")

diagnostic_standard_name = list(set([v[0].attributes["diagnostic_standard_name"] for v in cubes_dict["coefficients"].values()]))
if len(diagnostic_standard_name) == 1:
diagnostic_standard_name, = diagnostic_standard_name
else:
msg = ("The coefficients cubes are expected to have one consistent "
f"diagnostic_standard_name attribute, rather than {diagnostic_standard_name}")
raise AttributeError(msg)

cubes_dict["current_forecast"], = cubes_dict["other"][diagnostic_standard_name]
for k, v in cubes_dict["other"].items():
if k != diagnostic_standard_name:
cubes_dict["additional_fields"].setdefault(k, []).extend(v)

additional_fields = CubeList([cubes_dict["additional_fields"][k][0] for k in cubes_dict["additional_fields"]])
coefficients = CubeList([cubes_dict["coefficients"][k][0] for k in cubes_dict["coefficients"]])

return forecast, truth, land_sea_mask
return cubes_dict["current_forecast"], additional_fields, coefficients, cubes_dict["land_sea_mask"]
Loading