From ff27ab4264dec57d231d6dcfdaf016e00fa18e7e Mon Sep 17 00:00:00 2001 From: Marco Favorito Date: Thu, 16 Mar 2023 20:04:11 +0100 Subject: [PATCH] TOFIX: add extras mechanism for finer-grained dependency selection REBASE: GPy was temporarily left as-is; it should be removed. Inspired by: https://github.com/bancaditalia/black-it/issues/36#issuecomment-1472327834. This commit uses the extra-dependencies mechanism of setuptools to overcome limitations specific to certain dependencies (e.g. no support for some Python interpreter versions). The changes use the following conventions for extras names: - `[all]`: install all dependencies from all extras - `[X-sampler]`: install all dependencies needed to make the X sampler work - `[X-loss]`: install all dependencies needed to make the X loss function work. We do not yet have an example for the last item; but for "forward-compatibility" of the nomenclature, we keep the -sampler suffix. E.g. for GPy, we could have the extra called gp-sampler, which installs GPy on demand, so that GPy is not installed if the user does not need it. This commit also includes a mechanism to handle import errors for the dependencies of a component that are not installed. This mechanism raises an exception with a useful error message that points the user to the extra missing from their local installation of black-it. 
--- README.md | 21 +++- black_it/_load_dependency.py | 112 +++++++++++++++++++ black_it/samplers/gaussian_process.py | 26 +++++ black_it/samplers/xgboost.py | 25 ++++- pyproject.toml | 5 + tests/test_calibrator.py | 5 + tests/test_samplers/test_gaussian_process.py | 3 + tests/test_samplers/test_xgboost.py | 5 + tests/utils/base.py | 39 ++++++- tox.ini | 4 + 10 files changed, 240 insertions(+), 5 deletions(-) create mode 100644 black_it/_load_dependency.py diff --git a/README.md b/README.md index 9c22e1fb..a911494b 100644 --- a/README.md +++ b/README.md @@ -38,19 +38,34 @@ matter of days, with no need to reimplement all the plumbings from scratch. This project requires Python v3.8 or later. -To install the latest version of the package from [PyPI](https://pypi.org/project/black-it/): +To install the latest version of the package from [PyPI](https://pypi.org/project/black-it/), with all the extra dependencies (recommended): ``` -pip install black-it +pip install "black-it[all]" ``` Or, directly from GitHub: ``` -pip install git+https://github.com/bancaditalia/black-it.git#egg=black-it +pip install git+https://github.com/bancaditalia/black-it.git#egg="black-it[all]" ``` If you'd like to contribute to the package, please read the [CONTRIBUTING.md](./CONTRIBUTING.md) guide. +### Feature-specific Package Dependencies + +We use the [optional dependencies mechanism of `setuptools`](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#optional-dependencies) +(also called _extras_) to allow users to avoid dependencies for features they don't use. + +For the basic features of the package, you can install the `black-it` package without extras, e.g. `pip install black-it`. +However, for certain components, you will need to install some more extras using the syntax `pip install black-it[extra-1,extra-2,...]`. 
+ +For example, the [Gaussian Process Sampler](https://bancaditalia.github.io/black-it/samplers/#black_it.samplers.gaussian_process.GaussianProcessSampler) +depends on the Python package [`GPy`](https://github.com/SheffieldML/GPy/). +If the Gaussian Process sampler is not needed by your application, you can avoid its installation by just installing `black-it` as explained above. +However, if you need the sampler, you must install `black-it` with the `gp-sampler` extra: `pip install black-it[gp-sampler]`. + +The special extra `all` will install all the dependencies. + ## Quick Example The GitHub repo of Black-it contains a series ready-to-run calibration examples. diff --git a/black_it/_load_dependency.py b/black_it/_load_dependency.py new file mode 100644 index 00000000..2f028f78 --- /dev/null +++ b/black_it/_load_dependency.py @@ -0,0 +1,112 @@ +# Black-box ABM Calibration Kit (Black-it) +# Copyright (C) 2021-2023 Banca d'Italia +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see https://www.gnu.org/licenses/. + +"""Python module to handle extras dependencies loading and import errors. + +This is a private module of the library. There should be no point in using it directly from client code. 
+""" +from __future__ import annotations + +import sys + +# known extras and their dependencies +_GPY_PACKAGE_NAME = "GPy" +_GP_SAMPLER_EXTRA_NAME = "gp-sampler" + +_XGBOOST_PACKAGE_NAME = "xgboost" +_XGBOOST_SAMPLER_EXTRA_NAME = "xgboost-sampler" + + +class DependencyNotInstalledError(Exception): + """Library exception for when a required dependency is not installed.""" + + def __init__(self, component_name: str, package_name: str, extra_name: str) -> None: + """Initialize the exception object.""" + message = ( + f"Cannot import package '{package_name}', required by component {component_name}. " + f"To solve the issue, you can install the extra '{extra_name}': pip install black-it[{extra_name}]" + ) + super().__init__(message) + + +class GPyNotSupportedOnPy311Error(Exception): + """Specific exception class for import error of GPy on Python 3.11.""" + + __ERROR_MSG = ( + f"The GaussianProcessSampler depends on '{_GPY_PACKAGE_NAME}', which is not supported on Python 3.11; " + f"see https://github.com/bancaditalia/black-it/issues/36" + ) + + def __init__(self) -> None: + """Initialize the exception object.""" + super().__init__(self.__ERROR_MSG) + + +def _check_import_error_else_raise_exception( + import_error: ImportError | None, + component_name: str, + package_name: str, + black_it_extra_name: str, +) -> None: + """Check an import error; raise the DependencyNotInstalledError exception with a useful message. + + Args: + import_error: the ImportError object generated by the failed attempt. If None, then no error occurred. + component_name: the component for which the dependency is needed + package_name: the Python package name of the dependency + black_it_extra_name: the name of the black-it extra to install to solve the issue. + """ + if import_error is None: + # nothing to do. 
+ return + + # an import error happened; we need to raise an error to the caller + raise DependencyNotInstalledError(component_name, package_name, black_it_extra_name) + + +def _check_gpy_import_error_else_raise_exception( + import_error: ImportError | None, + component_name: str, + package_name: str, + black_it_extra_name: str, +) -> None: + """Check GPy import error and if an error occurred, raise an error with a useful error message. + + We need to handle two cases: + + - the user is using Python 3.11: the GPy package cannot be installed there; + see https://github.com/SheffieldML/GPy/issues/998 + - the user did not install the 'gp-sampler' extra. + + Args: + import_error: the ImportError object generated by the failed attempt. If None, then no error occurred. + component_name: the component for which the dependency is needed + package_name: the Python package name of the dependency + black_it_extra_name: the name of the black-it extra to install to solve the issue. + """ + if import_error is None: + # nothing to do. 
+ return + + if sys.version_info == (3, 11): + raise GPyNotSupportedOnPy311Error + + _check_import_error_else_raise_exception( + import_error, + component_name, + package_name, + black_it_extra_name, + ) diff --git a/black_it/samplers/gaussian_process.py b/black_it/samplers/gaussian_process.py index c40a731e..f3942e62 100644 --- a/black_it/samplers/gaussian_process.py +++ b/black_it/samplers/gaussian_process.py @@ -26,11 +26,25 @@ from scipy.special import erfc # type: ignore[import] # pylint: disable=no-name-in-module from sklearn.gaussian_process import GaussianProcessRegressor, kernels # type: ignore[import] +from black_it._load_dependency import ( + _GP_SAMPLER_EXTRA_NAME, + _GPY_PACKAGE_NAME, + _check_import_error_else_raise_exception, +) from black_it.samplers.surrogate import MLSurrogateSampler if TYPE_CHECKING: from numpy.typing import NDArray +_GPY_IMPORT_ERROR: ImportError | None +try: + import GPy # noqa: F401 + from GPy.models import GPRegression # noqa: F401 +except ImportError as e: + _GPY_IMPORT_ERROR = e +else: + _GPY_IMPORT_ERROR = None + _BIG_DATASET_SIZE_WARNING_THRESHOLD = 500 _SMALL_VARIANCE_VALUES = 1e-5 @@ -76,6 +90,8 @@ def __init__( # noqa: PLR0913 acquisition: type of acquisition function, it can be 'expected_improvement' of simply 'mean' jitter: positive value to make the "expected_improvement" acquisition more explorative. 
""" + self.__check_gpy_import_error() + self._validate_acquisition(acquisition) super().__init__( @@ -90,6 +106,16 @@ def __init__( # noqa: PLR0913 self._gpmodel: GaussianProcessRegressor | None = None self._fmin: np.double | float | None = None + @classmethod + def __check_gpy_import_error(cls) -> None: + """Check if an import error happened while attempting to import the 'GPy' package.""" + _check_import_error_else_raise_exception( + _GPY_IMPORT_ERROR, + cls.__name__, + _GPY_PACKAGE_NAME, + _GP_SAMPLER_EXTRA_NAME, + ) + @staticmethod def _validate_acquisition(acquisition: str) -> None: """Check that the required acquisition is among the supported ones. diff --git a/black_it/samplers/xgboost.py b/black_it/samplers/xgboost.py index ffb70aa2..808949ac 100644 --- a/black_it/samplers/xgboost.py +++ b/black_it/samplers/xgboost.py @@ -21,8 +21,12 @@ from typing import TYPE_CHECKING, cast import numpy as np -import xgboost as xgb +from black_it._load_dependency import ( + _XGBOOST_PACKAGE_NAME, + _XGBOOST_SAMPLER_EXTRA_NAME, + _check_import_error_else_raise_exception, +) from black_it.samplers.surrogate import MLSurrogateSampler if TYPE_CHECKING: @@ -32,6 +36,14 @@ MIN_FLOAT32 = np.finfo(np.float32).min EPS_FLOAT32 = np.finfo(np.float32).eps +_XGBOOST_IMPORT_ERROR: ImportError | None +try: + import xgboost as xgb +except ImportError as e: + _XGBOOST_IMPORT_ERROR = e +else: + _XGBOOST_IMPORT_ERROR = None + class XGBoostSampler(MLSurrogateSampler): """This class implements xgboost sampling.""" @@ -67,6 +79,7 @@ def __init__( # noqa: PLR0913 References: Lamperti, Roventini, and Sani, "Agent-based model calibration using machine learning surrogates" """ + self.__check_xgboost_import_error() super().__init__( batch_size, random_state, @@ -81,6 +94,16 @@ def __init__( # noqa: PLR0913 self._n_estimators = n_estimators self._xg_regressor: xgb.XGBRegressor | None = None + @classmethod + def __check_xgboost_import_error(cls) -> None: + """Check if an import error happened while 
attempting to import the 'xgboost' package.""" + _check_import_error_else_raise_exception( + _XGBOOST_IMPORT_ERROR, + cls.__name__, + _XGBOOST_PACKAGE_NAME, + _XGBOOST_SAMPLER_EXTRA_NAME, + ) + @property def colsample_bytree(self) -> float: """Get the colsample_bytree parameter.""" diff --git a/pyproject.toml b/pyproject.toml index b1ba9220..ca3f413b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -87,6 +87,11 @@ tox = "^4.4.12" twine = "^4.0.2" vulture = "^2.9.1" +[project.optional-dependencies] +gp-sampler = ["GPy~=1.10.0"] +xgboost-sampler = ["xgboost~=1.7.2"] +all = ["GPy~=1.10.0", "xgboost~=1.7.2"] + [build-system] requires = ["poetry-core>=1.0.0"] build-backend = "poetry.core.masonry.api" diff --git a/tests/test_calibrator.py b/tests/test_calibrator.py index e812bb53..620977a2 100644 --- a/tests/test_calibrator.py +++ b/tests/test_calibrator.py @@ -39,7 +39,12 @@ from black_it.utils.seedable import BaseSeedable from examples.models.simple_models import NormalMV +from .utils.base import no_gpy_installed, no_python311_for_gpy, no_xgboost_installed + +@no_python311_for_gpy +@no_gpy_installed +@no_xgboost_installed class TestCalibrate: """Test the Calibrator.calibrate method.""" diff --git a/tests/test_samplers/test_gaussian_process.py b/tests/test_samplers/test_gaussian_process.py index 3f2a8d3a..65f4d7f4 100644 --- a/tests/test_samplers/test_gaussian_process.py +++ b/tests/test_samplers/test_gaussian_process.py @@ -24,6 +24,9 @@ from black_it.samplers.gaussian_process import GaussianProcessSampler, _AcquisitionTypes from black_it.search_space import SearchSpace +from tests.utils.base import no_gpy_installed, no_python311_for_gpy + +pytestmark = [no_python311_for_gpy, no_gpy_installed] class TestGaussianProcess2D: diff --git a/tests/test_samplers/test_xgboost.py b/tests/test_samplers/test_xgboost.py index 5a7a3c4c..a05bf33c 100644 --- a/tests/test_samplers/test_xgboost.py +++ b/tests/test_samplers/test_xgboost.py @@ -24,6 +24,10 @@ from 
black_it.samplers.xgboost import XGBoostSampler from black_it.search_space import SearchSpace from examples.models.economics.brock_hommes import BH4 +from tests.utils.base import no_xgboost_installed + +pytestmark = no_xgboost_installed + # This is a temporary workaround to make tests to run also on Windows and Mac. # See: https://github.com/bancaditalia/black-it/issues/49 @@ -34,6 +38,7 @@ else: expected_params = np.array([[0.24, 0.26], [0.37, 0.21], [0.43, 0.14], [0.11, 0.04]]) + MAX_FLOAT32 = np.finfo(np.float32).max MIN_FLOAT32 = np.finfo(np.float32).min EPS_FLOAT32 = np.finfo(np.float32).eps diff --git a/tests/utils/base.py b/tests/utils/base.py index 48dd9686..0dde1fac 100644 --- a/tests/utils/base.py +++ b/tests/utils/base.py @@ -18,17 +18,24 @@ from __future__ import annotations import dataclasses +import importlib import shutil import signal import subprocess # nosec B404 import sys from functools import wraps -from typing import Callable +from typing import TYPE_CHECKING, Callable import pytest +from black_it._load_dependency import _GPY_PACKAGE_NAME, _XGBOOST_PACKAGE_NAME from tests.conftest import DEFAULT_SUBPROCESS_TIMEOUT +if TYPE_CHECKING: + import types + + from _pytest.mark.structures import MarkDecorator + @dataclasses.dataclass(frozen=True) class PopenResult: @@ -179,3 +186,33 @@ def wrapper( # type: ignore[no-untyped-def] # noqa: ANN202 sys.platform == "win32", reason="This test cannot be ran on Windows", ) + + +def try_import_else_none(module_name: str) -> types.ModuleType | None: + """Try to import a module; if it fails, return None.""" + try: + return importlib.import_module(module_name) + except ImportError: + return None + + +def try_import_else_skip( + package_name: str, + **skipif_kwargs, # noqa: ANN003 +) -> MarkDecorator: + """Try to import the package; else skip the test(s).""" + return pytest.mark.skipif( + try_import_else_none(package_name) is None, + reason=f"Cannot run the test because the package '{package_name}' is not 
installed", + **skipif_kwargs, + ) + + +no_python311_for_gpy = pytest.mark.skipif( + (3, 11) <= sys.version_info < (3, 12), + reason="GPy not supported on Python 3.11, see: https://github.com/bancaditalia/black-it/issues/36", +) + + +no_gpy_installed = try_import_else_skip(_GPY_PACKAGE_NAME) +no_xgboost_installed = try_import_else_skip(_XGBOOST_PACKAGE_NAME) diff --git a/tox.ini b/tox.ini index 7af7965f..01252057 100644 --- a/tox.ini +++ b/tox.ini @@ -11,6 +11,10 @@ basepython = python3 [testenv] setenv = PYTHONPATH = {toxinidir} +extras = + all + gp-sampler + xgboost-sampler deps = pytest==7.4.2 pytest-cov==4.1.0