Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add extras mechanism for finer-grained dependency selection #45

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,34 @@ matter of days, with no need to reimplement all the plumbings from scratch.

This project requires Python v3.8 or later.

To install the latest version of the package from [PyPI](https://pypi.org/project/black-it/):
To install the latest version of the package from [PyPI](https://pypi.org/project/black-it/), with all the extra dependencies (recommended):
```
pip install black-it
pip install "black-it[all]"
```

Or, directly from GitHub:

```
pip install git+https://github.com/bancaditalia/black-it.git#egg=black-it
pip install git+https://github.com/bancaditalia/black-it.git#egg="black-it[all]"
```

If you'd like to contribute to the package, please read the [CONTRIBUTING.md](./CONTRIBUTING.md) guide.

### Feature-specific Package Dependencies

We use the [optional dependencies mechanism of `setuptools`](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#optional-dependencies)
(also called _extras_) to allow users to avoid dependencies for features they don't use.

For the basic features of the package, you can install the `black-it` package without extras, e.g. `pip install black-it`.
However, for certain components, you will need to install some more extras using the syntax `pip install "black-it[extra-1,extra-2,...]"` (the quotes prevent shells such as zsh from interpreting the square brackets).

For example, the [Gaussian Process Sampler](https://bancaditalia.github.io/black-it/samplers/#black_it.samplers.gaussian_process.GaussianProcessSampler)
depends on the Python package [`GPy`](https://github.com/SheffieldML/GPy/).
If the Gaussian Process sampler is not needed by your application, you can avoid its installation by just installing `black-it` as explained above.
However, if you need the sampler, you must install `black-it` with the `gp-sampler` extra: `pip install "black-it[gp-sampler]"`.

The special extra `all` will install all the dependencies.

## Quick Example

The GitHub repo of Black-it contains a series of ready-to-run calibration examples.
Expand Down
112 changes: 112 additions & 0 deletions black_it/_load_dependency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Black-box ABM Calibration Kit (Black-it)
# Copyright (C) 2021-2023 Banca d'Italia
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

"""Python module to handle extras dependencies loading and import errors.

This is a private module of the library. There should be no point in using it directly from client code.
"""
from __future__ import annotations

import sys

# known extras and their dependencies
# Each pair below couples the name of a third-party package with the name of
# the black-it extra used in the "pip install black-it[<extra>]" hint.

# package / extra used for the Gaussian Process sampler
_GPY_PACKAGE_NAME = "GPy"
_GP_SAMPLER_EXTRA_NAME = "gp-sampler"

# package / extra used for the XGBoost sampler
_XGBOOST_PACKAGE_NAME = "xgboost"
_XGBOOST_SAMPLER_EXTRA_NAME = "xgboost-sampler"


class DependencyNotInstalledError(Exception):
    """Error signalling that a package needed by a component cannot be imported."""

    def __init__(self, component_name: str, package_name: str, extra_name: str) -> None:
        """Compose the error message and initialize the exception.

        Args:
            component_name: the component that requires the package
            package_name: the Python package that could not be imported
            extra_name: the black-it extra suggested to solve the issue
        """
        # first sentence: what went wrong; second sentence: how to fix it
        problem = f"Cannot import package '{package_name}', required by component {component_name}. "
        remedy = f"To solve the issue, you can install the extra '{extra_name}': pip install black-it[{extra_name}]"
        super().__init__(problem + remedy)


class GPyNotSupportedOnPy311Error(Exception):
    """Error for a failed import of GPy on a Python 3.11 interpreter."""

    # fixed message used by every instance; it links to the tracking issue
    __ERROR_MSG = (
        "The GaussianProcessSampler depends on "
        f"'{_GPY_PACKAGE_NAME}', which is not supported on Python 3.11; "
        "see https://github.com/bancaditalia/black-it/issues/36"
    )

    def __init__(self) -> None:
        """Initialize the exception with the fixed error message."""
        super().__init__(type(self).__ERROR_MSG)


def _check_import_error_else_raise_exception(
    import_error: ImportError | None,
    component_name: str,
    package_name: str,
    black_it_extra_name: str,
) -> None:
    """Check an import error; raise the DependencyNotInstalledError exception with a useful message.

    Args:
        import_error: the ImportError object generated by the failed attempt. If None, then no error occurred.
        component_name: the component for which the dependency is needed
        package_name: the Python package name of the dependency
        black_it_extra_name: the name of the black-it extra to install to solve the issue.

    Raises:
        DependencyNotInstalledError: if import_error is not None. The original
            ImportError is attached as the cause of the raised exception.
    """
    if import_error is None:
        # nothing to do.
        return

    # An import error happened. Chain it explicitly: this raise is not inside an
    # 'except' block, so without 'from' the original failure would be lost.
    raise DependencyNotInstalledError(component_name, package_name, black_it_extra_name) from import_error


def _check_gpy_import_error_else_raise_exception(
    import_error: ImportError | None,
    component_name: str,
    package_name: str,
    black_it_extra_name: str,
) -> None:
    """Check GPy import error and if an error occurred, raise error with a useful error message.

    We need to handle two cases:

    - the user is using Python 3.11: the GPy package cannot be installed there;
        see https://github.com/SheffieldML/GPy/issues/998
    - the user did not install the 'gp-sampler' extra.

    Args:
        import_error: the ImportError object generated by the failed attempt. If None, then no error occurred.
        component_name: the component for which the dependency is needed
        package_name: the Python package name of the dependency
        black_it_extra_name: the name of the black-it extra to install to solve the issue.

    Raises:
        GPyNotSupportedOnPy311Error: if an import error occurred and the interpreter is Python 3.11.
        DependencyNotInstalledError: if an import error occurred on any other Python version.
    """
    if import_error is None:
        # nothing to do.
        return

    # sys.version_info has five fields (major, minor, micro, releaselevel, serial),
    # so it never equals the 2-tuple (3, 11); compare only (major, minor).
    if sys.version_info[:2] == (3, 11):
        raise GPyNotSupportedOnPy311Error from import_error

    _check_import_error_else_raise_exception(
        import_error,
        component_name,
        package_name,
        black_it_extra_name,
    )
26 changes: 26 additions & 0 deletions black_it/samplers/gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,25 @@
from scipy.special import erfc # type: ignore[import] # pylint: disable=no-name-in-module
from sklearn.gaussian_process import GaussianProcessRegressor, kernels # type: ignore[import]

from black_it._load_dependency import (
    _GP_SAMPLER_EXTRA_NAME,
    _GPY_PACKAGE_NAME,
    _check_gpy_import_error_else_raise_exception,
    _check_import_error_else_raise_exception,
)
from black_it.samplers.surrogate import MLSurrogateSampler

if TYPE_CHECKING:
from numpy.typing import NDArray

# Try the GPy import once, at module load. A failure is stored rather than
# propagated, so that it can be checked later.
_GPY_IMPORT_ERROR: ImportError | None = None
try:
    import GPy  # noqa: F401
    from GPy.models import GPRegression  # noqa: F401
except ImportError as gpy_import_error:
    _GPY_IMPORT_ERROR = gpy_import_error


_BIG_DATASET_SIZE_WARNING_THRESHOLD = 500
_SMALL_VARIANCE_VALUES = 1e-5
Expand Down Expand Up @@ -76,6 +90,8 @@ def __init__( # noqa: PLR0913
acquisition: type of acquisition function, it can be 'expected_improvement' of simply 'mean'
jitter: positive value to make the "expected_improvement" acquisition more explorative.
"""
self.__check_gpy_import_error()

self._validate_acquisition(acquisition)

super().__init__(
Expand All @@ -90,6 +106,16 @@ def __init__( # noqa: PLR0913
self._gpmodel: GaussianProcessRegressor | None = None
self._fmin: np.double | float | None = None

@classmethod
def __check_gpy_import_error(cls) -> None:
    """Check if an import error happened while attempting to import the 'GPy' package.

    Delegates to the GPy-specific checker, which is meant to distinguish an
    unsupported Python 3.11 interpreter from a missing 'gp-sampler' extra.
    If no import error was recorded, this is a no-op.
    """
    _check_gpy_import_error_else_raise_exception(
        _GPY_IMPORT_ERROR,
        cls.__name__,
        _GPY_PACKAGE_NAME,
        _GP_SAMPLER_EXTRA_NAME,
    )

@staticmethod
def _validate_acquisition(acquisition: str) -> None:
"""Check that the required acquisition is among the supported ones.
Expand Down
25 changes: 24 additions & 1 deletion black_it/samplers/xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,12 @@
from typing import TYPE_CHECKING, cast

import numpy as np
import xgboost as xgb

from black_it._load_dependency import (
_XGBOOST_PACKAGE_NAME,
_XGBOOST_SAMPLER_EXTRA_NAME,
_check_import_error_else_raise_exception,
)
from black_it.samplers.surrogate import MLSurrogateSampler

if TYPE_CHECKING:
Expand All @@ -32,6 +36,14 @@
MIN_FLOAT32 = np.finfo(np.float32).min
EPS_FLOAT32 = np.finfo(np.float32).eps

# Try the xgboost import once, at module load. A failure is stored rather than
# propagated, so that it can be checked later.
_XGBOOST_IMPORT_ERROR: ImportError | None = None
try:
    import xgboost as xgb
except ImportError as xgboost_import_error:
    _XGBOOST_IMPORT_ERROR = xgboost_import_error


class XGBoostSampler(MLSurrogateSampler):
"""This class implements xgboost sampling."""
Expand Down Expand Up @@ -67,6 +79,7 @@ def __init__( # noqa: PLR0913
References:
Lamperti, Roventini, and Sani, "Agent-based model calibration using machine learning surrogates"
"""
self.__check_xgboost_import_error()
super().__init__(
batch_size,
random_state,
Expand All @@ -81,6 +94,16 @@ def __init__( # noqa: PLR0913
self._n_estimators = n_estimators
self._xg_regressor: xgb.XGBRegressor | None = None

@classmethod
def __check_xgboost_import_error(cls) -> None:
    """Raise a descriptive error if the 'xgboost' package could not be imported."""
    _check_import_error_else_raise_exception(
        import_error=_XGBOOST_IMPORT_ERROR,
        component_name=cls.__name__,
        package_name=_XGBOOST_PACKAGE_NAME,
        black_it_extra_name=_XGBOOST_SAMPLER_EXTRA_NAME,
    )

@property
def colsample_bytree(self) -> float:
"""Get the colsample_bytree parameter."""
Expand Down
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,11 @@ tox = "^4.4.12"
twine = "^4.0.2"
vulture = "^2.9.1"

[project.optional-dependencies]
gp-sampler = ["GPy~=1.10.0"]
xgboost-sampler = ["xgboost~=1.7.2"]
all = ["GPy~=1.10.0", "xgboost~=1.7.2"]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
Expand Down
5 changes: 5 additions & 0 deletions tests/test_calibrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@
from black_it.utils.seedable import BaseSeedable
from examples.models.simple_models import NormalMV

from .utils.base import no_gpy_installed, no_python311_for_gpy, no_xgboost_installed


@no_python311_for_gpy
@no_gpy_installed
@no_xgboost_installed
class TestCalibrate:
"""Test the Calibrator.calibrate method."""

Expand Down
3 changes: 3 additions & 0 deletions tests/test_samplers/test_gaussian_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@

from black_it.samplers.gaussian_process import GaussianProcessSampler, _AcquisitionTypes
from black_it.search_space import SearchSpace
from tests.utils.base import no_gpy_installed, no_python311_for_gpy

pytestmark = [no_python311_for_gpy, no_gpy_installed]


class TestGaussianProcess2D:
Expand Down
5 changes: 5 additions & 0 deletions tests/test_samplers/test_xgboost.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
from black_it.samplers.xgboost import XGBoostSampler
from black_it.search_space import SearchSpace
from examples.models.economics.brock_hommes import BH4
from tests.utils.base import no_xgboost_installed

pytestmark = no_xgboost_installed


# This is a temporary workaround to make tests to run also on Windows and Mac.
# See: https://github.com/bancaditalia/black-it/issues/49
Expand All @@ -34,6 +38,7 @@
else:
expected_params = np.array([[0.24, 0.26], [0.37, 0.21], [0.43, 0.14], [0.11, 0.04]])


MAX_FLOAT32 = np.finfo(np.float32).max
MIN_FLOAT32 = np.finfo(np.float32).min
EPS_FLOAT32 = np.finfo(np.float32).eps
Expand Down
39 changes: 38 additions & 1 deletion tests/utils/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,24 @@
from __future__ import annotations

import dataclasses
import importlib
import shutil
import signal
import subprocess # nosec B404
import sys
from functools import wraps
from typing import Callable
from typing import TYPE_CHECKING, Callable

import pytest

from black_it._load_dependency import _GPY_PACKAGE_NAME, _XGBOOST_PACKAGE_NAME
from tests.conftest import DEFAULT_SUBPROCESS_TIMEOUT

if TYPE_CHECKING:
import types

from _pytest.mark.structures import MarkDecorator


@dataclasses.dataclass(frozen=True)
class PopenResult:
Expand Down Expand Up @@ -179,3 +186,33 @@ def wrapper( # type: ignore[no-untyped-def] # noqa: ANN202
sys.platform == "win32",
reason="This test cannot be ran on Windows",
)


def try_import_else_none(module_name: str) -> types.ModuleType | None:
    """Import the named module and return it, or return None if the import fails.

    Args:
        module_name: the name of the module to import

    Returns:
        the imported module, or None when an ImportError is raised.
    """
    try:
        imported_module = importlib.import_module(module_name)
    except ImportError:
        imported_module = None
    return imported_module


def try_import_else_skip(
    package_name: str,
    **skipif_kwargs,  # noqa: ANN003
) -> MarkDecorator:
    """Build a skipif marker that skips the test(s) when the package cannot be imported.

    Args:
        package_name: the name of the package to try to import
        skipif_kwargs: extra keyword arguments forwarded to pytest.mark.skipif

    Returns:
        the pytest skipif marker.
    """
    package_is_missing = try_import_else_none(package_name) is None
    skip_reason = f"Cannot run the test because the package '{package_name}' is not installed"
    return pytest.mark.skipif(package_is_missing, reason=skip_reason, **skipif_kwargs)


# Skip marker whose condition holds for any Python 3.11 interpreter
# (version >= 3.11 and < 3.12).
no_python311_for_gpy = pytest.mark.skipif(
    (3, 11) <= sys.version_info < (3, 12),
    reason="GPy not supported on Python 3.11, see: https://github.com/bancaditalia/black-it/issues/36",
)


# Skip markers whose condition holds when the named package cannot be imported.
no_gpy_installed = try_import_else_skip(_GPY_PACKAGE_NAME)
no_xgboost_installed = try_import_else_skip(_XGBOOST_PACKAGE_NAME)
1 change: 1 addition & 0 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ basepython = python3
[testenv]
setenv =
PYTHONPATH = {toxinidir}
extras = all
deps =
pytest==7.4.2
pytest-cov==4.1.0
Expand Down
Loading