From 86643ec6ad1966b72cb3a0c8b91025ec8c218d32 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 23 Oct 2024 09:37:47 +0200 Subject: [PATCH] Refactor priors Introduce `Distribution` classes for handling prior distributions and sampling from them. Later on this can be extended to noise models for measurements. --- doc/example.rst | 1 + doc/example/distributions.ipynb | 197 +++++++++++++++ doc/modules.rst | 1 + petab/v1/distributions.py | 435 ++++++++++++++++++++++++++++++++ petab/v1/parameters.py | 3 +- petab/v1/sampling.py | 107 ++------ tests/v1/test_distributions.py | 43 ++++ tests/v1/test_priors.py | 46 ++-- 8 files changed, 725 insertions(+), 108 deletions(-) create mode 100644 doc/example/distributions.ipynb create mode 100644 petab/v1/distributions.py create mode 100644 tests/v1/test_distributions.py diff --git a/doc/example.rst b/doc/example.rst index 6fe6dab5..dfe54fb3 100644 --- a/doc/example.rst +++ b/doc/example.rst @@ -10,6 +10,7 @@ The following examples should help to get a better idea of how to use the PEtab example/example_petablint.ipynb example/example_visualization.ipynb + example/distributions.ipynb Examples of systems biology parameter estimation problems specified in PEtab can be found in the `systems biology benchmark model collection `_. diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb new file mode 100644 index 00000000..8689fcff --- /dev/null +++ b/doc/example/distributions.ipynb @@ -0,0 +1,197 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Prior distributions in PEtab\n", + "\n", + "This notebook gives a brief overview of the prior distributions in PEtab and how they are represented in the PEtab library.\n", + "\n", + "Prior distributions are used to specify the prior knowledge about the parameters.\n", + "Parameter priors are specified in the parameter table. A prior is defined by its type and its parameters.\n", + "Each prior type has a specific set of parameters. For example, the normal distribution has two parameters: the mean and the standard deviation.\n", + "\n", + "There are two types of priors in PEtab - objective priors and initialization priors:\n", + "\n", + "* *Objective priors* are used to specify the prior knowledge about the parameters that are to be estimated. They will enter the objective function of the optimization problem. They are specified in the `objectivePriorType` and `objectivePriorParameters` columns of the parameter table.\n", + "* *Initialization priors* can be used as a hint for the optimization algorithm. They will not enter the objective function. 
They are specified in the `initializationPriorType` and `initializationPriorParameters` columns of the parameter table.\n", + "\n", + "\n" + ], + "id": "372289411a2aa7b3" + }, + { + "metadata": { + "collapsed": true + }, + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "from petab.v1.C import *\n", + "from petab.v1.distributions import *\n", + "\n", + "sns.set_style(None)\n", + "\n", + "\n", + "def plot(distr: Distribution, ax=None):\n", + " \"\"\"Visualize a distribution.\"\"\"\n", + " if ax is None:\n", + " fig, ax = plt.subplots()\n", + "\n", + " sample = distr.sample(10000)\n", + "\n", + " # pdf\n", + " xmin = min(sample.min(), distr.lb_scaled if distr.bounds is not None else sample.min())\n", + " xmax = max(sample.max(), distr.ub_scaled if distr.bounds is not None else sample.max())\n", + " x = np.linspace(xmin, xmax, 500)\n", + " y = distr.pdf(x)\n", + " ax.plot(x, y, color='red', label='pdf')\n", + "\n", + " sns.histplot(sample, stat='density', ax=ax, label=\"sample\")\n", + "\n", + " # bounds\n", + " if distr.bounds is not None:\n", + " for bound in (distr.lb_scaled, distr.ub_scaled):\n", + " if bound is not None and np.isfinite(bound):\n", + " ax.axvline(bound, color='black', linestyle='--', label='bound')\n", + "\n", + " ax.set_title(str(distr))\n", + " ax.set_xlabel('Parameter value on the parameter scale')\n", + " ax.grid(False)\n", + " handles, labels = ax.get_legend_handles_labels()\n", + " unique_labels = dict(zip(labels, handles))\n", + " ax.legend(unique_labels.values(), unique_labels.keys())\n", + " plt.show()" + ], + "id": "initial_id", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n", + "id": "db36a4a93622ccb8" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Uniform(0, 1))\n", + "plot(Normal(0, 1))\n", + "plot(Laplace(0, 1))\n", + "plot(LogNormal(0, 1))\n", + "plot(LogLaplace(1, 0.5))" + ], + "id": "4f09e50a3db06d9f", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "If a parameter scale is specified, the sample is transformed accordingly (but not the distribution parameters):\n", + "id": "dab4b2d1e0f312d8" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Normal(10, 2, transformation=LIN))\n", + "plot(Normal(10, 2, transformation=LOG))\n", + "# Note that the log-normal distribution is different from a log-transformed normal distribution:\n", + "plot(LogNormal(10, 2, transformation=LIN))" + ], + "id": "f6192c226f179ef9", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Prior distributions can also be defined on the parameter scale by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. 
In these cases, a sample from the given distribution is used directly, without applying any transformation according to `parameterScale` (this implies that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`):",
   "id": "263c9fd31156a4d5"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "plot(Uniform(0, 1, transformation=LOG10))\n",
    "plot(ParameterScaleUniform(0, 1, transformation=LOG10))\n",
    "\n",
    "plot(Uniform(0, 1, transformation=LIN))\n",
    "plot(ParameterScaleUniform(0, 1, transformation=LIN))\n"
   ],
   "id": "5ca940bc24312fc6",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "To prevent samples from exceeding the parameter bounds given in the parameter table, they are clipped to those bounds. Note that the current implementation does not support sampling from a truncated distribution: clipping can introduce an unwanted bias at the bounds, and should therefore be used with caution (i.e., the bounds should be chosen wide enough):",
   "id": "b1a8b17d765db826"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "plot(Normal(0, 1, bounds=(-4, 4)))  # negligible clipping-bias at 4 sigma\n",
    "plot(Uniform(0, 1, bounds=(0.1, 0.9)))  # significant clipping-bias"
   ],
   "id": "4ac42b1eed759bdd",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "markdown",
   "source": "Further distribution examples:",
   "id": "45ffce1341483f24"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": [
    "plot(Normal(10, 1, bounds=(6, 14), transformation=\"log10\"))\n",
    "plot(ParameterScaleNormal(10, 1, bounds=(10**6, 10**14), transformation=\"log10\"))\n"
   ],
   "id": "581e1ac431860419",
   "outputs": [],
   "execution_count": null
  },
  {
   "metadata": {},
   "cell_type": "code",
   "source": "",
   "id": "802a64be56a6c94f",
   "outputs": [],
   "execution_count": null
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
diff --git a/doc/modules.rst b/doc/modules.rst
index 87a9559d..e933c06f 100644
--- a/doc/modules.rst
+++ b/doc/modules.rst
@@ -14,6 +14,7 @@ API Reference
    petab.v1.composite_problem
    petab.v1.conditions
    petab.v1.core
+   petab.v1.distributions
    petab.v1.lint
    petab.v1.measurements
    petab.v1.models
diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py
new file mode 100644
index 00000000..bd221b3c
--- /dev/null
+++ b/petab/v1/distributions.py
@@ -0,0 +1,435 @@
+"""Probability distributions used by PEtab."""
+from __future__ import annotations
+
+import abc
+from typing import Literal
+
+import numpy as np
+import pandas as pd
+from scipy.stats import laplace, lognorm, norm, uniform
+
+from . import C
+from .parameters import scale, unscale
+
+__all__ = [
+    "Distribution",
+    "Normal",
+    "LogNormal",
+    "Uniform",
+    "Laplace",
+    "LogLaplace",
+    "ParameterScaleNormal",
+    "ParameterScaleUniform",
+    "ParameterScaleLaplace",
+]
+
+
+class Distribution(abc.ABC):
+    """A univariate probability distribution.
+
+    :param type_: The type of the distribution.
+    :param transformation: The transformation to be applied to the sample.
+        If `parameter_scale` is `True`, the transformation is only used to
+        scale the bounds.
+    :param parameters: The parameters of the distribution (unaffected by
+        `parameter_scale` and `transformation`).
+    :param bounds: The untransformed bounds of the sample (lower, upper).
+    :param parameter_scale: Whether the parameters are already on the correct
+        scale. If `False`, the parameters are transformed to the correct scale.
+        If `True`, the parameters are assumed to be on the correct scale and
+        no transformation is applied.
+
+    """
+
+    _type_to_cls: dict[str, type[Distribution]] = {}
+
+    def __init__(
+        self,
+        type_: str,
+        transformation: str,
+        parameters: tuple,
+        bounds: tuple = None,
+        parameter_scale: bool = False,
+    ):
+        if type_ not in self._type_to_cls:
+            raise ValueError(f"Unknown distribution type: {type_}")
+
+        if len(parameters) != 2:
+            raise ValueError(
+                f"Expected two parameters, got {len(parameters)}: {parameters}"
+            )
+
+        if bounds is not None and len(bounds) != 2:
+            raise ValueError(
+                f"Expected two bounds, got {len(bounds)}: {bounds}"
+            )
+
+        self.type = type_
+        self.parameters = parameters
+        self.bounds = bounds
+        self.transformation = transformation
+        self.parameter_scale = parameter_scale
+
+    def __repr__(self):
+        return (
+            f"{self.__class__.__name__}("
+            f"{self.parameters[0]!r}, {self.parameters[1]!r},"
+            f" bounds={self.bounds!r}, transformation={self.transformation!r}"
+            ")"
+        )
+
+    @abc.abstractmethod
+    def sample(self, shape=None) -> np.ndarray:
+        """Sample from the distribution.
+
+        :param shape: The shape of the sample.
+        :return: A sample from the distribution.
+        """
+        ...
+
+    def _scale_sample(self, sample):
+        """Scale the sample to the parameter space."""
+        if self.parameter_scale:
+            return sample
+
+        return scale(sample, self.transformation)
+
+    def _clip_to_bounds(self, x):
+        """Clip `x` values to bounds.
+
+        :param x: The values to clip. Assumed to be on the parameter scale.
+        """
+        # TODO: replace this by proper truncation
+        if self.bounds is None:
+            return x
+
+        return np.maximum(
+            np.minimum(self.ub_scaled, x),
+            self.lb_scaled,
+        )
+
+    @property
+    def lb_scaled(self):
+        """The lower bound on the parameter scale."""
+        return scale(self.bounds[0], self.transformation)
+
+    @property
+    def ub_scaled(self):
+        """The upper bound on the parameter scale."""
+        return scale(self.bounds[1], self.transformation)
+
+    @abc.abstractmethod
+    def _pdf(self, x):
+        """Probability density function at x.
+
+        :param x: The value at which to evaluate the PDF.
+            ``x`` is assumed to be on the linear scale.
+        :return: The value of the PDF at ``x``.
+        """
+        ...
+
+    def pdf(self, x):
+        """Probability density function at x.
+
+        :param x: The value at which to evaluate the PDF.
+            ``x`` is assumed to be on the parameter scale.
+        :return: The value of the PDF at ``x``. Note that the PDF currently
+            does not account for the clipping at the bounds.
+        """
+        x = x if self.parameter_scale else unscale(x, self.transformation)
+
+        # scale the PDF to the parameter scale
+        if self.parameter_scale or self.transformation == C.LIN:
+            coeff = 1
+        elif self.transformation == C.LOG10:
+            coeff = x * np.log(10)
+        elif self.transformation == C.LOG:
+            coeff = x
+        else:
+            raise ValueError(f"Unknown transformation: {self.transformation}")
+
+        return self._pdf(x) * coeff
+
+    def neglogprior(self, x):
+        """Negative log-prior at x.
+
+        :param x: The value at which to evaluate the negative log-prior.
+            ``x`` is assumed to be on the parameter scale.
+        :return: The negative log-prior at ``x``.
+        """
+        return -np.log(self.pdf(x))
+
+    @staticmethod
+    def from_par_dict(
+        d, type_: Literal["initialization", "objective"]
+    ) -> Distribution:
+        """Create a distribution from a row of the parameter table.
+
+        :param d: A dictionary representing a row of the parameter table.
+        :param type_: The prior type (``initialization`` or ``objective``).
+        :return: A distribution object.
+        """
+        dist_type = d.get(f"{type_}PriorType", C.NORMAL)
+        if not isinstance(dist_type, str) and np.isnan(dist_type):
+            dist_type = C.PARAMETER_SCALE_UNIFORM
+        cls = Distribution._type_to_cls[dist_type]
+
+        pscale = d.get(C.PARAMETER_SCALE, C.LIN)
+        if (
+            pd.isna(d[f"{type_}PriorParameters"])
+            and dist_type == C.PARAMETER_SCALE_UNIFORM
+        ):
+            params = (
+                scale(d[C.LOWER_BOUND], pscale),
+                scale(d[C.UPPER_BOUND], pscale),
+            )
+        else:
+            params = tuple(
+                map(
+                    float,
+                    d[f"{type_}PriorParameters"].split(C.PARAMETER_SEPARATOR),
+                )
+            )
+        return cls(
+            *params,
+            bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]),
+            transformation=pscale,
+        )
+
+
+class Normal(Distribution):
+    """A normal distribution."""
+
+    def __init__(
+        self,
+        mean: float,
+        std: float,
+        bounds: tuple = None,
+        transformation: str = C.LIN,
+        _type=C.NORMAL,
+        _parameter_scale=False,
+    ):
+        super().__init__(
+            transformation=transformation,
+            parameters=(mean, std),
+            bounds=bounds,
+            parameter_scale=_parameter_scale,
+            type_=_type,
+        )
+
+    def sample(self, shape=None):
+        sample = np.random.normal(
+            loc=self.parameters[0], scale=self.parameters[1], size=shape
+        )
+        return self._clip_to_bounds(self._scale_sample(sample))
+
+    def _pdf(self, x):
+        return norm.pdf(x, loc=self.parameters[0], scale=self.parameters[1])
+
+
+class LogNormal(Distribution):
+    """A log-normal distribution."""
+
+    def __init__(
+        self,
+        mean: float,
+        std: float,
+        bounds: tuple = None,
+        transformation: str = C.LIN,
+    ):
+        super().__init__(
+            C.LOG_NORMAL,
+            transformation=transformation,
+            parameters=(mean, std),
+            bounds=bounds,
+            parameter_scale=False,
+        )
+
+    def sample(self, shape=None):
+        sample = np.random.lognormal(
+            mean=self.parameters[0], sigma=self.parameters[1], size=shape
+        )
+        return self._clip_to_bounds(self._scale_sample(sample))
+
+    def _pdf(self, x):
+        return lognorm.pdf(
+            x, scale=np.exp(self.parameters[0]), s=self.parameters[1]
+        )
+
+
+class Uniform(Distribution):
+    """A uniform distribution."""
+
+    def __init__(
+        self,
+        lower_bound: float,
+        upper_bound: float,
+        bounds: tuple = None,
+        transformation: str = C.LIN,
+        _type=C.UNIFORM,
+        _parameter_scale=False,
+    ):
+        super().__init__(
+            _type,
+            transformation=transformation,
+            parameters=(lower_bound, upper_bound),
+            bounds=bounds,
+            parameter_scale=_parameter_scale,
+        )
+
+    def sample(self, shape=None):
+        sample = np.random.uniform(
+            low=self.parameters[0], high=self.parameters[1], size=shape
+        )
+        return self._clip_to_bounds(self._scale_sample(sample))
+
+    def _pdf(self, x):
+        return uniform.pdf(
+            x,
+            loc=self.parameters[0],
+            scale=self.parameters[1] - self.parameters[0],
+        )
+
+
+class Laplace(Distribution):
+    """A Laplace distribution."""
+
+    def __init__(
+        self,
+        mean: float,
+        scale: float,
+        bounds: tuple = None,
+        transformation: str = C.LIN,
+        _type=C.LAPLACE,
+        _parameter_scale=False,
+    ):
+        super().__init__(
+            _type,
+            transformation=transformation,
+            parameters=(mean, scale),
+            bounds=bounds,
+            parameter_scale=_parameter_scale,
+        )
+
+    def sample(self, shape=None):
+        sample = np.random.laplace(
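+            # the raw draw is taken on the distribution's own scale; it is
+            # mapped to the parameter scale (_scale_sample) and clipped to
+            # the bounds (_clip_to_bounds) below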
loc=self.parameters[0], scale=self.parameters[1], size=shape + ) + return self._clip_to_bounds(self._scale_sample(sample)) + + def _pdf(self, x): + return laplace.pdf(x, loc=self.parameters[0], scale=self.parameters[1]) + + +class LogLaplace(Distribution): + """A log-Laplace distribution.""" + + def __init__( + self, + mean: float, + scale: float, + bounds: tuple = None, + transformation: str = C.LIN, + ): + super().__init__( + C.LOG_LAPLACE, + transformation=transformation, + parameters=(mean, scale), + bounds=bounds, + parameter_scale=False, + ) + + @property + def mean(self): + """The mean of the underlying Laplace distribution.""" + return self.parameters[0] + + @property + def scale(self): + """The scale of the underlying Laplace distribution.""" + return self.parameters[1] + + def sample(self, shape=None): + sample = np.exp( + np.random.laplace(loc=self.mean, scale=self.scale, size=shape) + ) + return self._clip_to_bounds(self._scale_sample(sample)) + + def _pdf(self, x): + return ( + 1 + / (2 * self.scale * x) + * np.exp(-np.abs(np.log(x) - self.mean) / self.scale) + ) + + +class ParameterScaleNormal(Normal): + """A normal distribution with parameters on the parameter scale.""" + + def __init__( + self, + mean: float, + std: float, + bounds: tuple = None, + transformation: str = C.LIN, + ): + super().__init__( + transformation=transformation, + bounds=bounds, + mean=mean, + std=std, + _type=C.PARAMETER_SCALE_NORMAL, + _parameter_scale=True, + ) + + +class ParameterScaleUniform(Uniform): + """A uniform distribution with parameters on the parameter scale.""" + + def __init__( + self, + lower_bound: float, + upper_bound: float, + bounds: tuple = None, + transformation: str = C.LIN, + ): + super().__init__( + transformation=transformation, + lower_bound=lower_bound, + upper_bound=upper_bound, + bounds=bounds, + _type=C.PARAMETER_SCALE_UNIFORM, + _parameter_scale=True, + ) + + +class ParameterScaleLaplace(Laplace): + """A Laplace distribution with parameters on the parameter scale.""" + + def __init__( + self, + mean: float, + scale: float, + bounds: tuple = None, + transformation: str = C.LIN, + ): + super().__init__( + _type=C.PARAMETER_SCALE_LAPLACE, + transformation=transformation, + mean=mean, + scale=scale, + bounds=bounds, + _parameter_scale=True, + ) + + +Distribution._type_to_cls = { + C.NORMAL: Normal, + C.LOG_NORMAL: LogNormal, + C.UNIFORM: Uniform, + C.LAPLACE: Laplace, + C.LOG_LAPLACE: LogLaplace, + C.PARAMETER_SCALE_NORMAL: ParameterScaleNormal, + C.PARAMETER_SCALE_UNIFORM: ParameterScaleUniform, + C.PARAMETER_SCALE_LAPLACE: ParameterScaleLaplace, +} diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index 8f252988..8875c84f 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -524,7 +524,8 @@ def scale( if scale_str == LOG: return np.log(parameter) if scale_str == LOG10: - return np.log10(parameter) + with np.errstate(divide="ignore"): + return np.log10(parameter) raise ValueError(f"Invalid parameter scaling: {scale_str}") diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index be154f1c..c24b3c51 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -5,8 +5,8 @@ import numpy as np import pandas as pd -from . 
import parameters from .C import * # noqa: F403 +from .distributions import Distribution __all__ = ["sample_from_prior", "sample_parameter_startpoints"] @@ -26,84 +26,10 @@ def sample_from_prior( """ # unpack info p_type, p_params, scaling, bounds = prior + prior_cls = Distribution._type_to_cls[p_type] + prior = prior_cls(*p_params, bounds=tuple(bounds), transformation=scaling) - # define a function to rescale the sampled points to parameter scale - def scale(x): - if scaling == LIN: - return x - if scaling == LOG: - return np.log(x) - if scaling == LOG10: - return np.log10(x) - raise NotImplementedError( - f"Parameter priors on the parameter scale {scaling} are " - "currently not implemented." - ) - - def clip_to_bounds(x: np.array): - """Clip values in array x to bounds""" - return np.maximum(np.minimum(scale(bounds[1]), x), scale(bounds[0])) - - # define lambda functions for each parameter - if p_type == UNIFORM: - sp = scale( - (p_params[1] - p_params[0]) * np.random.random((n_starts,)) - + p_params[0] - ) - - elif p_type == PARAMETER_SCALE_UNIFORM: - sp = (p_params[1] - p_params[0]) * np.random.random( - (n_starts,) - ) + p_params[0] - - elif p_type == NORMAL: - sp = scale( - np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - - elif p_type == LOG_NORMAL: - sp = scale( - np.exp( - np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - ) - - elif p_type == PARAMETER_SCALE_NORMAL: - sp = np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - - elif p_type == LAPLACE: - sp = scale( - np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - - elif p_type == LOG_LAPLACE: - sp = scale( - np.exp( - np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - ) - - elif p_type == PARAMETER_SCALE_LAPLACE: - sp = np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - - else: - raise NotImplementedError( - f"Parameter priors of type {prior[0]} are not implemented." - ) - - return clip_to_bounds(sp) + return prior.sample(shape=(n_starts,)) def sample_parameter_startpoints( @@ -130,11 +56,24 @@ def sample_parameter_startpoints( if seed is not None: np.random.seed(seed) - # get types and parameters of priors from dataframe - prior_list = parameters.get_priors_from_df( - parameter_df, mode=INITIALIZATION, parameter_ids=parameter_ids - ) + par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1] - startpoints = [sample_from_prior(prior, n_starts) for prior in prior_list] + if parameter_ids is not None: + try: + par_to_estimate = par_to_estimate.loc[parameter_ids, :] + except KeyError as e: + missing_ids = set(parameter_ids) - set(par_to_estimate.index) + raise KeyError( + "Parameter table does not contain estimated parameter(s) " + f"{missing_ids}." 
+ ) from e - return np.array(startpoints).T + # get types and parameters of priors from dataframe + return np.array( + [ + Distribution.from_par_dict(row, type_="initialization").sample( + n_starts + ) + for row in par_to_estimate.to_dict("records") + ] + ).T diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py new file mode 100644 index 00000000..10069d69 --- /dev/null +++ b/tests/v1/test_distributions.py @@ -0,0 +1,43 @@ +from itertools import product + +import numpy as np +import pytest +from scipy.integrate import cumulative_trapezoid +from scipy.stats import kstest + +from petab.v1.distributions import * +from petab.v2.C import * + + +@pytest.mark.parametrize( + "distribution, transform", + list( + product( + [ + Normal(1, 1), + LogNormal(2, 1), + Uniform(2, 4), + Laplace(1, 2), + LogLaplace(1, 0.5), + ParameterScaleNormal(1, 1), + ParameterScaleLaplace(1, 2), + ParameterScaleUniform(1, 2), + ], + [LIN, LOG, LOG10], + ) + ), +) +def test_sample_matches_pdf(distribution, transform): + """Test that the sample matches the PDF.""" + np.random.seed(1) + N_SAMPLES = 10000 + distribution.transform = transform + sample = distribution.sample(N_SAMPLES) + + # pdf -> cdf + def cdf(x): + return cumulative_trapezoid(distribution.pdf(x), x) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) + assert p > 0.05, (p, distribution) diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py index ac07d089..73d76fb4 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd import pytest -from scipy.stats import norm import petab.v1 from petab.v1 import ( @@ -17,6 +16,7 @@ get_simulation_conditions, get_simulation_df, ) +from petab.v1.distributions import Distribution from petab.v1.priors import priors_to_measurements @@ -48,6 +48,13 @@ def test_priors_to_measurements(problem_id): strict=True, ) ) + x_unscaled_dict = dict( + zip( + original_problem.x_free_ids, + original_problem.x_nominal_free, + strict=True, + ) + ) # convert priors to measurements petab_problem_measurements = priors_to_measurements(petab_problem_priors) @@ -110,9 +117,13 @@ def test_priors_to_measurements(problem_id): def apply_parameter_values(row): # apply the parameter values to the observable formula for the prior if row[OBSERVABLE_ID].startswith("prior_"): - row[SIMULATION] = x_scaled_dict[ - row[OBSERVABLE_ID].removeprefix("prior_") - ] + parameter_id = row[OBSERVABLE_ID].removeprefix("prior_") + if original_problem.parameter_df.loc[ + parameter_id, OBJECTIVE_PRIOR_TYPE + ].startswith("parameterScale"): + row[SIMULATION] = x_scaled_dict[parameter_id] + else: + row[SIMULATION] = x_unscaled_dict[parameter_id] return row simulated_prior_observables = simulated_prior_observables.apply( @@ -140,29 +151,18 @@ def apply_parameter_values(row): (petab_problem_priors.parameter_df[ESTIMATE] == 1) & petab_problem_priors.parameter_df[OBJECTIVE_PRIOR_TYPE].notna() ] - priors = petab.v1.get_priors_from_df( - petab_problem_priors.parameter_df, - mode="objective", - parameter_ids=parameter_ids, - ) + priors = [ + Distribution.from_par_dict( + petab_problem_priors.parameter_df.loc[par_id], type_="objective" + ) + for par_id in parameter_ids + ] prior_contrib = 0 for parameter_id, prior in zip(parameter_ids, priors, strict=True): - prior_type, prior_pars, par_scale, par_bounds = prior - if prior_type == petab.v1.PARAMETER_SCALE_NORMAL: - prior_contrib += norm.logpdf( - x_scaled_dict[parameter_id], - 
loc=prior_pars[0], - scale=prior_pars[1], - ) - else: - # enable other models, once libpetab has proper support for - # evaluating the prior contribution. until then, two test - # problems should suffice - assert problem_id == "Raimundez_PCB2020" - pytest.skip(f"Prior type {prior_type} not implemented") + prior_contrib -= prior.neglogprior(x_scaled_dict[parameter_id]) assert np.isclose( - llh_priors + prior_contrib, llh_measurements, rtol=1e-3, atol=1e-16 + llh_priors + prior_contrib, llh_measurements, rtol=1e-8, atol=1e-16 ), (llh_priors + prior_contrib, llh_measurements) # check that the tolerance is not too high assert np.abs(prior_contrib) > 1e-8 * np.abs(llh_priors)
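--
Usage sketch for the new API (not part of the diff above; the parameter-table
row below is hypothetical, everything else follows petab/v1/distributions.py
as introduced in this patch):

    from petab.v1.distributions import Distribution, Normal

    # A standard-normal prior, clipped to [-3, 3] on the (linear) parameter scale
    prior = Normal(0.0, 1.0, bounds=(-3.0, 3.0))
    startpoints = prior.sample((1000,))  # draws are clipped to the bounds
    density = prior.pdf(0.5)             # PDF on the parameter scale
    nlp = prior.neglogprior(0.5)         # -log(pdf), the objective contribution

    # Construct a prior from a (hypothetical) parameter-table row
    row = {
        "parameterScale": "log10",
        "lowerBound": 1e-5,
        "upperBound": 1e5,
        "objectivePriorType": "parameterScaleNormal",
        "objectivePriorParameters": "0;1",
    }
    prior = Distribution.from_par_dict(row, type_="objective")
    startpoints = prior.sample((10,))  # on the log10 scale, clipped to [-5, 5]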