Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

different error messages with different explainers for risk_estimation task #88

Open
zoezhang106 opened this issue Sep 12, 2024 · 0 comments
Assignees

Comments

@zoezhang106
Copy link

Hi,

I was able to train the model successfully, but when I tried to use explainers, I got different error messages with different explainers. Not sure if I missed something.

A demo of how to use an explainer for risk_estimation, and a table showing which explainers work for which type of task, would be very helpful.

Any comments or suggestions would be highly appreciated.

After training the model, I get the model below:

import pprint

from autoprognosis.utils.serialization import load_model_from_file
from autoprognosis.utils.tester import evaluate_survival_estimator

output = workspace / study_name / "model.p"

model = load_model_from_file(output)

metrics = evaluate_survival_estimator(model, X, T, Y, eval_time_horizons)

print(f"Model {model.name()}")
print(f"Score: ")

pprint.pprint(metrics)

~~~~~Result~~~~~~~~~
Model ["1.0 * ice->nop->maxabs_scaler->data_cleanup->coxnet({'ice': {'max_iter': 500, 'tol': 0.001, 'initial_strategy': 2, 'imputation_order': 0}, 'nop': {}, 'maxabs_scaler': {}, 'coxnet': {'batch_norm': 0, 'dropout': 0, 'lr': 0.01, 'patience': 10, 'hidden_dim': 134, 'hidden_len': 2}})", "1.0 * ice->nop->nop->data_cleanup->cox_ph({'ice': {'max_iter': 600, 'tol': 0.01, 'initial_strategy': 1, 'imputation_order': 3}, 'nop': {}, 'cox_ph': {'alpha': 0.07781567509498505, 'penalizer': 0.17400242964936385}})", "1.0 * ice->nop->minmax_scaler->data_cleanup->survival_xgboost({'ice': {'max_iter': 900, 'tol': 0.0001, 'initial_strategy': 2, 'imputation_order': 4}, 'nop': {}, 'minmax_scaler': {}, 'survival_xgboost': {'reg_lambda': 7.2208339391040095, 'reg_alpha': 8.663956876960363, 'colsample_bytree': 0.8804172040023087, 'colsample_bynode': 0.7846426739140888, 'colsample_bylevel': 0.10937126734800158, 'subsample': 0.3879824515826912, 'lr': 0.0001, 'max_depth': 2, 'n_estimators': 68, 'min_child_weight': 5, 'max_bin': 459, 'grow_policy': 0, 'objective': 'cox', 'strategy': 'weibull'}})"]

The result looks good. However, when I tried to use explainers, I got different error messages with different explainers. I am not sure if I missed something. A demo of how to use an explainer for risk_estimation would be very helpful.

explainer = Explainers().get(
    "kernel_shap",
    model,
    X = X,
    y = Y,
    time_to_event = T,
    eval_times = eval_time_horizons,
    task_type="risk_estimation",
)

explainer.plot(X.sample(frac=0.1))

~~~~~~error:~~~~~~~~~

ValueError                                Traceback (most recent call last)
Cell In[38], line 1
----> 1 explainer = Explainers().get(
      2     "kernel_shap",
      3     model,
      4     X = X,
      5     y = Y,
      6     time_to_event = T,
      7     eval_times = eval_time_horizons,
      8     task_type="risk_estimation",
      9 )
     11 explainer.plot(X.sample(frac=0.1))

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/autoprognosis/plugins/core/base_plugin.py:327, in PluginLoader.get(self, name, *args, **kwargs)
    324 if name not in self._plugins:
    325     raise ValueError(f"Plugin {name} cannot be loaded.")
--> 327 return self._plugins[name](*args, **kwargs)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/autoprognosis/plugins/explainers/plugin_kernel_shap.py:81, in KernelSHAPPlugin.__init__(self, estimator, X, y, task_type, feature_names, subsample, prefit, n_epoch, time_to_event, eval_times, random_state, **kwargs)
     76 self.feature_names = (
     77     feature_names if feature_names is not None else pd.DataFrame(X).columns
     78 )
     80 X = pd.DataFrame(X, columns=self.feature_names)
---> 81 X_summary = shap.kmeans(X, subsample)
     82 model = copy.deepcopy(estimator)
     83 self.task_type = task_type

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/shap/utils/_legacy.py:38, in kmeans(X, k, round_values)
     36 # in case there are any missing values in data impute them
     37 imp = SimpleImputer(missing_values=np.nan, strategy='mean')
---> 38 X = imp.fit_transform(X)
     40 # Specify `n_init` for consistent behaviour between sklearn versions
     41 kmeans = KMeans(n_clusters=k, random_state=0, n_init=10).fit(X)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/sklearn/utils/_set_output.py:157, in _wrap_method_output.<locals>.wrapped(self, X, *args, **kwargs)
    155 @wraps(f)
    156 def wrapped(self, X, *args, **kwargs):
--> 157     data_to_wrap = f(self, X, *args, **kwargs)
    158     if isinstance(data_to_wrap, tuple):
    159         # only wrap the first output for cross decomposition
    160         return_tuple = (
    161             _wrap_data_with_container(method, data_to_wrap[0], X, self),
    162             *data_to_wrap[1:],
    163         )

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/sklearn/base.py:916, in TransformerMixin.fit_transform(self, X, y, **fit_params)
    912 # non-optimized default implementation; override when a better
    913 # method is possible for a given clustering algorithm
    914 if y is None:
    915     # fit method of arity 1 (unsupervised transformation)
--> 916     return self.fit(X, **fit_params).transform(X)
    917 else:
    918     # fit method of arity 2 (supervised transformation)
    919     return self.fit(X, y, **fit_params).transform(X)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/sklearn/base.py:1152, in _fit_context.<locals>.decorator.<locals>.wrapper(estimator, *args, **kwargs)
   1145     estimator._validate_params()
   1147 with config_context(
   1148     skip_parameter_validation=(
   1149         prefer_skip_nested_validation or global_skip_validation
   1150     )
   1151 ):
-> 1152     return fit_method(estimator, *args, **kwargs)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/sklearn/impute/_base.py:369, in SimpleImputer.fit(self, X, y)
    351 @_fit_context(prefer_skip_nested_validation=True)
    352 def fit(self, X, y=None):
    353     """Fit the imputer on `X`.
    354 
    355     Parameters
   (...)
    367         Fitted estimator.
    368     """
--> 369     X = self._validate_input(X, in_fit=True)
    371     # default fill_value is 0 for numerical input and "missing_value"
    372     # otherwise
    373     if self.fill_value is None:

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/sklearn/impute/_base.py:330, in SimpleImputer._validate_input(self, X, in_fit)
    324 if "could not convert" in str(ve):
    325     new_ve = ValueError(
    326         "Cannot use {} strategy with non-numeric data:\n{}".format(
    327             self.strategy, ve
    328         )
    329     )
--> 330     raise new_ve from None
    331 else:
    332     raise ve

ValueError: Cannot use mean strategy with non-numeric data:
could not convert string to float: 'SG'


~~~~~~type 2 risk_effect_size, lime, invase, symbolic_pursuit ~~~~~~
# Explain using Risk Effect Size
explainer = Explainers().get(
    "risk_effect_size",
    model,
    X = X,
    y = Y,
    time_to_event = T,
    eval_times = eval_time_horizons,
    task_type="risk_estimation",
)

display(explainer.explain(X))
explainer.plot(X)


~~~~~~~error~~~~~~~

KeyError                                  Traceback (most recent call last)
Cell In[39], line 2
      1 # Explain using Risk Effect Size
----> 2 explainer = Explainers().get(
      3     "risk_effect_size",
      4     model,
      5     X = X,
      6     y = Y,
      7     time_to_event = T,
      8     eval_times = eval_time_horizons,
      9     task_type="risk_estimation",
     10 )
     12 display(explainer.explain(X))
     13 explainer.plot(X)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/autoprognosis/plugins/core/base_plugin.py:327, in PluginLoader.get(self, name, *args, **kwargs)
    324 if name not in self._plugins:
    325     raise ValueError(f"Plugin {name} cannot be loaded.")
--> 327 return self._plugins[name](*args, **kwargs)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/autoprognosis/plugins/explainers/plugin_risk_effect_size.py:100, in RiskEffectSizePlugin.__init__(self, estimator, X, y, task_type, feature_names, subsample, prefit, effect_size, time_to_event, eval_times, random_state, **kwargs)
     97     raise RuntimeError("Invalid input for risk estimation interpretability")
     99 if not prefit:
--> 100     model.fit(X, time_to_event, y)
    102 def model_fn(X: pd.DataFrame) -> pd.DataFrame:
    103     if eval_times is None:

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/autoprognosis/plugins/uncertainty/plugin_cohort_explainer.py:579, in CohortExplainerPlugin.fit(self, *args, **kwargs)
    576     T = args[1]
    577     y = args[2]
--> 579     eval_times = kwargs["time_horizons"]
    580     self._calibrate_risk_estimation(X, T, y, eval_times)
    581 else:

KeyError: 'time_horizons'



~~~~~~~~~type 3 shap_permutation_sampler~~~~~~
# Explain using shap_permutation_sampler
explainer = Explainers().get(
    "shap_permutation_sampler",
    model,
    X = X,
    y = Y,
    time_to_event = T,
    eval_times = eval_time_horizons,
    task_type="risk_estimation",
)

display(explainer.explain(X))
explainer.plot(X)

~~~~~~error ~~~~~~~

TypeError                                 Traceback (most recent call last)
Cell In[23], line 12
      1 # Explain using shap_permutation_sampler
      2 explainer = Explainers().get(
      3     "shap_permutation_sampler",
      4     model,
   (...)
      9     task_type="risk_estimation",
     10 )
---> 12 display(explainer.explain(X))
     13 explainer.plot(X)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/autoprognosis/plugins/explainers/plugin_shap_permutation_sampler.py:119, in ShapPermutationSamplerPlugin.explain(self, X, max_evals)
    118 def explain(self, X: pd.DataFrame, max_evals: Union[int, str] = "auto") -> Any:
--> 119     expl = self.explainer(X, max_evals=max_evals, silent=True)
    120     if self.task_type == "classification":
    121         out = expl[..., 1]

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/shap/explainers/_permutation.py:77, in PermutationExplainer.__call__(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *args)
     74 def __call__(self, *args, max_evals=500, main_effects=False, error_bounds=False, batch_size="auto",
     75              outputs=None, silent=False):
     76     """Explain the output of the model on the given arguments."""
---> 77     return super().__call__(
     78         *args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds, batch_size=batch_size,
     79         outputs=outputs, silent=silent
     80     )

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/shap/explainers/_explainer.py:266, in Explainer.__call__(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *args, **kwargs)
    264     feature_names = [[] for _ in range(len(args))]
    265 for row_args in show_progress(zip(*args), num_rows, self.__class__.__name__+" explainer", silent):
--> 266     row_result = self.explain_row(
    267         *row_args, max_evals=max_evals, main_effects=main_effects, error_bounds=error_bounds,
    268         batch_size=batch_size, outputs=outputs, silent=silent, **kwargs
    269     )
    270     values.append(row_result.get("values", None))
    271     output_indices.append(row_result.get("output_indices", None))

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/shap/explainers/_permutation.py:85, in PermutationExplainer.explain_row(self, max_evals, main_effects, error_bounds, batch_size, outputs, silent, *row_args)
     83 """Explains a single row and returns the tuple (row_values, row_expected_values, row_mask_shapes)."""
     84 # build a masked version of the model for the current input sample
---> 85 fm = MaskedModel(self.model, self.masker, self.link, self.linearize_link, *row_args)
     87 # by default we run 10 permutations forward and backward
     88 if max_evals == "auto":

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/shap/utils/_masked_model.py:31, in MaskedModel.__init__(self, model, masker, link, linearize_link, *args)
     29 # if the masker supports it, save what positions vary from the background
     30 if callable(getattr(self.masker, "invariants", None)):
---> 31     self._variants = ~self.masker.invariants(*args)
     32     self._variants_column_sums = self._variants.sum(0)
     33     self._variants_row_inds = [
     34         self._variants[:,i] for i in range(self._variants.shape[1])
     35     ]

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/shap/maskers/_tabular.py:150, in Tabular.invariants(self, x)
    144 if x.shape != self.data.shape[1:]:
    145     raise DimensionError(
    146         "The passed data does not match the background shape expected by the masker! The data of shape " + \
    147         str(x.shape) + " was passed while the masker expected data of shape " + str(self.data.shape[1:]) + "."
    148     )
--> 150 return np.isclose(x, self.data)

File <__array_function__ internals>:180, in isclose(*args, **kwargs)

File ~/miniconda3/envs/py310/lib/python3.10/site-packages/numpy/core/numeric.py:2372, in isclose(a, b, rtol, atol, equal_nan)
   2369     dt = multiarray.result_type(y, 1.)
   2370     y = asanyarray(y, dtype=dt)
-> 2372 xfin = isfinite(x)
   2373 yfin = isfinite(y)
   2374 if all(xfin) and all(yfin):

TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''
@DrShushen DrShushen self-assigned this Sep 12, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants