# Source code for lime_tabular

"""LIME tabular explainer."""
import sys
from typing import Iterable
from typing import List
from typing import Optional
from typing import Union
import numpy as np
from lime.lime_tabular import LimeTabularExplainer
from dianna import utils


class LIMETabular:
    """Wrapper around the LIME explainer for tabular data."""

    def __init__(
        self,
        training_data: np.array,
        mode: str = "classification",
        feature_names: Optional[List[str]] = None,
        categorical_features: Optional[List[int]] = None,
        kernel_width: int = 25,
        kernel: callable = None,
        verbose: bool = False,
        class_names: Optional[List[str]] = None,
        feature_selection: str = "auto",
        random_state: Optional[int] = None,
        **kwargs,
    ) -> None:
        """Initializes Lime explainer.

        For numerical features, perturb them by sampling from a Normal(0,1) and
        doing the inverse operation of mean-centering and scaling, according to
        the means and stds in the training data.

        For categorical features, perturb by sampling according to the training
        distribution, and making a binary feature that is 1 when the value is
        the same as the instance being explained.

        More information can be found in the API guide:
        https://lime-ml.readthedocs.io/en/latest/lime.html#module-lime.lime_tabular

        Args:
            training_data (np.array): numpy 2d array
            mode (str, optional): "classification" or "regression"
            feature_names (list(str), optional): list of names corresponding
                        to the columns in the training data. When omitted, the
                        number of features is taken from the number of columns
                        of ``training_data``.
            categorical_features (list(int), optional): list of indices
                        corresponding to the categorical columns. Values in
                        these columns MUST be integers.
            kernel_width (int, optional): kernel width
            kernel (callable, optional): kernel
            verbose (bool, optional): verbose
            class_names (list(str), optional): list of class names, ordered
                        according to whatever the classifier is using. If not
                        present, class names will be '0', '1', ...
            feature_selection (str, optional): feature selection
            random_state (int or np.RandomState, optional): seed or random
                        state
            kwargs: These parameters are passed on
        """
        self.mode = mode
        # Only forward the extra keyword arguments that LimeTabularExplainer
        # is able to accept.
        init_instance_kwargs = utils.get_kwargs_applicable_to_function(
            LimeTabularExplainer, kwargs)

        # temporary solution for setting num_features and top_labels
        # when fixed, also fix in dashboard Tabular.py -> _feature_names
        self.num_features = self._count_features(training_data, feature_names)

        self.explainer = LimeTabularExplainer(
            training_data,
            mode=self.mode,
            feature_names=feature_names,
            categorical_features=categorical_features,
            kernel_width=kernel_width,
            kernel=kernel,
            verbose=verbose,
            class_names=class_names,
            feature_selection=feature_selection,
            random_state=random_state,
            **init_instance_kwargs,
        )

    @staticmethod
    def _count_features(training_data, feature_names) -> int:
        """Return the number of features to request from LIME.

        Uses ``len(feature_names)`` when names are given; otherwise falls back
        to the number of columns of the (2d) ``training_data`` so that the
        default ``feature_names=None`` does not raise a ``TypeError``.
        """
        if feature_names is not None:
            return len(feature_names)
        return np.shape(training_data)[1]

    @staticmethod
    def _saliency_from_local_exp(mode: str, local_exp: dict) -> np.ndarray:
        """Convert a LIME ``local_exp`` mapping into an array of saliencies.

        Args:
            mode (str): "classification" or "regression".
            local_exp (dict): mapping from label to a list of
                ``(feature_index, weight)`` pairs.

        Returns:
            For regression a 1d array of weights ordered by feature index; for
            classification a 2d array with one such row per label.

        Raises:
            ValueError: if ``mode`` is neither of the supported values.
        """
        if mode == 'regression':
            # The regression explanation is read from key 1
            # (NOTE(review): relies on LIME's internal layout - confirm when
            # upgrading LIME).
            ordered = sorted(local_exp[1])
            return np.array([pair[1] for pair in ordered])

        if mode == 'classification':
            saliency = []
            # Labels are looked up by position 0..n-1 (not by dict order) so
            # that row i of the result always belongs to class i.
            for label in range(len(local_exp)):
                # each entry is (feature index, saliency); sorting puts the
                # weights in feature order
                ordered = sorted(local_exp[label])
                saliency.append([pair[1] for pair in ordered])
            return np.array(saliency)

        raise ValueError(f'Unsupported mode "{mode}"')

    def explain(
        self,
        model_or_function: Union[str, callable],
        input_tabular: np.array,
        labels: Optional[Iterable[int]] = None,
        num_samples: int = 5000,
        **kwargs,
    ) -> np.array:
        """Run the LIME explainer.

        Args:
            model_or_function (callable or str): The function that runs the
                                                 model to be explained or the
                                                 path to a ONNX model on disk.
            input_tabular (np.ndarray): Data to be explained.
            labels (Iterable(int)): Indices of classes to be explained.
            num_samples (int, optional): Number of samples
            kwargs: These parameters are passed on

        Other keyword arguments: see the documentation for
        LimeTabularExplainer.explain_instance:
        https://lime-ml.readthedocs.io/en/latest/lime.html#lime.lime_tabular.LimeTabularExplainer.explain_instance

        Returns:
            An array (np.ndarray) containing LIME explanations for each class.

        Raises:
            ValueError: if the explainer was created with an unsupported mode.
        """
        # run the explanation.
        explain_instance_kwargs = utils.get_kwargs_applicable_to_function(
            self.explainer.explain_instance, kwargs)
        runner = utils.get_function(model_or_function)

        # NOTE(review): top_labels is set to "all classes"; LIME appears to
        # give top_labels precedence over `labels` - confirm against LIME.
        explanation = self.explainer.explain_instance(
            input_tabular,
            runner,
            labels=labels,
            top_labels=sys.maxsize,
            num_features=self.num_features,
            num_samples=num_samples,
            **explain_instance_kwargs,
        )

        return self._saliency_from_local_exp(self.mode, explanation.local_exp)