Source code for lime_text

"""LIME text explainer."""
from lime.lime_text import LimeTextExplainer
from dianna import utils


class LIMEText:
    """Wrapper around the LIME explainer.

    See Lime explainer by Marco Tulio Correia Ribeiro
    (https://github.com/marcotcr/lime).
    """

    def __init__(self,
                 kernel_width=25,
                 kernel=None,
                 verbose=False,
                 class_names=None,
                 feature_selection='auto',
                 split_expression=r'\W+',
                 bow=False,
                 mask_string=None,
                 random_state=None,
                 char_level=False,
                 preprocess_function=None,
                 ):
        """Initializes Lime explainer.

        Every argument except ``preprocess_function`` is forwarded unchanged
        to ``lime.lime_text.LimeTextExplainer``; see the LIME documentation
        for their exact semantics.

        Args:
            kernel_width (int, optional): kernel width
            kernel (callable, optional): kernel
            verbose (bool, optional): verbose
            class_names (list, optional): names of the classes
            feature_selection (str, optional): feature selection
            split_expression (regexp, optional): split expression. Note that
                `explain` replaces this with the tokenizer's `tokenize`
                method on every call.
            bow (bool, optional): bow
            mask_string (str, optional): mask string
            random_state (int or np.RandomState, optional): seed or random state
            char_level (bool, optional): char level
            preprocess_function (callable, optional): Function to preprocess
                input data with
        """
        self.preprocess_function = preprocess_function
        # Forwarded positionally: the order below has to follow the
        # parameter order of LimeTextExplainer's constructor.
        self.explainer = LimeTextExplainer(kernel_width,
                                           kernel,
                                           verbose,
                                           class_names,
                                           feature_selection,
                                           split_expression,
                                           bow,
                                           mask_string,
                                           random_state,
                                           char_level,
                                           )

    def explain(self,
                model_or_function,
                input_text,
                labels,
                tokenizer=None,
                top_labels=None,
                num_features=10,
                num_samples=5000,
                **kwargs,
                ):
        """Run the LIME explainer.

        Args:
            model_or_function (callable or str): The function that runs the
                model to be explained _or_ the path to a ONNX model on disk.
            input_text (np.ndarray): Data to be explained
            labels (Iterable(int)): Iterable of indices of class to be
                explained. One-shot iterables (e.g. generators) are accepted.
            tokenizer: Tokenizer object; its `tokenize` method is installed
                as LIME's split expression. Required, despite the `None`
                default.
            top_labels: Top labels
            num_features (int): Number of features
            num_samples (int): Number of samples
            kwargs: These parameters are passed on

        Other keyword arguments: see the LIME documentation for
        LimeTextExplainer.explain_instance:
        https://lime-ml.readthedocs.io/en/latest/lime.html#lime.lime_text.LimeTextExplainer.explain_instance.
        Only the keyword arguments selected by
        `utils.get_kwargs_applicable_to_function` are forwarded.

        Returns:
            A list with one entry per label in `labels` (same order); each
            entry is a list of tuples (word, index of word as reported by
            LIME, importance for target class), sorted by index.

        Raises:
            ValueError: If no tokenizer is provided.
        """
        if tokenizer is None:
            raise ValueError('Please provide a tokenizer to explain_text.')

        # `labels` is iterated twice: once inside LIME and once when the
        # result is assembled below. Materialise it so that a one-shot
        # iterable is not exhausted by the first pass, which would silently
        # produce an empty result.
        labels = list(labels)

        # lime accepts a callable as a split_expression
        self.explainer.split_expression = tokenizer.tokenize

        runner = utils.get_function(
            model_or_function, preprocess_function=self.preprocess_function)
        explain_instance_kwargs = utils.get_kwargs_applicable_to_function(
            self.explainer.explain_instance, kwargs)

        explanation = self.explainer.explain_instance(input_text,
                                                      runner,
                                                      labels=labels,
                                                      top_labels=top_labels,
                                                      num_features=num_features,
                                                      num_samples=num_samples,
                                                      **explain_instance_kwargs
                                                      )

        local_explanations = explanation.local_exp
        string_map = explanation.domain_mapper.indexed_string
        # NOTE(review): per the LIME docs, a non-None `top_labels` makes LIME
        # ignore `labels`, so a requested label may be missing from
        # `local_explanations` (KeyError) -- confirm intended usage.
        return [
            self._reshape_result_for_single_label(local_explanations[label],
                                                  string_map)
            for label in labels
        ]

    @staticmethod
    def _reshape_result_for_single_label(local_explanation, string_map):
        """Get results for single label.

        Args:
            local_explanation: Lime output, iterable of tuples
                (index, importance)
            string_map: Lime's IndexedString, see documentation:
                https://lime-ml.readthedocs.io/en/latest/lime.html?highlight=indexedstring#lime.lime_text.IndexedString

        Returns:
            List of tuples (word, index, importance), sorted by ascending
            index, where word is `string_map.word(index)`.
        """
        reshaped = [(string_map.word(index), index, importance)
                    for index, importance in local_explanation]
        # sort reshaped by index
        return sorted(reshaped, key=lambda item: item[1])