"""LIME text explainer."""
from lime.lime_text import LimeTextExplainer
from dianna import utils
class LIMEText:
    """Wrapper around the LIME text explainer.

    See Lime explainer by Marco Tulio Correia Ribeiro
    (https://github.com/marcotcr/lime).
    """

    def __init__(self,
                 kernel_width=25,
                 kernel=None,
                 verbose=False,
                 class_names=None,
                 feature_selection='auto',
                 split_expression=r'\W+',
                 bow=False,
                 mask_string=None,
                 random_state=None,
                 char_level=False,
                 preprocess_function=None,
                 ):
        """Initializes Lime explainer.

        All arguments except ``preprocess_function`` are forwarded unchanged
        to ``LimeTextExplainer``; see the LIME documentation for their exact
        meaning.

        Args:
            kernel_width (int, optional): kernel width
            kernel (callable, optional): kernel
            verbose (bool, optional): verbose
            class_names (list, optional): names of the classes
            feature_selection (str, optional): feature selection
            split_expression (regexp, optional): split expression. Note that
                `explain` overwrites this with the tokenizer's `tokenize` method.
            bow (bool, optional): bow
            mask_string (str, optional): mask string
            random_state (int or np.RandomState, optional): seed or random state
            char_level (bool, optional): char level
            preprocess_function (callable, optional): Function to preprocess input data with
        """
        self.preprocess_function = preprocess_function
        # Keyword arguments keep the mapping explicit and independent of the
        # positional order of LimeTextExplainer's signature.
        self.explainer = LimeTextExplainer(
            kernel_width=kernel_width,
            kernel=kernel,
            verbose=verbose,
            class_names=class_names,
            feature_selection=feature_selection,
            split_expression=split_expression,
            bow=bow,
            mask_string=mask_string,
            random_state=random_state,
            char_level=char_level,
        )

    def explain(self,
                model_or_function,
                input_text,
                labels,
                tokenizer=None,
                top_labels=None,
                num_features=10,
                num_samples=5000,
                **kwargs,
                ):
        """Run the LIME explainer.

        Args:
            model_or_function (callable or str): The function that runs the model to be explained _or_
                                                 the path to a ONNX model on disk.
            input_text (str): Text to be explained
            labels (Iterable(int)): Iterable of indices of class to be explained
            tokenizer : Tokenizer class with tokenize and convert_tokens_to_string methods, and mask_token attribute
            top_labels: Top labels
            num_features (int): Number of features
            num_samples (int): Number of samples
            kwargs: These parameters are passed on

        Other keyword arguments: see the LIME documentation for LimeTextExplainer.explain_instance:
        https://lime-ml.readthedocs.io/en/latest/lime.html#lime.lime_text.LimeTextExplainer.explain_instance.

        Returns:
            List of tuples (word, index of word in raw text, importance for target class) for each class

        Raises:
            ValueError: If no tokenizer is provided.
        """
        if tokenizer is None:
            raise ValueError('Please provide a tokenizer to explain_text.')

        # `labels` is consumed twice (by lime and when building the result),
        # so materialize it once; a generator would otherwise be exhausted
        # after the first pass and the method would return an empty list.
        labels = list(labels)

        self.explainer.split_expression = tokenizer.tokenize  # lime accepts a callable as a split_expression
        runner = utils.get_function(model_or_function, preprocess_function=self.preprocess_function)
        # Only forward the extra keyword arguments that explain_instance accepts.
        explain_instance_kwargs = utils.get_kwargs_applicable_to_function(
            self.explainer.explain_instance, kwargs)
        explanation = self.explainer.explain_instance(input_text,
                                                      runner,
                                                      labels=labels,
                                                      top_labels=top_labels,
                                                      num_features=num_features,
                                                      num_samples=num_samples,
                                                      **explain_instance_kwargs
                                                      )

        local_explanations = explanation.local_exp
        string_map = explanation.domain_mapper.indexed_string
        # NOTE(review): when `top_labels` is set, lime presumably selects its
        # own labels instead of `labels`; the lookup below could then raise a
        # KeyError for labels lime did not explain -- confirm against lime.
        return [self._reshape_result_for_single_label(local_explanations[label], string_map)
                for label in labels]

    @staticmethod
    def _reshape_result_for_single_label(local_explanation, string_map):
        """Get results for single label.

        Args:
            local_explanation: Lime output, iterable of tuples (index, importance)
            string_map: Lime's IndexedString, see documentation:
                https://lime-ml.readthedocs.io/en/latest/lime.html?highlight=indexedstring#lime.lime_text.IndexedString

        Returns:
            List of tuples (word, index, importance), sorted by ascending index.
        """
        reshaped = [(string_map.word(index), index, importance)
                    for index, importance in local_explanation]
        # Order the entries by their index so they follow the input text.
        return sorted(reshaped, key=lambda entry: entry[1])