Friedman's H-statistic
The Interpretable Machine Learning book by Christoph Molnar describes a workable approach: Friedman's H-statistic, which uses a decomposition of the partial dependence values to quantify feature interactions.
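For two features j and k, the second-order statistic compares the two-way partial dependence with the sum of the two one-way partial dependences over the observed data points. Paraphrasing the formula from Molnar's chapter (with all partial dependence functions mean-centered):

\[
H^2_{jk} = \frac{\sum_{i=1}^{n}\Big[\,PD_{jk}\big(x_j^{(i)}, x_k^{(i)}\big) - PD_j\big(x_j^{(i)}\big) - PD_k\big(x_k^{(i)}\big)\Big]^2}{\sum_{i=1}^{n} PD_{jk}\big(x_j^{(i)}, x_k^{(i)}\big)^2}
\]

The compute_h_val function further down implements exactly this numerator/denominator ratio (reporting its square root) and generalizes it to feature subsets of any size via inclusion-exclusion over the subsets.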
In Python, sklearn_gbmi accepts feature sets of length two and higher, but, much like interact.gbm in R, it does not provide the first-order measure, and it only works with gradient boosting models.
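If a pairwise scan is all you need, that package is easy to drive. Below is a minimal sketch of how it is typically called; the helper names h and h_all_pairs and their argument order are as I recall them from the sklearn-gbmi README, so treat them as assumptions to verify against the installed version.

# Sketch only: sklearn_gbmi usage as I recall it from its README; verify the API locally.
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn_gbmi import h, h_all_pairs  # assumed public helpers of sklearn-gbmi

rng = np.random.RandomState(0)
demo = pd.DataFrame(rng.rand(500, 3), columns=['x1', 'x2', 'x3'])
demo['y'] = demo['x1'] * demo['x2'] + demo['x3']  # x1 and x2 interact by construction

gbm = GradientBoostingRegressor(random_state=0).fit(demo[['x1', 'x2', 'x3']], demo['y'])

print(h(gbm, demo[['x1', 'x2', 'x3']], ['x1', 'x2']))  # H statistic for one specific pair
print(h_all_pairs(gbm, demo[['x1', 'x2', 'x3']]))      # H statistic for every pair of features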
I found a manual Python implementation here, posted below for reference, which calculates the feature interactions.
import itertools
import math

import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

from pdpbox.pdp_calc_utils import _calc_ice_lines_inter
from pdpbox.pdp import pdp_isolate, PDPInteract
from pdpbox.utils import (_check_model, _check_dataset, _check_percentile_range, _check_feature,
                          _check_grid_type, _check_memory_limit, _make_list,
                          _calc_memory_usage, _get_grids, _get_grid_combos, _check_classes)
from joblib import Parallel, delayed
def pdp_multi_interact(model, dataset, model_features, features,
                       num_grid_points=None, grid_types=None, percentile_ranges=None, grid_ranges=None,
                       cust_grid_points=None,
                       cust_grid_combos=None, use_custom_grid_combos=False,
                       memory_limit=0.5, n_jobs=1, predict_kwds=None, data_transformer=None):

    def _expand_default(x, default, length):
        if x is None:
            return [default] * length
        return x

    def _get_grid_combos(feature_grids, feature_types):
        grids = [list(feature_grid) for feature_grid in feature_grids]
        for i in range(len(feature_types)):
            if feature_types[i] == 'onehot':
                grids[i] = np.eye(len(grids[i])).astype(int).tolist()
        return np.stack(np.meshgrid(*grids), -1).reshape(-1, len(grids))

    if predict_kwds is None:
        predict_kwds = dict()

    nr_feats = len(features)

    # check function inputs
    n_classes, predict = _check_model(model=model)
    _check_dataset(df=dataset)
    _dataset = dataset.copy()

    # prepare the grid
    pdp_isolate_outs = []
    if use_custom_grid_combos:
        grid_combos = cust_grid_combos
        feature_grids = []
        feature_types = []
    else:
        num_grid_points = _expand_default(x=num_grid_points, default=10, length=nr_feats)
        grid_types = _expand_default(x=grid_types, default='percentile', length=nr_feats)
        for i in range(nr_feats):
            _check_grid_type(grid_type=grid_types[i])
        percentile_ranges = _expand_default(x=percentile_ranges, default=None, length=nr_feats)
        for i in range(nr_feats):
            _check_percentile_range(percentile_range=percentile_ranges[i])
        grid_ranges = _expand_default(x=grid_ranges, default=None, length=nr_feats)
        cust_grid_points = _expand_default(x=cust_grid_points, default=None, length=nr_feats)
        _check_memory_limit(memory_limit=memory_limit)

        pdp_isolate_outs = []
        for idx in range(nr_feats):
            pdp_isolate_out = pdp_isolate(
                model=model, dataset=_dataset, model_features=model_features, feature=features[idx],
                num_grid_points=num_grid_points[idx], grid_type=grid_types[idx], percentile_range=percentile_ranges[idx],
                grid_range=grid_ranges[idx], cust_grid_points=cust_grid_points[idx], memory_limit=memory_limit,
                n_jobs=n_jobs, predict_kwds=predict_kwds, data_transformer=data_transformer)
            pdp_isolate_outs.append(pdp_isolate_out)

        if n_classes > 2:
            feature_grids = [pdp_isolate_outs[i][0].feature_grids for i in range(nr_feats)]
            feature_types = [pdp_isolate_outs[i][0].feature_type for i in range(nr_feats)]
        else:
            feature_grids = [pdp_isolate_outs[i].feature_grids for i in range(nr_feats)]
            feature_types = [pdp_isolate_outs[i].feature_type for i in range(nr_feats)]

        grid_combos = _get_grid_combos(feature_grids, feature_types)

    feature_list = []
    for i in range(nr_feats):
        feature_list.extend(_make_list(features[i]))

    # Parallel calculate ICE lines
    true_n_jobs = _calc_memory_usage(
        df=_dataset, total_units=len(grid_combos), n_jobs=n_jobs, memory_limit=memory_limit)
    grid_results = Parallel(n_jobs=true_n_jobs)(delayed(_calc_ice_lines_inter)(
        grid_combo, data=_dataset, model=model, model_features=model_features, n_classes=n_classes,
        feature_list=feature_list, predict_kwds=predict_kwds, data_transformer=data_transformer)
        for grid_combo in grid_combos)

    ice_lines = pd.concat(grid_results, axis=0).reset_index(drop=True)
    pdp = ice_lines.groupby(feature_list, as_index=False).mean()

    # combine the final results
    pdp_interact_params = {'n_classes': n_classes,
                           'features': features,
                           'feature_types': feature_types,
                           'feature_grids': feature_grids}
    if n_classes > 2:
        pdp_interact_out = []
        for n_class in range(n_classes):
            _pdp = pdp[feature_list + ['class_%d_preds' % n_class]].rename(
                columns={'class_%d_preds' % n_class: 'preds'})
            pdp_interact_out.append(
                PDPInteract(which_class=n_class,
                            pdp_isolate_outs=[pdp_isolate_outs[i][n_class] for i in range(nr_feats)],
                            pdp=_pdp, **pdp_interact_params))
    else:
        pdp_interact_out = PDPInteract(
            which_class=None, pdp_isolate_outs=pdp_isolate_outs, pdp=pdp, **pdp_interact_params)

    return pdp_interact_out
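# The helpers below build the H-statistic on top of pdp_multi_interact: compute_f_vals
# collects mean-centered partial-dependence values for the selected feature subset and
# all of its proper subsets, and compute_h_val / compute_h_val_any turn those values
# into the second-order and first-order H-measures respectively.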
def center(arr): return arr - np.mean(arr)
def compute_f_vals(mdl, X, features, selectedfeatures, num_grid_points=10, use_data_grid=False):
    f_vals = {}
    data_grid = None
    if use_data_grid:
        data_grid = X[selectedfeatures].values
    # Calculate partial dependencies for full feature set
    p_full = pdp_multi_interact(mdl, X, features, selectedfeatures,
                                num_grid_points=[num_grid_points] * len(selectedfeatures),
                                cust_grid_combos=data_grid,
                                use_custom_grid_combos=use_data_grid)
    f_vals[tuple(selectedfeatures)] = center(p_full.pdp.preds.values)
    grid = p_full.pdp.drop('preds', axis=1)
    # Calculate partial dependencies for [1..SFL-1]
    for n in range(1, len(selectedfeatures)):
        for subsetfeatures in itertools.combinations(selectedfeatures, n):
            if use_data_grid:
                data_grid = X[list(subsetfeatures)].values
            p_partial = pdp_multi_interact(mdl, X, features, subsetfeatures,
                                           num_grid_points=[num_grid_points] * len(selectedfeatures),
                                           cust_grid_combos=data_grid,
                                           use_custom_grid_combos=use_data_grid)
            p_joined = pd.merge(grid, p_partial.pdp, how='left')
            f_vals[tuple(subsetfeatures)] = center(p_joined.preds.values)
    return f_vals
# the second-order H-measure:
def compute_h_val(f_vals, selectedfeatures):
    denom_els = f_vals[tuple(selectedfeatures)].copy()
    numer_els = f_vals[tuple(selectedfeatures)].copy()
    sign = -1.0
    for n in range(len(selectedfeatures)-1, 0, -1):
        for subfeatures in itertools.combinations(selectedfeatures, n):
            numer_els += sign * f_vals[tuple(subfeatures)]
        sign *= -1.0
    numer = np.sum(numer_els**2)
    denom = np.sum(denom_els**2)
    return math.sqrt(numer/denom) if numer < denom else np.nan
# first-order H-measure as well:
def compute_h_val_any(f_vals, allfeatures, selectedfeature):
    otherfeatures = list(allfeatures)
    otherfeatures.remove(selectedfeature)
    denom_els = f_vals[tuple(allfeatures)].copy()
    numer_els = denom_els.copy()
    numer_els -= f_vals[(selectedfeature,)]
    numer_els -= f_vals[tuple(otherfeatures)]
    numer = np.sum(numer_els**2)
    denom = np.sum(denom_els**2)
    return math.sqrt(numer/denom) if numer < denom else np.nan
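# Worked example: one-hot encode the diamonds dataset, fit a gradient boosting
# classifier, and measure the interaction between 'carat' and 'depth'.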
df = sns.load_dataset("diamonds")
data = pd.get_dummies(df, columns=["cut", "color", "clarity"])
X = data.drop("cut_Ideal", axis=1)
y = data["cut_Ideal"]
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.33,
                                                    random_state=42)
gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                 max_depth=1, random_state=0).fit(X_train, y_train)
f_val = compute_f_vals(gbc, X, X.columns, ['carat', 'depth'], num_grid_points=10, use_data_grid=False)
# second-order H-measure:
compute_h_val(f_val, ['carat', 'depth'])
I want to calculate feature interactions for all the columns in a dataframe. How could I do that? I am not here for a free code-writing service; I just want to pick up a bit of knowledge from experienced programmers by discussing things. I was only expecting a suggestion or a reference to an appropriate library or method for finding feature interactions.
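For context on the direction I am considering: the naive way to extend the snippet above to every pair of columns would be to loop over itertools.combinations and reuse the compute_f_vals/compute_h_val helpers. A minimal sketch of that idea is below; the column list is just an illustrative subset of the numeric diamonds columns, and the quadratic number of pairs makes this slow on wide frames, so it is an illustration rather than a recommendation.

# Naive sketch: pairwise H-statistics, reusing the helpers defined above.
# Nothing is refit, but partial dependence is recomputed for every pair, which is slow.
pairwise_h = {}
numeric_cols = ['carat', 'depth', 'table', 'price', 'x', 'y', 'z']  # illustrative subset
for feat_a, feat_b in itertools.combinations(numeric_cols, 2):
    f_vals = compute_f_vals(gbc, X, X.columns, [feat_a, feat_b], num_grid_points=10)
    pairwise_h[(feat_a, feat_b)] = compute_h_val(f_vals, [feat_a, feat_b])

# Pairs with the strongest interactions first (NaNs, if any, sort to the end).
print(pd.Series(pairwise_h).sort_values(ascending=False))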