2

There is an error at the final stage of this analysis. When running explain() function on an h2o model, I get the following error: Error: All permutations have no similarity to the original observation. Try setting bin_continuous to TRUE and/or increase kernel_size I have tried both the suggestions in the error. If I change the bin_continous to TRUE, the lime() function does not work and other kernel sizes do not work either.

Any thought on how to solve this and therefore be able to get the results with the plot_features() function?

library(readxl)
library(httr)
library(dplyr)
library(h2o)        
library(lime) 


GET("https://community.watsonanalytics.com/wp-content/uploads/2015/03/WA_FnUseC_-HR-Employee-Attrition.xlsx", 
   write_disk(tf <- tempfile(fileext = ".xls")))
hr_data_raw <- read_xlsx(tf)


hr_data <- hr_data_raw %>%
mutate_if(is.character, as.factor) %>%
select(Attrition, everything())  


h2o.init()
h2o.no_progress() 

hr_data_h2o <- as.h2o(hr_data)
split_h2o <- h2o.splitFrame(hr_data_h2o, c(0.7, 0.15), seed = 1234 )
train_h2o <- h2o.assign(split_h2o[[1]], "train" ) # 70%
valid_h2o <- h2o.assign(split_h2o[[2]], "valid" ) # 15%
test_h2o  <- h2o.assign(split_h2o[[3]], "test" )  # 15%  


y <- "Attrition"
x <- setdiff(names(train_h2o), y)  
automl_models_h2o <- h2o.automl(
x = x, 
y = y,
training_frame = train_h2o,
validation_frame = valid_h2o,
leaderboard_frame = test_h2o,
max_runtime_secs  = 30)

automl_leader <- automl_models_h2o@leader 


explainer <- lime::lime(
   as.data.frame(train_h2o[,-1]), 
   model = automl_leader, 
   bin_continuous = F)

explanation <- lime::explain(
   as.data.frame(test_h2o[1:10, -1]), 
   explainer = explainer, 
   n_labels     = 1, 
   n_features   = 4)

  # Error: All permutations have no similarity to the original observation.
  # Try setting bin_continuous to TRUE and/or increase kernel_size

  # Cannot Continue
  plot_features(explanation)
YK95
  • 21
  • 1

0 Answers0