I'm using the following sample code, which I found in this post, to download a POJO:
# Train a multinomial GLM on the iris data set and export it as a POJO.
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator

# Initialise the H2O session before any frame is loaded.
h2o.init()

iris = h2o.import_file(
    "https://s3.amazonaws.com/h2o-public-test-data/smalldata/iris/iris.csv"
)

# The first four columns are the predictors; "C5" is the response column.
feature_cols = iris.columns[:4]
target_col = "C5"

# Two ratios yield three frames; the third frame is not used below.
train_frame, valid_frame, test_frame = iris.split_frame(ratios=[0.7, 0.15], seed=1234)

model = H2OGeneralizedLinearEstimator(family="multinomial")
model.train(
    x=feature_cols,
    y=target_col,
    training_frame=train_frame,
    validation_frame=valid_frame,
)

# Write the generated Java source to disk; get_jar=True also requests h2o-genmodel.jar.
h2o.download_pojo(model, path='/Users/your_user_name/Desktop/', get_jar=True)
When I open the downloaded java file I'm given some instructions for how to compile it. The following compiles successfully:
javac -cp h2o-genmodel.jar -J-Xmx2g -J-XX:MaxPermSize=128m GLM_model_python_1488677745392_2.java
Now, I'm not sure how to use it. I've tried the following:
java -cp h2o-genmodel.jar javac -cp h2o-genmodel.jar -J-Xmx2g -J-XX:MaxPermSize=128m GLM_model_python_1488677745392_2.java
The following is the code in the pojo:
/*
Licensed under the Apache License, Version 2.0
http://www.apache.org/licenses/LICENSE-2.0.html
AUTOGENERATED BY H2O at 2017-03-05T01:51:46.237Z
3.10.3.2
Standalone prediction code with sample test data for GLMModel named GLM_model_python_1488677745392_2
How to download, compile and execute:
mkdir tmpdir
cd tmpdir
curl http://10.0.0.4:54321/3/h2o-genmodel.jar > h2o-genmodel.jar
curl http://10.0.0.4:54321/3/Models.java/GLM_model_python_1488677745392_2 > GLM_model_python_1488677745392_2.java
javac -cp h2o-genmodel.jar -J-Xmx2g -J-XX:MaxPermSize=128m GLM_model_python_1488677745392_2.java
(Note: Try java argument -XX:+PrintCompilation to show runtime JIT compiler behavior.)
*/
import java.util.Map;
import hex.genmodel.GenModel;
import hex.genmodel.annotations.ModelPojo;
/**
 * Standalone scorer for a 3-class multinomial GLM over four numeric predictors
 * (C1..C4) with response column C5.
 *
 * <p>Usage: allocate {@code data} with {@code nfeatures()} entries and
 * {@code preds} with {@code nclasses() + 1} entries, then call
 * {@link #score0(double[], double[])}. After the call {@code preds[0]} holds the
 * zero-based index of the predicted class (an index into the last row of
 * {@link #DOMAINS}) and {@code preds[1..3]} hold the per-class probabilities.
 */
@ModelPojo(name="GLM_model_python_1488677745392_2", algorithm="glm")
public class GLM_model_python_1488677745392_2 extends GenModel {
  public hex.ModelCategory getModelCategory() { return hex.ModelCategory.Multinomial; }

  public boolean isSupervised() { return true; }
  public int nfeatures() { return 4; }
  public int nclasses() { return 3; }

  // Names of columns used by model.
  public static final String[] NAMES = NamesHolder_GLM_model_python_1488677745392_2.VALUES;
  // Number of output classes included in training data response column.
  public static final int NCLASSES = 3;

  // Column domains. The last array contains domain of response column.
  // A null entry means the column has no categorical domain.
  public static final String[][] DOMAINS = new String[][] {
    /* C1 */ null,
    /* C2 */ null,
    /* C3 */ null,
    /* C4 */ null,
    /* C5 */ GLM_model_python_1488677745392_2_ColInfo_4.VALUES
  };
  // Prior class distribution
  public static final double[] PRIOR_CLASS_DISTRIB = {0.2818181818181818,0.33636363636363636,0.38181818181818183};
  // Class distribution used for model building
  public static final double[] MODEL_CLASS_DISTRIB = null;

  public GLM_model_python_1488677745392_2() { super(NAMES,DOMAINS); }
  public String getUUID() { return Long.toString(-5598526670666235824L); }

  /**
   * Scores one row.
   *
   * @param data the four predictor values in column order C1..C4; NaN entries
   *             are overwritten in place with the stored numeric means
   * @param preds output buffer with at least {@code NCLASSES + 1} entries;
   *              {@code preds[0]} receives the predicted class index and
   *              {@code preds[1..NCLASSES]} receive the class probabilities.
   *              Entries beyond that range are left untouched.
   * @return the same {@code preds} array that was passed in
   */
  public final double[] score0( double[] data, double[] preds ) {
    final double[] b = BETA.VALUES;
    // The model has no categorical predictors, so only numeric imputation is needed.
    for (int i = 0; i < 4; ++i) {
      if (Double.isNaN(data[i])) data[i] = NUM_MEANS.VALUES[i];
    }

    // Linear predictor per class: BETA stores 5 values per class
    // (4 coefficients followed by the intercept).
    preds[0] = 0;
    for (int c = 0; c < 3; ++c) {
      preds[c+1] = 0;
      for (int i = 0; i < 4; ++i)
        preds[c+1] += b[0+i + c*5]*data[i];
      preds[c+1] += b[4 + c*5]; // add intercept
    }

    // Softmax over the class slots only, so an oversized preds buffer cannot
    // leak stale values into the normalisation.
    final int end = NCLASSES + 1;

    // Start from -infinity so the shift is the true maximum even when every
    // linear predictor is negative; starting from 0 would skip the shift in
    // that case and let exp() underflow to 0 for all classes (yielding NaN).
    double max_row = Double.NEGATIVE_INFINITY;
    for (int c = 1; c < end; ++c) if (preds[c] > max_row) max_row = preds[c];

    double sum_exp = 0;
    for (int c = 1; c < end; ++c) { sum_exp += (preds[c] = Math.exp(preds[c]-max_row)); }
    sum_exp = 1/sum_exp;

    // Normalise to probabilities and record the arg-max class in preds[0].
    double max_p = 0;
    for (int c = 1; c < end; ++c) {
      if ((preds[c] *= sum_exp) > max_p) { max_p = preds[c]; preds[0] = c-1; }
    }
    return preds;
  }

  // Model coefficients, laid out class by class (see score0 for the indexing).
  public static class BETA implements java.io.Serializable {
    public static final double[] VALUES = new double[15];
    static {
      BETA_0.fill(VALUES);
    }
    static final class BETA_0 implements java.io.Serializable {
      static final void fill(double[] sa) {
        sa[0] = -1.4700470387418272;
        sa[1] = 4.26067731522767;
        sa[2] = -2.285756276489862;
        sa[3] = -4.312931422791621;
        sa[4] = 5.231215014401568;
        sa[5] = 1.7769023115830205;
        sa[6] = -0.2534145823550425;
        sa[7] = -0.9887536067536575;
        sa[8] = -1.2706135235877678;
        sa[9] = -4.319817154759757;
        sa[10] = 0.0;
        sa[11] = -3.024835247270209;
        sa[12] = 3.8622405283810464;
        sa[13] = 7.018262604176258;
        sa[14] = -22.702291637028203;
      }
    }
  }

  // Imputed numeric values
  static class NUM_MEANS implements java.io.Serializable {
    public static final double[] VALUES = new double[4];
    static {
      NUM_MEANS_0.fill(VALUES);
    }
    static final class NUM_MEANS_0 implements java.io.Serializable {
      static final void fill(double[] sa) {
        sa[0] = 5.90272727272727;
        sa[1] = 3.024545454545454;
        sa[2] = 3.9490909090909097;
        sa[3] = 1.2872727272727267;
      }
    }
  }

  // Imputed categorical values (empty: this model has no categorical predictors).
  static class CAT_MODES implements java.io.Serializable {
    public static final int[] VALUES = new int[0];
    static {
    }
  }

  // Categorical Offsets
  public static final int[] CATOFFS = {0};
}
// The class representing training column names
/** Holds the names of the four training columns, in model input order. */
class NamesHolder_GLM_model_python_1488677745392_2 implements java.io.Serializable {
  public static final String[] VALUES = new String[4];

  static {
    NamesHolder_GLM_model_python_1488677745392_2_0.fill(VALUES);
  }

  /** Populates the leading slots of a caller-supplied array with the column names. */
  static final class NamesHolder_GLM_model_python_1488677745392_2_0 implements java.io.Serializable {
    static final void fill(String[] sa) {
      final String[] columnNames = {"C1", "C2", "C3", "C4"};
      // Copy slot by slot so the write order matches the index order.
      for (int slot = 0; slot < columnNames.length; slot++) {
        sa[slot] = columnNames[slot];
      }
    }
  }
}
// The class representing column C5
/** Holds the three category labels of response column C5, indexed by class number. */
class GLM_model_python_1488677745392_2_ColInfo_4 implements java.io.Serializable {
  public static final String[] VALUES = new String[3];

  static {
    GLM_model_python_1488677745392_2_ColInfo_4_0.fill(VALUES);
  }

  /** Populates the leading slots of a caller-supplied array with the class labels. */
  static final class GLM_model_python_1488677745392_2_ColInfo_4_0 implements java.io.Serializable {
    static final void fill(String[] sa) {
      final String[] labels = {"Iris-setosa", "Iris-versicolor", "Iris-virginica"};
      // Copy slot by slot so the write order matches the index order.
      for (int slot = 0; slot < labels.length; slot++) {
        sa[slot] = labels[slot];
      }
    }
  }
}
Now, I think I need to call score0. I've figured out how to create my own main.java and create an entrypoint to main() so that I can instantiate the object and call score0, but I have no idea how it's supposed to work. I'm expecting to feed in 4 doubles and get back a category, but instead, the function takes two double[] and I can't figure out exactly what to put where and how to read the results. Here's my main:
/**
 * Minimal driver: scores one hard-coded row with the generated GLM POJO and
 * prints every entry of the prediction array on its own line.
 */
public class Main {
  public static void main(String[] args) {
    final GLM_model_python_1488677745392_2 model = new GLM_model_python_1488677745392_2();

    // One row of the four predictor values, in the model's column order.
    final double[] features = {4.6, 3.1, 1.5, 0.2};
    // Output buffer: slot 0 is reserved for the predicted class, the
    // remaining slots for the per-class probability distribution.
    final double[] buffer = new double[4];

    final double[] scored = model.score0(features, buffer);
    for (double value : scored) {
      System.out.println(value);
    }
  }
}
I'm actually getting a bunch of data returned, but I don't know what any of it means. I think I'm using the second argument completely incorrectly, but I'm not sure what to do. Here's the output:
0.0
0.9976588811416329
0.0023411188583572825
9.662837354438092E-15