I have a monthly time-series DataFrame indexed by date and company, with a team-name column. I want to convert it to an H2O frame to run AutoML models, but the index gets dropped in hf_train. How do I group this data in H2O so that the model runs per team?
import pandas as pd

# Sample observations, one row per record.
# Column order: Dt, Co, Team, Val1, Val2, Type.
data = [
    ['01/01/18', 'A', 'Team A', 1.5, 8.9, 'Pop'],
    ['02/01/18', 'A', 'Team AA', 1.5, 8.9, 'Pop'],
    ['03/01/18', 'A', 'Team AA', 3.1, 11, 'Div'],
    ['04/01/18', 'A', 'Team AC', 1.5, 6, 'Div'],
    ['01/01/18', 'A', 'Team A', 1.5, 8.9, 'Pop'],
    ['02/01/18', 'A', 'Team BA', 1.5, 8.9, 'Pop'],
    ['03/01/18', 'A', 'Team BA', 3.1, 11, 'Div'],
    ['04/01/18', 'A', 'Team BC', 1.5, 6, 'Div'],
    ['01/01/18', 'C', 'Team C', 1.5, 8.9, 'Pop'],
    ['02/01/18', 'C', 'Team CA', 1.5, 8.9, 'Pop'],
    ['03/01/18', 'C', 'Team CC', 3.1, 11, 'Div'],
    ['04/01/18', 'C', 'Team CA', 1.5, 6, 'Div'],
]

# Build the frame from `data`; the original passed an undefined name `l`,
# which raised NameError before anything else could run.
df = pd.DataFrame(data, columns=['Dt', 'Co', 'Team', 'Val1', 'Val2', 'Type'])

# Use (Dt, Co) as a two-level index; Team and the value columns stay as
# regular columns.
df = df.set_index(['Dt', 'Co'])
import h2o

# Bring up the H2O backend before creating any frames.
h2o.init()

# H2OFrame does not carry over the pandas index, so 'Dt' and 'Co' would be
# silently dropped. reset_index() turns both index levels back into ordinary
# columns for the conversion; `df` itself is left untouched.
# NOTE(review): 'Team' is already a regular column, so it reaches H2O as-is
# and can be used as a predictor or to filter rows per team.
hf_train = h2o.H2OFrame(df.reset_index())
hf_train
H2O AutoML
# H2OAutoML was used without being imported anywhere in the visible script.
from h2o.automl import H2OAutoML

# NOTE(review): `x` (predictor column names), `y` (response column name) and
# `hf_test` are not defined in the visible snippet — presumably they are
# created elsewhere; confirm before running.

# Cap the search at 600 seconds and fix the seed to 42.
aml = H2OAutoML(max_runtime_secs=600, seed=42)

# Train on hf_train; hf_test is supplied as the leaderboard frame.
aml.train(
    x=x,
    y=y,
    training_frame=hf_train,
    leaderboard_frame=hf_test,
)