I am trying to model a Hidden Markov model to some stock data from the S&P500.
The data is downloaded from Yahoo Finance and is contained in a CSV-file containing the data for 250 trading days. I had this code working a week ago but now it does not seem to work.
import pandas as pd
from hmmlearn import hmm
import numpy as np
from matplotlib import cm, pyplot as plt
from matplotlib.dates import YearLocator, MonthLocator
df = pd.read_csv( "SnP500_1Yhist.csv",
header = 0,
index_col = "Date",
parse_dates = True
)
df["Returns"] = df["Adj Close"].pct_change()
df.dropna( inplace = True )
hmm_model = hmm.GaussianHMM( n_components = 4,
covariance_type = "full",
n_iter = 100
) # %Create the model
df = df["Returns"] # %Extract the wanted column of data
training_set = np.column_stack( df ) # %Shape = [1,250]
hmm_model.fit( training_set ) # %This is where I get the error
The error I get is:
ValueError Traceback (most recent call last)
<ipython-input-51-c8f66806fad6> in <module>
9 print(training_set.shape)
10 print(training_set)
---> 11 hmm_model.fit(training_set)
~/Git Projects/Aiguille Systems/allocationmodel/macromodelv2_venv/lib/python3.9/site-packages/hmmlearn/base.py in fit(self, X, lengths)
460 """
461 X = check_array(X)
--> 462 self._init(X, lengths=lengths)
463 self._check()
464
~/Git Projects/Aiguille Systems/allocationmodel/macromodelv2_venv/lib/python3.9/site-packages/hmmlearn/hmm.py in _init(self, X, lengths)
205 kmeans = cluster.KMeans(n_clusters=self.n_components,
206 random_state=self.random_state)
--> 207 kmeans.fit(X)
208 self.means_ = kmeans.cluster_centers_
209 if self._needs_init("c", "covars_"):
~/Git Projects/Aiguille Systems/allocationmodel/macromodelv2_venv/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py in fit(self, X, y, sample_weight)
1033 accept_large_sparse=False)
1034
-> 1035 self._check_params(X)
1036 random_state = check_random_state(self.random_state)
1037
~/Git Projects/Aiguille Systems/allocationmodel/macromodelv2_venv/lib/python3.9/site-packages/sklearn/cluster/_kmeans.py in _check_params(self, X)
956 # n_clusters
957 if X.shape[0] < self.n_clusters:
--> 958 raise ValueError(f"n_samples={X.shape[0]} should be >= "
959 f"n_clusters={self.n_clusters}.")
960
ValueError: n_samples=1 should be >= n_clusters=4.