I'm trying to create a SARIMAX code and to make sure it is correct, I'm using an exercise given in: https://otexts.com/fpp3/seasonal-arima.html#example-monthly-us-leisure-and-hospitality-employment. However, the beginning of my series of fitted values has an abnormal spike that I cannot fix, maintaining the parameters given in the exercise (2,1,0)(1,1,1)12. How can I fix this problem? enter image description here divisao(db,metodo)[0] represents the dataframe that has two columns: the dates and the number of millions of people. My code is:
def sarima (db,metodo,fds,p,d,q,m,P,D,Q):
if fds==True:
# Criar o modelo ARIMA
coeficientes = np.polyfit(np.arange(len(divisao(db,metodo)[0])), divisao(db,metodo)[0].Volume, 4)
tendencia = coeficientes
modelo = sm.tsa.SARIMAX(divisao(db,metodo)[0].Volume,
exog=None,
order=(p, d, q), seasonal_order=(P, D, Q, m),
trend=[coeficientes],
measurement_error=False,
time_varying_regression=False,
mle_regression=True,
simple_differencing=False,
enforce_stationarity=True,
enforce_invertibility=True,
hamilton_representation=False,
concentrate_scale=False,
trend_offset=1,
use_exact_diffuse=False,
date="2001-01-01", freq='M',
missing='none',
validate_specification=True)
# Ajustar o modelo aos dados
resultado = modelo.fit()
residuos = resultado.resid
residuos2 = residuos[1:]
valores_ajustados = resultado.fittedvalues
# Obter a previsão do modelo
previsao = resultado.predict(start=len(divisao(db,metodo)[1]), end=len(divisao(db,metodo)[1])+len(divisao(db,metodo)[2])-1)
# Obter a previsão do modelo
#previsao = resultado.predict(n_periods=len(divisao(db)[2]))
# Imprimir os resultados
mae = metrics.mean_absolute_error(divisao(db,metodo)[2].Volume, previsao)
mse = metrics.mean_squared_error(divisao(db,metodo)[2].Volume, previsao)
rmse = np.sqrt(mse) # or mse**(0.5)
#r2 = metrics.r2_score(divisao(db,metodo)[2].Volume, previsao)
resumo=pd.Series({"Fim de Semana": fds,"Parâmetros (p,d,q):": f"({p}, {d}, {q})",
'Parâmetros Sazonais (P,D,Q,m)': f"({P}, {D}, {Q}, {m})",
"MAE":mae,
'MSE':mse,"RMSE":rmse})
grafico=plt.figure(figsize=(10, 6))
#plt.plot(divisao(db,metodo)[0].index, divisao(db,metodo)[0].Volume,color='green', label='Teste')
plt.plot(divisao(db,metodo)[0].index[1:], valores_ajustados[1:],color='green', label=f"Modelo de Valores Ajustados SARIMA ({p},{d},{q})({P},{D},{Q}){m}")
plt.plot(divisao(db,metodo)[0].index, divisao(db,metodo)[0].Volume,color='blue', label='Dados Originais')
#plt.plot(divisao(db,metodo)[2].Data, previsao, color='red',label='Previsão')
plt.xlabel('Data')
plt.ylabel('Milhões de Pessoas')
plt.title('Número de Empregos Mensais nos EUA na Área de Lazer e Hospitalidade')
plt.legend()
return(resumo,resultado.summary(),residuos2,valores_ajustados,tendencia)
I tried changing the parameters of the SARIMAX model and I was hoping the residuals would end up like this: enter image description here However, the beginning of my time series made my residuals like this: enter image description here