0

I'm trying to create a SARIMAX code and to make sure it is correct, I'm using an exercise given in: https://otexts.com/fpp3/seasonal-arima.html#example-monthly-us-leisure-and-hospitality-employment. However, the beginning of my series of fitted values has an abnormal spike that I cannot fix, maintaining the parameters given in the exercise (2,1,0)(1,1,1)12. How can I fix this problem? enter image description here divisao(db,metodo)[0] represents the dataframe that has two columns: the dates and the number of millions of people. My code is:

def sarima (db,metodo,fds,p,d,q,m,P,D,Q):
    if fds==True:
        # Criar o modelo ARIMA
        coeficientes = np.polyfit(np.arange(len(divisao(db,metodo)[0])), divisao(db,metodo)[0].Volume, 4)
        tendencia = coeficientes
        modelo = sm.tsa.SARIMAX(divisao(db,metodo)[0].Volume,
                                exog=None,
                                order=(p, d, q), seasonal_order=(P, D, Q, m),
                                trend=[coeficientes],  
                                measurement_error=False, 
                                time_varying_regression=False, 
                                mle_regression=True, 
                                simple_differencing=False, 
                                enforce_stationarity=True, 
                                enforce_invertibility=True,
                                hamilton_representation=False,
                                concentrate_scale=False,
                                trend_offset=1,
                                use_exact_diffuse=False,
                                date="2001-01-01", freq='M', 
                                missing='none',
                                validate_specification=True)


        # Ajustar o modelo aos dados
        resultado = modelo.fit()
        residuos = resultado.resid
        residuos2 = residuos[1:]
        valores_ajustados = resultado.fittedvalues

        # Obter a previsão do modelo
        previsao = resultado.predict(start=len(divisao(db,metodo)[1]), end=len(divisao(db,metodo)[1])+len(divisao(db,metodo)[2])-1)

        # Obter a previsão do modelo
        #previsao = resultado.predict(n_periods=len(divisao(db)[2]))

        # Imprimir os resultados
        mae = metrics.mean_absolute_error(divisao(db,metodo)[2].Volume, previsao)
        mse = metrics.mean_squared_error(divisao(db,metodo)[2].Volume, previsao)
        rmse = np.sqrt(mse) # or mse**(0.5)  
        #r2 = metrics.r2_score(divisao(db,metodo)[2].Volume, previsao)
        resumo=pd.Series({"Fim de Semana": fds,"Parâmetros (p,d,q):": f"({p}, {d}, {q})",
                             'Parâmetros Sazonais (P,D,Q,m)': f"({P}, {D}, {Q}, {m})",
                             "MAE":mae,
                             'MSE':mse,"RMSE":rmse})
        grafico=plt.figure(figsize=(10, 6))
        #plt.plot(divisao(db,metodo)[0].index, divisao(db,metodo)[0].Volume,color='green', label='Teste')
        plt.plot(divisao(db,metodo)[0].index[1:], valores_ajustados[1:],color='green', label=f"Modelo de Valores Ajustados SARIMA ({p},{d},{q})({P},{D},{Q}){m}")
        plt.plot(divisao(db,metodo)[0].index, divisao(db,metodo)[0].Volume,color='blue', label='Dados Originais')
        #plt.plot(divisao(db,metodo)[2].Data, previsao, color='red',label='Previsão')
        plt.xlabel('Data')
        plt.ylabel('Milhões de Pessoas')
        plt.title('Número de Empregos Mensais nos EUA na Área de Lazer e Hospitalidade')
        plt.legend()

    return(resumo,resultado.summary(),residuos2,valores_ajustados,tendencia)

I tried changing the parameters of the SARIMAX model and I was hoping the residuals would end up like this: enter image description here However, the beginning of my time series made my residuals like this: enter image description here

0 Answers0