0

I am trying to plot a non-linear line of best fit on top of a scatterplot, but it prints multiple lines, no matter what spline parameters I use.

I have no idea what may be going on - any ideas?

Thanks!


def plot_gene_pseudotime(adata, gene_name, n_splines=20, spline_order=3):

    # define the spline term
    spline = pygam.s(0, n_splines=n_splines, spline_order=spline_order)

    X = adata.obs['dpt_pseudotime'].to_numpy().reshape(-1, 1)
    y = adata[:, gene_name].X.toarray().flatten()

    # create the LinearGAM object with the spline term and perform grid search
    gam = pygam.LinearGAM(spline).gridsearch(X, y)

    # fit the LinearGAM object to the data
    gam.fit(X, y)

    # predict the response values
    y_pred = gam.predict(X)

    # plot the data and the fitted spline
    plt.clf()
    fig, ax = plt.subplots()
    ax.scatter(X, y, s=5, alpha=0.5)
    ax.plot(X, y_pred, color='red')
    ax.set_xlabel("Pseudotime")
    ax.set_ylabel(f"{gene_name} expression")
    plt.show()
plot_gene_pseudotime(adata, 'SNCG', n_splines=15, spline_order=10)

enter image description here

plot_gene_pseudotime(adata, 'SNCG', n_splines=3, spline_order=2)

enter image description here

Carmen Sandoval
  • 2,266
  • 5
  • 30
  • 46

0 Answers0