I am trying to plot a non-linear line of best fit on top of a scatterplot, but it prints multiple lines, no matter what spline parameters I use.
I have no idea what may be going on - any ideas?
Thanks!
def plot_gene_pseudotime(adata, gene_name, n_splines=20, spline_order=3):
# define the spline term
spline = pygam.s(0, n_splines=n_splines, spline_order=spline_order)
X = adata.obs['dpt_pseudotime'].to_numpy().reshape(-1, 1)
y = adata[:, gene_name].X.toarray().flatten()
# create the LinearGAM object with the spline term and perform grid search
gam = pygam.LinearGAM(spline).gridsearch(X, y)
# fit the LinearGAM object to the data
gam.fit(X, y)
# predict the response values
y_pred = gam.predict(X)
# plot the data and the fitted spline
plt.clf()
fig, ax = plt.subplots()
ax.scatter(X, y, s=5, alpha=0.5)
ax.plot(X, y_pred, color='red')
ax.set_xlabel("Pseudotime")
ax.set_ylabel(f"{gene_name} expression")
plt.show()
plot_gene_pseudotime(adata, 'SNCG', n_splines=15, spline_order=10)
plot_gene_pseudotime(adata, 'SNCG', n_splines=3, spline_order=2)