I need to visualize a dataset's distribution using several `multiple` methods,
so I created a `multiplehue_kdeplots`
function that generates three visualizations of the same dataset. It works as expected, but as far as I can see, on a large dataset the distribution is calculated three times (which takes time). How can I calculate it once and only change the way the distribution is displayed?
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Fix the global NumPy seed so the demo data is reproducible.
np.random.seed(42)

# 500 demo rows: an integer category drawn from [0, 4) and a value drawn
# uniformly from [-10, 10). The category column is drawn first; keep that
# order so the random stream (and therefore the data) stays the same.
df = pd.DataFrame(
    dict(
        category=np.random.randint(0, 4, size=500),
        value=np.random.uniform(-10, 10, size=500),
    )
)
def multiplehue_kdeplots(df, val_col, cat_col,
                         mults=('layer', 'stack', 'fill'), leg_idx=None,
                         pal=None, **kwargs):
    """Draw one hue-split KDE plot per ``multiple`` mode, stacked vertically.

    A figure with ``len(mults)`` rows sharing the x axis is created; row
    ``i`` shows ``sns.kdeplot`` of ``val_col`` coloured by ``cat_col`` using
    ``multiple=mults[i]``. The figure is displayed with ``plt.show()``.

    NOTE: every row calls ``sns.kdeplot`` on the full ``df``, so the density
    estimate is recomputed once per entry in ``mults``.

    Args:
        df: Data passed to ``sns.kdeplot`` as ``data``.
        val_col: Column plotted on the x axis.
        cat_col: Column used as ``hue``.
        mults: One ``multiple`` mode, or an iterable of them. A single
            string is treated as one mode.
        leg_idx: Index of the only row that gets a legend; ``None`` (the
            default) draws no legend at all.
        pal: Palette forwarded to ``sns.kdeplot``.
        **kwargs: Forwarded unchanged to ``plt.subplots`` (e.g. ``figsize``).

    Raises:
        ValueError: If ``mults`` is empty.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(mults, str):
        mults = (mults,)
    else:
        mults = tuple(mults)
    if not mults:
        raise ValueError("mults must contain at least one 'multiple' mode")

    fig, axs = plt.subplots(nrows=len(mults), sharex=True, **kwargs)
    # plt.subplots returns a bare Axes for a single subplot and an array
    # otherwise (2-D when the caller passes squeeze=False); flatten all of
    # these to a 1-D sequence so the loop below can treat them alike.
    axs = np.atleast_1d(axs).ravel()

    for i, (ax, mode) in enumerate(zip(axs, mults)):
        sns.kdeplot(data=df, x=val_col, hue=cat_col, fill=True, alpha=0.5,
                    multiple=mode, legend=(leg_idx == i), palette=pal, ax=ax)
        ax.set_title(mode.title())
        ax.set_ylabel('Density')

    fig.subplots_adjust(hspace=0.3)
    plt.show()
# Demo: plot the 'value' distribution per 'category' in every default mode.
multiplehue_kdeplots(df, val_col='value', cat_col='category')