0

I have two distributions and I would like to know the properties of the multiplication of these distributions.

For example, if I had the distribution of properties velocity and time, I want the characteristics of the probability distribution of distance.

With reasonable estimates for the inegration bounds, I can calculate the probability density function from the product of two random variables:

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt

T, dt = np.linspace(0,20,201, retstep = True)
T = T[1:] # avoid divide by zero below

V = np.linspace(0,20,201)
D = np.linspace(0,120,201)

P_t = stats.gamma(4,1) # probability distribution for time
P_v = stats.norm(8,2)  # probability distribution for speed

# complete integration
P_d = [np.trapz(P_t.pdf(T) * P_v.pdf(d / T) / T, dx = dt) for d in D]

plt.plot(T, P_t.pdf(T), label = 'time')
plt.plot(V, P_v.pdf(V), label = 'velocity')
plt.plot(D, P_d, label = 'distance')
plt.legend()
plt.ylabel('Probability density')

distributions

I would like to be able to compute things like P_d.sf(d), P_d.cdf(d), etc., for arbitrary values of d. Can I create a new distribution (perhaps using scipy.stats.rv_continuous) to characterize distance?

mkrieger1
  • 19,194
  • 5
  • 54
  • 65
ramzeek
  • 2,226
  • 12
  • 23

1 Answers1

0

The solution took a bit of time to understand the rv_continuous. Cobbling together knowledge from a bunch of examples (I should have documented them--sorry) I think I got a working solution.

The only issue is that the domain needs to be known in advance, but I can work with that. If someone has ideas for how to fix that, please let me know.

import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import scipy as sp

interp1d = sp.interpolate.interp1d
trapz = sp.integrate.trapz

# Time domain vector - needed in class
dt = 0.01
t_max = 10
T = np.arange(dt, t_max + dt, dt)

# Distance domain vector - needed in class
dd = 0.01
d_max = 30
D = np.arange(0, d_max + dd, dd)

class MultiplicativeModel(stats.rv_continuous):
    def __init__(self, Tmodel, Vmodel, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.Tmodel = Tmodel # The time-domain probability function
        self.Vmodel = Vmodel # The velocity-domain probability function

        # Create vectors for interpolation of distributions        
        self.pdf_vec = np.array([trapz(self.Tmodel.pdf(T) * \
                                       self.Vmodel.pdf(_ / T) / T, dx = dt) \
                                 for _ in D])
        self.cdf_vec = np.cumsum(self.pdf_vec) * dd
        self.sf_vec = 1 - self.cdf_vec

        # define key functions for rv_continuous class
        self._pdf = interp1d(D, self.pdf_vec, assume_sorted=True)        
        self._sf = interp1d(D, self.sf_vec, assume_sorted=True)        
        self._cdf = interp1d(D, self.cdf_vec, assume_sorted=True)

        # Extraolation option below is necessary because sometimes rvs picks 
        # a number really really close to 1 or 0 and this spits out an error if it
        # is outside of the interpolation range.        
        self._ppf = interp1d(self.cdf_vec, D, assume_sorted=True, 
                             fill_value = 'extrapolate')
        # Moments
        self._munp = lambda n, *args: np.trapz(self.pdf_vec * D ** n, dx=dd)

With the above defined, we get results like:

dv = 0.01
v_max = 10
V = np.arange(0, v_max + dv, dv)

model = MultiplicativeModel(stats.norm(3, 1),
                            stats.uniform(loc=2, scale = 2))


# test moments and stats functions
print(f'median: {model.median()}')
# median: 8.700970199181763
print(f'moments: {model.stats(moments = "mvsk")}')
#moments: (array(9.00872026), array(12.2315612), array(0.44131568), array(0.16819043))

plt.figure(figsize=(6,4))
plt.plot(T, model.Tmodel.pdf(T), label = 'Time PDF')
plt.plot(V, model.Vmodel.pdf(V), label = 'Velocity PDF')
plt.plot(D, model.pdf(D), label = 'Distance PDF')
plt.plot(D, model.cdf(D), label = 'Distance CDF')
plt.plot(D, model.sf(D), label = 'Distance SF')

x = model.rvs(size=10**5)
plt.hist(x, bins = 50, density = True, alpha = 0.5, label = 'Sampled distribution')
plt.legend()
plt.xlim([0,30])

distributions

ramzeek
  • 2,226
  • 12
  • 23