Can we make this faster ? Moschopoulos algorithm

Question

I implemented the algortihm of Moschopoulos that gives the density and c.d.f of a sum of gamma random variables. A C++ implementation exists in the dcoga R package, but i need mine to handle arbitrary precision numbers through the mpmath library.

The major problem with the following code is the runtime: some parameters of the class (the _delta slot) need to be updated and re-computed 'on-the-fly' when needed, and it takes a lot of time. I ran a cProfile on a simple exemple so you can see where is the problem quickly, but i dont know enough to make it faster. See for yourself by running the follwoing :

import mpmath as mp
import numpy as np
import scipy.stats.distributions as sc_dist

def gamma_density(x,a,b):
    return mp.power(x,a-1) * mp.exp(-x/b) / mp.power(b,a) / mp.gamma(a)

def gamma_cdf(x,a,b):
    return mp.gammainc(a,0,x/b,regularized=True)

class GammaConvolution:

    def __init__(self,alpha,beta):

        #alpha and beta must be provided as numpy array of mpmath.mpf objects
        n = len(alpha)
        if not len(beta) == n:
            raise ValueError('you should provide as much alphas and betas')
        if n == 1:
            raise ValueError('you should provide at least 2 gammas.')
        if not (type(alpha[0]) == mp.mpf and type(beta[0]) == mp.mpf):
            raise ValueError('you should provide alpha and beta in mp.mpf format.')

        alpha = np.array(alpha)
        beta = np.array(beta)

        # sanity check :
        check = alpha>0
        if not np.all(check):
            alpha = alpha[check]
            beta = beta[check]
            print('Some alphas were negatives. We discarded them. {} are remaining'.format(len(alpha)))

        self.signs = np.array([-1 if b < 0 else 1 for b in beta])
        self.alpha = alpha
        self.beta = 1/beta * self.signs
        self.n = self.alpha.shape[0]

        # Moshopoulos parameters :
        self._beta1 = np.min(self.beta)
        self._c = np.prod([mp.power(self._beta1/self.beta[i], self.alpha[i]) for i in range(self.n)])
        self._rho = np.sum(self.alpha)
        self._delta = [mp.mpf('1')]
        self._lgam_mod = [np.sum([self.alpha[i] * (1 - (self._beta1 / self.beta[i])) for i in range(self.n)])] # this correpsont o get_lgam(k=1)"
        self._to_power = [1 - self._beta1/self.beta[i] for i in range(self.n)]

    def _get_delta(self,k):
        if(len(self._delta)<=k):
            # Then we create it :
            n = len(self._delta)
            self._lgam_mod.extend([np.sum([self.alpha[i] * mp.power(self._to_power[i], j + 1) for i in range(self.n)])for j in range(n,k+1)])
            self._delta.extend([np.sum([self._lgam_mod[i] * self._delta[j - 1 - i] for i in range(j)])/j for j in range(n, k+1)])
        return self._delta[k]

    def coga(self, x, type='pdf'):
        if x < 0:
            return 0
        k = 0
        out = 0
        if type=='pdf':
            func = gamma_density
        if type=='cdf':
            func = gamma_cdf
        while True:
            step = self._get_delta(k) * func(x, self._rho + k, self._beta1)
            if mp.isinf(step) or mp.isnan(step):
                print('inf or nan happened, the algorithm did not converge')
                break
            out += step
            if mp.almosteq(step, 0):
                break
            k += 1

        out *= self._c
        return out

    def pdf(self,x):
        return self.coga(x, 'pdf')

    def cdf(self,x):
        return self.coga(x, 'cdf')


if __name__ == "__main__":
    mp.mp.dps = 20

    # some particular exemples values that 'approximates' a lognormal.

    alpha = np.array([mp.mpf(28.51334751960197301778147509487793953959733053134799171090326516406790428180220147416519532643017308),
       mp.mpf(11.22775884868121894986129015315963173419663023710308189240288960305130268927466536233373438048018254),
       mp.mpf(6.031218085515218207945488717293490366342446718306869797877975835997607997369075273734516467130527887),
       mp.mpf(3.566976340452999300401949508136750700482567798832918933344685923750679570986611068640936818600783319),
       mp.mpf(2.11321149019108276673514744052403419069919543373601000373799419309581402519772983291547041971629247),
       mp.mpf(1.13846760415283260768713745745968197587694610126298554688258480795156541979045502458925706173497129),
       mp.mpf(0.4517330810577715647869261976064157403882011767087065171431053299996540080549312203533542184738086012),
       mp.mpf(0.07749235677493576352946436194914173772169589371740264101530548860132559560092370430079007024964728383),
       mp.mpf(0.002501284133093294545540492059111705453529784044424054717786717782803430937621102255478670439562804153),
       mp.mpf(0.000006144939533164067887819376779035687994761732668244591993428755735056093784306786937652351425833352728)])

    beta = np.array([mp.mpf(391.6072818187915081052155152400534191999174250784251976117131780922742055385769343508047998043722828),
       mp.mpf(77.21898445771279675063405017644417196454232681648725486524482168571564310062495549360709158314560647),
       mp.mpf(31.76440960907061013049029007869346161467203121003911885547576503605957915202107379956314233702891246),
       mp.mpf(17.44293394293412500742344752991577287098138329678954573112349659319428017788092621396034528552305827),
       mp.mpf(11.23444737858955404891602233256282644042451542694693191750203254839756772074776087387688524524329672),
       mp.mpf(8.050341288822160015292746577166226701992193848793662515696372301333563919247892899484012106343185691),
       mp.mpf(6.255867387720061672816524328464895459610937299629691008594802004659480037331191722532939009895028284),
       mp.mpf(5.146993307537222489735861088512006440481952536952402160831342266591666243011939270854579656683779017),
       mp.mpf(4.285958039399903253267350243950743396496148339434605882255896571795493305652009075308916145669650666),
       mp.mpf(3.455673251219567018227405844933725014914508519853860295284610094904914286228770061300477809395436033)])

    dist = GammaConvolution(alpha, beta)
    print(sc_dist.lognorm(s=0.25).cdf(1))

    import cProfile
    pr = cProfile.Profile()
    pr.enable()
    print(dist.cdf(1))
    print(sc_dist.lognorm(s=0.25).cdf(1))
    pr.disable()
    # after your program ends
    import pstats
    pstats.Stats(pr).strip_dirs().sort_stats('cumtime').print_stats(20)

Can you help me making it faster ? The problem is clearly if the _get_delta function.

Can you provide some examples of input and output values we can test on? — GPhilo, Jun 29 '20 at 08:29
Yep, i did provide one in the main part of the script (should ouput `0.5028...`). Honestly, if this value is right then your script is right ;) — lrnv, Jun 29 '20 at 08:37

Can we make this faster ? Moschopoulos algorithm

0 Answers0