I want to extend Austin Rochford's example on Dirichlet process mixtures for density estimation to the multivariate case.
The first information I found about multivariate Gaussian mixtures in pymc3 is this issue on GitHub. The people involved in the issue mention two different solutions, but neither works for me. For instance, using Brandon's multivariate extension in a simple model like this:
import numpy as np
import pymc3 as pm
from mvnormal_extension import MvNormal
with pm.Model() as model:
    var_x = MvNormal('var_x', mu=3 * np.zeros(2), tau=np.diag(np.ones(2)), shape=2)
    trace = pm.sample(100)
I can't obtain the expected mean around (3, 3):
pm.summary(trace)
var_x:

  Mean             SD               MC Error         95% HPD interval
  -------------------------------------------------------------------
  0.220            1.161            0.116            [-1.897, 2.245]
  0.165            1.024            0.102            [-2.626, 1.948]

  Posterior quantiles:
  2.5            25             50             75             97.5
  |--------------|==============|==============|--------------|
  -1.897         -0.761         0.486          1.112          2.245
  -2.295         -0.426         0.178          0.681          2.634
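Two caveats I am aware of: 100 draws is a very short chain, and 3*np.zeros(2) is the zero vector, so the prior as written actually has mean (0, 0). Even so, here is a minimal sketch of what I expected to work, using pymc3's built-in MvNormal with mu = 3*np.ones(2) (assuming the installed pymc3 provides a working built-in MvNormal) and a longer chain:

import numpy as np
import pymc3 as pm

with pm.Model() as model:
    # Prior mean (3, 3) with identity precision.
    var_x = pm.MvNormal('var_x', mu=3 * np.ones(2), tau=np.eye(2), shape=2)
    trace = pm.sample(2000)

pm.summary(trace)  # here I would expect a posterior mean near (3, 3)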
The other solution, thanks to Benavente, can be reproduced as follows:
import numpy as np
import pymc3 as pm
import scipy
import theano
from theano import tensor
target_data = np.random.random((500, 16))
N_COMPONENTS = 5
N_SAMPLES, N_DIMS = target_data.shape
# Dirichlet prior.
ALPHA_0 = np.ones(N_COMPONENTS)
# Component means prior.
MU_0 = np.zeros(N_DIMS)
LAMB_0 = 1. * np.eye(N_DIMS)
# Components precision prior.
BETA_0, BETA_1 = 0., 1. # Covariance stds prior uniform limits.
L_0 = 2. # LKJ corr. shape. Larger shape -> more biased to identity.
# In order to convert the upper triangular correlation values to a
# complete correlation matrix, we need to construct an index matrix:
# Source: http://stackoverflow.com/q/29759789/1901296
N_ELEMS = N_DIMS * (N_DIMS - 1) // 2  # integer division so the index count is an int
tri_index = np.zeros([N_DIMS, N_DIMS], dtype=int)
tri_index[np.triu_indices(N_DIMS, k=1)] = np.arange(N_ELEMS)
tri_index[np.triu_indices(N_DIMS, k=1)[::-1]] = np.arange(N_ELEMS)
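# For example, with N_DIMS = 3 this yields
#     tri_index = [[0, 0, 1],
#                  [0, 0, 2],
#                  [1, 2, 0]]
# so corr_vec[tri_index] mirrors the 3 upper-triangular values into both
# triangles, and fill_diagonal below sets the diagonal to 1.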
with pm.Model() as model:
    # Component weight prior.
    pi = pm.Dirichlet('pi', ALPHA_0, testval=np.ones(N_COMPONENTS) / N_COMPONENTS)
    # pi_potential = pm.Potential('pi_potential', tensor.switch(tensor.min(pi) < .01, -np.inf, 0))

    ###################
    # Components plate.
    ###################
    # Component means.
    mus = [pm.MvNormal('mu_{}'.format(i), MU_0, LAMB_0, shape=N_DIMS)
           for i in range(N_COMPONENTS)]

    # Component precisions.
    # lamb = diag(sigma) * corr(corr_shape) * diag(sigma)
    corr_vecs = [
        pm.LKJCorr('corr_vec_{}'.format(i), L_0, N_DIMS)
        for i in range(N_COMPONENTS)
    ]
    # Transform the correlation vector representations to matrices.
    corrs = [
        tensor.fill_diagonal(corr_vecs[i][tri_index], 1.)
        for i in range(N_COMPONENTS)
    ]
    # Stds for the correlation matrices.
    cov_stds = pm.Uniform('cov_stds', BETA_0, BETA_1, shape=(N_COMPONENTS, N_DIMS))

    # Finally re-compose the covariance matrices using diag(sigma) * corr * diag(sigma).
    # Source: http://austinrochford.com/posts/2015-09-16-mvn-pymc3-lkj.html
    lambs = []
    for i in range(N_COMPONENTS):
        std_diag = tensor.diag(cov_stds[i])
        cov = std_diag.dot(corrs[i]).dot(std_diag)
        lambs.append(tensor.nlinalg.matrix_inverse(cov))
    stacked_mus = tensor.stack(mus)
    stacked_lambs = tensor.stack(lambs)

    #####################
    # Observations plate.
    #####################
    z = pm.Categorical('z', pi, shape=N_SAMPLES)

    @theano.as_op(itypes=[tensor.dmatrix, tensor.lvector, tensor.dmatrix, tensor.dtensor3],
                  otypes=[tensor.dscalar])
    def likelihood_op(values, z_values, mu_values, prec_values):
        logp = 0.
        for i in range(N_COMPONENTS):
            indices = z_values == i
            if not indices.any():
                continue
            logp += scipy.stats.multivariate_normal(
                mu_values[i], prec_values[i]).logpdf(values[indices]).sum()
        return logp

    def likelihood(values):
        return likelihood_op(values, z, stacked_mus, stacked_lambs)

    y = pm.DensityDist('y', likelihood, observed=target_data)

    step1 = pm.Metropolis(vars=mus + lambs + [pi])
    step2 = pm.ElemwiseCategoricalStep(vars=[z], values=list(range(N_COMPONENTS)))
    trace = pm.sample(100, step=[step1, step2])
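As I understand it, the re-composition loop implements the standard decomposition cov = diag(sigma) * corr * diag(sigma) and then inverts it to get each component's precision. A plain-numpy sketch with made-up values for one 2-D component (my illustration, not part of Benavente's code):

import numpy as np

stds = np.array([0.5, 2.0])           # stand-in for one row of cov_stds
corr = np.array([[1.0, 0.3],
                 [0.3, 1.0]])         # stand-in for one reconstructed corr matrix

D = np.diag(stds)
cov = D.dot(corr).dot(D)              # diag(sigma) * corr * diag(sigma)
lamb = np.linalg.inv(cov)             # precision, as in lambs.append(matrix_inverse(cov))
print(cov)
# [[0.25 0.3 ]
#  [0.3  4.  ]]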
In this code I have changed pm.ElemwiseCategoricalStep to pm.ElemwiseCategorical, and replaced

logp += scipy.stats.multivariate_normal(mu_values[i], prec_values[i]).logpdf(values[indices])

with

logp += scipy.stats.multivariate_normal(mu_values[i], prec_values[i]).logpdf(values[indices]).sum()
but I get this exception:
ValueError: expected an ndarray
Apply node that caused the error: Elemwise{Composite{((i0 + i1) - (i2 + i3))}}[(0, 0)](Sum{acc_dtype=float64}.0, FromFunctionOp{likelihood_op}.0, Sum{acc_dtype=float64}.0, FromFunctionOp{likelihood_op}.0)
Toposort index: 101
Inputs types: [TensorType(float64, scalar), TensorType(float64, scalar), TensorType(float64, scalar), TensorType(float64, scalar)]
Inputs shapes: [(), (), (), ()]
Inputs strides: [(), (), (), ()]
Inputs values: [array(-127.70516572917249), -13460.012199423296, array(-110.90354888959129), -13234.61313535326]
Outputs clients: [['output']]
HINT: Re-running with most Theano optimization disabled could give you a back-trace of when this node was created. This can be done with by setting the Theano flag 'optimizer=fast_compile'. If that does not work, Theano optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and storage map footprint of this apply node.
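Looking at the Inputs values in the traceback, the two FromFunctionOp outputs are plain Python floats (-13460.012..., -13234.613...) while the other two inputs are 0-d ndarrays, so my guess (an assumption I have not verified) is that likelihood_op needs to return a 0-d ndarray to match its declared dscalar output type, e.g.:

@theano.as_op(itypes=[tensor.dmatrix, tensor.lvector, tensor.dmatrix, tensor.dtensor3],
              otypes=[tensor.dscalar])
def likelihood_op(values, z_values, mu_values, prec_values):
    logp = 0.
    for i in range(N_COMPONENTS):
        indices = z_values == i
        if not indices.any():
            continue
        logp += scipy.stats.multivariate_normal(
            mu_values[i], prec_values[i]).logpdf(values[indices]).sum()
    # Guess: wrap the Python float in a 0-d float64 ndarray so the dscalar
    # output type actually receives an ndarray.
    return np.array(logp, dtype=np.float64)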
I appreciate any help. Thanks!