I'm trying to parallelize this numba jitted function. I initially thought this would be trivial since it is an embarrassingly parallel problem, but it produces unexpected results (different output for the sequential and parallel 'implementations'). Any idea what could be happening there and whether there are ways to make this work?
See reproducible example below:
import numba
import numpy as np
from sklearn.datasets import make_regression
@numba.jit(nopython=True)
def nanlstsq(X, y):
"""Return the least-squares solution to a linear matrix equation
Analog to ``numpy.linalg.lstsq`` for dependant variable containing ``Nan``
Args:
X ((M, N) np.ndarray): Matrix of independant variables
y ({(M,), (M, K)} np.ndarray): Matrix of dependant variables
Returns:
np.ndarray: Least-squares solution, ignoring ``Nan``
"""
beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
for idx in range(y.shape[1]):
# subset y and X
isna = np.isnan(y[:,idx])
X_sub = X[~isna]
y_sub = y[~isna,idx]
# Compute beta on data subset
XTX = np.linalg.inv(np.dot(X_sub.T, X_sub))
XTY = np.dot(X_sub.T, y_sub)
beta[:,idx] = np.dot(XTX, XTY)
return beta
@numba.jit(nopython=True, parallel=True)
def nanlstsq_parallel(X, y):
beta = np.zeros((X.shape[1], y.shape[1]), dtype=np.float64)
for idx in numba.prange(y.shape[1]):
# subset y and X
isna = np.isnan(y[:,idx])
X_sub = X[~isna]
y_sub = y[~isna,idx]
# Compute beta on data subset
XTX = np.linalg.inv(np.dot(X_sub.T, X_sub))
XTY = np.dot(X_sub.T, y_sub)
beta[:,idx] = np.dot(XTX, XTY)
return beta
# Generate random data
n_targets = 10000
n_features = 3
X, y = make_regression(n_samples=200, n_features=n_features,
n_targets=n_targets)
# Add random nan to y array
y.ravel()[np.random.choice(y.size, 5*n_targets, replace=False)] = np.nan
# Run the regression
beta = nanlstsq(X, y)
beta_parallel = nanlstsq_parallel(X, y)
np.testing.assert_allclose(beta, beta_parallel)