I need to rewrite this code using numexpr. It computes the Euclidean norms (as squared distances) between every row of a matrix `data` [rows x cols] and a vector `vec` [1 x cols]:
# Squared Euclidean distance from vec to every row of data: the squared
# differences are summed along axis 1 and no square root is taken.
d = ((data-vec)**2).sum(axis=1)
How can this be done? Also, is there perhaps an even faster method?
The problem is that I use HDF5, and the data matrix is read from it. For example, this code gives the error: objects are not aligned.
# Naive NumPy solution; can it be parallelized?
def test_bruteforce_knn():
    """Time a brute-force k-nearest-neighbour search over an HDF5 array.

    Reads ``h5f.root.carray`` from ``fileName`` in ``batches`` slices of
    ``rows`` rows each, stores the squared Euclidean distance from every
    row to ``vec`` in one flat array, prints the elapsed time of the
    distance pass, then prints the indices of the ``k`` smallest distances.

    Relies on the module-level names ``fileName``, ``rows``, ``batches``,
    ``vec`` and ``k``. Returns nothing; results are printed.
    """
    h5f = tables.open_file(fileName)
    try:
        t0 = time.time()
        d = np.empty((rows * batches,))
        for i in range(batches):
            lo, hi = i * rows, (i + 1) * rows
            # Slicing the carray pulls this batch from disk into memory.
            d[lo:hi] = ((h5f.root.carray[lo:hi] - vec) ** 2).sum(axis=1)
        print(time.time() - t0)
        ndx = d.argsort()
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(ndx[:k])
    finally:
        # Release the HDF5 handle even if reading or computing fails.
        h5f.close()
# Expanded form: ||x - v||^2 = ||x||^2 + ||v||^2 - 2 * x.v, evaluated per batch.
def test_bruteforce_knn():
    """Time a brute-force k-NN search using the expanded squared distance.

    For every batch of ``rows`` rows read from ``h5f.root.carray`` the
    squared distance to ``vec`` is computed as
    ``sum(x*x) + dot(v, v) - 2 * dot(x, v)``. The elapsed time of the
    distance pass is printed, followed by the indices of the ``k``
    smallest distances.

    Relies on the module-level names ``fileName``, ``rows``, ``batches``,
    ``vec`` and ``k``. Returns nothing; results are printed.

    NOTE: the expanded form can return tiny negative values through
    floating-point cancellation where the direct form would give ~0.
    """
    h5f = tables.open_file(fileName)
    try:
        t0 = time.time()
        # np.dot cannot multiply (rows, cols) by (1, cols) -- that is the
        # "objects are not aligned" error -- so use a flat 1-D view of vec.
        v = np.ravel(vec)
        vv = np.dot(v, v)  # loop-invariant, computed once
        d = np.empty((rows * batches,))
        for i in range(batches):
            lo, hi = i * rows, (i + 1) * rows
            # Read the batch from disk once instead of once per term.
            chunk = h5f.root.carray[lo:hi]
            d[lo:hi] = (np.einsum('ij,ij->i', chunk, chunk)
                        + vv
                        - 2 * np.dot(chunk, v))
        print(time.time() - t0)
        ndx = d.argsort()
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(ndx[:k])
    finally:
        # Release the HDF5 handle even if reading or computing fails.
        h5f.close()
Using numexpr: it seems numexpr doesn't understand `h5f.root.carray[i*rows:(i+1)*rows]`; must the slice be assigned to a variable first?
import numexpr as ne
def test_bruteforce_knn():
    """Time a brute-force k-NN search with the distances computed by numexpr.

    Each batch of ``rows`` rows is read from ``h5f.root.carray`` into a
    local array, and numexpr evaluates the per-row sum of squared
    differences to ``vec``. The elapsed time of the distance pass is
    printed, followed by the indices of the ``k`` smallest distances.

    Relies on the module-level names ``fileName``, ``rows``, ``batches``,
    ``vec`` and ``k``. Returns nothing; results are printed.
    """
    h5f = tables.open_file(fileName)
    try:
        t0 = time.time()
        d = np.empty((rows * batches,))
        for i in range(batches):
            lo, hi = i * rows, (i + 1) * rows
            # numexpr expressions may only reference plain variable names,
            # not attribute access or slicing, so bind the batch first.
            chunk = h5f.root.carray[lo:hi]
            # Store the result; previously it was discarded and d was
            # left uninitialised.
            d[lo:hi] = ne.evaluate(
                "sum((chunk - vec) ** 2, axis=1)",
                local_dict={"chunk": chunk, "vec": vec},
            )
        print(time.time() - t0)
        ndx = d.argsort()
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print(ndx[:k])
    finally:
        # Release the HDF5 handle even if reading or computing fails.
        h5f.close()