just do which seems to be fastest option for you:
(1) test[test < 0] = 0
(2) np.where(test < 0, 0, test) # THANKS TO @antony-hatchkins
(3) test.clip(0) # THANKS TO @u12-forward
depending on how you test it.
when you execute each method 1000 times then approach number 2 is fastest. when you measure single function execution then option number 1 is fastest.
test:
import numpy as np
import timeit
from copy import copy
from functools import partial
def create_data():
return np.random.rand(int(1e7))-0.5
def func1(data):
data[data < 0] = 0
def func2(data):
np.putmask(data, data < 0, 0)
def func3(data):
np.maximum(data, 0)
def func4(data):
data.clip(0)
def func5(data):
np.where(data < 0, 0, data)
if __name__ == '__main__':
n_loops = 1000
test = create_data()
t1 = timeit.Timer(partial(func1, copy(test)))
t2 = timeit.Timer(partial(func2, copy(test)))
t3 = timeit.Timer(partial(func3, copy(test)))
t4 = timeit.Timer(partial(func4, copy(test)))
t5 = timeit.Timer(partial(func4, copy(test)))
print(f"func1 (x[x < 0]): timeit {t1.timeit(n_loops)} num test loops {n_loops}")
print(f"func2 (putmask): timeit {t2.timeit(n_loops)} num test loops {n_loops}")
print(f"func3 (maximum): timeit {t3.timeit(n_loops)} num test loops {n_loops}")
print(f"func4 (clip): timeit {t4.timeit(n_loops)} num test loops {n_loops}")
print(f"func5 (where): timeit {t5.timeit(n_loops)} num test loops {n_loops}")
test results:
func1 (x[x < 0]): timeit 7.2177265440000005 num test loops 1000
func2 (putmask): timeit 13.913492435999999 num test loops 1000
func3 (maximum): timeit 23.065230873999997 num test loops 1000
func4 (clip): timeit 22.768682354000006 num test loops 1000
func5 (where): timeit 23.844607757999995 num test loops 1000
EDIT:
different approach to test data[data < 0] = 0 vs np.where(data < 0, 0, data):
import numpy as np
from time import perf_counter as clock
z = np.random.rand(10**7) - 0.5
start = clock()
for i in range(100):
a = z.copy()
np.where(a<0, 0, a)
print(clock() - start)
start = clock()
for i in range(100):
a = z.copy()
a[a<0] = 0
print(clock() - start)
test result:
7.9247566030000005
8.021165436000002
test3:
In [1]: import numpy as np
...: from copy import copy
...:
...:
...:
...: test = np.random.rand(int(1e7))-0.5
...:
...:
...: def func1():
...: data = copy(test)
...: data[data < 0] = 0
...:
...:
...: def func2():
...: data = copy(test)
...: np.putmask(data, data < 0, 0)
...:
...:
...: def func3():
...: data = copy(test)
...: np.maximum(data, 0)
...:
...:
...: def func4():
...: data = copy(test)
...: data.clip(0)
...:
...:
...: def func5():
...: data = copy(test)
...: np.where(data < 0, 0, data)
...:
In [2]: timeit func1
16.9 ns ± 0.117 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)
In [3]: timeit func2
15.8 ns ± 0.184 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)
In [4]: timeit func3
22.1 ns ± 0.287 ns per loop (mean ± std. dev. of 7 runs, 10000000 loops each)
In [5]: timeit func4
15.6 ns ± 0.0594 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)
In [6]: timeit func5
16.2 ns ± 0.187 ns per loop (mean ± std. dev. of 7 runs, 100000000 loops each)