That prints out 2808, while the true answer is 2809.
No, it does not. With an integer array the script prints nothing at all, because the kernel fails to compile:
$ cat ohnoitdoesnt.py
import numpy as np
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
a = gpuarray.to_gpu(np.array([53]))
print((a**2).get()[0])
$ python ohnoitdoesnt.py
Traceback (most recent call last):
File "ohnoitdoesnt.py", line 6, in <module>
print((a**2).get()[0])
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/gpuarray.py", line 659, in __pow__
return self._pow(other,new=True)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/gpuarray.py", line 643, in _pow
func = elementwise.get_pow_kernel(self.dtype)
File "<string>", line 2, in get_pow_kernel
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/tools.py", line 430, in context_dependent_memoize
result = func(*args)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/elementwise.py", line 559, in get_pow_kernel
"pow_method")
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/elementwise.py", line 161, in get_elwise_kernel
arguments, operation, name, keep, options, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/elementwise.py", line 147, in get_elwise_kernel_and_types
keep, options, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/elementwise.py", line 75, in get_elwise_module
options=options, keep=keep)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/compiler.py", line 291, in __init__
arch, code, cache_dir, include_dirs)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/compiler.py", line 255, in compile
return compile_plain(source, options, keep, nvcc, cache_dir, target)
File "/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/compiler.py", line 137, in compile_plain
stderr=stderr.decode("utf-8", "replace"))
pycuda.driver.CompileError: nvcc compilation of /tmp/tmpaeIBGe/kernel.cu failed
[command: nvcc --cubin -arch sm_52 -I/usr/local/lib/python2.7/dist-packages/pycuda-2017.1.1-py2.7-linux-x86_64.egg/pycuda/cuda kernel.cu]
[stderr:
kernel.cu(19): error: calling a __host__ function("std::pow<long, long> ") from a __global__ function("pow_method") is not allowed
kernel.cu(19): error: identifier "std::pow<long, long> " is undefined in device code
2 errors detected in the compilation of "/tmp/tmpxft_00001674_00000000-6_kernel.cpp1.ii".
]
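This is a known limitation in CUDA and PyCUDA -- the CUDA math library doesn't provide integer-argument overloads of most functions. Here the generated kernel ends up calling the host-only std::pow<long, long>, which cannot be used in device code, so compilation fails.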
If we work around this by giving the array a floating-point dtype instead, it works as expected:
$ cat ohnoitdoesnt.py
import numpy as np
import pycuda.gpuarray as gpuarray
import pycuda.autoinit
a = gpuarray.to_gpu(np.array([53], dtype=np.float32))
print((a**2).get()[0])
$ python ohnoitdoesnt.py
2809.0