I'm trying to coerce the values in my NumPy array to float. However, in my array, there might be some values which might not coerce successfully and I want to replace those values with a default value. I do want the speed of NumPy though. I do not want to do the python loop. What's the best route to achieve this behavior?
For instance:
import numpy as np
my_array = np.array(["1", "2", "3", "NA"])
new_array = magic_coerce(my_array, float, -1.0) # I want to implement this
print (new_array) # should print [1., 2., 3. -1.]
I'm trying to write my own ufunc in c, and I have the following:
int is_float(const char* c)
{
regex_t regex;
regcomp(®ex, "^[+-]?([0-9]*[.])?[0-9]+$", REG_EXTENDED);
return regexec(®ex, c, 0, NULL, 0) == 0;
}
float to_float(const char *c, float default_value)
{
float result = default_value;
if (is_float(c))
{
result = atof(c);
}
return result;
}
static PyMethodDef LogitMethods[] = {
{NULL, NULL, 0, NULL}
};
/* The loop definition must precede the PyMODINIT_FUNC. */
static void double_logitprod(char **args, npy_intp *dimensions,
npy_intp* steps, void* data)
{
npy_intp i;
npy_intp n = dimensions[0];
char *in1 = args[0], *in2 = args[1];
char *out = args[2];
npy_intp in1_step = steps[0];
npy_intp out_step = steps[2];
double tmp;
for (i = 0; i < n; i++) {
/*BEGIN main ufunc computation*/
char *tmp1 = (char *) in1;
tmp = *((double *)in2);
*((double *) out) = to_float(tmp1, tmp);
/*END main ufunc computation*/
in1 += in1_step;
out += out_step;
}
}
/*This a pointer to the above function*/
PyUFuncGenericFunction funcs[1] = {&double_logitprod};
/* These are the input and return dtypes of logit.*/
static char types[3] = {NPY_OBJECT, NPY_DOUBLE,
NPY_DOUBLE};
But it looks like it's not working correctly. What's the type for UNICODE in numpy? NPY_UNICODE gives an error, so I coerced it to NPY_OBJECT, but this does not seem to play with it.