0

I'm trying to coerce the values in my NumPy array to float. However, in my array, there might be some values which might not coerce successfully and I want to replace those values with a default value. I do want the speed of NumPy though. I do not want to do the python loop. What's the best route to achieve this behavior?

For instance:

import numpy as np
my_array = np.array(["1", "2", "3", "NA"])
new_array = magic_coerce(my_array, float, -1.0) # I want to implement this
print (new_array) # should print [1., 2., 3. -1.] 

I'm trying to write my own ufunc in c, and I have the following:


int is_float(const char* c)
{
    regex_t regex;
    regcomp(&regex, "^[+-]?([0-9]*[.])?[0-9]+$", REG_EXTENDED);
    return regexec(&regex, c, 0, NULL, 0) == 0; 
}

float to_float(const char *c, float default_value)
{
    float result = default_value;
    if (is_float(c))
    {
        result = atof(c);
    }
    return result;
}


static PyMethodDef LogitMethods[] = {
        {NULL, NULL, 0, NULL}
};

/* The loop definition must precede the PyMODINIT_FUNC. */

static void double_logitprod(char **args, npy_intp *dimensions,
                            npy_intp* steps, void* data)
{
    npy_intp i;
    npy_intp n = dimensions[0];
    char *in1 = args[0], *in2 = args[1];
    char *out = args[2];
    npy_intp in1_step = steps[0]; 
    npy_intp out_step = steps[2];

    double tmp;

    for (i = 0; i < n; i++) {
        /*BEGIN main ufunc computation*/
        char *tmp1 = (char *) in1;
        tmp = *((double *)in2);
        *((double *) out) = to_float(tmp1, tmp);
        /*END main ufunc computation*/

        in1 += in1_step;
        out += out_step;
    }
}


/*This a pointer to the above function*/
PyUFuncGenericFunction funcs[1] = {&double_logitprod};

/* These are the input and return dtypes of logit.*/

static char types[3] = {NPY_OBJECT, NPY_DOUBLE,
                        NPY_DOUBLE};

But it looks like it's not working correctly. What's the type for UNICODE in numpy? NPY_UNICODE gives an error, so I coerced it to NPY_OBJECT, but this does not seem to play with it.

0 Answers0