-1

This extension is used to search data in a linked list and return it as a nested tuple of lists.[(ele1,ele2,),...]

However when the program runs for a period of time, segment fault will occur. I know that there might be something I'm missing with python memory management, but it's not clear to me exactly where the problem is.

header file:

#include "Python.h"
#define  PY_EXTENSION_H
#ifndef TESTFORSCORE_MAIN_H
#define TESTFORSCORE_MAIN_H
#define MAX(a,b) a<b?b:a
#define MIN(a,b) a<b?a:b
typedef unsigned char byte;
typedef struct{
    char* content;
    int content_len;
    int features_len;
    byte* features;
    long id;
}NODE;

typedef struct LISTNODE{
    LISTNODE* next;
    NODE* node;
}LISTNODE;

extern "C"{
static PyObject *compare(byte* features1,byte* features2,int min);
static PyObject *init_map(byte* features,int features_len,char* content ,int content_len,long id);
static PyObject *get_list_size(PyObject *self, PyObject *args);
static PyObject *search(PyObject *self, PyObject *args);
}
#endif

cpp

#define PY_SSIZE_T_CLEAN
#include "example.h"


static LISTNODE *head_of_map=(LISTNODE*) PyMem_RawMalloc(sizeof(LISTNODE));
LISTNODE *current_head=head_of_map;
long COUNTER=0;
static PyObject *get_list_size(PyObject *self, PyObject *args)
{
    return Py_BuildValue("l", COUNTER);
}
static PyObject *init_map(PyObject *self, PyObject *args){//
    PyObject  *obj1,*obj2;
    int len1,len2;
    long activity;
    byte *features,*tmp_b;
    char *content,*tmp_c;
    if (!PyArg_ParseTuple(args,"SSiiL", &obj1,&obj2,&len1,&len2,&activity)) {
        return NULL;
    }

    features=(byte *) PyBytes_AsString(obj1);
    content=PyBytes_AsString(obj2);

    LISTNODE *list_node=(LISTNODE*) PyMem_RawMalloc(sizeof(LISTNODE));
    NODE *node=(NODE*) PyMem_RawMalloc(sizeof(NODE));
    tmp_b=(byte *) PyMem_RawMalloc(sizeof(byte)*len1);
    tmp_c=(char *) PyMem_RawMalloc(sizeof(char)*len2);
    memcpy(tmp_b,features,len1);
    memcpy(tmp_c,content,len2);
    node->features=tmp_b;
    node->content=tmp_c;
    node->features_len=len1;
    node->content_len=len2;
    node->id=activity;
    list_node->node=node;
    current_head->next=list_node;
    current_head=current_head->next;
    COUNTER+=1;
    // err maybe here
    //Py_DecRef(obj1);
    //Py_DecRef(obj2);
    PyMem_FREE(tmp_b);
    PyMem_Free(tmp_c);
    return Py_BuildValue("b",0);
}

static double _compare(byte* features1,byte* features2,int min){
    //do something ...
    return   0.3;
}
static PyObject *search(PyObject *self, PyObject *args){//(byte* features,char *content,int features_len,int content_len,double threshold,int _idx){

    PyObject *obj1,*obj2,*tuple,*content,*matched_content,*score1,*_id,*matched_id, *lis= PyList_New(0);
    byte* features;
    char *c_content,*tmp_content,*tmp_matched;
    int content_len,length ,idx;
    double  threshold,score;
    LISTNODE *p = head_of_map;

    if (!PyArg_ParseTuple(args,"SSiidi", &obj1,&obj2,&length,&content_len,&threshold,&idx)) {

        return NULL;
    }
    features = (unsigned  char*)PyBytes_AsString(obj1);
    c_content = PyBytes_AsString(obj2);
//    Py_DecRef(obj1);
//    Py_DecRef(obj2);
    p=p->next;
    int counter=0;
    while (p!=NULL){
        counter+=1;
//      compare here
        score =0.3;
        if(score>threshold){
            //if(counter>1000) break;
            tuple      = PyTuple_New(5);
            score1     = Py_BuildValue("d",score);
            _id        = Py_BuildValue("b",idx);
            matched_id = Py_BuildValue("l",p->node->id);

            tmp_content = (char*)PyMem_Malloc(sizeof(char)*content_len);
            if (tmp_content == NULL)
                return PyErr_NoMemory();
            memcpy(tmp_content,c_content,content_len);
            content = Py_BuildValue("y#",tmp_content,(Py_ssize_t) sizeof(char)*content_len);

            tmp_matched=(char*) PyMem_Malloc(sizeof(char)*p->node->content_len);
            if (tmp_matched == NULL)
                return PyErr_NoMemory();
            memcpy(tmp_matched,p->node->content, p->node->content_len);
            matched_content = Py_BuildValue("y#",tmp_matched,(Py_ssize_t) sizeof(char)*p->node->content_len);

            PyTuple_SetItem(tuple,0,_id);
            PyTuple_SetItem(tuple,1,matched_id);
            PyTuple_SetItem(tuple,2,score1);
            PyTuple_SetItem(tuple,3,content);
            PyTuple_SetItem(tuple,4,matched_content);

            PyList_Append(lis,tuple);
            Py_DecRef(tuple);

            PyMem_Free(tmp_content);
            PyMem_Free(tmp_matched);
        }
        p=p->next;
    }

    if (PyErr_Occurred()){
        Py_DECREF(lis);
        return NULL ;
    }
    return lis;
}
static PyMethodDef exampleMethods[] = {
        {"get_list_size", get_list_size, METH_VARARGS, "example"},
        {"compare",compare,METH_VARARGS,"example"},
        {"init_map",init_map,METH_VARARGS,"example"},
        {"destory",destory,METH_VARARGS,""},
        {"search",search,METH_VARARGS,"example"},
        { NULL, NULL, 0, NULL}
};

static struct PyModuleDef ptexamplemodule = {
        PyModuleDef_HEAD_INIT,
        "example",           /* name of module */
        "A module that imports an API",  /* Doc string (may be NULL) */
        -1,                 /* Size of per-interpreter state or -1 */
        exampleMethods       /* Method table */
};
/* Module initialization function */
PyMODINIT_FUNC
PyInit_example(void) {
    PyObject *m;

    m = PyModule_Create(&ptexamplemodule);
    if (m == NULL)
        return NULL;

    /* Import sample, loading its API functions */
    return m;
}
int main() {
return 0;
}

setup file

from distutils.core import setup, Extension

setup(name='example',
      ext_modules=[
          Extension('example',
                    ['example.cpp'],
                    include_dirs = [],  # May need pysample.h directory
                    )
      ],
      version="0.0.1"
      )

test file

import example
import random
import time
import faulthandler
 
faulthandler.enable()
lis=[(b'\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2', b'\x856\x96\xda\xe4H_\xf9\xb3\x95\xff\xda\xc9\x05\x17\xd5'), (b'!\xdf\xc1\xc20h\xc3#\x8f\x8a\xd3\x92\xc9}\x1b\x08', b'\xa3\x03m\xd3\x96\x01\xb2\xe0\xfcOg\x87\xfa\xadA\x89'), (b'Fu\xe6\x97=\xb9\x88S\xe4Q\xc06\x9f[\x84J', b'`9\xf2- \x89\xc63?\xe3=\xf9o\x9b\xe4\x8e'), (b'\x95b\x1f\xba\x90\xfa\xe9j#\xd4\x12x\xc3\x93#\x1c', b'8f\xc4\xeb\xef\x95\xe8\x81\xec\xed1\x00j\xc2\xd2j'), (b'\xf9\x8bUk"\xa2y\xfbHI\x08\xf6\x03\xd3Ye', b'\xdf\xdb\xb6\x87\xf3\x05-p\x80%8\x8cd[43'), (b'\xee\x92\xcf\xb5\xd7\x05j\x92\xa2\x1c\xf7a.[\x05\x86', b'Y\x00|\x02\xa1\xb1wI\x08\xb3\xfc\xfc\xc7\xf5db'), (b"\x9d\xe0&\\~\xbf\xaf`\xae\xc2?'B\xfa\x95\x86", b'FL\xe3\xbb\x18\xeef\x08%\xe1\xc8,\xe3J=\xd5'), (b'\xe1\xfb\xc9n\x89\x1d\x9a\x9d\xe9\x1a\x9c\xde\xcd\xce\xbd\xcf', b'\xa7\xe0\x9b\x8bl\x88\x85,b\x04`3\xe6\x03\x85\x8e'), (b'\xc5\xbf\xd4\xa7\xe7e\xe5\x8a\xe0\xbc\xa8\xb8Yw\x0c\xd3', b":\xc2a\xb6\xd3\x1ct'z\xfe\xf9\xa7P\x8c\x1c\x7f"), (b'\xfd\xb2\x9aP\xc9\x0c\xcf\xe2\x8c\x82g\x8c{r\x94\x84', b'3B\xca\x8c\xd8\x9a\xb8\x94iyT\xd6\x05N_\x8a'), (b'\xea\x7f\x83\xaa\\\x10y\xd1h\x93l\xd9\xfe\x87\xe8j', b'jj\x8f\x1a-tq\xd9\xf1\xf8\xf7\xa5\xf6\x86\xb9\xdb'), (b'\xa4\x0b\x0f\xf1\x84\x95\x1eK\xddtH\xf0IaY:', b'p\xb5\xd9"\xb7\x0f(\r\xb1\x9bm\xc0hq\x00e'), (b'ga\x8f\xbbo\xff\xedQ\x9aM\xbe\x9cTO\xd35', b'=\xc3\x9d\x8d\xd0\xae8*1\x1d\xbf\x84\x89~76'), (b'\xaf\xe1%\xc6\xedo\xec24\xc0\x9d\xa8\x9fy=N', b'\x03m\x81\xd7B\x04\x1a*`C\xee\xccb\x1fm\xf0'), (b'\x1f\x10\xf6\x9fa\xc5\xbf\xaf\x18\xb1Mw\xba\x92&\xe1', b'\xb7\x90/\r$\t\xce}xk\xe7\xbeL\xe9\x8e\xc6'), (b'\xc8.\xd9)\x08g/\xc6\x0e\xed\xbb\xb9\xab\xad\xc8\xa1', b'\xa1c\xcc\xe9\xaasJ\x98\x1d\xd24\xc0\xf2+\x9d\xac'), (b'\xa1\x91\xa0:D\x11(\xb3\xbc0\x13\xd9\xdeD\xb9\x86', b'\x0fV\xa1\x8fl\x0c\xf4\x94=\xe7\xd0*\xdb\xcb\xa5;'), (b'bx;\x8dA\x83\xb8lca\x9c\xadV\xc2\xba/', b'\xe9\xe0\xe7({\x11\r*\xca\xf1\x08\x9bG\x830\x82'), (b"\xde\xbb\xd9\xce{%MP'Xj\xed8>\xa3a", b'\\5M\x13\x13\xb1YY\xa53y\x99$a\x91\xa5'), (b'\xf1\xda\xc4\xfe\xc6\x83\xe6\xf3\xc6\x88\x86H\xc4\x15\x12\xb4', b"\xf8'^\xe5\xb8`\x0fl\x06$\x1cz:\x93\xa1\\"), (b'\xc7@\x03Ny\x11\x96\xcb\xfe\x19\x0e\x18\xbf\xd59I', b'P\xf0\x0f\xec\xaf\x1bS\xa8\x8dt\x11O\xe8\x0f|2'), (b'\xb4\xda\xff\x8c0\x18/\xf1\xad\xa4F\x94\xb1\xad\xf4\xf1', b'.\x83(\x8f#\xe1\x11\x96\xdeS\x17\x8b\xe3]\xb5\xca'), (b'\x1d\t\x11\xc1\x15\xdc\x8e\xea\xe1\xd2\xba%J\\\xf2\xfd', b'\x070ht\xb4\xdeQ+\xc5\xb8#\xac\xd9z\xc0A'), (b' V\xe3\xc5gEg\xc5\xd5\xe1\xd6\xd7\xab\xd0\xb6n', b'\x88\x9b8\xf6\xe26\x92z\x91\xe4\x92\x1e\xbc\xc5;u'), (b'\x88}\xd7*QFn:\xd7\x12RA\xe6-\xd2)', b'\xeb\x04<\xe6\xfa)u\x9ds\xecj\xa7\x84\xcf\xa7\xa7'), (b"\xec\x04.bP)'\xb6\x08\x05\x92\x8c\x85\x1a\x84\x8f", b'@\x80\x08A\xba\x9c2Sd\xdf\x97\x07\xc4Im\xef'), (b'\x11(\x9ez\x0c\xd7\xca+7\xcc\xb0\x19,:\xcb5', b'\xf9V\x8cm\xe5-\xf9\xf1\xf6g\x86~\x8f\xd0<\x84'), (b'\xbb\xe9I=\xc74\xc9C\x0cT\x81\x9d\x85\xdeWF', b'\xfe\x1b\xb1gi\x94O\x98\xf0\x90cJ\xdb\xbf\xc7\x1e'), (b'2`\x9d\x9c\x02\xe4\xce\xf0\x14\xc0Z\xaaW\xf2\xa5\xff', b'\xe6\x05?\xe3X"\xb2O)~\x96\x8f^\x9c\x077'), (b'\xaf\x87,a\xa9\xcd\n|O\x1e\xa7\xdd\xba\x89;\x9d', b'\x98b\xac?\x13*\xf2m\xfeo=v\x96\xd5\x14\x8d'), (b'\xee\xcb{\xeeRQ\x82}\x05|\xc6S\x82\x85\xa4\xff', b'C\x93\xdes\x00\xe8cY<\xde1.\x19\xd8\xe5R'), (b'\xack\x036\x9b4\xca\xa4\x15\xbd\xd5\xbe\xd3\xd4\xfb{', b'm?\xc44>\xf4qig\x10\x8c\xeb\x11o\x99\xb4'), (b'D!\x0e*\xb3/<\x15F_\xd6;j\x85\xbe,', b'Y\xb01H\xb6:\x04_]OL\x9d\x8e\xbcp\xdc'), (b'\xe6\x9d\x02L[\x1c\xee\x8b0lu\xdd\x00\x9a\xfa!', b'J&\tkf\xa8\xeb\x1b\x90\x81\xce\\\x1b\xe8\xb6\x1d'), (b'\xcdnd\xf6\xf6\x11\x9e\x06\x10\xce\xcfP\x89\xed\xad\x02', b'I:\xe0{8H\xfcw\xb1\xe9v\x11p-/0'), (b'\xa22?\xcd\xfbU+\xbf- \xe3<\xc2\x94\xef\xb6', b'\xc7\x89\xc4K?{\x18\x1ftd\x0f\x19L}\xba\xf9'), (b'8\xf9\xe9\xd0\xa0\x97\x8e\rp\xe9\xd6[\x8e`\x93\x81', b'\xf7\x16"\xaa\x86\\b\x9d\x0c\x17"\x81\xeea\x03\x19'), (b')\xfb\x0c\xc7-D\xd8\xa4\xf7\x8d%\x02\x06\xa1\xbf\x94', b'\xc8s!s\xe2n\xee\xb0\x1eC\x15\xc0\xb4d\x08\xa7'), (b'\x7f\xa6\xdc\xf4\x9e\xe3\xda\xd3\x8a\xcb\xfc\x98,1\\\xe5', b'\x82,\xfbKL_\xe9\xc9L`\xc7"&3#\xb5'), (b'&\x98,\x88\xff\x0e\xca[\xcb\xc6t;\xbc5Y"', b'\x1b\x8bA\xd1\x8a\x9e\xe6\xce\xc4#C\x89\xe9Uon'), (b'\x1c\x1b\xac2\x86\xd6\x9a\xf9\xdd\x8d=\xa3\xff\x9a\xe1\xa5', b'\xed\x10\xed\xc1\xfe\xec\xd3\x0b\x8b\xe9\xd4?H"\xd6\x8f'), (b'9\x8f3\x8b\x02\xb7\xe7\xe5\xa1TO\x1a\xb6N\x0b\xb6', b'\xb4\x0f\xa7 \xc7\x83\x81\x18\x9cU\xfd2\x93\xb8(\xe9'), (b'\x8d\xbe\xcf`\xc3d\xde\xca\xf4\n.\x92Mrw\xa6', b';\t\xfb\x19eKt0\x8foG\xcd\xd5\xe0\xf6\x9d'), (b'd\xbcG)\xec\xc8\x04 \xdd\xd9\xf6\xd6\x91/\xb2\xd3', b'2\xba\xa8\x8a\xc5\xd6h\xab\x9c\x9e\x8b\x1b5\xcc_.'), (b'\x98\xea\xf0\xcbJ-c\xea\x81\xcf?B\x17\xcb\x99\xf9', b'\x82\x05\x14\xf2\x05q\x97\xf7Z-B\xad\xa2\x16\xa0`'), (b'\x1c\x94A\xfd\xe1\xf2\x06\x8b\x19\x80\xfa\x01g\xc3V\xaf', b'\x1c\xde\xec\xdf&\x93u\xab\xf9\x12\xfb\x04\xbc0\x99d'), (b'6\xfa\x993b\t\x12E\xd2t\xde\xb6\xfa\x98Im', b',\xa2\xf7VB\x81\x048\xc8\xbf3\xce\x8d\xecn\x82'), (b'L\xd6\x93\xbe\x02\xbeT\xa1\x8d\x88\x84\x18\xc7\xcf\x9fE', b'\x16i"\xb7\x81\xef7\x7f\xdb\x7fY\xbft\x1b\xac\xd0'), (b'\x90\xd6\xe7>Z\x8b\x83\x14\xab\xefH\xa1\x95\xe6\x8e\xc3', b'\xebJ\xfcXd\xd75I@\x12\xe9\x88\xdcG\x15\xb7'), (b'\xb8\xd6\xd03j\xed\xf9\xa0\xb1L3\x8d\x89P\xb4\xe5', b'\xe3\xdf)CR\x03s\x19\x8f\xb5\xc6\xf5\xee&W\xee'), (b'cx\xd7dQ\xc3)YR\x9c7\xf0\xb4\xe4P\xfd', b'h\xa8\x122\xceV\x8c\xf4\x12\x14J\x08\x91\xcf\x93\xfa'), (b'\x1d\x14\x86\xccTi\xdd\xc3L!\x97\x92\xb3\xb7X$', b't\x84\xa4\xe72\xa5X\xe9Ta\x03\x0c\xb00\x07\x83'), (b'\xa6\xdd\x1f\xfc\xca2\x88\x99\xbeA\x9a\xa6JVJ\x84', b'B\x81\x8e-K\xde\x15\xfd\t"\x08\x04@/-\xc9'), (b']b\x18\xe65C\x17*\x103J?\xe79!\x9b', b'\n\xbb\x1f\x98\nZZ\x97\xe0\x12\xf6W\xc4\x1e_~'), (b'\x9a\x9f-c\t\x14\xa4NL\xc5\xc0\x99R\x15\x94\xca', b'\xa0\xf9\x11\xfa\xd7\xf3\xb5\x11\xb2SK\xe8\xdb\x06\x94\x1f'), (b'!\xf0\x90\xd8t_O\xc1\xb5\xfe}\xca\xa6\x15}\x19', b'\xc9+\x89a9y3\xe0\xffh\xedmMq\x13\xcc'), (b'`\xf1\xab\x07A\xe9\xe5\x805\xf2\xabD8\xce\x85A', b'\x9d\x0f\x91\xc2\x01\x93\x8fm\xd6\x03XZl\t\x15J'), (b')\r\xcf\xc0\xa1\xa9\xf7i\x94\xa9\x8d\xed\xea\xcd\xb0B', b'jq\xe9\x01*H\x14\xa7M\x8c\xd2\xa5\x19\\\x80\x12'), (b'vJ\x98\xbf\xd5D\x15\xdb\xa1\xd7\xdc\xaf\xa0\xaa\xf7\xc1', b'\x06\xd8-\x05\xfe\x172W\xa4\xab\xe4\x97_h\xe6\xac'), (b"\xc6'P\xd2\xfcq\xcb\n\xe2\xc1sN>\xbc\x9a\x08", b'\xacysx4z\x96\x8f\xf1\xa5M\x9e\xe7f\\\x16'), (b'\x00\xc6\x81\xf0\xf1\xcb\xa8_\x1fF\x18\xe9\xb0\x95\xa3\xab', b'N\xd8\xdaz\xe8F4\xb1\x88>\xec\xd6[\x15\xd7\xfd'), (b'E\x1ee\x8f\xd5\x0f\x19\xa8\xb6~-\xe0n\xe7\x89\xe1', b'\x83\x90\xa0\x93\xec\x086+\xcf\x08\x9c\x048\x85\x88^'), (b"\x82\x13\xfa\x9f 6\xd3'\xd7\xd2\xc1\xf9\x03.\x13*", b'J\xe2\xf8\x1e\xec\xeb\xd9"q\xbb:\x04\xc2\xdf<\xa8'), (b'\x01\xdf\xfcPW\xc80&{\xfe\x99\xf8\x10\xc6R\xce', b'=Cnh3\x9drKE\x1aU\x8e\xcc\x84jS'), (b'<\xeeo[J\xb0r0N\x07\xf4\x1e\xfb\x16\xac\x07', b'\xf9KP\xfe\xd2\xed\xc7`\xc7\x0c\x9et\xd3^\xc9g'), (b'P\xa1b#\x07V\xb0s\x86S\x9a\x1d[\xd9\xe7\x92', b'r&\xb1a\x0ff2\xd2\x8ft\x13\xff\xba\x0fy\xf4'), (b'\xc8t\xcc\x96V\x8bS\xde\x98\xf1\xa33\xa9\xdb\x1b\x85', b'13\xf1&\x8c\xfe3\xf9\xca\\\xc6\xdb\x1dT\x04b'), (b'\xb9u\xf2O\xfb\x05\xb3\xc2r*\x17d\\\x8f9M', b'\xeeQ\xf9&\xc5\xc8\xc8\xe6B\x1d\xcf\x184\x19]b'), (b'\x96\xe6\xc9\x1e;\x83e4\xb0\xbe\xf6\x94\x03\x06\x85:', b'\xa2\xb0\nS\xaf\xads.\x94\x16s\x901\xaf-\xe4'), (b'&\x81t\x048(\xd7 \xb6\x1aR\xaa\x98\xdbN\x0b', b'\x9cd\x8a#\xdd \xdcS\x14\x1e\x1b\xea\x87\x1b\xa41'), (b"\x1fC\x0cr,\x1d\xab\xa8\xd9\xfc\xa1\xa2\xbb\xcdw'", b'D\xb6)\x7f\xd7\xe6\xb2@\x18D\x1d\x08c\xce2k'), (b'$\xc1\xabD\r\xe0a\x0co\x12\x17\xbdB\x06\xc9y', b's\xd8\xf8\x05\x81\xfeY\xc4\xc8|\xe5\xe2]U\xfa"'), (b'\x8f\xe3\xd6\xc9d\xc4\xf0\xd0\xe3\x17\x12\x82\xb2\xfd\xe9\xef', b'\xf1\r/9\x97/\xd4\xf6dz\x89\x7fEG/\x15'), (b'\x16\xf2\xd8\x9a\x029\x9b{\x02\xf4u\x08~\x80\xb9Z', b'\xe1+:9\xaa\x87:\xf0\x02\xdd\xe5}G\x0ch\x98'), (b'\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO', b'\x13\xd0\x1c[\xe6\x8aa\xd6\xa0a\xb1X\x84\xb8\xde2'), (b'\xa8\xaa\xa8\xe7\xcal>\xe6\xdb\x18vL\xdak%}', b"\xe5W'\x83\xbd\xf2\x01\x9b\x89\x98l\xfd\xcf\x02\x84\x13"), (b'6\x9b\xae\x07\xe0+\xa0\xcd\xdbT\t\x02@\x81$E', b'\x10Q\x1an\xaa\xe7\x9cy\x04t\x16\x1d\xea\x9d\xd2\xe6'), (b'ne\x83\xb0\xe2\x8aG\xda\xaf%\x93cg\x84\xaf\xea', b'p!}\x8aQ(C\x08\xaf\x8bi\x80\xdaw\x7f\x81'), (b'<\xa6\x90\xb2\x03\x8f\xa4\x17$\xc7\xbe\x8a\x1e!\x01s', b'\x19\x7ftaaLUB\x1aBt\xf5U~\xeeY'), (b'\xe4|z\xaer8\xa0\xa3\xc8\x9c\x8d\x03`\xb8rN', b'b\xda\xf8e\n\x1f\xc1\xb2\x97\x13\xd7\x91\x91\x86\xbe\x13'), (b'\x00\x84\xc1\xd8\xe0ub\x1a\xc0\xa1\xfe\x08K\xfb>,', b'g\xb06$\xbb{s\xb9-\x8e5h|+\xf8\xb1'), (b'(\xac{\xb1^OOy\xc98\xf5\xc5\xc2\xd3\xb3\x1e', b'\x8d\xfc\x0f\xda\xd5\xa6\x89)\x92\x173\x1e)*\xf4&'), (b'G*.\xe2\xd8\xee\x87\xfa\x83~eS\xcc\x1e\xdeQ', b'\xc9y \x1a]Y\x8d\xf8\xa0\xd0^p\xc1\xf1\xa4\xd2'), (b'\x16\x91\xd7[\xbeC\x80T\x08\x0f\xbd\xe1\xf7\xbd\x1b\xb8', b'\xcc\x8c\xfb\x1f"\x16\xa7/\xe2inK\xaa\xdf\xc0\xe4'), (b'\x1d\xbc\xf2\x9b\xe90\x13\xc9\tuuk\r\xe4\xa9\xfa', b'\xf6\x97\xf2B\xaa\x83\xf9k\x15K\xa0\xb9\xdf\xf4,\x02'), (b'hIA(;\x8b\x92G!K\xca\x1a\xfd\x8c9\x95', b'0!V\xfc\xfb\xa1,R\xc6\xdaO\x9c\x16\xa8<\xe2'), (b'?\xd8\xd7e\x88\xad\xcd\xd6\x8d\\\x1e\xc2l$;\xd8', b"\x88\x8e\x86'#f\xcf\xe3,\x96&\r\xb1\xee@g"), (b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", b'\x99\xbf\xf9qo\xde\x92xge\x04\x84\xb8 \xfb\xe3'), (b'\xa7\x8d\x83\xc8\xa3\xc15\xdc`\x82\xed\xae.\xea\xf2\xea', b'[\xe8{\r\xd6g\xcf\xe8L\x16\xde\xcd\x90M\xbd\x9e'), (b'\x1a\x1b;\xa62\x10i\xb2\x19n\xc1\xddKz\xb4\xd8', b';On\xd2N\xef%B\x05_T9)IG\xcf'), (b'@g`\x1e\xc1\xdf\x14A\xc1\x04N\xb6]\x97\xd3\x17', b'\xbc6\xcb\xaah*\xf1\xd0_\xd9B\x94F\xa8\xd8\r'), (b'<\x86\xa4z\xb4\xaeS\x90\xa9\xb5I\xe8\xe6FI\x8c', b'\r2\xd2.H"\xfb\x9d}{\x11^\xdf\xcb\x186'), (b'\xf0\xb8J|\x0b\xfe\xcd\x8b\xc0$&"8\xffob', b'\xa8}\xf31\xdd\x07\xc6\xa0\xbb\x9f0\xca\xe3&^\xf6'), (b'r\x9b\x81\xa0D\x94\x80\xfdu\xb1O\xbc\xf6\x1a\xd8\xe0', b'\xd4)\xb6\xdbQ\xa5#\x16\xed9\xadM\xf82%\xeb'), (b'\xb9V\xb7R\xb9\xcdJ9c\x1a`V\xb9\xb0oX', b'\x9a\xff]\xb3\x83\x96\xf5\r\x98\xe2\xc5\x86\xa21k%'), (b"J\xe9\xaa\x8f\x07\xab\x90\x18'\xa6\x85\x08\x18\xa8\x96?", b'\xa7\x83\xed\xd9\t\xd8\x85\xe4\xf8\xcb0\x95n\xf2`\xa8'), (b'\xc3\xaf\xc8m\xfc\x90}=\xb8\x1fr\xab}\r\xc1!', b'\x04\xa0sq\xf7\xd3Dm\xb9\xb8\x9c\xbap(\xa1L'), (b'\x97bq\x0f\x93\xb5\xa1\xa1\xf58\xdcu5\x9a\xbd\x8b', b"\x07'\xc0\xdb/0\xd9H\x81\x06\xe2^ `F\xa9"), (b'Nn\xd4\xbd\x13dN\x9e\xcd\xd4\x06kk~\xf5\x7f', b"\x98j'}\xc5\xb0\x1d\x19\xe2/\x92'\x9e\xd4\xba("), (b'\x08ty\x84\xde>\x0f5jL6\x05\xe1\x02pH', b'0\xb9\xd5\xd40u\x04\xfdK\xe2D\xbc\xddy\xd4\x11')]
def cost(func):
    def wrapper(*args,**kwargs):
        start=time.time()
        res=func(*args,**kwargs)
        end=time.time()
        spend=end-start
        print(f"cost:{spend}")
        return res
    return wrapper
@cost
def test(repeat):
    tmp=[*lis*repeat]
    if example.get_list_size()<4000000:

        for it in tmp:
            t=b'something here'
            example.init_map(it[0],t ,16,len(t),int(random.random()*10000))
 
    print(f"pool size is {example.get_list_size()}")
    print(f"get args from lis start")
    print(lis)
    arg1=lis[0][0]
    print(f"get args from lis complete")
    t=b'something need search'
    print(f"start search")
    res=example.search(arg1,t,16,len(t),0.1,int(random.random()*10000))
    l=len(gc.get_objects())
    print(f"res is {len(res)},gc:{l}")


if __name__=='__main__':
    for i in range(10000):
        test(10)

error

pool size is 1000
get args from lis start
Fatal Python error: Segmentation fault

Current thread 0x00007f0ca3c90340 (most recent call first):
  File "/tmp/tmp.ztKz0S1yKb/test.py", line 29 in test
  File "/tmp/tmp.ztKz0S1yKb/test.py", line 12 in wrapper
  File "/tmp/tmp.ztKz0S1yKb/test.py", line 41 in <module>
Segmentation fault (core dumped)

I tried using PyMem_RawMalloc and Py_DecRef but still segfault. what should i do?

Sorry for the little misinformation I gave. I may have located where the problem is: problem here

Everything works when I comment out Py_DecRef ,otherwise it throws a seg fault exception:

gdb backtrace,
print info

I don't understand why Py_DecRef can't be used here.

  • 1
    Have you used a debugger? What did it tell you? – MattDMo May 11 '23 at 16:06
  • Why the header file? If that is a *Python* module, it doesn't have to export anything (*C* style). – CristiFati May 11 '23 at 16:10
  • *but it's not clear to me exactly where the problem is.* -- Did you do the obvious thing, and that is to verify that those values you are sending to `PyMem_RawMalloc` are valid? What about the return values from that function? Did you check them for NULL? The first thing you should do is debug and/or log the values you are using in that C++ function. Right now, it seems you aren't doing either one (debugging or logging). – PaulMcKenzie May 11 '23 at 16:19
  • At the top of your cpp file, `#include `, then `assert(statement)` every statement that could possibly be in doubt. Build without optimizations and try again - rather than a segfault, you are likely to get an assert firing, with a much more useful message (because you wrote it yourself). – SRNissen May 11 '23 at 17:31
  • @CristiFati ,Thanks for the reminder, I'll fix this question. – the drugs don't work May 12 '23 at 05:18
  • I added some error messages and what might be the problem. – the drugs don't work May 12 '23 at 05:23
  • @MattDMo The error message indicates that an error occurred in [test.py]:func test while get the arguments for example.search. It was unable to get the list element. – the drugs don't work May 12 '23 at 05:46
  • @PaulMcKenzie The error did not appear on the PyMem_RawMalloc. It seems that it appeared on the Py_DecRef or dealloc. However, I will add some valid checks for PyMem_Malloc. Thanks. – the drugs don't work May 12 '23 at 05:53
  • You're freeing *tmp\_b* and *tmp\_c* while their addresses are still referenced in the map. That's clearly recipe for disaster. – CristiFati May 12 '23 at 06:26
  • Neither *C*,. nor *Python* code "compiles" *OOTB*. [\[SO\]: How to create a Minimal, Reproducible Example (reprex (mcve))](https://stackoverflow.com/help/minimal-reproducible-example). – CristiFati May 12 '23 at 11:51

1 Answers1

0

There are some things wrong:

  • C and Python code have errors, so it doesn't compile / run - not an MCVE ([SO]: How to create a Minimal, Reproducible Example (reprex (mcve)))

  • There is a mix of raw (malloc) and pymalloc memory functions

  • Mismatches between types and format specifiers

  • C API misunderstanding. According to [Python.Docs]: Parsing arguments and building values (emphasis is mine):

    Note that any Python object references which are provided to the caller are borrowed references; do not decrement their reference count!

  • Design flaw mixing C and Python types. There is the C list that is supposed to be the backbone, but it's tightly coupled with Python structures. Also attempting to export the functions from the .dll makes me think that things are not very clear for the OP. I'd suggest to keep the core functionality at the C level and provide wrappers for interacting with Python. Otherwise, keep switching from one to another would lose the speed advantage (which I assume is the reason for not writing everything in Python)

  • The question is tagged C++, but there's nothing C++'y about the code, it's rather C (in C++ there is std::list, smart pointers and other features that could save writing a lot of code)

  • Minor ones:

    • LISTNODE - NODE nesting. Although it looks better structured, the overhead for working with nested structures might be a greater loss

    • Passing the size of bytes (which is held inside it)

    • Method arguments specifications

    • Code style

    • Many many others

Before going further, I suggest a thorough reading of the following (including referenced URLs):

Now, there are multiple problems, each with multiple solutions.
Starting from your code, I prepared an example. It's written in C (lots of code which doesn't have anything to do with the goal).
All the business logic is carried out in C (it's callable from a C application), and the Python wrapping layer is on top of that.

  • dll00.h:

    #include <stdint.h>
    
    #if defined(_WIN32) && !defined(PY_MOD_BUILD)
    #  if defined(DLL00_STATIC)
    #    define DLL00_EXPORT_API
    #  else
    #    if defined DLL00_EXPORTS
    #      define DLL00_EXPORT_API __declspec(dllexport)
    #    else
    #      define DLL00_EXPORT_API __declspec(dllimport)
    #    endif
    #  endif
    #else
    #  define DLL00_EXPORT_API
    #endif
    
    
    typedef uint8_t byte;
    typedef unsigned long ulong;
    
    typedef struct Node_ {
        byte *features;
        ulong features_len;
        char *content;
        ulong content_len;
        long id;
    
        struct Node_ *next;
    } Node, *PNode;
    
    typedef struct {
        size_t len;
        PNode *nodes;
        double *scores;
    } FoundData, *PFoundData;
    
    
    #if defined(__cplusplus)
    extern "C" {
    #endif
    
    DLL00_EXPORT_API int addNode(const byte *features, ulong features_len, const char *content, ulong content_len, long id);
    DLL00_EXPORT_API size_t size();
    DLL00_EXPORT_API PFoundData search(const byte *features, ulong features_len, const char *content, ulong content_len, long id, double threshold);
    DLL00_EXPORT_API void freeFoundData(PFoundData data, int membersOnly);
    DLL00_EXPORT_API size_t cleanup();
    
    #if defined(__cplusplus)
    }
    #endif
    
  • dll00.c:

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #define DLL00_EXPORTS
    #include "dll00.h"
    
    
    static Node *gHead = NULL;
    static Node *gCur = NULL;
    static size_t gLen = 0;
    
    
    int addNode(const byte *features, ulong features_len, const char *content, ulong content_len, long id)
    {
        Node *node = malloc(sizeof(Node));
        if (node == NULL) {
            return -1;
        }
        node->features = malloc(sizeof(byte) * features_len);
        if (node->features == NULL) {
            free(node);
            return -2;
        }
        node->content = malloc(sizeof(char) * content_len);
        if (node->content == NULL) {
            free(node->features);
            free(node);
            return -3;
        }
        memcpy(node->features, features, features_len);
        node->features_len = features_len;
        memcpy(node->content, content, content_len);
        node->content_len = content_len;
        node->id = id;
        node->next = NULL;
        if (gCur == NULL) {
            gCur = node;
            gHead = node;
        } else {
            gCur->next = node;
            gCur = node;
        }
        ++gLen;
        return 0;
    }
    
    
    size_t size()
    {
        return gLen;
    }
    
    
    static double compare(const byte *features0, ulong features0_len, const byte *features1, ulong features1_len, int min)
    {
        // @TODO - cfati: Dumb comparison
        if ((features0_len > 1) && (features1_len > 1) && ((features0[0] == features1[0]) || (features0[0] == features1[1]) || (features0[1] == features1[0]) || (features0[1] == features1[1]))) {
            return 0.3;
        }
        return 0;
    }
    
    
    PFoundData search(const byte *features, ulong features_len, const char *content, ulong content_len, long id, double threshold)
    {
        if (gCur == NULL) {
            return NULL;
        }
        Node *node = gHead;
        FoundData tmp = {0, NULL, NULL};
        tmp.nodes = malloc(sizeof(PNode) * gLen);
        if (tmp.nodes == NULL) {
            perror("malloc failed 1.");
            freeFoundData(&tmp, 1);
            return NULL;
        }
        tmp.scores = malloc(sizeof(double) * gLen);
        if (tmp.scores == NULL) {
            perror("malloc failed 2.");
            freeFoundData(&tmp, 1);
            return NULL;
        }
        while (node != NULL) {
            double score = compare(features, features_len, node->features, node->features_len, 0);
            if (score > threshold) {
                tmp.nodes[tmp.len] = node;
                tmp.scores[tmp.len] = score;
                ++tmp.len;
            }
            node = node->next;
        }
        if (tmp.len == 0) {
            return NULL;
        } else {
            PFoundData ret = calloc(1, sizeof(FoundData));
            if (ret == NULL) {
                perror("malloc failed 3.");
                freeFoundData(&tmp, 1);
                return NULL;
            }
            ret->nodes = malloc(sizeof(PNode) * tmp.len);
            if (ret->nodes == NULL) {
                perror("malloc failed 4.");
                freeFoundData(ret, 0);
                freeFoundData(&tmp, 1);
                return NULL;
            }
            ret->scores = malloc(sizeof(double) * tmp.len);
            if (ret->scores == NULL) {
                perror("malloc failed 5.");
                freeFoundData(ret, 0);
                freeFoundData(&tmp, 1);
                return NULL;
            }
            ret->len = tmp.len;
            memcpy(ret->nodes, tmp.nodes, sizeof(PNode) * tmp.len);
            memcpy(ret->scores, tmp.scores, sizeof(double) * tmp.len);
            freeFoundData(&tmp, 1);
            return ret;
        }
    }
    
    
    void freeFoundData(PFoundData data, int membersOnly)
    {
        if (data == NULL)
            return;
        free(data->nodes);
        free(data->scores);
        if (!membersOnly)
            free(data);
    }
    
    
    size_t cleanup()
    {
        size_t ret = 0;
        Node *node = gHead;
        while (node != NULL) {
            free(node->features);
            free(node->content);
            Node *next = node->next;
            free(node);
            node = next;
            ++ret;
        }
        gHead = NULL;
        gCur = NULL;
        gLen = 0;
        return ret;
    }
    
  • main00.c:

    #include <stdio.h>
    #include <string.h>
    #include "dll00.h"
    
    
    void handleFoundData(PFoundData data)
    {
        printf("Found data (%d items):\n", data == NULL ? 0 : data->len);
        if (data == NULL)
            return;
        for (size_t i = 0; i < data->len; ++i) {
            printf(" Index: %d\n  Score: %.03f\n", i, data->scores[i]);
            printf("  Id: %d, Features length: %d, Content length: %d\n", data->nodes[i]->id, data->nodes[i]->features_len, data->nodes[i]->content_len);
            printf("  Feats: %s\n", data->nodes[i]->features);
            printf("  Cnt: %s\n", data->nodes[i]->content);
        }
        freeFoundData(data, 0);
    }
    
    
    int main()
    {
        Node dummies[] = {
            {"1\02", 3, "567\0890", 7, 2},
            {"ab\0c", 4, "d4567\0890", 9, 3},
        };
        printf("Element count: %zu\n", size());
        for (int i = 0; i < sizeof(dummies) / sizeof(dummies[0]); ++i)
            printf("Add node (%d) returned: %d\n", i, addNode(dummies[i].features, dummies[i].features_len, dummies[i].content, dummies[i].content_len, dummies[i].id));
        printf("Element count: %zu\n", size());
        // @TODO - cfati: Values based on .dll's dumb comparison
        byte *src[] = {
            "xxx",  // None
            "111",  // 1st
            "xaa",  // 2nd
            "1bd",  // Both
        };
        for (int i = 0; i < sizeof(src) / sizeof(src[0]); ++i) {
            printf("Search for '%s' ...\n", src[i]);
            PFoundData found = search(src[i], strlen(src[i]), "", 0, 0, 0.1);
            handleFoundData(found);
        }
        printf("Freed %zu nodes\n", cleanup());
        printf("Element count: %zu\n", size());
        printf("\nDone.\n\n");
        return 0;
    }
    
  • example.c:

    #define PY_SSIZE_T_CLEAN
    #include <Python.h>
    
    //#define PY_MOD_BUILD
    #include "dll00.h"
    
    #define MOD_NAME "_example"
    
    
    static PyObject* example_size(PyObject *self, PyObject *Py_UNUSED(args))
    {
        return PyLong_FromSize_t(size());
    }
    
    
    static PyObject* example_add_node(PyObject *self, PyObject *args)
    {
        PyObject *feat = NULL, *cnt = NULL;
        long id;
        if (!PyArg_ParseTuple(args, "SSl", &feat, &cnt, &id)) {
            return NULL;
        }
        int ret = addNode((const byte*)PyBytes_AsString(feat), (ulong)PyBytes_Size(feat), PyBytes_AsString(cnt), (ulong)PyBytes_Size(cnt), id);
        return PyLong_FromLong(ret);
    }
    
    
    static PyObject* example_search(PyObject *self, PyObject *args)
    {
        PyObject *feat = NULL, *cnt = NULL;
        long id;
        double thres;
        if (!PyArg_ParseTuple(args, "SSld", &feat, &cnt, &id, &thres)) {
            return NULL;
        }
        PFoundData tmp = search((const byte*)PyBytes_AsString(feat), PyBytes_Size(feat), PyBytes_AsString(cnt), PyBytes_Size(cnt), id, thres);
        if (tmp == NULL) {
            return PyList_New(0);
        } else {
            PyObject *ret = PyList_New(tmp->len);
            if (ret == NULL) {
                freeFoundData(tmp, 0);
                return NULL;
            }
            for (size_t i = 0; i < tmp->len; ++i) {
                PyObject *tup = Py_BuildValue(
                    "(ly#y#d)", 
                    tmp->nodes[i]->id,
                    tmp->nodes[i]->content,
                    tmp->nodes[i]->content_len,
                    tmp->nodes[i]->features,
                    tmp->nodes[i]->features_len,
                    tmp->scores[i]);
                    // @TODO - cfati: No need to include data passed in arguments in each of the output tuples !!!
                if (tup == NULL) {
                    Py_XDECREF(ret);
                    freeFoundData(tmp, 0);
                    return NULL;
                } else {
                    PyList_SET_ITEM(ret, i, tup);
                }
            }
            freeFoundData(tmp, 0);
            return ret;
        }
    }
    
    
    static PyObject* example_cleanup(PyObject *self, PyObject *Py_UNUSED(args))
    {
        size_t ret = 0;
        //Py_BEGIN_ALLOW_THREADS
        ret = cleanup();
        //Py_END_ALLOW_THREADS
        return PyLong_FromSize_t(ret);
    }
    
    
    static PyMethodDef methDef[] = {
        {"size", example_size, METH_NOARGS, "Get List size"},
        {"add_node", example_add_node, METH_VARARGS, "Add node"},
        {"search", example_search, METH_VARARGS, "Search elements"},
        {"cleanup", example_cleanup, METH_NOARGS, "Clean up"},
        {NULL, NULL, 0, NULL},
    };
    
    
    static struct PyModuleDef modDef = {
            PyModuleDef_HEAD_INIT,
            MOD_NAME,  // name
            "A module that imports an API",  // Doc string (may be NULL)
            -1,  // Size of per-interpreter state or -1
            methDef,  // Method table
    };
    
    
    PyMODINIT_FUNC PyInit__example() {
        PyObject *m;
    
        m = PyModule_Create(&modDef);
        if (m == NULL)
            return NULL;
    
        /* Import sample, loading its API functions */
        return m;
    }
    
  • setup.py:

    #!/usr/bin/env python
    
    from distutils.core import setup, Extension
    
    
    setup(
        name="example",
        ext_modules=[
            Extension("_example",
                      ["example.c", "dll00.c"],
                      include_dirs=(),
                      define_macros=[
                        ("PY_MOD_BUILD", None),
                      ],
            ),
        ],
        version="0.0.1",
    )
    
  • data.py (I just extracted input data to avoid polluting the other source file):

    data = [
        (b"\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2", b"\x856\x96\xda\xe4H_\xf9\xb3\x95\xff\xda\xc9\x05\x17\xd5"),
        # @TODO - cfati: !!! DELETED NEXT 20 TUPLES so the answer fits the 30K characters limit !!!
        (b"\xb4\xda\xff\x8c0\x18/\xf1\xad\xa4F\x94\xb1\xad\xf4\xf1", b".\x83(\x8f#\xe1\x11\x96\xdeS\x17\x8b\xe3]\xb5\xca"),
        (b"\x1d\t\x11\xc1\x15\xdc\x8e\xea\xe1\xd2\xba%J\\\xf2\xfd", b"\x070ht\xb4\xdeQ+\xc5\xb8#\xac\xd9z\xc0A"),
        (b" V\xe3\xc5gEg\xc5\xd5\xe1\xd6\xd7\xab\xd0\xb6n", b"\x88\x9b8\xf6\xe26\x92z\x91\xe4\x92\x1e\xbc\xc5;u"),
        (b"\x88}\xd7*QFn:\xd7\x12RA\xe6-\xd2)", b"\xeb\x04<\xe6\xfa)u\x9ds\xecj\xa7\x84\xcf\xa7\xa7"),
        (b"\xec\x04.bP)'\xb6\x08\x05\x92\x8c\x85\x1a\x84\x8f", b"@\x80\x08A\xba\x9c2Sd\xdf\x97\x07\xc4Im\xef"),
        (b"\x11(\x9ez\x0c\xd7\xca+7\xcc\xb0\x19,:\xcb5", b"\xf9V\x8cm\xe5-\xf9\xf1\xf6g\x86~\x8f\xd0<\x84"),
        (b"\xbb\xe9I=\xc74\xc9C\x0cT\x81\x9d\x85\xdeWF", b"\xfe\x1b\xb1gi\x94O\x98\xf0\x90cJ\xdb\xbf\xc7\x1e"),
        (b"2`\x9d\x9c\x02\xe4\xce\xf0\x14\xc0Z\xaaW\xf2\xa5\xff", b"\xe6\x05?\xe3X\"\xb2O)~\x96\x8f^\x9c\x077"),
        (b"\xaf\x87,a\xa9\xcd\n|O\x1e\xa7\xdd\xba\x89;\x9d", b"\x98b\xac?\x13*\xf2m\xfeo=v\x96\xd5\x14\x8d"),
        (b"\xee\xcb{\xeeRQ\x82}\x05|\xc6S\x82\x85\xa4\xff", b"C\x93\xdes\x00\xe8cY<\xde1.\x19\xd8\xe5R"),
        (b"\xack\x036\x9b4\xca\xa4\x15\xbd\xd5\xbe\xd3\xd4\xfb{", b"m?\xc44>\xf4qig\x10\x8c\xeb\x11o\x99\xb4"),
        (b"D!\x0e*\xb3/<\x15F_\xd6;j\x85\xbe,", b"Y\xb01H\xb6:\x04_]OL\x9d\x8e\xbcp\xdc"),
        (b"\xe6\x9d\x02L[\x1c\xee\x8b0lu\xdd\x00\x9a\xfa!", b"J&\tkf\xa8\xeb\x1b\x90\x81\xce\\\x1b\xe8\xb6\x1d"),
        (b"\xcdnd\xf6\xf6\x11\x9e\x06\x10\xce\xcfP\x89\xed\xad\x02", b"I:\xe0{8H\xfcw\xb1\xe9v\x11p-/0"),
        (b"\xa22?\xcd\xfbU+\xbf- \xe3<\xc2\x94\xef\xb6", b"\xc7\x89\xc4K?{\x18\x1ftd\x0f\x19L}\xba\xf9"),
        (b"8\xf9\xe9\xd0\xa0\x97\x8e\rp\xe9\xd6[\x8e`\x93\x81", b"\xf7\x16\"\xaa\x86\\b\x9d\x0c\x17\"\x81\xeea\x03\x19"),
        (b")\xfb\x0c\xc7-D\xd8\xa4\xf7\x8d%\x02\x06\xa1\xbf\x94", b"\xc8s!s\xe2n\xee\xb0\x1eC\x15\xc0\xb4d\x08\xa7"),
        (b"\x7f\xa6\xdc\xf4\x9e\xe3\xda\xd3\x8a\xcb\xfc\x98,1\\\xe5", b"\x82,\xfbKL_\xe9\xc9L`\xc7\"&3#\xb5"),
        (b"&\x98,\x88\xff\x0e\xca[\xcb\xc6t;\xbc5Y\"", b"\x1b\x8bA\xd1\x8a\x9e\xe6\xce\xc4#C\x89\xe9Uon"),
        (b"\x1c\x1b\xac2\x86\xd6\x9a\xf9\xdd\x8d=\xa3\xff\x9a\xe1\xa5", b"\xed\x10\xed\xc1\xfe\xec\xd3\x0b\x8b\xe9\xd4?H\"\xd6\x8f"),
        (b"9\x8f3\x8b\x02\xb7\xe7\xe5\xa1TO\x1a\xb6N\x0b\xb6", b"\xb4\x0f\xa7 \xc7\x83\x81\x18\x9cU\xfd2\x93\xb8(\xe9"),
        (b"\x8d\xbe\xcf`\xc3d\xde\xca\xf4\n.\x92Mrw\xa6", b";\t\xfb\x19eKt0\x8foG\xcd\xd5\xe0\xf6\x9d"),
        (b"d\xbcG)\xec\xc8\x04 \xdd\xd9\xf6\xd6\x91/\xb2\xd3", b"2\xba\xa8\x8a\xc5\xd6h\xab\x9c\x9e\x8b\x1b5\xcc_."),
        (b"\x98\xea\xf0\xcbJ-c\xea\x81\xcf?B\x17\xcb\x99\xf9", b"\x82\x05\x14\xf2\x05q\x97\xf7Z-B\xad\xa2\x16\xa0`"),
        (b"\x1c\x94A\xfd\xe1\xf2\x06\x8b\x19\x80\xfa\x01g\xc3V\xaf", b"\x1c\xde\xec\xdf&\x93u\xab\xf9\x12\xfb\x04\xbc0\x99d"),
        (b"6\xfa\x993b\t\x12E\xd2t\xde\xb6\xfa\x98Im", b",\xa2\xf7VB\x81\x048\xc8\xbf3\xce\x8d\xecn\x82"),
        (b"L\xd6\x93\xbe\x02\xbeT\xa1\x8d\x88\x84\x18\xc7\xcf\x9fE", b"\x16i\"\xb7\x81\xef7\x7f\xdb\x7fY\xbft\x1b\xac\xd0"),
        (b"\x90\xd6\xe7>Z\x8b\x83\x14\xab\xefH\xa1\x95\xe6\x8e\xc3", b"\xebJ\xfcXd\xd75I@\x12\xe9\x88\xdcG\x15\xb7"),
        (b"\xb8\xd6\xd03j\xed\xf9\xa0\xb1L3\x8d\x89P\xb4\xe5", b"\xe3\xdf)CR\x03s\x19\x8f\xb5\xc6\xf5\xee&W\xee"),
        (b"cx\xd7dQ\xc3)YR\x9c7\xf0\xb4\xe4P\xfd", b"h\xa8\x122\xceV\x8c\xf4\x12\x14J\x08\x91\xcf\x93\xfa"),
        (b"\x1d\x14\x86\xccTi\xdd\xc3L!\x97\x92\xb3\xb7X$", b"t\x84\xa4\xe72\xa5X\xe9Ta\x03\x0c\xb00\x07\x83"),
        (b"\xa6\xdd\x1f\xfc\xca2\x88\x99\xbeA\x9a\xa6JVJ\x84", b"B\x81\x8e-K\xde\x15\xfd\t\"\x08\x04@/-\xc9"),
        (b"]b\x18\xe65C\x17*\x103J?\xe79!\x9b", b"\n\xbb\x1f\x98\nZZ\x97\xe0\x12\xf6W\xc4\x1e_~"),
        (b"\x9a\x9f-c\t\x14\xa4NL\xc5\xc0\x99R\x15\x94\xca", b"\xa0\xf9\x11\xfa\xd7\xf3\xb5\x11\xb2SK\xe8\xdb\x06\x94\x1f"),
        (b"!\xf0\x90\xd8t_O\xc1\xb5\xfe}\xca\xa6\x15}\x19", b"\xc9+\x89a9y3\xe0\xffh\xedmMq\x13\xcc"),
        (b"`\xf1\xab\x07A\xe9\xe5\x805\xf2\xabD8\xce\x85A", b"\x9d\x0f\x91\xc2\x01\x93\x8fm\xd6\x03XZl\t\x15J"),
        (b")\r\xcf\xc0\xa1\xa9\xf7i\x94\xa9\x8d\xed\xea\xcd\xb0B", b"jq\xe9\x01*H\x14\xa7M\x8c\xd2\xa5\x19\\\x80\x12"),
        (b"vJ\x98\xbf\xd5D\x15\xdb\xa1\xd7\xdc\xaf\xa0\xaa\xf7\xc1", b"\x06\xd8-\x05\xfe\x172W\xa4\xab\xe4\x97_h\xe6\xac"),
        (b"\xc6'P\xd2\xfcq\xcb\n\xe2\xc1sN>\xbc\x9a\x08", b"\xacysx4z\x96\x8f\xf1\xa5M\x9e\xe7f\\\x16"),
        (b"\x00\xc6\x81\xf0\xf1\xcb\xa8_\x1fF\x18\xe9\xb0\x95\xa3\xab", b"N\xd8\xdaz\xe8F4\xb1\x88>\xec\xd6[\x15\xd7\xfd"),
        (b"E\x1ee\x8f\xd5\x0f\x19\xa8\xb6~-\xe0n\xe7\x89\xe1", b"\x83\x90\xa0\x93\xec\x086+\xcf\x08\x9c\x048\x85\x88^"),
        (b"\x82\x13\xfa\x9f 6\xd3'\xd7\xd2\xc1\xf9\x03.\x13*", b"J\xe2\xf8\x1e\xec\xeb\xd9\"q\xbb:\x04\xc2\xdf<\xa8"),
        (b"\x01\xdf\xfcPW\xc80&{\xfe\x99\xf8\x10\xc6R\xce", b"=Cnh3\x9drKE\x1aU\x8e\xcc\x84jS"),
        (b"<\xeeo[J\xb0r0N\x07\xf4\x1e\xfb\x16\xac\x07", b"\xf9KP\xfe\xd2\xed\xc7`\xc7\x0c\x9et\xd3^\xc9g"),
        (b"P\xa1b#\x07V\xb0s\x86S\x9a\x1d[\xd9\xe7\x92", b"r&\xb1a\x0ff2\xd2\x8ft\x13\xff\xba\x0fy\xf4"),
        (b"\xc8t\xcc\x96V\x8bS\xde\x98\xf1\xa33\xa9\xdb\x1b\x85", b"13\xf1&\x8c\xfe3\xf9\xca\\\xc6\xdb\x1dT\x04b"),
        (b"\xb9u\xf2O\xfb\x05\xb3\xc2r*\x17d\\\x8f9M", b"\xeeQ\xf9&\xc5\xc8\xc8\xe6B\x1d\xcf\x184\x19]b"),
        (b"\x96\xe6\xc9\x1e;\x83e4\xb0\xbe\xf6\x94\x03\x06\x85:", b"\xa2\xb0\nS\xaf\xads.\x94\x16s\x901\xaf-\xe4"),
        (b"&\x81t\x048(\xd7 \xb6\x1aR\xaa\x98\xdbN\x0b", b"\x9cd\x8a#\xdd \xdcS\x14\x1e\x1b\xea\x87\x1b\xa41"),
        (b"\x1fC\x0cr,\x1d\xab\xa8\xd9\xfc\xa1\xa2\xbb\xcdw'", b"D\xb6)\x7f\xd7\xe6\xb2@\x18D\x1d\x08c\xce2k"),
        (b"$\xc1\xabD\r\xe0a\x0co\x12\x17\xbdB\x06\xc9y", b"s\xd8\xf8\x05\x81\xfeY\xc4\xc8|\xe5\xe2]U\xfa"),
        (b"\x8f\xe3\xd6\xc9d\xc4\xf0\xd0\xe3\x17\x12\x82\xb2\xfd\xe9\xef", b"\xf1\r/9\x97/\xd4\xf6dz\x89\x7fEG/\x15"),
        (b"\x16\xf2\xd8\x9a\x029\x9b{\x02\xf4u\x08~\x80\xb9Z", b"\xe1+:9\xaa\x87:\xf0\x02\xdd\xe5}G\x0ch\x98"),
        (b"\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO", b"\x13\xd0\x1c[\xe6\x8aa\xd6\xa0a\xb1X\x84\xb8\xde2"),
        (b"\xa8\xaa\xa8\xe7\xcal>\xe6\xdb\x18vL\xdak%}", b"\xe5W'\x83\xbd\xf2\x01\x9b\x89\x98l\xfd\xcf\x02\x84\x13"),
        (b"6\x9b\xae\x07\xe0+\xa0\xcd\xdbT\t\x02@\x81$E", b"\x10Q\x1an\xaa\xe7\x9cy\x04t\x16\x1d\xea\x9d\xd2\xe6"),
        (b"ne\x83\xb0\xe2\x8aG\xda\xaf%\x93cg\x84\xaf\xea", b"p!}\x8aQ(C\x08\xaf\x8bi\x80\xdaw\x7f\x81"),
        (b"<\xa6\x90\xb2\x03\x8f\xa4\x17$\xc7\xbe\x8a\x1e!\x01s", b"\x19\x7ftaaLUB\x1aBt\xf5U~\xeeY"),
        (b"\xe4|z\xaer8\xa0\xa3\xc8\x9c\x8d\x03`\xb8rN", b"b\xda\xf8e\n\x1f\xc1\xb2\x97\x13\xd7\x91\x91\x86\xbe\x13"),
        (b"\x00\x84\xc1\xd8\xe0ub\x1a\xc0\xa1\xfe\x08K\xfb>,", b"g\xb06$\xbb{s\xb9-\x8e5h|+\xf8\xb1"),
        (b"(\xac{\xb1^OOy\xc98\xf5\xc5\xc2\xd3\xb3\x1e", b"\x8d\xfc\x0f\xda\xd5\xa6\x89)\x92\x173\x1e)*\xf4&"),
        (b"G*.\xe2\xd8\xee\x87\xfa\x83~eS\xcc\x1e\xdeQ", b"\xc9y \x1a]Y\x8d\xf8\xa0\xd0^p\xc1\xf1\xa4\xd2"),
        (b"\x16\x91\xd7[\xbeC\x80T\x08\x0f\xbd\xe1\xf7\xbd\x1b\xb8", b"\xcc\x8c\xfb\x1f\"\x16\xa7/\xe2inK\xaa\xdf\xc0\xe4"),
        (b"\x1d\xbc\xf2\x9b\xe90\x13\xc9\tuuk\r\xe4\xa9\xfa", b"\xf6\x97\xf2B\xaa\x83\xf9k\x15K\xa0\xb9\xdf\xf4,\x02"),
        (b"hIA(;\x8b\x92G!K\xca\x1a\xfd\x8c9\x95", b"0!V\xfc\xfb\xa1,R\xc6\xdaO\x9c\x16\xa8<\xe2"),
        (b"?\xd8\xd7e\x88\xad\xcd\xd6\x8d\\\x1e\xc2l$;\xd8", b"\x88\x8e\x86'#f\xcf\xe3,\x96&\r\xb1\xee@g"),
        (b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0", b"\x99\xbf\xf9qo\xde\x92xge\x04\x84\xb8 \xfb\xe3"),
        (b"\xa7\x8d\x83\xc8\xa3\xc15\xdc`\x82\xed\xae.\xea\xf2\xea", b"[\xe8{\r\xd6g\xcf\xe8L\x16\xde\xcd\x90M\xbd\x9e"),
        (b"\x1a\x1b;\xa62\x10i\xb2\x19n\xc1\xddKz\xb4\xd8", b";On\xd2N\xef%B\x05_T9)IG\xcf"),
        (b"@g`\x1e\xc1\xdf\x14A\xc1\x04N\xb6]\x97\xd3\x17", b"\xbc6\xcb\xaah*\xf1\xd0_\xd9B\x94F\xa8\xd8\r"),
        (b"<\x86\xa4z\xb4\xaeS\x90\xa9\xb5I\xe8\xe6FI\x8c", b"\r2\xd2.H\"\xfb\x9d}{\x11^\xdf\xcb\x186"),
        (b"\xf0\xb8J|\x0b\xfe\xcd\x8b\xc0$&\"8\xffob", b"\xa8}\xf31\xdd\x07\xc6\xa0\xbb\x9f0\xca\xe3&^\xf6"),
        (b"r\x9b\x81\xa0D\x94\x80\xfdu\xb1O\xbc\xf6\x1a\xd8\xe0", b"\xd4)\xb6\xdbQ\xa5#\x16\xed9\xadM\xf82%\xeb"),
        (b"\xb9V\xb7R\xb9\xcdJ9c\x1a`V\xb9\xb0oX", b"\x9a\xff]\xb3\x83\x96\xf5\r\x98\xe2\xc5\x86\xa21k%"),
        (b"J\xe9\xaa\x8f\x07\xab\x90\x18'\xa6\x85\x08\x18\xa8\x96?", b"\xa7\x83\xed\xd9\t\xd8\x85\xe4\xf8\xcb0\x95n\xf2`\xa8"),
        (b"\xc3\xaf\xc8m\xfc\x90}=\xb8\x1fr\xab}\r\xc1!", b"\x04\xa0sq\xf7\xd3Dm\xb9\xb8\x9c\xbap(\xa1L"),
        (b"\x97bq\x0f\x93\xb5\xa1\xa1\xf58\xdcu5\x9a\xbd\x8b", b"\x07'\xc0\xdb/0\xd9H\x81\x06\xe2^ `F\xa9"),
        (b"Nn\xd4\xbd\x13dN\x9e\xcd\xd4\x06kk~\xf5\x7f", b"\x98j'}\xc5\xb0\x1d\x19\xe2/\x92'\x9e\xd4\xba("),
        (b"\x08ty\x84\xde>\x0f5jL6\x05\xe1\x02pH", b"0\xb9\xd5\xd40u\x04\xfdK\xe2D\xbc\xddy\xd4\x11"),
    ]
    
  • code00.py:

    #!/usr/bin/env python
    
    import gc
    import random
    import sys
    from pprint import pprint as pp
    
    import _example as ex
    from data import data
    
    
    def test(repeat):
        tmp = data * repeat
        if ex.size() < 4000000:
            for idx, it in enumerate(tmp):
                t = f"dummy content {idx}"
                ex.add_node(it[0], t.encode(), random.randint(0, 10000))
    
        print(f"List (internal) size: {ex.size()}")
        print(f"Input data len: {len(data)}")
        search_feat = data[0][0]
        t = b"\xd3 "
        print("Start search:")
        res = ex.search(search_feat, t, random.randint(0, 10000), 0.1)
        pp(res)
        print(f"GC len: {len(gc.get_objects())}")
    
    
    def main(*argv):
        test(1)
        print(f"List (internal) size: {ex.size()}")
        print(f"Freed {ex.cleanup()} elements")
        print(f"List (internal) size: {ex.size()}")
        print(f"GC len: {len(gc.get_objects())}")
    
    
    if __name__ == "__main__":
        print("Python {:s} {:03d}bit on {:s}\n".format(" ".join(elem.strip() for elem in sys.version.split("\n")),
                                                       64 if sys.maxsize > 0x100000000 else 32, sys.platform))
        rc = main(*sys.argv[1:])
        print("\nDone.\n")
        sys.exit(rc)
    

Outputs:

  • Win (C .dll called from a C .exe):

    [cfati@CFATI-5510-0:e:\Work\Dev\StackExchange\StackOverflow\q076229579]> sopr.bat
    ### Set shorter prompt to better fit when pasted in StackOverflow (or other) pages ###
    
    [prompt]> "c:\Install\pc032\Microsoft\VisualStudioCommunity\2019\VC\Auxiliary\Build\vcvarsall.bat" x64 > nul
    
    [prompt]>
    [prompt]> dir /b
    code00.py
    data.py
    dll00.c
    dll00.h
    example.c
    main00.c
    orig
    setup.py
    
    [prompt]>
    [prompt]> cl /nologo /MD /DDLL dll00.c  /link /NOLOGO /DLL /OUT:dll00.dll
    dll00.c
       Creating library dll00.lib and object dll00.exp
    
    [prompt]> cl /nologo /MD /W0 main00.c  /link /NOLOGO /OUT:win_main00.exe dll00.lib
    main00.c
    
    [prompt]> del *.obj *.exp
    
    [prompt]> dir /b
    code00.py
    data.py
    dll00.c
    dll00.dll
    dll00.h
    dll00.lib
    example.c
    main00.c
    orig
    setup.py
    win_main00.exe
    
    [prompt]>
    [prompt]> win_main00.exe
    Element count: 0
    Add node (0) returned: 0
    Add node (1) returned: 0
    Element count: 2
    Search for 'xxx' ...
    Found data (0 items):
    Search for '111' ...
    Found data (1 items):
     Index: 0
      Score: 0.300
      Id: 2, Features length: 3, Content length: 7
      Feats: 1?
      Cnt: 567
    Search for 'xaa' ...
    Found data (1 items):
     Index: 0
      Score: 0.300
      Id: 3, Features length: 4, Content length: 9
      Feats: ab
      Cnt: d4567
    Search for '1bd' ...
    Found data (2 items):
     Index: 0
      Score: 0.300
      Id: 2, Features length: 3, Content length: 7
      Feats: 1?
      Cnt: 567
     Index: 1
      Score: 0.300
      Id: 3, Features length: 4, Content length: 9
      Feats: ab
      Cnt: d4567
    Freed 2 nodes
    Element count: 0
    
    Done.
    
  • Nix (Python* module):

    (py_pc064_03.10_test0) [cfati@cfati-5510-0:/mnt/e/Work/Dev/StackExchange/StackOverflow/q076229579]> ~/sopr.sh
    ### Set shorter prompt to better fit when pasted in StackOverflow (or other) pages ###
    
    [064bit prompt]>
    [064bit prompt]> ls
    code00.py  data.py  dll00.c  dll00.dll  dll00.h  dll00.lib  example.c  main00.c  orig  setup.py  win_main00.exe
    [064bit prompt]>
    [064bit prompt]> python setup.py build
    [064bit prompt]>
    [064bit prompt]> ls
    build  code00.py  data.py  dll00.c  dll00.dll  dll00.h  dll00.lib  example.c  main00.c  orig  setup.py  win_main00.exe
    [064bit prompt]> ls build/lib.linux-x86_64-cpython-310/
    _example.cpython-310-x86_64-linux-gnu.so
    [064bit prompt]>
    [064bit prompt]> PYTHONPATH=${PYTHONPATH}:build/lib.linux-x86_64-cpython-310 python code00.py
    Python 3.10.11 (main, Apr  5 2023, 14:15:10) [GCC 9.4.0] 064bit on linux
    
    List (internal) size: 100
    Input data len: 100
    Start search:
    [(4555,
      b'dummy content 0',
      b'\xd3\nR\x952\xbf\x8e\xeb[\xdc\xe8\xfb\xcb\x9f\xb4\xd2',
      0.3),
     (273, b'dummy content 74', b'\xe5\n\xf8gP\xfd\x8aT\x04\xf2\xdc\xa4dgcO', 0.3),
     (9201,
      b'dummy content 87',
      b"\x81\n\x18\xd7\x92\\dv'\x1a\xea\x8c7\xf2S\xb0",
      0.3)]
    GC len: 8942
    List (internal) size: 100
    Freed 100 elements
    List (internal) size: 0
    GC len: 8937
    
    Done.
    
CristiFati
  • 38,250
  • 9
  • 50
  • 87
  • 1
    Yeah,the most critical issue is the incorrect decrementing of the reference count and the combination of malloc and pymalloc. Your code has provided me with invaluable insights into nested struct overhead and the importance of separating core processing from wrapping. Thank you for your answer! – the drugs don't work May 16 '23 at 05:47