1

In CPython I have two types of objects, which are closely connected to each other.

#include <Python.h>
#include <structmember.h>

/* Instance layout used by both extension types: the standard object header
 * followed by a pointer to the partner object. */
typedef struct pyt PYT;
struct pyt {
    PyObject_HEAD
    PYT *other;  /* partner object; exposed to Python as the "other" member */
};

/*
 * tp_dealloc shared by both types.
 *
 * Removes the object from the collector's tracking set, drops the reference
 * to the partner object, logs the address being freed and releases the
 * memory with PyObject_GC_Del.
 */
static void dealloc (PYT *self) {
    /* A GC-enabled type has to untrack itself before any of its fields
     * become invalid, so that a collection triggered during teardown never
     * traverses a half-destroyed object. Calling this on an object that was
     * never tracked is harmless. */
    PyObject_GC_UnTrack(self);
    Py_CLEAR(self->other);
    /* %p is only defined for void * arguments. */
    printf("dealloc object at %p\n", (void *) self);
    PyObject_GC_Del(self);
}

/* Type object for the partner instances that `new` allocates alongside each
 * primary object. Only the leading slots are filled in statically; the
 * flags, traverse/clear hooks and member table are assigned in initpyt. */
static PyTypeObject Pyt2Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /* ob_size */
    "pyt.Pyt2",             /* tp_name */
    sizeof(PYT),            /* tp_basicsize */
    0,                      /* tp_itemsize */
    (destructor) dealloc    /* tp_dealloc */
};

/*
 * tp_new for the primary type: allocates the requested object plus a Pyt2
 * partner and links the two so that each holds a reference to the other.
 *
 * Returns a new reference to the primary object, or NULL if either
 * allocation fails. `args` and `kwds` are ignored.
 *
 * Note that neither object is passed to PyObject_GC_Track in this version.
 */
static PyObject * new (PyTypeObject *type, PyObject *args, PyObject *kwds) {
    PYT *owner;
    PYT *partner;

    owner = PyObject_GC_New(PYT, type);
    if (owner == NULL) {
        return NULL;
    }

    /* Store the result before checking it: on failure the field is NULL,
     * which is what dealloc's Py_CLEAR expects when owner is released. */
    partner = PyObject_GC_New(PYT, &Pyt2Type);
    owner->other = partner;
    if (partner == NULL) {
        Py_DECREF(owner);
        return NULL;
    }

    /* The back-pointer owns its own reference to the primary object. */
    Py_INCREF(owner);
    partner->other = owner;

    return (PyObject *) owner;
}

/* Type object for the primary instances; initpyt installs `new` as its
 * tp_new and publishes it in the module under the name "Pyt". Only the
 * leading slots are filled in statically. */
static PyTypeObject Pyt1Type = {
    PyObject_HEAD_INIT(NULL)
    0,                      /* ob_size */
    "pyt.Pyt1",             /* tp_name */
    sizeof(PYT),            /* tp_basicsize */
    0,                      /* tp_itemsize */
    (destructor) dealloc    /* tp_dealloc */
};

/* tp_traverse hook: reports the single object reference held by an
 * instance, `other`, to the collector's visitor.
 * Py_VISIT depends on the parameters being named exactly `visit` and `arg`. */
static int
traverse (PYT *self, visitproc visit, void *arg)
{
    Py_VISIT(self->other);
    return 0;
}

/* tp_clear hook: breaks the cycle by releasing the reference to the partner
 * object and resetting the field to NULL. Always reports success. */
static int
clear (PYT *self)
{
    Py_CLEAR(self->other);
    return 0;
}

static PyMemberDef members[] = {
    {"other", T_OBJECT, offsetof(PYT, other), RO, "other"},
    { NULL }
};

/* Module-level function table. It contains only the terminating sentinel,
 * so the module defines no functions of its own. */
static PyMethodDef methods[] = {
    { NULL, NULL, 0, NULL }
};

PyMODINIT_FUNC initpyt ( void ) {
    PyObject* m;

    Pyt1Type.tp_flags = Pyt2Type.tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC;
    Pyt1Type.tp_traverse = Pyt2Type.tp_traverse = (traverseproc) traverse;
    Pyt1Type.tp_clear = Pyt2Type.tp_clear = (inquiry) clear;
    Pyt1Type.tp_members = Pyt2Type.tp_members = members;
    Pyt1Type.tp_new = new;

    if (PyType_Ready(&Pyt1Type) < 0) return;
    if (PyType_Ready(&Pyt2Type) < 0) return;

    m = Py_InitModule("pyt", methods);

    Py_INCREF(&Pyt1Type), PyModule_AddObject(m, "Pyt", (PyObject *) &Pyt1Type);
}

Using my test script

# Build-and-run driver for the extension (Python 2: uses the print statement).
from distutils.core import Extension, setup
import sys, gc
# Make setup() act as if the script had been invoked with "build_ext -i",
# i.e. compile pyt.c and place the resulting module next to this script.
sys.argv.extend(["build_ext", "-i"])
setup(ext_modules = [Extension('pyt', ['pyt.c'])])
from pyt import Pyt
pyt = Pyt()
print pyt, sys.getrefcount(pyt)
# Rebind the name to the partner object; the first object is now reachable
# only through the partner's "other" reference.
pyt = pyt.other
print pyt, sys.getrefcount(pyt)
# Remove the last name referring to the pair, then request a collection.
del pyt
gc.collect()

I get an output like

<pyt.Pyt1 object at 0x7fbc26540138> 3
<pyt.Pyt2 object at 0x7fbc26540150> 3

The objects are not deleted at the end, since each keeps a reference to the other, creating a closed cycle. In other code I used an approach where I just kept the objects alive until both had a refcount of 0, which I suspect is bad practice. Now I have tried using the garbage collector here, but the objects are still not collected.

What is going wrong here? What did I miss?

tynn
  • 38,113
  • 8
  • 108
  • 143
  • I updated the question, implementing the Garbage Collector interface; still no collection. – tynn Nov 22 '14 at 18:36

3 Answers

0

You can do this using weak references (see the weakref module). But it's usually better to just rely on the garbage collector. It's possible someone else will create a large reference cycle involving your objects, and then you'll be relying on the GC anyway, so you may as well use it for the simple case.

Please explain what you mean by "failed badly."

Kevin
  • 28,963
  • 9
  • 62
  • 81
  • I need these strong references, since both objects need to survive together. I guess I failed badly with the GC, because I didn't understand the concept of traversing and clearing correctly or made a mistake I didn't see. I'll try this again. – tynn Nov 20 '14 at 13:37
0

An important thing to note about (most) garbage-collected languages is that deletion of an object is not guaranteed to happen as soon as the object becomes unreachable. Once an object becomes unreachable, it is entirely up to the garbage collector when it will release the associated resources, which could be as late as when the program ends if there is no pressure for memory.

If you don't set __del__ methods for your linked classes then the garbage collector should work fine. It won't immediately clean up your objects, as the function to detect reference cycles is more costly than simple reference counting and, as such, is run infrequently.

Example using a pure python class

import gc
import weakref

class Obj(object): pass

x = Obj()
y = Obj()

# Link the two instances into a reference cycle. These must be two separate
# assignments: written as "x.y = y, y.x = x" the line is a single chained
# assignment that tries to unpack x into (y, y.x) and raises TypeError.
x.y = y
y.x = x

# A weak reference lets us observe x without keeping it alive.
ref = weakref.ref(x)

print(ref())   # still alive: the names x and y exist
del x, y
print(ref())   # still alive: only the cycle keeps the objects around
gc.collect()
print(ref())   # None: the cycle collector has reclaimed both objects

Outputs:

<__main__.Obj object at 0x7f81c8ccc7b8>
<__main__.Obj object at 0x7f81c8ccc7b8>
None
Dunes
  • 37,291
  • 7
  • 81
  • 97
  • CPython is special; all objects are reference counted and the GC is theoretically optional (but AFAIK no one in their right mind actually turns it off). – Kevin Nov 18 '14 at 04:56
0

Ok, I finally found my problem. I didn't start tracking with PyObject_GC_Track.

Python requires some steps when using the garbage collector:

  • adding Py_TPFLAGS_HAVE_GC to tp_flags
  • adding a tp_traverse and, if needed, a tp_clear function
  • object creation with PyObject_GC_New or a similar function
  • calling PyObject_GC_Track on the fully initialised object
  • object deletion with PyObject_GC_Del or a similar function

So here modifying the new function will suffice.

/*
 * tp_new: allocates the requested object plus a Pyt2 partner, links the two
 * so that each holds a reference to the other, and then registers both with
 * the cyclic garbage collector.
 *
 * Returns a new reference to the primary object, or NULL if either
 * allocation fails. `args` and `kwds` are ignored.
 */
static PyObject * new (PyTypeObject *type, PyObject *args, PyObject *kwds) {
    PYT *owner;
    PYT *partner;

    owner = PyObject_GC_New(PYT, type);
    if (owner == NULL) {
        return NULL;
    }

    /* Store the result before checking it, so the field holds NULL rather
     * than garbage when owner is released on the failure path. */
    partner = PyObject_GC_New(PYT, &Pyt2Type);
    owner->other = partner;
    if (partner == NULL) {
        Py_DECREF(owner);
        return NULL;
    }

    /* The back-pointer owns its own reference to the primary object. */
    Py_INCREF(owner);
    partner->other = owner;

    /* Start tracking only once every field the traverse hook reads has
     * been initialised. */
    PyObject_GC_Track((PyObject *) owner);
    PyObject_GC_Track((PyObject *) partner);
    return (PyObject *) owner;
}

With an output of

<pyt.Pyt1 object at 0x7f4904fe1398> 4
<pyt.Pyt2 object at 0x7f4904fe15c8> 4
dealloc object at 0x7f4904fe15c8
dealloc object at 0x7f4904fe1398
tynn
  • 38,113
  • 8
  • 108
  • 143