I’m working on:
- Ubuntu 24.04
- Python 3.12
- Cython 3.0
- cc 13.2.
I have a C module that implements an arbitrary tree and a few constructor functions:
typedef unsigned char byte;
typedef unsigned long ulong;
// ; #region Tree
typedef struct _file {
ulong length;
ulong offset;
byte *data;
} file;
typedef struct _node {
ulong length;
byte *value;
ulong childrenCount;
struct _node *children;
} node;
// ; #endregion Tree
// ; #region Functions
file read_file(char *file_name);
node process_file(char *file_name);
// ; #endregion Functions
All I have from the C side is a header file and a .so
library. The tree is implemented via the children
field, which is subject to a malloc
. This all works perfectly well on C alone, with no leaks, no SIGSEGV
, no fuss.
I’m trying to connect this to Python via Cython. I have the following files:
# cfile.pxd
cdef extern from "file.h":
ctypedef unsigned char byte;
ctypedef unsigned short ushort;
ctypedef unsigned long ulong;
ctypedef struct file:
ulong length
ulong offset
byte* data
ctypedef struct node:
ulong length
byte* value
ulong childrenCount
node* children
file read_file(char *file_name)
node process_file(char *file_name);
and:
# pyfile.pyx
cimport cfile
from libc.stdlib cimport malloc, free
from libc.string cimport memcpy
cdef class File:
cdef cfile.file c_struct
def __cinit__(self, unsigned long length = 0, unsigned long offset = 0, bytes data = None):
self.c_struct.length = length
self.c_struct.offset = offset
self.c_struct.data = <unsigned char *> malloc(length)
memcpy(self.c_struct.data, <unsigned char *> data, length)
@property
def length(self):
return self.c_struct.length
@property
def offset(self):
return self.c_struct.offset
@property
def data(self):
return list(self.c_struct.data[:self.c_struct.length])
cdef class Node:
cdef cfile.node c_struct
def __cinit__(self, unsigned long length, bytearray value, unsigned long childrenCount):
self.c_struct.length = length
self.c_struct.value = <unsigned char *> malloc(length)
memcpy(self.c_struct.value, <unsigned char *> value, length)
self.c_struct.childrenCount = childrenCount
self.c_struct.children = NULL
@property
def length(self):
return self.c_struct.length
@property
def value(self):
return bytearray(self.c_struct.value[:self.c_struct.length])
@property
def childrenCount(self):
return self.c_struct.childrenCount
@property
def children(self):
return [Node.from_c_struct(self.c_struct.children[i]) for i in range(self.c_struct.childrenCount)]
@property
def from_c_struct(self,):
cdef Node node = self.__new__(self)
node.c_struct = self.c_struct
return node
def read_file(str file_name):
cdef cfile.file c_data
c_data = cfile.read_file(file_name.encode('utf-8'))
file = File.__new__(File)
file._c_struct = c_data
return file
def process_file(str file_name):
cdef cfile.node c_node
c_node = cfile.process_file(file_name.encode('utf-8'))
return Node.from_c_struct(c_node)
Finally, I am using the following setup:
# setup.py
from setuptools import setup
from Cython.Build import cythonize
from setuptools.extension import Extension
extensions = [
Extension(
name="file",
sources=["file.pyx"],
include_dirs=["."],
libraries=["file"],
library_dirs=["."],
runtime_library_dirs=["."],
extra_objects=["libfile.so"]
)
]
setup(
name="file",
ext_modules=cythonize(extensions),
)
and running python setup.py build_ext --inplace
.
ERRORS
I’m getting these:
@property
def children(self):
return [Node.from_c_struct(self.c_struct.children[i]) for i in range(self.c_struct.childrenCount)]
^
------------------------------------------------------------
file.pyx:53:57: Cannot convert 'node' to Python object
and
def process_file(str file_name):
cdef cfile.node c_node
c_node = cfile.process_file(file_name.encode('utf-8'))
return Node.from_c_struct(c_node)
^
------------------------------------------------------------
file.pyx:72:30: Cannot convert 'node' to Python object
I believe this is related to the children
field, as removing it and all connected logic seems to work.
Question
Any ideas on how to make this work directly with Cython?
I’m looking for a solution that precludes third parties. I found many solutions based on cpython
imports or numpy
, or even both. I would like to avoid any of that, if possible.
I’m also looking for a solution that does not involve compiling the C code together with the Python code. In order to keep concerns separated, all I want from the C side is the .so
.