I am trying to write some wrappers using the Python C API to work with NumPy arrays. If I write all my code in one file, the code works fine, tests pass, and everything seems great. If however I try and split the file into some headers, and a few different files, it segfaults, and on the surface of it I can’t see why. Am I doing something wrong?
(PS – I am also trying to come up with a nice way of writing #define PY_ARRAY_UNIQUE_SYMBOL ...
and #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
only once, but struggle with multiple definition complaints, and suspect the right way to do this might better influence how I split up the various files and headers).
My Example
I am making a module which takes a numpy array, multiplies it by some factor, and writes the result into another array.
The files look like:
dir/
├── module.c
├── module_example.py
├── module_examples.so
├── module_headers.h
└── module_implementation.c
The tests I want to run without segfaulting in module_example.py
:
#!/usr/bin/env python
import numpy as np
from module_examples import foo
import unittest
class TestNumpyFloatWrappers(unittest.TestCase):
    """Exercises the ``foo`` C-extension wrapper on float64 NumPy arrays."""

    def test_numpy_wrapper(self):
        """``foo`` must store ``input * factor`` in ``output`` and leave ``input`` alone."""
        a = np.arange(10, dtype=float)
        b = np.arange(10, dtype=float)
        foo(input=a, output=b, factor=3.0)
        # Without these checks the test only proves that the call does not
        # crash; pin the numerical result as well.
        np.testing.assert_array_equal(b, np.arange(10, dtype=float) * 3.0)
        np.testing.assert_array_equal(a, np.arange(10, dtype=float))


if __name__ == "__main__":
    unittest.main()
The function I want to define in module_headers.h
:
/*
 * Shared declarations for the example extension module: pulls in the Python
 * and NumPy C headers and declares the Python-callable function foo().
 *
 * NOTE(review): this header includes <numpy/arrayobject.h> on behalf of every
 * file that includes it, but defines neither PY_ARRAY_UNIQUE_SYMBOL nor
 * NO_IMPORT_ARRAY itself. Per the NumPy documentation, a multi-file extension
 * needs every file that uses the C-API to share one PY_ARRAY_UNIQUE_SYMBOL,
 * and every file other than the one calling import_array() to define
 * NO_IMPORT_ARRAY before this include. That looks like the cause of the
 * crash when foo() lives in a separate .c file -- confirm.
 */
#ifndef PYARV_MODULE_HEADERS_H
#define PYARV_MODULE_HEADERS_H
/* Defined ahead of <Python.h> so the macro is visible while it is processed. */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
/* Defined ahead of the NumPy header to opt out of the deprecated NumPy API. */
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
/* Python-callable entry point; takes positional args and keyword args. */
PyObject * foo(PyObject *Py_UNUSED(self), PyObject *args, PyObject *kwargs);
#endif//PYARV_MODULE_HEADERS_H
The implementation in module_implementation.c
#include "module_headers.h"
/*
 * foo(*, input, output, factor) -> None
 *
 * Python-callable wrapper that stores input[i] * factor in output[i] for
 * every element. All three arguments are keyword-only and required.
 *
 * Both arrays must be 1-dimensional, C-contiguous, aligned, native-byte-order
 * arrays of C doubles with equal length; `output` must also be writeable.
 * Because the work is element-wise, `input` and `output` may be the same
 * array.
 *
 * Returns None on success. On failure returns NULL with a Python exception
 * set: the one raised by argument parsing, or ValueError when an array does
 * not meet the requirements above.
 *
 * NOTE(review): the ":multiply" suffix of the format string is the function
 * name used in argument-parsing error messages, while the method table
 * exposes this function as "foo" -- confirm which name is intended.
 */
PyObject *foo(PyObject *Py_UNUSED(self), PyObject *args, PyObject *kwargs)
{
    PyArrayObject *input_array = NULL;
    PyArrayObject *output_array = NULL;
    double factor = 0.0;
    char *arg_names[] = {"input", "output", "factor", NULL};

    /* "O!" has the parser type-check each object against PyArray_Type, so
     * the PyArrayObject pointers are valid once parsing succeeds. */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "$O!O!d:multiply",
                                     arg_names,
                                     &PyArray_Type, &input_array,
                                     &PyArray_Type, &output_array,
                                     &factor))
    {
        return NULL;
    }

    /* Validate both arrays with the same rules before touching their data. */
    PyArrayObject *const arrays[] = {input_array, output_array};
    const size_t n_arrays = sizeof arrays / sizeof arrays[0];
    for (size_t i = 0; i < n_arrays; i++)
    {
        PyArrayObject *array = arrays[i];
        if (PyArray_NDIM(array) != 1)
        {
            PyErr_SetString(PyExc_ValueError, "Array must be 1-dimensional");
            return NULL;
        }
        if (PyArray_TYPE(array) != NPY_DOUBLE)
        {
            PyErr_SetString(PyExc_ValueError, "Array must be of type double");
            return NULL;
        }
        if (!PyArray_IS_C_CONTIGUOUS(array))
        {
            PyErr_SetString(PyExc_ValueError, "Array must be C contiguous.");
            return NULL;
        }
        /* The type number alone does not rule out byte-swapped or misaligned
         * data, which must not be read through a plain npy_double pointer. */
        if (!PyArray_ISALIGNED(array) || !PyArray_ISNOTSWAPPED(array))
        {
            PyErr_SetString(PyExc_ValueError, "Array must be aligned and in native byte order.");
            return NULL;
        }
    }

    /* Never write through a read-only array's buffer. */
    if (!PyArray_ISWRITEABLE(output_array))
    {
        PyErr_SetString(PyExc_ValueError, "The output array must be writeable.");
        return NULL;
    }

    const npy_intp length = PyArray_SIZE(input_array);
    if (length != PyArray_SIZE(output_array))
    {
        PyErr_SetString(PyExc_ValueError, "The input and output arrays are of differing lengths.");
        return NULL;
    }

    const npy_double *input_buffer = (const npy_double *) PyArray_DATA(input_array);
    npy_double *output_buffer = (npy_double *) PyArray_DATA(output_array);

    NPY_BEGIN_THREADS_DEF;
    NPY_BEGIN_THREADS; /* No Python API calls below, so the GIL is released. */
    for (npy_intp i = 0; i < length; i++)
    {
        output_buffer[i] = input_buffer[i] * factor;
    }
    NPY_END_THREADS; /* Re-acquire the GIL before returning to Python. */

    Py_RETURN_NONE;
}
Trying to glue everything together in module.c
, where the first few commented-out lines crash with a segfault, while the others (where everything lives in just one big file) work fine.
/* // Gives a segfault when the tests run
#define PY_ARRAY_UNIQUE_SYMBOL EXAMPLE_ARRAY_API
#include "module_headers.h"
*/
/* // Works fine.
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#define PY_ARRAY_UNIQUE_SYMBOL EXAMPLE_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
PyObject *foo(PyObject *Py_UNUSED(self), PyObject *args, PyObject *kwargs)
{
//...
}
*/
static PyMethodDef example_methods[] = {
{"foo", (PyCFunction) foo, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL},
};
/*
 * Module definition handed to PyModule_Create().
 *
 * m_name matches the name the module is imported under (module_examples,
 * the suffix of the PyInit_module_examples init function) so that the
 * module's __name__ agrees with its import name.
 * m_size = -1: the module keeps no per-module state structure.
 */
static struct PyModuleDef example_module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "module_examples",
    .m_doc = "Something is going wrong here.",
    .m_size = -1,
    .m_methods = example_methods,
};
PyObject *
PyInit_module_examples(void)
{
import_array();
PyObject *module = PyModule_Create(&example_module);
if (
!module || PyModule_AddStringConstant(module, "__version__", Py_STRINGIFY(NPB_VERSION)))
{
Py_XDECREF(module);
return NULL;
}
return module;
}
Attempting to debug the issue
Running through a debugger in Python just says:
..../python ..../module_example.py
process exited with status -1 (attach failed (Not allowed to attach to process. Look in the console messages (Console.app), near the debugserver entries, when the attach failed. The subsystem that denied the attach permission will likely have logged an informative message about why it was denied.))
Process finished with exit code 0
15
After going down a rabbit hole of trying to build everything by hand and reading more carefully through the documentation here about import_array
, I discovered the significance of #define NO_IMPORT_ARRAY
when using multiple files. The most important paragraph I repeat here (emphasis mine):
In order to make use of the C-API from another extension module, the
import_array()
command must be used. If the extension module is self-contained in a single .c
file, then that is all that needs to be done. If, however, the extension module involves multiple files where the C-API is needed then some additional steps must be taken.
…
In addition, in the files that do not have the module initialization sub_routine, #define NO_IMPORT_ARRAY
prior to including numpy/arrayobject.h
.
Adding the #define NO_IMPORT_ARRAY
where needed solved the issue, where the module_headers.h
file now reads:
/*
 * Declarations shared by module.c and module_implementation.c.
 *
 * Only <Python.h> is included here; each .c file includes
 * <numpy/arrayobject.h> itself, after defining its own NumPy configuration
 * macros.
 */
#ifndef TESTING_EXAMPLES_BINDINGS_H
#define TESTING_EXAMPLES_BINDINGS_H
/* Defined ahead of <Python.h> so the macro is visible while it is processed. */
#define PY_SSIZE_T_CLEAN
#include <Python.h>
/* Python-callable entry point defined in module_implementation.c; takes
 * positional args and keyword args. */
PyObject *multiply_into(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs);
#endif//TESTING_EXAMPLES_BINDINGS_H
and module_implementation.c
:
// clang-format off
#include "module_headers.h"
#define NO_IMPORT_ARRAY
#define PY_ARRAY_UNIQUE_SYMBOL EXAMPLE_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
// clang-format on
/*
 * Python-callable wrapper declared in module_headers.h; the body is elided
 * in this listing.
 * NOTE(review): presumably the same implementation as the earlier foo()
 * listing under its new name -- confirm.
 */
PyObject *
multiply_into(PyObject *Py_UNUSED(m), PyObject *args, PyObject *kwargs)
{
//...
}
and lastly module.c
:
#include "module_headers.h"
#define PY_ARRAY_UNIQUE_SYMBOL EXAMPLE_ARRAY_API
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/arrayobject.h>
static PyMethodDef example_methods[] = {
{"foo", (PyCFunction) (void (*)(void)) foo, METH_VARARGS | METH_KEYWORDS, NULL},
{NULL},
};
/*
 * Module definition handed to PyModule_Create().
 *
 * m_name matches the name the module is imported under (module_examples,
 * the suffix of the PyInit_module_examples init function) so that the
 * module's __name__ agrees with its import name.
 * m_size = -1: the module keeps no per-module state structure.
 */
static struct PyModuleDef example_module = {
    .m_base = PyModuleDef_HEAD_INIT,
    .m_name = "module_examples",
    .m_doc = "Something is going wrong here.",
    .m_size = -1,
    .m_methods = example_methods,
};
PyObject *
PyInit_module_examples(void)
{
import_array();
PyObject *module = PyModule_Create(&example_module);
if (
!module || PyModule_AddStringConstant(module, "__version__", Py_STRINGIFY(NPB_VERSION)))
{
Py_XDECREF(module);
return NULL;
}
return module;
}