I adopted a project that was abandoned by its original creator on GitHub: pybind11 bindings for whispercpp. I had to update the Python version from 3.8 to 3.11.8 in CI and discovered that some of the test cases are failing.
Specifically, the failing tests are the ones where WAV data is read from a file in C++ and then accessed through a property of the WavFileWrapper class. The project uses pybind11 to expose the C++ code to Python.
The relevant pieces of code are:
in whispercpp/src/whispercpp/api_cpp2py_export.h
namespace whisper {
// Backport of std::make_unique for C++11 toolchains.
// Reference kept from the original source: /a/17902439/8643197
//
// _Unique_if exposes exactly one nested alias depending on the shape of T, so
// that only the matching make_unique overload survives substitution:
//   T      -> _Single_object  (std::unique_ptr<T>)
//   T[]    -> _Unknown_bound  (std::unique_ptr<T[]>)
//   T[N]   -> _Known_bound    (void; the overload using it is deleted)
//
// NOTE(review): identifiers that start with an underscore followed by a
// capital letter are reserved for the implementation; the names are kept
// as-is here so existing references keep compiling.
template <typename T>
struct _Unique_if {
  using _Single_object = std::unique_ptr<T>;
};

template <typename T>
struct _Unique_if<T[]> {
  using _Unknown_bound = std::unique_ptr<T[]>;
};

template <typename T, size_t N>
struct _Unique_if<T[N]> {
  using _Known_bound = void;
};

// Single object: forwards the arguments to T's constructor.
template <typename T, typename... Args>
typename _Unique_if<T>::_Single_object make_unique(Args &&...args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

// Array of unknown bound: allocates n value-initialized elements.
template <typename T>
typename _Unique_if<T>::_Unknown_bound make_unique(size_t n) {
  using Element = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new Element[n]());
}

// Array of known bound: deliberately not constructible through make_unique.
template <typename T, typename... Args>
typename _Unique_if<T>::_Known_bound make_unique(Args &&...) = delete;
// Builds a numpy array over the storage of `seq` without copying the
// elements. See
// https://github.com/pybind/pybind11/issues/1042#issuecomment-642215028
//
// `seq` is moved into a heap-allocated Sequence. A py::capsule holding that
// allocation is passed to py::array as its base object, and the capsule's
// destructor deletes the Sequence again.
//
// Sequence must provide value_type, size() and data(), and be move
// constructible. `seq` is left in a moved-from state.
template <typename Sequence>
inline py::array_t<typename Sequence::value_type> as_pyarray(Sequence &&seq) {
  // Move first, then query the heap-owned object. Reading data() from `seq`
  // before the move is only correct when the move constructor transfers the
  // buffer; for sequences with inline storage the pointer would refer to the
  // moved-from object instead of the one the capsule keeps alive.
  std::unique_ptr<Sequence> seq_ptr =
      whisper::make_unique<Sequence>(std::move(seq));
  auto size = seq_ptr->size();
  auto data = seq_ptr->data();

  // The capsule destructor re-adopts the raw pointer so the Sequence is
  // destroyed exactly once. static_cast is sufficient for void* -> Sequence*.
  auto capsule = py::capsule(seq_ptr.get(), [](void *p) {
    std::unique_ptr<Sequence>(static_cast<Sequence *>(p));
  });

  // Ownership now belongs to the capsule; only release after the capsule was
  // constructed so a throwing py::capsule does not leak the Sequence.
  seq_ptr.release();
  return py::array(size, data, capsule);
}
} // namespace whisper
// Pairs the mono and stereo sample buffers of a decoded wav file.
struct WavFileWrapper {
  // Mono samples, exposed as an array built by whisper::as_pyarray.
  py::array_t<float> mono;
  // Stereo samples (presumably one inner vector per channel -- confirm
  // against read_wav).
  std::vector<std::vector<float>> stereo;

  // Moves the contents of *mono_in into the `mono` array and copies
  // *stereo_in into `stereo`. Both pointers are dereferenced unconditionally,
  // so neither may be null; *mono_in is left in a moved-from state.
  WavFileWrapper(std::vector<float> *mono_in,
                 std::vector<std::vector<float>> *stereo_in)
      : mono(whisper::as_pyarray(std::move(*mono_in))), stereo(*stereo_in) {}

  // Loads `filename` and returns the wrapped buffers (defined out of line).
  static WavFileWrapper load_wav_file(const char *filename);
};
in whispercpp/src/whispercpp/api_cpp2py_export.cpp
// Decodes `filename` through ::read_wav and wraps the resulting buffers.
// Throws std::runtime_error when ::read_wav reports failure.
WavFileWrapper WavFileWrapper::load_wav_file(const char *filename) {
  std::vector<float> mono_samples;
  std::vector<std::vector<float>> stereo_samples;

  // The trailing `false` is passed straight to ::read_wav
  // (presumably the "stereo" switch -- confirm against its declaration).
  if (::read_wav(filename, mono_samples, stereo_samples, false)) {
    return WavFileWrapper(&mono_samples, &stereo_samples);
  }

  throw std::runtime_error("Failed to load wav file");
}
...
// Expose WavFileWrapper::load_wav_file as the module-level function
// `load_wav_file`, taking one argument named `filename`.
// NOTE(review): load_wav_file returns its result by value; confirm whether
// the `reference` return policy is intended or has any effect here.
m.def("load_wav_file", &WavFileWrapper::load_wav_file, "filename"_a,
py::return_value_policy::reference);
// Register WavFileWrapper with Python under the name `Wavfile` and expose its
// two data members as read-only properties.
py::class_<WavFileWrapper>(m, "Wavfile",
"A light wrapper for the processed wav file.")
// `stereo`: the lambda returns self.stereo by value (a copy of the nested
// vector).
.def_property_readonly(
"stereo", [](WavFileWrapper &self) { return self.stereo; },
py::return_value_policy::reference)
// `mono`: the lambda returns the py::array_t handle stored in self.mono by
// value.
// NOTE(review): as with `stereo`, the getter returns by value -- verify that
// the `reference` policy is what is wanted.
.def_property_readonly(
"mono", [](WavFileWrapper &self) { return self.mono; },
py::return_value_policy::reference);
I checked with print statements and the underlying WAV file read function works correctly. But in Python code, the `mono` property
only returns an array of zeros.
I found a workaround: returning std::vector<float> pcmf32 directly (so mono sound only) works; this way the array contains the sound samples.
If necessary, here is the repository in question with workaround commit:
https://github.com/AIWintermuteAI/whispercpp/commit/cc9d09889dd1adf42e1008692b1106122c7d19b1#diff-c1e9ecf4e144bd332e8d271fff939f6741ed30165e666237d69277b984a398ed .
Edit: Updating from 3.8 to 3.9 also causes the described issue.
7