Consider the following example:
from typing import Any, get_type_hints
import pyarrow as pa
import pyarrow.compute as pac
# Hand-maintained lookup from a plain Python class to the Arrow data type used
# for it when building struct fields. Classes missing here are not supported.
ArrowTypes: dict[type, pa.DataType] = {
    int: pa.int64(),
    str: pa.string(),
}
class StructLike:
    """Base class for attribute-validated records convertible to Arrow structs.

    Subclasses declare their fields as class-level annotations. Every
    attribute assignment is checked against the declared annotation, and the
    runtime type of each accepted value is recorded so that ``arrow_type``
    can build the matching ``pa.struct``.
    """

    # Runtime type of every value assigned so far, keyed by attribute name.
    # Created per instance in ``__init__``: a class-level dict would be shared
    # by all instances of all subclasses and mix their fields together.
    __attr_types__: dict[str, Any]
    # Declared field annotations, resolved once per instance in ``__init__``.
    __attr_annotations__: dict[str, Any]

    def __init__(self):
        """Resolve the declared fields and create the per-instance bookkeeping."""
        # Resolving the hints on the class (not the instance) merges the
        # annotations along the MRO, so fields declared on intermediate base
        # classes are kept as well.
        hints = get_type_hints(type(self))
        # The two bookkeeping annotations of this base class are not fields.
        hints.pop("__attr_types__", None)
        hints.pop("__attr_annotations__", None)
        # Write through ``__dict__`` because ``__setattr__`` only accepts
        # declared fields.
        self.__dict__["__attr_annotations__"] = hints
        self.__dict__["__attr_types__"] = {}

    def __setattr__(self, name: str, value: Any) -> None:
        """Assign ``value`` to the declared field ``name`` after a type check.

        Raises:
            KeyError: if ``name`` is not a declared field.
            TypeError: if ``value`` is not an instance of the declared type.
        """
        try:
            expected = self.__attr_annotations__[name]
        except KeyError:
            raise KeyError(
                f"Could not find '{name}' in {self.__attr_annotations__=}"
            ) from None
        # The value must be an instance of the declared type (subclasses
        # included); the reverse ``issubclass`` test accepted the wrong side.
        if not isinstance(value, expected):
            raise TypeError(
                f"'{name}' should have type "
                f"{expected}, "
                f"but instead it has type {type(value)}"
            )
        self.__attr_types__[name] = type(value)
        self.__dict__[name] = value

    def as_dict(self) -> dict[str, Any]:
        """Return the current value of every declared field, keyed by name."""
        return {k: getattr(self, k) for k in self.__attr_annotations__}

    def arrow_type(self) -> "pa.StructType":
        """Build the Arrow struct type for the fields assigned so far.

        Each recorded runtime type is looked up in ``ArrowTypes``; a type that
        is missing from that table raises ``KeyError``.
        """
        return pa.struct(
            [
                pa.field(name, ArrowTypes[ty])
                for name, ty in self.__attr_types__.items()
            ]
        )

    def to_arrow(self) -> "pa.StructScalar":
        """Convert this record to an Arrow value of type ``arrow_type()``."""
        return pac.cast(
            self.as_dict(),
            target_type=self.arrow_type(),
        )
class PyStruct(StructLike):
    """Example record with one string field and one integer field."""

    aleph: str
    bet: int

    def __init__(self, aleph: str, bet: int):
        """Initialise the base-class bookkeeping, then assign both fields."""
        super().__init__()
        # Assigned left to right, so ``aleph`` is validated before ``bet``.
        self.aleph, self.bet = aleph, bet
# Build a sample record and convert it to its Arrow representation.
test = PyStruct(aleph="hello", bet=5)
test.to_arrow()
It works:
<pyarrow.StructScalar: [('aleph', 'hello'), ('bet', 5)]>
But I have to rely on the hand-built `ArrowTypes` dictionary to map from Python types to a `pa.DataType` in the method `StructLike.arrow_type`. How can I automatically infer a simple `pa.DataType` from a given (simple) Python type?