diff --git a/docs/source/python/api/datatypes.rst b/docs/source/python/api/datatypes.rst index 5e151a1f93a..ddf71cbfa3e 100644 --- a/docs/source/python/api/datatypes.rst +++ b/docs/source/python/api/datatypes.rst @@ -131,7 +131,6 @@ Specific classes and functions for extension types. BaseExtensionType ExtensionType - PyExtensionType UnknownExtensionType register_extension_type unregister_extension_type diff --git a/python/pyarrow/__init__.py b/python/pyarrow/__init__.py index c2480e42c00..c8bbd0ecf09 100644 --- a/python/pyarrow/__init__.py +++ b/python/pyarrow/__init__.py @@ -178,7 +178,7 @@ def print_entry(label, value): BaseExtensionType, ExtensionType, RunEndEncodedType, Bool8Type, FixedShapeTensorType, JsonType, OpaqueType, UuidType, - PyExtensionType, UnknownExtensionType, + UnknownExtensionType, register_extension_type, unregister_extension_type, DictionaryMemo, KeyValueMetadata, diff --git a/python/pyarrow/lib.pxd b/python/pyarrow/lib.pxd index 0b2dedad509..1a0066071cb 100644 --- a/python/pyarrow/lib.pxd +++ b/python/pyarrow/lib.pxd @@ -207,10 +207,6 @@ cdef class JsonType(BaseExtensionType): const CJsonType* json_ext_type -cdef class PyExtensionType(ExtensionType): - pass - - cdef class _Metadata(_Weakrefable): # required because KeyValueMetadata also extends collections.abc.Mapping # and the first parent class must be an extension type diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index 41a8f47bec5..5c9cd0b8e05 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -42,15 +42,6 @@ def registered_extension_type(ext_type): pa.unregister_extension_type(ext_type.extension_name) -@contextlib.contextmanager -def enabled_auto_load(): - pa.PyExtensionType.set_auto_load(True) - try: - yield - finally: - pa.PyExtensionType.set_auto_load(False) - - class TinyIntType(pa.ExtensionType): def __init__(self): @@ -233,15 +224,6 @@ def 
__arrow_ext_deserialize__(cls, storage_type, serialized): return cls(storage_type) -class LegacyIntType(pa.PyExtensionType): - - def __init__(self): - pa.PyExtensionType.__init__(self, pa.int8()) - - def __reduce__(self): - return LegacyIntType, () - - def ipc_write_batch(batch): stream = pa.BufferOutputStream() writer = pa.RecordBatchStreamWriter(stream, batch.schema) @@ -1735,25 +1717,6 @@ def test_tensor_type_str(tensor_type, text): assert text in tensor_type_str -def test_legacy_int_type(): - with pytest.warns(FutureWarning, match="PyExtensionType is deprecated"): - ext_ty = LegacyIntType() - arr = pa.array([1, 2, 3], type=ext_ty.storage_type) - ext_arr = pa.ExtensionArray.from_storage(ext_ty, arr) - batch = pa.RecordBatch.from_arrays([ext_arr], names=['ext']) - buf = ipc_write_batch(batch) - - with pytest.warns((RuntimeWarning, FutureWarning)): - batch = ipc_read_batch(buf) - assert isinstance(batch.column(0).type, pa.UnknownExtensionType) - - with enabled_auto_load(): - with pytest.warns(FutureWarning, match="PyExtensionType is deprecated"): - batch = ipc_read_batch(buf) - assert isinstance(batch.column(0).type, LegacyIntType) - assert batch.column(0) == ext_arr - - @pytest.mark.parametrize("storage_type,storage", [ (pa.null(), [None] * 4), (pa.int64(), [1, 2, None, 4]), diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 27d63a67fed..fa5a0bd1596 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -2136,91 +2136,7 @@ cdef class OpaqueType(BaseExtensionType): return OpaqueScalar -_py_extension_type_auto_load = False - - -cdef class PyExtensionType(ExtensionType): - """ - Concrete base class for Python-defined extension types based on pickle - for (de)serialization. - - .. warning:: - This class is deprecated and its deserialization is disabled by default. - :class:`ExtensionType` is recommended instead. - - Parameters - ---------- - storage_type : DataType - The storage type for which the extension is built. 
- """ - - def __cinit__(self): - if type(self) is PyExtensionType: - raise TypeError("Can only instantiate subclasses of " - "PyExtensionType") - - def __init__(self, DataType storage_type): - warnings.warn( - "pyarrow.PyExtensionType is deprecated " - "and will refuse deserialization by default. " - "Instead, please derive from pyarrow.ExtensionType and implement " - "your own serialization mechanism.", - FutureWarning) - ExtensionType.__init__(self, storage_type, "arrow.py_extension_type") - - def __reduce__(self): - raise NotImplementedError("Please implement {0}.__reduce__" - .format(type(self).__name__)) - - def __arrow_ext_serialize__(self): - return pickle.dumps(self) - - @classmethod - def __arrow_ext_deserialize__(cls, storage_type, serialized): - if not _py_extension_type_auto_load: - warnings.warn( - "pickle-based deserialization of pyarrow.PyExtensionType subclasses " - "is disabled by default; if you only ingest " - "trusted data files, you may re-enable this using " - "`pyarrow.PyExtensionType.set_auto_load(True)`.\n" - "In the future, Python-defined extension subclasses should " - "derive from pyarrow.ExtensionType (not pyarrow.PyExtensionType) " - "and implement their own serialization mechanism.\n", - RuntimeWarning) - return UnknownExtensionType(storage_type, serialized) - try: - ty = pickle.loads(serialized) - except Exception: - # For some reason, it's impossible to deserialize the - # ExtensionType instance. Perhaps the serialized data is - # corrupt, or more likely the type is being deserialized - # in an environment where the original Python class or module - # is not available. Fall back on a generic BaseExtensionType. - return UnknownExtensionType(storage_type, serialized) - - if ty.storage_type != storage_type: - raise TypeError("Expected storage type {0} but got {1}" - .format(ty.storage_type, storage_type)) - return ty - - # XXX Cython marks extension types as immutable, so cannot expose this - # as a writable class attribute. 
- @classmethod - def set_auto_load(cls, value): - """ - Enable or disable auto-loading of serialized PyExtensionType instances. - - Parameters - ---------- - value : bool - Whether to enable auto-loading. - """ - global _py_extension_type_auto_load - assert isinstance(value, bool) - _py_extension_type_auto_load = value - - -cdef class UnknownExtensionType(PyExtensionType): +cdef class UnknownExtensionType(ExtensionType): """ A concrete class for Python-defined extension types that refer to an unknown Python implementation. @@ -2238,11 +2154,15 @@ cdef class UnknownExtensionType(PyExtensionType): def __init__(self, DataType storage_type, serialized): self.serialized = serialized - PyExtensionType.__init__(self, storage_type) + super().__init__(storage_type, "pyarrow.unknown") def __arrow_ext_serialize__(self): return self.serialized + @classmethod + def __arrow_ext_deserialize__(cls, storage_type, serialized): + return cls(storage_type, serialized) + _python_extension_types_registry = [] @@ -6094,39 +6014,6 @@ cdef class _ExtensionRegistryNanny(_Weakrefable): _registry_nanny = _ExtensionRegistryNanny() -def _register_py_extension_type(): - cdef: - DataType storage_type - shared_ptr[CExtensionType] cpy_ext_type - c_string c_extension_name = tobytes("arrow.py_extension_type") - - # Make a dummy C++ ExtensionType - storage_type = null() - check_status(CPyExtensionType.FromClass( - storage_type.sp_type, c_extension_name, PyExtensionType, - &cpy_ext_type)) - check_status( - RegisterPyExtensionType( cpy_ext_type)) - - -def _unregister_py_extension_types(): - # This needs to be done explicitly before the Python interpreter is - # finalized. If the C++ type is destroyed later in the process - # teardown stage, it will invoke CPython APIs such as Py_DECREF - # with a destroyed interpreter. 
- unregister_extension_type("arrow.py_extension_type") - for ext_type in _python_extension_types_registry: - try: - unregister_extension_type(ext_type.extension_name) - except KeyError: - pass - _registry_nanny.release_registry() - - -_register_py_extension_type() -atexit.register(_unregister_py_extension_types) - - # # PyCapsule export utilities #