|
5 | 5 |
|
6 | 6 | import pyarrow
|
7 | 7 |
|
| 8 | +from pandas.compat import pa_version_under14p1 |
| 9 | + |
8 | 10 | from pandas.core.dtypes.dtypes import (
|
9 | 11 | IntervalDtype,
|
10 | 12 | PeriodDtype,
|
@@ -112,3 +114,61 @@ def to_pandas_dtype(self):
|
112 | 114 | # register the type with a dummy instance
|
113 | 115 | _interval_type = ArrowIntervalType(pyarrow.int64(), "left")
|
114 | 116 | pyarrow.register_extension_type(_interval_type)
|
| 117 | + |
| 118 | + |
# Template for the RuntimeError message raised when deserialization of an
# 'arrow.py_extension_type' column is refused. It is filled in with
# ``str.format`` and expects three fields: ``storage_type``, ``serialized``
# and ``pickle_disassembly``.
_ERROR_MSG = """\
Disallowed deserialization of 'arrow.py_extension_type':
storage_type = {storage_type}
serialized = {serialized}
pickle disassembly:\n{pickle_disassembly}

Reading of untrusted Parquet or Feather files with a PyExtensionType column
allows arbitrary code execution.
If you trust this file, you can enable reading the extension type by one of:

- upgrading to pyarrow >= 14.0.1, and call `pa.PyExtensionType.set_auto_load(True)`
- install pyarrow-hotfix (`pip install pyarrow-hotfix`) and disable it by running
  `import pyarrow_hotfix; pyarrow_hotfix.uninstall()`

We strongly recommend updating your Parquet/Feather files to use extension types
derived from `pyarrow.ExtensionType` instead, and register this type explicitly.
"""
| 136 | + |
| 137 | + |
def patch_pyarrow():
    """
    Make pyarrow refuse to deserialize ``arrow.py_extension_type`` columns.

    Returns without doing anything when ``pa_version_under14p1`` is false
    (pyarrow 14.0.1 and later ship their own mechanism) or when
    ``pyarrow._hotfix_installed`` is already truthy (set by this function or
    by https://github.com/pitrou/pyarrow-hotfix).

    Otherwise the ``"arrow.py_extension_type"`` registration is replaced by
    an extension type whose ``__arrow_ext_deserialize__`` always raises
    ``RuntimeError``, and ``pyarrow._hotfix_installed`` is set to ``True``.
    """
    # Version check first; the attribute lookup only happens on old pyarrow.
    if not pa_version_under14p1 or getattr(pyarrow, "_hotfix_installed", False):
        return

    extension_name = "arrow.py_extension_type"

    # Stand-in extension type: serializes to nothing, never deserializes.
    class ForbiddenExtensionType(pyarrow.ExtensionType):
        def __arrow_ext_serialize__(self):
            return b""

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            import io
            import pickletools

            # Disassemble the payload so the error shows what the file
            # attempted to load.
            disassembly = io.StringIO()
            pickletools.dis(serialized, disassembly)
            message = _ERROR_MSG.format(
                storage_type=storage_type,
                serialized=serialized,
                pickle_disassembly=disassembly.getvalue(),
            )
            raise RuntimeError(message)

    # Swap the existing registration for the refusing stand-in.
    pyarrow.unregister_extension_type(extension_name)
    blocker = ForbiddenExtensionType(pyarrow.null(), extension_name)
    pyarrow.register_extension_type(blocker)

    # Flag checked by the guard at the top of this function.
    pyarrow._hotfix_installed = True
| 172 | + |
| 173 | + |
# Called at module level, so the guard is applied when this module is imported.
patch_pyarrow()
0 commit comments