Skip to content

Commit 92a1ede

Browse files
lazy import for now
1 parent 4156718 commit 92a1ede

File tree

7 files changed: +104 additions, -115 deletions (lines changed)

pandas/core/arrays/_arrow_utils.py

Lines changed: 90 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,20 +1,12 @@
11
from distutils.version import LooseVersion
2+
import json
23

34
import numpy as np
5+
import pyarrow
46

5-
try:
6-
import pyarrow
7+
from pandas.core.arrays.interval import _VALID_CLOSED
78

8-
_PYARROW_INSTALLED = True
9-
except ImportError:
10-
_PYARROW_INSTALLED = False
11-
pyarrow = None
12-
13-
14-
if _PYARROW_INSTALLED:
15-
_pyarrow_version_ge_015 = LooseVersion(pyarrow.__version__) >= LooseVersion("0.15")
16-
else:
17-
_pyarrow_version_ge_015 = False
9+
_pyarrow_version_ge_015 = LooseVersion(pyarrow.__version__) >= LooseVersion("0.15")
1810

1911

2012
def pyarrow_array_to_numpy_and_mask(arr, dtype):
@@ -44,3 +36,89 @@ def pyarrow_array_to_numpy_and_mask(arr, dtype):
4436
else:
4537
mask = np.ones(len(arr), dtype=bool)
4638
return data, mask
39+
40+
41+
if _pyarrow_version_ge_015:
42+
# the pyarrow extension types are only available for pyarrow 0.15+
43+
44+
class ArrowPeriodType(pyarrow.ExtensionType):
45+
def __init__(self, freq):
46+
# attributes need to be set first before calling
47+
# super init (as that calls serialize)
48+
self._freq = freq
49+
pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period")
50+
51+
@property
52+
def freq(self):
53+
return self._freq
54+
55+
def __arrow_ext_serialize__(self):
56+
metadata = {"freq": self.freq}
57+
return json.dumps(metadata).encode()
58+
59+
@classmethod
60+
def __arrow_ext_deserialize__(cls, storage_type, serialized):
61+
metadata = json.loads(serialized.decode())
62+
return ArrowPeriodType(metadata["freq"])
63+
64+
def __eq__(self, other):
65+
if isinstance(other, pyarrow.BaseExtensionType):
66+
return type(self) == type(other) and self.freq == other.freq
67+
else:
68+
return NotImplemented
69+
70+
def __hash__(self):
71+
return hash((str(self), self.freq))
72+
73+
# register the type with a dummy instance
74+
_period_type = ArrowPeriodType("D")
75+
pyarrow.register_extension_type(_period_type)
76+
77+
class ArrowIntervalType(pyarrow.ExtensionType):
78+
def __init__(self, subtype, closed):
79+
# attributes need to be set first before calling
80+
# super init (as that calls serialize)
81+
assert closed in _VALID_CLOSED
82+
self._closed = closed
83+
if not isinstance(subtype, pyarrow.DataType):
84+
subtype = pyarrow.type_for_alias(str(subtype))
85+
self._subtype = subtype
86+
87+
storage_type = pyarrow.struct([("left", subtype), ("right", subtype)])
88+
pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")
89+
90+
@property
91+
def subtype(self):
92+
return self._subtype
93+
94+
@property
95+
def closed(self):
96+
return self._closed
97+
98+
def __arrow_ext_serialize__(self):
99+
metadata = {"subtype": str(self.subtype), "closed": self.closed}
100+
return json.dumps(metadata).encode()
101+
102+
@classmethod
103+
def __arrow_ext_deserialize__(cls, storage_type, serialized):
104+
metadata = json.loads(serialized.decode())
105+
subtype = pyarrow.type_for_alias(metadata["subtype"])
106+
closed = metadata["closed"]
107+
return ArrowIntervalType(subtype, closed)
108+
109+
def __eq__(self, other):
110+
if isinstance(other, pyarrow.BaseExtensionType):
111+
return (
112+
type(self) == type(other)
113+
and self.subtype == other.subtype
114+
and self.closed == other.closed
115+
)
116+
else:
117+
return NotImplemented
118+
119+
def __hash__(self):
120+
return hash((str(self), str(self.subtype), self.closed))
121+
122+
# register the type with a dummy instance
123+
_interval_type = ArrowIntervalType(pyarrow.int64(), "left")
124+
pyarrow.register_extension_type(_interval_type)

pandas/core/arrays/integer.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@
2828
from pandas.core import nanops, ops
2929
from pandas.core.algorithms import take
3030
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
31-
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
3231
import pandas.core.common as com
3332
from pandas.core.indexers import check_bool_array_indexer
3433
from pandas.core.ops import invalid_comparison
@@ -95,6 +94,7 @@ def construct_array_type(cls):
9594
def __from_arrow__(self, array):
9695
"""Construct IntegerArray from passed pyarrow Array/ChunkedArray"""
9796
import pyarrow
97+
from pandas.core.arrays._arrow_utils import pyarrow_array_to_numpy_and_mask
9898

9999
if isinstance(array, pyarrow.Array):
100100
chunks = [array]

pandas/core/arrays/interval.py

Lines changed: 1 addition & 55 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,3 @@
1-
import json
21
from operator import le, lt
32
import textwrap
43

@@ -37,7 +36,6 @@
3736
from pandas.core.dtypes.missing import isna, notna
3837

3938
from pandas.core.algorithms import take, value_counts
40-
from pandas.core.arrays._arrow_utils import _PYARROW_INSTALLED, _pyarrow_version_ge_015
4139
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
4240
from pandas.core.arrays.categorical import Categorical
4341
import pandas.core.common as com
@@ -1088,6 +1086,7 @@ def __arrow_array__(self, type=None):
10881086
Convert myself into a pyarrow Array.
10891087
"""
10901088
import pyarrow
1089+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
10911090

10921091
try:
10931092
subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
@@ -1326,56 +1325,3 @@ def maybe_convert_platform_interval(values):
13261325
values = np.asarray(values)
13271326

13281327
return maybe_convert_platform(values)
1329-
1330-
1331-
if _PYARROW_INSTALLED and _pyarrow_version_ge_015:
1332-
import pyarrow
1333-
1334-
class ArrowIntervalType(pyarrow.ExtensionType):
1335-
def __init__(self, subtype, closed):
1336-
# attributes need to be set first before calling
1337-
# super init (as that calls serialize)
1338-
assert closed in _VALID_CLOSED
1339-
self._closed = closed
1340-
if not isinstance(subtype, pyarrow.DataType):
1341-
subtype = pyarrow.type_for_alias(str(subtype))
1342-
self._subtype = subtype
1343-
1344-
storage_type = pyarrow.struct([("left", subtype), ("right", subtype)])
1345-
pyarrow.ExtensionType.__init__(self, storage_type, "pandas.interval")
1346-
1347-
@property
1348-
def subtype(self):
1349-
return self._subtype
1350-
1351-
@property
1352-
def closed(self):
1353-
return self._closed
1354-
1355-
def __arrow_ext_serialize__(self):
1356-
metadata = {"subtype": str(self.subtype), "closed": self.closed}
1357-
return json.dumps(metadata).encode()
1358-
1359-
@classmethod
1360-
def __arrow_ext_deserialize__(cls, storage_type, serialized):
1361-
metadata = json.loads(serialized.decode())
1362-
subtype = pyarrow.type_for_alias(metadata["subtype"])
1363-
closed = metadata["closed"]
1364-
return ArrowIntervalType(subtype, closed)
1365-
1366-
def __eq__(self, other):
1367-
if isinstance(other, pyarrow.BaseExtensionType):
1368-
return (
1369-
type(self) == type(other)
1370-
and self.subtype == other.subtype
1371-
and self.closed == other.closed
1372-
)
1373-
else:
1374-
return NotImplemented
1375-
1376-
def __hash__(self):
1377-
return hash((str(self), str(self.subtype), self.closed))
1378-
1379-
# register the type with a dummy instance
1380-
_interval_type = ArrowIntervalType(pyarrow.int64(), "left")
1381-
pyarrow.register_extension_type(_interval_type)

pandas/core/arrays/period.py

Lines changed: 1 addition & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,4 @@
11
from datetime import timedelta
2-
import json
32
import operator
43
from typing import Any, Callable, List, Optional, Sequence, Union
54

@@ -42,7 +41,6 @@
4241

4342
import pandas.core.algorithms as algos
4443
from pandas.core.arrays import datetimelike as dtl
45-
from pandas.core.arrays._arrow_utils import _PYARROW_INSTALLED, _pyarrow_version_ge_015
4644
import pandas.core.common as com
4745

4846
from pandas.tseries import frequencies
@@ -290,6 +288,7 @@ def __arrow_array__(self, type=None):
290288
Convert myself into a pyarrow Array.
291289
"""
292290
import pyarrow
291+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
293292

294293
if type is not None:
295294
if pyarrow.types.is_integer(type):
@@ -1053,40 +1052,3 @@ def _make_field_arrays(*fields):
10531052
]
10541053

10551054
return arrays
1056-
1057-
1058-
if _PYARROW_INSTALLED and _pyarrow_version_ge_015:
1059-
import pyarrow
1060-
1061-
class ArrowPeriodType(pyarrow.ExtensionType):
1062-
def __init__(self, freq):
1063-
# attributes need to be set first before calling
1064-
# super init (as that calls serialize)
1065-
self._freq = freq
1066-
pyarrow.ExtensionType.__init__(self, pyarrow.int64(), "pandas.period")
1067-
1068-
@property
1069-
def freq(self):
1070-
return self._freq
1071-
1072-
def __arrow_ext_serialize__(self):
1073-
metadata = {"freq": self.freq}
1074-
return json.dumps(metadata).encode()
1075-
1076-
@classmethod
1077-
def __arrow_ext_deserialize__(cls, storage_type, serialized):
1078-
metadata = json.loads(serialized.decode())
1079-
return ArrowPeriodType(metadata["freq"])
1080-
1081-
def __eq__(self, other):
1082-
if isinstance(other, pyarrow.BaseExtensionType):
1083-
return type(self) == type(other) and self.freq == other.freq
1084-
else:
1085-
return NotImplemented
1086-
1087-
def __hash__(self):
1088-
return hash((str(self), self.freq))
1089-
1090-
# register the type with a dummy instance
1091-
_period_type = ArrowPeriodType("D")
1092-
pyarrow.register_extension_type(_period_type)

pandas/io/parquet.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,9 @@ def __init__(self):
7676
)
7777
import pyarrow.parquet
7878

79+
# import utils to register the pyarrow extension types
80+
import pandas.core.arrays._arrow_utils # noqa
81+
7982
self.api = pyarrow
8083

8184
def write(

pandas/tests/arrays/interval/test_interval.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ def test_repr():
117117
@pyarrow_skip
118118
def test_arrow_extension_type():
119119
import pyarrow as pa
120-
from pandas.core.arrays.interval import ArrowIntervalType
120+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
121121

122122
p1 = ArrowIntervalType(pa.int64(), "left")
123123
p2 = ArrowIntervalType(pa.int64(), "left")
@@ -133,7 +133,7 @@ def test_arrow_extension_type():
133133
@pyarrow_skip
134134
def test_arrow_array():
135135
import pyarrow as pa
136-
from pandas.core.arrays.interval import ArrowIntervalType
136+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
137137

138138
intervals = pd.interval_range(1, 5, freq=1).array
139139

@@ -162,7 +162,7 @@ def test_arrow_array():
162162
@pyarrow_skip
163163
def test_arrow_array_missing():
164164
import pyarrow as pa
165-
from pandas.core.arrays.interval import ArrowIntervalType
165+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
166166

167167
arr = IntervalArray.from_breaks([0, 1, 2, 3])
168168
arr[1] = None
@@ -196,7 +196,7 @@ def test_arrow_array_missing():
196196
)
197197
def test_arrow_table_roundtrip(breaks):
198198
import pyarrow as pa
199-
from pandas.core.arrays.interval import ArrowIntervalType
199+
from pandas.core.arrays._arrow_utils import ArrowIntervalType
200200

201201
arr = IntervalArray.from_breaks(breaks)
202202
arr[1] = None

pandas/tests/arrays/test_period.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -334,7 +334,7 @@ def test_min_max_empty(self, skipna):
334334

335335
@pyarrow_skip
336336
def test_arrow_extension_type():
337-
from pandas.core.arrays.period import ArrowPeriodType
337+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
338338

339339
p1 = ArrowPeriodType("D")
340340
p2 = ArrowPeriodType("D")
@@ -357,7 +357,7 @@ def test_arrow_extension_type():
357357
)
358358
def test_arrow_array(data, freq):
359359
import pyarrow as pa
360-
from pandas.core.arrays.period import ArrowPeriodType
360+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
361361

362362
periods = period_array(data, freq=freq)
363363
result = pa.array(periods)
@@ -381,7 +381,7 @@ def test_arrow_array(data, freq):
381381
@pyarrow_skip
382382
def test_arrow_array_missing():
383383
import pyarrow as pa
384-
from pandas.core.arrays.period import ArrowPeriodType
384+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
385385

386386
arr = PeriodArray([1, 2, 3], freq="D")
387387
arr[1] = pd.NaT
@@ -396,7 +396,7 @@ def test_arrow_array_missing():
396396
@pyarrow_skip
397397
def test_arrow_table_roundtrip():
398398
import pyarrow as pa
399-
from pandas.core.arrays.period import ArrowPeriodType
399+
from pandas.core.arrays._arrow_utils import ArrowPeriodType
400400

401401
arr = PeriodArray([1, 2, 3], freq="D")
402402
arr[1] = pd.NaT

0 commit comments

Comments
 (0)