Skip to content

Commit b98bdb7

Browse files
Backport PR #48264 on branch 1.5.x (BUG: ArrowExtensionArray._from_* accepts pyarrow arrays) (#48422)
* Backport PR #48264: BUG: ArrowExtensionArray._from_* accepts pyarrow arrays
* Add missing import

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent e6f5022 commit b98bdb7

File tree

4 files changed

+127
-20
lines changed

4 files changed

+127
-20
lines changed

pandas/core/arrays/arrow/array.py

+13-7
Original file line number | Diff line number | Diff line change
@@ -224,11 +224,13 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
224224
Construct a new ExtensionArray from a sequence of scalars.
225225
"""
226226
pa_dtype = to_pyarrow_type(dtype)
227-
if isinstance(scalars, cls):
228-
data = scalars._data
227+
is_cls = isinstance(scalars, cls)
228+
if is_cls or isinstance(scalars, (pa.Array, pa.ChunkedArray)):
229+
if is_cls:
230+
scalars = scalars._data
229231
if pa_dtype:
230-
data = data.cast(pa_dtype)
231-
return cls(data)
232+
scalars = scalars.cast(pa_dtype)
233+
return cls(scalars)
232234
else:
233235
return cls(
234236
pa.chunked_array(pa.array(scalars, type=pa_dtype, from_pandas=True))
@@ -242,7 +244,10 @@ def _from_sequence_of_strings(
242244
Construct a new ExtensionArray from a sequence of strings.
243245
"""
244246
pa_type = to_pyarrow_type(dtype)
245-
if pa.types.is_timestamp(pa_type):
247+
if pa_type is None:
248+
# Let pyarrow try to infer or raise
249+
scalars = strings
250+
elif pa.types.is_timestamp(pa_type):
246251
from pandas.core.tools.datetimes import to_datetime
247252

248253
scalars = to_datetime(strings, errors="raise")
@@ -272,8 +277,9 @@ def _from_sequence_of_strings(
272277

273278
scalars = to_numeric(strings, errors="raise")
274279
else:
275-
# Let pyarrow try to infer or raise
276-
scalars = strings
280+
raise NotImplementedError(
281+
f"Converting strings to {pa_type} is not implemented."
282+
)
277283
return cls._from_sequence(scalars, dtype=pa_type, copy=copy)
278284

279285
def __getitem__(self, item: PositionalIndexer):

pandas/core/tools/times.py

+14-11
Original file line number | Diff line number | Diff line change
@@ -80,17 +80,20 @@ def _convert_listlike(arg, format):
8080
format_found = False
8181
for element in arg:
8282
time_object = None
83-
for time_format in formats:
84-
try:
85-
time_object = datetime.strptime(element, time_format).time()
86-
if not format_found:
87-
# Put the found format in front
88-
fmt = formats.pop(formats.index(time_format))
89-
formats.insert(0, fmt)
90-
format_found = True
91-
break
92-
except (ValueError, TypeError):
93-
continue
83+
try:
84+
time_object = time.fromisoformat(element)
85+
except (ValueError, TypeError):
86+
for time_format in formats:
87+
try:
88+
time_object = datetime.strptime(element, time_format).time()
89+
if not format_found:
90+
# Put the found format in front
91+
fmt = formats.pop(formats.index(time_format))
92+
formats.insert(0, fmt)
93+
format_found = True
94+
break
95+
except (ValueError, TypeError):
96+
continue
9497

9598
if time_object is not None:
9699
times.append(time_object)

pandas/tests/extension/test_arrow.py

+95
Original file line number | Diff line number | Diff line change
@@ -21,10 +21,13 @@
2121
import pytest
2222

2323
from pandas.compat import (
24+
is_ci_environment,
25+
is_platform_windows,
2426
pa_version_under2p0,
2527
pa_version_under3p0,
2628
pa_version_under4p0,
2729
pa_version_under6p0,
30+
pa_version_under7p0,
2831
pa_version_under8p0,
2932
pa_version_under9p0,
3033
)
@@ -35,6 +38,8 @@
3538

3639
pa = pytest.importorskip("pyarrow", minversion="1.0.1")
3740

41+
from pandas.core.arrays.arrow.array import ArrowExtensionArray
42+
3843
from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip
3944

4045

@@ -222,6 +227,96 @@ def test_from_dtype(self, data, request):
222227
)
223228
super().test_from_dtype(data)
224229

230+
def test_from_sequence_pa_array(self, data, request):
231+
# https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784
232+
# data._data = pa.ChunkedArray
233+
if pa_version_under3p0:
234+
request.node.add_marker(
235+
pytest.mark.xfail(
236+
reason="ChunkedArray has no attribute combine_chunks",
237+
)
238+
)
239+
result = type(data)._from_sequence(data._data)
240+
tm.assert_extension_array_equal(result, data)
241+
assert isinstance(result._data, pa.ChunkedArray)
242+
243+
result = type(data)._from_sequence(data._data.combine_chunks())
244+
tm.assert_extension_array_equal(result, data)
245+
assert isinstance(result._data, pa.ChunkedArray)
246+
247+
def test_from_sequence_pa_array_notimplemented(self, request):
248+
if pa_version_under6p0:
249+
request.node.add_marker(
250+
pytest.mark.xfail(
251+
raises=AttributeError,
252+
reason="month_day_nano_interval not implemented by pyarrow.",
253+
)
254+
)
255+
with pytest.raises(NotImplementedError, match="Converting strings to"):
256+
ArrowExtensionArray._from_sequence_of_strings(
257+
["12-1"], dtype=pa.month_day_nano_interval()
258+
)
259+
260+
def test_from_sequence_of_strings_pa_array(self, data, request):
261+
pa_dtype = data.dtype.pyarrow_dtype
262+
if pa_version_under3p0:
263+
request.node.add_marker(
264+
pytest.mark.xfail(
265+
reason="ChunkedArray has no attribute combine_chunks",
266+
)
267+
)
268+
elif pa.types.is_time64(pa_dtype) and pa_dtype.equals("time64[ns]"):
269+
request.node.add_marker(
270+
pytest.mark.xfail(
271+
reason="Nanosecond time parsing not supported.",
272+
)
273+
)
274+
elif pa.types.is_duration(pa_dtype):
275+
request.node.add_marker(
276+
pytest.mark.xfail(
277+
raises=pa.ArrowNotImplementedError,
278+
reason=f"pyarrow doesn't support parsing {pa_dtype}",
279+
)
280+
)
281+
elif pa.types.is_boolean(pa_dtype):
282+
request.node.add_marker(
283+
pytest.mark.xfail(
284+
reason="Iterating over ChunkedArray[bool] returns PyArrow scalars.",
285+
)
286+
)
287+
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
288+
if pa_version_under7p0:
289+
request.node.add_marker(
290+
pytest.mark.xfail(
291+
raises=pa.ArrowNotImplementedError,
292+
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
293+
)
294+
)
295+
elif is_platform_windows() and is_ci_environment():
296+
request.node.add_marker(
297+
pytest.mark.xfail(
298+
raises=pa.ArrowInvalid,
299+
reason=(
300+
"TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
301+
"on CI to path to the tzdata for pyarrow."
302+
),
303+
)
304+
)
305+
elif pa_version_under6p0 and pa.types.is_temporal(pa_dtype):
306+
request.node.add_marker(
307+
pytest.mark.xfail(
308+
raises=pa.ArrowNotImplementedError,
309+
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
310+
)
311+
)
312+
pa_array = data._data.cast(pa.string())
313+
result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype)
314+
tm.assert_extension_array_equal(result, data)
315+
316+
pa_array = pa_array.combine_chunks()
317+
result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype)
318+
tm.assert_extension_array_equal(result, data)
319+
225320

226321
@pytest.mark.xfail(
227322
raises=NotImplementedError, reason="pyarrow.ChunkedArray backing is 1D."

pandas/tests/tools/test_to_time.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
import pytest
66

7+
from pandas.compat import PY311
8+
79
from pandas import Series
810
import pandas._testing as tm
911
from pandas.core.tools.datetimes import to_time as to_time_alias
@@ -40,8 +42,9 @@ def test_parsers_time(self, time_string):
4042
def test_odd_format(self):
4143
new_string = "14.15"
4244
msg = r"Cannot convert arg \['14\.15'\] to a time"
43-
with pytest.raises(ValueError, match=msg):
44-
to_time(new_string)
45+
if not PY311:
46+
with pytest.raises(ValueError, match=msg):
47+
to_time(new_string)
4548
assert to_time(new_string, format="%H.%M") == time(14, 15)
4649

4750
def test_arraylike(self):

0 commit comments

Comments
 (0)