Skip to content

Commit 974a450

Browse files
mroeschke authored and meeseeksmachine committed
Backport PR pandas-dev#48264: BUG: ArrowExtensionArray._from_* accepts pyarrow arrays
1 parent bccc060 commit 974a450

File tree

4 files changed

+126
-20
lines changed

4 files changed

+126
-20
lines changed

pandas/core/arrays/arrow/array.py

Lines changed: 13 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -224,11 +224,13 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
224224
Construct a new ExtensionArray from a sequence of scalars.
225225
"""
226226
pa_dtype = to_pyarrow_type(dtype)
227-
if isinstance(scalars, cls):
228-
data = scalars._data
227+
is_cls = isinstance(scalars, cls)
228+
if is_cls or isinstance(scalars, (pa.Array, pa.ChunkedArray)):
229+
if is_cls:
230+
scalars = scalars._data
229231
if pa_dtype:
230-
data = data.cast(pa_dtype)
231-
return cls(data)
232+
scalars = scalars.cast(pa_dtype)
233+
return cls(scalars)
232234
else:
233235
return cls(
234236
pa.chunked_array(pa.array(scalars, type=pa_dtype, from_pandas=True))
@@ -242,7 +244,10 @@ def _from_sequence_of_strings(
242244
Construct a new ExtensionArray from a sequence of strings.
243245
"""
244246
pa_type = to_pyarrow_type(dtype)
245-
if pa.types.is_timestamp(pa_type):
247+
if pa_type is None:
248+
# Let pyarrow try to infer or raise
249+
scalars = strings
250+
elif pa.types.is_timestamp(pa_type):
246251
from pandas.core.tools.datetimes import to_datetime
247252

248253
scalars = to_datetime(strings, errors="raise")
@@ -272,8 +277,9 @@ def _from_sequence_of_strings(
272277

273278
scalars = to_numeric(strings, errors="raise")
274279
else:
275-
# Let pyarrow try to infer or raise
276-
scalars = strings
280+
raise NotImplementedError(
281+
f"Converting strings to {pa_type} is not implemented."
282+
)
277283
return cls._from_sequence(scalars, dtype=pa_type, copy=copy)
278284

279285
def __getitem__(self, item: PositionalIndexer):

pandas/core/tools/times.py

Lines changed: 14 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -80,17 +80,20 @@ def _convert_listlike(arg, format):
8080
format_found = False
8181
for element in arg:
8282
time_object = None
83-
for time_format in formats:
84-
try:
85-
time_object = datetime.strptime(element, time_format).time()
86-
if not format_found:
87-
# Put the found format in front
88-
fmt = formats.pop(formats.index(time_format))
89-
formats.insert(0, fmt)
90-
format_found = True
91-
break
92-
except (ValueError, TypeError):
93-
continue
83+
try:
84+
time_object = time.fromisoformat(element)
85+
except (ValueError, TypeError):
86+
for time_format in formats:
87+
try:
88+
time_object = datetime.strptime(element, time_format).time()
89+
if not format_found:
90+
# Put the found format in front
91+
fmt = formats.pop(formats.index(time_format))
92+
formats.insert(0, fmt)
93+
format_found = True
94+
break
95+
except (ValueError, TypeError):
96+
continue
9497

9598
if time_object is not None:
9699
times.append(time_object)

pandas/tests/extension/test_arrow.py

Lines changed: 94 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,8 @@
2121
import pytest
2222

2323
from pandas.compat import (
24+
is_ci_environment,
25+
is_platform_windows,
2426
pa_version_under2p0,
2527
pa_version_under3p0,
2628
pa_version_under4p0,
@@ -35,6 +37,8 @@
3537

3638
pa = pytest.importorskip("pyarrow", minversion="1.0.1")
3739

40+
from pandas.core.arrays.arrow.array import ArrowExtensionArray
41+
3842
from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip
3943

4044

@@ -222,6 +226,96 @@ def test_from_dtype(self, data, request):
222226
)
223227
super().test_from_dtype(data)
224228

229+
def test_from_sequence_pa_array(self, data, request):
230+
# https://github.com/pandas-dev/pandas/pull/47034#discussion_r955500784
231+
# data._data = pa.ChunkedArray
232+
if pa_version_under3p0:
233+
request.node.add_marker(
234+
pytest.mark.xfail(
235+
reason="ChunkedArray has no attribute combine_chunks",
236+
)
237+
)
238+
result = type(data)._from_sequence(data._data)
239+
tm.assert_extension_array_equal(result, data)
240+
assert isinstance(result._data, pa.ChunkedArray)
241+
242+
result = type(data)._from_sequence(data._data.combine_chunks())
243+
tm.assert_extension_array_equal(result, data)
244+
assert isinstance(result._data, pa.ChunkedArray)
245+
246+
def test_from_sequence_pa_array_notimplemented(self, request):
247+
if pa_version_under6p0:
248+
request.node.add_marker(
249+
pytest.mark.xfail(
250+
raises=AttributeError,
251+
reason="month_day_nano_interval not implemented by pyarrow.",
252+
)
253+
)
254+
with pytest.raises(NotImplementedError, match="Converting strings to"):
255+
ArrowExtensionArray._from_sequence_of_strings(
256+
["12-1"], dtype=pa.month_day_nano_interval()
257+
)
258+
259+
def test_from_sequence_of_strings_pa_array(self, data, request):
260+
pa_dtype = data.dtype.pyarrow_dtype
261+
if pa_version_under3p0:
262+
request.node.add_marker(
263+
pytest.mark.xfail(
264+
reason="ChunkedArray has no attribute combine_chunks",
265+
)
266+
)
267+
elif pa.types.is_time64(pa_dtype) and pa_dtype.equals("time64[ns]"):
268+
request.node.add_marker(
269+
pytest.mark.xfail(
270+
reason="Nanosecond time parsing not supported.",
271+
)
272+
)
273+
elif pa.types.is_duration(pa_dtype):
274+
request.node.add_marker(
275+
pytest.mark.xfail(
276+
raises=pa.ArrowNotImplementedError,
277+
reason=f"pyarrow doesn't support parsing {pa_dtype}",
278+
)
279+
)
280+
elif pa.types.is_boolean(pa_dtype):
281+
request.node.add_marker(
282+
pytest.mark.xfail(
283+
reason="Iterating over ChunkedArray[bool] returns PyArrow scalars.",
284+
)
285+
)
286+
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
287+
if pa_version_under7p0:
288+
request.node.add_marker(
289+
pytest.mark.xfail(
290+
raises=pa.ArrowNotImplementedError,
291+
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
292+
)
293+
)
294+
elif is_platform_windows() and is_ci_environment():
295+
request.node.add_marker(
296+
pytest.mark.xfail(
297+
raises=pa.ArrowInvalid,
298+
reason=(
299+
"TODO: Set ARROW_TIMEZONE_DATABASE environment variable "
300+
"on CI to path to the tzdata for pyarrow."
301+
),
302+
)
303+
)
304+
elif pa_version_under6p0 and pa.types.is_temporal(pa_dtype):
305+
request.node.add_marker(
306+
pytest.mark.xfail(
307+
raises=pa.ArrowNotImplementedError,
308+
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
309+
)
310+
)
311+
pa_array = data._data.cast(pa.string())
312+
result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype)
313+
tm.assert_extension_array_equal(result, data)
314+
315+
pa_array = pa_array.combine_chunks()
316+
result = type(data)._from_sequence_of_strings(pa_array, dtype=data.dtype)
317+
tm.assert_extension_array_equal(result, data)
318+
225319

226320
@pytest.mark.xfail(
227321
raises=NotImplementedError, reason="pyarrow.ChunkedArray backing is 1D."

pandas/tests/tools/test_to_time.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,8 @@
44
import numpy as np
55
import pytest
66

7+
from pandas.compat import PY311
8+
79
from pandas import Series
810
import pandas._testing as tm
911
from pandas.core.tools.datetimes import to_time as to_time_alias
@@ -40,8 +42,9 @@ def test_parsers_time(self, time_string):
4042
def test_odd_format(self):
4143
new_string = "14.15"
4244
msg = r"Cannot convert arg \['14\.15'\] to a time"
43-
with pytest.raises(ValueError, match=msg):
44-
to_time(new_string)
45+
if not PY311:
46+
with pytest.raises(ValueError, match=msg):
47+
to_time(new_string)
4548
assert to_time(new_string, format="%H.%M") == time(14, 15)
4649

4750
def test_arraylike(self):

0 commit comments

Comments
 (0)