Skip to content

Commit da92024

Browse files
lukemanley authored and phofl committed
CI: unpin pyarrow, fix failing test (#51175)
* unpin pyarrow, fix failing test
* cleanup
* handle NaT/NaN
1 parent d625904 commit da92024

File tree

12 files changed

+36 -11 lines changed

12 files changed

+36 -11 lines changed

.github/actions/setup-conda/action.yml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ runs:
1818
- name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}
1919
run: |
2020
grep -q ' - pyarrow' ${{ inputs.environment-file }}
21-
sed -i"" -e "s/ - pyarrow<11/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
21+
sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
2222
cat ${{ inputs.environment-file }}
2323
shell: bash
2424
if: ${{ inputs.pyarrow-version }}

ci/deps/actions-310.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ dependencies:
4242
- psycopg2
4343
- pymysql
4444
- pytables
45-
- pyarrow<11
45+
- pyarrow
4646
- pyreadstat
4747
- python-snappy
4848
- pyxlsb

ci/deps/actions-311.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ dependencies:
4242
- psycopg2
4343
- pymysql
4444
# - pytables>=3.8.0 # first version that supports 3.11
45-
- pyarrow<11
45+
- pyarrow
4646
- pyreadstat
4747
- python-snappy
4848
- pyxlsb

ci/deps/actions-38-downstream_compat.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- openpyxl
4141
- odfpy
4242
- psycopg2
43-
- pyarrow<11
43+
- pyarrow
4444
- pymysql
4545
- pyreadstat
4646
- pytables

ci/deps/actions-38.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- odfpy
4141
- pandas-gbq
4242
- psycopg2
43-
- pyarrow<11
43+
- pyarrow
4444
- pymysql
4545
- pyreadstat
4646
- pytables

ci/deps/actions-39.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -41,7 +41,7 @@ dependencies:
4141
- pandas-gbq
4242
- psycopg2
4343
- pymysql
44-
- pyarrow<11
44+
- pyarrow
4545
- pyreadstat
4646
- pytables
4747
- python-snappy

ci/deps/circle-38-arm64.yaml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ dependencies:
4040
- odfpy
4141
- pandas-gbq
4242
- psycopg2
43-
- pyarrow<11
43+
- pyarrow
4444
- pymysql
4545
# Not provided on ARM
4646
#- pyreadstat

environment.yml

+1 -1
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ dependencies:
4343
- odfpy
4444
- py
4545
- psycopg2
46-
- pyarrow<11
46+
- pyarrow
4747
- pymysql
4848
- pyreadstat
4949
- pytables

pandas/core/arrays/arrow/array.py

+12
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,18 @@ def _from_sequence_of_strings(
275275
from pandas.core.tools.timedeltas import to_timedelta
276276

277277
scalars = to_timedelta(strings, errors="raise")
278+
if pa_type.unit != "ns":
279+
# GH51175: test_from_sequence_of_strings_pa_array
280+
# attempt to parse as int64 reflecting pyarrow's
281+
# duration to string casting behavior
282+
mask = isna(scalars)
283+
if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
284+
strings = pa.array(strings, type=pa.string(), from_pandas=True)
285+
strings = pc.if_else(mask, None, strings)
286+
try:
287+
scalars = strings.cast(pa.int64())
288+
except pa.ArrowInvalid:
289+
pass
278290
elif pa.types.is_time(pa_type):
279291
from pandas.core.tools.times import to_time
280292

pandas/core/tools/timedeltas.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -240,7 +240,9 @@ def _convert_listlike(
240240
# returning arg (errors == "ignore"), and where the input is a
241241
# generator, we return a useful list-like instead of a
242242
# used-up generator
243-
arg = np.array(list(arg), dtype=object)
243+
if not hasattr(arg, "__array__"):
244+
arg = list(arg)
245+
arg = np.array(arg, dtype=object)
244246

245247
try:
246248
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]

pandas/tests/extension/test_arrow.py

+12 -1
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
pa_version_under7p0,
3333
pa_version_under8p0,
3434
pa_version_under9p0,
35+
pa_version_under11p0,
3536
)
3637
from pandas.errors import PerformanceWarning
3738

@@ -287,7 +288,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
287288
reason="Nanosecond time parsing not supported.",
288289
)
289290
)
290-
elif pa.types.is_duration(pa_dtype):
291+
elif pa_version_under11p0 and pa.types.is_duration(pa_dtype):
291292
request.node.add_marker(
292293
pytest.mark.xfail(
293294
raises=pa.ArrowNotImplementedError,
@@ -1594,6 +1595,16 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
15941595
arr.searchsorted(b)
15951596

15961597

1598+
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
1599+
def test_duration_from_strings_with_nat(unit):
1600+
# GH51175
1601+
strings = ["1000", "NaT"]
1602+
pa_type = pa.duration(unit)
1603+
result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa_type)
1604+
expected = ArrowExtensionArray(pa.array([1000, None], type=pa_type))
1605+
tm.assert_extension_array_equal(result, expected)
1606+
1607+
15971608
def test_unsupported_dt(data):
15981609
pa_dtype = data.dtype.pyarrow_dtype
15991610
if not pa.types.is_temporal(pa_dtype):

requirements-dev.txt

+1 -1
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,7 @@ openpyxl
3232
odfpy
3333
py
3434
psycopg2-binary
35-
pyarrow<11
35+
pyarrow
3636
pymysql
3737
pyreadstat
3838
tables

0 commit comments

Comments
 (0)