Skip to content

CI: unpin pyarrow, fix failing test #51175

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/actions/setup-conda/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ runs:
- name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }}
run: |
grep -q ' - pyarrow' ${{ inputs.environment-file }}
sed -i"" -e "s/ - pyarrow<11/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }}
cat ${{ inputs.environment-file }}
shell: bash
if: ${{ inputs.pyarrow-version }}
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-310.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ dependencies:
- psycopg2
- pymysql
- pytables
- pyarrow<11
- pyarrow
- pyreadstat
- python-snappy
- pyxlsb
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-311.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ dependencies:
- psycopg2
- pymysql
# - pytables>=3.8.0 # first version that supports 3.11
- pyarrow<11
- pyarrow
- pyreadstat
- python-snappy
- pyxlsb
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-38-downstream_compat.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies:
- openpyxl
- odfpy
- psycopg2
- pyarrow<11
- pyarrow
- pymysql
- pyreadstat
- pytables
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-38.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies:
- odfpy
- pandas-gbq
- psycopg2
- pyarrow<11
- pyarrow
- pymysql
- pyreadstat
- pytables
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/actions-39.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ dependencies:
- pandas-gbq
- psycopg2
- pymysql
- pyarrow<11
- pyarrow
- pyreadstat
- pytables
- python-snappy
Expand Down
2 changes: 1 addition & 1 deletion ci/deps/circle-38-arm64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ dependencies:
- odfpy
- pandas-gbq
- psycopg2
- pyarrow<11
- pyarrow
- pymysql
# Not provided on ARM
#- pyreadstat
Expand Down
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ dependencies:
- odfpy
- py
- psycopg2
- pyarrow<11
- pyarrow
- pymysql
- pyreadstat
- pytables
Expand Down
12 changes: 12 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,18 @@ def _from_sequence_of_strings(
from pandas.core.tools.timedeltas import to_timedelta

scalars = to_timedelta(strings, errors="raise")
if pa_type.unit != "ns":
# GH51175 (test_from_sequence_of_strings_pa_array): pyarrow casts
# durations to strings as plain integers, so first try to parse the
# strings back as int64; on failure keep the to_timedelta result
mask = isna(scalars)
if not isinstance(strings, (pa.Array, pa.ChunkedArray)):
strings = pa.array(strings, type=pa.string(), from_pandas=True)
strings = pc.if_else(mask, None, strings)
try:
scalars = strings.cast(pa.int64())
except pa.ArrowInvalid:
pass
elif pa.types.is_time(pa_type):
from pandas.core.tools.times import to_time

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/tools/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,9 @@ def _convert_listlike(
# returning arg (errors == "ignore"), and where the input is a
# generator, we return a useful list-like instead of a
# used-up generator
arg = np.array(list(arg), dtype=object)
if not hasattr(arg, "__array__"):
arg = list(arg)
arg = np.array(arg, dtype=object)

try:
td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0]
Expand Down
13 changes: 12 additions & 1 deletion pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
pa_version_under7p0,
pa_version_under8p0,
pa_version_under9p0,
pa_version_under11p0,
)
from pandas.errors import PerformanceWarning

Expand Down Expand Up @@ -287,7 +288,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
reason="Nanosecond time parsing not supported.",
)
)
elif pa.types.is_duration(pa_dtype):
elif pa_version_under11p0 and pa.types.is_duration(pa_dtype):
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowNotImplementedError,
Expand Down Expand Up @@ -1594,6 +1595,16 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
arr.searchsorted(b)


@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_duration_from_strings_with_nat(unit):
    """Check that ["1000", "NaT"] parses to [1000, null] for each unit."""
    # GH51175
    duration_type = pa.duration(unit)
    parsed = ArrowExtensionArray._from_sequence_of_strings(
        ["1000", "NaT"], dtype=duration_type
    )
    # Build the expected storage directly in pyarrow; None is the missing slot.
    wanted_storage = pa.array([1000, None], type=duration_type)
    tm.assert_extension_array_equal(parsed, ArrowExtensionArray(wanted_storage))


def test_unsupported_dt(data):
pa_dtype = data.dtype.pyarrow_dtype
if not pa.types.is_temporal(pa_dtype):
Expand Down
2 changes: 1 addition & 1 deletion requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ openpyxl
odfpy
py
psycopg2-binary
pyarrow<11
pyarrow
pymysql
pyreadstat
tables
Expand Down