diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index f4b1d9e49f63a..002d0020c2df1 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -18,7 +18,7 @@ runs: - name: Set Arrow version in ${{ inputs.environment-file }} to ${{ inputs.pyarrow-version }} run: | grep -q ' - pyarrow' ${{ inputs.environment-file }} - sed -i"" -e "s/ - pyarrow<11/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }} + sed -i"" -e "s/ - pyarrow/ - pyarrow=${{ inputs.pyarrow-version }}/" ${{ inputs.environment-file }} cat ${{ inputs.environment-file }} shell: bash if: ${{ inputs.pyarrow-version }} diff --git a/ci/deps/actions-310.yaml b/ci/deps/actions-310.yaml index 24676856f9fad..28ff56d8619b9 100644 --- a/ci/deps/actions-310.yaml +++ b/ci/deps/actions-310.yaml @@ -42,7 +42,7 @@ dependencies: - psycopg2 - pymysql - pytables - - pyarrow<11 + - pyarrow - pyreadstat - python-snappy - pyxlsb diff --git a/ci/deps/actions-311.yaml b/ci/deps/actions-311.yaml index dcc5932826716..32e3fe740b431 100644 --- a/ci/deps/actions-311.yaml +++ b/ci/deps/actions-311.yaml @@ -42,7 +42,7 @@ dependencies: - psycopg2 - pymysql # - pytables>=3.8.0 # first version that supports 3.11 - - pyarrow<11 + - pyarrow - pyreadstat - python-snappy - pyxlsb diff --git a/ci/deps/actions-38-downstream_compat.yaml b/ci/deps/actions-38-downstream_compat.yaml index 8f6fe60403b18..a2f22de43fb23 100644 --- a/ci/deps/actions-38-downstream_compat.yaml +++ b/ci/deps/actions-38-downstream_compat.yaml @@ -40,7 +40,7 @@ dependencies: - openpyxl - odfpy - psycopg2 - - pyarrow<11 + - pyarrow - pymysql - pyreadstat - pytables diff --git a/ci/deps/actions-38.yaml b/ci/deps/actions-38.yaml index ea9e3fea365a0..e17941f93ecf1 100644 --- a/ci/deps/actions-38.yaml +++ b/ci/deps/actions-38.yaml @@ -40,7 +40,7 @@ dependencies: - odfpy - pandas-gbq - psycopg2 - - pyarrow<11 + - pyarrow - pymysql - pyreadstat - pytables diff --git a/ci/deps/actions-39.yaml b/ci/deps/actions-39.yaml index 80cf1c1e539b7..ed8dc6f760254 100644 --- a/ci/deps/actions-39.yaml +++ b/ci/deps/actions-39.yaml @@ -41,7 +41,7 @@ dependencies: - pandas-gbq - psycopg2 - pymysql - - pyarrow<11 + - pyarrow - pyreadstat - pytables - python-snappy diff --git a/ci/deps/circle-38-arm64.yaml b/ci/deps/circle-38-arm64.yaml index e8fc1a1459943..4d406460eab70 100644 --- a/ci/deps/circle-38-arm64.yaml +++ b/ci/deps/circle-38-arm64.yaml @@ -40,7 +40,7 @@ dependencies: - odfpy - pandas-gbq - psycopg2 - - pyarrow<11 + - pyarrow - pymysql # Not provided on ARM #- pyreadstat diff --git a/environment.yml b/environment.yml index 9169cbf08b45d..f0678abbfe211 100644 --- a/environment.yml +++ b/environment.yml @@ -43,7 +43,7 @@ dependencies: - odfpy - py - psycopg2 - - pyarrow<11 + - pyarrow - pymysql - pyreadstat - pytables diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 075beca106e6a..3a3f0b8ce61be 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -275,6 +275,18 @@ def _from_sequence_of_strings( from pandas.core.tools.timedeltas import to_timedelta scalars = to_timedelta(strings, errors="raise") + if pa_type.unit != "ns": + # GH51175: test_from_sequence_of_strings_pa_array + # attempt to parse as int64 reflecting pyarrow's + # duration to string casting behavior + mask = isna(scalars) + if not isinstance(strings, (pa.Array, pa.ChunkedArray)): + strings = pa.array(strings, type=pa.string(), from_pandas=True) + strings = pc.if_else(mask, None, strings) + try: + scalars = strings.cast(pa.int64()) + except pa.ArrowInvalid: + pass elif pa.types.is_time(pa_type): from pandas.core.tools.times import to_time diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index b968004846e8e..42cf92c6b2a35 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -240,7 +240,9 @@ def _convert_listlike( # returning arg (errors == "ignore"), and where the input is a # generator, we return a useful list-like instead of a # used-up generator - arg = np.array(list(arg), dtype=object) + if not hasattr(arg, "__array__"): + arg = list(arg) + arg = np.array(arg, dtype=object) try: td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 1dac8faa3a9e2..681d048f38485 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -32,6 +32,7 @@ pa_version_under7p0, pa_version_under8p0, pa_version_under9p0, + pa_version_under11p0, ) from pandas.errors import PerformanceWarning @@ -287,7 +288,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request): reason="Nanosecond time parsing not supported.", ) ) - elif pa.types.is_duration(pa_dtype): + elif pa_version_under11p0 and pa.types.is_duration(pa_dtype): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowNotImplementedError, @@ -1594,6 +1595,16 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series): arr.searchsorted(b) +@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"]) +def test_duration_from_strings_with_nat(unit): + # GH51175 + strings = ["1000", "NaT"] + pa_type = pa.duration(unit) + result = ArrowExtensionArray._from_sequence_of_strings(strings, dtype=pa_type) + expected = ArrowExtensionArray(pa.array([1000, None], type=pa_type)) + tm.assert_extension_array_equal(result, expected) + + def test_unsupported_dt(data): pa_dtype = data.dtype.pyarrow_dtype if not pa.types.is_temporal(pa_dtype): diff --git a/requirements-dev.txt b/requirements-dev.txt index b6992a7266600..6d9bd1c93ded0 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -32,7 +32,7 @@ openpyxl odfpy py psycopg2-binary -pyarrow<11 +pyarrow pymysql pyreadstat tables