Skip to content

Backport PR #51545 on branch 2.0.x (TST/CLN: Remove unnecessary pyarrow version checking) #51595

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 4 additions & 11 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,14 +569,8 @@ def argsort(
) -> np.ndarray:
order = "ascending" if ascending else "descending"
null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None)
if null_placement is None or pa_version_under7p0:
# Although pc.array_sort_indices exists in version 6
# there's a bug that affects the pa.ChunkedArray backing
# https://issues.apache.org/jira/browse/ARROW-12042
fallback_performancewarning("7")
return super().argsort(
ascending=ascending, kind=kind, na_position=na_position
)
if null_placement is None:
raise ValueError(f"invalid na_position: {na_position}")

result = pc.array_sort_indices(
self._data, order=order, null_placement=null_placement
Expand Down Expand Up @@ -640,9 +634,8 @@ def fillna(
if limit is not None:
return super().fillna(value=value, method=method, limit=limit)

if method is not None and pa_version_under7p0:
# fill_null_{forward|backward} added in pyarrow 7.0
fallback_performancewarning(version="7")
if method is not None:
fallback_performancewarning()
return super().fillna(value=value, method=method, limit=limit)

if is_array_like(value):
Expand Down
39 changes: 6 additions & 33 deletions pandas/tests/base/test_value_counts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,6 @@
import numpy as np
import pytest

from pandas.compat import pa_version_under7p0
from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
DatetimeIndex,
Expand Down Expand Up @@ -48,16 +45,8 @@ def test_value_counts(index_or_series_obj):
# TODO(GH#32514): Order of entries with the same count is inconsistent
# on CI (gh-32449)
if obj.duplicated().any():
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
result = result.sort_index()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
expected = expected.sort_index()
result = result.sort_index()
expected = expected.sort_index()
tm.assert_series_equal(result, expected)


Expand Down Expand Up @@ -97,16 +86,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
if obj.duplicated().any():
# TODO(GH#32514):
# Order of entries with the same count is inconsistent on CI (gh-32449)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
expected = expected.sort_index()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
result = result.sort_index()
expected = expected.sort_index()
result = result.sort_index()

if not isinstance(result.dtype, np.dtype):
# i.e IntegerDtype
Expand All @@ -119,16 +100,8 @@ def test_value_counts_null(null_obj, index_or_series_obj):
if obj.duplicated().any():
# TODO(GH#32514):
# Order of entries with the same count is inconsistent on CI (gh-32449)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
expected = expected.sort_index()
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow",
):
result = result.sort_index()
expected = expected.sort_index()
result = result.sort_index()
tm.assert_series_equal(result, expected)


Expand Down
111 changes: 14 additions & 97 deletions pandas/tests/extension/test_arrow.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,6 @@

from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip

pytestmark = pytest.mark.filterwarnings(
"ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly"
)


@pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str)
def dtype(request):
Expand Down Expand Up @@ -311,14 +307,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request):
)
)
elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None:
if pa_version_under7p0:
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowNotImplementedError,
reason=f"pyarrow doesn't support string cast from {pa_dtype}",
)
)
elif is_platform_windows() and is_ci_environment():
if is_platform_windows() and is_ci_environment():
request.node.add_marker(
pytest.mark.xfail(
raises=pa.ArrowInvalid,
Expand Down Expand Up @@ -569,23 +558,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request):
reason=f"{pa_dtype} only has 2 unique possible values",
)
)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_groupby_extension_transform(data_for_grouping)

def test_groupby_extension_apply(
self, data_for_grouping, groupby_apply_op, request
):
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op)
super().test_groupby_extension_transform(data_for_grouping)

@pytest.mark.parametrize("as_index", [True, False])
def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
Expand All @@ -597,12 +570,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request):
reason=f"{pa_dtype} only has 2 unique possible values",
)
)
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_groupby_extension_agg(as_index, data_for_grouping)
super().test_groupby_extension_agg(as_index, data_for_grouping)

def test_in_numeric_groupby(self, data_for_grouping):
if is_string_dtype(data_for_grouping.dtype):
Expand Down Expand Up @@ -720,14 +688,20 @@ def test_view(self, data):

class TestBaseMissing(base.BaseMissingTests):
def test_fillna_no_op_returns_copy(self, data):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_fillna_no_op_returns_copy(data)
data = data[~data.isna()]

valid = data[0]
result = data.fillna(valid)
assert result is not data
self.assert_extension_array_equal(result, data)
with tm.assert_produces_warning(PerformanceWarning):
result = data.fillna(method="backfill")
assert result is not data
self.assert_extension_array_equal(result, data)

def test_fillna_series_method(self, data_missing, fillna_method):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
PerformanceWarning, fillna_method is not None, check_stacklevel=False
):
super().test_fillna_series_method(data_missing, fillna_method)

Expand Down Expand Up @@ -805,12 +779,6 @@ def test_invert(self, data, request):


class TestBaseMethods(base.BaseMethodsTests):
def test_argsort_missing_array(self, data_missing_for_sorting):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_argsort_missing_array(data_missing_for_sorting)

@pytest.mark.parametrize("periods", [1, -2])
def test_diff(self, data, periods, request):
pa_dtype = data.dtype.pyarrow_dtype
Expand All @@ -825,20 +793,10 @@ def test_diff(self, data, periods, request):
)
super().test_diff(data, periods)

@pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning")
@pytest.mark.parametrize("dropna", [True, False])
def test_value_counts(self, all_data, dropna, request):
super().test_value_counts(all_data, dropna)

def test_value_counts_with_normalize(self, data, request):
pa_dtype = data.dtype.pyarrow_dtype
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_value_counts_with_normalize(data)

def test_argmin_argmax(
self, data_for_sorting, data_missing_for_sorting, na_value, request
):
Expand Down Expand Up @@ -886,47 +844,6 @@ def test_argreduce_series(
data_missing_for_sorting, op_name, skipna, expected
)

@pytest.mark.parametrize(
"na_position, expected",
[
("last", np.array([2, 0, 1], dtype=np.dtype("intp"))),
("first", np.array([1, 2, 0], dtype=np.dtype("intp"))),
],
)
def test_nargsort(self, data_missing_for_sorting, na_position, expected):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_nargsort(data_missing_for_sorting, na_position, expected)

@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_sort_values(data_for_sorting, ascending, sort_by_key)

@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_missing(
self, data_missing_for_sorting, ascending, sort_by_key
):
with tm.maybe_produces_warning(
PerformanceWarning, pa_version_under7p0, check_stacklevel=False
):
super().test_sort_values_missing(
data_missing_for_sorting, ascending, sort_by_key
)

@pytest.mark.parametrize("ascending", [True, False])
def test_sort_values_frame(self, data_for_sorting, ascending, request):
pa_dtype = data_for_sorting.dtype.pyarrow_dtype
with tm.maybe_produces_warning(
PerformanceWarning,
pa_version_under7p0 and not pa.types.is_duration(pa_dtype),
check_stacklevel=False,
):
super().test_sort_values_frame(data_for_sorting, ascending)

def test_factorize(self, data_for_grouping, request):
pa_dtype = data_for_grouping.dtype.pyarrow_dtype
if pa.types.is_boolean(pa_dtype):
Expand Down
Loading