From 7878963cd22f2d7d2be5518dd726fba85afdf4b3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 21 Feb 2023 15:47:07 -0800 Subject: [PATCH 1/5] cleanups --- pandas/core/arrays/arrow/array.py | 12 +- pandas/tests/base/test_value_counts.py | 39 +------ pandas/tests/extension/test_arrow.py | 108 +----------------- pandas/tests/extension/test_string.py | 87 +------------- .../tests/frame/methods/test_combine_first.py | 25 +--- pandas/tests/indexes/test_base.py | 26 +---- pandas/tests/indexes/test_common.py | 26 +---- pandas/tests/indexes/test_setops.py | 59 ++-------- pandas/tests/io/parser/test_parse_dates.py | 8 +- pandas/tests/test_algos.py | 16 +-- 10 files changed, 37 insertions(+), 369 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index eeb252b10b1ea..ee36c10ff9999 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -569,11 +569,8 @@ def argsort( ) -> np.ndarray: order = "ascending" if ascending else "descending" null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None) - if null_placement is None or pa_version_under7p0: - # Although pc.array_sort_indices exists in version 6 - # there's a bug that affects the pa.ChunkedArray backing - # https://issues.apache.org/jira/browse/ARROW-12042 - fallback_performancewarning("7") + if null_placement is None: + fallback_performancewarning() return super().argsort( ascending=ascending, kind=kind, na_position=na_position ) @@ -640,9 +637,8 @@ def fillna( if limit is not None: return super().fillna(value=value, method=method, limit=limit) - if method is not None and pa_version_under7p0: - # fill_null_{forward|backward} added in pyarrow 7.0 - fallback_performancewarning(version="7") + if method is not None: + fallback_performancewarning() return super().fillna(value=value, method=method, limit=limit) if is_array_like(value): diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 4f5e8adbcdf93..97217430007eb 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -4,9 +4,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 -from pandas.errors import PerformanceWarning - import pandas as pd from pandas import ( DatetimeIndex, @@ -48,16 +45,8 @@ def test_value_counts(index_or_series_obj): # TODO(GH#32514): Order of entries with the same count is inconsistent # on CI (gh-32449) if obj.duplicated().any(): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - result = result.sort_index() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - expected = expected.sort_index() + result = result.sort_index() + expected = expected.sort_index() tm.assert_series_equal(result, expected) @@ -97,16 +86,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): if obj.duplicated().any(): # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - expected = expected.sort_index() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - result = result.sort_index() + expected = expected.sort_index() + result = result.sort_index() if not isinstance(result.dtype, np.dtype): # i.e IntegerDtype @@ -119,16 +100,8 @@ def test_value_counts_null(null_obj, index_or_series_obj): if obj.duplicated().any(): # TODO(GH#32514): # Order of entries with the same count is inconsistent on CI (gh-32449) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - expected = expected.sort_index() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - result = result.sort_index() + expected = expected.sort_index() + result = result.sort_index() tm.assert_series_equal(result, expected) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 705e9d55c06e7..d778c8b1c1277 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -35,7 +35,6 @@ pa_version_under9p0, pa_version_under11p0, ) -from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import is_any_int_dtype @@ -58,10 +57,6 @@ from pandas.core.arrays.arrow.dtype import ArrowDtype # isort:skip -pytestmark = pytest.mark.filterwarnings( - "ignore:.* may decrease performance. Upgrade to pyarrow >=7 to possibly" -) - @pytest.fixture(params=tm.ALL_PYARROW_DTYPES, ids=str) def dtype(request): @@ -293,14 +288,7 @@ def test_from_sequence_of_strings_pa_array(self, data, request): ) ) elif pa.types.is_timestamp(pa_dtype) and pa_dtype.tz is not None: - if pa_version_under7p0: - request.node.add_marker( - pytest.mark.xfail( - raises=pa.ArrowNotImplementedError, - reason=f"pyarrow doesn't support string cast from {pa_dtype}", - ) - ) - elif is_platform_windows() and is_ci_environment(): + if is_platform_windows() and is_ci_environment(): request.node.add_marker( pytest.mark.xfail( raises=pa.ArrowInvalid, @@ -535,23 +523,7 @@ def test_groupby_extension_transform(self, data_for_grouping, request): reason=f"{pa_dtype} only has 2 unique possible values", ) ) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and not pa.types.is_duration(pa_dtype), - check_stacklevel=False, - ): - super().test_groupby_extension_transform(data_for_grouping) - - def test_groupby_extension_apply( - self, data_for_grouping, groupby_apply_op, request - ): - pa_dtype = data_for_grouping.dtype.pyarrow_dtype - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and not pa.types.is_duration(pa_dtype), - check_stacklevel=False, - ): - super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) + super().test_groupby_extension_transform(data_for_grouping) @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping, request): @@ -563,12 +535,7 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping, request): reason=f"{pa_dtype} only has 2 unique possible values", ) ) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and not pa.types.is_duration(pa_dtype), - check_stacklevel=False, - ): - super().test_groupby_extension_agg(as_index, data_for_grouping) + super().test_groupby_extension_agg(as_index, data_for_grouping) def test_in_numeric_groupby(self, data_for_grouping): if is_string_dtype(data_for_grouping.dtype): @@ -659,17 +626,7 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): - def test_fillna_no_op_returns_copy(self, data): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under7p0, check_stacklevel=False - ): - super().test_fillna_no_op_returns_copy(data) - - def test_fillna_series_method(self, data_missing, fillna_method): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under7p0, check_stacklevel=False - ): - super().test_fillna_series_method(data_missing, fillna_method) + pass class TestBasePrinting(base.BasePrintingTests): @@ -738,12 +695,6 @@ def test_invert(self, data, request): class TestBaseMethods(base.BaseMethodsTests): - def test_argsort_missing_array(self, data_missing_for_sorting): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under7p0, check_stacklevel=False - ): - super().test_argsort_missing_array(data_missing_for_sorting) - @pytest.mark.parametrize("periods", [1, -2]) def test_diff(self, data, periods, request): pa_dtype = data.dtype.pyarrow_dtype @@ -758,20 +709,10 @@ def test_diff(self, data, periods, request): ) super().test_diff(data, periods) - @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") @pytest.mark.parametrize("dropna", [True, False]) def test_value_counts(self, all_data, dropna, request): super().test_value_counts(all_data, dropna) - def test_value_counts_with_normalize(self, data, request): - pa_dtype = data.dtype.pyarrow_dtype - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and not pa.types.is_duration(pa_dtype), - check_stacklevel=False, - ): - super().test_value_counts_with_normalize(data) - def test_argmin_argmax( self, data_for_sorting, data_missing_for_sorting, na_value, request ): @@ -804,47 +745,6 @@ def test_argreduce_series( data_missing_for_sorting, op_name, skipna, expected ) - @pytest.mark.parametrize( - "na_position, expected", - [ - ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), - ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), - ], - ) - def test_nargsort(self, data_missing_for_sorting, na_position, expected): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under7p0, check_stacklevel=False - ): - super().test_nargsort(data_missing_for_sorting, na_position, expected) - - @pytest.mark.parametrize("ascending", [True, False]) - def test_sort_values(self, data_for_sorting, ascending, sort_by_key, request): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under7p0, check_stacklevel=False - ): - super().test_sort_values(data_for_sorting, ascending, sort_by_key) - - @pytest.mark.parametrize("ascending", [True, False]) - def test_sort_values_missing( - self, data_missing_for_sorting, ascending, sort_by_key - ): - with tm.maybe_produces_warning( - PerformanceWarning, pa_version_under7p0, check_stacklevel=False - ): - super().test_sort_values_missing( - data_missing_for_sorting, ascending, sort_by_key - ) - - @pytest.mark.parametrize("ascending", [True, False]) - def test_sort_values_frame(self, data_for_sorting, ascending, request): - pa_dtype = data_for_sorting.dtype.pyarrow_dtype - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and not pa.types.is_duration(pa_dtype), - check_stacklevel=False, - ): - super().test_sort_values_frame(data_for_sorting, ascending) - def test_factorize(self, data_for_grouping, request): pa_dtype = data_for_grouping.dtype.pyarrow_dtype if pa.types.is_boolean(pa_dtype): diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index ee855bb1cde8c..a886b109d1f9c 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -256,70 +256,6 @@ def test_value_counts(self, all_data, dropna, request): self.assert_series_equal(result, expected) - @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") - def test_value_counts_with_normalize(self, data): - super().test_value_counts_with_normalize(data) - - def test_argsort_missing_array(self, data_missing_for_sorting): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_argsort_missing(data_missing_for_sorting) - - @pytest.mark.parametrize( - "na_position, expected", - [ - ("last", np.array([2, 0, 1], dtype=np.dtype("intp"))), - ("first", np.array([1, 2, 0], dtype=np.dtype("intp"))), - ], - ) - def test_nargsort(self, data_missing_for_sorting, na_position, expected): - # GH 25439 - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_nargsort(data_missing_for_sorting, na_position, expected) - - @pytest.mark.parametrize("ascending", [True, False]) - def test_sort_values(self, data_for_sorting, ascending, sort_by_key): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_sort_values(data_for_sorting, ascending, sort_by_key) - - @pytest.mark.parametrize("ascending", [True, False]) - def test_sort_values_missing( - self, data_missing_for_sorting, ascending, sort_by_key - ): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_missing_for_sorting.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_sort_values_missing( - data_missing_for_sorting, ascending, sort_by_key - ) - - @pytest.mark.parametrize("ascending", [True, False]) - def test_sort_values_frame(self, data_for_sorting, ascending): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_for_sorting.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_sort_values_frame(data_for_sorting, ascending) - class TestCasting(base.BaseCastingTests): pass @@ -349,18 +285,8 @@ class TestGroupBy(base.BaseGroupbyTests): @pytest.mark.parametrize("as_index", [True, False]) def test_groupby_extension_agg(self, as_index, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", - ): - result = df.groupby("B", as_index=as_index).A.mean() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", - ): - _, uniques = pd.factorize(data_for_grouping, sort=True) + result = df.groupby("B", as_index=as_index).A.mean() + _, uniques = pd.factorize(data_for_grouping, sort=True) if as_index: index = pd.Index(uniques, name="B") @@ -370,15 +296,6 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping): expected = pd.DataFrame({"B": uniques, "A": [3.0, 1.0, 4.0]}) self.assert_frame_equal(result, expected) - def test_groupby_extension_transform(self, data_for_grouping): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(data_for_grouping.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_groupby_extension_transform(data_for_grouping) - @pytest.mark.filterwarnings("ignore:Falling back:pandas.errors.PerformanceWarning") def test_groupby_extension_apply(self, data_for_grouping, groupby_apply_op): super().test_groupby_extension_apply(data_for_grouping, groupby_apply_op) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index f7da28a43590d..7983aace587c6 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -3,9 +3,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 -from pandas.errors import PerformanceWarning - from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import is_dtype_equal @@ -388,24 +385,12 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype ) df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]", - ): - df.set_index(["a", "b"], inplace=True) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]", - ): - df2.set_index(["a", "b"], inplace=True) + df.set_index(["a", "b"], inplace=True) + df2.set_index(["a", "b"], inplace=True) result = df.combine_first(df2) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and nullable_string_dtype == "string[pyarrow]", - ): - expected = DataFrame( - {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype - ).set_index(["a", "b"]) + expected = DataFrame( + {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype + ).set_index(["a", "b"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 665fad09f6d3c..6dbe61decfc73 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -8,14 +8,8 @@ import numpy as np import pytest -from pandas.compat import ( - IS64, - pa_version_under7p0, -) -from pandas.errors import ( - InvalidIndexError, - PerformanceWarning, -) +from pandas.compat import IS64 +from pandas.errors import InvalidIndexError from pandas.util._test_decorators import async_mark from pandas.core.dtypes.common import ( @@ -68,22 +62,6 @@ def test_new_axis(self, index): # GH#30588 multi-dimensional indexing deprecated index[None, :] - def test_argsort(self, index): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_argsort(index) - - def test_numpy_argsort(self, index): - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - super().test_numpy_argsort(index) - def test_constructor_regular(self, index): tm.assert_contains_all(index, index) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 40440bd8e0ff8..a128aac774765 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -12,10 +12,7 @@ import numpy as np import pytest -from pandas.compat import ( - IS64, - pa_version_under7p0, -) +from pandas.compat import IS64 from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import ( @@ -169,12 +166,7 @@ def test_copy_name(self, index_flat): s1 = pd.Series(2, index=first) s2 = pd.Series(3, index=second[:-1]) # See GH#13365 - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - s3 = s1 * s2 + s3 = s1 * s2 assert s3.index.name == "mario" def test_copy_name2(self, index_flat): @@ -394,16 +386,10 @@ def test_astype_preserves_name(self, index, dtype): # imaginary components discarded warn = np.ComplexWarning - is_pyarrow_str = ( - str(index.dtype) == "string[pyarrow]" - and pa_version_under7p0 - and dtype == "category" - ) try: # Some of these conversions cannot succeed so we use a try / except with tm.assert_produces_warning( warn, - raise_on_extra_warnings=is_pyarrow_str, check_stacklevel=False, ): result = index.astype(dtype) @@ -480,13 +466,7 @@ def test_sort_values_with_missing(index_with_missing, na_position, request): # Explicitly pass dtype needed for Index backed by EA e.g. IntegerArray expected = type(index_with_missing)(sorted_values, dtype=index_with_missing.dtype) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 - and getattr(index_with_missing.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - result = index_with_missing.sort_values(na_position=na_position) + result = index_with_missing.sort_values(na_position=na_position) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 445aeff4cbe79..1be4a1835de09 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -8,9 +8,6 @@ import numpy as np import pytest -from pandas.compat import pa_version_under7p0 -from pandas.errors import PerformanceWarning - from pandas.core.dtypes.cast import find_common_type from pandas import ( @@ -33,18 +30,8 @@ def test_union_same_types(index): # Union with a non-unique, non-monotonic index raises error # Only needed for bool index factory - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - idx1 = index.sort_values() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - idx2 = index.sort_values() + idx1 = index.sort_values() + idx2 = index.sort_values() assert idx1.union(idx2).dtype == idx1.dtype @@ -103,18 +90,8 @@ def test_union_different_types(index_flat, index_flat2, request): # Union with a non-unique, non-monotonic index raises error # This applies to the boolean index - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(idx1.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - idx1 = idx1.sort_values() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(idx2.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - idx2 = idx2.sort_values() + idx1 = idx1.sort_values() + idx2 = idx2.sort_values() with tm.assert_produces_warning(warn, match="'<' not supported between"): res1 = idx1.union(idx2) @@ -381,18 +358,8 @@ def test_union_unequal(self, index_flat, fname, sname, expected_name): # test copy.union(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - union = first.union(second).sort_values() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - expected = index.set_names(expected_name).sort_values() + union = first.union(second).sort_values() + expected = index.set_names(expected_name).sort_values() tm.assert_index_equal(union, expected) @pytest.mark.parametrize( @@ -458,18 +425,8 @@ def test_intersect_unequal(self, index_flat, fname, sname, expected_name): # test copy.intersection(subset) - need sort for unicode and string first = index.copy().set_names(fname) second = index[1:].set_names(sname) - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - intersect = first.intersection(second).sort_values() - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(index.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - expected = index[1:].set_names(expected_name).sort_values() + intersect = first.intersection(second).sort_values() + expected = index[1:].set_names(expected_name).sort_values() tm.assert_index_equal(intersect, expected) def test_intersection_name_retention_with_nameless(self, index): diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 440a8597e14f2..878df24be4d9c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -19,7 +19,6 @@ from pandas._libs.tslibs import parsing from pandas._libs.tslibs.parsing import py_parse_datetime_string -from pandas.compat.pyarrow import pa_version_under7p0 import pandas as pd from pandas import ( @@ -462,10 +461,7 @@ def test_date_col_as_index_col(all_parsers): columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], index=index, ) - if parser.engine == "pyarrow" and not pa_version_under7p0: - # https://github.com/pandas-dev/pandas/issues/44231 - # pyarrow 6.0 starts to infer time type - expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time tm.assert_frame_equal(result, expected) @@ -963,8 +959,6 @@ def test_parse_dates_custom_euro_format(all_parsers, kwargs): def test_parse_tz_aware(all_parsers, request): # See gh-1693 parser = all_parsers - if parser.engine == "pyarrow" and pa_version_under7p0: - request.node.add_marker(pytest.mark.xfail(reason="Fails for pyarrow < 7.0")) data = "Date,x\n2012-06-13T01:39:00Z,0.5" result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index b00b28f1e6033..8dc5f301793b4 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -9,8 +9,6 @@ algos as libalgos, hashtable as ht, ) -from pandas.compat import pa_version_under7p0 -from pandas.errors import PerformanceWarning import pandas.util._test_decorators as td from pandas.core.dtypes.common import ( @@ -55,13 +53,7 @@ class TestFactorize: @pytest.mark.parametrize("sort", [True, False]) def test_factorize(self, index_or_series_obj, sort): obj = index_or_series_obj - with tm.maybe_produces_warning( - PerformanceWarning, - sort - and pa_version_under7p0 - and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - result_codes, result_uniques = obj.factorize(sort=sort) + result_codes, result_uniques = obj.factorize(sort=sort) constructor = Index if isinstance(obj, MultiIndex): @@ -78,11 +70,7 @@ def test_factorize(self, index_or_series_obj, sort): expected_uniques = expected_uniques.astype(object) if sort: - with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and getattr(obj.dtype, "storage", "") == "pyarrow", - ): - expected_uniques = expected_uniques.sort_values() + expected_uniques = expected_uniques.sort_values() # construct an integer ndarray so that # `expected_uniques.take(expected_codes)` is equal to `obj` From 1af89abf5339dfddef6c1f8d46e52ab29122447f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 21 Feb 2023 17:41:43 -0800 Subject: [PATCH 2/5] Add back some checks --- pandas/core/arrays/arrow/array.py | 5 +---- pandas/tests/indexes/test_common.py | 2 ++ pandas/tests/io/parser/test_parse_dates.py | 5 ++++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index ee36c10ff9999..08958a521e7f9 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -570,10 +570,7 @@ def argsort( order = "ascending" if ascending else "descending" null_placement = {"last": "at_end", "first": "at_start"}.get(na_position, None) if null_placement is None: - fallback_performancewarning() - return super().argsort( - ascending=ascending, kind=kind, na_position=na_position - ) + raise ValueError(f"invalid na_position: {na_position}") result = pc.array_sort_indices( self._data, order=order, null_placement=null_placement diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index a128aac774765..d41301226ec89 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -386,10 +386,12 @@ def test_astype_preserves_name(self, index, dtype): # imaginary components discarded warn = np.ComplexWarning + is_pyarrow_str = str(index.dtype) == "string[pyarrow]" and dtype == "category" try: # Some of these conversions cannot succeed so we use a try / except with tm.assert_produces_warning( warn, + raise_on_extra_warnings=is_pyarrow_str, check_stacklevel=False, ): result = index.astype(dtype) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 878df24be4d9c..7bb7ca5c6d159 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -461,7 +461,10 @@ def test_date_col_as_index_col(all_parsers): columns=["X0", "X2", "X3", "X4", "X5", "X6", "X7"], index=index, ) - expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time + if parser.engine == "pyarrow": + # https://github.com/pandas-dev/pandas/issues/44231 + # pyarrow 6.0 starts to infer time type + expected["X2"] = pd.to_datetime("1970-01-01" + expected["X2"]).dt.time tm.assert_frame_equal(result, expected) From bea39d9cd9f3fc057eb0174220e93e6a14c94356 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 22 Feb 2023 09:57:05 -0800 Subject: [PATCH 3/5] Unneeded warning checking --- pandas/tests/indexes/test_common.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index d41301226ec89..83b32bb1230c2 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -13,7 +13,6 @@ import pytest from pandas.compat import IS64 -from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import ( is_integer_dtype, @@ -437,12 +436,7 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): - with tm.maybe_produces_warning( - PerformanceWarning, - getattr(index_with_missing.dtype, "storage", "") == "pyarrow", - check_stacklevel=False, - ): - index_with_missing.sort_values(na_position=na_position) + index_with_missing.sort_values(na_position=na_position) @pytest.mark.parametrize("na_position", ["first", "last"]) From fa2005fd2c49e09a1c1810700e5cc4c5d8ca2ea3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 22 Feb 2023 16:47:24 -0800 Subject: [PATCH 4/5] fix fillna fallback warnings --- pandas/tests/extension/test_arrow.py | 19 ++++++++++++++++++- pandas/tests/extension/test_string.py | 17 ++++++++++++----- 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index b09139a03f027..6bdca98b2edb5 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -36,6 +36,7 @@ pa_version_under9p0, pa_version_under11p0, ) +from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import is_any_int_dtype @@ -678,7 +679,23 @@ def test_view(self, data): class TestBaseMissing(base.BaseMissingTests): - pass + def test_fillna_no_op_returns_copy(self, data): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + self.assert_extension_array_equal(result, data) + with tm.assert_produces_warning(PerformanceWarning): + result = data.fillna(method="backfill") + assert result is not data + self.assert_extension_array_equal(result, data) + + def test_fillna_series_method(self, data_missing, fillna_method): + with tm.maybe_produces_warning( + PerformanceWarning, fillna_method is not None, check_stacklevel=False + ): + super().test_fillna_series_method(data_missing, fillna_method) class TestBasePrinting(base.BasePrintingTests): diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index a886b109d1f9c..4fb98d67414e7 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -159,17 +159,24 @@ def test_dropna_array(self, data_missing): self.assert_extension_array_equal(result, expected) def test_fillna_no_op_returns_copy(self, data): + data = data[~data.isna()] + + valid = data[0] + result = data.fillna(valid) + assert result is not data + self.assert_extension_array_equal(result, data) + with tm.maybe_produces_warning( - PerformanceWarning, - pa_version_under7p0 and data.dtype.storage == "pyarrow", - check_stacklevel=False, + PerformanceWarning, data.dtype.storage == "pyarrow" ): - super().test_fillna_no_op_returns_copy(data) + result = data.fillna(method="backfill") + assert result is not data + self.assert_extension_array_equal(result, data) def test_fillna_series_method(self, data_missing, fillna_method): with tm.maybe_produces_warning( PerformanceWarning, - pa_version_under7p0 and data_missing.dtype.storage == "pyarrow", + fillna_method is not None and data_missing.dtype.storage == "pyarrow", check_stacklevel=False, ): super().test_fillna_series_method(data_missing, fillna_method) From fcb9fce212f330b4d28d4ef822d8e95eb7514966 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 23 Feb 2023 08:27:37 -0800 Subject: [PATCH 5/5] Remove tm.assert_produces_warning --- pandas/tests/indexes/test_common.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 93cb93ebc4468..83b32bb1230c2 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -435,15 +435,8 @@ def test_hasnans_isnans(self, index_flat): @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - dtype = index_with_missing.dtype - warning = ( - PerformanceWarning - if dtype.name == "string" and dtype.storage == "pyarrow" - else None - ) with pytest.raises(ValueError, match=f"invalid na_position: {na_position}"): - with tm.assert_produces_warning(warning): - index_with_missing.sort_values(na_position=na_position) + index_with_missing.sort_values(na_position=na_position) @pytest.mark.parametrize("na_position", ["first", "last"])