From 3ca64e27c4074ee99df36cbbdc7e312448262b95 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Nov 2019 15:05:23 +0000 Subject: [PATCH 01/14] Add types to assert_frame_equal --- pandas/util/testing.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index f3b0226547c78..57fddecb2acd5 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1221,21 +1221,21 @@ def assert_series_equal( # This could be refactored to use the NDFrame.equals method def assert_frame_equal( - left, - right, - check_dtype=True, - check_index_type="equiv", - check_column_type="equiv", - check_frame_type=True, - check_less_precise=False, - check_names=True, - by_blocks=False, - check_exact=False, - check_datetimelike_compat=False, - check_categorical=True, - check_like=False, - obj="DataFrame", -): + left: DataFrame, + right: DataFrame, + check_dtype: bool = True, + check_index_type: str = "equiv", + check_column_type: str = "equiv", + check_frame_type: bool = True, + check_less_precise: bool = False, + check_names: bool = True, + by_blocks: bool = False, + check_exact: bool = False, + check_datetimelike_compat: bool = False, + check_categorical: bool = True, + check_like: bool = False, + obj: str = "DataFrame", +) -> None: """ Check that left and right DataFrame are equal. From 17caa4202e35e17396df2fed920b791a55fe83d9 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Nov 2019 16:35:15 +0000 Subject: [PATCH 02/14] Add types to all assert_*_equal test functions --- pandas/util/testing.py | 118 +++++++++++++++++++++++++++-------------- 1 file changed, 79 insertions(+), 39 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 57fddecb2acd5..0c60fb2fd96e2 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -8,7 +8,7 @@ from shutil import rmtree import string import tempfile -from typing import Union, cast +from typing import Optional, Union, cast import warnings import zipfile @@ -53,6 +53,7 @@ Series, bdate_range, ) +from pandas._typing import AnyArrayLike from pandas.core.algorithms import take_1d from pandas.core.arrays import ( DatetimeArray, @@ -806,8 +807,12 @@ def assert_is_sorted(seq): def assert_categorical_equal( - left, right, check_dtype=True, check_category_order=True, obj="Categorical" -): + left: Categorical, + right: Categorical, + check_dtype: bool = True, + check_category_order: bool = True, + obj: str = "Categorical" +) -> None: """Test that Categoricals are equivalent. Parameters @@ -852,7 +857,12 @@ def assert_categorical_equal( assert_attr_equal("ordered", left, right, obj=obj) -def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray"): +def assert_interval_array_equal( + left: IntervalArray, + right: IntervalArray, + exact: str = "equiv", + obj: str = "IntervalArray" +) -> None: """Test that two IntervalArrays are equivalent. 
Parameters @@ -878,7 +888,11 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray") assert_attr_equal("closed", left, right, obj=obj) -def assert_period_array_equal(left, right, obj="PeriodArray"): +def assert_period_array_equal( + left: PeriodArray, + right: PeriodArray, + obj: str = "PeriodArray" +) -> None: _check_isinstance(left, right, PeriodArray) assert_numpy_array_equal( @@ -887,7 +901,11 @@ def assert_period_array_equal(left, right, obj="PeriodArray"): assert_attr_equal("freq", left, right, obj=obj) -def assert_datetime_array_equal(left, right, obj="DatetimeArray"): +def assert_datetime_array_equal( + left: DatetimeArray, + right: DatetimeArray, + obj: str = "DatetimeArray" +) -> None: __tracebackhide__ = True _check_isinstance(left, right, DatetimeArray) @@ -896,7 +914,11 @@ def assert_datetime_array_equal(left, right, obj="DatetimeArray"): assert_attr_equal("tz", left, right, obj=obj) -def assert_timedelta_array_equal(left, right, obj="TimedeltaArray"): +def assert_timedelta_array_equal( + left: TimedeltaArray, + right: TimedeltaArray, + obj: str = "TimedeltaArray" +) -> None: __tracebackhide__ = True _check_isinstance(left, right, TimedeltaArray) assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) @@ -931,13 +953,13 @@ def raise_assert_detail(obj, message, left, right, diff=None): def assert_numpy_array_equal( - left, - right, - strict_nan=False, - check_dtype=True, - err_msg=None, - check_same=None, - obj="numpy array", + left: np.ndarray, + right: np.ndarray, + strict_nan: bool = False, + check_dtype: bool = True, + err_msg: Optional[str] = None, + check_same: Optional[str] = None, + obj: str = "numpy array", ): """ Checks that 'np.ndarray' is equivalent @@ -1067,18 +1089,18 @@ def assert_extension_array_equal( # This could be refactored to use the NDFrame.equals method def assert_series_equal( - left, - right, - check_dtype=True, - check_index_type="equiv", - check_series_type=True, - check_less_precise=False, - check_names=True, - check_exact=False, - check_datetimelike_compat=False, - check_categorical=True, - obj="Series", -): + left: Series, + right: Series, + check_dtype: bool = True, + check_index_type: str = "equiv", + check_series_type: bool = True, + check_less_precise: bool = False, + check_names: bool = True, + check_exact: bool = False, + check_datetimelike_compat: bool = False, + check_categorical: bool = True, + obj: str = "Series", +) -> None: """ Check that left and right Series are equal. @@ -1185,8 +1207,13 @@ def assert_series_equal( right._internal_get_values(), check_dtype=check_dtype, ) - elif is_interval_dtype(left) or is_interval_dtype(right): - assert_interval_array_equal(left.array, right.array) + elif is_interval_dtype(left) or is_interval_dtype(left): + # must cast to interval dtype to keep mypy happy + assert is_interval_dtype(right) + assert is_interval_dtype(left) + left_array = IntervalArray(left.array) + right_array = IntervalArray(right.array) + assert_interval_array_equal(left_array, right_array) elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype): # .values is an ndarray, but ._values is the ExtensionArray. # TODO: Use .array @@ -1403,7 +1430,11 @@ def assert_frame_equal( ) -def assert_equal(left, right, **kwargs): +def assert_equal( + left: Union[DataFrame, AnyArrayLike], + right: Union[DataFrame, AnyArrayLike], + **kwargs +) -> None: """ Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. 
@@ -1415,27 +1446,36 @@ def assert_equal(left, right, **kwargs): """ __tracebackhide__ = True - if isinstance(left, pd.Index): + if isinstance(left, Index): + assert isinstance(right, Index) assert_index_equal(left, right, **kwargs) - elif isinstance(left, pd.Series): + elif isinstance(left, Series): + assert isinstance(right, Series) assert_series_equal(left, right, **kwargs) - elif isinstance(left, pd.DataFrame): + elif isinstance(left, DataFrame): + assert isinstance(right, DataFrame) assert_frame_equal(left, right, **kwargs) elif isinstance(left, IntervalArray): + assert isinstance(right, IntervalArray) assert_interval_array_equal(left, right, **kwargs) elif isinstance(left, PeriodArray): + assert isinstance(right, PeriodArray) assert_period_array_equal(left, right, **kwargs) elif isinstance(left, DatetimeArray): + assert isinstance(right, DatetimeArray) assert_datetime_array_equal(left, right, **kwargs) elif isinstance(left, TimedeltaArray): + assert isinstance(right, TimedeltaArray) assert_timedelta_array_equal(left, right, **kwargs) elif isinstance(left, ExtensionArray): + assert isinstance(right, ExtensionArray) assert_extension_array_equal(left, right, **kwargs) elif isinstance(left, np.ndarray): + assert isinstance(right, np.ndarray) assert_numpy_array_equal(left, right, **kwargs) elif isinstance(left, str): assert kwargs == {} - return left == right + assert left == right else: raise NotImplementedError(type(left)) @@ -1497,12 +1537,12 @@ def to_array(obj): def assert_sp_array_equal( - left, - right, - check_dtype=True, - check_kind=True, - check_fill_value=True, - consolidate_block_indices=False, + left: pd.SparseArray, + right: pd.SparseArray, + check_dtype: bool = True, + check_kind: bool = True, + check_fill_value: bool = True, + consolidate_block_indices: bool = False, ): """Check that the left and right SparseArray are equal. From 9d56cfc7a164afec77b2701b20101c600f6982b6 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Nov 2019 16:39:54 +0000 Subject: [PATCH 03/14] black --- pandas/util/testing.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0c60fb2fd96e2..45dd3262164bc 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -811,7 +811,7 @@ def assert_categorical_equal( right: Categorical, check_dtype: bool = True, check_category_order: bool = True, - obj: str = "Categorical" + obj: str = "Categorical", ) -> None: """Test that Categoricals are equivalent. @@ -858,10 +858,10 @@ def assert_categorical_equal( def assert_interval_array_equal( - left: IntervalArray, - right: IntervalArray, - exact: str = "equiv", - obj: str = "IntervalArray" + left: IntervalArray, + right: IntervalArray, + exact: str = "equiv", + obj: str = "IntervalArray", ) -> None: """Test that two IntervalArrays are equivalent. 
@@ -889,9 +889,7 @@ def assert_interval_array_equal( def assert_period_array_equal( - left: PeriodArray, - right: PeriodArray, - obj: str = "PeriodArray" + left: PeriodArray, right: PeriodArray, obj: str = "PeriodArray" ) -> None: _check_isinstance(left, right, PeriodArray) @@ -902,9 +900,7 @@ def assert_period_array_equal( def assert_datetime_array_equal( - left: DatetimeArray, - right: DatetimeArray, - obj: str = "DatetimeArray" + left: DatetimeArray, right: DatetimeArray, obj: str = "DatetimeArray" ) -> None: __tracebackhide__ = True _check_isinstance(left, right, DatetimeArray) @@ -915,9 +911,7 @@ def assert_datetime_array_equal( def assert_timedelta_array_equal( - left: TimedeltaArray, - right: TimedeltaArray, - obj: str = "TimedeltaArray" + left: TimedeltaArray, right: TimedeltaArray, obj: str = "TimedeltaArray" ) -> None: __tracebackhide__ = True _check_isinstance(left, right, TimedeltaArray) @@ -1431,9 +1425,9 @@ def assert_frame_equal( def assert_equal( - left: Union[DataFrame, AnyArrayLike], - right: Union[DataFrame, AnyArrayLike], - **kwargs + left: Union[DataFrame, AnyArrayLike], + right: Union[DataFrame, AnyArrayLike], + **kwargs ) -> None: """ Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. From f7392ddad4f608190a331f3bc69d7dc6f2a60aed Mon Sep 17 00:00:00 2001 From: Samuel Date: Sat, 2 Nov 2019 16:39:54 +0000 Subject: [PATCH 04/14] black --- pandas/util/testing.py | 30 ++++++++++++------------------ 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 0c60fb2fd96e2..63cdc16295f77 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -811,7 +811,7 @@ def assert_categorical_equal( right: Categorical, check_dtype: bool = True, check_category_order: bool = True, - obj: str = "Categorical" + obj: str = "Categorical", ) -> None: """Test that Categoricals are equivalent. @@ -858,10 +858,10 @@ def assert_categorical_equal( def assert_interval_array_equal( - left: IntervalArray, - right: IntervalArray, - exact: str = "equiv", - obj: str = "IntervalArray" + left: IntervalArray, + right: IntervalArray, + exact: str = "equiv", + obj: str = "IntervalArray", ) -> None: """Test that two IntervalArrays are equivalent. 
@@ -889,9 +889,7 @@ def assert_interval_array_equal( def assert_period_array_equal( - left: PeriodArray, - right: PeriodArray, - obj: str = "PeriodArray" + left: PeriodArray, right: PeriodArray, obj: str = "PeriodArray" ) -> None: _check_isinstance(left, right, PeriodArray) @@ -902,9 +900,7 @@ def assert_period_array_equal( def assert_datetime_array_equal( - left: DatetimeArray, - right: DatetimeArray, - obj: str = "DatetimeArray" + left: DatetimeArray, right: DatetimeArray, obj: str = "DatetimeArray" ) -> None: __tracebackhide__ = True _check_isinstance(left, right, DatetimeArray) @@ -915,9 +911,7 @@ def assert_datetime_array_equal( def assert_timedelta_array_equal( - left: TimedeltaArray, - right: TimedeltaArray, - obj: str = "TimedeltaArray" + left: TimedeltaArray, right: TimedeltaArray, obj: str = "TimedeltaArray" ) -> None: __tracebackhide__ = True _check_isinstance(left, right, TimedeltaArray) @@ -960,7 +954,7 @@ def assert_numpy_array_equal( err_msg: Optional[str] = None, check_same: Optional[str] = None, obj: str = "numpy array", -): +) -> None: """ Checks that 'np.ndarray' is equivalent Parameters @@ -1431,9 +1425,9 @@ def assert_frame_equal( def assert_equal( - left: Union[DataFrame, AnyArrayLike], - right: Union[DataFrame, AnyArrayLike], - **kwargs + left: Union[DataFrame, AnyArrayLike], + right: Union[DataFrame, AnyArrayLike], + **kwargs ) -> None: """ Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. From daa9c87f593bafa70d19fda516d5d5d1e76c8a62 Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 6 Nov 2019 22:42:22 +0000 Subject: [PATCH 05/14] Remove unneeded asserts --- pandas/util/testing.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 63cdc16295f77..d47dfb6c97d69 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1203,8 +1203,6 @@ def assert_series_equal( ) elif is_interval_dtype(left) or is_interval_dtype(left): # must cast to interval dtype to keep mypy happy - assert is_interval_dtype(right) - assert is_interval_dtype(left) left_array = IntervalArray(left.array) right_array = IntervalArray(right.array) assert_interval_array_equal(left_array, right_array) From 0c2f6926f0de23e26ff8983ff14100672a01e99b Mon Sep 17 00:00:00 2001 From: Samuel Date: Wed, 6 Nov 2019 23:10:48 +0000 Subject: [PATCH 06/14] Preserve semantics but keep mypy happy for assert_index_equal --- pandas/util/testing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d47dfb6c97d69..c8beae6a2c7ca 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1439,7 +1439,8 @@ def assert_equal( __tracebackhide__ = True if isinstance(left, Index): - assert isinstance(right, Index) + _check_isinstance(left, right, Index) + right = Index(right) assert_index_equal(left, right, **kwargs) elif isinstance(left, Series): assert isinstance(right, Series) From f9f4e7c8ab949ed8231600dee8c3c59d5a69706a Mon Sep 17 00:00:00 2001 From: Samuel Date: Fri, 8 Nov 2019 07:02:54 +0000 Subject: [PATCH 07/14] Use typing.cast instead of creating a new object for mypy --- pandas/util/testing.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index c8beae6a2c7ca..e9a22f2f38a28 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -877,14 +877,14 @@ def assert_interval_array_equal( Specify object name being compared, internally used to show 
appropriate assertion message """ - _check_isinstance(left, right, IntervalArray) - assert_index_equal( left.left, right.left, exact=exact, obj="{obj}.left".format(obj=obj) ) assert_index_equal( left.right, right.right, exact=exact, obj="{obj}.left".format(obj=obj) ) + left = cast(IntervalArray, left) + right = cast(IntervalArray, right) assert_attr_equal("closed", left, right, obj=obj) @@ -1203,8 +1203,8 @@ def assert_series_equal( ) elif is_interval_dtype(left) or is_interval_dtype(left): # must cast to interval dtype to keep mypy happy - left_array = IntervalArray(left.array) - right_array = IntervalArray(right.array) + left_array = cast(IntervalArray, left.array) + right_array = cast(IntervalArray, right.array) assert_interval_array_equal(left_array, right_array) elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype): # .values is an ndarray, but ._values is the ExtensionArray. @@ -1439,32 +1439,31 @@ def assert_equal( __tracebackhide__ = True if isinstance(left, Index): - _check_isinstance(left, right, Index) - right = Index(right) + right = cast(Index, right) assert_index_equal(left, right, **kwargs) elif isinstance(left, Series): - assert isinstance(right, Series) + right = cast(Series, right) assert_series_equal(left, right, **kwargs) elif isinstance(left, DataFrame): - assert isinstance(right, DataFrame) + right = cast(DataFrame, right) assert_frame_equal(left, right, **kwargs) elif isinstance(left, IntervalArray): - assert isinstance(right, IntervalArray) + right = cast(IntervalArray, right) assert_interval_array_equal(left, right, **kwargs) elif isinstance(left, PeriodArray): - assert isinstance(right, PeriodArray) + right = cast(PeriodArray, right) assert_period_array_equal(left, right, **kwargs) elif isinstance(left, DatetimeArray): - assert isinstance(right, DatetimeArray) + right = cast(DatetimeArray, right) assert_datetime_array_equal(left, right, **kwargs) elif isinstance(left, TimedeltaArray): - assert isinstance(right, TimedeltaArray) + right = cast(TimedeltaArray, cast) assert_timedelta_array_equal(left, right, **kwargs) elif isinstance(left, ExtensionArray): - assert isinstance(right, ExtensionArray) + right = cast(ExtensionArray, right) assert_extension_array_equal(left, right, **kwargs) elif isinstance(left, np.ndarray): - assert isinstance(right, np.ndarray) + right = cast(np.ndarray, right) assert_numpy_array_equal(left, right, **kwargs) elif isinstance(left, str): assert kwargs == {} From eb4c25a84d38a513458e2fce7331b1fda8d49da5 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 10 Nov 2019 08:52:00 +0000 Subject: [PATCH 08/14] Fix incorrect timedelta cast for mypy --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index e9a22f2f38a28..7ca8132ce2a14 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1457,7 +1457,7 @@ def assert_equal( right = cast(DatetimeArray, right) assert_datetime_array_equal(left, right, **kwargs) elif isinstance(left, TimedeltaArray): - right = cast(TimedeltaArray, cast) + right = cast(TimedeltaArray, right) assert_timedelta_array_equal(left, right, **kwargs) elif isinstance(left, ExtensionArray): right = cast(ExtensionArray, right) From 7a2ae46d8ec5e002343e7e0892efca8551fba48f Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 17 Nov 2019 20:13:02 +0000 Subject: [PATCH 09/14] Remove redundant casts --- pandas/util/testing.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/util/testing.py 
b/pandas/util/testing.py index 7ca8132ce2a14..3c8755a0cc831 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -883,8 +883,6 @@ def assert_interval_array_equal( assert_index_equal( left.right, right.right, exact=exact, obj="{obj}.left".format(obj=obj) ) - left = cast(IntervalArray, left) - right = cast(IntervalArray, right) assert_attr_equal("closed", left, right, obj=obj) @@ -1202,7 +1200,6 @@ def assert_series_equal( check_dtype=check_dtype, ) elif is_interval_dtype(left) or is_interval_dtype(left): - # must cast to interval dtype to keep mypy happy left_array = cast(IntervalArray, left.array) right_array = cast(IntervalArray, right.array) assert_interval_array_equal(left_array, right_array) From a735027bc208f5c92c0eb15f62320d1f7b651ed1 Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 17 Nov 2019 20:21:25 +0000 Subject: [PATCH 10/14] fix incorrect condition --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3c8755a0cc831..b3e84e1757079 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1199,7 +1199,7 @@ def assert_series_equal( right._internal_get_values(), check_dtype=check_dtype, ) - elif is_interval_dtype(left) or is_interval_dtype(left): + elif is_interval_dtype(left) or is_interval_dtype(right): left_array = cast(IntervalArray, left.array) right_array = cast(IntervalArray, right.array) assert_interval_array_equal(left_array, right_array) From 6d38c1bf039196ccc4587a35c363bb0f254fb06d Mon Sep 17 00:00:00 2001 From: Samuel Date: Sun, 17 Nov 2019 23:40:59 +0000 Subject: [PATCH 11/14] black --- pandas/core/algorithms.py | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/grouper.py | 6 +++++- pandas/core/indexes/base.py | 2 +- pandas/core/indexing.py | 6 +++--- pandas/core/internals/managers.py | 2 +- pandas/io/common.py | 4 ++-- pandas/io/parsers.py | 14 ++++++++++++-- pandas/io/stata.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 12 ++++++++---- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +++--- pandas/tests/indexes/period/test_construction.py | 2 +- pandas/tests/indexing/multiindex/test_getitem.py | 2 +- pandas/tests/indexing/multiindex/test_xs.py | 2 +- pandas/tests/indexing/test_callable.py | 12 +++++++++--- pandas/tests/io/parser/test_index_col.py | 4 ++-- pandas/tests/reductions/test_reductions.py | 4 ++-- pandas/tests/test_algos.py | 10 +++++----- pandas/tests/test_nanops.py | 2 +- 21 files changed, 62 insertions(+), 38 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c70e623778315..cc6c23929d49c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1155,7 +1155,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - ns, = np.nonzero(arr <= kth_val) + (ns,) = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all": diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 40efc4c65476a..7d8cc0b731017 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4829,7 +4829,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): duplicated = self.duplicated(subset, keep=keep) if inplace: - inds, = (-duplicated)._ndarray_values.nonzero() + (inds,) = (-duplicated)._ndarray_values.nonzero() new_data = self._data.take(inds) self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py 
b/pandas/core/generic.py index f88c26c7bc782..fa43206b86ec4 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3613,7 +3613,7 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: - inds, = loc.nonzero() + (inds,) = loc.nonzero() return self.take(inds, axis=axis) else: return self.take(loc, axis=axis) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d7eaaca5ac83a..d6beefbf31db7 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -284,7 +284,11 @@ def __init__( if self.name is None: self.name = index.names[level] - self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501 + ( + self.grouper, + self._labels, + self._group_index, + ) = index._get_grouper_for_level( # noqa: E501 self.grouper, level ) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 187c7e2f3a7f7..954607a0dc6de 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1873,7 +1873,7 @@ def _isnan(self): @cache_readonly def _nan_idxs(self): if self._can_hold_na: - w, = self._isnan.nonzero() + (w,) = self._isnan.nonzero() return w else: return np.array([], dtype=np.int64) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 44c786f003369..1315e9d5b1c3f 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -320,7 +320,7 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value if not take_split_path and self.obj._data.blocks: - blk, = self.obj._data.blocks + (blk,) = self.obj._data.blocks if 1 < blk.ndim: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) @@ -1120,7 +1120,7 @@ def _getitem_iterable(self, key, axis: int): if com.is_bool_indexer(key): # A boolean indexer key = check_bool_indexer(labels, key) - inds, = key.nonzero() + (inds,) = key.nonzero() return self.obj.take(inds, axis=axis) else: # A collection of keys @@ -1264,7 +1264,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): if com.is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) - inds, = obj.nonzero() + (inds,) = obj.nonzero() return inds else: # When setting, missing keys are not allowed, even with .loc: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index c47aaf7c773c4..db782b4550907 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1868,7 +1868,7 @@ def _shape_compat(x): def _interleaved_dtype( - blocks: List[Block] + blocks: List[Block], ) -> Optional[Union[np.dtype, ExtensionDtype]]: """Find the common dtype for `blocks`. diff --git a/pandas/io/common.py b/pandas/io/common.py index 0bef14e4999c7..e08fd37e65ad9 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -109,7 +109,7 @@ def _is_url(url) -> bool: def _expand_user( - filepath_or_buffer: FilePathOrBuffer[AnyStr] + filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. 
@@ -139,7 +139,7 @@ def _validate_header_arg(header) -> None: def _stringify_path( - filepath_or_buffer: FilePathOrBuffer[AnyStr] + filepath_or_buffer: FilePathOrBuffer[AnyStr], ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 058d65b9464ae..4ff988acfe4da 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1918,7 +1918,12 @@ def __init__(self, src, **kwds): else: if len(self._reader.header) > 1: # we have a multi index in the columns - self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns( # noqa: E501 + ( + self.names, + self.index_names, + self.col_names, + passed_names, + ) = self._extract_multi_indexer_columns( # noqa: E501 self._reader.header, self.index_names, self.col_names, passed_names ) else: @@ -2307,7 +2312,12 @@ def __init__(self, f, **kwds): # The original set is stored in self.original_columns. if len(self.columns) > 1: # we are processing a multi index column - self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns( # noqa: E501 + ( + self.columns, + self.index_names, + self.col_names, + _, + ) = self._extract_multi_indexer_columns( # noqa: E501 self.columns, self.index_names, self.col_names ) # Update list of original names to include all indices. diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 07475f224bd5f..8e5fa48d460e8 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -614,7 +614,7 @@ def _cast_to_stata_types(data): data[col] = data[col].astype(np.int32) else: data[col] = data[col].astype(np.float64) - if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53: + if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): ws = precision_loss_doc % ("int64", "float64") elif dtype in (np.float32, np.float64): value = data[col].max() diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index f9bb4981df7df..755cbfb716fcd 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -658,12 +658,16 @@ def test_getslice_tuple(self): dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) sparse = SparseArray(dense) - res = sparse[4:,] # noqa: E231 + res = sparse[ + 4:, + ] # noqa: E231 exp = SparseArray(dense[4:,]) # noqa: E231 tm.assert_sp_array_equal(res, exp) sparse = SparseArray(dense, fill_value=0) - res = sparse[4:,] # noqa: E231 + res = sparse[ + 4:, + ] # noqa: E231 exp = SparseArray(dense[4:,], fill_value=0) # noqa: E231 tm.assert_sp_array_equal(res, exp) @@ -823,11 +827,11 @@ def test_nonzero(self): # Tests regression #21172. 
sa = pd.SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) expected = np.array([2, 5, 9], dtype=np.int32) - result, = sa.nonzero() + (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) - result, = sa.nonzero() + (result,) = sa.nonzero() tm.assert_numpy_array_equal(expected, result) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 62fb118f719e3..7d5b1891cbd32 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -505,7 +505,7 @@ def test_convert_numeric_int64_uint64(self, case, coerce): result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize("value", [-2 ** 63 - 1, 2 ** 64]) + @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) def test_convert_int_overflow(self, value): # see gh-18584 arr = np.array([value], dtype=object) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index aa00cf234d9ee..f193f97aedb22 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -245,9 +245,9 @@ def test_constructor_overflow_int64(self): np.array([2 ** 64], dtype=object), np.array([2 ** 65]), [2 ** 64 + 1], - np.array([-2 ** 63 - 4], dtype=object), - np.array([-2 ** 64 - 1]), - [-2 ** 65 - 2], + np.array([-(2 ** 63) - 4], dtype=object), + np.array([-(2 ** 64) - 1]), + [-(2 ** 65) - 2], ], ) def test_constructor_int_overflow(self, values): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 8c75fbbae7de3..1973cb7f4740d 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -434,7 +434,7 @@ def test_constructor_range_based_deprecated_different_freq(self): with tm.assert_produces_warning(FutureWarning) as m: PeriodIndex(start="2000", periods=2) - warning, = m + (warning,) = m assert 'freq="A-DEC"' in str(warning.message) def test_constructor(self): diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 4f95e6bd28989..519a1eb5b16d8 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -108,7 +108,7 @@ def test_series_getitem_indexing_errors( def test_series_getitem_corner_generator( - multiindex_year_month_day_dataframe_random_data + multiindex_year_month_day_dataframe_random_data, ): s = multiindex_year_month_day_dataframe_random_data["A"] result = s[(x > 0 for x in s)] diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index 99f343c2f4a7d..40483ffec4992 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -211,7 +211,7 @@ def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): def test_xs_level_series_slice_not_implemented( - multiindex_year_month_day_dataframe_random_data + multiindex_year_month_day_dataframe_random_data, ): # this test is not explicitly testing .xs functionality # TODO: move to another module or refactor diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index aa73bd728595f..81dedfdc74409 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -17,10 
+17,14 @@ def test_frame_loc_callable(self): res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) - res = df.loc[lambda x: x.A > 2,] # noqa: E231 + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 - res = df.loc[lambda x: x.A > 2,] # noqa: E231 + res = df.loc[ + lambda x: x.A > 2, + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 res = df.loc[lambda x: x.B == "b", :] @@ -90,7 +94,9 @@ def test_frame_loc_callable_labels(self): res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) - res = df.loc[lambda x: ["A", "C"],] # noqa: E231 + res = df.loc[ + lambda x: ["A", "C"], + ] # noqa: E231 tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 res = df.loc[lambda x: ["A", "C"], :] diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 4dfb8d3bd2dc8..8122559997cce 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -22,8 +22,8 @@ def test_index_col_named(all_parsers, with_header): KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa header = ( - "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" - ) # noqa + "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa + ) if with_header: data = header + no_header diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 05ebff4387908..9bd6fb41cf366 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -179,8 +179,8 @@ class TestIndexReductions: [ (0, 400, 3), (500, 0, -6), - (-10 ** 6, 10 ** 6, 4), - (10 ** 6, -10 ** 6, -4), + (-(10 ** 6), 10 ** 6, 4), + (10 ** 6, -(10 ** 6), -4), (0, 10, 20), ], ) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 9dd88fd5dd25b..885428e5146b2 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -216,10 +216,10 @@ def test_uint64_factorize(self, writable): tm.assert_numpy_array_equal(uniques, exp_uniques) def test_int64_factorize(self, writable): - data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64) + data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) data.setflags(write=writable) exp_labels = np.array([0, 1, 0], dtype=np.intp) - exp_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64) + exp_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) @@ -258,7 +258,7 @@ def test_deprecate_order(self): "data", [ np.array([0, 1, 0], dtype="u8"), - np.array([-2 ** 63, 1, -2 ** 63], dtype="i8"), + np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), np.array(["__nan__", "foo", "__nan__"], dtype="object"), ], ) @@ -275,8 +275,8 @@ def test_parametrized_factorize_na_value_default(self, data): [ (np.array([0, 1, 0, 2], dtype="u8"), 0), (np.array([1, 0, 1, 2], dtype="u8"), 1), - (np.array([-2 ** 63, 1, -2 ** 63, 0], dtype="i8"), -2 ** 63), - (np.array([1, -2 ** 63, 1, 0], dtype="i8"), 1), + (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), + (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), (np.array(["a", "", "a", "b"], dtype=object), "a"), (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), (np.array([("a", 1), (), ("a", 1), 
("a", 2)], dtype=object), ("a", 1)), diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 49d1777df0751..e6cff5c266bc6 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -302,7 +302,7 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - for a in [2 ** 55, -2 ** 55, 20150515061816532]: + for a in [2 ** 55, -(2 ** 55), 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() np_result = s.values.mean() From e3b63c66805698c707d695b52d043d760546efd7 Mon Sep 17 00:00:00 2001 From: Samuel Date: Mon, 18 Nov 2019 06:19:04 +0000 Subject: [PATCH 12/14] Revert "black" This reverts commit 6d38c1bf039196ccc4587a35c363bb0f254fb06d. --- pandas/core/algorithms.py | 2 +- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- pandas/core/groupby/grouper.py | 6 +----- pandas/core/indexes/base.py | 2 +- pandas/core/indexing.py | 6 +++--- pandas/core/internals/managers.py | 2 +- pandas/io/common.py | 4 ++-- pandas/io/parsers.py | 14 ++------------ pandas/io/stata.py | 2 +- pandas/tests/arrays/sparse/test_array.py | 12 ++++-------- pandas/tests/dtypes/test_inference.py | 2 +- pandas/tests/frame/test_constructors.py | 6 +++--- pandas/tests/indexes/period/test_construction.py | 2 +- pandas/tests/indexing/multiindex/test_getitem.py | 2 +- pandas/tests/indexing/multiindex/test_xs.py | 2 +- pandas/tests/indexing/test_callable.py | 12 +++--------- pandas/tests/io/parser/test_index_col.py | 4 ++-- pandas/tests/reductions/test_reductions.py | 4 ++-- pandas/tests/test_algos.py | 10 +++++----- pandas/tests/test_nanops.py | 2 +- 21 files changed, 38 insertions(+), 62 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index cc6c23929d49c..c70e623778315 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1155,7 +1155,7 @@ def compute(self, method): n = min(n, narr) kth_val = algos.kth_smallest(arr.copy(), n - 1) - (ns,) = np.nonzero(arr <= kth_val) + ns, = np.nonzero(arr <= kth_val) inds = ns[arr[ns].argsort(kind="mergesort")] if self.keep != "all": diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7d8cc0b731017..40efc4c65476a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4829,7 +4829,7 @@ def drop_duplicates(self, subset=None, keep="first", inplace=False): duplicated = self.duplicated(subset, keep=keep) if inplace: - (inds,) = (-duplicated)._ndarray_values.nonzero() + inds, = (-duplicated)._ndarray_values.nonzero() new_data = self._data.take(inds) self._update_inplace(new_data) else: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index fa43206b86ec4..f88c26c7bc782 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3613,7 +3613,7 @@ class animal locomotion if isinstance(loc, np.ndarray): if loc.dtype == np.bool_: - (inds,) = loc.nonzero() + inds, = loc.nonzero() return self.take(inds, axis=axis) else: return self.take(loc, axis=axis) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index d6beefbf31db7..d7eaaca5ac83a 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -284,11 +284,7 @@ def __init__( if self.name is None: self.name = index.names[level] - ( - self.grouper, - self._labels, - self._group_index, - ) = index._get_grouper_for_level( # noqa: E501 + self.grouper, self._labels, self._group_index = index._get_grouper_for_level( # noqa: E501 self.grouper, level ) diff --git 
a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 954607a0dc6de..187c7e2f3a7f7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1873,7 +1873,7 @@ def _isnan(self): @cache_readonly def _nan_idxs(self): if self._can_hold_na: - (w,) = self._isnan.nonzero() + w, = self._isnan.nonzero() return w else: return np.array([], dtype=np.int64) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1315e9d5b1c3f..44c786f003369 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -320,7 +320,7 @@ def _setitem_with_indexer(self, indexer, value): # if there is only one block/type, still have to take split path # unless the block is one-dimensional or it can hold the value if not take_split_path and self.obj._data.blocks: - (blk,) = self.obj._data.blocks + blk, = self.obj._data.blocks if 1 < blk.ndim: # in case of dict, keys are indices val = list(value.values()) if isinstance(value, dict) else value take_split_path = not blk._can_hold_element(val) @@ -1120,7 +1120,7 @@ def _getitem_iterable(self, key, axis: int): if com.is_bool_indexer(key): # A boolean indexer key = check_bool_indexer(labels, key) - (inds,) = key.nonzero() + inds, = key.nonzero() return self.obj.take(inds, axis=axis) else: # A collection of keys @@ -1264,7 +1264,7 @@ def _convert_to_indexer(self, obj, axis: int, raise_missing: bool = False): if com.is_bool_indexer(obj): obj = check_bool_indexer(labels, obj) - (inds,) = obj.nonzero() + inds, = obj.nonzero() return inds else: # When setting, missing keys are not allowed, even with .loc: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index db782b4550907..c47aaf7c773c4 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1868,7 +1868,7 @@ def _shape_compat(x): def _interleaved_dtype( - blocks: List[Block], + blocks: List[Block] ) -> Optional[Union[np.dtype, ExtensionDtype]]: """Find the common dtype for `blocks`. diff --git a/pandas/io/common.py b/pandas/io/common.py index e08fd37e65ad9..0bef14e4999c7 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -109,7 +109,7 @@ def _is_url(url) -> bool: def _expand_user( - filepath_or_buffer: FilePathOrBuffer[AnyStr], + filepath_or_buffer: FilePathOrBuffer[AnyStr] ) -> FilePathOrBuffer[AnyStr]: """Return the argument with an initial component of ~ or ~user replaced by that user's home directory. @@ -139,7 +139,7 @@ def _validate_header_arg(header) -> None: def _stringify_path( - filepath_or_buffer: FilePathOrBuffer[AnyStr], + filepath_or_buffer: FilePathOrBuffer[AnyStr] ) -> FilePathOrBuffer[AnyStr]: """Attempt to convert a path-like object to a string. diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4ff988acfe4da..058d65b9464ae 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1918,12 +1918,7 @@ def __init__(self, src, **kwds): else: if len(self._reader.header) > 1: # we have a multi index in the columns - ( - self.names, - self.index_names, - self.col_names, - passed_names, - ) = self._extract_multi_indexer_columns( # noqa: E501 + self.names, self.index_names, self.col_names, passed_names = self._extract_multi_indexer_columns( # noqa: E501 self._reader.header, self.index_names, self.col_names, passed_names ) else: @@ -2312,12 +2307,7 @@ def __init__(self, f, **kwds): # The original set is stored in self.original_columns. 
if len(self.columns) > 1: # we are processing a multi index column - ( - self.columns, - self.index_names, - self.col_names, - _, - ) = self._extract_multi_indexer_columns( # noqa: E501 + self.columns, self.index_names, self.col_names, _ = self._extract_multi_indexer_columns( # noqa: E501 self.columns, self.index_names, self.col_names ) # Update list of original names to include all indices. diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 8e5fa48d460e8..07475f224bd5f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -614,7 +614,7 @@ def _cast_to_stata_types(data): data[col] = data[col].astype(np.int32) else: data[col] = data[col].astype(np.float64) - if data[col].max() >= 2 ** 53 or data[col].min() <= -(2 ** 53): + if data[col].max() >= 2 ** 53 or data[col].min() <= -2 ** 53: ws = precision_loss_doc % ("int64", "float64") elif dtype in (np.float32, np.float64): value = data[col].max() diff --git a/pandas/tests/arrays/sparse/test_array.py b/pandas/tests/arrays/sparse/test_array.py index 755cbfb716fcd..f9bb4981df7df 100644 --- a/pandas/tests/arrays/sparse/test_array.py +++ b/pandas/tests/arrays/sparse/test_array.py @@ -658,16 +658,12 @@ def test_getslice_tuple(self): dense = np.array([np.nan, 0, 3, 4, 0, 5, np.nan, np.nan, 0]) sparse = SparseArray(dense) - res = sparse[ - 4:, - ] # noqa: E231 + res = sparse[4:,] # noqa: E231 exp = SparseArray(dense[4:,]) # noqa: E231 tm.assert_sp_array_equal(res, exp) sparse = SparseArray(dense, fill_value=0) - res = sparse[ - 4:, - ] # noqa: E231 + res = sparse[4:,] # noqa: E231 exp = SparseArray(dense[4:,], fill_value=0) # noqa: E231 tm.assert_sp_array_equal(res, exp) @@ -827,11 +823,11 @@ def test_nonzero(self): # Tests regression #21172. sa = pd.SparseArray([float("nan"), float("nan"), 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) expected = np.array([2, 5, 9], dtype=np.int32) - (result,) = sa.nonzero() + result, = sa.nonzero() tm.assert_numpy_array_equal(expected, result) sa = pd.SparseArray([0, 0, 1, 0, 0, 2, 0, 0, 0, 3, 0, 0]) - (result,) = sa.nonzero() + result, = sa.nonzero() tm.assert_numpy_array_equal(expected, result) diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 7d5b1891cbd32..62fb118f719e3 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -505,7 +505,7 @@ def test_convert_numeric_int64_uint64(self, case, coerce): result = lib.maybe_convert_numeric(case, set(), coerce_numeric=coerce) tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize("value", [-(2 ** 63) - 1, 2 ** 64]) + @pytest.mark.parametrize("value", [-2 ** 63 - 1, 2 ** 64]) def test_convert_int_overflow(self, value): # see gh-18584 arr = np.array([value], dtype=object) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index f193f97aedb22..aa00cf234d9ee 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -245,9 +245,9 @@ def test_constructor_overflow_int64(self): np.array([2 ** 64], dtype=object), np.array([2 ** 65]), [2 ** 64 + 1], - np.array([-(2 ** 63) - 4], dtype=object), - np.array([-(2 ** 64) - 1]), - [-(2 ** 65) - 2], + np.array([-2 ** 63 - 4], dtype=object), + np.array([-2 ** 64 - 1]), + [-2 ** 65 - 2], ], ) def test_constructor_int_overflow(self, values): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 1973cb7f4740d..8c75fbbae7de3 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ 
b/pandas/tests/indexes/period/test_construction.py @@ -434,7 +434,7 @@ def test_constructor_range_based_deprecated_different_freq(self): with tm.assert_produces_warning(FutureWarning) as m: PeriodIndex(start="2000", periods=2) - (warning,) = m + warning, = m assert 'freq="A-DEC"' in str(warning.message) def test_constructor(self): diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 519a1eb5b16d8..4f95e6bd28989 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -108,7 +108,7 @@ def test_series_getitem_indexing_errors( def test_series_getitem_corner_generator( - multiindex_year_month_day_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data ): s = multiindex_year_month_day_dataframe_random_data["A"] result = s[(x > 0 for x in s)] diff --git a/pandas/tests/indexing/multiindex/test_xs.py b/pandas/tests/indexing/multiindex/test_xs.py index 40483ffec4992..99f343c2f4a7d 100644 --- a/pandas/tests/indexing/multiindex/test_xs.py +++ b/pandas/tests/indexing/multiindex/test_xs.py @@ -211,7 +211,7 @@ def test_xs_level_series_ymd(multiindex_year_month_day_dataframe_random_data): def test_xs_level_series_slice_not_implemented( - multiindex_year_month_day_dataframe_random_data, + multiindex_year_month_day_dataframe_random_data ): # this test is not explicitly testing .xs functionality # TODO: move to another module or refactor diff --git a/pandas/tests/indexing/test_callable.py b/pandas/tests/indexing/test_callable.py index 81dedfdc74409..aa73bd728595f 100644 --- a/pandas/tests/indexing/test_callable.py +++ b/pandas/tests/indexing/test_callable.py @@ -17,14 +17,10 @@ def test_frame_loc_callable(self): res = df.loc[lambda x: x.A > 2] tm.assert_frame_equal(res, df.loc[df.A > 2]) - res = df.loc[ - lambda x: x.A > 2, - ] # noqa: E231 + res = df.loc[lambda x: x.A > 2,] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 - res = df.loc[ - lambda x: x.A > 2, - ] # noqa: E231 + res = df.loc[lambda x: x.A > 2,] # noqa: E231 tm.assert_frame_equal(res, df.loc[df.A > 2,]) # noqa: E231 res = df.loc[lambda x: x.B == "b", :] @@ -94,9 +90,7 @@ def test_frame_loc_callable_labels(self): res = df.loc[lambda x: ["A", "C"]] tm.assert_frame_equal(res, df.loc[["A", "C"]]) - res = df.loc[ - lambda x: ["A", "C"], - ] # noqa: E231 + res = df.loc[lambda x: ["A", "C"],] # noqa: E231 tm.assert_frame_equal(res, df.loc[["A", "C"],]) # noqa: E231 res = df.loc[lambda x: ["A", "C"], :] diff --git a/pandas/tests/io/parser/test_index_col.py b/pandas/tests/io/parser/test_index_col.py index 8122559997cce..4dfb8d3bd2dc8 100644 --- a/pandas/tests/io/parser/test_index_col.py +++ b/pandas/tests/io/parser/test_index_col.py @@ -22,8 +22,8 @@ def test_index_col_named(all_parsers, with_header): KORD5,19990127, 22:00:00, 21:56:00, -0.5900, 1.7100, 5.1000, 0.0000, 290.0000 KORD6,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" # noqa header = ( - "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" # noqa - ) + "ID,date,NominalTime,ActualTime,TDew,TAir,Windspeed,Precip,WindDir\n" + ) # noqa if with_header: data = header + no_header diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 9bd6fb41cf366..05ebff4387908 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -179,8 +179,8 @@ class TestIndexReductions: [ (0, 400, 3), (500, 0, 
-6), - (-(10 ** 6), 10 ** 6, 4), - (10 ** 6, -(10 ** 6), -4), + (-10 ** 6, 10 ** 6, 4), + (10 ** 6, -10 ** 6, -4), (0, 10, 20), ], ) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 885428e5146b2..9dd88fd5dd25b 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -216,10 +216,10 @@ def test_uint64_factorize(self, writable): tm.assert_numpy_array_equal(uniques, exp_uniques) def test_int64_factorize(self, writable): - data = np.array([2 ** 63 - 1, -(2 ** 63), 2 ** 63 - 1], dtype=np.int64) + data = np.array([2 ** 63 - 1, -2 ** 63, 2 ** 63 - 1], dtype=np.int64) data.setflags(write=writable) exp_labels = np.array([0, 1, 0], dtype=np.intp) - exp_uniques = np.array([2 ** 63 - 1, -(2 ** 63)], dtype=np.int64) + exp_uniques = np.array([2 ** 63 - 1, -2 ** 63], dtype=np.int64) labels, uniques = algos.factorize(data) tm.assert_numpy_array_equal(labels, exp_labels) @@ -258,7 +258,7 @@ def test_deprecate_order(self): "data", [ np.array([0, 1, 0], dtype="u8"), - np.array([-(2 ** 63), 1, -(2 ** 63)], dtype="i8"), + np.array([-2 ** 63, 1, -2 ** 63], dtype="i8"), np.array(["__nan__", "foo", "__nan__"], dtype="object"), ], ) @@ -275,8 +275,8 @@ def test_parametrized_factorize_na_value_default(self, data): [ (np.array([0, 1, 0, 2], dtype="u8"), 0), (np.array([1, 0, 1, 2], dtype="u8"), 1), - (np.array([-(2 ** 63), 1, -(2 ** 63), 0], dtype="i8"), -(2 ** 63)), - (np.array([1, -(2 ** 63), 1, 0], dtype="i8"), 1), + (np.array([-2 ** 63, 1, -2 ** 63, 0], dtype="i8"), -2 ** 63), + (np.array([1, -2 ** 63, 1, 0], dtype="i8"), 1), (np.array(["a", "", "a", "b"], dtype=object), "a"), (np.array([(), ("a", 1), (), ("a", 2)], dtype=object), ()), (np.array([("a", 1), (), ("a", 1), ("a", 2)], dtype=object), ("a", 1)), diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index e6cff5c266bc6..49d1777df0751 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -302,7 +302,7 @@ def test_nanmean_overflow(self): # In the previous implementation mean can overflow for int dtypes, it # is now consistent with numpy - for a in [2 ** 55, -(2 ** 55), 20150515061816532]: + for a in [2 ** 55, -2 ** 55, 20150515061816532]: s = Series(a, index=range(500), dtype=np.int64) result = s.mean() np_result = s.values.mean() From 9222488cc8c3c2ed8abf57a3d195206dd36441b7 Mon Sep 17 00:00:00 2001 From: Samuel Date: Mon, 18 Nov 2019 06:20:22 +0000 Subject: [PATCH 13/14] black --- pandas/util/testing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index a4c4bcf972b57..1278c90a8e5b8 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1419,7 +1419,7 @@ def assert_frame_equal( def assert_equal( left: Union[DataFrame, AnyArrayLike], right: Union[DataFrame, AnyArrayLike], - **kwargs + **kwargs, ) -> None: """ Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. 
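
Note on the approach that patches 05-09 converge on: typing.cast only narrows the type for the checker and returns its argument unchanged at runtime, whereas re-wrapping a value (e.g. Index(right) or IntervalArray(left.array)) allocates a new object. What follows is a minimal self-contained sketch of that pattern, not code from the series; dispatch_equal is a hypothetical stand-in that only mirrors the shape of assert_equal.

from typing import Union, cast

import pandas as pd
from pandas import DataFrame, Index, Series


def dispatch_equal(left: Union[Index, Series, DataFrame], right: object) -> None:
    # cast() is a no-op at runtime; it exists purely so mypy accepts the
    # narrowed type of `right` on each branch.
    if isinstance(left, Index):
        right = cast(Index, right)
        assert left.equals(right)
    elif isinstance(left, Series):
        right = cast(Series, right)
        assert left.equals(right)
    elif isinstance(left, DataFrame):
        right = cast(DataFrame, right)
        assert left.equals(right)
    else:
        raise NotImplementedError(type(left))


dispatch_equal(pd.Index([1, 2, 3]), pd.Index([1, 2, 3]))
dispatch_equal(pd.Series([1.0, 2.0]), pd.Series([1.0, 2.0]))
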
From 4a19f0aadd38124dee6acdf3d1148149658652c5 Mon Sep 17 00:00:00 2001 From: Samuel Date: Tue, 7 Jan 2020 05:58:55 +0000 Subject: [PATCH 14/14] comment out changes in deprecated testing.py file --- pandas/util/testing.py | 3637 ++++++++++------------------------------ 1 file changed, 848 insertions(+), 2789 deletions(-) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 550efa90a4f53..af7330bda4d4e 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -11,2793 +11,852 @@ stacklevel=2, ) -import pandas._libs.testing as _testing -from pandas.compat import _get_lzma_file, _import_lzma - -from pandas.core.dtypes.common import ( - is_bool, - is_categorical_dtype, - is_datetime64_dtype, - is_datetime64tz_dtype, - is_extension_array_dtype, - is_interval_dtype, - is_list_like, - is_number, - is_period_dtype, - is_sequence, - is_timedelta64_dtype, - needs_i8_conversion, -) -from pandas.core.dtypes.missing import array_equivalent - -import pandas as pd -from pandas import ( - Categorical, - CategoricalIndex, - DataFrame, - DatetimeIndex, - Index, - IntervalIndex, - MultiIndex, - RangeIndex, - Series, - bdate_range, -) -from pandas._typing import AnyArrayLike -from pandas.core.algorithms import take_1d -from pandas.core.arrays import ( - DatetimeArray, - ExtensionArray, - IntervalArray, - PeriodArray, - TimedeltaArray, - period_array, -) - -from pandas.io.common import urlopen -from pandas.io.formats.printing import pprint_thing - -lzma = _import_lzma() - -N = 30 -K = 4 -_RAISE_NETWORK_ERROR_DEFAULT = False - -# set testing_mode -_testing_mode_warnings = (DeprecationWarning, ResourceWarning) - - -def set_testing_mode(): - # set the testing mode filters - testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") - if "deprecate" in testing_mode: - warnings.simplefilter("always", _testing_mode_warnings) - - -def reset_testing_mode(): - # reset the testing mode filters - testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") - if "deprecate" in testing_mode: - warnings.simplefilter("ignore", _testing_mode_warnings) - - -set_testing_mode() - - -def reset_display_options(): - """ - Reset the display options for printing and representing objects. - """ - - pd.reset_option("^display.", silent=True) - - -def round_trip_pickle(obj, path=None): - """ - Pickle an object and then read it again. - - Parameters - ---------- - obj : pandas object - The object to pickle and then re-read. - path : str, default None - The path where the pickled object is written and then read. - - Returns - ------- - round_trip_pickled_object : pandas object - The original object that was pickled and then re-read. - """ - - if path is None: - path = "__{random_bytes}__.pickle".format(random_bytes=rands(10)) - with ensure_clean(path) as path: - pd.to_pickle(obj, path) - return pd.read_pickle(path) - - -def round_trip_pathlib(writer, reader, path=None): - """ - Write an object to file specified by a pathlib.Path and read it back - - Parameters - ---------- - writer : callable bound to pandas object - IO writing function (e.g. DataFrame.to_csv ) - reader : callable - IO reading function (e.g. pd.read_csv ) - path : str, default None - The path where the object is written and then read. - - Returns - ------- - round_trip_object : pandas object - The original object that was serialized and then re-read. 
- """ - - import pytest - - Path = pytest.importorskip("pathlib").Path - if path is None: - path = "___pathlib___" - with ensure_clean(path) as path: - writer(Path(path)) - obj = reader(Path(path)) - return obj - - -def round_trip_localpath(writer, reader, path=None): - """ - Write an object to file specified by a py.path LocalPath and read it back - - Parameters - ---------- - writer : callable bound to pandas object - IO writing function (e.g. DataFrame.to_csv ) - reader : callable - IO reading function (e.g. pd.read_csv ) - path : str, default None - The path where the object is written and then read. - - Returns - ------- - round_trip_object : pandas object - The original object that was serialized and then re-read. - """ - import pytest - - LocalPath = pytest.importorskip("py.path").local - if path is None: - path = "___localpath___" - with ensure_clean(path) as path: - writer(LocalPath(path)) - obj = reader(LocalPath(path)) - return obj - - -@contextmanager -def decompress_file(path, compression): - """ - Open a compressed file and return a file object - - Parameters - ---------- - path : str - The path where the file is read from - - compression : {'gzip', 'bz2', 'zip', 'xz', None} - Name of the decompression to use - - Returns - ------- - f : file object - """ - - if compression is None: - f = open(path, "rb") - elif compression == "gzip": - f = gzip.open(path, "rb") - elif compression == "bz2": - f = bz2.BZ2File(path, "rb") - elif compression == "xz": - f = _get_lzma_file(lzma)(path, "rb") - elif compression == "zip": - zip_file = zipfile.ZipFile(path) - zip_names = zip_file.namelist() - if len(zip_names) == 1: - f = zip_file.open(zip_names.pop()) - else: - raise ValueError("ZIP file {} error. Only one file per ZIP.".format(path)) - else: - msg = "Unrecognized compression type: {}".format(compression) - raise ValueError(msg) - - try: - yield f - finally: - f.close() - if compression == "zip": - zip_file.close() - - -def write_to_compressed(compression, path, data, dest="test"): - """ - Write data to a compressed file. - - Parameters - ---------- - compression : {'gzip', 'bz2', 'zip', 'xz'} - The compression type to use. - path : str - The file path to write the data. - data : str - The data to write. - dest : str, default "test" - The destination file (for ZIP only) - - Raises - ------ - ValueError : An invalid compression value was passed in. - """ - - if compression == "zip": - import zipfile - - compress_method = zipfile.ZipFile - elif compression == "gzip": - import gzip - - compress_method = gzip.GzipFile - elif compression == "bz2": - import bz2 - - compress_method = bz2.BZ2File - elif compression == "xz": - compress_method = _get_lzma_file(lzma) - else: - msg = "Unrecognized compression type: {}".format(compression) - raise ValueError(msg) - - if compression == "zip": - mode = "w" - args = (dest, data) - method = "writestr" - else: - mode = "wb" - args = (data,) - method = "write" - - with compress_method(path, mode=mode) as f: - getattr(f, method)(*args) - - -def assert_almost_equal( - left, right, check_dtype="equiv", check_less_precise=False, **kwargs -): - """ - Check that the left and right objects are approximately equal. - - By approximately equal, we refer to objects that are numbers or that - contain numbers which may be equivalent to specific levels of precision. - - Parameters - ---------- - left : object - right : object - check_dtype : bool or {'equiv'}, default 'equiv' - Check dtype if both a and b are the same type. 
If 'equiv' is passed in, - then `RangeIndex` and `Int64Index` are also considered equivalent - when doing type checking. - check_less_precise : bool or int, default False - Specify comparison precision. 5 digits (False) or 3 digits (True) - after decimal points are compared. If int, then specify the number - of digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - """ - - if isinstance(left, pd.Index): - assert_index_equal( - left, - right, - check_exact=False, - exact=check_dtype, - check_less_precise=check_less_precise, - **kwargs, - ) - - elif isinstance(left, pd.Series): - assert_series_equal( - left, - right, - check_exact=False, - check_dtype=check_dtype, - check_less_precise=check_less_precise, - **kwargs, - ) - - elif isinstance(left, pd.DataFrame): - assert_frame_equal( - left, - right, - check_exact=False, - check_dtype=check_dtype, - check_less_precise=check_less_precise, - **kwargs, - ) - - else: - # Other sequences. - if check_dtype: - if is_number(left) and is_number(right): - # Do not compare numeric classes, like np.float64 and float. - pass - elif is_bool(left) and is_bool(right): - # Do not compare bool classes, like np.bool_ and bool. - pass - else: - if isinstance(left, np.ndarray) or isinstance(right, np.ndarray): - obj = "numpy array" - else: - obj = "Input" - assert_class_equal(left, right, obj=obj) - _testing.assert_almost_equal( - left, - right, - check_dtype=check_dtype, - check_less_precise=check_less_precise, - **kwargs, - ) - - -def _check_isinstance(left, right, cls): - """ - Helper method for our assert_* methods that ensures that - the two objects being compared have the right type before - proceeding with the comparison. - - Parameters - ---------- - left : The first object being compared. - right : The second object being compared. - cls : The class type to check against. - - Raises - ------ - AssertionError : Either `left` or `right` is not an instance of `cls`. 
- """ - - err_msg = "{name} Expected type {exp_type}, found {act_type} instead" - cls_name = cls.__name__ - - if not isinstance(left, cls): - raise AssertionError( - err_msg.format(name=cls_name, exp_type=cls, act_type=type(left)) - ) - if not isinstance(right, cls): - raise AssertionError( - err_msg.format(name=cls_name, exp_type=cls, act_type=type(right)) - ) - - -def assert_dict_equal(left, right, compare_keys=True): - - _check_isinstance(left, right, dict) - _testing.assert_dict_equal(left, right, compare_keys=compare_keys) - - -def randbool(size=(), p=0.5): - return rand(*size) <= p - - -RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1)) -RANDU_CHARS = np.array( - list("".join(map(chr, range(1488, 1488 + 26))) + string.digits), - dtype=(np.unicode_, 1), -) - - -def rands_array(nchars, size, dtype="O"): - """Generate an array of byte strings.""" - retval = ( - np.random.choice(RANDS_CHARS, size=nchars * np.prod(size)) - .view((np.str_, nchars)) - .reshape(size) - ) - if dtype is None: - return retval - else: - return retval.astype(dtype) - - -def randu_array(nchars, size, dtype="O"): - """Generate an array of unicode strings.""" - retval = ( - np.random.choice(RANDU_CHARS, size=nchars * np.prod(size)) - .view((np.unicode_, nchars)) - .reshape(size) - ) - if dtype is None: - return retval - else: - return retval.astype(dtype) - - -def rands(nchars): - """ - Generate one random byte string. - - See `rands_array` if you want to create an array of random strings. - - """ - return "".join(np.random.choice(RANDS_CHARS, nchars)) - - -def randu(nchars): - """ - Generate one random unicode string. - - See `randu_array` if you want to create an array of random unicode strings. - - """ - return "".join(np.random.choice(RANDU_CHARS, nchars)) - - -def close(fignum=None): - from matplotlib.pyplot import get_fignums, close as _close - - if fignum is None: - for fignum in get_fignums(): - _close(fignum) - else: - _close(fignum) - - -# ----------------------------------------------------------------------------- -# contextmanager to ensure the file cleanup - - -@contextmanager -def ensure_clean(filename=None, return_filelike=False): - """Gets a temporary path and agrees to remove on close. - - Parameters - ---------- - filename : str (optional) - if None, creates a temporary file which is then removed when out of - scope. if passed, creates temporary file with filename as ending. - return_filelike : bool (default False) - if True, returns a file-like which is *always* cleaned. Necessary for - savefig and other functions which want to append extensions. 
- """ - filename = filename or "" - fd = None - - if return_filelike: - f = tempfile.TemporaryFile(suffix=filename) - try: - yield f - finally: - f.close() - else: - # don't generate tempfile if using a path with directory specified - if len(os.path.dirname(filename)): - raise ValueError("Can't pass a qualified name to ensure_clean()") - - try: - fd, filename = tempfile.mkstemp(suffix=filename) - except UnicodeEncodeError: - import pytest - - pytest.skip("no unicode file names on this system") - - try: - yield filename - finally: - try: - os.close(fd) - except OSError: - print( - "Couldn't close file descriptor: {fdesc} (file: {fname})".format( - fdesc=fd, fname=filename - ) - ) - try: - if os.path.exists(filename): - os.remove(filename) - except OSError as e: - print("Exception on removing file: {error}".format(error=e)) - - -@contextmanager -def ensure_clean_dir(): - """ - Get a temporary directory path and agrees to remove on close. - - Yields - ------ - Temporary directory path - """ - directory_name = tempfile.mkdtemp(suffix="") - try: - yield directory_name - finally: - try: - rmtree(directory_name) - except OSError: - pass - - -@contextmanager -def ensure_safe_environment_variables(): - """ - Get a context manager to safely set environment variables - - All changes will be undone on close, hence environment variables set - within this contextmanager will neither persist nor change global state. - """ - saved_environ = dict(os.environ) - try: - yield - finally: - os.environ.clear() - os.environ.update(saved_environ) - - -# ----------------------------------------------------------------------------- -# Comparators - - -def equalContents(arr1, arr2): - """Checks if the set of unique elements of arr1 and arr2 are equivalent. - """ - return frozenset(arr1) == frozenset(arr2) - - -def assert_index_equal( - left: Index, - right: Index, - exact: Union[bool, str] = "equiv", - check_names: bool = True, - check_less_precise: Union[bool, int] = False, - check_exact: bool = True, - check_categorical: bool = True, - obj: str = "Index", -) -> None: - """ - Check that left and right Index are equal. - - Parameters - ---------- - left : Index - right : Index - exact : bool or {'equiv'}, default 'equiv' - Whether to check the Index class, dtype and inferred_type - are identical. If 'equiv', then RangeIndex can be substituted for - Int64Index as well. - check_names : bool, default True - Whether to check the names attribute. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - check_exact : bool, default True - Whether to compare number exactly. - check_categorical : bool, default True - Whether to compare internal Categorical exactly. - obj : str, default 'Index' - Specify object name being compared, internally used to show appropriate - assertion message. 
- """ - __tracebackhide__ = True - - def _check_types(l, r, obj="Index"): - if exact: - assert_class_equal(l, r, exact=exact, obj=obj) - - # Skip exact dtype checking when `check_categorical` is False - if check_categorical: - assert_attr_equal("dtype", l, r, obj=obj) - - # allow string-like to have different inferred_types - if l.inferred_type in ("string", "unicode"): - assert r.inferred_type in ("string", "unicode") - else: - assert_attr_equal("inferred_type", l, r, obj=obj) - - def _get_ilevel_values(index, level): - # accept level number only - unique = index.levels[level] - level_codes = index.codes[level] - filled = take_1d(unique.values, level_codes, fill_value=unique._na_value) - values = unique._shallow_copy(filled, name=index.names[level]) - return values - - # instance validation - _check_isinstance(left, right, Index) - - # class / dtype comparison - _check_types(left, right, obj=obj) - - # level comparison - if left.nlevels != right.nlevels: - msg1 = "{obj} levels are different".format(obj=obj) - msg2 = "{nlevels}, {left}".format(nlevels=left.nlevels, left=left) - msg3 = "{nlevels}, {right}".format(nlevels=right.nlevels, right=right) - raise_assert_detail(obj, msg1, msg2, msg3) - - # length comparison - if len(left) != len(right): - msg1 = "{obj} length are different".format(obj=obj) - msg2 = "{length}, {left}".format(length=len(left), left=left) - msg3 = "{length}, {right}".format(length=len(right), right=right) - raise_assert_detail(obj, msg1, msg2, msg3) - - # MultiIndex special comparison for little-friendly error messages - if left.nlevels > 1: - left = cast(MultiIndex, left) - right = cast(MultiIndex, right) - - for level in range(left.nlevels): - # cannot use get_level_values here because it can change dtype - llevel = _get_ilevel_values(left, level) - rlevel = _get_ilevel_values(right, level) - - lobj = "MultiIndex level [{level}]".format(level=level) - assert_index_equal( - llevel, - rlevel, - exact=exact, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - obj=lobj, - ) - # get_level_values may change dtype - _check_types(left.levels[level], right.levels[level], obj=obj) - - # skip exact index checking when `check_categorical` is False - if check_exact and check_categorical: - if not left.equals(right): - diff = np.sum((left.values != right.values).astype(int)) * 100.0 / len(left) - msg = "{obj} values are different ({pct} %)".format( - obj=obj, pct=np.round(diff, 5) - ) - raise_assert_detail(obj, msg, left, right) - else: - _testing.assert_almost_equal( - left.values, - right.values, - check_less_precise=check_less_precise, - check_dtype=exact, - obj=obj, - lobj=left, - robj=right, - ) - - # metadata comparison - if check_names: - assert_attr_equal("names", left, right, obj=obj) - if isinstance(left, pd.PeriodIndex) or isinstance(right, pd.PeriodIndex): - assert_attr_equal("freq", left, right, obj=obj) - if isinstance(left, pd.IntervalIndex) or isinstance(right, pd.IntervalIndex): - assert_interval_array_equal(left.values, right.values) - - if check_categorical: - if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal( - left.values, right.values, obj="{obj} category".format(obj=obj) - ) - - -def assert_class_equal(left, right, exact=True, obj="Input"): - """checks classes are equal.""" - __tracebackhide__ = True - - def repr_class(x): - if isinstance(x, Index): - # return Index as it is to include values in the error message - return x - - try: - return x.__class__.__name__ - except 
AttributeError: - return repr(type(x)) - - if exact == "equiv": - if type(left) != type(right): - # allow equivalence of Int64Index/RangeIndex - types = {type(left).__name__, type(right).__name__} - if len(types - {"Int64Index", "RangeIndex"}): - msg = "{obj} classes are not equivalent".format(obj=obj) - raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) - elif exact: - if type(left) != type(right): - msg = "{obj} classes are different".format(obj=obj) - raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) - - -def assert_attr_equal(attr, left, right, obj="Attributes"): - """checks attributes are equal. Both objects must have attribute. - - Parameters - ---------- - attr : str - Attribute name being compared. - left : object - right : object - obj : str, default 'Attributes' - Specify object name being compared, internally used to show appropriate - assertion message - """ - __tracebackhide__ = True - - left_attr = getattr(left, attr) - right_attr = getattr(right, attr) - - if left_attr is right_attr: - return True - elif ( - is_number(left_attr) - and np.isnan(left_attr) - and is_number(right_attr) - and np.isnan(right_attr) - ): - # np.nan - return True - - try: - result = left_attr == right_attr - except TypeError: - # datetimetz on rhs may raise TypeError - result = False - if not isinstance(result, bool): - result = result.all() - - if result: - return True - else: - msg = 'Attribute "{attr}" are different'.format(attr=attr) - raise_assert_detail(obj, msg, left_attr, right_attr) - - -def assert_is_valid_plot_return_object(objs): - import matplotlib.pyplot as plt - - if isinstance(objs, (pd.Series, np.ndarray)): - for el in objs.ravel(): - msg = ( - "one of 'objs' is not a matplotlib Axes instance, type " - "encountered {name!r}" - ).format(name=el.__class__.__name__) - assert isinstance(el, (plt.Axes, dict)), msg - else: - assert isinstance(objs, (plt.Artist, tuple, dict)), ( - "objs is neither an ndarray of Artist instances nor a " - 'single Artist instance, tuple, or dict, "objs" is a {name!r}'.format( - name=objs.__class__.__name__ - ) - ) - - -def isiterable(obj): - return hasattr(obj, "__iter__") - - -def assert_is_sorted(seq): - """Assert that the sequence is sorted.""" - if isinstance(seq, (Index, Series)): - seq = seq.values - # sorting does not change precisions - assert_numpy_array_equal(seq, np.sort(np.array(seq))) - - -def assert_categorical_equal( - left: Categorical, - right: Categorical, - check_dtype: bool = True, - check_category_order: bool = True, - obj: str = "Categorical", -) -> None: - """Test that Categoricals are equivalent. - - Parameters - ---------- - left : Categorical - right : Categorical - check_dtype : bool, default True - Check that integer dtype of the codes are the same - check_category_order : bool, default True - Whether the order of the categories should be compared, which - implies identical integer codes. If False, only the resulting - values are compared. The ordered attribute is - checked regardless. 
- obj : str, default 'Categorical' - Specify object name being compared, internally used to show appropriate - assertion message - """ - _check_isinstance(left, right, Categorical) - - if check_category_order: - assert_index_equal( - left.categories, right.categories, obj="{obj}.categories".format(obj=obj) - ) - assert_numpy_array_equal( - left.codes, - right.codes, - check_dtype=check_dtype, - obj="{obj}.codes".format(obj=obj), - ) - else: - assert_index_equal( - left.categories.sort_values(), - right.categories.sort_values(), - obj="{obj}.categories".format(obj=obj), - ) - assert_index_equal( - left.categories.take(left.codes), - right.categories.take(right.codes), - obj="{obj}.values".format(obj=obj), - ) - - assert_attr_equal("ordered", left, right, obj=obj) - - -def assert_interval_array_equal( - left: IntervalArray, - right: IntervalArray, - exact: str = "equiv", - obj: str = "IntervalArray", -) -> None: - """Test that two IntervalArrays are equivalent. - - Parameters - ---------- - left, right : IntervalArray - The IntervalArrays to compare. - exact : bool or {'equiv'}, default 'equiv' - Whether to check the Index class, dtype and inferred_type - are identical. If 'equiv', then RangeIndex can be substituted for - Int64Index as well. - obj : str, default 'IntervalArray' - Specify object name being compared, internally used to show appropriate - assertion message - """ - assert_index_equal( - left.left, right.left, exact=exact, obj="{obj}.left".format(obj=obj) - ) - assert_index_equal( - left.right, right.right, exact=exact, obj="{obj}.left".format(obj=obj) - ) - assert_attr_equal("closed", left, right, obj=obj) - - -def assert_period_array_equal( - left: PeriodArray, right: PeriodArray, obj: str = "PeriodArray" -) -> None: - _check_isinstance(left, right, PeriodArray) - - assert_numpy_array_equal( - left._data, right._data, obj="{obj}.values".format(obj=obj) - ) - assert_attr_equal("freq", left, right, obj=obj) - - -def assert_datetime_array_equal( - left: DatetimeArray, right: DatetimeArray, obj: str = "DatetimeArray" -) -> None: - __tracebackhide__ = True - _check_isinstance(left, right, DatetimeArray) - - assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) - assert_attr_equal("freq", left, right, obj=obj) - assert_attr_equal("tz", left, right, obj=obj) - - -def assert_timedelta_array_equal( - left: TimedeltaArray, right: TimedeltaArray, obj: str = "TimedeltaArray" -) -> None: - __tracebackhide__ = True - _check_isinstance(left, right, TimedeltaArray) - assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) - assert_attr_equal("freq", left, right, obj=obj) - - -def raise_assert_detail(obj, message, left, right, diff=None): - __tracebackhide__ = True - - if isinstance(left, np.ndarray): - left = pprint_thing(left) - elif is_categorical_dtype(left): - left = repr(left) - - if isinstance(right, np.ndarray): - right = pprint_thing(right) - elif is_categorical_dtype(right): - right = repr(right) - - msg = """{obj} are different - -{message} -[left]: {left} -[right]: {right}""".format( - obj=obj, message=message, left=left, right=right - ) - - if diff is not None: - msg += "\n[diff]: {diff}".format(diff=diff) - - raise AssertionError(msg) - - -def assert_numpy_array_equal( - left: np.ndarray, - right: np.ndarray, - strict_nan: bool = False, - check_dtype: bool = True, - err_msg: Optional[str] = None, - check_same: Optional[str] = None, - obj: str = "numpy array", -) -> None: - """ Checks that 'np.ndarray' is equivalent - - 
Parameters - ---------- - left : np.ndarray or iterable - right : np.ndarray or iterable - strict_nan : bool, default False - If True, consider NaN and None to be different. - check_dtype: bool, default True - check dtype if both a and b are np.ndarray - err_msg : str, default None - If provided, used as assertion message - check_same : None|'copy'|'same', default None - Ensure left and right refer/do not refer to the same memory area - obj : str, default 'numpy array' - Specify object name being compared, internally used to show appropriate - assertion message - """ - __tracebackhide__ = True - - # instance validation - # Show a detailed error message when classes are different - assert_class_equal(left, right, obj=obj) - # both classes must be an np.ndarray - _check_isinstance(left, right, np.ndarray) - - def _get_base(obj): - return obj.base if getattr(obj, "base", None) is not None else obj - - left_base = _get_base(left) - right_base = _get_base(right) - - if check_same == "same": - if left_base is not right_base: - msg = "{left!r} is not {right!r}".format(left=left_base, right=right_base) - raise AssertionError(msg) - elif check_same == "copy": - if left_base is right_base: - msg = "{left!r} is {right!r}".format(left=left_base, right=right_base) - raise AssertionError(msg) - - def _raise(left, right, err_msg): - if err_msg is None: - if left.shape != right.shape: - raise_assert_detail( - obj, - "{obj} shapes are different".format(obj=obj), - left.shape, - right.shape, - ) - - diff = 0 - for l, r in zip(left, right): - # count up differences - if not array_equivalent(l, r, strict_nan=strict_nan): - diff += 1 - - diff = diff * 100.0 / left.size - msg = "{obj} values are different ({pct} %)".format( - obj=obj, pct=np.round(diff, 5) - ) - raise_assert_detail(obj, msg, left, right) - - raise AssertionError(err_msg) - - # compare shape and values - if not array_equivalent(left, right, strict_nan=strict_nan): - _raise(left, right, err_msg) - - if check_dtype: - if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): - assert_attr_equal("dtype", left, right, obj=obj) - - -def assert_extension_array_equal( - left, right, check_dtype=True, check_less_precise=False, check_exact=False -): - """Check that left and right ExtensionArrays are equal. - - Parameters - ---------- - left, right : ExtensionArray - The two arrays to compare - check_dtype : bool, default True - Whether to check if the ExtensionArray dtypes are identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - check_exact : bool, default False - Whether to compare number exactly. - - Notes - ----- - Missing values are checked separately from valid values. - A mask of missing values is computed for each and checked to match. - The remaining all-valid values are cast to object dtype and checked. 
- """ - assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" - assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" - if check_dtype: - assert_attr_equal("dtype", left, right, obj="ExtensionArray") - - if hasattr(left, "asi8") and type(right) == type(left): - # Avoid slow object-dtype comparisons - assert_numpy_array_equal(left.asi8, right.asi8) - return - - left_na = np.asarray(left.isna()) - right_na = np.asarray(right.isna()) - assert_numpy_array_equal(left_na, right_na, obj="ExtensionArray NA mask") - - left_valid = np.asarray(left[~left_na].astype(object)) - right_valid = np.asarray(right[~right_na].astype(object)) - if check_exact: - assert_numpy_array_equal(left_valid, right_valid, obj="ExtensionArray") - else: - _testing.assert_almost_equal( - left_valid, - right_valid, - check_dtype=check_dtype, - check_less_precise=check_less_precise, - obj="ExtensionArray", - ) - - -# This could be refactored to use the NDFrame.equals method -def assert_series_equal( - left: Series, - right: Series, - check_dtype: bool = True, - check_index_type: str = "equiv", - check_series_type: bool = True, - check_less_precise: bool = False, - check_names: bool = True, - check_exact: bool = False, - check_datetimelike_compat: bool = False, - check_categorical: bool = True, - obj: str = "Series", -) -> None: - """ - Check that left and right Series are equal. - - Parameters - ---------- - left : Series - right : Series - check_dtype : bool, default True - Whether to check the Series dtype is identical. - check_index_type : bool or {'equiv'}, default 'equiv' - Whether to check the Index class, dtype and inferred_type - are identical. - check_series_type : bool, default True - Whether to check the Series class is identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - check_names : bool, default True - Whether to check the Series and Index names attribute. - check_exact : bool, default False - Whether to compare number exactly. - check_datetimelike_compat : bool, default False - Compare datetime-like which is comparable ignoring dtype. - check_categorical : bool, default True - Whether to compare internal Categorical exactly. - obj : str, default 'Series' - Specify object name being compared, internally used to show appropriate - assertion message. - """ - __tracebackhide__ = True - - # instance validation - _check_isinstance(left, right, Series) - - if check_series_type: - # ToDo: There are some tests using rhs is sparse - # lhs is dense. 
Should use assert_class_equal in future - assert isinstance(left, type(right)) - # assert_class_equal(left, right, obj=obj) - - # length comparison - if len(left) != len(right): - msg1 = "{len}, {left}".format(len=len(left), left=left.index) - msg2 = "{len}, {right}".format(len=len(right), right=right.index) - raise_assert_detail(obj, "Series length are different", msg1, msg2) - - # index comparison - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj="{obj}.index".format(obj=obj), - ) - - if check_dtype: - # We want to skip exact dtype checking when `check_categorical` - # is False. We'll still raise if only one is a `Categorical`, - # regardless of `check_categorical` - if ( - is_categorical_dtype(left) - and is_categorical_dtype(right) - and not check_categorical - ): - pass - else: - assert_attr_equal( - "dtype", left, right, obj="Attributes of {obj}".format(obj=obj) - ) - - if check_exact: - assert_numpy_array_equal( - left._internal_get_values(), - right._internal_get_values(), - check_dtype=check_dtype, - obj="{obj}".format(obj=obj), - ) - elif check_datetimelike_compat: - # we want to check only if we have compat dtypes - # e.g. integer and M|m are NOT compat, but we can simply check - # the values in that case - if needs_i8_conversion(left) or needs_i8_conversion(right): - - # datetimelike may have different objects (e.g. datetime.datetime - # vs Timestamp) but will compare equal - if not Index(left.values).equals(Index(right.values)): - msg = ( - "[datetimelike_compat=True] {left} is not equal to {right}." - ).format(left=left.values, right=right.values) - raise AssertionError(msg) - else: - assert_numpy_array_equal( - left._internal_get_values(), - right._internal_get_values(), - check_dtype=check_dtype, - ) - elif is_interval_dtype(left) or is_interval_dtype(right): - left_array = cast(IntervalArray, left.array) - right_array = cast(IntervalArray, right.array) - assert_interval_array_equal(left_array, right_array) - elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype): - # .values is an ndarray, but ._values is the ExtensionArray. 
- # TODO: Use .array - assert is_extension_array_dtype(right.dtype) - assert_extension_array_equal(left._values, right._values) - elif ( - is_extension_array_dtype(left) - and not is_categorical_dtype(left) - and is_extension_array_dtype(right) - and not is_categorical_dtype(right) - ): - assert_extension_array_equal(left.array, right.array) - else: - _testing.assert_almost_equal( - left._internal_get_values(), - right._internal_get_values(), - check_less_precise=check_less_precise, - check_dtype=check_dtype, - obj="{obj}".format(obj=obj), - ) - - # metadata comparison - if check_names: - assert_attr_equal("name", left, right, obj=obj) - - if check_categorical: - if is_categorical_dtype(left) or is_categorical_dtype(right): - assert_categorical_equal( - left.values, right.values, obj="{obj} category".format(obj=obj) - ) - - -# This could be refactored to use the NDFrame.equals method -def assert_frame_equal( - left: DataFrame, - right: DataFrame, - check_dtype: bool = True, - check_index_type: str = "equiv", - check_column_type: str = "equiv", - check_frame_type: bool = True, - check_less_precise: bool = False, - check_names: bool = True, - by_blocks: bool = False, - check_exact: bool = False, - check_datetimelike_compat: bool = False, - check_categorical: bool = True, - check_like: bool = False, - obj: str = "DataFrame", -) -> None: - """ - Check that left and right DataFrame are equal. - - This function is intended to compare two DataFrames and output any - differences. Is is mostly intended for use in unit tests. - Additional parameters allow varying the strictness of the - equality checks performed. - - Parameters - ---------- - left : DataFrame - First DataFrame to compare. - right : DataFrame - Second DataFrame to compare. - check_dtype : bool, default True - Whether to check the DataFrame dtype is identical. - check_index_type : bool or {'equiv'}, default 'equiv' - Whether to check the Index class, dtype and inferred_type - are identical. - check_column_type : bool or {'equiv'}, default 'equiv' - Whether to check the columns class, dtype and inferred_type - are identical. Is passed as the ``exact`` argument of - :func:`assert_index_equal`. - check_frame_type : bool, default True - Whether to check the DataFrame class is identical. - check_less_precise : bool or int, default False - Specify comparison precision. Only used when check_exact is False. - 5 digits (False) or 3 digits (True) after decimal points are compared. - If int, then specify the digits to compare. - - When comparing two numbers, if the first number has magnitude less - than 1e-5, we compare the two numbers directly and check whether - they are equivalent within the specified precision. Otherwise, we - compare the **ratio** of the second number to the first number and - check whether it is equivalent to 1 within the specified precision. - check_names : bool, default True - Whether to check that the `names` attribute for both the `index` - and `column` attributes of the DataFrame is identical. - by_blocks : bool, default False - Specify how to compare internal data. If False, compare by columns. - If True, compare by blocks. - check_exact : bool, default False - Whether to compare number exactly. - check_datetimelike_compat : bool, default False - Compare datetime-like which is comparable ignoring dtype. - check_categorical : bool, default True - Whether to compare internal Categorical exactly. - check_like : bool, default False - If True, ignore the order of index & columns. 
- Note: index labels must match their respective rows - (same as in columns) - same labels must be with the same data. - obj : str, default 'DataFrame' - Specify object name being compared, internally used to show appropriate - assertion message. - - See Also - -------- - assert_series_equal : Equivalent method for asserting Series equality. - DataFrame.equals : Check DataFrame equality. - - Examples - -------- - This example shows comparing two DataFrames that are equal - but with columns of differing dtypes. - - >>> from pandas.util.testing import assert_frame_equal - >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) - >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) - - df1 equals itself. - - >>> assert_frame_equal(df1, df1) - - df1 differs from df2 as column 'b' is of a different type. - - >>> assert_frame_equal(df1, df2) - Traceback (most recent call last): - ... - AssertionError: Attributes of DataFrame.iloc[:, 1] are different - - Attribute "dtype" are different - [left]: int64 - [right]: float64 - - Ignore differing dtypes in columns with check_dtype. - - >>> assert_frame_equal(df1, df2, check_dtype=False) - """ - __tracebackhide__ = True - - # instance validation - _check_isinstance(left, right, DataFrame) - - if check_frame_type: - assert isinstance(left, type(right)) - # assert_class_equal(left, right, obj=obj) - - # shape comparison - if left.shape != right.shape: - raise_assert_detail( - obj, - "{obj} shape mismatch".format(obj=obj), - "{shape!r}".format(shape=left.shape), - "{shape!r}".format(shape=right.shape), - ) - - if check_like: - left, right = left.reindex_like(right), right - - # index comparison - assert_index_equal( - left.index, - right.index, - exact=check_index_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj="{obj}.index".format(obj=obj), - ) - - # column comparison - assert_index_equal( - left.columns, - right.columns, - exact=check_column_type, - check_names=check_names, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_categorical=check_categorical, - obj="{obj}.columns".format(obj=obj), - ) - - # compare by blocks - if by_blocks: - rblocks = right._to_dict_of_blocks() - lblocks = left._to_dict_of_blocks() - for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): - assert dtype in lblocks - assert dtype in rblocks - assert_frame_equal( - lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj - ) - - # compare by columns - else: - for i, col in enumerate(left.columns): - assert col in right - lcol = left.iloc[:, i] - rcol = right.iloc[:, i] - assert_series_equal( - lcol, - rcol, - check_dtype=check_dtype, - check_index_type=check_index_type, - check_less_precise=check_less_precise, - check_exact=check_exact, - check_names=check_names, - check_datetimelike_compat=check_datetimelike_compat, - check_categorical=check_categorical, - obj="{obj}.iloc[:, {idx}]".format(obj=obj, idx=i), - ) - - -def assert_equal( - left: Union[DataFrame, AnyArrayLike], - right: Union[DataFrame, AnyArrayLike], - **kwargs, -) -> None: - """ - Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. 
- - Parameters - ---------- - left : Index, Series, DataFrame, ExtensionArray, or np.ndarray - right : Index, Series, DataFrame, ExtensionArray, or np.ndarray - **kwargs - """ - __tracebackhide__ = True - - if isinstance(left, Index): - right = cast(Index, right) - assert_index_equal(left, right, **kwargs) - elif isinstance(left, Series): - right = cast(Series, right) - assert_series_equal(left, right, **kwargs) - elif isinstance(left, DataFrame): - right = cast(DataFrame, right) - assert_frame_equal(left, right, **kwargs) - elif isinstance(left, IntervalArray): - right = cast(IntervalArray, right) - assert_interval_array_equal(left, right, **kwargs) - elif isinstance(left, PeriodArray): - right = cast(PeriodArray, right) - assert_period_array_equal(left, right, **kwargs) - elif isinstance(left, DatetimeArray): - right = cast(DatetimeArray, right) - assert_datetime_array_equal(left, right, **kwargs) - elif isinstance(left, TimedeltaArray): - right = cast(TimedeltaArray, right) - assert_timedelta_array_equal(left, right, **kwargs) - elif isinstance(left, ExtensionArray): - right = cast(ExtensionArray, right) - assert_extension_array_equal(left, right, **kwargs) - elif isinstance(left, np.ndarray): - right = cast(np.ndarray, right) - assert_numpy_array_equal(left, right, **kwargs) - elif isinstance(left, str): - assert kwargs == {} - assert left == right - else: - raise NotImplementedError(type(left)) - - -def box_expected(expected, box_cls, transpose=True): - """ - Helper function to wrap the expected output of a test in a given box_class. - - Parameters - ---------- - expected : np.ndarray, Index, Series - box_cls : {Index, Series, DataFrame} - - Returns - ------- - subclass of box_cls - """ - if box_cls is pd.Index: - expected = pd.Index(expected) - elif box_cls is pd.Series: - expected = pd.Series(expected) - elif box_cls is pd.DataFrame: - expected = pd.Series(expected).to_frame() - if transpose: - # for vector operations, we we need a DataFrame to be a single-row, - # not a single-column, in order to operate against non-DataFrame - # vectors of the same length. - expected = expected.T - elif box_cls is PeriodArray: - # the PeriodArray constructor is not as flexible as period_array - expected = period_array(expected) - elif box_cls is DatetimeArray: - expected = DatetimeArray(expected) - elif box_cls is TimedeltaArray: - expected = TimedeltaArray(expected) - elif box_cls is np.ndarray: - expected = np.array(expected) - elif box_cls is to_array: - expected = to_array(expected) - else: - raise NotImplementedError(box_cls) - return expected - - -def to_array(obj): - # temporary implementation until we get pd.array in place - if is_period_dtype(obj): - return period_array(obj) - elif is_datetime64_dtype(obj) or is_datetime64tz_dtype(obj): - return DatetimeArray._from_sequence(obj) - elif is_timedelta64_dtype(obj): - return TimedeltaArray._from_sequence(obj) - else: - return np.array(obj) - - -# ----------------------------------------------------------------------------- -# Sparse - - -def assert_sp_array_equal( - left: pd.SparseArray, - right: pd.SparseArray, - check_dtype: bool = True, - check_kind: bool = True, - check_fill_value: bool = True, - consolidate_block_indices: bool = False, -): - """Check that the left and right SparseArray are equal. - - Parameters - ---------- - left : SparseArray - right : SparseArray - check_dtype : bool, default True - Whether to check the data dtype is identical. 
- check_kind : bool, default True - Whether to check the kind of the sparse index for each column. - check_fill_value : bool, default True - Whether to check that left.fill_value matches right.fill_value - consolidate_block_indices : bool, default False - Whether to consolidate contiguous blocks for sparse arrays with - a BlockIndex. Some operations, e.g. concat, will end up with - block indices that could be consolidated. Setting this to true will - create a new BlockIndex for that array, with consolidated - block indices. - """ - - _check_isinstance(left, right, pd.SparseArray) - - assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype) - - # SparseIndex comparison - assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex) - assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex) - - if not check_kind: - left_index = left.sp_index.to_block_index() - right_index = right.sp_index.to_block_index() - else: - left_index = left.sp_index - right_index = right.sp_index - - if consolidate_block_indices and left.kind == "block": - # we'll probably remove this hack... - left_index = left_index.to_int_index().to_block_index() - right_index = right_index.to_int_index().to_block_index() - - if not left_index.equals(right_index): - raise_assert_detail( - "SparseArray.index", "index are not equal", left_index, right_index - ) - else: - # Just ensure a - pass - - if check_fill_value: - assert_attr_equal("fill_value", left, right) - if check_dtype: - assert_attr_equal("dtype", left, right) - assert_numpy_array_equal(left.to_dense(), right.to_dense(), check_dtype=check_dtype) - - - # ----------------------------------------------------------------------------- - # Others - - - def assert_contains_all(iterable, dic): - for k in iterable: - assert k in dic, "Did not contain item: '{key!r}'".format(key=k) - - - def assert_copy(iter1, iter2, **eql_kwargs): - """ - iter1, iter2: iterables that produce elements - comparable with assert_almost_equal - - Checks that the elements are equal, but not - the same object. (Does not check that items - in sequences are also not the same object) - """ - for elem1, elem2 in zip(iter1, iter2): - assert_almost_equal(elem1, elem2, **eql_kwargs) - msg = ( - "Expected object {obj1!r} and object {obj2!r} to be " - "different objects, but they were the same object."
- ).format(obj1=type(elem1), obj2=type(elem2)) - assert elem1 is not elem2, msg - - -def getCols(k): - return string.ascii_uppercase[:k] - - -# make index -def makeStringIndex(k=10, name=None): - return Index(rands_array(nchars=10, size=k), name=name) - - -def makeUnicodeIndex(k=10, name=None): - return Index(randu_array(nchars=10, size=k), name=name) - - -def makeCategoricalIndex(k=10, n=3, name=None, **kwargs): - """ make a length k index or n categories """ - x = rands_array(nchars=4, size=n) - return CategoricalIndex( - Categorical.from_codes(np.arange(k) % n, categories=x), name=name, **kwargs - ) - - -def makeIntervalIndex(k=10, name=None, **kwargs): - """ make a length k IntervalIndex """ - x = np.linspace(0, 100, num=(k + 1)) - return IntervalIndex.from_breaks(x, name=name, **kwargs) - - -def makeBoolIndex(k=10, name=None): - if k == 1: - return Index([True], name=name) - elif k == 2: - return Index([False, True], name=name) - return Index([False, True] + [False] * (k - 2), name=name) - - -def makeIntIndex(k=10, name=None): - return Index(list(range(k)), name=name) - - -def makeUIntIndex(k=10, name=None): - return Index([2 ** 63 + i for i in range(k)], name=name) - - -def makeRangeIndex(k=10, name=None, **kwargs): - return RangeIndex(0, k, 1, name=name, **kwargs) - - -def makeFloatIndex(k=10, name=None): - values = sorted(np.random.random_sample(k)) - np.random.random_sample(1) - return Index(values * (10 ** np.random.randint(0, 9)), name=name) - - -def makeDateIndex(k=10, freq="B", name=None, **kwargs): - dt = datetime(2000, 1, 1) - dr = bdate_range(dt, periods=k, freq=freq, name=name) - return DatetimeIndex(dr, name=name, **kwargs) - - -def makeTimedeltaIndex(k=10, freq="D", name=None, **kwargs): - return pd.timedelta_range(start="1 day", periods=k, freq=freq, name=name, **kwargs) - - -def makePeriodIndex(k=10, name=None, **kwargs): - dt = datetime(2000, 1, 1) - dr = pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) - return dr - - -def makeMultiIndex(k=10, names=None, **kwargs): - return MultiIndex.from_product((("foo", "bar"), (1, 2)), names=names, **kwargs) - - -_names = [ - "Alice", - "Bob", - "Charlie", - "Dan", - "Edith", - "Frank", - "George", - "Hannah", - "Ingrid", - "Jerry", - "Kevin", - "Laura", - "Michael", - "Norbert", - "Oliver", - "Patricia", - "Quinn", - "Ray", - "Sarah", - "Tim", - "Ursula", - "Victor", - "Wendy", - "Xavier", - "Yvonne", - "Zelda", -] - - -def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None): - """ - Make a DataFrame with a DatetimeIndex - - Parameters - ---------- - start : str or Timestamp, default "2000-01-01" - The start of the index. Passed to date_range with `freq`. - end : str or Timestamp, default "2000-12-31" - The end of the index. Passed to date_range with `freq`. - freq : str or Freq - The frequency to use for the DatetimeIndex - seed : int, optional - The random state seed. - - * name : object dtype with string names - * id : int dtype with - * x, y : float dtype - - Examples - -------- - >>> _make_timeseries() - id name x y - timestamp - 2000-01-01 982 Frank 0.031261 0.986727 - 2000-01-02 1025 Edith -0.086358 -0.032920 - 2000-01-03 982 Edith 0.473177 0.298654 - 2000-01-04 1009 Sarah 0.534344 -0.750377 - 2000-01-05 963 Zelda -0.271573 0.054424 - ... ... ... ... ... 
- 2000-12-27 980 Ingrid -0.132333 -0.422195 - 2000-12-28 972 Frank -0.376007 -0.298687 - 2000-12-29 1009 Ursula -0.865047 -0.503133 - 2000-12-30 1000 Hannah -0.063757 -0.507336 - 2000-12-31 972 Tim -0.869120 0.531685 - """ - index = pd.date_range(start=start, end=end, freq=freq, name="timestamp") - n = len(index) - state = np.random.RandomState(seed) - columns = { - "name": state.choice(_names, size=n), - "id": state.poisson(1000, size=n), - "x": state.rand(n) * 2 - 1, - "y": state.rand(n) * 2 - 1, - } - df = pd.DataFrame(columns, index=index, columns=sorted(columns)) - if df.index[-1] == end: - df = df.iloc[:-1] - return df - - -def all_index_generator(k=10): - """Generator which can be iterated over to get instances of all the various - index classes. - - Parameters - ---------- - k: length of each of the index instances - """ - all_make_index_funcs = [ - makeIntIndex, - makeFloatIndex, - makeStringIndex, - makeUnicodeIndex, - makeDateIndex, - makePeriodIndex, - makeTimedeltaIndex, - makeBoolIndex, - makeRangeIndex, - makeIntervalIndex, - makeCategoricalIndex, - ] - for make_index_func in all_make_index_funcs: - yield make_index_func(k=k) - - -def index_subclass_makers_generator(): - make_index_funcs = [ - makeDateIndex, - makePeriodIndex, - makeTimedeltaIndex, - makeRangeIndex, - makeIntervalIndex, - makeCategoricalIndex, - makeMultiIndex, - ] - for make_index_func in make_index_funcs: - yield make_index_func - - -def all_timeseries_index_generator(k=10): - """Generator which can be iterated over to get instances of all the classes - which represent time-series. - - Parameters - ---------- - k: length of each of the index instances - """ - make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex] - for make_index_func in make_index_funcs: - yield make_index_func(k=k) - - -# make series -def makeFloatSeries(name=None): - index = makeStringIndex(N) - return Series(randn(N), index=index, name=name) - - -def makeStringSeries(name=None): - index = makeStringIndex(N) - return Series(randn(N), index=index, name=name) - - -def makeObjectSeries(name=None): - data = makeStringIndex(N) - data = Index(data, dtype=object) - index = makeStringIndex(N) - return Series(data, index=index, name=name) - - -def getSeriesData(): - index = makeStringIndex(N) - return {c: Series(randn(N), index=index) for c in getCols(K)} - - -def makeTimeSeries(nper=None, freq="B", name=None): - if nper is None: - nper = N - return Series(randn(nper), index=makeDateIndex(nper, freq=freq), name=name) - - -def makePeriodSeries(nper=None, name=None): - if nper is None: - nper = N - return Series(randn(nper), index=makePeriodIndex(nper), name=name) - - -def getTimeSeriesData(nper=None, freq="B"): - return {c: makeTimeSeries(nper, freq) for c in getCols(K)} - - -def getPeriodData(nper=None): - return {c: makePeriodSeries(nper) for c in getCols(K)} - - -# make frame -def makeTimeDataFrame(nper=None, freq="B"): - data = getTimeSeriesData(nper, freq) - return DataFrame(data) - - -def makeDataFrame(): - data = getSeriesData() - return DataFrame(data) - - -def getMixedTypeDict(): - index = Index(["a", "b", "c", "d", "e"]) - - data = { - "A": [0.0, 1.0, 2.0, 3.0, 4.0], - "B": [0.0, 1.0, 0.0, 1.0, 0.0], - "C": ["foo1", "foo2", "foo3", "foo4", "foo5"], - "D": bdate_range("1/1/2009", periods=5), - } - - return index, data - - -def makeMixedDataFrame(): - return DataFrame(getMixedTypeDict()[1]) - - -def makePeriodFrame(nper=None): - data = getPeriodData(nper) - return DataFrame(data) - - -def makeCustomIndex( - nentries, 
nlevels, prefix="#", names=False, ndupe_l=None, idx_type=None - ): - """Create an index/multiindex with given dimensions, levels, names, etc. - - nentries - number of entries in index - nlevels - number of levels (> 1 produces multiindex) - prefix - a string prefix for labels - names - (Optional), bool or list of strings. if True will use default - names, if false will use no names, if a list is given, the name of - each level in the index will be taken from the list. - ndupe_l - (Optional), list of ints, the number of rows for which the - label will be repeated at the corresponding level, you can specify just - the first few, the rest will use the default ndupe_l of 1. - len(ndupe_l) <= nlevels. - idx_type - "i"/"f"/"s"/"u"/"dt"/"p"/"td". - If idx_type is not None, `idx_nlevels` must be 1. - "i"/"f" creates an integer/float index, - "s"/"u" creates a string/unicode index - "dt" creates a datetime index. - "td" creates a timedelta index. - - if unspecified, string labels will be generated. - """ - - if ndupe_l is None: - ndupe_l = [1] * nlevels - assert is_sequence(ndupe_l) and len(ndupe_l) <= nlevels - assert names is None or names is False or names is True or len(names) is nlevels - assert idx_type is None or ( - idx_type in ("i", "f", "s", "u", "dt", "p", "td") and nlevels == 1 - ) - - if names is True: - # build default names - names = [prefix + str(i) for i in range(nlevels)] - if names is False: - # pass None to index constructor for no name - names = None - - # make singleton case uniform - if isinstance(names, str) and nlevels == 1: - names = [names] - - # specific 1D index type requested? - idx_func = dict( - i=makeIntIndex, - f=makeFloatIndex, - s=makeStringIndex, - u=makeUnicodeIndex, - dt=makeDateIndex, - td=makeTimedeltaIndex, - p=makePeriodIndex, - ).get(idx_type) - if idx_func: - idx = idx_func(nentries) - # but we need to fill in the name - if names: - idx.name = names[0] - return idx - elif idx_type is not None: - raise ValueError( - '"{idx_type}" is not a legal value for `idx_type`, ' - 'use "i"/"f"/"s"/"u"/"dt"/"p"/"td".'.format(idx_type=idx_type) - ) - - if len(ndupe_l) < nlevels: - ndupe_l.extend([1] * (nlevels - len(ndupe_l))) - assert len(ndupe_l) == nlevels - - assert all(x > 0 for x in ndupe_l) - - tuples = [] - for i in range(nlevels): - - def keyfunc(x): - import re - - numeric_tuple = re.sub(r"[^\d_]_?", "", x).split("_") - return [int(num) for num in numeric_tuple] - - # build a list of lists to create the index from - div_factor = nentries // ndupe_l[i] + 1 - cnt = Counter() - for j in range(div_factor): - label = "{prefix}_l{i}_g{j}".format(prefix=prefix, i=i, j=j) - cnt[label] = ndupe_l[i] - # cute Counter trick - result = sorted(cnt.elements(), key=keyfunc)[:nentries] - tuples.append(result) - - tuples = list(zip(*tuples)) - - # convert tuples to index - if nentries == 1: - # we have a single level of tuples, i.e.
a regular Index - index = Index(tuples[0], name=names[0]) - elif nlevels == 1: - name = None if names is None else names[0] - index = Index((x[0] for x in tuples), name=name) - else: - index = MultiIndex.from_tuples(tuples, names=names) - return index - - -def makeCustomDataframe( - nrows, - ncols, - c_idx_names=True, - r_idx_names=True, - c_idx_nlevels=1, - r_idx_nlevels=1, - data_gen_f=None, - c_ndupe_l=None, - r_ndupe_l=None, - dtype=None, - c_idx_type=None, - r_idx_type=None, -): - """ - nrows, ncols - number of data rows/cols - c_idx_names, idx_names - False/True/list of strings, yields No names , - default names or uses the provided names for the levels of the - corresponding index. You can provide a single string when - c_idx_nlevels ==1. - c_idx_nlevels - number of levels in columns index. > 1 will yield MultiIndex - r_idx_nlevels - number of levels in rows index. > 1 will yield MultiIndex - data_gen_f - a function f(row,col) which return the data value - at that position, the default generator used yields values of the form - "RxCy" based on position. - c_ndupe_l, r_ndupe_l - list of integers, determines the number - of duplicates for each label at a given level of the corresponding - index. The default `None` value produces a multiplicity of 1 across - all levels, i.e. a unique index. Will accept a partial list of length - N < idx_nlevels, for just the first N levels. If ndupe doesn't divide - nrows/ncol, the last label might have lower multiplicity. - dtype - passed to the DataFrame constructor as is, in case you wish to - have more control in conjunction with a custom `data_gen_f` - r_idx_type, c_idx_type - "i"/"f"/"s"/"u"/"dt"/"td". - If idx_type is not None, `idx_nlevels` must be 1. - "i"/"f" creates an integer/float index, - "s"/"u" creates a string/unicode index - "dt" create a datetime index. - "td" create a timedelta index. - - if unspecified, string labels will be generated. - - Examples: - - # 5 row, 3 columns, default names on both, single index on both axis - >> makeCustomDataframe(5,3) - - # make the data a random int between 1 and 100 - >> mkdf(5,3,data_gen_f=lambda r,c:randint(1,100)) - - # 2-level multiindex on rows with each label duplicated - # twice on first level, default names on both axis, single - # index on both axis - >> a=makeCustomDataframe(5,3,r_idx_nlevels=2,r_ndupe_l=[2]) - - # DatetimeIndex on row, index with unicode labels on columns - # no names on either axis - >> a=makeCustomDataframe(5,3,c_idx_names=False,r_idx_names=False, - r_idx_type="dt",c_idx_type="u") - - # 4-level multindex on rows with names provided, 2-level multindex - # on columns with default labels and default names. 
- >> a=makeCustomDataframe(5,3,r_idx_nlevels=4, - r_idx_names=["FEE","FI","FO","FAM"], - c_idx_nlevels=2) - - >> a=mkdf(5,3,r_idx_nlevels=2,c_idx_nlevels=4) - """ - - assert c_idx_nlevels > 0 - assert r_idx_nlevels > 0 - assert r_idx_type is None or ( - r_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and r_idx_nlevels == 1 - ) - assert c_idx_type is None or ( - c_idx_type in ("i", "f", "s", "u", "dt", "p", "td") and c_idx_nlevels == 1 - ) - - columns = makeCustomIndex( - ncols, - nlevels=c_idx_nlevels, - prefix="C", - names=c_idx_names, - ndupe_l=c_ndupe_l, - idx_type=c_idx_type, - ) - index = makeCustomIndex( - nrows, - nlevels=r_idx_nlevels, - prefix="R", - names=r_idx_names, - ndupe_l=r_ndupe_l, - idx_type=r_idx_type, - ) - - # by default, generate data based on location - if data_gen_f is None: - data_gen_f = lambda r, c: "R{rows}C{cols}".format(rows=r, cols=c) - - data = [[data_gen_f(r, c) for c in range(ncols)] for r in range(nrows)] - - return DataFrame(data, index, columns, dtype=dtype) - - -def _create_missing_idx(nrows, ncols, density, random_state=None): - if random_state is None: - random_state = np.random - else: - random_state = np.random.RandomState(random_state) - - # below is cribbed from scipy.sparse - size = int(np.round((1 - density) * nrows * ncols)) - # generate a few more to ensure unique values - min_rows = 5 - fac = 1.02 - extra_size = min(size + min_rows, fac * size) - - def _gen_unique_rand(rng, _extra_size): - ind = rng.rand(int(_extra_size)) - return np.unique(np.floor(ind * nrows * ncols))[:size] - - ind = _gen_unique_rand(random_state, extra_size) - while ind.size < size: - extra_size *= 1.05 - ind = _gen_unique_rand(random_state, extra_size) - - j = np.floor(ind * 1.0 / nrows).astype(int) - i = (ind - j * nrows).astype(int) - return i.tolist(), j.tolist() - - -def makeMissingCustomDataframe( - nrows, - ncols, - density=0.9, - random_state=None, - c_idx_names=True, - r_idx_names=True, - c_idx_nlevels=1, - r_idx_nlevels=1, - data_gen_f=None, - c_ndupe_l=None, - r_ndupe_l=None, - dtype=None, - c_idx_type=None, - r_idx_type=None, -): - """ - Parameters - ---------- - Density : float, optional - Float in (0, 1) that gives the percentage of non-missing numbers in - the DataFrame. - random_state : {np.random.RandomState, int}, optional - Random number generator or random seed. - - See makeCustomDataframe for descriptions of the rest of the parameters. - """ - df = makeCustomDataframe( - nrows, - ncols, - c_idx_names=c_idx_names, - r_idx_names=r_idx_names, - c_idx_nlevels=c_idx_nlevels, - r_idx_nlevels=r_idx_nlevels, - data_gen_f=data_gen_f, - c_ndupe_l=c_ndupe_l, - r_ndupe_l=r_ndupe_l, - dtype=dtype, - c_idx_type=c_idx_type, - r_idx_type=r_idx_type, - ) - - i, j = _create_missing_idx(nrows, ncols, density, random_state) - df.values[i, j] = np.nan - return df - - -def makeMissingDataframe(density=0.9, random_state=None): - df = makeDataFrame() - i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state) - df.values[i, j] = np.nan - return df - - -class TestSubDict(dict): - def __init__(self, *args, **kwargs): - dict.__init__(self, *args, **kwargs) - - -def optional_args(decorator): - """allows a decorator to take optional positional and keyword arguments. - Assumes that taking a single, callable, positional argument means that - it is decorating a function, i.e. 
something like this:: - - @my_decorator - def function(): pass - - Calls decorator with decorator(f, *args, **kwargs)""" - - @wraps(decorator) - def wrapper(*args, **kwargs): - def dec(f): - return decorator(f, *args, **kwargs) - - is_decorating = not kwargs and len(args) == 1 and callable(args[0]) - if is_decorating: - f = args[0] - args = [] - return dec(f) - else: - return dec - - return wrapper - - -# skip tests on exceptions with this message -_network_error_messages = ( - # 'urlopen error timed out', - # 'timeout: timed out', - # 'socket.timeout: timed out', - "timed out", - "Server Hangup", - "HTTP Error 503: Service Unavailable", - "502: Proxy Error", - "HTTP Error 502: internal error", - "HTTP Error 502", - "HTTP Error 503", - "HTTP Error 403", - "HTTP Error 400", - "Temporary failure in name resolution", - "Name or service not known", - "Connection refused", - "certificate verify", -) - -# or this e.errno/e.reason.errno -_network_errno_vals = ( - 101, # Network is unreachable - 111, # Connection refused - 110, # Connection timed out - 104, # Connection reset Error - 54, # Connection reset by peer - 60, # urllib.error.URLError: [Errno 60] Connection timed out -) - -# Both of the above shouldn't mask real issues such as 404's -# or refused connections (changed DNS). -# But some tests (test_data yahoo) contact incredibly flakey -# servers. - -# and conditionally raise on exception types in _get_default_network_errors - - -def _get_default_network_errors(): - # Lazy import for http.client because it imports many things from the stdlib - import http.client - - return (IOError, http.client.HTTPException, TimeoutError) - - -def can_connect(url, error_classes=None): - """Try to connect to the given url. True if succeeds, False if IOError - raised - - Parameters - ---------- - url : basestring - The URL to try to connect to - - Returns - ------- - connectable : bool - Return True if no IOError (unable to connect) or URLError (bad url) was - raised - """ - - if error_classes is None: - error_classes = _get_default_network_errors() - - try: - with urlopen(url): - pass - except error_classes: - return False - else: - return True - - -@optional_args -def network( - t, - url="http://www.google.com", - raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, - check_before_test=False, - error_classes=None, - skip_errnos=_network_errno_vals, - _skip_on_messages=_network_error_messages, -): - """ - Label a test as requiring network connection and, if an error is - encountered, only raise if it does not find a network connection. - - In comparison to ``network``, this assumes an added contract to your test: - you must assert that, under normal conditions, your test will ONLY fail if - it does not have network connectivity. - - You can call this in 3 ways: as a standard decorator, with keyword - arguments, or with a positional argument that is the url to check. - - Parameters - ---------- - t : callable - The test requiring network connectivity. - url : path - The url to test via ``pandas.io.common.urlopen`` to check - for connectivity. Defaults to 'http://www.google.com'. - raise_on_error : bool - If True, never catches errors. - check_before_test : bool - If True, checks connectivity before running the test case. - error_classes : tuple or Exception - error classes to ignore. If not in ``error_classes``, raises the error. - defaults to IOError. Be careful about changing the error classes here. 
- skip_errnos : iterable of int - Any exception that has .errno or .reason.erno set to one - of these values will be skipped with an appropriate - message. - _skip_on_messages: iterable of string - any exception e for which one of the strings is - a substring of str(e) will be skipped with an appropriate - message. Intended to suppress errors where an errno isn't available. - - Notes - ----- - * ``raise_on_error`` supercedes ``check_before_test`` - - Returns - ------- - t : callable - The decorated test ``t``, with checks for connectivity errors. - - Example - ------- - - Tests decorated with @network will fail if it's possible to make a network - connection to another URL (defaults to google.com):: - - >>> from pandas.util.testing import network - >>> from pandas.io.common import urlopen - >>> @network - ... def test_network(): - ... with urlopen("rabbit://bonanza.com"): - ... pass - Traceback - ... - URLError: - - You can specify alternative URLs:: - - >>> @network("http://www.yahoo.com") - ... def test_something_with_yahoo(): - ... raise IOError("Failure Message") - >>> test_something_with_yahoo() - Traceback (most recent call last): - ... - IOError: Failure Message - - If you set check_before_test, it will check the url first and not run the - test on failure:: - - >>> @network("failing://url.blaher", check_before_test=True) - ... def test_something(): - ... print("I ran!") - ... raise ValueError("Failure") - >>> test_something() - Traceback (most recent call last): - ... - - Errors not related to networking will always be raised. - """ - from pytest import skip - - if error_classes is None: - error_classes = _get_default_network_errors() - - t.network = True - - @wraps(t) - def wrapper(*args, **kwargs): - if check_before_test and not raise_on_error: - if not can_connect(url, error_classes): - skip() - try: - return t(*args, **kwargs) - except Exception as err: - errno = getattr(err, "errno", None) - if not errno and hasattr(errno, "reason"): - errno = getattr(err.reason, "errno", None) - - if errno in skip_errnos: - skip( - "Skipping test due to known errno" - " and error {error}".format(error=err) - ) - - e_str = str(err) - - if any(m.lower() in e_str.lower() for m in _skip_on_messages): - skip( - "Skipping test because exception " - "message is known and error {error}".format(error=err) - ) - - if not isinstance(err, error_classes): - raise - - if raise_on_error or can_connect(url, error_classes): - raise - else: - skip( - "Skipping test due to lack of connectivity" - " and error {error}".format(error=err) - ) - - return wrapper - - -with_connectivity_check = network - - -@contextmanager -def assert_produces_warning( - expected_warning=Warning, - filter_level="always", - clear=None, - check_stacklevel=True, - raise_on_extra_warnings=True, -): - """ - Context manager for running code expected to either raise a specific - warning, or not raise any warnings. Verifies that the code raises the - expected warning, and that it does not raise any other unexpected - warnings. It is basically a wrapper around ``warnings.catch_warnings``. - - Parameters - ---------- - expected_warning : {Warning, False, None}, default Warning - The type of Exception raised. ``exception.Warning`` is the base - class for all warnings. To check that no warning is returned, - specify ``False`` or ``None``. - filter_level : str or None, default "always" - Specifies whether warnings are ignored, displayed, or turned - into errors. 
- Valid values are: - - * "error" - turns matching warnings into exceptions - * "ignore" - discard the warning - * "always" - always emit a warning - * "default" - print the warning the first time it is generated - from each location - * "module" - print the warning the first time it is generated - from each module - * "once" - print the warning the first time it is generated - - clear : str, default None - If not ``None`` then remove any previously raised warnings from - the ``__warningsregistry__`` to ensure that no warning messages are - suppressed by this context manager. If ``None`` is specified, - the ``__warningsregistry__`` keeps track of which warnings have been - shown, and does not show them again. - check_stacklevel : bool, default True - If True, displays the line that called the function containing - the warning to show were the function is called. Otherwise, the - line that implements the function is displayed. - raise_on_extra_warnings : bool, default True - Whether extra warnings not of the type `expected_warning` should - cause the test to fail. - - Examples - -------- - >>> import warnings - >>> with assert_produces_warning(): - ... warnings.warn(UserWarning()) - ... - >>> with assert_produces_warning(False): - ... warnings.warn(RuntimeWarning()) - ... - Traceback (most recent call last): - ... - AssertionError: Caused unexpected warning(s): ['RuntimeWarning']. - >>> with assert_produces_warning(UserWarning): - ... warnings.warn(RuntimeWarning()) - Traceback (most recent call last): - ... - AssertionError: Did not see expected warning of class 'UserWarning'. - - ..warn:: This is *not* thread-safe. - """ - __tracebackhide__ = True - - with warnings.catch_warnings(record=True) as w: - - if clear is not None: - # make sure that we are clearing these warnings - # if they have happened before - # to guarantee that we will catch them - if not is_list_like(clear): - clear = [clear] - for m in clear: - try: - m.__warningregistry__.clear() - except AttributeError: - # module may not have __warningregistry__ - pass - - saw_warning = False - warnings.simplefilter(filter_level) - yield w - extra_warnings = [] - - for actual_warning in w: - if expected_warning and issubclass( - actual_warning.category, expected_warning - ): - saw_warning = True - - if check_stacklevel and issubclass( - actual_warning.category, (FutureWarning, DeprecationWarning) - ): - from inspect import getframeinfo, stack - - caller = getframeinfo(stack()[2][0]) - msg = ( - "Warning not set with correct stacklevel. " - "File where warning is raised: {actual} != " - "{caller}. Warning message: {message}" - ).format( - actual=actual_warning.filename, - caller=caller.filename, - message=actual_warning.message, - ) - assert actual_warning.filename == caller.filename, msg - else: - extra_warnings.append( - ( - actual_warning.category.__name__, - actual_warning.message, - actual_warning.filename, - actual_warning.lineno, - ) - ) - if expected_warning: - msg = "Did not see expected warning of class {name!r}.".format( - name=expected_warning.__name__ - ) - assert saw_warning, msg - if raise_on_extra_warnings and extra_warnings: - raise AssertionError( - "Caused unexpected warning(s): {!r}.".format(extra_warnings) - ) - - -class RNGContext: - """ - Context manager to set the numpy random number generator speed. Returns - to the original value upon exiting the context manager. 
- - Parameters - ---------- - seed : int - Seed for numpy.random.seed - - Examples - -------- - - with RNGContext(42): - np.random.randn() - """ - - def __init__(self, seed): - self.seed = seed - - def __enter__(self): - - self.start_state = np.random.get_state() - np.random.seed(self.seed) - - def __exit__(self, exc_type, exc_value, traceback): - - np.random.set_state(self.start_state) - - -@contextmanager -def with_csv_dialect(name, **kwargs): - """ - Context manager to temporarily register a CSV dialect for parsing CSV. - - Parameters - ---------- - name : str - The name of the dialect. - kwargs : mapping - The parameters for the dialect. - - Raises - ------ - ValueError : the name of the dialect conflicts with a builtin one. - - See Also - -------- - csv : Python's CSV library. - """ - import csv - - _BUILTIN_DIALECTS = {"excel", "excel-tab", "unix"} - - if name in _BUILTIN_DIALECTS: - raise ValueError("Cannot override builtin dialect.") - - csv.register_dialect(name, **kwargs) - yield - csv.unregister_dialect(name) - - -@contextmanager -def use_numexpr(use, min_elements=None): - from pandas.core.computation import expressions as expr - - if min_elements is None: - min_elements = expr._MIN_ELEMENTS - - olduse = expr._USE_NUMEXPR - oldmin = expr._MIN_ELEMENTS - expr.set_use_numexpr(use) - expr._MIN_ELEMENTS = min_elements - yield - expr._MIN_ELEMENTS = oldmin - expr.set_use_numexpr(olduse) - - -def test_parallel(num_threads=2, kwargs_list=None): - """Decorator to run the same function multiple times in parallel. - - Parameters - ---------- - num_threads : int, optional - The number of times the function is run in parallel. - kwargs_list : list of dicts, optional - The list of kwargs to update original - function kwargs on different threads. - Notes - ----- - This decorator does not pass the return value of the decorated function. - - Original from scikit-image: - - https://github.com/scikit-image/scikit-image/pull/1519 - - """ - - assert num_threads > 0 - has_kwargs_list = kwargs_list is not None - if has_kwargs_list: - assert len(kwargs_list) == num_threads - import threading - - def wrapper(func): - @wraps(func) - def inner(*args, **kwargs): - if has_kwargs_list: - update_kwargs = lambda i: dict(kwargs, **kwargs_list[i]) - else: - update_kwargs = lambda i: kwargs - threads = [] - for i in range(num_threads): - updated_kwargs = update_kwargs(i) - thread = threading.Thread(target=func, args=args, kwargs=updated_kwargs) - threads.append(thread) - for thread in threads: - thread.start() - for thread in threads: - thread.join() - - return inner - - return wrapper - - -class SubclassedSeries(Series): - _metadata = ["testattr", "name"] - - @property - def _constructor(self): - return SubclassedSeries - - @property - def _constructor_expanddim(self): - return SubclassedDataFrame - - -class SubclassedDataFrame(DataFrame): - _metadata = ["testattr"] - - @property - def _constructor(self): - return SubclassedDataFrame - - @property - def _constructor_sliced(self): - return SubclassedSeries - - -class SubclassedCategorical(Categorical): - @property - def _constructor(self): - return SubclassedCategorical - - -@contextmanager -def set_timezone(tz): - """Context manager for temporarily setting a timezone. - - Parameters - ---------- - tz : str - A string representing a valid timezone. - - Examples - -------- - - >>> from datetime import datetime - >>> from dateutil.tz import tzlocal - >>> tzlocal().tzname(datetime.now()) - 'IST' - - >>> with set_timezone('US/Eastern'): - ... 
tzlocal().tzname(datetime.now()) - ... - 'EDT' - """ - - import os - import time - - def setTZ(tz): - if tz is None: - try: - del os.environ["TZ"] - except KeyError: - pass - else: - os.environ["TZ"] = tz - time.tzset() - - orig_tz = os.environ.get("TZ") - setTZ(tz) - try: - yield - finally: - setTZ(orig_tz) - - -def _make_skipna_wrapper(alternative, skipna_alternative=None): - """Create a function for calling on an array. - - Parameters - ---------- - alternative : function - The function to be called on the array with no NaNs. - Only used when 'skipna_alternative' is None. - skipna_alternative : function - The function to be called on the original array - - Returns - ------- - skipna_wrapper : function - """ - if skipna_alternative: - - def skipna_wrapper(x): - return skipna_alternative(x.values) - - else: - - def skipna_wrapper(x): - nona = x.dropna() - if len(nona) == 0: - return np.nan - return alternative(nona) - - return skipna_wrapper - - -def convert_rows_list_to_csv_str(rows_list): - """ - Convert list of CSV rows to single CSV-formatted string for current OS. - - This method is used for creating expected value of to_csv() method. - Parameters - ---------- - rows_list : list - The list of string. Each element represents the row of csv. - - Returns - ------- - expected : string - Expected output of to_csv() in current OS - """ - sep = os.linesep - expected = sep.join(rows_list) + sep - return expected +# def assert_almost_equal( +# left, right, check_dtype="equiv", check_less_precise=False, **kwargs +# ): +# """ +# Check that the left and right objects are approximately equal. + +# By approximately equal, we refer to objects that are numbers or that +# contain numbers which may be equivalent to specific levels of precision. + +# Parameters +# ---------- +# left : object +# right : object +# check_dtype : bool or {'equiv'}, default 'equiv' +# Check dtype if both a and b are the same type. If 'equiv' is passed in, +# then `RangeIndex` and `Int64Index` are also considered equivalent +# when doing type checking. +# check_less_precise : bool or int, default False +# Specify comparison precision. 5 digits (False) or 3 digits (True) +# after decimal points are compared. If int, then specify the number +# of digits to compare. + +# When comparing two numbers, if the first number has magnitude less +# than 1e-5, we compare the two numbers directly and check whether +# they are equivalent within the specified precision. Otherwise, we +# compare the **ratio** of the second number to the first number and +# check whether it is equivalent to 1 within the specified precision. +# """ + +# if isinstance(left, pd.Index): +# assert_index_equal( +# left, +# right, +# check_exact=False, +# exact=check_dtype, +# check_less_precise=check_less_precise, +# **kwargs, +# ) + +# elif isinstance(left, pd.Series): +# assert_series_equal( +# left, +# right, +# check_exact=False, +# check_dtype=check_dtype, +# check_less_precise=check_less_precise, +# **kwargs, +# ) + +# elif isinstance(left, pd.DataFrame): +# assert_frame_equal( +# left, +# right, +# check_exact=False, +# check_dtype=check_dtype, +# check_less_precise=check_less_precise, +# **kwargs, +# ) + +# else: +# # Other sequences. +# if check_dtype: +# if is_number(left) and is_number(right): +# # Do not compare numeric classes, like np.float64 and float. +# pass +# elif is_bool(left) and is_bool(right): +# # Do not compare bool classes, like np.bool_ and bool. 
+# pass +# else: +# if isinstance(left, np.ndarray) or isinstance(right, np.ndarray): +# obj = "numpy array" +# else: +# obj = "Input" +# assert_class_equal(left, right, obj=obj) +# _testing.assert_almost_equal( +# left, +# right, +# check_dtype=check_dtype, +# check_less_precise=check_less_precise, +# **kwargs, +# ) + + +# def assert_class_equal(left, right, exact=True, obj="Input"): +# """checks classes are equal.""" +# __tracebackhide__ = True + +# def repr_class(x): +# if isinstance(x, Index): +# # return Index as it is to include values in the error message +# return x + +# try: +# return x.__class__.__name__ +# except AttributeError: +# return repr(type(x)) + +# if exact == "equiv": +# if type(left) != type(right): +# # allow equivalence of Int64Index/RangeIndex +# types = {type(left).__name__, type(right).__name__} +# if len(types - {"Int64Index", "RangeIndex"}): +# msg = "{obj} classes are not equivalent".format(obj=obj) +# raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) +# elif exact: +# if type(left) != type(right): +# msg = "{obj} classes are different".format(obj=obj) +# raise_assert_detail(obj, msg, repr_class(left), repr_class(right)) + + +# def assert_attr_equal(attr, left, right, obj="Attributes"): +# """checks attributes are equal. Both objects must have attribute. + +# Parameters +# ---------- +# attr : str +# Attribute name being compared. +# left : object +# right : object +# obj : str, default 'Attributes' +# Specify object name being compared, internally used to show appropriate +# assertion message +# """ +# __tracebackhide__ = True + +# left_attr = getattr(left, attr) +# right_attr = getattr(right, attr) + +# if left_attr is right_attr: +# return True +# elif ( +# is_number(left_attr) +# and np.isnan(left_attr) +# and is_number(right_attr) +# and np.isnan(right_attr) +# ): +# # np.nan +# return True + +# try: +# result = left_attr == right_attr +# except TypeError: +# # datetimetz on rhs may raise TypeError +# result = False +# if not isinstance(result, bool): +# result = result.all() + +# if result: +# return True +# else: +# msg = 'Attribute "{attr}" are different'.format(attr=attr) +# raise_assert_detail(obj, msg, left_attr, right_attr) + + +# def assert_categorical_equal( +# left: Categorical, +# right: Categorical, +# check_dtype: bool = True, +# check_category_order: bool = True, +# obj: str = "Categorical", +# ) -> None: +# """Test that Categoricals are equivalent. + +# Parameters +# ---------- +# left : Categorical +# right : Categorical +# check_dtype : bool, default True +# Check that integer dtype of the codes are the same +# check_category_order : bool, default True +# Whether the order of the categories should be compared, which +# implies identical integer codes. If False, only the resulting +# values are compared. The ordered attribute is +# checked regardless. 
+# obj : str, default 'Categorical' +# Specify object name being compared, internally used to show appropriate +# assertion message +# """ +# _check_isinstance(left, right, Categorical) + +# if check_category_order: +# assert_index_equal( +# left.categories, right.categories, obj="{obj}.categories".format(obj=obj) +# ) +# assert_numpy_array_equal( +# left.codes, +# right.codes, +# check_dtype=check_dtype, +# obj="{obj}.codes".format(obj=obj), +# ) +# else: +# assert_index_equal( +# left.categories.sort_values(), +# right.categories.sort_values(), +# obj="{obj}.categories".format(obj=obj), +# ) +# assert_index_equal( +# left.categories.take(left.codes), +# right.categories.take(right.codes), +# obj="{obj}.values".format(obj=obj), +# ) + +# assert_attr_equal("ordered", left, right, obj=obj) + + +# def assert_interval_array_equal( +# left: IntervalArray, +# right: IntervalArray, +# exact: str = "equiv", +# obj: str = "IntervalArray", +# ) -> None: +# """Test that two IntervalArrays are equivalent. + +# Parameters +# ---------- +# left, right : IntervalArray +# The IntervalArrays to compare. +# exact : bool or {'equiv'}, default 'equiv' +# Whether to check the Index class, dtype and inferred_type +# are identical. If 'equiv', then RangeIndex can be substituted for +# Int64Index as well. +# obj : str, default 'IntervalArray' +# Specify object name being compared, internally used to show appropriate +# assertion message +# """ +# assert_index_equal( +# left.left, right.left, exact=exact, obj="{obj}.left".format(obj=obj) +# ) +# assert_index_equal( +# left.right, right.right, exact=exact, obj="{obj}.left".format(obj=obj) +# ) +# assert_attr_equal("closed", left, right, obj=obj) + + +# def assert_period_array_equal( +# left: PeriodArray, right: PeriodArray, obj: str = "PeriodArray" +# ) -> None: +# _check_isinstance(left, right, PeriodArray) + +# assert_numpy_array_equal( +# left._data, right._data, obj="{obj}.values".format(obj=obj) +# ) +# assert_attr_equal("freq", left, right, obj=obj) + + +# def assert_datetime_array_equal( +# left: DatetimeArray, right: DatetimeArray, obj: str = "DatetimeArray" +# ) -> None: +# __tracebackhide__ = True +# _check_isinstance(left, right, DatetimeArray) + +# assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) +# assert_attr_equal("freq", left, right, obj=obj) +# assert_attr_equal("tz", left, right, obj=obj) + + +# def assert_timedelta_array_equal( +# left: TimedeltaArray, right: TimedeltaArray, obj: str = "TimedeltaArray" +# ) -> None: +# __tracebackhide__ = True +# _check_isinstance(left, right, TimedeltaArray) +# assert_numpy_array_equal(left._data, right._data, obj="{obj}._data".format(obj=obj)) +# assert_attr_equal("freq", left, right, obj=obj) + + +# def assert_numpy_array_equal( +# left: np.ndarray, +# right: np.ndarray, +# strict_nan: bool = False, +# check_dtype: bool = True, +# err_msg: Optional[str] = None, +# check_same: Optional[str] = None, +# obj: str = "numpy array", +# ) -> None: +# """ Checks that 'np.ndarray' is equivalent + +# Parameters +# ---------- +# left : np.ndarray or iterable +# right : np.ndarray or iterable +# strict_nan : bool, default False +# If True, consider NaN and None to be different. 
+# check_dtype: bool, default True +# check dtype if both a and b are np.ndarray +# err_msg : str, default None +# If provided, used as assertion message +# check_same : None|'copy'|'same', default None +# Ensure left and right refer/do not refer to the same memory area +# obj : str, default 'numpy array' +# Specify object name being compared, internally used to show appropriate +# assertion message +# """ +# __tracebackhide__ = True + +# # instance validation +# # Show a detailed error message when classes are different +# assert_class_equal(left, right, obj=obj) +# # both classes must be an np.ndarray +# _check_isinstance(left, right, np.ndarray) + +# def _get_base(obj): +# return obj.base if getattr(obj, "base", None) is not None else obj + +# left_base = _get_base(left) +# right_base = _get_base(right) + +# if check_same == "same": +# if left_base is not right_base: +# msg = "{left!r} is not {right!r}".format(left=left_base, right=right_base) +# raise AssertionError(msg) +# elif check_same == "copy": +# if left_base is right_base: +# msg = "{left!r} is {right!r}".format(left=left_base, right=right_base) +# raise AssertionError(msg) + +# def _raise(left, right, err_msg): +# if err_msg is None: +# if left.shape != right.shape: +# raise_assert_detail( +# obj, +# "{obj} shapes are different".format(obj=obj), +# left.shape, +# right.shape, +# ) + +# diff = 0 +# for l, r in zip(left, right): +# # count up differences +# if not array_equivalent(l, r, strict_nan=strict_nan): +# diff += 1 + +# diff = diff * 100.0 / left.size +# msg = "{obj} values are different ({pct} %)".format( +# obj=obj, pct=np.round(diff, 5) +# ) +# raise_assert_detail(obj, msg, left, right) + +# raise AssertionError(err_msg) + +# # compare shape and values +# if not array_equivalent(left, right, strict_nan=strict_nan): +# _raise(left, right, err_msg) + +# if check_dtype: +# if isinstance(left, np.ndarray) and isinstance(right, np.ndarray): +# assert_attr_equal("dtype", left, right, obj=obj) + + +# def assert_extension_array_equal( +# left, right, check_dtype=True, check_less_precise=False, check_exact=False +# ): +# """Check that left and right ExtensionArrays are equal. + +# Parameters +# ---------- +# left, right : ExtensionArray +# The two arrays to compare +# check_dtype : bool, default True +# Whether to check if the ExtensionArray dtypes are identical. +# check_less_precise : bool or int, default False +# Specify comparison precision. Only used when check_exact is False. +# 5 digits (False) or 3 digits (True) after decimal points are compared. +# If int, then specify the digits to compare. +# check_exact : bool, default False +# Whether to compare number exactly. + +# Notes +# ----- +# Missing values are checked separately from valid values. +# A mask of missing values is computed for each and checked to match. +# The remaining all-valid values are cast to object dtype and checked. 
+# """ +# assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" +# assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" +# if check_dtype: +# assert_attr_equal("dtype", left, right, obj="ExtensionArray") + +# if hasattr(left, "asi8") and type(right) == type(left): +# # Avoid slow object-dtype comparisons +# assert_numpy_array_equal(left.asi8, right.asi8) +# return + +# left_na = np.asarray(left.isna()) +# right_na = np.asarray(right.isna()) +# assert_numpy_array_equal(left_na, right_na, obj="ExtensionArray NA mask") + +# left_valid = np.asarray(left[~left_na].astype(object)) +# right_valid = np.asarray(right[~right_na].astype(object)) +# if check_exact: +# assert_numpy_array_equal(left_valid, right_valid, obj="ExtensionArray") +# else: +# _testing.assert_almost_equal( +# left_valid, +# right_valid, +# check_dtype=check_dtype, +# check_less_precise=check_less_precise, +# obj="ExtensionArray", +# ) + + +# # This could be refactored to use the NDFrame.equals method +# def assert_series_equal( +# left: Series, +# right: Series, +# check_dtype: bool = True, +# check_index_type: str = "equiv", +# check_series_type: bool = True, +# check_less_precise: bool = False, +# check_names: bool = True, +# check_exact: bool = False, +# check_datetimelike_compat: bool = False, +# check_categorical: bool = True, +# obj: str = "Series", +# ) -> None: +# """ +# Check that left and right Series are equal. + +# Parameters +# ---------- +# left : Series +# right : Series +# check_dtype : bool, default True +# Whether to check the Series dtype is identical. +# check_index_type : bool or {'equiv'}, default 'equiv' +# Whether to check the Index class, dtype and inferred_type +# are identical. +# check_series_type : bool, default True +# Whether to check the Series class is identical. +# check_less_precise : bool or int, default False +# Specify comparison precision. Only used when check_exact is False. +# 5 digits (False) or 3 digits (True) after decimal points are compared. +# If int, then specify the digits to compare. + +# When comparing two numbers, if the first number has magnitude less +# than 1e-5, we compare the two numbers directly and check whether +# they are equivalent within the specified precision. Otherwise, we +# compare the **ratio** of the second number to the first number and +# check whether it is equivalent to 1 within the specified precision. +# check_names : bool, default True +# Whether to check the Series and Index names attribute. +# check_exact : bool, default False +# Whether to compare number exactly. +# check_datetimelike_compat : bool, default False +# Compare datetime-like which is comparable ignoring dtype. +# check_categorical : bool, default True +# Whether to compare internal Categorical exactly. +# obj : str, default 'Series' +# Specify object name being compared, internally used to show appropriate +# assertion message. +# """ +# __tracebackhide__ = True + +# # instance validation +# _check_isinstance(left, right, Series) + +# if check_series_type: +# # ToDo: There are some tests using rhs is sparse +# # lhs is dense. 
Should use assert_class_equal in future +# assert isinstance(left, type(right)) +# # assert_class_equal(left, right, obj=obj) + +# # length comparison +# if len(left) != len(right): +# msg1 = "{len}, {left}".format(len=len(left), left=left.index) +# msg2 = "{len}, {right}".format(len=len(right), right=right.index) +# raise_assert_detail(obj, "Series length are different", msg1, msg2) + +# # index comparison +# assert_index_equal( +# left.index, +# right.index, +# exact=check_index_type, +# check_names=check_names, +# check_less_precise=check_less_precise, +# check_exact=check_exact, +# check_categorical=check_categorical, +# obj="{obj}.index".format(obj=obj), +# ) + +# if check_dtype: +# # We want to skip exact dtype checking when `check_categorical` +# # is False. We'll still raise if only one is a `Categorical`, +# # regardless of `check_categorical` +# if ( +# is_categorical_dtype(left) +# and is_categorical_dtype(right) +# and not check_categorical +# ): +# pass +# else: +# assert_attr_equal( +# "dtype", left, right, obj="Attributes of {obj}".format(obj=obj) +# ) + +# if check_exact: +# assert_numpy_array_equal( +# left._internal_get_values(), +# right._internal_get_values(), +# check_dtype=check_dtype, +# obj="{obj}".format(obj=obj), +# ) +# elif check_datetimelike_compat: +# # we want to check only if we have compat dtypes +# # e.g. integer and M|m are NOT compat, but we can simply check +# # the values in that case +# if needs_i8_conversion(left) or needs_i8_conversion(right): + +# # datetimelike may have different objects (e.g. datetime.datetime +# # vs Timestamp) but will compare equal +# if not Index(left.values).equals(Index(right.values)): +# msg = ( +# "[datetimelike_compat=True] {left} is not equal to {right}." +# ).format(left=left.values, right=right.values) +# raise AssertionError(msg) +# else: +# assert_numpy_array_equal( +# left._internal_get_values(), +# right._internal_get_values(), +# check_dtype=check_dtype, +# ) +# elif is_interval_dtype(left) or is_interval_dtype(right): +# left_array = cast(IntervalArray, left.array) +# right_array = cast(IntervalArray, right.array) +# assert_interval_array_equal(left_array, right_array) +# elif is_extension_array_dtype(left.dtype) and is_datetime64tz_dtype(left.dtype): +# # .values is an ndarray, but ._values is the ExtensionArray. 
+# # TODO: Use .array +# assert is_extension_array_dtype(right.dtype) +# assert_extension_array_equal(left._values, right._values) +# elif ( +# is_extension_array_dtype(left) +# and not is_categorical_dtype(left) +# and is_extension_array_dtype(right) +# and not is_categorical_dtype(right) +# ): +# assert_extension_array_equal(left.array, right.array) +# else: +# _testing.assert_almost_equal( +# left._internal_get_values(), +# right._internal_get_values(), +# check_less_precise=check_less_precise, +# check_dtype=check_dtype, +# obj="{obj}".format(obj=obj), +# ) + +# # metadata comparison +# if check_names: +# assert_attr_equal("name", left, right, obj=obj) + +# if check_categorical: +# if is_categorical_dtype(left) or is_categorical_dtype(right): +# assert_categorical_equal( +# left.values, right.values, obj="{obj} category".format(obj=obj) +# ) + + +# # This could be refactored to use the NDFrame.equals method +# def assert_frame_equal( +# left: Any, +# right: Any, +# check_dtype: bool = True, +# check_index_type: str = "equiv", +# check_column_type: str = "equiv", +# check_frame_type: bool = True, +# check_less_precise: bool = False, +# check_names: bool = True, +# by_blocks: bool = False, +# check_exact: bool = False, +# check_datetimelike_compat: bool = False, +# check_categorical: bool = True, +# check_like: bool = False, +# obj: str = "DataFrame", +# ) -> None: +# """ +# Check that left and right DataFrame are equal. + +# This function is intended to compare two DataFrames and output any +# differences. Is is mostly intended for use in unit tests. +# Additional parameters allow varying the strictness of the +# equality checks performed. + +# Parameters +# ---------- +# left : Any +# First DataFrame to compare. +# right : Any +# Second DataFrame to compare. +# check_dtype : bool, default True +# Whether to check the DataFrame dtype is identical. +# check_index_type : bool or {'equiv'}, default 'equiv' +# Whether to check the Index class, dtype and inferred_type +# are identical. +# check_column_type : bool or {'equiv'}, default 'equiv' +# Whether to check the columns class, dtype and inferred_type +# are identical. Is passed as the ``exact`` argument of +# :func:`assert_index_equal`. +# check_frame_type : bool, default True +# Whether to check the DataFrame class is identical. +# check_less_precise : bool or int, default False +# Specify comparison precision. Only used when check_exact is False. +# 5 digits (False) or 3 digits (True) after decimal points are compared. +# If int, then specify the digits to compare. + +# When comparing two numbers, if the first number has magnitude less +# than 1e-5, we compare the two numbers directly and check whether +# they are equivalent within the specified precision. Otherwise, we +# compare the **ratio** of the second number to the first number and +# check whether it is equivalent to 1 within the specified precision. +# check_names : bool, default True +# Whether to check that the `names` attribute for both the `index` +# and `column` attributes of the DataFrame is identical. +# by_blocks : bool, default False +# Specify how to compare internal data. If False, compare by columns. +# If True, compare by blocks. +# check_exact : bool, default False +# Whether to compare number exactly. +# check_datetimelike_compat : bool, default False +# Compare datetime-like which is comparable ignoring dtype. +# check_categorical : bool, default True +# Whether to compare internal Categorical exactly. 
+# check_like : bool, default False +# If True, ignore the order of index & columns. +# Note: index labels must match their respective rows +# (same as in columns) - same labels must be with the same data. +# obj : str, default 'DataFrame' +# Specify object name being compared, internally used to show appropriate +# assertion message. + +# See Also +# -------- +# assert_series_equal : Equivalent method for asserting Series equality. +# DataFrame.equals : Check DataFrame equality. + +# Examples +# -------- +# This example shows comparing two DataFrames that are equal +# but with columns of differing dtypes. + +# >>> from pandas.util.testing import assert_frame_equal +# >>> df1 = pd.DataFrame({'a': [1, 2], 'b': [3, 4]}) +# >>> df2 = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]}) + +# df1 equals itself. + +# >>> assert_frame_equal(df1, df1) + +# df1 differs from df2 as column 'b' is of a different type. + +# >>> assert_frame_equal(df1, df2) +# Traceback (most recent call last): +# ... +# AssertionError: Attributes of DataFrame.iloc[:, 1] are different + +# Attribute "dtype" are different +# [left]: int64 +# [right]: float64 + +# Ignore differing dtypes in columns with check_dtype. + +# >>> assert_frame_equal(df1, df2, check_dtype=False) +# """ +# __tracebackhide__ = True + +# # instance validation +# _check_isinstance(left, right, DataFrame) + +# if check_frame_type: +# assert isinstance(left, type(right)) +# # assert_class_equal(left, right, obj=obj) + +# # shape comparison +# if left.shape != right.shape: +# raise_assert_detail( +# obj, +# "{obj} shape mismatch".format(obj=obj), +# "{shape!r}".format(shape=left.shape), +# "{shape!r}".format(shape=right.shape), +# ) + +# if check_like: +# left, right = left.reindex_like(right), right + +# # index comparison +# assert_index_equal( +# left.index, +# right.index, +# exact=check_index_type, +# check_names=check_names, +# check_less_precise=check_less_precise, +# check_exact=check_exact, +# check_categorical=check_categorical, +# obj="{obj}.index".format(obj=obj), +# ) + +# # column comparison +# assert_index_equal( +# left.columns, +# right.columns, +# exact=check_column_type, +# check_names=check_names, +# check_less_precise=check_less_precise, +# check_exact=check_exact, +# check_categorical=check_categorical, +# obj="{obj}.columns".format(obj=obj), +# ) + +# # compare by blocks +# if by_blocks: +# rblocks = right._to_dict_of_blocks() +# lblocks = left._to_dict_of_blocks() +# for dtype in list(set(list(lblocks.keys()) + list(rblocks.keys()))): +# assert dtype in lblocks +# assert dtype in rblocks +# assert_frame_equal( +# lblocks[dtype], rblocks[dtype], check_dtype=check_dtype, obj=obj +# ) + +# # compare by columns +# else: +# for i, col in enumerate(left.columns): +# assert col in right +# lcol = left.iloc[:, i] +# rcol = right.iloc[:, i] +# assert_series_equal( +# lcol, +# rcol, +# check_dtype=check_dtype, +# check_index_type=check_index_type, +# check_less_precise=check_less_precise, +# check_exact=check_exact, +# check_names=check_names, +# check_datetimelike_compat=check_datetimelike_compat, +# check_categorical=check_categorical, +# obj="{obj}.iloc[:, {idx}]".format(obj=obj, idx=i), +# ) + + +# def assert_equal( +# left: Union[DataFrame, AnyArrayLike], +# right: Union[DataFrame, AnyArrayLike], +# **kwargs, +# ) -> None: +# """ +# Wrapper for tm.assert_*_equal to dispatch to the appropriate test function. 
+ +# Parameters +# ---------- +# left : Index, Series, DataFrame, ExtensionArray, or np.ndarray +# right : Index, Series, DataFrame, ExtensionArray, or np.ndarray +# **kwargs +# """ +# __tracebackhide__ = True + +# if isinstance(left, Index): +# right = cast(Index, right) +# assert_index_equal(left, right, **kwargs) +# elif isinstance(left, Series): +# right = cast(Series, right) +# assert_series_equal(left, right, **kwargs) +# elif isinstance(left, DataFrame): +# right = cast(DataFrame, right) +# assert_frame_equal(left, right, **kwargs) +# elif isinstance(left, IntervalArray): +# right = cast(IntervalArray, right) +# assert_interval_array_equal(left, right, **kwargs) +# elif isinstance(left, PeriodArray): +# right = cast(PeriodArray, right) +# assert_period_array_equal(left, right, **kwargs) +# elif isinstance(left, DatetimeArray): +# right = cast(DatetimeArray, right) +# assert_datetime_array_equal(left, right, **kwargs) +# elif isinstance(left, TimedeltaArray): +# right = cast(TimedeltaArray, right) +# assert_timedelta_array_equal(left, right, **kwargs) +# elif isinstance(left, ExtensionArray): +# right = cast(ExtensionArray, right) +# assert_extension_array_equal(left, right, **kwargs) +# elif isinstance(left, np.ndarray): +# right = cast(np.ndarray, right) +# assert_numpy_array_equal(left, right, **kwargs) +# elif isinstance(left, str): +# assert kwargs == {} +# assert left == right +# else: +# raise NotImplementedError(type(left)) + + +# def assert_sp_array_equal( +# left: pd.SparseArray, +# right: pd.SparseArray, +# check_dtype: bool = True, +# check_kind: bool = True, +# check_fill_value: bool = True, +# consolidate_block_indices: bool = False, +# ): +# """Check that the left and right SparseArray are equal. + +# Parameters +# ---------- +# left : SparseArray +# right : SparseArray +# check_dtype : bool, default True +# Whether to check the data dtype is identical. +# check_kind : bool, default True +# Whether to just the kind of the sparse index for each column. +# check_fill_value : bool, default True +# Whether to check that left.fill_value matches right.fill_value +# consolidate_block_indices : bool, default False +# Whether to consolidate contiguous blocks for sparse arrays with +# a BlockIndex. Some operations, e.g. concat, will end up with +# block indices that could be consolidated. Setting this to true will +# create a new BlockIndex for that array, with consolidated +# block indices. +# """ + +# _check_isinstance(left, right, pd.SparseArray) + +# assert_numpy_array_equal(left.sp_values, right.sp_values, check_dtype=check_dtype) + +# # SparseIndex comparison +# assert isinstance(left.sp_index, pd._libs.sparse.SparseIndex) +# assert isinstance(right.sp_index, pd._libs.sparse.SparseIndex) + +# if not check_kind: +# left_index = left.sp_index.to_block_index() +# right_index = right.sp_index.to_block_index() +# else: +# left_index = left.sp_index +# right_index = right.sp_index + +# if consolidate_block_indices and left.kind == "block": +# # we'll probably remove this hack... 
+# left_index = left_index.to_int_index().to_block_index()
+# right_index = right_index.to_int_index().to_block_index()
+
+# if not left_index.equals(right_index):
+# raise_assert_detail(
+# "SparseArray.index", "index are not equal", left_index, right_index
+# )
+# else:
+# # Just ensure a
+# pass
+
+# if check_fill_value:
+# assert_attr_equal("fill_value", left, right)
+# if check_dtype:
+# assert_attr_equal("dtype", left, right)
+# assert_numpy_array_equal(left.to_dense(), right.to_dense(), check_dtype=check_dtype)
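
The ``check_less_precise`` rule described in the docstrings above (compare ~5 decimal digits by default, ~3 when True, via a ratio check for values larger than 1e-5) can be illustrated with a minimal sketch; this assumes the public ``pandas.util.testing`` API of this era and is not taken from the patch itself:

    import pandas as pd
    import pandas.util.testing as tm

    left = pd.Series([1.0, 2.0])
    right = pd.Series([1.0001, 2.0])

    # The first elements differ in the 4th decimal place: too large for the
    # default ~5-digit tolerance, small enough for the ~3-digit tolerance.
    tm.assert_series_equal(left, right, check_less_precise=True)  # passes
    # tm.assert_series_equal(left, right)  # would raise AssertionError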
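
Similarly, ``check_like=True`` in ``assert_frame_equal`` reindexes ``left`` to match ``right`` before comparing, so two frames holding the same labelled data under a different row/column order compare equal; a sketch under the same assumptions:

    import pandas as pd
    import pandas.util.testing as tm

    df1 = pd.DataFrame({"a": [1, 2], "b": [3, 4]}, index=["x", "y"])
    df2 = df1.loc[["y", "x"], ["b", "a"]]  # same data, rows and columns reordered

    tm.assert_frame_equal(df1, df2, check_like=True)  # passes
    # tm.assert_frame_equal(df1, df2)  # would raise: columns are in a different order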
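
``assert_equal`` dispatches to the type-specific helpers by inspecting the type of ``left``; a sketch of that dispatch, again assuming the era's public ``pandas.util.testing`` API:

    import numpy as np
    import pandas as pd
    import pandas.util.testing as tm

    tm.assert_equal(pd.Index([1, 2, 3]), pd.Index([1, 2, 3]))      # -> assert_index_equal
    tm.assert_equal(pd.Series([1.0, 2.0]), pd.Series([1.0, 2.0]))  # -> assert_series_equal
    tm.assert_equal(np.array([1, 2]), np.array([1, 2]))            # -> assert_numpy_array_equal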