Skip to content

PERF: assert_frame_equal #44695

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 19 additions & 10 deletions pandas/_testing/asserters.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,10 @@
is_numeric_dtype,
needs_i8_conversion,
)
from pandas.core.dtypes.dtypes import PandasDtype
from pandas.core.dtypes.dtypes import (
CategoricalDtype,
PandasDtype,
)
from pandas.core.dtypes.missing import array_equivalent

import pandas as pd
Expand Down Expand Up @@ -655,7 +658,7 @@ def raise_assert_detail(obj, message, left, right, diff=None, index_values=None)
if isinstance(left, np.ndarray):
left = pprint_thing(left)
elif (
is_categorical_dtype(left)
isinstance(left, CategoricalDtype)
or isinstance(left, PandasDtype)
or isinstance(left, StringDtype)
):
Expand All @@ -664,7 +667,7 @@ def raise_assert_detail(obj, message, left, right, diff=None, index_values=None)
if isinstance(right, np.ndarray):
right = pprint_thing(right)
elif (
is_categorical_dtype(right)
isinstance(right, CategoricalDtype)
or isinstance(right, PandasDtype)
or isinstance(right, StringDtype)
):
Expand Down Expand Up @@ -1008,8 +1011,8 @@ def assert_series_equal(
# is False. We'll still raise if only one is a `Categorical`,
# regardless of `check_categorical`
if (
is_categorical_dtype(left.dtype)
and is_categorical_dtype(right.dtype)
isinstance(left.dtype, CategoricalDtype)
and isinstance(right.dtype, CategoricalDtype)
and not check_categorical
):
pass
Expand Down Expand Up @@ -1054,7 +1057,9 @@ def assert_series_equal(
raise AssertionError(msg)
elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype):
assert_interval_array_equal(left.array, right.array)
elif is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
elif isinstance(left.dtype, CategoricalDtype) or isinstance(
right.dtype, CategoricalDtype
):
_testing.assert_almost_equal(
left._values,
right._values,
Expand Down Expand Up @@ -1106,7 +1111,9 @@ def assert_series_equal(
assert_attr_equal("name", left, right, obj=obj)

if check_categorical:
if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
if isinstance(left.dtype, CategoricalDtype) or isinstance(
right.dtype, CategoricalDtype
):
assert_categorical_equal(
left._values,
right._values,
Expand Down Expand Up @@ -1315,9 +1322,11 @@ def assert_frame_equal(
# compare by columns
else:
for i, col in enumerate(left.columns):
assert col in right
lcol = left.iloc[:, i]
rcol = right.iloc[:, i]
# We have already checked that columns match, so we can do
# fast location-based lookups
lcol = left._ixs(i, axis=1)
rcol = right._ixs(i, axis=1)

# GH #38183
# use check_index=False, because we do not want to run
# assert_index_equal for each column,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3910,7 +3910,8 @@ def _box_col_values(self, values, loc: int) -> Series:
# we attach the Timestamp object as the name.
name = self.columns[loc]
klass = self._constructor_sliced
return klass(values, index=self.index, name=name, fastpath=True)
# No need to pass index explicitly: we get index=self.index because
# values is a SingleDataManager
return klass(values, name=name, fastpath=True)

# ----------------------------------------------------------------------
# Lookup Caching
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,11 @@ def __init__(
):
# GH#33357 called with just the SingleBlockManager
NDFrame.__init__(self, data)
self.name = name
if fastpath:
# e.g. from _box_col_values, skip validation of name
object.__setattr__(self, "_name", name)
else:
self.name = name
return

# we are called internally, so short-circuit
Expand Down
10 changes: 1 addition & 9 deletions pandas/tests/frame/methods/test_sort_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
Categorical,
Expand Down Expand Up @@ -849,13 +847,7 @@ def test_sort_column_level_and_index_label(
# Compute result by transposing and sorting on axis=1.
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)

if len(levels) > 1:
# Accessing multi-level columns that are not lexsorted raises a
# performance warning
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(result, expected)
else:
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)

def test_sort_values_pos_args_deprecation(self):
# https://github.com/pandas-dev/pandas/issues/41485
Expand Down
6 changes: 1 addition & 5 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

from pandas.core.dtypes.common import is_integer_dtype

import pandas as pd
Expand Down Expand Up @@ -373,9 +371,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
expected = DataFrame(
expected_values, columns=expected_columns, index=expected_index
)
# PerformanceWarning is thrown by `assert col in right` in assert_frame_equal
with tm.assert_produces_warning(PerformanceWarning):
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, expected)


def test_multiple_functions_tuples_and_non_tuples(df):
Expand Down