Skip to content

Commit 8149da0

Browse files
authored
PERF: assert_frame_equal (#44695)
1 parent c84fb05 commit 8149da0

File tree

5 files changed

+28 -26 lines changed

5 files changed

+28 -26 lines changed

pandas/_testing/asserters.py

+19 -10
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,10 @@
2222
is_numeric_dtype,
2323
needs_i8_conversion,
2424
)
25-
from pandas.core.dtypes.dtypes import PandasDtype
25+
from pandas.core.dtypes.dtypes import (
26+
CategoricalDtype,
27+
PandasDtype,
28+
)
2629
from pandas.core.dtypes.missing import array_equivalent
2730

2831
import pandas as pd
@@ -655,7 +658,7 @@ def raise_assert_detail(obj, message, left, right, diff=None, index_values=None)
655658
if isinstance(left, np.ndarray):
656659
left = pprint_thing(left)
657660
elif (
658-
is_categorical_dtype(left)
661+
isinstance(left, CategoricalDtype)
659662
or isinstance(left, PandasDtype)
660663
or isinstance(left, StringDtype)
661664
):
@@ -664,7 +667,7 @@ def raise_assert_detail(obj, message, left, right, diff=None, index_values=None)
664667
if isinstance(right, np.ndarray):
665668
right = pprint_thing(right)
666669
elif (
667-
is_categorical_dtype(right)
670+
isinstance(right, CategoricalDtype)
668671
or isinstance(right, PandasDtype)
669672
or isinstance(right, StringDtype)
670673
):
@@ -1008,8 +1011,8 @@ def assert_series_equal(
10081011
# is False. We'll still raise if only one is a `Categorical`,
10091012
# regardless of `check_categorical`
10101013
if (
1011-
is_categorical_dtype(left.dtype)
1012-
and is_categorical_dtype(right.dtype)
1014+
isinstance(left.dtype, CategoricalDtype)
1015+
and isinstance(right.dtype, CategoricalDtype)
10131016
and not check_categorical
10141017
):
10151018
pass
@@ -1054,7 +1057,9 @@ def assert_series_equal(
10541057
raise AssertionError(msg)
10551058
elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype):
10561059
assert_interval_array_equal(left.array, right.array)
1057-
elif is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
1060+
elif isinstance(left.dtype, CategoricalDtype) or isinstance(
1061+
right.dtype, CategoricalDtype
1062+
):
10581063
_testing.assert_almost_equal(
10591064
left._values,
10601065
right._values,
@@ -1106,7 +1111,9 @@ def assert_series_equal(
11061111
assert_attr_equal("name", left, right, obj=obj)
11071112

11081113
if check_categorical:
1109-
if is_categorical_dtype(left.dtype) or is_categorical_dtype(right.dtype):
1114+
if isinstance(left.dtype, CategoricalDtype) or isinstance(
1115+
right.dtype, CategoricalDtype
1116+
):
11101117
assert_categorical_equal(
11111118
left._values,
11121119
right._values,
@@ -1315,9 +1322,11 @@ def assert_frame_equal(
13151322
# compare by columns
13161323
else:
13171324
for i, col in enumerate(left.columns):
1318-
assert col in right
1319-
lcol = left.iloc[:, i]
1320-
rcol = right.iloc[:, i]
1325+
# We have already checked that columns match, so we can do
1326+
# fast location-based lookups
1327+
lcol = left._ixs(i, axis=1)
1328+
rcol = right._ixs(i, axis=1)
1329+
13211330
# GH #38183
13221331
# use check_index=False, because we do not want to run
13231332
# assert_index_equal for each column,

pandas/core/frame.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -3911,7 +3911,8 @@ def _box_col_values(self, values: SingleDataManager, loc: int) -> Series:
39113911
# we attach the Timestamp object as the name.
39123912
name = self.columns[loc]
39133913
klass = self._constructor_sliced
3914-
return klass(values, index=self.index, name=name, fastpath=True)
3914+
# We get index=self.index because values is a SingleDataManager
3915+
return klass(values, name=name, fastpath=True)
39153916

39163917
# ----------------------------------------------------------------------
39173918
# Lookup Caching

pandas/core/series.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -332,7 +332,11 @@ def __init__(
332332
):
333333
# GH#33357 called with just the SingleBlockManager
334334
NDFrame.__init__(self, data)
335-
self.name = name
335+
if fastpath:
336+
# e.g. from _box_col_values, skip validation of name
337+
object.__setattr__(self, "_name", name)
338+
else:
339+
self.name = name
336340
return
337341

338342
# we are called internally, so short-circuit

pandas/tests/frame/methods/test_sort_values.py

+1 -9
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas.errors import PerformanceWarning
7-
86
import pandas as pd
97
from pandas import (
108
Categorical,
@@ -849,13 +847,7 @@ def test_sort_column_level_and_index_label(
849847
# Compute result by transposing and sorting on axis=1.
850848
result = df_idx.T.sort_values(by=sort_names, ascending=ascending, axis=1)
851849

852-
if len(levels) > 1:
853-
# Accessing multi-level columns that are not lexsorted raises a
854-
# performance warning
855-
with tm.assert_produces_warning(PerformanceWarning):
856-
tm.assert_frame_equal(result, expected)
857-
else:
858-
tm.assert_frame_equal(result, expected)
850+
tm.assert_frame_equal(result, expected)
859851

860852
def test_sort_values_pos_args_deprecation(self):
861853
# https://github.com/pandas-dev/pandas/issues/41485

pandas/tests/groupby/aggregate/test_aggregate.py

+1 -5
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,6 @@
99
import numpy as np
1010
import pytest
1111

12-
from pandas.errors import PerformanceWarning
13-
1412
from pandas.core.dtypes.common import is_integer_dtype
1513

1614
import pandas as pd
@@ -373,9 +371,7 @@ def test_agg_multiple_functions_same_name_with_ohlc_present():
373371
expected = DataFrame(
374372
expected_values, columns=expected_columns, index=expected_index
375373
)
376-
# PerformanceWarning is thrown by `assert col in right` in assert_frame_equal
377-
with tm.assert_produces_warning(PerformanceWarning):
378-
tm.assert_frame_equal(result, expected)
374+
tm.assert_frame_equal(result, expected)
379375

380376

381377
def test_multiple_functions_tuples_and_non_tuples(df):

0 commit comments

Comments
 (0)