diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 07e30d41c216d..f4a10abea9757 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -32,7 +32,6 @@ def assert_stat_op_calc( has_skipna=True, check_dtype=True, check_dates=False, - check_less_precise=False, skipna_alternative=None, ): """ @@ -54,9 +53,6 @@ def assert_stat_op_calc( "alternative(frame)" should be checked. check_dates : bool, default false Whether opname should be tested on a Datetime Series - check_less_precise : bool, default False - Whether results should only be compared approximately; - passed on to tm.assert_series_equal skipna_alternative : function, default None NaN-safe version of alternative """ @@ -84,17 +80,11 @@ def wrapper(x): result0 = f(axis=0, skipna=False) result1 = f(axis=1, skipna=False) tm.assert_series_equal( - result0, - frame.apply(wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise, + result0, frame.apply(wrapper), check_dtype=check_dtype, ) # HACK: win32 tm.assert_series_equal( - result1, - frame.apply(wrapper, axis=1), - check_dtype=False, - check_less_precise=check_less_precise, + result1, frame.apply(wrapper, axis=1), check_dtype=False, ) else: skipna_wrapper = alternative @@ -102,17 +92,12 @@ def wrapper(x): result0 = f(axis=0) result1 = f(axis=1) tm.assert_series_equal( - result0, - frame.apply(skipna_wrapper), - check_dtype=check_dtype, - check_less_precise=check_less_precise, + result0, frame.apply(skipna_wrapper), check_dtype=check_dtype, ) if opname in ["sum", "prod"]: expected = frame.apply(skipna_wrapper, axis=1) - tm.assert_series_equal( - result1, expected, check_dtype=False, check_less_precise=check_less_precise - ) + tm.assert_series_equal(result1, expected, check_dtype=False) # check dtypes if check_dtype: @@ -333,11 +318,7 @@ def kurt(x): # mixed types (with upcasting happening) assert_stat_op_calc( - "sum", - np.sum, - mixed_float_frame.astype("float32"), - check_dtype=False, - check_less_precise=True, + "sum", np.sum, mixed_float_frame.astype("float32"), check_dtype=False, ) assert_stat_op_calc( diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index cec2bd4b634c1..a49da7a5ec2fc 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -250,9 +250,7 @@ def make_dtnat_arr(n, nnat=None): df.to_csv(pth, chunksize=chunksize) recons = self.read_csv(pth)._convert(datetime=True, coerce=True) - tm.assert_frame_equal( - df, recons, check_names=False, check_less_precise=True - ) + tm.assert_frame_equal(df, recons, check_names=False) @pytest.mark.slow def test_to_csv_moar(self): @@ -354,9 +352,7 @@ def _to_uni(x): recons.columns = np.array(recons.columns, dtype=c_dtype) df.columns = np.array(df.columns, dtype=c_dtype) - tm.assert_frame_equal( - df, recons, check_names=False, check_less_precise=True - ) + tm.assert_frame_equal(df, recons, check_names=False) N = 100 chunksize = 1000 diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py index f119eb422a276..388bb8e3f636d 100644 --- a/pandas/tests/generic/test_series.py +++ b/pandas/tests/generic/test_series.py @@ -237,9 +237,7 @@ def test_to_xarray_index_types(self, index): assert isinstance(result, DataArray) # idempotency - tm.assert_series_equal( - result.to_series(), s, check_index_type=False, check_categorical=True - ) + tm.assert_series_equal(result.to_series(), s, check_index_type=False) @td.skip_if_no("xarray", min_version="0.7.0") def test_to_xarray(self): diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 83080aa98648f..03278e69fe94a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -661,7 +661,7 @@ def test_nlargest_mi_grouper(): ] expected = Series(exp_values, index=exp_idx) - tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True) + tm.assert_series_equal(result, expected, check_exact=False) def test_nsmallest(): diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 506d223dbedb4..59899673cfc31 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -564,7 +564,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path): reader = ExcelFile(path) recons = pd.read_excel(reader, "test1", index_col=[0, 1]) - tm.assert_frame_equal(df, recons, check_less_precise=True) + tm.assert_frame_equal(df, recons) def test_excel_roundtrip_indexname(self, merge_cells, path): df = DataFrame(np.random.randn(10, 4)) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index fc3876eee9d66..86502a67e1869 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2372,7 +2372,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame, check_less_precise=True) + tm.assert_frame_equal(result, frame) def test_execute(self): frame = tm.makeTimeDataFrame() @@ -2632,7 +2632,7 @@ def test_write_row_by_row(self): result = sql.read_sql("select * from test", con=self.conn) result.index = frame.index - tm.assert_frame_equal(result, frame, check_less_precise=True) + tm.assert_frame_equal(result, frame) def test_chunksize_read_type(self): frame = tm.makeTimeDataFrame() diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index b65efac2bd527..3efac9cd605a8 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -254,12 +254,21 @@ def test_read_dta4(self, file): ) # these are all categoricals - expected = pd.concat( - [expected[col].astype("category") for col in expected], axis=1 - ) + for col in expected: + orig = expected[col].copy() + + categories = np.asarray(expected["fully_labeled"][orig.notna()]) + if col == "incompletely_labeled": + categories = orig + + cat = orig.astype("category")._values + cat = cat.set_categories(categories, ordered=True) + cat.categories.rename(None, inplace=True) + + expected[col] = cat # stata doesn't save .category metadata - tm.assert_frame_equal(parsed, expected, check_categorical=False) + tm.assert_frame_equal(parsed, expected) # File containing strls def test_read_dta12(self): @@ -952,19 +961,27 @@ def test_categorical_writing(self, version): original = pd.concat( [original[col].astype("category") for col in original], axis=1 ) + expected.index.name = "index" expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str) expected["unlabeled"] = expected["unlabeled"].apply(str) - expected = pd.concat( - [expected[col].astype("category") for col in expected], axis=1 - ) - expected.index.name = "index" + for col in expected: + orig = expected[col].copy() + + cat = orig.astype("category")._values + cat = cat.as_ordered() + if col == "unlabeled": + cat = cat.set_categories(orig, ordered=True) + + cat.categories.rename(None, inplace=True) + + expected[col] = cat with tm.ensure_clean() as path: original.to_stata(path, version=version) written_and_read_again = self.read_dta(path) res = written_and_read_again.set_index("index") - tm.assert_frame_equal(res, expected, check_categorical=False) + tm.assert_frame_equal(res, expected) def test_categorical_warnings_and_errors(self): # Warning for non-string labels @@ -1056,9 +1073,11 @@ def test_categorical_sorting(self, file): parsed.index = np.arange(parsed.shape[0]) codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4] categories = ["Poor", "Fair", "Good", "Very good", "Excellent"] - cat = pd.Categorical.from_codes(codes=codes, categories=categories) + cat = pd.Categorical.from_codes( + codes=codes, categories=categories, ordered=True + ) expected = pd.Series(cat, name="srh") - tm.assert_series_equal(expected, parsed["srh"], check_categorical=False) + tm.assert_series_equal(expected, parsed["srh"]) @pytest.mark.parametrize("file", ["dta19_115", "dta19_117"]) def test_categorical_ordering(self, file): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 1a794f8656abe..46ac430a13394 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -393,7 +393,7 @@ def test_constructor_categorical_dtype(self): expected = Series( ["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True) ) - tm.assert_series_equal(result, expected, check_categorical=True) + tm.assert_series_equal(result, expected) def test_constructor_categorical_string(self): # GH 26336: the string 'category' maintains existing CategoricalDtype