TST: stricter tests, avoid check_categorical=False, check_less_precise (pandas-dev#32571)

jbrockmendel · SeeminSyed · commit 13efe39559aa · 2020-03-22T19:55:07.000-04:00
diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -32,7 +32,6 @@ def assert_stat_op_calc(
     has_skipna=True,
     check_dtype=True,
     check_dates=False,
-    check_less_precise=False,
     skipna_alternative=None,
 ):
     """
@@ -54,9 +53,6 @@ def assert_stat_op_calc(
         "alternative(frame)" should be checked.
     check_dates : bool, default false
         Whether opname should be tested on a Datetime Series
-    check_less_precise : bool, default False
-        Whether results should only be compared approximately;
-        passed on to tm.assert_series_equal
     skipna_alternative : function, default None
         NaN-safe version of alternative
     """
@@ -84,35 +80,24 @@ def wrapper(x):
         result0 = f(axis=0, skipna=False)
         result1 = f(axis=1, skipna=False)
         tm.assert_series_equal(
-            result0,
-            frame.apply(wrapper),
-            check_dtype=check_dtype,
-            check_less_precise=check_less_precise,
+            result0, frame.apply(wrapper), check_dtype=check_dtype,
         )
         # HACK: win32
         tm.assert_series_equal(
-            result1,
-            frame.apply(wrapper, axis=1),
-            check_dtype=False,
-            check_less_precise=check_less_precise,
+            result1, frame.apply(wrapper, axis=1), check_dtype=False,
         )
     else:
         skipna_wrapper = alternative
 
     result0 = f(axis=0)
     result1 = f(axis=1)
     tm.assert_series_equal(
-        result0,
-        frame.apply(skipna_wrapper),
-        check_dtype=check_dtype,
-        check_less_precise=check_less_precise,
+        result0, frame.apply(skipna_wrapper), check_dtype=check_dtype,
     )
 
     if opname in ["sum", "prod"]:
         expected = frame.apply(skipna_wrapper, axis=1)
-        tm.assert_series_equal(
-            result1, expected, check_dtype=False, check_less_precise=check_less_precise
-        )
+        tm.assert_series_equal(result1, expected, check_dtype=False)
 
     # check dtypes
     if check_dtype:
@@ -333,11 +318,7 @@ def kurt(x):
 
         # mixed types (with upcasting happening)
         assert_stat_op_calc(
-            "sum",
-            np.sum,
-            mixed_float_frame.astype("float32"),
-            check_dtype=False,
-            check_less_precise=True,
+            "sum", np.sum, mixed_float_frame.astype("float32"), check_dtype=False,
         )
 
         assert_stat_op_calc(
diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py
@@ -250,9 +250,7 @@ def make_dtnat_arr(n, nnat=None):
             df.to_csv(pth, chunksize=chunksize)
 
             recons = self.read_csv(pth)._convert(datetime=True, coerce=True)
-            tm.assert_frame_equal(
-                df, recons, check_names=False, check_less_precise=True
-            )
+            tm.assert_frame_equal(df, recons, check_names=False)
 
     @pytest.mark.slow
     def test_to_csv_moar(self):
@@ -354,9 +352,7 @@ def _to_uni(x):
                     recons.columns = np.array(recons.columns, dtype=c_dtype)
                     df.columns = np.array(df.columns, dtype=c_dtype)
 
-            tm.assert_frame_equal(
-                df, recons, check_names=False, check_less_precise=True
-            )
+            tm.assert_frame_equal(df, recons, check_names=False)
 
         N = 100
         chunksize = 1000
diff --git a/pandas/tests/generic/test_series.py b/pandas/tests/generic/test_series.py
@@ -237,9 +237,7 @@ def test_to_xarray_index_types(self, index):
         assert isinstance(result, DataArray)
 
         # idempotency
-        tm.assert_series_equal(
-            result.to_series(), s, check_index_type=False, check_categorical=True
-        )
+        tm.assert_series_equal(result.to_series(), s, check_index_type=False)
 
     @td.skip_if_no("xarray", min_version="0.7.0")
     def test_to_xarray(self):
diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -661,7 +661,7 @@ def test_nlargest_mi_grouper():
     ]
 
     expected = Series(exp_values, index=exp_idx)
-    tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True)
+    tm.assert_series_equal(result, expected, check_exact=False)
 
 
 def test_nsmallest():
diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py
@@ -564,7 +564,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path):
 
         reader = ExcelFile(path)
         recons = pd.read_excel(reader, "test1", index_col=[0, 1])
-        tm.assert_frame_equal(df, recons, check_less_precise=True)
+        tm.assert_frame_equal(df, recons)
 
     def test_excel_roundtrip_indexname(self, merge_cells, path):
         df = DataFrame(np.random.randn(10, 4))
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py
@@ -2372,7 +2372,7 @@ def test_write_row_by_row(self):
 
         result = sql.read_sql("select * from test", con=self.conn)
         result.index = frame.index
-        tm.assert_frame_equal(result, frame, check_less_precise=True)
+        tm.assert_frame_equal(result, frame)
 
     def test_execute(self):
         frame = tm.makeTimeDataFrame()
@@ -2632,7 +2632,7 @@ def test_write_row_by_row(self):
 
         result = sql.read_sql("select * from test", con=self.conn)
         result.index = frame.index
-        tm.assert_frame_equal(result, frame, check_less_precise=True)
+        tm.assert_frame_equal(result, frame)
 
     def test_chunksize_read_type(self):
         frame = tm.makeTimeDataFrame()
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -254,12 +254,21 @@ def test_read_dta4(self, file):
         )
 
         # these are all categoricals
-        expected = pd.concat(
-            [expected[col].astype("category") for col in expected], axis=1
-        )
+        for col in expected:
+            orig = expected[col].copy()
+
+            categories = np.asarray(expected["fully_labeled"][orig.notna()])
+            if col == "incompletely_labeled":
+                categories = orig
+
+            cat = orig.astype("category")._values
+            cat = cat.set_categories(categories, ordered=True)
+            cat.categories.rename(None, inplace=True)
+
+            expected[col] = cat
 
         # stata doesn't save .category metadata
-        tm.assert_frame_equal(parsed, expected, check_categorical=False)
+        tm.assert_frame_equal(parsed, expected)
 
     # File containing strls
     def test_read_dta12(self):
@@ -952,19 +961,27 @@ def test_categorical_writing(self, version):
         original = pd.concat(
             [original[col].astype("category") for col in original], axis=1
         )
+        expected.index.name = "index"
 
         expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str)
         expected["unlabeled"] = expected["unlabeled"].apply(str)
-        expected = pd.concat(
-            [expected[col].astype("category") for col in expected], axis=1
-        )
-        expected.index.name = "index"
+        for col in expected:
+            orig = expected[col].copy()
+
+            cat = orig.astype("category")._values
+            cat = cat.as_ordered()
+            if col == "unlabeled":
+                cat = cat.set_categories(orig, ordered=True)
+
+            cat.categories.rename(None, inplace=True)
+
+            expected[col] = cat
 
         with tm.ensure_clean() as path:
             original.to_stata(path, version=version)
             written_and_read_again = self.read_dta(path)
             res = written_and_read_again.set_index("index")
-            tm.assert_frame_equal(res, expected, check_categorical=False)
+            tm.assert_frame_equal(res, expected)
 
     def test_categorical_warnings_and_errors(self):
         # Warning for non-string labels
@@ -1056,9 +1073,11 @@ def test_categorical_sorting(self, file):
         parsed.index = np.arange(parsed.shape[0])
         codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4]
         categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
-        cat = pd.Categorical.from_codes(codes=codes, categories=categories)
+        cat = pd.Categorical.from_codes(
+            codes=codes, categories=categories, ordered=True
+        )
         expected = pd.Series(cat, name="srh")
-        tm.assert_series_equal(expected, parsed["srh"], check_categorical=False)
+        tm.assert_series_equal(expected, parsed["srh"])
 
     @pytest.mark.parametrize("file", ["dta19_115", "dta19_117"])
     def test_categorical_ordering(self, file):
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
@@ -393,7 +393,7 @@ def test_constructor_categorical_dtype(self):
         expected = Series(
             ["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True)
         )
-        tm.assert_series_equal(result, expected, check_categorical=True)
+        tm.assert_series_equal(result, expected)
 
     def test_constructor_categorical_string(self):
         # GH 26336: the string 'category' maintains existing CategoricalDtype

File: pandas/tests/groupby/test_function.py
Original file line number	Diff line number	Diff line change
`@@ -661,7 +661,7 @@ def test_nlargest_mi_grouper():`
`661`	`661`	`]`
`662`	`662`
`663`	`663`	`expected = Series(exp_values, index=exp_idx)`
`664`		`- tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True)`
	`664`	`+ tm.assert_series_equal(result, expected, check_exact=False)`
`665`	`665`
`666`	`666`
`667`	`667`	`def test_nsmallest():`
File: pandas/tests/series/test_constructors.py
Original file line number	Diff line number	Diff line change
`@@ -393,7 +393,7 @@ def test_constructor_categorical_dtype(self):`
`393`	`393`	`expected = Series(`
`394`	`394`	`["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True)`
`395`	`395`	`)`
`396`		`- tm.assert_series_equal(result, expected, check_categorical=True)`
	`396`	`+ tm.assert_series_equal(result, expected)`
`397`	`397`
`398`	`398`	`def test_constructor_categorical_string(self):`
`399`	`399`	`# GH 26336: the string 'category' maintains existing CategoricalDtype`