Skip to content

Commit 13efe39

Browse files
jbrockmendel authored and SeeminSyed committed
TST: stricter tests, avoid check_categorical=False, check_less_precise (pandas-dev#32571)
1 parent 184e582 commit 13efe39

File tree

8 files changed

+43
-49
lines changed

8 files changed

+43
-49
lines changed

pandas/tests/frame/test_analytics.py

+5-24
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@ def assert_stat_op_calc(
3232
has_skipna=True,
3333
check_dtype=True,
3434
check_dates=False,
35-
check_less_precise=False,
3635
skipna_alternative=None,
3736
):
3837
"""
@@ -54,9 +53,6 @@ def assert_stat_op_calc(
5453
"alternative(frame)" should be checked.
5554
check_dates : bool, default false
5655
Whether opname should be tested on a Datetime Series
57-
check_less_precise : bool, default False
58-
Whether results should only be compared approximately;
59-
passed on to tm.assert_series_equal
6056
skipna_alternative : function, default None
6157
NaN-safe version of alternative
6258
"""
@@ -84,35 +80,24 @@ def wrapper(x):
8480
result0 = f(axis=0, skipna=False)
8581
result1 = f(axis=1, skipna=False)
8682
tm.assert_series_equal(
87-
result0,
88-
frame.apply(wrapper),
89-
check_dtype=check_dtype,
90-
check_less_precise=check_less_precise,
83+
result0, frame.apply(wrapper), check_dtype=check_dtype,
9184
)
9285
# HACK: win32
9386
tm.assert_series_equal(
94-
result1,
95-
frame.apply(wrapper, axis=1),
96-
check_dtype=False,
97-
check_less_precise=check_less_precise,
87+
result1, frame.apply(wrapper, axis=1), check_dtype=False,
9888
)
9989
else:
10090
skipna_wrapper = alternative
10191

10292
result0 = f(axis=0)
10393
result1 = f(axis=1)
10494
tm.assert_series_equal(
105-
result0,
106-
frame.apply(skipna_wrapper),
107-
check_dtype=check_dtype,
108-
check_less_precise=check_less_precise,
95+
result0, frame.apply(skipna_wrapper), check_dtype=check_dtype,
10996
)
11097

11198
if opname in ["sum", "prod"]:
11299
expected = frame.apply(skipna_wrapper, axis=1)
113-
tm.assert_series_equal(
114-
result1, expected, check_dtype=False, check_less_precise=check_less_precise
115-
)
100+
tm.assert_series_equal(result1, expected, check_dtype=False)
116101

117102
# check dtypes
118103
if check_dtype:
@@ -333,11 +318,7 @@ def kurt(x):
333318

334319
# mixed types (with upcasting happening)
335320
assert_stat_op_calc(
336-
"sum",
337-
np.sum,
338-
mixed_float_frame.astype("float32"),
339-
check_dtype=False,
340-
check_less_precise=True,
321+
"sum", np.sum, mixed_float_frame.astype("float32"), check_dtype=False,
341322
)
342323

343324
assert_stat_op_calc(

pandas/tests/frame/test_to_csv.py

+2-6
Original file line number | Diff line number | Diff line change
@@ -250,9 +250,7 @@ def make_dtnat_arr(n, nnat=None):
250250
df.to_csv(pth, chunksize=chunksize)
251251

252252
recons = self.read_csv(pth)._convert(datetime=True, coerce=True)
253-
tm.assert_frame_equal(
254-
df, recons, check_names=False, check_less_precise=True
255-
)
253+
tm.assert_frame_equal(df, recons, check_names=False)
256254

257255
@pytest.mark.slow
258256
def test_to_csv_moar(self):
@@ -354,9 +352,7 @@ def _to_uni(x):
354352
recons.columns = np.array(recons.columns, dtype=c_dtype)
355353
df.columns = np.array(df.columns, dtype=c_dtype)
356354

357-
tm.assert_frame_equal(
358-
df, recons, check_names=False, check_less_precise=True
359-
)
355+
tm.assert_frame_equal(df, recons, check_names=False)
360356

361357
N = 100
362358
chunksize = 1000

pandas/tests/generic/test_series.py

+1-3
Original file line number | Diff line number | Diff line change
@@ -237,9 +237,7 @@ def test_to_xarray_index_types(self, index):
237237
assert isinstance(result, DataArray)
238238

239239
# idempotency
240-
tm.assert_series_equal(
241-
result.to_series(), s, check_index_type=False, check_categorical=True
242-
)
240+
tm.assert_series_equal(result.to_series(), s, check_index_type=False)
243241

244242
@td.skip_if_no("xarray", min_version="0.7.0")
245243
def test_to_xarray(self):

pandas/tests/groupby/test_function.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -661,7 +661,7 @@ def test_nlargest_mi_grouper():
661661
]
662662

663663
expected = Series(exp_values, index=exp_idx)
664-
tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True)
664+
tm.assert_series_equal(result, expected, check_exact=False)
665665

666666

667667
def test_nsmallest():

pandas/tests/io/excel/test_writers.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -564,7 +564,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path):
564564

565565
reader = ExcelFile(path)
566566
recons = pd.read_excel(reader, "test1", index_col=[0, 1])
567-
tm.assert_frame_equal(df, recons, check_less_precise=True)
567+
tm.assert_frame_equal(df, recons)
568568

569569
def test_excel_roundtrip_indexname(self, merge_cells, path):
570570
df = DataFrame(np.random.randn(10, 4))

pandas/tests/io/test_sql.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -2372,7 +2372,7 @@ def test_write_row_by_row(self):
23722372

23732373
result = sql.read_sql("select * from test", con=self.conn)
23742374
result.index = frame.index
2375-
tm.assert_frame_equal(result, frame, check_less_precise=True)
2375+
tm.assert_frame_equal(result, frame)
23762376

23772377
def test_execute(self):
23782378
frame = tm.makeTimeDataFrame()
@@ -2632,7 +2632,7 @@ def test_write_row_by_row(self):
26322632

26332633
result = sql.read_sql("select * from test", con=self.conn)
26342634
result.index = frame.index
2635-
tm.assert_frame_equal(result, frame, check_less_precise=True)
2635+
tm.assert_frame_equal(result, frame)
26362636

26372637
def test_chunksize_read_type(self):
26382638
frame = tm.makeTimeDataFrame()

pandas/tests/io/test_stata.py

+30-11
Original file line number | Diff line number | Diff line change
@@ -254,12 +254,21 @@ def test_read_dta4(self, file):
254254
)
255255

256256
# these are all categoricals
257-
expected = pd.concat(
258-
[expected[col].astype("category") for col in expected], axis=1
259-
)
257+
for col in expected:
258+
orig = expected[col].copy()
259+
260+
categories = np.asarray(expected["fully_labeled"][orig.notna()])
261+
if col == "incompletely_labeled":
262+
categories = orig
263+
264+
cat = orig.astype("category")._values
265+
cat = cat.set_categories(categories, ordered=True)
266+
cat.categories.rename(None, inplace=True)
267+
268+
expected[col] = cat
260269

261270
# stata doesn't save .category metadata
262-
tm.assert_frame_equal(parsed, expected, check_categorical=False)
271+
tm.assert_frame_equal(parsed, expected)
263272

264273
# File containing strls
265274
def test_read_dta12(self):
@@ -952,19 +961,27 @@ def test_categorical_writing(self, version):
952961
original = pd.concat(
953962
[original[col].astype("category") for col in original], axis=1
954963
)
964+
expected.index.name = "index"
955965

956966
expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str)
957967
expected["unlabeled"] = expected["unlabeled"].apply(str)
958-
expected = pd.concat(
959-
[expected[col].astype("category") for col in expected], axis=1
960-
)
961-
expected.index.name = "index"
968+
for col in expected:
969+
orig = expected[col].copy()
970+
971+
cat = orig.astype("category")._values
972+
cat = cat.as_ordered()
973+
if col == "unlabeled":
974+
cat = cat.set_categories(orig, ordered=True)
975+
976+
cat.categories.rename(None, inplace=True)
977+
978+
expected[col] = cat
962979

963980
with tm.ensure_clean() as path:
964981
original.to_stata(path, version=version)
965982
written_and_read_again = self.read_dta(path)
966983
res = written_and_read_again.set_index("index")
967-
tm.assert_frame_equal(res, expected, check_categorical=False)
984+
tm.assert_frame_equal(res, expected)
968985

969986
def test_categorical_warnings_and_errors(self):
970987
# Warning for non-string labels
@@ -1056,9 +1073,11 @@ def test_categorical_sorting(self, file):
10561073
parsed.index = np.arange(parsed.shape[0])
10571074
codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4]
10581075
categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
1059-
cat = pd.Categorical.from_codes(codes=codes, categories=categories)
1076+
cat = pd.Categorical.from_codes(
1077+
codes=codes, categories=categories, ordered=True
1078+
)
10601079
expected = pd.Series(cat, name="srh")
1061-
tm.assert_series_equal(expected, parsed["srh"], check_categorical=False)
1080+
tm.assert_series_equal(expected, parsed["srh"])
10621081

10631082
@pytest.mark.parametrize("file", ["dta19_115", "dta19_117"])
10641083
def test_categorical_ordering(self, file):

pandas/tests/series/test_constructors.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -393,7 +393,7 @@ def test_constructor_categorical_dtype(self):
393393
expected = Series(
394394
["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True)
395395
)
396-
tm.assert_series_equal(result, expected, check_categorical=True)
396+
tm.assert_series_equal(result, expected)
397397

398398
def test_constructor_categorical_string(self):
399399
# GH 26336: the string 'category' maintains existing CategoricalDtype

0 commit comments

Comments
 (0)