Skip to content

TST: stricter tests, avoid check_categorical=False, check_less_precise #32571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 11, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 5 additions & 24 deletions pandas/tests/frame/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ def assert_stat_op_calc(
has_skipna=True,
check_dtype=True,
check_dates=False,
check_less_precise=False,
skipna_alternative=None,
):
"""
Expand All @@ -54,9 +53,6 @@ def assert_stat_op_calc(
"alternative(frame)" should be checked.
check_dates : bool, default False
Whether opname should be tested on a Datetime Series
check_less_precise : bool, default False
Whether results should only be compared approximately;
passed on to tm.assert_series_equal
skipna_alternative : function, default None
NaN-safe version of alternative
"""
Expand Down Expand Up @@ -84,35 +80,24 @@ def wrapper(x):
result0 = f(axis=0, skipna=False)
result1 = f(axis=1, skipna=False)
tm.assert_series_equal(
result0,
frame.apply(wrapper),
check_dtype=check_dtype,
check_less_precise=check_less_precise,
result0, frame.apply(wrapper), check_dtype=check_dtype,
)
# HACK: win32
tm.assert_series_equal(
result1,
frame.apply(wrapper, axis=1),
check_dtype=False,
check_less_precise=check_less_precise,
result1, frame.apply(wrapper, axis=1), check_dtype=False,
)
else:
skipna_wrapper = alternative

result0 = f(axis=0)
result1 = f(axis=1)
tm.assert_series_equal(
result0,
frame.apply(skipna_wrapper),
check_dtype=check_dtype,
check_less_precise=check_less_precise,
result0, frame.apply(skipna_wrapper), check_dtype=check_dtype,
)

if opname in ["sum", "prod"]:
expected = frame.apply(skipna_wrapper, axis=1)
tm.assert_series_equal(
result1, expected, check_dtype=False, check_less_precise=check_less_precise
)
tm.assert_series_equal(result1, expected, check_dtype=False)

# check dtypes
if check_dtype:
Expand Down Expand Up @@ -333,11 +318,7 @@ def kurt(x):

# mixed types (with upcasting happening)
assert_stat_op_calc(
"sum",
np.sum,
mixed_float_frame.astype("float32"),
check_dtype=False,
check_less_precise=True,
"sum", np.sum, mixed_float_frame.astype("float32"), check_dtype=False,
)

assert_stat_op_calc(
Expand Down
8 changes: 2 additions & 6 deletions pandas/tests/frame/test_to_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,7 @@ def make_dtnat_arr(n, nnat=None):
df.to_csv(pth, chunksize=chunksize)

recons = self.read_csv(pth)._convert(datetime=True, coerce=True)
tm.assert_frame_equal(
df, recons, check_names=False, check_less_precise=True
)
tm.assert_frame_equal(df, recons, check_names=False)

@pytest.mark.slow
def test_to_csv_moar(self):
Expand Down Expand Up @@ -354,9 +352,7 @@ def _to_uni(x):
recons.columns = np.array(recons.columns, dtype=c_dtype)
df.columns = np.array(df.columns, dtype=c_dtype)

tm.assert_frame_equal(
df, recons, check_names=False, check_less_precise=True
)
tm.assert_frame_equal(df, recons, check_names=False)

N = 100
chunksize = 1000
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/generic/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,9 +237,7 @@ def test_to_xarray_index_types(self, index):
assert isinstance(result, DataArray)

# idempotency
tm.assert_series_equal(
result.to_series(), s, check_index_type=False, check_categorical=True
)
tm.assert_series_equal(result.to_series(), s, check_index_type=False)

@td.skip_if_no("xarray", min_version="0.7.0")
def test_to_xarray(self):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def test_nlargest_mi_grouper():
]

expected = Series(exp_values, index=exp_idx)
tm.assert_series_equal(result, expected, check_exact=False, check_less_precise=True)
tm.assert_series_equal(result, expected, check_exact=False)


def test_nsmallest():
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/io/excel/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ def test_roundtrip_indexlabels(self, merge_cells, frame, path):

reader = ExcelFile(path)
recons = pd.read_excel(reader, "test1", index_col=[0, 1])
tm.assert_frame_equal(df, recons, check_less_precise=True)
tm.assert_frame_equal(df, recons)

def test_excel_roundtrip_indexname(self, merge_cells, path):
df = DataFrame(np.random.randn(10, 4))
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2372,7 +2372,7 @@ def test_write_row_by_row(self):

result = sql.read_sql("select * from test", con=self.conn)
result.index = frame.index
tm.assert_frame_equal(result, frame, check_less_precise=True)
tm.assert_frame_equal(result, frame)

def test_execute(self):
frame = tm.makeTimeDataFrame()
Expand Down Expand Up @@ -2632,7 +2632,7 @@ def test_write_row_by_row(self):

result = sql.read_sql("select * from test", con=self.conn)
result.index = frame.index
tm.assert_frame_equal(result, frame, check_less_precise=True)
tm.assert_frame_equal(result, frame)

def test_chunksize_read_type(self):
frame = tm.makeTimeDataFrame()
Expand Down
41 changes: 30 additions & 11 deletions pandas/tests/io/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,12 +254,21 @@ def test_read_dta4(self, file):
)

# these are all categoricals
expected = pd.concat(
[expected[col].astype("category") for col in expected], axis=1
)
for col in expected:
orig = expected[col].copy()

categories = np.asarray(expected["fully_labeled"][orig.notna()])
if col == "incompletely_labeled":
categories = orig

cat = orig.astype("category")._values
cat = cat.set_categories(categories, ordered=True)
cat.categories.rename(None, inplace=True)

expected[col] = cat

# stata doesn't save .category metadata
tm.assert_frame_equal(parsed, expected, check_categorical=False)
tm.assert_frame_equal(parsed, expected)

# File containing strls
def test_read_dta12(self):
Expand Down Expand Up @@ -952,19 +961,27 @@ def test_categorical_writing(self, version):
original = pd.concat(
[original[col].astype("category") for col in original], axis=1
)
expected.index.name = "index"

expected["incompletely_labeled"] = expected["incompletely_labeled"].apply(str)
expected["unlabeled"] = expected["unlabeled"].apply(str)
expected = pd.concat(
[expected[col].astype("category") for col in expected], axis=1
)
expected.index.name = "index"
for col in expected:
orig = expected[col].copy()

cat = orig.astype("category")._values
cat = cat.as_ordered()
if col == "unlabeled":
cat = cat.set_categories(orig, ordered=True)

cat.categories.rename(None, inplace=True)

expected[col] = cat

with tm.ensure_clean() as path:
original.to_stata(path, version=version)
written_and_read_again = self.read_dta(path)
res = written_and_read_again.set_index("index")
tm.assert_frame_equal(res, expected, check_categorical=False)
tm.assert_frame_equal(res, expected)

def test_categorical_warnings_and_errors(self):
# Warning for non-string labels
Expand Down Expand Up @@ -1056,9 +1073,11 @@ def test_categorical_sorting(self, file):
parsed.index = np.arange(parsed.shape[0])
codes = [-1, -1, 0, 1, 1, 1, 2, 2, 3, 4]
categories = ["Poor", "Fair", "Good", "Very good", "Excellent"]
cat = pd.Categorical.from_codes(codes=codes, categories=categories)
cat = pd.Categorical.from_codes(
codes=codes, categories=categories, ordered=True
)
expected = pd.Series(cat, name="srh")
tm.assert_series_equal(expected, parsed["srh"], check_categorical=False)
tm.assert_series_equal(expected, parsed["srh"])

@pytest.mark.parametrize("file", ["dta19_115", "dta19_117"])
def test_categorical_ordering(self, file):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ def test_constructor_categorical_dtype(self):
expected = Series(
["a", "a"], index=[0, 1], dtype=CategoricalDtype(["a", "b"], ordered=True)
)
tm.assert_series_equal(result, expected, check_categorical=True)
tm.assert_series_equal(result, expected)

def test_constructor_categorical_string(self):
# GH 26336: the string 'category' maintains existing CategoricalDtype
Expand Down