Skip to content

TST: Fixturize / parameterize test_sas7bdat #45826

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_nonunique_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def test_column_dups_indexes(self):
this_df = df.copy()
expected_ser = Series(index.values, index=this_df.index)
expected_df = DataFrame(
{"A": expected_ser, "B": this_df["B"], "A": expected_ser},
{"A": expected_ser, "B": this_df["B"]},
columns=["A", "B", "A"],
)
this_df["A"] = index
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/frame/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ def assert_bool_op_api(
----------
opname : str
Name of the operator to test on frame
float_frame : DataFrame
bool_frame_with_na : DataFrame
DataFrame of booleans with some NaN entries
float_string_frame : DataFrame
DataFrame with both float and string columns
Expand Down
162 changes: 77 additions & 85 deletions pandas/tests/io/sas/test_sas7bdat.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,104 +15,98 @@
import pandas._testing as tm


@pytest.fixture
def dirpath(datapath):
    """Return the directory that holds the SAS test data files."""
    sas_data_dir = datapath("io", "sas", "data")
    return sas_data_dir


@pytest.fixture(params=[(1, range(1, 16)), (2, [16])])
def data_test_ix(request, dirpath):
    """Load an expected DataFrame from CSV plus the sas7bdat file indices it covers.

    Yields (expected_frame, test_indices): the frame parsed from
    ``test_sas7bdat_{i}.csv`` with date columns and dtypes normalized to
    match what ``pd.read_sas`` produces.
    """
    file_no, test_ix = request.param
    csv_path = os.path.join(dirpath, f"test_sas7bdat_{file_no}.csv")
    expected = pd.read_csv(csv_path)
    # SAS stores these datetime columns as day counts from the SAS epoch.
    sas_epoch = datetime(1960, 1, 1)
    for date_col in ("Column4", "Column12"):
        expected[date_col] = sas_epoch + pd.to_timedelta(expected[date_col], unit="d")
    # read_sas yields floats, so upcast any int64 columns for comparison.
    for pos in range(expected.shape[1]):
        if expected.iloc[:, pos].dtype == np.int64:
            expected.iloc[:, pos] = expected.iloc[:, pos].astype(np.float64)
    return expected, test_ix


# https://github.com/cython/cython/issues/1720
@pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning")
class TestSAS7BDAT:
@pytest.fixture(autouse=True)
def setup_method(self, datapath):
self.dirpath = datapath("io", "sas", "data")
self.data = []
self.test_ix = [list(range(1, 16)), [16]]
for j in 1, 2:
fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv")
df = pd.read_csv(fname)
epoch = datetime(1960, 1, 1)
t1 = pd.to_timedelta(df["Column4"], unit="d")
df["Column4"] = epoch + t1
t2 = pd.to_timedelta(df["Column12"], unit="d")
df["Column12"] = epoch + t2
for k in range(df.shape[1]):
col = df.iloc[:, k]
if col.dtype == np.int64:
df.iloc[:, k] = df.iloc[:, k].astype(np.float64)
self.data.append(df)

@pytest.mark.slow
def test_from_file(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
df = pd.read_sas(fname, encoding="utf-8")
tm.assert_frame_equal(df, df0)
def test_from_file(self, dirpath, data_test_ix):
    """Reading each sas7bdat file must reproduce the expected CSV frame."""
    expected, file_indices = data_test_ix
    for file_idx in file_indices:
        sas_path = os.path.join(dirpath, f"test{file_idx}.sas7bdat")
        result = pd.read_sas(sas_path, encoding="utf-8")
        tm.assert_frame_equal(result, expected)

@pytest.mark.slow
def test_from_buffer(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
with open(fname, "rb") as f:
byts = f.read()
buf = io.BytesIO(byts)
with pd.read_sas(
buf, format="sas7bdat", iterator=True, encoding="utf-8"
) as rdr:
df = rdr.read()
tm.assert_frame_equal(df, df0, check_exact=False)
def test_from_buffer(self, dirpath, data_test_ix):
    """read_sas must accept an in-memory buffer instead of a file path."""
    expected, file_indices = data_test_ix
    for file_idx in file_indices:
        sas_path = os.path.join(dirpath, f"test{file_idx}.sas7bdat")
        with open(sas_path, "rb") as handle:
            raw_bytes = handle.read()
        buffer = io.BytesIO(raw_bytes)
        # format= is required here: a buffer has no extension to sniff from.
        with pd.read_sas(
            buffer, format="sas7bdat", iterator=True, encoding="utf-8"
        ) as reader:
            result = reader.read()
        tm.assert_frame_equal(result, expected, check_exact=False)

@pytest.mark.slow
def test_from_iterator(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr:
df = rdr.read(2)
tm.assert_frame_equal(df, df0.iloc[0:2, :])
df = rdr.read(3)
tm.assert_frame_equal(df, df0.iloc[2:5, :])
def test_from_iterator(self, dirpath, data_test_ix):
    """Successive read(n) calls on the iterator yield consecutive row slices."""
    expected, file_indices = data_test_ix
    for file_idx in file_indices:
        sas_path = os.path.join(dirpath, f"test{file_idx}.sas7bdat")
        with pd.read_sas(sas_path, iterator=True, encoding="utf-8") as reader:
            first_chunk = reader.read(2)
            tm.assert_frame_equal(first_chunk, expected.iloc[0:2, :])
            second_chunk = reader.read(3)
            tm.assert_frame_equal(second_chunk, expected.iloc[2:5, :])

@pytest.mark.slow
def test_path_pathlib(self):
for j in 0, 1:
df0 = self.data[j]
for k in self.test_ix[j]:
fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat"))
df = pd.read_sas(fname, encoding="utf-8")
tm.assert_frame_equal(df, df0)
def test_path_pathlib(self, dirpath, data_test_ix):
    """read_sas must accept a pathlib.Path in place of a string path."""
    expected, file_indices = data_test_ix
    for file_idx in file_indices:
        sas_path = Path(os.path.join(dirpath, f"test{file_idx}.sas7bdat"))
        result = pd.read_sas(sas_path, encoding="utf-8")
        tm.assert_frame_equal(result, expected)

@td.skip_if_no("py.path")
@pytest.mark.slow
def test_path_localpath(self):
def test_path_localpath(self, dirpath, data_test_ix):
    """read_sas must accept a py.path.local object in place of a string path.

    Fix: the span interleaved the superseded ``self.dirpath`` /
    ``self.test_ix`` loop (a diff-merge artifact) with the new fixture-based
    body; the ``self.*`` attributes no longer exist after fixturization and
    would raise AttributeError, so only the fixture-based body is kept.
    """
    from py.path import local as LocalPath

    df0, test_ix = data_test_ix
    for k in test_ix:
        fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat"))
        df = pd.read_sas(fname, encoding="utf-8")
        tm.assert_frame_equal(df, df0)

@pytest.mark.slow
def test_iterator_loop(self):
@pytest.mark.parametrize("chunksize", (3, 5, 10, 11))
@pytest.mark.parametrize("k", range(1, 17))
def test_iterator_loop(self, dirpath, k, chunksize):
    """Iterating with a chunksize must yield every row exactly once (GH#13654).

    Fix: the span interleaved the superseded triple-nested
    ``self.test_ix``-driven loop (a diff-merge artifact) with the new
    parametrized body; ``self.dirpath`` / ``self.test_ix`` no longer exist
    after fixturization, so only the parametrized body is kept — ``k`` and
    ``chunksize`` arrive via @pytest.mark.parametrize.
    """
    fname = os.path.join(dirpath, f"test{k}.sas7bdat")
    with pd.read_sas(fname, chunksize=chunksize, encoding="utf-8") as rdr:
        y = 0
        for x in rdr:
            y += x.shape[0]
        # Sum of chunk lengths must equal the file's declared row count.
        assert y == rdr.row_count

def test_iterator_read_too_much(self, dirpath):
# github #14734
k = self.test_ix[0][0]
fname = os.path.join(self.dirpath, f"test{k}.sas7bdat")
fname = os.path.join(dirpath, "test1.sas7bdat")
with pd.read_sas(
fname, format="sas7bdat", iterator=True, encoding="utf-8"
) as rdr:
Expand Down Expand Up @@ -183,19 +177,17 @@ def test_date_time(datapath):
tm.assert_frame_equal(df, df0)


@pytest.mark.parametrize("column", ["WGT", "CYL"])
def test_compact_numerical_values(datapath, column):
    """Columns stored with width < 8 must round-trip as exact integers (GH#21616).

    Fix: the span interleaved the old unparametrized signature and its
    duplicated WGT/CYL assertions (a diff-merge artifact) with the new
    parametrized version; only the coherent parametrized test is kept.
    """
    fname = datapath("io", "sas", "data", "cars.sas7bdat")
    df = pd.read_sas(fname, encoding="latin-1")
    # The two columns CYL and WGT in cars.sas7bdat have column
    # width < 8 and only contain integral values.
    # Test that pandas doesn't corrupt the numbers by adding
    # decimals.
    result = df[column]
    expected = df[column].round()
    tm.assert_series_equal(result, expected, check_exact=True)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/indexing/test_xs.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_series_getitem_multiindex_xs_by_label(self):
result = ser.xs("one", level="L2")
tm.assert_series_equal(result, expected)

def test_series_getitem_multiindex_xs(xs):
def test_series_getitem_multiindex_xs(self):
# GH#6258
dt = list(date_range("20130903", periods=3))
idx = MultiIndex.from_product([list("AB"), dt])
Expand Down