diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index d010426bee53e..865994519549b 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -173,7 +173,7 @@ def test_column_dups_indexes(self): this_df = df.copy() expected_ser = Series(index.values, index=this_df.index) expected_df = DataFrame( - {"A": expected_ser, "B": this_df["B"], "A": expected_ser}, + {"A": expected_ser, "B": this_df["B"]}, columns=["A", "B", "A"], ) this_df["A"] = index diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 6a99634d77f8f..daf70ccefc4ed 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -238,7 +238,7 @@ def assert_bool_op_api( ---------- opname : str Name of the operator to test on frame - float_frame : DataFrame + bool_frame_with_na : DataFrame DataFrame with columns of type float float_string_frame : DataFrame DataFrame with both float and string columns diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index 1847d3634a550..96882c6811683 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -15,104 +15,98 @@ import pandas._testing as tm +@pytest.fixture +def dirpath(datapath): + return datapath("io", "sas", "data") + + +@pytest.fixture(params=[(1, range(1, 16)), (2, [16])]) +def data_test_ix(request, dirpath): + i, test_ix = request.param + fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv") + df = pd.read_csv(fname) + epoch = datetime(1960, 1, 1) + t1 = pd.to_timedelta(df["Column4"], unit="d") + df["Column4"] = epoch + t1 + t2 = pd.to_timedelta(df["Column12"], unit="d") + df["Column12"] = epoch + t2 + for k in range(df.shape[1]): + col = df.iloc[:, k] + if col.dtype == np.int64: + df.iloc[:, k] = df.iloc[:, k].astype(np.float64) + return df, test_ix + + # https://github.com/cython/cython/issues/1720 @pytest.mark.filterwarnings("ignore:can't resolve package:ImportWarning") class TestSAS7BDAT: - @pytest.fixture(autouse=True) - def setup_method(self, datapath): - self.dirpath = datapath("io", "sas", "data") - self.data = [] - self.test_ix = [list(range(1, 16)), [16]] - for j in 1, 2: - fname = os.path.join(self.dirpath, f"test_sas7bdat_{j}.csv") - df = pd.read_csv(fname) - epoch = datetime(1960, 1, 1) - t1 = pd.to_timedelta(df["Column4"], unit="d") - df["Column4"] = epoch + t1 - t2 = pd.to_timedelta(df["Column12"], unit="d") - df["Column12"] = epoch + t2 - for k in range(df.shape[1]): - col = df.iloc[:, k] - if col.dtype == np.int64: - df.iloc[:, k] = df.iloc[:, k].astype(np.float64) - self.data.append(df) - @pytest.mark.slow - def test_from_file(self): - for j in 0, 1: - df0 = self.data[j] - for k in self.test_ix[j]: - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") - df = pd.read_sas(fname, encoding="utf-8") - tm.assert_frame_equal(df, df0) + def test_from_file(self, dirpath, data_test_ix): + df0, test_ix = data_test_ix + for k in test_ix: + fname = os.path.join(dirpath, f"test{k}.sas7bdat") + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) @pytest.mark.slow - def test_from_buffer(self): - for j in 0, 1: - df0 = self.data[j] - for k in self.test_ix[j]: - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") - with open(fname, "rb") as f: - byts = f.read() - buf = io.BytesIO(byts) - with pd.read_sas( - buf, format="sas7bdat", iterator=True, encoding="utf-8" - ) as rdr: - df = rdr.read() - tm.assert_frame_equal(df, df0, check_exact=False) + def test_from_buffer(self, dirpath, data_test_ix): + df0, test_ix = data_test_ix + for k in test_ix: + fname = os.path.join(dirpath, f"test{k}.sas7bdat") + with open(fname, "rb") as f: + byts = f.read() + buf = io.BytesIO(byts) + with pd.read_sas( + buf, format="sas7bdat", iterator=True, encoding="utf-8" + ) as rdr: + df = rdr.read() + tm.assert_frame_equal(df, df0, check_exact=False) @pytest.mark.slow - def test_from_iterator(self): - for j in 0, 1: - df0 = self.data[j] - for k in self.test_ix[j]: - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") - with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: - df = rdr.read(2) - tm.assert_frame_equal(df, df0.iloc[0:2, :]) - df = rdr.read(3) - tm.assert_frame_equal(df, df0.iloc[2:5, :]) + def test_from_iterator(self, dirpath, data_test_ix): + df0, test_ix = data_test_ix + for k in test_ix: + fname = os.path.join(dirpath, f"test{k}.sas7bdat") + with pd.read_sas(fname, iterator=True, encoding="utf-8") as rdr: + df = rdr.read(2) + tm.assert_frame_equal(df, df0.iloc[0:2, :]) + df = rdr.read(3) + tm.assert_frame_equal(df, df0.iloc[2:5, :]) @pytest.mark.slow - def test_path_pathlib(self): - for j in 0, 1: - df0 = self.data[j] - for k in self.test_ix[j]: - fname = Path(os.path.join(self.dirpath, f"test{k}.sas7bdat")) - df = pd.read_sas(fname, encoding="utf-8") - tm.assert_frame_equal(df, df0) + def test_path_pathlib(self, dirpath, data_test_ix): + df0, test_ix = data_test_ix + for k in test_ix: + fname = Path(os.path.join(dirpath, f"test{k}.sas7bdat")) + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) @td.skip_if_no("py.path") @pytest.mark.slow - def test_path_localpath(self): + def test_path_localpath(self, dirpath, data_test_ix): from py.path import local as LocalPath - for j in 0, 1: - df0 = self.data[j] - for k in self.test_ix[j]: - fname = LocalPath(os.path.join(self.dirpath, f"test{k}.sas7bdat")) - df = pd.read_sas(fname, encoding="utf-8") - tm.assert_frame_equal(df, df0) + df0, test_ix = data_test_ix + for k in test_ix: + fname = LocalPath(os.path.join(dirpath, f"test{k}.sas7bdat")) + df = pd.read_sas(fname, encoding="utf-8") + tm.assert_frame_equal(df, df0) @pytest.mark.slow - def test_iterator_loop(self): + @pytest.mark.parametrize("chunksize", (3, 5, 10, 11)) + @pytest.mark.parametrize("k", range(1, 17)) + def test_iterator_loop(self, dirpath, k, chunksize): # github #13654 - for j in 0, 1: - for k in self.test_ix[j]: - for chunksize in (3, 5, 10, 11): - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") - with pd.read_sas( - fname, chunksize=chunksize, encoding="utf-8" - ) as rdr: - y = 0 - for x in rdr: - y += x.shape[0] - assert y == rdr.row_count - - def test_iterator_read_too_much(self): + fname = os.path.join(dirpath, f"test{k}.sas7bdat") + with pd.read_sas(fname, chunksize=chunksize, encoding="utf-8") as rdr: + y = 0 + for x in rdr: + y += x.shape[0] + assert y == rdr.row_count + + def test_iterator_read_too_much(self, dirpath): # github #14734 - k = self.test_ix[0][0] - fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") + fname = os.path.join(dirpath, "test1.sas7bdat") with pd.read_sas( fname, format="sas7bdat", iterator=True, encoding="utf-8" ) as rdr: @@ -183,7 +177,8 @@ def test_date_time(datapath): tm.assert_frame_equal(df, df0) -def test_compact_numerical_values(datapath): +@pytest.mark.parametrize("column", ["WGT", "CYL"]) +def test_compact_numerical_values(datapath, column): # Regression test for #21616 fname = datapath("io", "sas", "data", "cars.sas7bdat") df = pd.read_sas(fname, encoding="latin-1") @@ -191,11 +186,8 @@ def test_compact_numerical_values(datapath): # width < 8 and only contain integral values. # Test that pandas doesn't corrupt the numbers by adding # decimals. - result = df["WGT"] - expected = df["WGT"].round() - tm.assert_series_equal(result, expected, check_exact=True) - result = df["CYL"] - expected = df["CYL"].round() + result = df[column] + expected = df[column].round() tm.assert_series_equal(result, expected, check_exact=True) diff --git a/pandas/tests/series/indexing/test_xs.py b/pandas/tests/series/indexing/test_xs.py index 9a277783a1b3d..8f3414f673627 100644 --- a/pandas/tests/series/indexing/test_xs.py +++ b/pandas/tests/series/indexing/test_xs.py @@ -45,7 +45,7 @@ def test_series_getitem_multiindex_xs_by_label(self): result = ser.xs("one", level="L2") tm.assert_series_equal(result, expected) - def test_series_getitem_multiindex_xs(xs): + def test_series_getitem_multiindex_xs(self): # GH#6258 dt = list(date_range("20130903", periods=3)) idx = MultiIndex.from_product([list("AB"), dt])