TST: Use less autouse=True when unnecessary #45688

Merged: 5 commits, Jan 29, 2022
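The pattern applied throughout this PR: a fixture marked autouse=True runs before every test in its scope whether or not the test uses it, typically smuggling state in through self, while a plain @pytest.fixture runs only for the tests that request it by name, making each test's dependencies explicit in its signature. A minimal sketch of the contrast (class and fixture names here are illustrative, not taken from the pandas test suite):

import pytest


class TestImplicit:
    # autouse: built before *every* test in the class, even tests
    # that never read self.frame_data
    @pytest.fixture(autouse=True)
    def setup(self):
        self.frame_data = {"a": [1, 2, 3]}

    def test_unrelated(self):
        assert 1 + 1 == 2  # still pays for the setup above


class TestExplicit:
    # explicit: built only when a test names it in its signature
    @pytest.fixture
    def frame_data(self):
        return {"a": [1, 2, 3]}

    def test_uses_data(self, frame_data):
        assert frame_data["a"][0] == 1

    def test_unrelated(self):
        assert 1 + 1 == 2  # no fixture runs here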
2 changes: 0 additions & 2 deletions pandas/core/frame.py
@@ -1365,8 +1365,6 @@ def itertuples(
         -----
         The column names will be renamed to positional names if they are
         invalid Python identifiers, repeated, or start with an underscore.
-        On python versions < 3.7 regular tuples are returned for DataFrames
-        with a large number of columns (>254).
 
         Examples
         --------
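The two deleted docstring lines describe a limitation that only existed before Python 3.7, which pandas no longer supports, so they are dead text. The renaming rule that remains is easy to demonstrate; a quick illustration (output shown as a comment, as I would expect it from current pandas):

import pandas as pd

df = pd.DataFrame({"valid": [1], "not valid": [2]})
for row in df.itertuples():
    # "not valid" is not a legal Python identifier, so the namedtuple
    # falls back to the positional name _2 for that column
    print(row)  # Pandas(Index=0, valid=1, _2=2)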
32 changes: 15 additions & 17 deletions pandas/tests/io/json/test_pandas.py
@@ -25,16 +25,6 @@
 )
 import pandas._testing as tm
 
-_seriesd = tm.getSeriesData()
-
-_frame = DataFrame(_seriesd)
-
-_cat_frame = _frame.copy()
-cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
-_cat_frame.index = pd.CategoricalIndex(cat, name="E")
-_cat_frame["E"] = list(reversed(cat))
-_cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
-
 
 def assert_json_roundtrip_equal(result, expected, orient):
     if orient == "records" or orient == "values":
@@ -49,11 +39,17 @@ def assert_json_roundtrip_equal(result, expected, orient):
 )
 @pytest.mark.filterwarnings("ignore:the 'numpy' keyword is deprecated:FutureWarning")
 class TestPandasContainer:
-    @pytest.fixture(autouse=True)
-    def setup(self):
-        self.categorical = _cat_frame.copy()
+    @pytest.fixture
+    def categorical_frame(self):
+        _seriesd = tm.getSeriesData()
+
+        _cat_frame = DataFrame(_seriesd)
 
-        yield
+        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
+        _cat_frame.index = pd.CategoricalIndex(cat, name="E")
+        _cat_frame["E"] = list(reversed(cat))
+        _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
+        return _cat_frame

     @pytest.fixture
     def datetime_series(self):
@@ -215,7 +211,9 @@ def test_roundtrip_str_axes(self, request, orient, convert_axes, numpy, dtype):

     @pytest.mark.parametrize("convert_axes", [True, False])
     @pytest.mark.parametrize("numpy", [True, False])
-    def test_roundtrip_categorical(self, request, orient, convert_axes, numpy):
+    def test_roundtrip_categorical(
+        self, request, orient, categorical_frame, convert_axes, numpy
+    ):
         # TODO: create a better frame to test with and improve coverage
         if orient in ("index", "columns"):
             request.node.add_marker(
@@ -224,15 +222,15 @@ def test_roundtrip_categorical(self, request, orient, convert_axes, numpy):
                 )
             )
 
-        data = self.categorical.to_json(orient=orient)
+        data = categorical_frame.to_json(orient=orient)
         if numpy and orient in ("records", "values"):
            request.node.add_marker(
                pytest.mark.xfail(reason=f"Orient {orient} is broken with numpy=True")
            )
 
         result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy)
 
-        expected = self.categorical.copy()
+        expected = categorical_frame.copy()
         expected.index = expected.index.astype(str)  # Categorical not preserved
         expected.index.name = None  # index names aren't preserved in JSON
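Note that the rewrite does more than drop autouse=True: frame construction moves from import time into the fixture body, so every test that requests categorical_frame gets a freshly built frame rather than a copy of mutable module-level state. A toy sketch of the difference (names invented for illustration, not from the PR):

import pytest

SHARED = {"n": 0}  # module-level: one object shared by the whole run


class TestFreshness:
    @pytest.fixture
    def counter(self):
        # constructed anew for each test that requests it
        return {"n": 0}

    def test_fresh_state(self, counter):
        counter["n"] += 1
        assert counter["n"] == 1  # always holds, no cross-test leakage

    def test_shared_state(self):
        SHARED["n"] += 1
        # holds only if no other test has touched SHARED before this one
        assert SHARED["n"] == 1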
25 changes: 10 additions & 15 deletions pandas/tests/io/parser/test_textreader.py
@@ -6,7 +6,6 @@
     BytesIO,
     StringIO,
 )
-import os
 
 import numpy as np
 import pytest
@@ -25,27 +24,23 @@
 
 
 class TestTextReader:
-    @pytest.fixture(autouse=True)
-    def setup_method(self, datapath):
-        self.dirpath = datapath("io", "parser", "data")
-        csv1_dirpath = datapath("io", "data", "csv")
-        self.csv1 = os.path.join(csv1_dirpath, "test1.csv")
-        self.csv2 = os.path.join(self.dirpath, "test2.csv")
-        self.xls1 = os.path.join(self.dirpath, "test.xls")
-
-    def test_file_handle(self):
-        with open(self.csv1, "rb") as f:
+    @pytest.fixture
+    def csv_path(self, datapath):
+        return datapath("io", "data", "csv", "test1.csv")
+
+    def test_file_handle(self, csv_path):
+        with open(csv_path, "rb") as f:
             reader = TextReader(f)
             reader.read()
 
-    def test_file_handle_mmap(self):
+    def test_file_handle_mmap(self, csv_path):
         # this was never using memory_map=True
-        with open(self.csv1, "rb") as f:
+        with open(csv_path, "rb") as f:
             reader = TextReader(f, header=None)
             reader.read()
 
-    def test_StringIO(self):
-        with open(self.csv1, "rb") as f:
+    def test_StringIO(self, csv_path):
+        with open(csv_path, "rb") as f:
             text = f.read()
         src = BytesIO(text)
         reader = TextReader(src, header=None)
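csv_path is itself layered on another fixture: datapath comes from pandas' conftest.py and resolves files under the test data tree. Fixture-on-fixture composition like this is plain pytest; a self-contained sketch using the built-in tmp_path fixture (data_dir and sample_file are invented names, not pandas APIs):

import pytest


@pytest.fixture
def data_dir(tmp_path):
    # stand-in for a project-level path fixture such as pandas' datapath
    (tmp_path / "test1.csv").write_text("a,b\n1,2\n")
    return tmp_path


@pytest.fixture
def sample_file(data_dir):
    # one fixture requesting another, as csv_path requests datapath
    return data_dir / "test1.csv"


def test_read(sample_file):
    assert sample_file.read_text().startswith("a,b")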
95 changes: 51 additions & 44 deletions pandas/tests/io/sas/test_xport.py
@@ -1,5 +1,3 @@
-import os
 
 import numpy as np
 import pytest

@@ -24,145 +22,154 @@ def numeric_as_float(data):
 
 class TestXport:
     @pytest.fixture(autouse=True)
-    def setup_method(self, datapath):
-        self.dirpath = datapath("io", "sas", "data")
-        self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt")
-        self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt")
-        self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
-        self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt")
-        self.file05 = os.path.join(self.dirpath, "DEMO_PUF.cpt")
-
+    def setup_method(self):
         with td.file_leak_context():
             yield
 
+    @pytest.fixture
+    def file01(self, datapath):
+        return datapath("io", "sas", "data", "DEMO_G.xpt")
+
+    @pytest.fixture
+    def file02(self, datapath):
+        return datapath("io", "sas", "data", "SSHSV1_A.xpt")
+
+    @pytest.fixture
+    def file03(self, datapath):
+        return datapath("io", "sas", "data", "DRXFCD_G.xpt")
+
+    @pytest.fixture
+    def file04(self, datapath):
+        return datapath("io", "sas", "data", "paxraw_d_short.xpt")
+
+    @pytest.fixture
+    def file05(self, datapath):
+        return datapath("io", "sas", "data", "DEMO_PUF.cpt")

     @pytest.mark.slow
-    def test1_basic(self):
+    def test1_basic(self, file01):
         # Tests with DEMO_G.xpt (all numeric file)
 
         # Compare to this
-        data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
         numeric_as_float(data_csv)
 
         # Read full file
-        data = read_sas(self.file01, format="xport")
+        data = read_sas(file01, format="xport")
         tm.assert_frame_equal(data, data_csv)
         num_rows = data.shape[0]
 
         # Test reading beyond end of file
-        with read_sas(self.file01, format="xport", iterator=True) as reader:
+        with read_sas(file01, format="xport", iterator=True) as reader:
             data = reader.read(num_rows + 100)
         assert data.shape[0] == num_rows
 
         # Test incremental read with `read` method.
-        with read_sas(self.file01, format="xport", iterator=True) as reader:
+        with read_sas(file01, format="xport", iterator=True) as reader:
             data = reader.read(10)
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
 
         # Test incremental read with `get_chunk` method.
-        with read_sas(self.file01, format="xport", chunksize=10) as reader:
+        with read_sas(file01, format="xport", chunksize=10) as reader:
             data = reader.get_chunk()
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
 
         # Test read in loop
         m = 0
-        with read_sas(self.file01, format="xport", chunksize=100) as reader:
+        with read_sas(file01, format="xport", chunksize=100) as reader:
             for x in reader:
                 m += x.shape[0]
         assert m == num_rows
 
         # Read full file with `read_sas` method
-        data = read_sas(self.file01)
+        data = read_sas(file01)
         tm.assert_frame_equal(data, data_csv)

-    def test1_index(self):
+    def test1_index(self, file01):
         # Tests with DEMO_G.xpt using index (all numeric file)
 
         # Compare to this
-        data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
         data_csv = data_csv.set_index("SEQN")
         numeric_as_float(data_csv)
 
         # Read full file
-        data = read_sas(self.file01, index="SEQN", format="xport")
+        data = read_sas(file01, index="SEQN", format="xport")
         tm.assert_frame_equal(data, data_csv, check_index_type=False)
 
         # Test incremental read with `read` method.
-        with read_sas(
-            self.file01, index="SEQN", format="xport", iterator=True
-        ) as reader:
+        with read_sas(file01, index="SEQN", format="xport", iterator=True) as reader:
             data = reader.read(10)
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)
 
         # Test incremental read with `get_chunk` method.
-        with read_sas(
-            self.file01, index="SEQN", format="xport", chunksize=10
-        ) as reader:
+        with read_sas(file01, index="SEQN", format="xport", chunksize=10) as reader:
             data = reader.get_chunk()
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)
 
-    def test1_incremental(self):
+    def test1_incremental(self, file01):
         # Test with DEMO_G.xpt, reading full file incrementally
 
-        data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
         data_csv = data_csv.set_index("SEQN")
         numeric_as_float(data_csv)
 
-        with read_sas(self.file01, index="SEQN", chunksize=1000) as reader:
+        with read_sas(file01, index="SEQN", chunksize=1000) as reader:
             all_data = list(reader)
         data = pd.concat(all_data, axis=0)
 
         tm.assert_frame_equal(data, data_csv, check_index_type=False)

-    def test2(self):
+    def test2(self, file02):
         # Test with SSHSV1_A.xpt
 
         # Compare to this
-        data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file02.replace(".xpt", ".csv"))
         numeric_as_float(data_csv)
 
-        data = read_sas(self.file02)
+        data = read_sas(file02)
         tm.assert_frame_equal(data, data_csv)
 
-    def test2_binary(self):
+    def test2_binary(self, file02):
         # Test with SSHSV1_A.xpt, read as a binary file
 
         # Compare to this
-        data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file02.replace(".xpt", ".csv"))
         numeric_as_float(data_csv)
 
-        with open(self.file02, "rb") as fd:
+        with open(file02, "rb") as fd:
             with td.file_leak_context():
                 # GH#35693 ensure that if we pass an open file, we
                 # don't incorrectly close it in read_sas
                 data = read_sas(fd, format="xport")
 
         tm.assert_frame_equal(data, data_csv)

-    def test_multiple_types(self):
+    def test_multiple_types(self, file03):
         # Test with DRXFCD_G.xpt (contains text and numeric variables)
 
         # Compare to this
-        data_csv = pd.read_csv(self.file03.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file03.replace(".xpt", ".csv"))
 
-        data = read_sas(self.file03, encoding="utf-8")
+        data = read_sas(file03, encoding="utf-8")
         tm.assert_frame_equal(data, data_csv)
 
-    def test_truncated_float_support(self):
+    def test_truncated_float_support(self, file04):
         # Test with paxraw_d_short.xpt, a shortened version of:
         # http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP
         # This file has truncated floats (5 bytes in this case).
 
         # GH 11713
 
-        data_csv = pd.read_csv(self.file04.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file04.replace(".xpt", ".csv"))
 
-        data = read_sas(self.file04, format="xport")
+        data = read_sas(file04, format="xport")
         tm.assert_frame_equal(data.astype("int64"), data_csv)

-    def test_cport_header_found_raises(self):
+    def test_cport_header_found_raises(self, file05):
         # Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt
         # from https://www.cms.gov/files/zip/puf2019.zip
         # (despite the extension, it's a cpt file)
         msg = "Header record indicates a CPORT file, which is not readable."
         with pytest.raises(ValueError, match=msg):
-            read_sas(self.file05, format="xport")
+            read_sas(file05, format="xport")
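One autouse fixture deliberately survives in this file: setup_method keeps autouse=True because wrapping every test in td.file_leak_context() is a genuinely cross-cutting concern, which is exactly the case autouse exists for. In a yield fixture, everything before the yield runs as setup for each test and everything after it (here, exiting the context manager) runs as teardown. A generic sketch of that shape (leak_checker is a stand-in, not the pandas helper):

import contextlib

import pytest


@contextlib.contextmanager
def leak_checker():
    # stand-in for td.file_leak_context(): snapshot state, verify on exit
    handles_before = 0  # imagine counting open file descriptors here
    yield
    handles_after = 0
    assert handles_after == handles_before  # nothing leaked during the test


class TestWrapped:
    @pytest.fixture(autouse=True)
    def setup_method(self):
        # runs around *every* test in the class: setup before the
        # yield, teardown after it
        with leak_checker():
            yield

    def test_something(self):
        assert True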