TST: Use less autouse=True when unnecessary #45688

Merged: 5 commits, Jan 29, 2022
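The pattern applied throughout this PR: a fixture marked autouse=True runs before every test in its scope whether or not the test uses it, typically smuggling state in through self, while a plain @pytest.fixture runs only for the tests that request it by name, making each test's dependencies explicit in its signature. A minimal sketch of the contrast (class and fixture names here are illustrative, not taken from the pandas test suite):

import pytest


class TestImplicit:
    # autouse: built before *every* test in the class, even tests
    # that never read self.frame_data
    @pytest.fixture(autouse=True)
    def setup(self):
        self.frame_data = {"a": [1, 2, 3]}

    def test_unrelated(self):
        assert 1 + 1 == 2  # still pays for the setup above


class TestExplicit:
    # explicit: built only when a test names it in its signature
    @pytest.fixture
    def frame_data(self):
        return {"a": [1, 2, 3]}

    def test_uses_data(self, frame_data):
        assert frame_data["a"][0] == 1

    def test_unrelated(self):
        assert 1 + 1 == 2  # no fixture runs here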
2 changes: 0 additions & 2 deletions pandas/core/frame.py
@@ -1365,8 +1365,6 @@ def itertuples(
         -----
         The column names will be renamed to positional names if they are
         invalid Python identifiers, repeated, or start with an underscore.
-        On python versions < 3.7 regular tuples are returned for DataFrames
-        with a large number of columns (>254).
 
         Examples
         --------
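The two deleted docstring lines describe a limitation that only existed before Python 3.7, which pandas no longer supports, so they are dead text. The renaming rule that remains is easy to demonstrate; a quick illustration (output shown as a comment, as I would expect it from current pandas):

import pandas as pd

df = pd.DataFrame({"valid": [1], "not valid": [2]})
for row in df.itertuples():
    # "not valid" is not a legal Python identifier, so the namedtuple
    # falls back to the positional name _2 for that column
    print(row)  # Pandas(Index=0, valid=1, _2=2)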
32 changes: 15 additions & 17 deletions pandas/tests/io/json/test_pandas.py
@@ -25,16 +25,6 @@
 )
 import pandas._testing as tm
 
-_seriesd = tm.getSeriesData()
-
-_frame = DataFrame(_seriesd)
-
-_cat_frame = _frame.copy()
-cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
-_cat_frame.index = pd.CategoricalIndex(cat, name="E")
-_cat_frame["E"] = list(reversed(cat))
-_cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
-
 
 def assert_json_roundtrip_equal(result, expected, orient):
     if orient == "records" or orient == "values":
@@ -49,11 +39,17 @@ def assert_json_roundtrip_equal(result, expected, orient):
 )
 @pytest.mark.filterwarnings("ignore:the 'numpy' keyword is deprecated:FutureWarning")
 class TestPandasContainer:
-    @pytest.fixture(autouse=True)
-    def setup(self):
-        self.categorical = _cat_frame.copy()
+    @pytest.fixture
+    def categorical_frame(self):
+        _seriesd = tm.getSeriesData()
+
+        _cat_frame = DataFrame(_seriesd)
 
-        yield
+        cat = ["bah"] * 5 + ["bar"] * 5 + ["baz"] * 5 + ["foo"] * (len(_cat_frame) - 15)
+        _cat_frame.index = pd.CategoricalIndex(cat, name="E")
+        _cat_frame["E"] = list(reversed(cat))
+        _cat_frame["sort"] = np.arange(len(_cat_frame), dtype="int64")
+        return _cat_frame

     @pytest.fixture
     def datetime_series(self):
@@ -215,7 +211,9 @@ def test_roundtrip_str_axes(self, request, orient, convert_axes, numpy, dtype):

     @pytest.mark.parametrize("convert_axes", [True, False])
     @pytest.mark.parametrize("numpy", [True, False])
-    def test_roundtrip_categorical(self, request, orient, convert_axes, numpy):
+    def test_roundtrip_categorical(
+        self, request, orient, categorical_frame, convert_axes, numpy
+    ):
         # TODO: create a better frame to test with and improve coverage
         if orient in ("index", "columns"):
             request.node.add_marker(
@@ -224,15 +222,15 @@ def test_roundtrip_categorical(self, request, orient, convert_axes, numpy):
                 )
             )
 
-        data = self.categorical.to_json(orient=orient)
+        data = categorical_frame.to_json(orient=orient)
         if numpy and orient in ("records", "values"):
            request.node.add_marker(
                pytest.mark.xfail(reason=f"Orient {orient} is broken with numpy=True")
            )
 
         result = read_json(data, orient=orient, convert_axes=convert_axes, numpy=numpy)
 
-        expected = self.categorical.copy()
+        expected = categorical_frame.copy()
         expected.index = expected.index.astype(str)  # Categorical not preserved
         expected.index.name = None  # index names aren't preserved in JSON
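Note that the rewrite does more than drop autouse=True: frame construction moves from import time into the fixture body, so every test that requests categorical_frame gets a freshly built frame rather than a copy of mutable module-level state. A toy sketch of the difference (names invented for illustration, not from the PR):

import pytest

SHARED = {"n": 0}  # module-level: one object shared by the whole run


class TestFreshness:
    @pytest.fixture
    def counter(self):
        # constructed anew for each test that requests it
        return {"n": 0}

    def test_fresh_state(self, counter):
        counter["n"] += 1
        assert counter["n"] == 1  # always holds, no cross-test leakage

    def test_shared_state(self):
        SHARED["n"] += 1
        # holds only if no other test has touched SHARED before this one
        assert SHARED["n"] == 1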
25 changes: 10 additions & 15 deletions pandas/tests/io/parser/test_textreader.py
@@ -6,7 +6,6 @@
     BytesIO,
     StringIO,
 )
-import os
 
 import numpy as np
 import pytest
@@ -25,27 +24,23 @@
 
 
 class TestTextReader:
-    @pytest.fixture(autouse=True)
-    def setup_method(self, datapath):
-        self.dirpath = datapath("io", "parser", "data")
-        csv1_dirpath = datapath("io", "data", "csv")
-        self.csv1 = os.path.join(csv1_dirpath, "test1.csv")
-        self.csv2 = os.path.join(self.dirpath, "test2.csv")
-        self.xls1 = os.path.join(self.dirpath, "test.xls")
-
-    def test_file_handle(self):
-        with open(self.csv1, "rb") as f:
+    @pytest.fixture
+    def csv_path(self, datapath):
+        return datapath("io", "data", "csv", "test1.csv")
+
+    def test_file_handle(self, csv_path):
+        with open(csv_path, "rb") as f:
             reader = TextReader(f)
             reader.read()
 
-    def test_file_handle_mmap(self):
+    def test_file_handle_mmap(self, csv_path):
         # this was never using memory_map=True
-        with open(self.csv1, "rb") as f:
+        with open(csv_path, "rb") as f:
             reader = TextReader(f, header=None)
             reader.read()
 
-    def test_StringIO(self):
-        with open(self.csv1, "rb") as f:
+    def test_StringIO(self, csv_path):
+        with open(csv_path, "rb") as f:
             text = f.read()
         src = BytesIO(text)
         reader = TextReader(src, header=None)
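csv_path is itself layered on another fixture: datapath comes from pandas' conftest.py and resolves files under the test data tree. Fixture-on-fixture composition like this is plain pytest; a self-contained sketch using the built-in tmp_path fixture (data_dir and sample_file are invented names, not pandas APIs):

import pytest


@pytest.fixture
def data_dir(tmp_path):
    # stand-in for a project-level path fixture such as pandas' datapath
    (tmp_path / "test1.csv").write_text("a,b\n1,2\n")
    return tmp_path


@pytest.fixture
def sample_file(data_dir):
    # one fixture requesting another, as csv_path requests datapath
    return data_dir / "test1.csv"


def test_read(sample_file):
    assert sample_file.read_text().startswith("a,b")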
95 changes: 51 additions & 44 deletions pandas/tests/io/sas/test_xport.py
@@ -1,5 +1,3 @@
-import os
 
 import numpy as np
 import pytest

@@ -24,145 +22,154 @@ def numeric_as_float(data):
 
 class TestXport:
     @pytest.fixture(autouse=True)
-    def setup_method(self, datapath):
-        self.dirpath = datapath("io", "sas", "data")
-        self.file01 = os.path.join(self.dirpath, "DEMO_G.xpt")
-        self.file02 = os.path.join(self.dirpath, "SSHSV1_A.xpt")
-        self.file03 = os.path.join(self.dirpath, "DRXFCD_G.xpt")
-        self.file04 = os.path.join(self.dirpath, "paxraw_d_short.xpt")
-        self.file05 = os.path.join(self.dirpath, "DEMO_PUF.cpt")
-
+    def setup_method(self):
         with td.file_leak_context():
             yield
 
+    @pytest.fixture
+    def file01(self, datapath):
+        return datapath("io", "sas", "data", "DEMO_G.xpt")
+
+    @pytest.fixture
+    def file02(self, datapath):
+        return datapath("io", "sas", "data", "SSHSV1_A.xpt")
+
+    @pytest.fixture
+    def file03(self, datapath):
+        return datapath("io", "sas", "data", "DRXFCD_G.xpt")
+
+    @pytest.fixture
+    def file04(self, datapath):
+        return datapath("io", "sas", "data", "paxraw_d_short.xpt")
+
+    @pytest.fixture
+    def file05(self, datapath):
+        return datapath("io", "sas", "data", "DEMO_PUF.cpt")

     @pytest.mark.slow
-    def test1_basic(self):
+    def test1_basic(self, file01):
         # Tests with DEMO_G.xpt (all numeric file)
 
         # Compare to this
-        data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
         numeric_as_float(data_csv)
 
         # Read full file
-        data = read_sas(self.file01, format="xport")
+        data = read_sas(file01, format="xport")
         tm.assert_frame_equal(data, data_csv)
         num_rows = data.shape[0]
 
         # Test reading beyond end of file
-        with read_sas(self.file01, format="xport", iterator=True) as reader:
+        with read_sas(file01, format="xport", iterator=True) as reader:
             data = reader.read(num_rows + 100)
         assert data.shape[0] == num_rows
 
         # Test incremental read with `read` method.
-        with read_sas(self.file01, format="xport", iterator=True) as reader:
+        with read_sas(file01, format="xport", iterator=True) as reader:
             data = reader.read(10)
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
 
         # Test incremental read with `get_chunk` method.
-        with read_sas(self.file01, format="xport", chunksize=10) as reader:
+        with read_sas(file01, format="xport", chunksize=10) as reader:
             data = reader.get_chunk()
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :])
 
         # Test read in loop
         m = 0
-        with read_sas(self.file01, format="xport", chunksize=100) as reader:
+        with read_sas(file01, format="xport", chunksize=100) as reader:
             for x in reader:
                 m += x.shape[0]
         assert m == num_rows
 
         # Read full file with `read_sas` method
-        data = read_sas(self.file01)
+        data = read_sas(file01)
         tm.assert_frame_equal(data, data_csv)

-    def test1_index(self):
+    def test1_index(self, file01):
         # Tests with DEMO_G.xpt using index (all numeric file)
 
         # Compare to this
-        data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
         data_csv = data_csv.set_index("SEQN")
         numeric_as_float(data_csv)
 
         # Read full file
-        data = read_sas(self.file01, index="SEQN", format="xport")
+        data = read_sas(file01, index="SEQN", format="xport")
         tm.assert_frame_equal(data, data_csv, check_index_type=False)
 
         # Test incremental read with `read` method.
-        with read_sas(
-            self.file01, index="SEQN", format="xport", iterator=True
-        ) as reader:
+        with read_sas(file01, index="SEQN", format="xport", iterator=True) as reader:
             data = reader.read(10)
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)
 
         # Test incremental read with `get_chunk` method.
-        with read_sas(
-            self.file01, index="SEQN", format="xport", chunksize=10
-        ) as reader:
+        with read_sas(file01, index="SEQN", format="xport", chunksize=10) as reader:
             data = reader.get_chunk()
         tm.assert_frame_equal(data, data_csv.iloc[0:10, :], check_index_type=False)
 
-    def test1_incremental(self):
+    def test1_incremental(self, file01):
         # Test with DEMO_G.xpt, reading full file incrementally
 
-        data_csv = pd.read_csv(self.file01.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file01.replace(".xpt", ".csv"))
         data_csv = data_csv.set_index("SEQN")
         numeric_as_float(data_csv)
 
-        with read_sas(self.file01, index="SEQN", chunksize=1000) as reader:
+        with read_sas(file01, index="SEQN", chunksize=1000) as reader:
             all_data = list(reader)
         data = pd.concat(all_data, axis=0)
 
         tm.assert_frame_equal(data, data_csv, check_index_type=False)

-    def test2(self):
+    def test2(self, file02):
         # Test with SSHSV1_A.xpt
 
         # Compare to this
-        data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file02.replace(".xpt", ".csv"))
         numeric_as_float(data_csv)
 
-        data = read_sas(self.file02)
+        data = read_sas(file02)
         tm.assert_frame_equal(data, data_csv)
 
-    def test2_binary(self):
+    def test2_binary(self, file02):
         # Test with SSHSV1_A.xpt, read as a binary file
 
         # Compare to this
-        data_csv = pd.read_csv(self.file02.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file02.replace(".xpt", ".csv"))
         numeric_as_float(data_csv)
 
-        with open(self.file02, "rb") as fd:
+        with open(file02, "rb") as fd:
             with td.file_leak_context():
                 # GH#35693 ensure that if we pass an open file, we
                 # don't incorrectly close it in read_sas
                 data = read_sas(fd, format="xport")
 
         tm.assert_frame_equal(data, data_csv)

-    def test_multiple_types(self):
+    def test_multiple_types(self, file03):
         # Test with DRXFCD_G.xpt (contains text and numeric variables)
 
         # Compare to this
-        data_csv = pd.read_csv(self.file03.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file03.replace(".xpt", ".csv"))
 
-        data = read_sas(self.file03, encoding="utf-8")
+        data = read_sas(file03, encoding="utf-8")
         tm.assert_frame_equal(data, data_csv)
 
-    def test_truncated_float_support(self):
+    def test_truncated_float_support(self, file04):
         # Test with paxraw_d_short.xpt, a shortened version of:
         # http://wwwn.cdc.gov/Nchs/Nhanes/2005-2006/PAXRAW_D.ZIP
         # This file has truncated floats (5 bytes in this case).
 
         # GH 11713
 
-        data_csv = pd.read_csv(self.file04.replace(".xpt", ".csv"))
+        data_csv = pd.read_csv(file04.replace(".xpt", ".csv"))
 
-        data = read_sas(self.file04, format="xport")
+        data = read_sas(file04, format="xport")
         tm.assert_frame_equal(data.astype("int64"), data_csv)

-    def test_cport_header_found_raises(self):
+    def test_cport_header_found_raises(self, file05):
         # Test with DEMO_PUF.cpt, the beginning of puf2019_1_fall.xpt
         # from https://www.cms.gov/files/zip/puf2019.zip
         # (despite the extension, it's a cpt file)
         msg = "Header record indicates a CPORT file, which is not readable."
         with pytest.raises(ValueError, match=msg):
-            read_sas(self.file05, format="xport")
+            read_sas(file05, format="xport")
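One autouse fixture deliberately survives in this file: setup_method keeps autouse=True because wrapping every test in td.file_leak_context() is a genuinely cross-cutting concern, which is exactly the case autouse exists for. In a yield fixture, everything before the yield runs as setup for each test and everything after it (here, exiting the context manager) runs as teardown. A generic sketch of that shape (leak_checker is a stand-in, not the pandas helper):

import contextlib

import pytest


@contextlib.contextmanager
def leak_checker():
    # stand-in for td.file_leak_context(): snapshot state, verify on exit
    handles_before = 0  # imagine counting open file descriptors here
    yield
    handles_after = 0
    assert handles_after == handles_before  # nothing leaked during the test


class TestWrapped:
    @pytest.fixture(autouse=True)
    def setup_method(self):
        # runs around *every* test in the class: setup before the
        # yield, teardown after it
        with leak_checker():
            yield

    def test_something(self):
        assert True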