diff --git a/pandas/tests/io/parser/common/test_chunksize.py b/pandas/tests/io/parser/common/test_chunksize.py index 0f42aa81e4b37..baed74fc212e4 100644 --- a/pandas/tests/io/parser/common/test_chunksize.py +++ b/pandas/tests/io/parser/common/test_chunksize.py @@ -16,13 +16,11 @@ ) import pandas._testing as tm -xfail_pyarrow = pytest.mark.usefixtures("pyarrow_xfail") pytestmark = pytest.mark.filterwarnings( "ignore:Passing a BlockManager to DataFrame:DeprecationWarning" ) -@xfail_pyarrow # The 'chunksize' option is not supported @pytest.mark.parametrize("index_col", [0, "index"]) def test_read_chunksize_with_index(all_parsers, index_col): parser = all_parsers @@ -48,6 +46,13 @@ def test_read_chunksize_with_index(all_parsers, index_col): ) expected = expected.set_index("index") + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader: + list(reader) + return + with parser.read_csv(StringIO(data), index_col=0, chunksize=2) as reader: chunks = list(reader) tm.assert_frame_equal(chunks[0], expected[:2]) @@ -55,7 +60,6 @@ def test_read_chunksize_with_index(all_parsers, index_col): tm.assert_frame_equal(chunks[2], expected[4:]) -@xfail_pyarrow # AssertionError: Regex pattern did not match @pytest.mark.parametrize("chunksize", [1.3, "foo", 0]) def test_read_chunksize_bad(all_parsers, chunksize): data = """index,A,B,C,D @@ -68,13 +72,14 @@ def test_read_chunksize_bad(all_parsers, chunksize): """ parser = all_parsers msg = r"'chunksize' must be an integer >=1" + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" with pytest.raises(ValueError, match=msg): with parser.read_csv(StringIO(data), chunksize=chunksize) as _: pass -@xfail_pyarrow # The 'nrows' option is not supported @pytest.mark.parametrize("chunksize", [2, 8]) def test_read_chunksize_and_nrows(all_parsers, chunksize): # see gh-15755 @@ -89,12 +94,17 @@ def test_read_chunksize_and_nrows(all_parsers, chunksize): parser = all_parsers kwargs = {"index_col": 0, "nrows": 5} + if parser.engine == "pyarrow": + msg = "The 'nrows' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + return + expected = parser.read_csv(StringIO(data), **kwargs) with parser.read_csv(StringIO(data), chunksize=chunksize, **kwargs) as reader: tm.assert_frame_equal(concat(reader), expected) -@xfail_pyarrow # The 'chunksize' option is not supported def test_read_chunksize_and_nrows_changing_size(all_parsers): data = """index,A,B,C,D foo,2,3,4,5 @@ -107,6 +117,12 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers): parser = all_parsers kwargs = {"index_col": 0, "nrows": 5} + if parser.engine == "pyarrow": + msg = "The 'nrows' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), **kwargs) + return + expected = parser.read_csv(StringIO(data), **kwargs) with parser.read_csv(StringIO(data), chunksize=8, **kwargs) as reader: tm.assert_frame_equal(reader.get_chunk(size=2), expected.iloc[:2]) @@ -116,7 +132,6 @@ def test_read_chunksize_and_nrows_changing_size(all_parsers): reader.get_chunk(size=3) -@xfail_pyarrow # The 'chunksize' option is not supported def test_get_chunk_passed_chunksize(all_parsers): parser = all_parsers data = """A,B,C @@ -125,6 +140,13 @@ def test_get_chunk_passed_chunksize(all_parsers): 7,8,9 1,2,3""" + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + with parser.read_csv(StringIO(data), chunksize=2) as reader: + reader.get_chunk() + return + with parser.read_csv(StringIO(data), chunksize=2) as reader: result = reader.get_chunk() @@ -132,7 +154,6 @@ def test_get_chunk_passed_chunksize(all_parsers): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # The 'chunksize' option is not supported @pytest.mark.parametrize("kwargs", [{}, {"index_col": 0}]) def test_read_chunksize_compat(all_parsers, kwargs): # see gh-12185 @@ -146,17 +167,35 @@ def test_read_chunksize_compat(all_parsers, kwargs): """ parser = all_parsers result = parser.read_csv(StringIO(data), **kwargs) + + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader: + concat(reader) + return + with parser.read_csv(StringIO(data), chunksize=2, **kwargs) as reader: - tm.assert_frame_equal(concat(reader), result) + via_reader = concat(reader) + tm.assert_frame_equal(via_reader, result) -@xfail_pyarrow # The 'chunksize' option is not supported def test_read_chunksize_jagged_names(all_parsers): # see gh-23509 parser = all_parsers data = "\n".join(["0"] * 7 + [",".join(["0"] * 10)]) expected = DataFrame([[0] + [np.nan] * 9] * 7 + [[0] * 10]) + + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + with parser.read_csv( + StringIO(data), names=range(10), chunksize=4 + ) as reader: + concat(reader) + return + with parser.read_csv(StringIO(data), names=range(10), chunksize=4) as reader: result = concat(reader) tm.assert_frame_equal(result, expected) @@ -194,7 +233,6 @@ def test_chunks_have_consistent_numerical_type(all_parsers, monkeypatch): assert result.a.dtype == float -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_warn_if_chunks_have_mismatched_type(all_parsers): warning_type = None parser = all_parsers @@ -212,17 +250,24 @@ def test_warn_if_chunks_have_mismatched_type(all_parsers): buf = StringIO(data) - df = parser.read_csv_check_warnings( - warning_type, - r"Columns \(0\) have mixed types. " - "Specify dtype option on import or set low_memory=False.", - buf, - ) + if parser.engine == "pyarrow": + df = parser.read_csv_check_warnings( + DeprecationWarning, + "Passing a BlockManager to DataFrame is deprecated", + buf, + check_stacklevel=False, + ) + else: + df = parser.read_csv_check_warnings( + warning_type, + r"Columns \(0\) have mixed types. " + "Specify dtype option on import or set low_memory=False.", + buf, + ) assert df.a.dtype == object -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported @pytest.mark.parametrize("iterator", [True, False]) def test_empty_with_nrows_chunksize(all_parsers, iterator): # see gh-9535 @@ -232,6 +277,18 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator): nrows = 10 data = StringIO("foo,bar\n") + if parser.engine == "pyarrow": + msg = ( + "The '(nrows|chunksize)' option is not supported with the 'pyarrow' engine" + ) + with pytest.raises(ValueError, match=msg): + if iterator: + with parser.read_csv(data, chunksize=nrows) as reader: + next(iter(reader)) + else: + parser.read_csv(data, nrows=nrows) + return + if iterator: with parser.read_csv(data, chunksize=nrows) as reader: result = next(iter(reader)) @@ -241,7 +298,6 @@ def test_empty_with_nrows_chunksize(all_parsers, iterator): tm.assert_frame_equal(result, expected) -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_read_csv_memory_growth_chunksize(all_parsers): # see gh-24805 # @@ -254,12 +310,19 @@ def test_read_csv_memory_growth_chunksize(all_parsers): for i in range(1000): f.write(str(i) + "\n") + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + with parser.read_csv(path, chunksize=20) as result: + for _ in result: + pass + return + with parser.read_csv(path, chunksize=20) as result: for _ in result: pass -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_chunksize_with_usecols_second_block_shorter(all_parsers): # GH#21211 parser = all_parsers @@ -268,6 +331,18 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers): 9,10,11 """ + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv( + StringIO(data), + names=["a", "b"], + chunksize=2, + usecols=[0, 1], + header=None, + ) + return + result_chunks = parser.read_csv( StringIO(data), names=["a", "b"], @@ -285,7 +360,6 @@ def test_chunksize_with_usecols_second_block_shorter(all_parsers): tm.assert_frame_equal(result, expected_frames[i]) -@xfail_pyarrow # ValueError: The 'chunksize' option is not supported def test_chunksize_second_block_shorter(all_parsers): # GH#21211 parser = all_parsers @@ -295,6 +369,12 @@ def test_chunksize_second_block_shorter(all_parsers): 9,10,11 """ + if parser.engine == "pyarrow": + msg = "The 'chunksize' option is not supported with the 'pyarrow' engine" + with pytest.raises(ValueError, match=msg): + parser.read_csv(StringIO(data), chunksize=2) + return + result_chunks = parser.read_csv(StringIO(data), chunksize=2) expected_frames = [ diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index eb7835bb27372..6d5f870f07206 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -34,6 +34,7 @@ def read_csv_check_warnings( warn_msg: str, *args, raise_on_extra_warnings=True, + check_stacklevel: bool = True, **kwargs, ): # We need to check the stacklevel here instead of in the tests @@ -41,7 +42,10 @@ def read_csv_check_warnings( # should point to. kwargs = self.update_kwargs(kwargs) with tm.assert_produces_warning( - warn_type, match=warn_msg, raise_on_extra_warnings=raise_on_extra_warnings + warn_type, + match=warn_msg, + raise_on_extra_warnings=raise_on_extra_warnings, + check_stacklevel=check_stacklevel, ): return read_csv(*args, **kwargs)