diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ef561d50066d1..741591be25bf9 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -256,6 +256,7 @@ Removal of prior version deprecations/changes - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) - Removed the :class:`Grouper` attributes ``ax``, ``groups``, ``indexer``, and ``obj`` (:issue:`51206`, :issue:`51182`) +- Removed deprecated keyword ``verbose`` on :func:`read_csv` and :func:`read_table` (:issue:`56556`) - Removed the attribute ``dtypes`` from :class:`.DataFrameGroupBy` (:issue:`51997`) .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index 01c7de0c6f2b3..c29cdbcf5975e 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -6,7 +6,6 @@ from csv import ( QUOTE_NONE, QUOTE_NONNUMERIC, ) -import time import warnings from pandas.util._exceptions import find_stack_level @@ -344,10 +343,9 @@ cdef class TextReader: object true_values, false_values object handle object orig_header - bint na_filter, keep_default_na, verbose, has_usecols, has_mi_columns + bint na_filter, keep_default_na, has_usecols, has_mi_columns bint allow_leading_cols uint64_t parser_start # this is modified after __init__ - list clocks const char *encoding_errors kh_str_starts_t *false_set kh_str_starts_t *true_set @@ -400,7 +398,6 @@ cdef class TextReader: bint allow_leading_cols=True, skiprows=None, skipfooter=0, # int64_t - bint verbose=False, float_precision=None, bint skip_blank_lines=True, encoding_errors=b"strict", @@ -417,9 +414,6 @@ cdef class TextReader: self.parser = parser_new() self.parser.chunksize = tokenize_chunksize - # For timekeeping - self.clocks = [] - self.parser.usecols = (usecols is not None) self._setup_parser_source(source) @@ -507,8 +501,6 @@ cdef class TextReader: self.converters = converters self.na_filter = na_filter - self.verbose = verbose - if float_precision == "round_trip": # see gh-15140 self.parser.double_converter = round_trip_wrapper @@ -896,8 +888,6 @@ cdef class TextReader: int64_t buffered_lines int64_t irows - self._start_clock() - if rows is not None: irows = rows buffered_lines = self.parser.lines - self.parser_start @@ -915,12 +905,8 @@ cdef class TextReader: if self.parser_start >= self.parser.lines: raise StopIteration - self._end_clock("Tokenization") - self._start_clock() columns = self._convert_column_data(rows) - self._end_clock("Type conversion") - self._start_clock() if len(columns) > 0: rows_read = len(list(columns.values())[0]) # trim @@ -929,18 +915,8 @@ cdef class TextReader: parser_trim_buffers(self.parser) self.parser_start -= rows_read - self._end_clock("Parser memory cleanup") - return columns - cdef _start_clock(self): - self.clocks.append(time.time()) - - cdef _end_clock(self, str what): - if self.verbose: - elapsed = time.time() - self.clocks.pop(-1) - print(f"{what} took: {elapsed * 1000:.2f} ms") - def set_noconvert(self, i: int) -> None: self.noconvert.add(i) diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 7b06c6b6b0d39..3bbb7c83345e5 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -519,7 +519,6 @@ def _convert_to_ndarrays( dct: Mapping, na_values, na_fvalues, - verbose: bool = False, converters=None, dtypes=None, ) -> dict[Any, np.ndarray]: @@ -596,8 +595,6 @@ def _convert_to_ndarrays( cvals = self._cast_types(cvals, cast_type, c) result[c] = cvals - if verbose and na_count: - print(f"Filled {na_count} NA values in column {c!s}") return result @final @@ -1236,7 +1233,6 @@ def converter(*date_cols, col: Hashable): "usecols": None, # 'iterator': False, "chunksize": None, - "verbose": False, "encoding": None, "compression": None, "skip_blank_lines": True, diff --git a/pandas/io/parsers/python_parser.py b/pandas/io/parsers/python_parser.py index dbda47172f6ac..44210b6979827 100644 --- a/pandas/io/parsers/python_parser.py +++ b/pandas/io/parsers/python_parser.py @@ -110,8 +110,6 @@ def __init__(self, f: ReadCsvBuffer[str] | list, **kwds) -> None: if "has_index_names" in kwds: self.has_index_names = kwds["has_index_names"] - self.verbose = kwds["verbose"] - self.thousands = kwds["thousands"] self.decimal = kwds["decimal"] @@ -372,7 +370,6 @@ def _convert_data( data, clean_na_values, clean_na_fvalues, - self.verbose, clean_conv, clean_dtypes, ) diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 9f2f208d8c350..b234a6b78e051 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -116,7 +116,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): ) keep_default_na: bool na_filter: bool - verbose: bool | lib.NoDefault skip_blank_lines: bool parse_dates: bool | Sequence[Hashable] | None infer_datetime_format: bool | lib.NoDefault @@ -295,10 +294,6 @@ class _read_shared(TypedDict, Generic[HashableT], total=False): Detect missing value markers (empty strings and the value of ``na_values``). In data without any ``NA`` values, passing ``na_filter=False`` can improve the performance of reading a large file. -verbose : bool, default False - Indicate number of ``NA`` values placed in non-numeric columns. - - .. deprecated:: 2.2.0 skip_blank_lines : bool, default True If ``True``, skip over blank lines rather than interpreting as ``NaN`` values. parse_dates : bool, None, list of Hashable, list of lists or dict of {{Hashable : \ @@ -556,7 +551,6 @@ class _Fwf_Defaults(TypedDict): "converters", "iterator", "dayfirst", - "verbose", "skipinitialspace", "low_memory", } @@ -755,7 +749,6 @@ def read_csv( | None = None, keep_default_na: bool = True, na_filter: bool = True, - verbose: bool | lib.NoDefault = lib.no_default, skip_blank_lines: bool = True, # Datetime Handling parse_dates: bool | Sequence[Hashable] | None = None, @@ -845,17 +838,6 @@ def read_csv( else: delim_whitespace = False - if verbose is not lib.no_default: - # GH#55569 - warnings.warn( - "The 'verbose' keyword in pd.read_csv is deprecated and " - "will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - verbose = False - # locals() should never be modified kwds = locals().copy() del kwds["filepath_or_buffer"] @@ -958,7 +940,6 @@ def read_table( | None = None, keep_default_na: bool = True, na_filter: bool = True, - verbose: bool | lib.NoDefault = lib.no_default, skip_blank_lines: bool = True, # Datetime Handling parse_dates: bool | Sequence[Hashable] | None = None, @@ -1039,17 +1020,6 @@ def read_table( else: delim_whitespace = False - if verbose is not lib.no_default: - # GH#55569 - warnings.warn( - "The 'verbose' keyword in pd.read_table is deprecated and " - "will be removed in a future version.", - FutureWarning, - stacklevel=find_stack_level(), - ) - else: - verbose = False - # locals() should never be modified kwds = locals().copy() del kwds["filepath_or_buffer"] diff --git a/pandas/tests/io/parser/common/test_verbose.py b/pandas/tests/io/parser/common/test_verbose.py deleted file mode 100644 index c5490afba1e04..0000000000000 --- a/pandas/tests/io/parser/common/test_verbose.py +++ /dev/null @@ -1,82 +0,0 @@ -""" -Tests that work on both the Python and C engines but do not have a -specific classification into the other test modules. -""" - -from io import StringIO - -import pytest - -import pandas._testing as tm - -depr_msg = "The 'verbose' keyword in pd.read_csv is deprecated" - - -def test_verbose_read(all_parsers, capsys): - parser = all_parsers - data = """a,b,c,d -one,1,2,3 -one,1,2,3 -,1,2,3 -one,1,2,3 -,1,2,3 -,1,2,3 -one,1,2,3 -two,1,2,3""" - - if parser.engine == "pyarrow": - msg = "The 'verbose' option is not supported with the 'pyarrow' engine" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match=depr_msg, check_stacklevel=False - ): - parser.read_csv(StringIO(data), verbose=True) - return - - # Engines are verbose in different ways. - with tm.assert_produces_warning( - FutureWarning, match=depr_msg, check_stacklevel=False - ): - parser.read_csv(StringIO(data), verbose=True) - captured = capsys.readouterr() - - if parser.engine == "c": - assert "Tokenization took:" in captured.out - assert "Parser memory cleanup took:" in captured.out - else: # Python engine - assert captured.out == "Filled 3 NA values in column a\n" - - -def test_verbose_read2(all_parsers, capsys): - parser = all_parsers - data = """a,b,c,d -one,1,2,3 -two,1,2,3 -three,1,2,3 -four,1,2,3 -five,1,2,3 -,1,2,3 -seven,1,2,3 -eight,1,2,3""" - - if parser.engine == "pyarrow": - msg = "The 'verbose' option is not supported with the 'pyarrow' engine" - with pytest.raises(ValueError, match=msg): - with tm.assert_produces_warning( - FutureWarning, match=depr_msg, check_stacklevel=False - ): - parser.read_csv(StringIO(data), verbose=True, index_col=0) - return - - with tm.assert_produces_warning( - FutureWarning, match=depr_msg, check_stacklevel=False - ): - parser.read_csv(StringIO(data), verbose=True, index_col=0) - captured = capsys.readouterr() - - # Engines are verbose in different ways. - if parser.engine == "c": - assert "Tokenization took:" in captured.out - assert "Parser memory cleanup took:" in captured.out - else: # Python engine - assert captured.out == "Filled 1 NA values in column a\n"