From 838c441640f3a9d1631d2bc7638a0b41d93bbbea Mon Sep 17 00:00:00 2001 From: nbonnin Date: Wed, 25 Dec 2019 13:36:58 -0500 Subject: [PATCH 1/8] replaced ".format()" strings with the new f-style strings except for the string at line 1501. That still requires work. https://github.com/pandas-dev/pandas/issues/29886 --- pandas/io/parsers.py | 369 ++++++++++++++++++++----------------------- 1 file changed, 175 insertions(+), 194 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 4d837af60c3e3..209cb12ec9a07 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -77,7 +77,7 @@ _BOM = "\ufeff" _doc_read_csv_and_table = ( - r""" + r""" {summary} Also supports optionally iterating or breaking of the file @@ -193,8 +193,8 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") - + """'. + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. Depending on whether `na_values` is passed in, the behavior is as follows: @@ -381,9 +381,8 @@ def _validate_integer(name, val, min_val=0): min_val : int Minimum allowed value (val < min_val will result in a ValueError) """ - msg = "'{name:s}' must be an integer >={min_val:d}".format( - name=name, min_val=min_val - ) + + msg = f"{name:s} must be an integer >={min_val:d}" if val is not None: if is_float(val): @@ -509,7 +508,6 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): "skip_blank_lines": True, } - _c_parser_defaults = { "delim_whitespace": False, "na_filter": True, @@ -531,63 +529,63 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): def _make_parser_function(name, default_sep=","): def parser_f( - filepath_or_buffer: FilePathOrBuffer, - sep=default_sep, - delimiter=None, - # Column and Index Locations and Names - header="infer", - names=None, - index_col=None, - usecols=None, - squeeze=False, - prefix=None, - mangle_dupe_cols=True, - # General Parsing Configuration - dtype=None, - engine=None, - converters=None, - true_values=None, - false_values=None, - skipinitialspace=False, - skiprows=None, - skipfooter=0, - nrows=None, - # NA and Missing Data Handling - na_values=None, - keep_default_na=True, - na_filter=True, - verbose=False, - skip_blank_lines=True, - # Datetime Handling - parse_dates=False, - infer_datetime_format=False, - keep_date_col=False, - date_parser=None, - dayfirst=False, - cache_dates=True, - # Iteration - iterator=False, - chunksize=None, - # Quoting, Compression, and File Format - compression="infer", - thousands=None, - decimal: str = ".", - lineterminator=None, - quotechar='"', - quoting=csv.QUOTE_MINIMAL, - doublequote=True, - escapechar=None, - comment=None, - encoding=None, - dialect=None, - # Error Handling - error_bad_lines=True, - warn_bad_lines=True, - # Internal - delim_whitespace=False, - low_memory=_c_parser_defaults["low_memory"], - memory_map=False, - float_precision=None, + filepath_or_buffer: FilePathOrBuffer, + sep=default_sep, + delimiter=None, + # Column and Index Locations and Names + header="infer", + names=None, + index_col=None, + usecols=None, + squeeze=False, + prefix=None, + mangle_dupe_cols=True, + # General Parsing Configuration + dtype=None, + engine=None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace=False, + 
skiprows=None, + skipfooter=0, + nrows=None, + # NA and Missing Data Handling + na_values=None, + keep_default_na=True, + na_filter=True, + verbose=False, + skip_blank_lines=True, + # Datetime Handling + parse_dates=False, + infer_datetime_format=False, + keep_date_col=False, + date_parser=None, + dayfirst=False, + cache_dates=True, + # Iteration + iterator=False, + chunksize=None, + # Quoting, Compression, and File Format + compression="infer", + thousands=None, + decimal: str = ".", + lineterminator=None, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + doublequote=True, + escapechar=None, + comment=None, + encoding=None, + dialect=None, + # Error Handling + error_bad_lines=True, + warn_bad_lines=True, + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map=False, + float_precision=None, ): # gh-23761 @@ -703,13 +701,12 @@ def parser_f( def read_fwf( - filepath_or_buffer: FilePathOrBuffer, - colspecs="infer", - widths=None, - infer_nrows=100, - **kwds, + filepath_or_buffer: FilePathOrBuffer, + colspecs="infer", + widths=None, + infer_nrows=100, + **kwds, ): - r""" Read a table of fixed-width formatted lines into DataFrame. @@ -812,20 +809,18 @@ def __init__(self, f, engine=None, **kwds): # Any valid dialect should have these attributes. # If any are missing, we will raise automatically. for param in ( - "delimiter", - "doublequote", - "escapechar", - "skipinitialspace", - "quotechar", - "quoting", + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", ): try: dialect_val = getattr(dialect, param) except AttributeError: raise ValueError( - "Invalid dialect '{dialect}' provided".format( - dialect=kwds["dialect"] - ) + f"Invalid dialect {kwds['dialect']} provided" ) parser_default = _parser_defaults[param] provided = kwds.get(param, parser_default) @@ -838,11 +833,9 @@ def __init__(self, f, engine=None, **kwds): # even if it conflicts with the dialect (gh-23761). if provided != parser_default and provided != dialect_val: msg = ( - "Conflicting values for '{param}': '{val}' was " - "provided, but the dialect specifies '{diaval}'. " - "Using the dialect-specified value.".format( - param=param, val=provided, diaval=dialect_val - ) + f"Conflicting values for '{param}': '{provided}' was " + f"provided, but the dialect specifies '{dialect_val}'. " + "Using the dialect-specified value." ) # Annoying corner case for not warning about @@ -993,18 +986,18 @@ def _clean_options(self, options, engine): encodeable = False if not encodeable and engine not in ("python", "python-fwf"): fallback_reason = ( - "the separator encoded in {encoding} " - "is > 1 char long, and the 'c' engine " - "does not support such separators".format(encoding=encoding) + f"the separator encoded in {encoding} " + f"is > 1 char long, and the 'c' engine " + f"does not support such separators" ) engine = "python" quotechar = options["quotechar"] if quotechar is not None and isinstance(quotechar, (str, bytes)): if ( - len(quotechar) == 1 - and ord(quotechar) > 127 - and engine not in ("python", "python-fwf") + len(quotechar) == 1 + and ord(quotechar) > 127 + and engine not in ("python", "python-fwf") ): fallback_reason = ( "ord(quotechar) > 127, meaning the " @@ -1034,10 +1027,10 @@ def _clean_options(self, options, engine): if fallback_reason: warnings.warn( ( - "Falling back to the 'python' engine because " - "{0}; you can avoid this warning by specifying " - "engine='python'." 
- ).format(fallback_reason), + f"Falling back to the 'python' engine because " + f"{fallback_reason}; you can avoid this warning by specifying " + f"engine='python'." + ), ParserWarning, stacklevel=5, ) @@ -1128,9 +1121,9 @@ def _make_engine(self, engine="c"): klass = FixedWidthFieldParser else: raise ValueError( - "Unknown engine: {engine} (valid options are" - ' "c", "python", or' - ' "python-fwf")'.format(engine=engine) + f"Unknown engine: {engine} (valid options are" + f' "c", "python", or' + f' "python-fwf")' ) self._engine = klass(self.f, **self.options) @@ -1195,9 +1188,9 @@ def _is_potential_multi_index(columns): boolean : Whether or not columns could become a MultiIndex """ return ( - len(columns) - and not isinstance(columns, MultiIndex) - and all(isinstance(c, tuple) for c in columns) + len(columns) + and not isinstance(columns, MultiIndex) + and all(isinstance(c, tuple) for c in columns) ) @@ -1239,8 +1232,8 @@ def _validate_usecols_names(usecols, names): missing = [c for c in usecols if c not in names] if len(missing) > 0: raise ValueError( - "Usecols do not match columns, " - "columns expected but not found: {missing}".format(missing=missing) + f"Usecols do not match columns, " + f"columns expected but not found: {missing}" ) return usecols @@ -1406,9 +1399,9 @@ def __init__(self, kwds): if self.index_col is not None: is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray)) if not ( - is_sequence - and all(map(is_integer, self.index_col)) - or is_integer(self.index_col) + is_sequence + and all(map(is_integer, self.index_col)) + or is_integer(self.index_col) ): raise ValueError( "index_col must only contain row numbers " @@ -1441,9 +1434,9 @@ def close(self): @property def _has_complex_date_col(self): return isinstance(self.parse_dates, dict) or ( - isinstance(self.parse_dates, list) - and len(self.parse_dates) > 0 - and isinstance(self.parse_dates[0], list) + isinstance(self.parse_dates, list) + and len(self.parse_dates) > 0 + and isinstance(self.parse_dates[0], list) ) def _should_parse_dates(self, i): @@ -1458,15 +1451,15 @@ def _should_parse_dates(self, i): if is_scalar(self.parse_dates): return (j == self.parse_dates) or ( - name is not None and name == self.parse_dates + name is not None and name == self.parse_dates ) else: return (j in self.parse_dates) or ( - name is not None and name in self.parse_dates + name is not None and name in self.parse_dates ) def _extract_multi_indexer_columns( - self, header, index_names, col_names, passed_names=False + self, header, index_names, col_names, passed_names=False ): """ extract and return the names, index_names, col_names header is a list-of-lists returned from the parsers """ @@ -1542,10 +1535,10 @@ def _maybe_dedup_names(self, names): if is_potential_mi: col = col[:-1] + ( - "{column}.{count}".format(column=col[-1], count=cur_count), + f"{col[-1]}.{cur_count}" ) else: - col = "{column}.{count}".format(column=col, count=cur_count) + col = f"{col}.{cur_count}" cur_count = counts[col] names[i] = col @@ -1591,7 +1584,7 @@ def _get_simple_index(self, data, columns): def ix(col): if not isinstance(col, str): return col - raise ValueError("Index {col} invalid".format(col=col)) + raise ValueError(f"Index {col} invalid") to_remove = [] index = [] @@ -1616,10 +1609,8 @@ def _get_name(icol): if col_names is None: raise ValueError( - ("Must supply column order to use {icol!s} as index").format( - icol=icol + f"Must supply column order to use {icol:s} as index" ) - ) for i, c in enumerate(col_names): if i == icol: @@ -1671,7 
+1662,7 @@ def _agg_index(self, index, try_parse_dates=True): return index def _convert_to_ndarrays( - self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None + self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None ): result = {} for c, values in dct.items(): @@ -1694,10 +1685,10 @@ def _convert_to_ndarrays( if cast_type is not None: warnings.warn( ( - "Both a converter and dtype were specified " - "for column {0} - only the converter will " + f"Both a converter and dtype were specified " + f"for column {c} - only the converter will " "be used" - ).format(c), + ), ParserWarning, stacklevel=7, ) @@ -1726,19 +1717,19 @@ def _convert_to_ndarrays( # type specified in dtype param or cast_type is an EA if cast_type and ( - not is_dtype_equal(cvals, cast_type) - or is_extension_array_dtype(cast_type) + not is_dtype_equal(cvals, cast_type) + or is_extension_array_dtype(cast_type) ): try: if ( - is_bool_dtype(cast_type) - and not is_categorical_dtype(cast_type) - and na_count > 0 + is_bool_dtype(cast_type) + and not is_categorical_dtype(cast_type) + and na_count > 0 ): raise ValueError( - "Bool column has NA values in " - "column {column}".format(column=c) - ) + f"Bool column has NA values in " + f"column {c}") + except (AttributeError, TypeError): # invalid input to is_bool_dtype pass @@ -1747,9 +1738,7 @@ def _convert_to_ndarrays( result[c] = cvals if verbose and na_count: print( - "Filled {count} NA values in column {c!s}".format( - count=na_count, c=c - ) + f"Filled {na_count} NA values in column {c!s}" ) return result @@ -1823,8 +1812,8 @@ def _cast_types(self, values, cast_type, column): if is_categorical_dtype(cast_type): known_cats = ( - isinstance(cast_type, CategoricalDtype) - and cast_type.categories is not None + isinstance(cast_type, CategoricalDtype) + and cast_type.categories is not None ) if not is_object_dtype(values) and not known_cats: @@ -1847,9 +1836,9 @@ def _cast_types(self, values, cast_type, column): return array_type._from_sequence_of_strings(values, dtype=cast_type) except NotImplementedError: raise NotImplementedError( - "Extension Array: {ea} must implement " - "_from_sequence_of_strings in order " - "to be used in parser methods".format(ea=array_type) + f"Extension Array: {array_type} must implement " + f"_from_sequence_of_strings in order " + f"to be used in parser methods" ) else: @@ -1857,8 +1846,8 @@ def _cast_types(self, values, cast_type, column): values = astype_nansafe(values, cast_type, copy=True, skipna=True) except ValueError: raise ValueError( - "Unable to convert column {column} to type " - "{cast_type}".format(column=column, cast_type=cast_type) + f"Unable to convert column {column} to type " + f"{cast_type}" ) return values @@ -1929,7 +1918,7 @@ def __init__(self, src, **kwds): if self.names is None: if self.prefix: self.names = [ - "{prefix}{i}".format(prefix=self.prefix, i=i) + f"{self.prefix}{i}" for i in range(self._reader.table_width) ] else: @@ -1950,7 +1939,7 @@ def __init__(self, src, **kwds): # GH 14671 if self.usecols_dtype == "string" and not set(usecols).issubset( - self.orig_names + self.orig_names ): _validate_usecols_names(usecols, self.orig_names) @@ -2346,14 +2335,12 @@ def __init__(self, f, **kwds): if self.thousands is None: self.nonnum = re.compile( - r"[^-^0-9^{decimal}]+".format(decimal=self.decimal) + fr"[^-^0-9^{self.decimal}]+" ) else: self.nonnum = re.compile( - r"[^-^0-9^{thousands}^{decimal}]+".format( - thousands=self.thousands, decimal=self.decimal + 
fr"[^-^0-9^{self.thousands}^{self.decimal}]+" ) - ) def _set_no_thousands_columns(self): # Create a set of column ids that are not to be stripped of thousands @@ -2589,8 +2576,8 @@ def _infer_columns(self): except StopIteration: if self.line_pos < hr: raise ValueError( - "Passed header={hr} but only {pos} lines in " - "file".format(hr=hr, pos=(self.line_pos + 1)) + f"Passed header={hr} but only {self.line_pos +1} lines in " + "file" ) # We have an empty file, so check @@ -2613,11 +2600,9 @@ def _infer_columns(self): for i, c in enumerate(line): if c == "": if have_mi_columns: - col_name = "Unnamed: {i}_level_{level}".format( - i=i, level=level - ) + col_name = f"Unnamed: {i}_level_{level}" else: - col_name = "Unnamed: {i}".format(i=i) + col_name = f"Unnamed: {i}" this_unnamed_cols.append(i) this_columns.append(col_name) @@ -2632,7 +2617,7 @@ def _infer_columns(self): while cur_count > 0: counts[col] = cur_count + 1 - col = "{column}.{count}".format(column=col, count=cur_count) + col = f"{col}.{cur_count}" cur_count = counts[col] this_columns[i] = col @@ -2663,7 +2648,7 @@ def _infer_columns(self): if names is not None: if (self.usecols is not None and len(names) != len(self.usecols)) or ( - self.usecols is None and len(names) != len(columns[0]) + self.usecols is None and len(names) != len(columns[0]) ): raise ValueError( "Number of passed names did not match " @@ -2699,8 +2684,7 @@ def _infer_columns(self): if self.prefix: columns = [ [ - "{prefix}{idx}".format(prefix=self.prefix, idx=i) - for i in range(ncols) + f"{self.prefix}{i}" for i in range(ncols) ] ] else: @@ -2809,7 +2793,7 @@ def _check_for_bom(self, first_row): # Extract any remaining data after the second # quotation mark. if len(first_row_bom) > end + 1: - new_row += first_row_bom[end + 1 :] + new_row += first_row_bom[end + 1:] return [new_row] + first_row[1:] elif len(first_row_bom) > 1: @@ -2845,7 +2829,7 @@ def _next_line(self): self.pos += 1 # either uncommented or blank to begin with if not self.skip_blank_lines and ( - self._is_line_empty(self.data[self.pos - 1]) or line + self._is_line_empty(self.data[self.pos - 1]) or line ): break elif self.skip_blank_lines: @@ -2904,7 +2888,7 @@ def _alert_malformed(self, msg, row_num): if self.error_bad_lines: raise ParserError(msg) elif self.warn_bad_lines: - base = "Skipping line {row_num}: ".format(row_num=row_num) + base = f"Skipping line {row_num}: " sys.stderr.write(base + msg + "\n") def _next_iter_line(self, row_num): @@ -2984,9 +2968,9 @@ def _remove_empty_lines(self, lines): for l in lines: # Remove empty lines and lines with only one whitespace value if ( - len(l) > 1 - or len(l) == 1 - and (not isinstance(l[0], str) or l[0].strip()) + len(l) > 1 + or len(l) == 1 + and (not isinstance(l[0], str) or l[0].strip()) ): ret.append(l) return ret @@ -3005,10 +2989,10 @@ def _search_replace_num_columns(self, lines, search, replace): rl = [] for i, x in enumerate(l): if ( - not isinstance(x, str) - or search not in x - or (self._no_thousands_columns and i in self._no_thousands_columns) - or self.nonnum.search(x.strip()) + not isinstance(x, str) + or search not in x + or (self._no_thousands_columns and i in self._no_thousands_columns) + or self.nonnum.search(x.strip()) ): rl.append(x) else: @@ -3128,15 +3112,13 @@ def _rows_to_cols(self, content): for row_num, actual_len in bad_lines: msg = ( - "Expected {col_len} fields in line {line}, saw " - "{length}".format( - col_len=col_len, line=(row_num + 1), length=actual_len + f"Expected {col_len} fields in line {row_num + 1}, saw " + 
"{actual_len}" ) - ) if ( - self.delimiter - and len(self.delimiter) > 1 - and self.quoting != csv.QUOTE_NONE + self.delimiter + and len(self.delimiter) > 1 + and self.quoting != csv.QUOTE_NONE ): # see gh-13374 reason = ( @@ -3156,8 +3138,8 @@ def _rows_to_cols(self, content): a for i, a in enumerate(zipped_content) if ( - i < len(self.index_col) - or i - len(self.index_col) in self._col_indices + i < len(self.index_col) + or i - len(self.index_col) in self._col_indices ) ] else: @@ -3185,10 +3167,10 @@ def _get_lines(self, rows=None): if self.pos > len(self.data): raise StopIteration if rows is None: - new_rows = self.data[self.pos :] + new_rows = self.data[self.pos:] new_pos = len(self.data) else: - new_rows = self.data[self.pos : self.pos + rows] + new_rows = self.data[self.pos: self.pos + rows] new_pos = self.pos + rows # Check for stop rows. n.b.: self.skiprows is a set. @@ -3246,7 +3228,7 @@ def _get_lines(self, rows=None): def _make_date_converter( - date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True + date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True ): def converter(*date_cols): if date_parser is None: @@ -3291,17 +3273,17 @@ def converter(*date_cols): def _process_date_conversion( - data_dict, - converter, - parse_spec, - index_col, - index_names, - columns, - keep_date_col=False, + data_dict, + converter, + parse_spec, + index_col, + index_names, + columns, + keep_date_col=False, ): def _isindex(colspec): return (isinstance(index_col, list) and colspec in index_col) or ( - isinstance(index_names, list) and colspec in index_names + isinstance(index_names, list) and colspec in index_names ) new_cols = [] @@ -3330,7 +3312,7 @@ def _isindex(colspec): ) if new_name in data_dict: raise ValueError( - "New date column already in dict {name}".format(name=new_name) + f"New date column already in dict {new_name}" ) new_data[new_name] = col new_cols.append(new_name) @@ -3341,7 +3323,7 @@ def _isindex(colspec): for new_name, colspec in parse_spec.items(): if new_name in data_dict: raise ValueError( - "Date column {name} already in dict".format(name=new_name) + f"Date column {new_name} already in dict" ) _, col, old_names = _try_convert_dates( @@ -3383,7 +3365,6 @@ def _try_convert_dates(parser, colspec, data_dict, columns): def _clean_na_values(na_values, keep_default_na=True): - if na_values is None: if keep_default_na: na_values = STR_NA_VALUES @@ -3521,7 +3502,7 @@ def _stringify_na_values(na_values): # we are like 999 here if v == int(v): v = int(v) - result.append("{value}.0".format(value=v)) + result.append(f"{v}.0") result.append(str(v)) result.append(v) @@ -3606,10 +3587,10 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=1 for colspec in self.colspecs: if not ( - isinstance(colspec, (tuple, list)) - and len(colspec) == 2 - and isinstance(colspec[0], (int, np.integer, type(None))) - and isinstance(colspec[1], (int, np.integer, type(None))) + isinstance(colspec, (tuple, list)) + and len(colspec) == 2 + and isinstance(colspec[0], (int, np.integer, type(None))) + and isinstance(colspec[1], (int, np.integer, type(None))) ): raise TypeError( "Each column specification must be " @@ -3667,7 +3648,7 @@ def detect_colspecs(self, infer_nrows=100, skiprows=None): rows = [row.partition(self.comment)[0] for row in rows] for row in rows: for m in pattern.finditer(row): - mask[m.start() : m.end()] = 1 + mask[m.start(): m.end()] = 1 shifted = np.roll(mask, 1) shifted[0] = 0 edges = np.where((mask ^ shifted) 
== 1)[0] From be28a59732d82e196ccdacb1a442848876cbd528 Mon Sep 17 00:00:00 2001 From: nbonnin Date: Wed, 25 Dec 2019 14:24:03 -0500 Subject: [PATCH 2/8] replaced ".format()" strings with the new f-style strings except for the string at line 1501. That still requires work. https://github.com/pandas-dev/pandas/issues/29886 -- Updated with "Black" Styling --- pandas/io/parsers.py | 302 ++++++++++++++++++++----------------------- 1 file changed, 140 insertions(+), 162 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 209cb12ec9a07..860816f270ff9 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -77,7 +77,7 @@ _BOM = "\ufeff" _doc_read_csv_and_table = ( - r""" + r""" {summary} Also supports optionally iterating or breaking of the file @@ -193,8 +193,8 @@ Additional strings to recognize as NA/NaN. If dict passed, specific per-column NA values. By default the following values are interpreted as NaN: '""" - + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") - + """'. + + fill("', '".join(sorted(STR_NA_VALUES)), 70, subsequent_indent=" ") + + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. Depending on whether `na_values` is passed in, the behavior is as follows: @@ -529,63 +529,63 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): def _make_parser_function(name, default_sep=","): def parser_f( - filepath_or_buffer: FilePathOrBuffer, - sep=default_sep, - delimiter=None, - # Column and Index Locations and Names - header="infer", - names=None, - index_col=None, - usecols=None, - squeeze=False, - prefix=None, - mangle_dupe_cols=True, - # General Parsing Configuration - dtype=None, - engine=None, - converters=None, - true_values=None, - false_values=None, - skipinitialspace=False, - skiprows=None, - skipfooter=0, - nrows=None, - # NA and Missing Data Handling - na_values=None, - keep_default_na=True, - na_filter=True, - verbose=False, - skip_blank_lines=True, - # Datetime Handling - parse_dates=False, - infer_datetime_format=False, - keep_date_col=False, - date_parser=None, - dayfirst=False, - cache_dates=True, - # Iteration - iterator=False, - chunksize=None, - # Quoting, Compression, and File Format - compression="infer", - thousands=None, - decimal: str = ".", - lineterminator=None, - quotechar='"', - quoting=csv.QUOTE_MINIMAL, - doublequote=True, - escapechar=None, - comment=None, - encoding=None, - dialect=None, - # Error Handling - error_bad_lines=True, - warn_bad_lines=True, - # Internal - delim_whitespace=False, - low_memory=_c_parser_defaults["low_memory"], - memory_map=False, - float_precision=None, + filepath_or_buffer: FilePathOrBuffer, + sep=default_sep, + delimiter=None, + # Column and Index Locations and Names + header="infer", + names=None, + index_col=None, + usecols=None, + squeeze=False, + prefix=None, + mangle_dupe_cols=True, + # General Parsing Configuration + dtype=None, + engine=None, + converters=None, + true_values=None, + false_values=None, + skipinitialspace=False, + skiprows=None, + skipfooter=0, + nrows=None, + # NA and Missing Data Handling + na_values=None, + keep_default_na=True, + na_filter=True, + verbose=False, + skip_blank_lines=True, + # Datetime Handling + parse_dates=False, + infer_datetime_format=False, + keep_date_col=False, + date_parser=None, + dayfirst=False, + cache_dates=True, + # Iteration + iterator=False, + chunksize=None, + # Quoting, Compression, and File Format + compression="infer", + thousands=None, + 
decimal: str = ".", + lineterminator=None, + quotechar='"', + quoting=csv.QUOTE_MINIMAL, + doublequote=True, + escapechar=None, + comment=None, + encoding=None, + dialect=None, + # Error Handling + error_bad_lines=True, + warn_bad_lines=True, + # Internal + delim_whitespace=False, + low_memory=_c_parser_defaults["low_memory"], + memory_map=False, + float_precision=None, ): # gh-23761 @@ -701,11 +701,11 @@ def parser_f( def read_fwf( - filepath_or_buffer: FilePathOrBuffer, - colspecs="infer", - widths=None, - infer_nrows=100, - **kwds, + filepath_or_buffer: FilePathOrBuffer, + colspecs="infer", + widths=None, + infer_nrows=100, + **kwds, ): r""" Read a table of fixed-width formatted lines into DataFrame. @@ -809,19 +809,17 @@ def __init__(self, f, engine=None, **kwds): # Any valid dialect should have these attributes. # If any are missing, we will raise automatically. for param in ( - "delimiter", - "doublequote", - "escapechar", - "skipinitialspace", - "quotechar", - "quoting", + "delimiter", + "doublequote", + "escapechar", + "skipinitialspace", + "quotechar", + "quoting", ): try: dialect_val = getattr(dialect, param) except AttributeError: - raise ValueError( - f"Invalid dialect {kwds['dialect']} provided" - ) + raise ValueError(f"Invalid dialect {kwds['dialect']} provided") parser_default = _parser_defaults[param] provided = kwds.get(param, parser_default) @@ -995,9 +993,9 @@ def _clean_options(self, options, engine): quotechar = options["quotechar"] if quotechar is not None and isinstance(quotechar, (str, bytes)): if ( - len(quotechar) == 1 - and ord(quotechar) > 127 - and engine not in ("python", "python-fwf") + len(quotechar) == 1 + and ord(quotechar) > 127 + and engine not in ("python", "python-fwf") ): fallback_reason = ( "ord(quotechar) > 127, meaning the " @@ -1188,9 +1186,9 @@ def _is_potential_multi_index(columns): boolean : Whether or not columns could become a MultiIndex """ return ( - len(columns) - and not isinstance(columns, MultiIndex) - and all(isinstance(c, tuple) for c in columns) + len(columns) + and not isinstance(columns, MultiIndex) + and all(isinstance(c, tuple) for c in columns) ) @@ -1399,9 +1397,9 @@ def __init__(self, kwds): if self.index_col is not None: is_sequence = isinstance(self.index_col, (list, tuple, np.ndarray)) if not ( - is_sequence - and all(map(is_integer, self.index_col)) - or is_integer(self.index_col) + is_sequence + and all(map(is_integer, self.index_col)) + or is_integer(self.index_col) ): raise ValueError( "index_col must only contain row numbers " @@ -1434,9 +1432,9 @@ def close(self): @property def _has_complex_date_col(self): return isinstance(self.parse_dates, dict) or ( - isinstance(self.parse_dates, list) - and len(self.parse_dates) > 0 - and isinstance(self.parse_dates[0], list) + isinstance(self.parse_dates, list) + and len(self.parse_dates) > 0 + and isinstance(self.parse_dates[0], list) ) def _should_parse_dates(self, i): @@ -1451,15 +1449,15 @@ def _should_parse_dates(self, i): if is_scalar(self.parse_dates): return (j == self.parse_dates) or ( - name is not None and name == self.parse_dates + name is not None and name == self.parse_dates ) else: return (j in self.parse_dates) or ( - name is not None and name in self.parse_dates + name is not None and name in self.parse_dates ) def _extract_multi_indexer_columns( - self, header, index_names, col_names, passed_names=False + self, header, index_names, col_names, passed_names=False ): """ extract and return the names, index_names, col_names header is a list-of-lists returned from 
the parsers """ @@ -1534,9 +1532,7 @@ def _maybe_dedup_names(self, names): counts[col] = cur_count + 1 if is_potential_mi: - col = col[:-1] + ( - f"{col[-1]}.{cur_count}" - ) + col = col[:-1] + (f"{col[-1]}.{cur_count}") else: col = f"{col}.{cur_count}" cur_count = counts[col] @@ -1608,9 +1604,7 @@ def _get_name(icol): return icol if col_names is None: - raise ValueError( - f"Must supply column order to use {icol:s} as index" - ) + raise ValueError(f"Must supply column order to use {icol:s} as index") for i, c in enumerate(col_names): if i == icol: @@ -1662,7 +1656,7 @@ def _agg_index(self, index, try_parse_dates=True): return index def _convert_to_ndarrays( - self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None + self, dct, na_values, na_fvalues, verbose=False, converters=None, dtypes=None ): result = {} for c, values in dct.items(): @@ -1717,18 +1711,18 @@ def _convert_to_ndarrays( # type specified in dtype param or cast_type is an EA if cast_type and ( - not is_dtype_equal(cvals, cast_type) - or is_extension_array_dtype(cast_type) + not is_dtype_equal(cvals, cast_type) + or is_extension_array_dtype(cast_type) ): try: if ( - is_bool_dtype(cast_type) - and not is_categorical_dtype(cast_type) - and na_count > 0 + is_bool_dtype(cast_type) + and not is_categorical_dtype(cast_type) + and na_count > 0 ): raise ValueError( - f"Bool column has NA values in " - f"column {c}") + f"Bool column has NA values in " f"column {c}" + ) except (AttributeError, TypeError): # invalid input to is_bool_dtype @@ -1737,9 +1731,7 @@ def _convert_to_ndarrays( result[c] = cvals if verbose and na_count: - print( - f"Filled {na_count} NA values in column {c!s}" - ) + print(f"Filled {na_count} NA values in column {c!s}") return result def _infer_types(self, values, na_values, try_num_bool=True): @@ -1812,8 +1804,8 @@ def _cast_types(self, values, cast_type, column): if is_categorical_dtype(cast_type): known_cats = ( - isinstance(cast_type, CategoricalDtype) - and cast_type.categories is not None + isinstance(cast_type, CategoricalDtype) + and cast_type.categories is not None ) if not is_object_dtype(values) and not known_cats: @@ -1846,8 +1838,7 @@ def _cast_types(self, values, cast_type, column): values = astype_nansafe(values, cast_type, copy=True, skipna=True) except ValueError: raise ValueError( - f"Unable to convert column {column} to type " - f"{cast_type}" + f"Unable to convert column {column} to type " f"{cast_type}" ) return values @@ -1918,8 +1909,7 @@ def __init__(self, src, **kwds): if self.names is None: if self.prefix: self.names = [ - f"{self.prefix}{i}" - for i in range(self._reader.table_width) + f"{self.prefix}{i}" for i in range(self._reader.table_width) ] else: self.names = list(range(self._reader.table_width)) @@ -1939,7 +1929,7 @@ def __init__(self, src, **kwds): # GH 14671 if self.usecols_dtype == "string" and not set(usecols).issubset( - self.orig_names + self.orig_names ): _validate_usecols_names(usecols, self.orig_names) @@ -2334,13 +2324,9 @@ def __init__(self, f, **kwds): raise ValueError("Only length-1 decimal markers supported") if self.thousands is None: - self.nonnum = re.compile( - fr"[^-^0-9^{self.decimal}]+" - ) + self.nonnum = re.compile(fr"[^-^0-9^{self.decimal}]+") else: - self.nonnum = re.compile( - fr"[^-^0-9^{self.thousands}^{self.decimal}]+" - ) + self.nonnum = re.compile(fr"[^-^0-9^{self.thousands}^{self.decimal}]+") def _set_no_thousands_columns(self): # Create a set of column ids that are not to be stripped of thousands @@ -2648,7 +2634,7 @@ def 
_infer_columns(self): if names is not None: if (self.usecols is not None and len(names) != len(self.usecols)) or ( - self.usecols is None and len(names) != len(columns[0]) + self.usecols is None and len(names) != len(columns[0]) ): raise ValueError( "Number of passed names did not match " @@ -2682,11 +2668,7 @@ def _infer_columns(self): if not names: if self.prefix: - columns = [ - [ - f"{self.prefix}{i}" for i in range(ncols) - ] - ] + columns = [[f"{self.prefix}{i}" for i in range(ncols)]] else: columns = [list(range(ncols))] columns = self._handle_usecols(columns, columns[0]) @@ -2793,7 +2775,7 @@ def _check_for_bom(self, first_row): # Extract any remaining data after the second # quotation mark. if len(first_row_bom) > end + 1: - new_row += first_row_bom[end + 1:] + new_row += first_row_bom[end + 1 :] return [new_row] + first_row[1:] elif len(first_row_bom) > 1: @@ -2829,7 +2811,7 @@ def _next_line(self): self.pos += 1 # either uncommented or blank to begin with if not self.skip_blank_lines and ( - self._is_line_empty(self.data[self.pos - 1]) or line + self._is_line_empty(self.data[self.pos - 1]) or line ): break elif self.skip_blank_lines: @@ -2968,9 +2950,9 @@ def _remove_empty_lines(self, lines): for l in lines: # Remove empty lines and lines with only one whitespace value if ( - len(l) > 1 - or len(l) == 1 - and (not isinstance(l[0], str) or l[0].strip()) + len(l) > 1 + or len(l) == 1 + and (not isinstance(l[0], str) or l[0].strip()) ): ret.append(l) return ret @@ -2989,10 +2971,10 @@ def _search_replace_num_columns(self, lines, search, replace): rl = [] for i, x in enumerate(l): if ( - not isinstance(x, str) - or search not in x - or (self._no_thousands_columns and i in self._no_thousands_columns) - or self.nonnum.search(x.strip()) + not isinstance(x, str) + or search not in x + or (self._no_thousands_columns and i in self._no_thousands_columns) + or self.nonnum.search(x.strip()) ): rl.append(x) else: @@ -3114,11 +3096,11 @@ def _rows_to_cols(self, content): msg = ( f"Expected {col_len} fields in line {row_num + 1}, saw " "{actual_len}" - ) + ) if ( - self.delimiter - and len(self.delimiter) > 1 - and self.quoting != csv.QUOTE_NONE + self.delimiter + and len(self.delimiter) > 1 + and self.quoting != csv.QUOTE_NONE ): # see gh-13374 reason = ( @@ -3138,8 +3120,8 @@ def _rows_to_cols(self, content): a for i, a in enumerate(zipped_content) if ( - i < len(self.index_col) - or i - len(self.index_col) in self._col_indices + i < len(self.index_col) + or i - len(self.index_col) in self._col_indices ) ] else: @@ -3167,10 +3149,10 @@ def _get_lines(self, rows=None): if self.pos > len(self.data): raise StopIteration if rows is None: - new_rows = self.data[self.pos:] + new_rows = self.data[self.pos :] new_pos = len(self.data) else: - new_rows = self.data[self.pos: self.pos + rows] + new_rows = self.data[self.pos : self.pos + rows] new_pos = self.pos + rows # Check for stop rows. n.b.: self.skiprows is a set. 
@@ -3228,7 +3210,7 @@ def _get_lines(self, rows=None): def _make_date_converter( - date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True + date_parser=None, dayfirst=False, infer_datetime_format=False, cache_dates=True ): def converter(*date_cols): if date_parser is None: @@ -3273,17 +3255,17 @@ def converter(*date_cols): def _process_date_conversion( - data_dict, - converter, - parse_spec, - index_col, - index_names, - columns, - keep_date_col=False, + data_dict, + converter, + parse_spec, + index_col, + index_names, + columns, + keep_date_col=False, ): def _isindex(colspec): return (isinstance(index_col, list) and colspec in index_col) or ( - isinstance(index_names, list) and colspec in index_names + isinstance(index_names, list) and colspec in index_names ) new_cols = [] @@ -3311,9 +3293,7 @@ def _isindex(colspec): converter, colspec, data_dict, orig_names ) if new_name in data_dict: - raise ValueError( - f"New date column already in dict {new_name}" - ) + raise ValueError(f"New date column already in dict {new_name}") new_data[new_name] = col new_cols.append(new_name) date_cols.update(old_names) @@ -3322,9 +3302,7 @@ def _isindex(colspec): # dict of new name to column list for new_name, colspec in parse_spec.items(): if new_name in data_dict: - raise ValueError( - f"Date column {new_name} already in dict" - ) + raise ValueError(f"Date column {new_name} already in dict") _, col, old_names = _try_convert_dates( converter, colspec, data_dict, orig_names @@ -3587,10 +3565,10 @@ def __init__(self, f, colspecs, delimiter, comment, skiprows=None, infer_nrows=1 for colspec in self.colspecs: if not ( - isinstance(colspec, (tuple, list)) - and len(colspec) == 2 - and isinstance(colspec[0], (int, np.integer, type(None))) - and isinstance(colspec[1], (int, np.integer, type(None))) + isinstance(colspec, (tuple, list)) + and len(colspec) == 2 + and isinstance(colspec[0], (int, np.integer, type(None))) + and isinstance(colspec[1], (int, np.integer, type(None))) ): raise TypeError( "Each column specification must be " @@ -3648,7 +3626,7 @@ def detect_colspecs(self, infer_nrows=100, skiprows=None): rows = [row.partition(self.comment)[0] for row in rows] for row in rows: for m in pattern.finditer(row): - mask[m.start(): m.end()] = 1 + mask[m.start() : m.end()] = 1 shifted = np.roll(mask, 1) shifted[0] = 0 edges = np.where((mask ^ shifted) == 1)[0] From 01bbda4c3f6a9ce400d8ac44caa22d1f2ea3a573 Mon Sep 17 00:00:00 2001 From: nbonnin Date: Wed, 25 Dec 2019 15:17:53 -0500 Subject: [PATCH 3/8] fixed styling issues (hopefully) causing flake8 to fail. also removed some unnecessary f-strings --- pandas/io/parsers.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 860816f270ff9..ca2171205a8ce 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -381,7 +381,6 @@ def _validate_integer(name, val, min_val=0): min_val : int Minimum allowed value (val < min_val will result in a ValueError) """ - msg = f"{name:s} must be an integer >={min_val:d}" if val is not None: @@ -508,6 +507,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): "skip_blank_lines": True, } + _c_parser_defaults = { "delim_whitespace": False, "na_filter": True, @@ -707,6 +707,8 @@ def read_fwf( infer_nrows=100, **kwds, ): + + r""" Read a table of fixed-width formatted lines into DataFrame. 
@@ -985,8 +987,8 @@ def _clean_options(self, options, engine): if not encodeable and engine not in ("python", "python-fwf"): fallback_reason = ( f"the separator encoded in {encoding} " - f"is > 1 char long, and the 'c' engine " - f"does not support such separators" + "is > 1 char long, and the 'c' engine " + "does not support such separators" ) engine = "python" @@ -1016,18 +1018,18 @@ def _clean_options(self, options, engine): for arg in _python_unsupported: if fallback_reason and result[arg] != _c_parser_defaults[arg]: raise ValueError( - f"Falling back to the 'python' engine because " + "Falling back to the 'python' engine because " f"{fallback_reason}, but this causes {repr(arg)} to be " - f"ignored as it is not supported by the 'python' engine." + "ignored as it is not supported by the 'python' engine." ) del result[arg] if fallback_reason: warnings.warn( ( - f"Falling back to the 'python' engine because " + "Falling back to the 'python' engine because " f"{fallback_reason}; you can avoid this warning by specifying " - f"engine='python'." + "engine='python'." ), ParserWarning, stacklevel=5, @@ -1049,7 +1051,7 @@ def _clean_options(self, options, engine): msg = ( f"The {repr(arg)} argument has been deprecated and will be " - f"removed in a future version." + "removed in a future version." ) if result.get(arg, depr_default) != depr_default: @@ -1120,8 +1122,8 @@ def _make_engine(self, engine="c"): else: raise ValueError( f"Unknown engine: {engine} (valid options are" - f' "c", "python", or' - f' "python-fwf")' + ' "c", "python", or' + ' "python-fwf")' ) self._engine = klass(self.f, **self.options) @@ -1230,7 +1232,7 @@ def _validate_usecols_names(usecols, names): missing = [c for c in usecols if c not in names] if len(missing) > 0: raise ValueError( - f"Usecols do not match columns, " + "Usecols do not match columns, " f"columns expected but not found: {missing}" ) @@ -1532,7 +1534,7 @@ def _maybe_dedup_names(self, names): counts[col] = cur_count + 1 if is_potential_mi: - col = col[:-1] + (f"{col[-1]}.{cur_count}") + col = col[:-1] + f"{col[-1]}.{cur_count}" else: col = f"{col}.{cur_count}" cur_count = counts[col] @@ -1679,7 +1681,7 @@ def _convert_to_ndarrays( if cast_type is not None: warnings.warn( ( - f"Both a converter and dtype were specified " + "Both a converter and dtype were specified " f"for column {c} - only the converter will " "be used" ), @@ -1721,9 +1723,8 @@ def _convert_to_ndarrays( and na_count > 0 ): raise ValueError( - f"Bool column has NA values in " f"column {c}" + f"Bool column has NA values in column {c}" ) - except (AttributeError, TypeError): # invalid input to is_bool_dtype pass @@ -1829,8 +1830,8 @@ def _cast_types(self, values, cast_type, column): except NotImplementedError: raise NotImplementedError( f"Extension Array: {array_type} must implement " - f"_from_sequence_of_strings in order " - f"to be used in parser methods" + "_from_sequence_of_strings in order " + "to be used in parser methods" ) else: @@ -1838,7 +1839,7 @@ def _cast_types(self, values, cast_type, column): values = astype_nansafe(values, cast_type, copy=True, skipna=True) except ValueError: raise ValueError( - f"Unable to convert column {column} to type " f"{cast_type}" + f"Unable to convert column {column} to type {cast_type}" ) return values @@ -2562,7 +2563,7 @@ def _infer_columns(self): except StopIteration: if self.line_pos < hr: raise ValueError( - f"Passed header={hr} but only {self.line_pos +1} lines in " + f"Passed header={hr} but only {self.line_pos + 1} lines in " "file" ) @@ 
-3095,7 +3096,7 @@ def _rows_to_cols(self, content):
         for row_num, actual_len in bad_lines:
             msg = (
                 f"Expected {col_len} fields in line {row_num + 1}, saw "
-                "{actual_len}"
+                f"{actual_len}"
             )
             if (
                 self.delimiter

From 29e738286a62f07e525be0c5dc374a665f27f24d Mon Sep 17 00:00:00 2001
From: nbonnin
Date: Wed, 25 Dec 2019 15:51:54 -0500
Subject: [PATCH 4/8] fixed styling issues (hopefully) causing flake8 to fail.
 also removed some unnecessary f-strings

---
 pandas/io/parsers.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index ca2171205a8ce..748f67a968598 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -708,7 +708,6 @@ def read_fwf(
     **kwds,
 ):
 
-
     r"""
     Read a table of fixed-width formatted lines into DataFrame.
 

From c809aa83b1d0fa0a1aa0f585c186374a30fb5906 Mon Sep 17 00:00:00 2001
From: nbonnin
Date: Wed, 25 Dec 2019 16:02:13 -0500
Subject: [PATCH 5/8] Fixed a formatting error on one of the strings causing
 the integration tests to fail

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 748f67a968598..1046e49d0f430 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -381,7 +381,7 @@ def _validate_integer(name, val, min_val=0):
     min_val : int
         Minimum allowed value (val < min_val will result in a ValueError)
     """
-    msg = f"{name:s} must be an integer >={min_val:d}"
+    msg = f"'{name:s}' must be an integer >={min_val:d}"
 
     if val is not None:
         if is_float(val):

From c6e067928d00f8981421007397065495f675a2d9 Mon Sep 17 00:00:00 2001
From: nbonnin
Date: Wed, 25 Dec 2019 16:32:25 -0500
Subject: [PATCH 6/8] Fixed an issue with :s vs !s

---
 pandas/io/parsers.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 1046e49d0f430..8eb4615eec02b 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1605,7 +1605,7 @@ def _get_name(icol):
            return icol
 
        if col_names is None:
-            raise ValueError(f"Must supply column order to use {icol:s} as index")
+            raise ValueError(f"Must supply column order to use {icol!s} as index")
 
        for i, c in enumerate(col_names):
            if i == icol:
@@ -3343,6 +3343,7 @@ def _try_convert_dates(parser, colspec, data_dict, columns):
 
 
 def _clean_na_values(na_values, keep_default_na=True):
+
     if na_values is None:
         if keep_default_na:
             na_values = STR_NA_VALUES

From 0e407bc19927e0797ddc998a1e7963609952cd7f Mon Sep 17 00:00:00 2001
From: nbonnin
Date: Wed, 25 Dec 2019 17:12:50 -0500
Subject: [PATCH 7/8] Ran Black again to try to get it to pass linting

---
 pandas/io/parsers.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 8eb4615eec02b..3e4564aeb3a8f 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1721,9 +1721,7 @@ def _convert_to_ndarrays(
                         and not is_categorical_dtype(cast_type)
                         and na_count > 0
                     ):
-                        raise ValueError(
-                            f"Bool column has NA values in column {c}"
-                        )
+                        raise ValueError(f"Bool column has NA values in column {c}")
                 except (AttributeError, TypeError):
                     # invalid input to is_bool_dtype
                     pass

From 4eae0c4caaa35007b49f1eea5ec326cb93b20868 Mon Sep 17 00:00:00 2001
From: nbonnin
Date: Wed, 25 Dec 2019 17:34:22 -0500
Subject: [PATCH 8/8] Added parentheses to avoid a type error

---
 pandas/io/parsers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py
index 3e4564aeb3a8f..c628a0d2bdf2e 100755
--- a/pandas/io/parsers.py
+++ b/pandas/io/parsers.py
@@ -1533,7 +1533,7 @@ def _maybe_dedup_names(self, names):
                 counts[col] = cur_count + 1
 
                 if is_potential_mi:
-                    col = col[:-1] + f"{col[-1]}.{cur_count}"
+                    col = col[:-1] + (f"{col[-1]}.{cur_count}",)
                 else:
                     col = f"{col}.{cur_count}"
                 cur_count = counts[col]
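
Note for reviewers: the type error fixed in PATCH 8/8 comes from tuple
concatenation in _maybe_dedup_names. When the columns are a potential
MultiIndex, `col` is a tuple, and the original ".format()" code concatenated a
one-element tuple onto `col[:-1]`. The f-string conversion in PATCH 1/8 kept
the parentheses but dropped the trailing comma, so (f"{col[-1]}.{cur_count}")
is just a parenthesized string, and the Black pass in PATCH 3/8 then removed
the now-redundant parentheses entirely. Below is a minimal, self-contained
sketch of the pitfall; the names mirror the patched code, but the values are
invented for illustration:

    # Stand-ins for what _maybe_dedup_names works with: a tuple column label
    # (potential MultiIndex) and the current duplicate counter.
    col = ("a", "b")
    cur_count = 1

    # State after PATCH 3/8: concatenating a plain str onto a tuple raises.
    try:
        col[:-1] + f"{col[-1]}.{cur_count}"
    except TypeError as err:
        print(err)  # can only concatenate tuple (not "str") to tuple

    # State after PATCH 8/8: the trailing comma makes a one-element tuple,
    # restoring the behavior of the original ".format()" version.
    deduped = col[:-1] + (f"{col[-1]}.{cur_count}",)
    print(deduped)  # ('a', 'b.1')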