Skip to content

Commit a7d96fa

Browse files
authored
TYP: make the type annotations of read_csv & read_table discoverable (#34976)
1 parent e23bd26 commit a7d96fa

File tree

3 files changed

+246
-158
lines changed

3 files changed

+246
-158
lines changed

doc/source/whatsnew/v1.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1025,6 +1025,7 @@ I/O
10251025
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
10261026
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
10271027
- :meth:`HDFStore.keys` now has an optional ``include`` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
1028+
- Bug where ``TypeError`` exceptions raised by :meth:`read_csv` and :meth:`read_table` reported the function name as ``parser_f`` when an unexpected keyword argument was passed (:issue:`25648`)
10281029
- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
10291030
- Bug in :meth:`ujson.encode` was raising an ``OverflowError`` with numbers larger than ``sys.maxsize`` (:issue:`34395`)
10301031

pandas/io/parsers.py

+211-158
Original file line numberDiff line numberDiff line change
@@ -530,176 +530,229 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
530530
_deprecated_args: Set[str] = set()
531531

532532

533-
def _make_parser_function(name, default_sep=","):
534-
def parser_f(
535-
filepath_or_buffer: FilePathOrBuffer,
536-
sep=default_sep,
537-
delimiter=None,
538-
# Column and Index Locations and Names
539-
header="infer",
540-
names=None,
541-
index_col=None,
542-
usecols=None,
543-
squeeze=False,
544-
prefix=None,
545-
mangle_dupe_cols=True,
546-
# General Parsing Configuration
547-
dtype=None,
548-
engine=None,
549-
converters=None,
550-
true_values=None,
551-
false_values=None,
552-
skipinitialspace=False,
553-
skiprows=None,
554-
skipfooter=0,
555-
nrows=None,
556-
# NA and Missing Data Handling
557-
na_values=None,
558-
keep_default_na=True,
559-
na_filter=True,
560-
verbose=False,
561-
skip_blank_lines=True,
562-
# Datetime Handling
563-
parse_dates=False,
564-
infer_datetime_format=False,
565-
keep_date_col=False,
566-
date_parser=None,
567-
dayfirst=False,
568-
cache_dates=True,
569-
# Iteration
570-
iterator=False,
571-
chunksize=None,
572-
# Quoting, Compression, and File Format
573-
compression="infer",
574-
thousands=None,
575-
decimal: str = ".",
576-
lineterminator=None,
577-
quotechar='"',
578-
quoting=csv.QUOTE_MINIMAL,
579-
doublequote=True,
580-
escapechar=None,
581-
comment=None,
582-
encoding=None,
583-
dialect=None,
584-
# Error Handling
585-
error_bad_lines=True,
586-
warn_bad_lines=True,
587-
# Internal
588-
delim_whitespace=False,
589-
low_memory=_c_parser_defaults["low_memory"],
590-
memory_map=False,
591-
float_precision=None,
592-
):
593-
594-
# gh-23761
595-
#
596-
# When a dialect is passed, it overrides any of the overlapping
597-
# parameters passed in directly. We don't want to warn if the
598-
# default parameters were passed in (since it probably means
599-
# that the user didn't pass them in explicitly in the first place).
600-
#
601-
# "delimiter" is the annoying corner case because we alias it to
602-
# "sep" before doing comparison to the dialect values later on.
603-
# Thus, we need a flag to indicate that we need to "override"
604-
# the comparison to dialect values by checking if default values
605-
# for BOTH "delimiter" and "sep" were provided.
606-
if dialect is not None:
607-
sep_override = delimiter is None and sep == default_sep
608-
kwds = dict(sep_override=sep_override)
609-
else:
610-
kwds = dict()
611-
612-
# Alias sep -> delimiter.
613-
if delimiter is None:
614-
delimiter = sep
615-
616-
if delim_whitespace and delimiter != default_sep:
617-
raise ValueError(
618-
"Specified a delimiter with both sep and "
619-
"delim_whitespace=True; you can only specify one."
620-
)
533+
@Appender(
534+
_doc_read_csv_and_table.format(
535+
func_name="read_csv",
536+
summary="Read a comma-separated values (csv) file into DataFrame.",
537+
_default_sep="','",
538+
)
539+
)
540+
def read_csv(
541+
filepath_or_buffer: FilePathOrBuffer,
542+
sep=",",
543+
delimiter=None,
544+
# Column and Index Locations and Names
545+
header="infer",
546+
names=None,
547+
index_col=None,
548+
usecols=None,
549+
squeeze=False,
550+
prefix=None,
551+
mangle_dupe_cols=True,
552+
# General Parsing Configuration
553+
dtype=None,
554+
engine=None,
555+
converters=None,
556+
true_values=None,
557+
false_values=None,
558+
skipinitialspace=False,
559+
skiprows=None,
560+
skipfooter=0,
561+
nrows=None,
562+
# NA and Missing Data Handling
563+
na_values=None,
564+
keep_default_na=True,
565+
na_filter=True,
566+
verbose=False,
567+
skip_blank_lines=True,
568+
# Datetime Handling
569+
parse_dates=False,
570+
infer_datetime_format=False,
571+
keep_date_col=False,
572+
date_parser=None,
573+
dayfirst=False,
574+
cache_dates=True,
575+
# Iteration
576+
iterator=False,
577+
chunksize=None,
578+
# Quoting, Compression, and File Format
579+
compression="infer",
580+
thousands=None,
581+
decimal: str = ".",
582+
lineterminator=None,
583+
quotechar='"',
584+
quoting=csv.QUOTE_MINIMAL,
585+
doublequote=True,
586+
escapechar=None,
587+
comment=None,
588+
encoding=None,
589+
dialect=None,
590+
# Error Handling
591+
error_bad_lines=True,
592+
warn_bad_lines=True,
593+
# Internal
594+
delim_whitespace=False,
595+
low_memory=_c_parser_defaults["low_memory"],
596+
memory_map=False,
597+
float_precision=None,
598+
):
599+
# gh-23761
600+
#
601+
# When a dialect is passed, it overrides any of the overlapping
602+
# parameters passed in directly. We don't want to warn if the
603+
# default parameters were passed in (since it probably means
604+
# that the user didn't pass them in explicitly in the first place).
605+
#
606+
# "delimiter" is the annoying corner case because we alias it to
607+
# "sep" before doing comparison to the dialect values later on.
608+
# Thus, we need a flag to indicate that we need to "override"
609+
# the comparison to dialect values by checking if default values
610+
# for BOTH "delimiter" and "sep" were provided.
611+
default_sep = ","
612+
613+
if dialect is not None:
614+
sep_override = delimiter is None and sep == default_sep
615+
kwds = dict(sep_override=sep_override)
616+
else:
617+
kwds = dict()
621618

622-
if engine is not None:
623-
engine_specified = True
624-
else:
625-
engine = "c"
626-
engine_specified = False
619+
# Alias sep -> delimiter.
620+
if delimiter is None:
621+
delimiter = sep
627622

628-
kwds.update(
629-
delimiter=delimiter,
630-
engine=engine,
631-
dialect=dialect,
632-
compression=compression,
633-
engine_specified=engine_specified,
634-
doublequote=doublequote,
635-
escapechar=escapechar,
636-
quotechar=quotechar,
637-
quoting=quoting,
638-
skipinitialspace=skipinitialspace,
639-
lineterminator=lineterminator,
640-
header=header,
641-
index_col=index_col,
642-
names=names,
643-
prefix=prefix,
644-
skiprows=skiprows,
645-
skipfooter=skipfooter,
646-
na_values=na_values,
647-
true_values=true_values,
648-
false_values=false_values,
649-
keep_default_na=keep_default_na,
650-
thousands=thousands,
651-
comment=comment,
652-
decimal=decimal,
653-
parse_dates=parse_dates,
654-
keep_date_col=keep_date_col,
655-
dayfirst=dayfirst,
656-
date_parser=date_parser,
657-
cache_dates=cache_dates,
658-
nrows=nrows,
659-
iterator=iterator,
660-
chunksize=chunksize,
661-
converters=converters,
662-
dtype=dtype,
663-
usecols=usecols,
664-
verbose=verbose,
665-
encoding=encoding,
666-
squeeze=squeeze,
667-
memory_map=memory_map,
668-
float_precision=float_precision,
669-
na_filter=na_filter,
670-
delim_whitespace=delim_whitespace,
671-
warn_bad_lines=warn_bad_lines,
672-
error_bad_lines=error_bad_lines,
673-
low_memory=low_memory,
674-
mangle_dupe_cols=mangle_dupe_cols,
675-
infer_datetime_format=infer_datetime_format,
676-
skip_blank_lines=skip_blank_lines,
623+
if delim_whitespace and delimiter != default_sep:
624+
raise ValueError(
625+
"Specified a delimiter with both sep and "
626+
"delim_whitespace=True; you can only specify one."
677627
)
678628

679-
return _read(filepath_or_buffer, kwds)
680-
681-
parser_f.__name__ = name
682-
683-
return parser_f
629+
if engine is not None:
630+
engine_specified = True
631+
else:
632+
engine = "c"
633+
engine_specified = False
634+
635+
kwds.update(
636+
delimiter=delimiter,
637+
engine=engine,
638+
dialect=dialect,
639+
compression=compression,
640+
engine_specified=engine_specified,
641+
doublequote=doublequote,
642+
escapechar=escapechar,
643+
quotechar=quotechar,
644+
quoting=quoting,
645+
skipinitialspace=skipinitialspace,
646+
lineterminator=lineterminator,
647+
header=header,
648+
index_col=index_col,
649+
names=names,
650+
prefix=prefix,
651+
skiprows=skiprows,
652+
skipfooter=skipfooter,
653+
na_values=na_values,
654+
true_values=true_values,
655+
false_values=false_values,
656+
keep_default_na=keep_default_na,
657+
thousands=thousands,
658+
comment=comment,
659+
decimal=decimal,
660+
parse_dates=parse_dates,
661+
keep_date_col=keep_date_col,
662+
dayfirst=dayfirst,
663+
date_parser=date_parser,
664+
cache_dates=cache_dates,
665+
nrows=nrows,
666+
iterator=iterator,
667+
chunksize=chunksize,
668+
converters=converters,
669+
dtype=dtype,
670+
usecols=usecols,
671+
verbose=verbose,
672+
encoding=encoding,
673+
squeeze=squeeze,
674+
memory_map=memory_map,
675+
float_precision=float_precision,
676+
na_filter=na_filter,
677+
delim_whitespace=delim_whitespace,
678+
warn_bad_lines=warn_bad_lines,
679+
error_bad_lines=error_bad_lines,
680+
low_memory=low_memory,
681+
mangle_dupe_cols=mangle_dupe_cols,
682+
infer_datetime_format=infer_datetime_format,
683+
skip_blank_lines=skip_blank_lines,
684+
)
684685

686+
return _read(filepath_or_buffer, kwds)
685687

686-
read_csv = _make_parser_function("read_csv", default_sep=",")
687-
read_csv = Appender(
688-
_doc_read_csv_and_table.format(
689-
func_name="read_csv",
690-
summary="Read a comma-separated values (csv) file into DataFrame.",
691-
_default_sep="','",
692-
)
693-
)(read_csv)
694688

695-
read_table = _make_parser_function("read_table", default_sep="\t")
696-
read_table = Appender(
689+
@Appender(
697690
_doc_read_csv_and_table.format(
698691
func_name="read_table",
699692
summary="Read general delimited file into DataFrame.",
700693
_default_sep=r"'\\t' (tab-stop)",
701694
)
702-
)(read_table)
695+
)
696+
def read_table(
    filepath_or_buffer: FilePathOrBuffer,
    sep="\t",
    delimiter=None,
    # Column and Index Locations and Names
    header="infer",
    names=None,
    index_col=None,
    usecols=None,
    squeeze=False,
    prefix=None,
    mangle_dupe_cols=True,
    # General Parsing Configuration
    dtype=None,
    engine=None,
    converters=None,
    true_values=None,
    false_values=None,
    skipinitialspace=False,
    skiprows=None,
    skipfooter=0,
    nrows=None,
    # NA and Missing Data Handling
    na_values=None,
    keep_default_na=True,
    na_filter=True,
    verbose=False,
    skip_blank_lines=True,
    # Datetime Handling
    parse_dates=False,
    infer_datetime_format=False,
    keep_date_col=False,
    date_parser=None,
    dayfirst=False,
    cache_dates=True,
    # Iteration
    iterator=False,
    chunksize=None,
    # Quoting, Compression, and File Format
    compression="infer",
    thousands=None,
    decimal: str = ".",
    lineterminator=None,
    quotechar='"',
    quoting=csv.QUOTE_MINIMAL,
    doublequote=True,
    escapechar=None,
    comment=None,
    encoding=None,
    dialect=None,
    # Error Handling
    error_bad_lines=True,
    warn_bad_lines=True,
    # Internal
    delim_whitespace=False,
    low_memory=_c_parser_defaults["low_memory"],
    memory_map=False,
    float_precision=None,
):
    # Tab-separated counterpart of ``read_csv``: same parameters, but ``sep``
    # defaults to "\t". All parsing work is delegated to ``read_csv``.
    #
    # Deliberately no docstring: the user-facing documentation is attached by
    # the ``@Appender`` decorator applied to this function.
    #
    # IMPORTANT: the call below forwards ``locals()``, so this body must not
    # bind any local name other than the parameters in the signature --
    # anything else would be passed to ``read_csv`` as an unexpected keyword.

    # ``read_csv`` validates ``delim_whitespace`` against *its own* default
    # separator (","). Forwarding our "\t" default unchanged would therefore
    # make ``read_table(..., delim_whitespace=True)`` always raise, and would
    # let an explicit ``sep=","`` slip through unnoticed. Validate here
    # against the tab default instead.
    if delim_whitespace:
        # ``delimiter`` is an alias that takes precedence over ``sep``.
        if (sep if delimiter is None else delimiter) != "\t":
            raise ValueError(
                "Specified a delimiter with both sep and "
                "delim_whitespace=True; you can only specify one."
            )
        # The separator is not user-specified at this point; hand ``read_csv``
        # its own default so that its equivalent check does not fire.
        sep = ","
        delimiter = None

    # NOTE(review): when ``dialect`` is given, ``read_csv`` derives
    # ``sep_override`` by comparing ``sep`` with ",", so the "\t" default
    # forwarded from here looks like a user-supplied separator. That cannot be
    # corrected from this function alone -- confirm whether the dialect
    # conflict handling downstream needs a matching change in ``read_csv``.
    return read_csv(**locals())
703756

704757

705758
def read_fwf(

0 commit comments

Comments
 (0)