@@ -542,7 +542,7 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
542
542
)
543
543
def read_csv (
544
544
filepath_or_buffer : FilePathOrBuffer ,
545
- sep = "," ,
545
+ sep = lib . no_default ,
546
546
delimiter = None ,
547
547
# Column and Index Locations and Names
548
548
header = "infer" ,
@@ -600,93 +600,14 @@ def read_csv(
600
600
float_precision = None ,
601
601
storage_options : StorageOptions = None ,
602
602
):
603
- # gh-23761
604
- #
605
- # When a dialect is passed, it overrides any of the overlapping
606
- # parameters passed in directly. We don't want to warn if the
607
- # default parameters were passed in (since it probably means
608
- # that the user didn't pass them in explicitly in the first place).
609
- #
610
- # "delimiter" is the annoying corner case because we alias it to
611
- # "sep" before doing comparison to the dialect values later on.
612
- # Thus, we need a flag to indicate that we need to "override"
613
- # the comparison to dialect values by checking if default values
614
- # for BOTH "delimiter" and "sep" were provided.
615
- default_sep = ","
616
-
617
- if dialect is not None :
618
- sep_override = delimiter is None and sep == default_sep
619
- kwds = dict (sep_override = sep_override )
620
- else :
621
- kwds = dict ()
622
-
623
- # Alias sep -> delimiter.
624
- if delimiter is None :
625
- delimiter = sep
603
+ kwds = locals ()
604
+ del kwds ["filepath_or_buffer" ]
605
+ del kwds ["sep" ]
626
606
627
- if delim_whitespace and delimiter != default_sep :
628
- raise ValueError (
629
- "Specified a delimiter with both sep and "
630
- "delim_whitespace=True; you can only specify one."
631
- )
632
-
633
- if engine is not None :
634
- engine_specified = True
635
- else :
636
- engine = "c"
637
- engine_specified = False
638
-
639
- kwds .update (
640
- delimiter = delimiter ,
641
- engine = engine ,
642
- dialect = dialect ,
643
- compression = compression ,
644
- engine_specified = engine_specified ,
645
- doublequote = doublequote ,
646
- escapechar = escapechar ,
647
- quotechar = quotechar ,
648
- quoting = quoting ,
649
- skipinitialspace = skipinitialspace ,
650
- lineterminator = lineterminator ,
651
- header = header ,
652
- index_col = index_col ,
653
- names = names ,
654
- prefix = prefix ,
655
- skiprows = skiprows ,
656
- skipfooter = skipfooter ,
657
- na_values = na_values ,
658
- true_values = true_values ,
659
- false_values = false_values ,
660
- keep_default_na = keep_default_na ,
661
- thousands = thousands ,
662
- comment = comment ,
663
- decimal = decimal ,
664
- parse_dates = parse_dates ,
665
- keep_date_col = keep_date_col ,
666
- dayfirst = dayfirst ,
667
- date_parser = date_parser ,
668
- cache_dates = cache_dates ,
669
- nrows = nrows ,
670
- iterator = iterator ,
671
- chunksize = chunksize ,
672
- converters = converters ,
673
- dtype = dtype ,
674
- usecols = usecols ,
675
- verbose = verbose ,
676
- encoding = encoding ,
677
- squeeze = squeeze ,
678
- memory_map = memory_map ,
679
- float_precision = float_precision ,
680
- na_filter = na_filter ,
681
- delim_whitespace = delim_whitespace ,
682
- warn_bad_lines = warn_bad_lines ,
683
- error_bad_lines = error_bad_lines ,
684
- low_memory = low_memory ,
685
- mangle_dupe_cols = mangle_dupe_cols ,
686
- infer_datetime_format = infer_datetime_format ,
687
- skip_blank_lines = skip_blank_lines ,
688
- storage_options = storage_options ,
607
+ kwds_defaults = _check_defaults_read (
608
+ dialect , delimiter , delim_whitespace , engine , sep , defaults = {"delimiter" : "," }
689
609
)
610
+ kwds .update (kwds_defaults )
690
611
691
612
return _read (filepath_or_buffer , kwds )
692
613
@@ -700,7 +621,7 @@ def read_csv(
700
621
)
701
622
def read_table (
702
623
filepath_or_buffer : FilePathOrBuffer ,
703
- sep = " \t " ,
624
+ sep = lib . no_default ,
704
625
delimiter = None ,
705
626
# Column and Index Locations and Names
706
627
header = "infer" ,
@@ -757,17 +678,16 @@ def read_table(
757
678
memory_map = False ,
758
679
float_precision = None ,
759
680
):
760
- # TODO: validation duplicated in read_csv
761
- if delim_whitespace and (delimiter is not None or sep != "\t " ):
762
- raise ValueError (
763
- "Specified a delimiter with both sep and "
764
- "delim_whitespace=True; you can only specify one."
765
- )
766
- if delim_whitespace :
767
- # In this case sep is not used so we set it to the read_csv
768
- # default to avoid a ValueError
769
- sep = ","
770
- return read_csv (** locals ())
681
+ kwds = locals ()
682
+ del kwds ["filepath_or_buffer" ]
683
+ del kwds ["sep" ]
684
+
685
+ kwds_defaults = _check_defaults_read (
686
+ dialect , delimiter , delim_whitespace , engine , sep , defaults = {"delimiter" : "\t " }
687
+ )
688
+ kwds .update (kwds_defaults )
689
+
690
+ return _read (filepath_or_buffer , kwds )
771
691
772
692
773
693
def read_fwf (
@@ -3782,3 +3702,92 @@ def _make_reader(self, f):
3782
3702
self .skiprows ,
3783
3703
self .infer_nrows ,
3784
3704
)
3705
+
3706
+
3707
def _check_defaults_read(
    dialect: Union[str, csv.Dialect],
    delimiter: Union[str, object],
    delim_whitespace: bool,
    engine: str,
    sep: Union[str, object],
    defaults: Dict[str, Any],
):
    """
    Resolve the delimiter and engine keywords shared by read_csv and read_table.

    Parameters
    ----------
    dialect : str or csv.Dialect
        May be None. When it is not None, a ``sep_override`` flag is added to
        the result so that the later comparison against the dialect values can
        tell whether the caller left both ``sep`` and ``delimiter`` at their
        defaults (gh-23761).
    delimiter : str or object
        Alias for ``sep``. When it is None, the value of ``sep`` is used.
    delim_whitespace : bool
        Whether whitespace is to be used as the separator (equivalent to
        ``sep='\\s+'``). Cannot be combined with an explicit ``sep`` or
        ``delimiter``.
    engine : {'c', 'python'}
        Parser engine requested by the caller. None means "not specified", in
        which case ``'c'`` is filled in.
    sep : str or object
        A delimiter provided by the user, or the sentinel
        ``pandas._libs.lib.no_default`` when nothing was passed.
    defaults : dict
        Default values of input parameters. Only the ``"delimiter"`` key is
        read here.

    Returns
    -------
    dict
        Always holds ``"delimiter"`` and ``"engine_specified"``. ``"engine"``
        is present only when it had to be defaulted, and ``"sep_override"``
        only when ``dialect`` is not None.

    Raises
    ------
    ValueError
        If a delimiter was specified with ``sep`` (or ``delimiter``) and
        ``delim_whitespace=True``.
    """
    fallback_delim = defaults["delimiter"]
    resolved: Dict[str, Any] = {}

    # gh-23761
    #
    # A dialect overrides the overlapping parameters passed in directly, and
    # no warning should be issued when those parameters were merely left at
    # their defaults. Because "delimiter" gets aliased to "sep" below, the
    # comparison against the dialect can no longer see that on its own, so
    # record here whether BOTH "delimiter" and "sep" are still defaults.
    if dialect is not None:
        if delimiter is None:
            resolved["sep_override"] = (
                sep is lib.no_default or sep == fallback_delim
            )
        else:
            resolved["sep_override"] = False

    # Alias sep -> delimiter.
    effective_delim = sep if delimiter is None else delimiter
    explicitly_given = effective_delim is not lib.no_default

    if delim_whitespace and explicitly_given:
        raise ValueError(
            "Specified a delimiter with both sep and "
            "delim_whitespace=True; you can only specify one."
        )

    # Swap the sentinel for the reader-specific default separator.
    resolved["delimiter"] = effective_delim if explicitly_given else fallback_delim

    if engine is None:
        resolved["engine"] = "c"
    resolved["engine_specified"] = engine is not None

    return resolved
0 commit comments