Skip to content

Commit 31201f5

Browse files
Nick Pentreath, wesm
Nick Pentreath
authored and committed
read_csv automatically sniffs out separator, using csv.Sniffer()
1 parent 208279e commit 31201f5

File tree

1 file changed

+21
-24
lines changed

1 file changed

+21
-24
lines changed

pandas/io/parsers.py

+21-24
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,8 @@
1212
from pandas.core.index import Index
1313
from pandas.core.frame import DataFrame
1414

15-
def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
16-
na_values=None, date_parser=None, names=None):
15+
def read_csv(filepath_or_buffer, sep=None, header=0, skiprows=None, index_col=0,
16+
na_values=None, date_parser=None, names=None, sniff_sep=True):
1717
"""
1818
Read CSV file into DataFrame
1919
@@ -34,6 +34,9 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
3434
dateutil.parser
3535
names : array-like
3636
List of column names
37+
sniff_sep : boolean, default True
38+
Attempt to automatically determine the separator for the data. Defaults
39+
to True, however if sep is defined then it will take precedence
3740
3841
Returns
3942
-------
@@ -50,7 +53,19 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
5053
except Exception: # pragma: no cover
5154
f = open(filepath_or_buffer, 'r')
5255

53-
reader = csv.reader(f, dialect='excel')
56+
# default dialect
57+
dia = csv.excel
58+
if sep is not None:
59+
sniff_sep = False
60+
dia.delimiter = sep
61+
# attempt to sniff the delimiter
62+
if sniff_sep:
63+
sample = f.readline()
64+
sniffed = csv.Sniffer().sniff(sample)
65+
dia.delimiter = sniffed.delimiter
66+
f.seek(0)
67+
68+
reader = csv.reader(f, dialect=dia)
5469

5570
if skiprows is not None:
5671
skiprows = set(skiprows)
@@ -63,8 +78,7 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
6378
date_parser=date_parser)
6479

6580
def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None,
66-
index_col=0, na_values=None, names=None,
67-
date_parser=None):
81+
index_col=0, na_values=None, date_parser=None, names=None):
6882
"""
6983
Read delimited file into DataFrame
7084
@@ -92,25 +106,8 @@ def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None,
92106
-------
93107
parsed : DataFrame
94108
"""
95-
if hasattr(filepath_or_buffer, 'read'):
96-
reader = filepath_or_buffer
97-
else:
98-
try:
99-
# universal newline mode
100-
reader = open(filepath_or_buffer, 'U')
101-
except Exception: # pragma: no cover
102-
reader = open(filepath_or_buffer, 'r')
103-
104-
if skiprows is not None:
105-
skiprows = set(skiprows)
106-
lines = [l for i, l in enumerate(reader) if i not in skiprows]
107-
else:
108-
lines = [l for l in reader]
109-
110-
lines = [re.split(sep, l.rstrip()) for l in lines]
111-
return _simple_parser(lines, header=header, indexCol=index_col,
112-
colNames=names, na_values=na_values,
113-
date_parser=date_parser)
109+
return read_csv(filepath_or_buffer, sep, header, skiprows,
110+
index_col, na_values, date_parser, names)
114111

115112
def _simple_parser(lines, colNames=None, header=0, indexCol=0,
116113
na_values=None, date_parser=None, parse_dates=True):

0 commit comments

Comments
 (0)