12
12
from pandas .core .index import Index
13
13
from pandas .core .frame import DataFrame
14
14
15
- def read_csv (filepath_or_buffer , header = 0 , skiprows = None , index_col = 0 ,
16
- na_values = None , date_parser = None , names = None ):
15
+ def read_csv (filepath_or_buffer , sep = None , header = 0 , skiprows = None , index_col = 0 ,
16
+ na_values = None , date_parser = None , names = None , sniff_sep = True ):
17
17
"""
18
18
Read CSV file into DataFrame
19
19
@@ -34,6 +34,9 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
34
34
dateutil.parser
35
35
names : array-like
36
36
List of column names
37
+ sniff_sep : boolean, default True
38
+ Attempt to automatically determine the separator for the data. Defaults
39
+ to True; however, if sep is given, sep takes precedence and no sniffing is done
37
40
38
41
Returns
39
42
-------
@@ -50,7 +53,19 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
50
53
except Exception : # pragma: no cover
51
54
f = open (filepath_or_buffer , 'r' )
52
55
53
- reader = csv .reader (f , dialect = 'excel' )
56
+ # default dialect
57
+ dia = csv .excel
58
+ if sep is not None :
59
+ sniff_sep = False
60
+ dia .delimiter = sep
61
+ # attempt to sniff the delimiter
62
+ if sniff_sep :
63
+ sample = f .readline ()
64
+ sniffed = csv .Sniffer ().sniff (sample )
65
+ dia .delimiter = sniffed .delimiter
66
+ f .seek (0 )
67
+
68
+ reader = csv .reader (f , dialect = dia )
54
69
55
70
if skiprows is not None :
56
71
skiprows = set (skiprows )
@@ -63,8 +78,7 @@ def read_csv(filepath_or_buffer, header=0, skiprows=None, index_col=0,
63
78
date_parser = date_parser )
64
79
65
80
def read_table (filepath_or_buffer , sep = '\t ' , header = 0 , skiprows = None ,
66
- index_col = 0 , na_values = None , names = None ,
67
- date_parser = None ):
81
+ index_col = 0 , na_values = None , date_parser = None , names = None ):
68
82
"""
69
83
Read delimited file into DataFrame
70
84
@@ -92,25 +106,8 @@ def read_table(filepath_or_buffer, sep='\t', header=0, skiprows=None,
92
106
-------
93
107
parsed : DataFrame
94
108
"""
95
- if hasattr (filepath_or_buffer , 'read' ):
96
- reader = filepath_or_buffer
97
- else :
98
- try :
99
- # universal newline mode
100
- reader = open (filepath_or_buffer , 'U' )
101
- except Exception : # pragma: no cover
102
- reader = open (filepath_or_buffer , 'r' )
103
-
104
- if skiprows is not None :
105
- skiprows = set (skiprows )
106
- lines = [l for i , l in enumerate (reader ) if i not in skiprows ]
107
- else :
108
- lines = [l for l in reader ]
109
-
110
- lines = [re .split (sep , l .rstrip ()) for l in lines ]
111
- return _simple_parser (lines , header = header , indexCol = index_col ,
112
- colNames = names , na_values = na_values ,
113
- date_parser = date_parser )
109
+ return read_csv (filepath_or_buffer , sep , header , skiprows ,
110
+ index_col , na_values , date_parser , names )
114
111
115
112
def _simple_parser (lines , colNames = None , header = 0 , indexCol = 0 ,
116
113
na_values = None , date_parser = None , parse_dates = True ):
0 commit comments