@@ -1265,6 +1265,7 @@ def __init__(self, kwds):
1265
1265
self .prefix = kwds .pop ('prefix' , None )
1266
1266
1267
1267
self .index_col = kwds .get ('index_col' , None )
1268
+ self .unnamed_cols = set ()
1268
1269
self .index_names = None
1269
1270
self .col_names = None
1270
1271
@@ -1374,7 +1375,8 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names,
1374
1375
# clean the index_names
1375
1376
index_names = header .pop (- 1 )
1376
1377
index_names , names , index_col = _clean_index_names (index_names ,
1377
- self .index_col )
1378
+ self .index_col ,
1379
+ self .unnamed_cols )
1378
1380
1379
1381
# extract the columns
1380
1382
field_count = len (header [0 ])
@@ -1454,7 +1456,8 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
1454
1456
if not self ._name_processed :
1455
1457
(self .index_names , _ ,
1456
1458
self .index_col ) = _clean_index_names (list (columns ),
1457
- self .index_col )
1459
+ self .index_col ,
1460
+ self .unnamed_cols )
1458
1461
self ._name_processed = True
1459
1462
index = self ._get_complex_date_index (data , columns )
1460
1463
index = self ._agg_index (index , try_parse_dates = False )
@@ -1732,6 +1735,7 @@ def __init__(self, src, **kwds):
1732
1735
kwds ['usecols' ] = self .usecols
1733
1736
1734
1737
self ._reader = parsers .TextReader (src , ** kwds )
1738
+ self .unnamed_cols = self ._reader .unnamed_cols
1735
1739
1736
1740
passed_names = self .names is None
1737
1741
@@ -1792,7 +1796,8 @@ def __init__(self, src, **kwds):
1792
1796
self ._name_processed = True
1793
1797
(index_names , self .names ,
1794
1798
self .index_col ) = _clean_index_names (self .names ,
1795
- self .index_col )
1799
+ self .index_col ,
1800
+ self .unnamed_cols )
1796
1801
1797
1802
if self .index_names is None :
1798
1803
self .index_names = index_names
@@ -1966,7 +1971,8 @@ def _get_index_names(self):
1966
1971
1967
1972
if self ._reader .leading_cols == 0 and self .index_col is not None :
1968
1973
(idx_names , names ,
1969
- self .index_col ) = _clean_index_names (names , self .index_col )
1974
+ self .index_col ) = _clean_index_names (names , self .index_col ,
1975
+ self .unnamed_cols )
1970
1976
1971
1977
return names , idx_names
1972
1978
@@ -2112,7 +2118,8 @@ def __init__(self, f, **kwds):
2112
2118
# Get columns in two steps: infer from data, then
2113
2119
# infer column indices from self.usecols if it is specified.
2114
2120
self ._col_indices = None
2115
- self .columns , self .num_original_columns = self ._infer_columns ()
2121
+ (self .columns , self .num_original_columns ,
2122
+ self .unnamed_cols ) = self ._infer_columns ()
2116
2123
2117
2124
# Now self.columns has the set of columns that we will process.
2118
2125
# The original set is stored in self.original_columns.
@@ -2367,6 +2374,8 @@ def _infer_columns(self):
2367
2374
names = self .names
2368
2375
num_original_columns = 0
2369
2376
clear_buffer = True
2377
+ unnamed_cols = set ()
2378
+
2370
2379
if self .header is not None :
2371
2380
header = self .header
2372
2381
@@ -2400,24 +2409,25 @@ def _infer_columns(self):
2400
2409
if clear_buffer :
2401
2410
self ._clear_buffer ()
2402
2411
columns .append ([None ] * len (columns [- 1 ]))
2403
- return columns , num_original_columns
2412
+ return columns , num_original_columns , unnamed_cols
2404
2413
2405
2414
if not self .names :
2406
2415
raise EmptyDataError (
2407
2416
"No columns to parse from file" )
2408
2417
2409
2418
line = self .names [:]
2410
2419
2411
- unnamed_count = 0
2412
2420
this_columns = []
2413
2421
for i , c in enumerate (line ):
2414
2422
if c == '' :
2415
2423
if have_mi_columns :
2416
- this_columns . append ( ' Unnamed: %d_level_%d'
2417
- % ( i , level ))
2424
+ col_name = ( " Unnamed: {i}_level_{level}"
2425
+ . format ( i = i , level = level ))
2418
2426
else :
2419
- this_columns .append ('Unnamed: %d' % i )
2420
- unnamed_count += 1
2427
+ col_name = "Unnamed: {i}" .format (i = i )
2428
+
2429
+ unnamed_cols .add (col_name )
2430
+ this_columns .append (col_name )
2421
2431
else :
2422
2432
this_columns .append (c )
2423
2433
@@ -2443,6 +2453,8 @@ def _infer_columns(self):
2443
2453
lc = len (this_columns )
2444
2454
ic = (len (self .index_col )
2445
2455
if self .index_col is not None else 0 )
2456
+ unnamed_count = len (unnamed_cols )
2457
+
2446
2458
if lc != unnamed_count and lc - ic > unnamed_count :
2447
2459
clear_buffer = False
2448
2460
this_columns = [None ] * lc
@@ -2513,7 +2525,7 @@ def _infer_columns(self):
2513
2525
columns = [names ]
2514
2526
num_original_columns = ncols
2515
2527
2516
- return columns , num_original_columns
2528
+ return columns , num_original_columns , unnamed_cols
2517
2529
2518
2530
def _handle_usecols (self , columns , usecols_key ):
2519
2531
"""
@@ -2879,7 +2891,8 @@ def _get_index_name(self, columns):
2879
2891
else :
2880
2892
# Case 2
2881
2893
(index_name , columns_ ,
2882
- self .index_col ) = _clean_index_names (columns , self .index_col )
2894
+ self .index_col ) = _clean_index_names (columns , self .index_col ,
2895
+ self .unnamed_cols )
2883
2896
2884
2897
return index_name , orig_names , columns
2885
2898
@@ -3178,7 +3191,7 @@ def _clean_na_values(na_values, keep_default_na=True):
3178
3191
return na_values , na_fvalues
3179
3192
3180
3193
3181
- def _clean_index_names (columns , index_col ):
3194
+ def _clean_index_names (columns , index_col , unnamed_cols ):
3182
3195
if not _is_index_col (index_col ):
3183
3196
return None , columns , index_col
3184
3197
@@ -3203,10 +3216,10 @@ def _clean_index_names(columns, index_col):
3203
3216
columns .remove (name )
3204
3217
index_names .append (name )
3205
3218
3206
- # hack
3207
- if ( isinstance ( index_names [ 0 ], compat . string_types ) and
3208
- 'Unnamed' in index_names [ 0 ]) :
3209
- index_names [0 ] = None
3219
+ # Only clean index names that were placeholders.
3220
+ for i , name in enumerate ( index_names ):
3221
+ if isinstance ( name , compat . string_types ) and name in unnamed_cols :
3222
+ index_names [i ] = None
3210
3223
3211
3224
return index_names , columns , index_col
3212
3225
0 commit comments