@@ -1265,6 +1265,7 @@ def __init__(self, kwds):
1265
1265
self .prefix = kwds .pop ('prefix' , None )
1266
1266
1267
1267
self .index_col = kwds .get ('index_col' , None )
1268
+ self .unnamed_cols = set ()
1268
1269
self .index_names = None
1269
1270
self .col_names = None
1270
1271
@@ -1374,7 +1375,8 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names,
1374
1375
# clean the index_names
1375
1376
index_names = header .pop (- 1 )
1376
1377
index_names , names , index_col = _clean_index_names (index_names ,
1377
- self .index_col )
1378
+ self .index_col ,
1379
+ self .unnamed_cols )
1378
1380
1379
1381
# extract the columns
1380
1382
field_count = len (header [0 ])
@@ -1454,7 +1456,8 @@ def _make_index(self, data, alldata, columns, indexnamerow=False):
1454
1456
if not self ._name_processed :
1455
1457
(self .index_names , _ ,
1456
1458
self .index_col ) = _clean_index_names (list (columns ),
1457
- self .index_col )
1459
+ self .index_col ,
1460
+ self .unnamed_cols )
1458
1461
self ._name_processed = True
1459
1462
index = self ._get_complex_date_index (data , columns )
1460
1463
index = self ._agg_index (index , try_parse_dates = False )
@@ -1732,6 +1735,7 @@ def __init__(self, src, **kwds):
1732
1735
kwds ['usecols' ] = self .usecols
1733
1736
1734
1737
self ._reader = parsers .TextReader (src , ** kwds )
1738
+ self .unnamed_cols = self ._reader .unnamed_cols
1735
1739
1736
1740
passed_names = self .names is None
1737
1741
@@ -1792,7 +1796,8 @@ def __init__(self, src, **kwds):
1792
1796
self ._name_processed = True
1793
1797
(index_names , self .names ,
1794
1798
self .index_col ) = _clean_index_names (self .names ,
1795
- self .index_col )
1799
+ self .index_col ,
1800
+ self .unnamed_cols )
1796
1801
1797
1802
if self .index_names is None :
1798
1803
self .index_names = index_names
@@ -1966,7 +1971,8 @@ def _get_index_names(self):
1966
1971
1967
1972
if self ._reader .leading_cols == 0 and self .index_col is not None :
1968
1973
(idx_names , names ,
1969
- self .index_col ) = _clean_index_names (names , self .index_col )
1974
+ self .index_col ) = _clean_index_names (names , self .index_col ,
1975
+ self .unnamed_cols )
1970
1976
1971
1977
return names , idx_names
1972
1978
@@ -2112,7 +2118,8 @@ def __init__(self, f, **kwds):
2112
2118
# Get columns in two steps: infer from data, then
2113
2119
# infer column indices from self.usecols if it is specified.
2114
2120
self ._col_indices = None
2115
- self .columns , self .num_original_columns = self ._infer_columns ()
2121
+ (self .columns , self .num_original_columns ,
2122
+ self .unnamed_cols ) = self ._infer_columns ()
2116
2123
2117
2124
# Now self.columns has the set of columns that we will process.
2118
2125
# The original set is stored in self.original_columns.
@@ -2367,6 +2374,8 @@ def _infer_columns(self):
2367
2374
names = self .names
2368
2375
num_original_columns = 0
2369
2376
clear_buffer = True
2377
+ unnamed_cols = set ()
2378
+
2370
2379
if self .header is not None :
2371
2380
header = self .header
2372
2381
@@ -2400,24 +2409,27 @@ def _infer_columns(self):
2400
2409
if clear_buffer :
2401
2410
self ._clear_buffer ()
2402
2411
columns .append ([None ] * len (columns [- 1 ]))
2403
- return columns , num_original_columns
2412
+ return columns , num_original_columns , unnamed_cols
2404
2413
2405
2414
if not self .names :
2406
2415
raise EmptyDataError (
2407
2416
"No columns to parse from file" )
2408
2417
2409
2418
line = self .names [:]
2410
2419
2411
- unnamed_count = 0
2412
2420
this_columns = []
2421
+ this_unnamed_cols = []
2422
+
2413
2423
for i , c in enumerate (line ):
2414
2424
if c == '' :
2415
2425
if have_mi_columns :
2416
- this_columns . append ( ' Unnamed: %d_level_%d'
2417
- % ( i , level ))
2426
+ col_name = ( " Unnamed: {i}_level_{level}"
2427
+ . format ( i = i , level = level ))
2418
2428
else :
2419
- this_columns .append ('Unnamed: %d' % i )
2420
- unnamed_count += 1
2429
+ col_name = "Unnamed: {i}" .format (i = i )
2430
+
2431
+ this_unnamed_cols .append (i )
2432
+ this_columns .append (col_name )
2421
2433
else :
2422
2434
this_columns .append (c )
2423
2435
@@ -2443,12 +2455,17 @@ def _infer_columns(self):
2443
2455
lc = len (this_columns )
2444
2456
ic = (len (self .index_col )
2445
2457
if self .index_col is not None else 0 )
2458
+ unnamed_count = len (this_unnamed_cols )
2459
+
2446
2460
if lc != unnamed_count and lc - ic > unnamed_count :
2447
2461
clear_buffer = False
2448
2462
this_columns = [None ] * lc
2449
2463
self .buf = [self .buf [- 1 ]]
2450
2464
2451
2465
columns .append (this_columns )
2466
+ unnamed_cols .update ({this_columns [i ]
2467
+ for i in this_unnamed_cols })
2468
+
2452
2469
if len (columns ) == 1 :
2453
2470
num_original_columns = len (this_columns )
2454
2471
@@ -2513,7 +2530,7 @@ def _infer_columns(self):
2513
2530
columns = [names ]
2514
2531
num_original_columns = ncols
2515
2532
2516
- return columns , num_original_columns
2533
+ return columns , num_original_columns , unnamed_cols
2517
2534
2518
2535
def _handle_usecols (self , columns , usecols_key ):
2519
2536
"""
@@ -2879,7 +2896,8 @@ def _get_index_name(self, columns):
2879
2896
else :
2880
2897
# Case 2
2881
2898
(index_name , columns_ ,
2882
- self .index_col ) = _clean_index_names (columns , self .index_col )
2899
+ self .index_col ) = _clean_index_names (columns , self .index_col ,
2900
+ self .unnamed_cols )
2883
2901
2884
2902
return index_name , orig_names , columns
2885
2903
@@ -3178,7 +3196,7 @@ def _clean_na_values(na_values, keep_default_na=True):
3178
3196
return na_values , na_fvalues
3179
3197
3180
3198
3181
- def _clean_index_names (columns , index_col ):
3199
+ def _clean_index_names (columns , index_col , unnamed_cols ):
3182
3200
if not _is_index_col (index_col ):
3183
3201
return None , columns , index_col
3184
3202
@@ -3203,10 +3221,10 @@ def _clean_index_names(columns, index_col):
3203
3221
columns .remove (name )
3204
3222
index_names .append (name )
3205
3223
3206
- # hack
3207
- if ( isinstance ( index_names [ 0 ], compat . string_types ) and
3208
- 'Unnamed' in index_names [ 0 ]) :
3209
- index_names [0 ] = None
3224
+ # Only clean index names that were placeholders.
3225
+ for i , name in enumerate ( index_names ):
3226
+ if isinstance ( name , compat . string_types ) and name in unnamed_cols :
3227
+ index_names [i ] = None
3210
3228
3211
3229
return index_names , columns , index_col
3212
3230
0 commit comments