@@ -179,10 +179,8 @@ def get_column_metadata(column, name, arrow_type, field_name):
179
179
}
180
180
181
181
182
- index_level_name = '__index_level_{:d}__' .format
183
-
184
-
185
- def construct_metadata (df , column_names , index_levels , preserve_index , types ):
182
+ def construct_metadata (df , column_names , index_levels , index_column_names ,
183
+ preserve_index , types ):
186
184
"""Returns a dictionary containing enough metadata to reconstruct a pandas
187
185
DataFrame as an Arrow Table, including index columns.
188
186
@@ -197,9 +195,8 @@ def construct_metadata(df, column_names, index_levels, preserve_index, types):
197
195
-------
198
196
dict
199
197
"""
200
- ncolumns = len (column_names )
201
- df_types = types [:ncolumns - len (index_levels )]
202
- index_types = types [ncolumns - len (index_levels ):]
198
+ df_types = types [:- len (index_levels )]
199
+ index_types = types [- len (index_levels ):]
203
200
204
201
column_metadata = [
205
202
get_column_metadata (
@@ -213,9 +210,6 @@ def construct_metadata(df, column_names, index_levels, preserve_index, types):
213
210
]
214
211
215
212
if preserve_index :
216
- index_column_names = list (map (
217
- index_level_name , range (len (index_levels ))
218
- ))
219
213
index_column_metadata = [
220
214
get_column_metadata (
221
215
level ,
@@ -294,9 +288,29 @@ def _column_name_to_strings(name):
294
288
return str (name )
295
289
296
290
291
+ def _index_level_name (index , i , column_names ):
292
+ """Return the name of an index level or a default name if `index.name` is
293
+ None or is already a column name.
294
+
295
+ Parameters
296
+ ----------
297
+ index : pandas.Index
298
+ i : int
299
+
300
+ Returns
301
+ -------
302
+ name : str
303
+ """
304
+ if index .name is not None and index .name not in column_names :
305
+ return index .name
306
+ else :
307
+ return '__index_level_{:d}__' .format (i )
308
+
309
+
297
310
def dataframe_to_arrays (df , schema , preserve_index , nthreads = 1 ):
298
- names = []
311
+ column_names = []
299
312
index_columns = []
313
+ index_column_names = []
300
314
type = None
301
315
302
316
if preserve_index :
@@ -324,12 +338,13 @@ def dataframe_to_arrays(df, schema, preserve_index, nthreads=1):
324
338
325
339
columns_to_convert .append (col )
326
340
convert_types .append (type )
327
- names .append (name )
341
+ column_names .append (name )
328
342
329
343
for i , column in enumerate (index_columns ):
330
344
columns_to_convert .append (column )
331
345
convert_types .append (None )
332
- names .append (index_level_name (i ))
346
+ name = _index_level_name (column , i , column_names )
347
+ index_column_names .append (name )
333
348
334
349
# NOTE(wesm): If nthreads=None, then we use a heuristic to decide whether
335
350
# using a thread pool is worth it. Currently the heuristic is whether the
@@ -358,8 +373,10 @@ def convert_column(col, ty):
358
373
types = [x .type for x in arrays ]
359
374
360
375
metadata = construct_metadata (
361
- df , names , index_columns , preserve_index , types
376
+ df , column_names , index_columns , index_column_names , preserve_index ,
377
+ types
362
378
)
379
+ names = column_names + index_column_names
363
380
return names , arrays , metadata
364
381
365
382
0 commit comments