Skip to content

Commit e931248

Browse files
committed
Fixed duplicate index
xref pandas-dev/pandas#30965
1 parent 0a617d7 commit e931248

File tree

2 files changed

+7
-2
lines changed

2 files changed

+7
-2
lines changed

dask/dataframe/io/parquet/arrow.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,12 @@ def read_partition(
326326
fs, piece, columns, index, categories=(), partitions=(), **kwargs
327327
):
328328
if isinstance(index, list):
329-
columns += index
329+
for level in index:
330+
# unclear if we can use set ops here. I think the order matters.
331+
# Need the membership test to avoid duplicating index when
332+
# we slice with `columns` later on.
333+
if level not in columns:
334+
columns.append(level)
330335
if isinstance(piece, str):
331336
# `piece` is a file-path string
332337
piece = pq.ParquetDatasetPiece(

dask/dataframe/io/tests/test_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def test_from_pandas_small():
263263
ddf = dd.from_pandas(df, npartitions=5, sort=sort)
264264
assert_eq(df, ddf)
265265

266-
s = pd.Series([0] * i, name="x")
266+
s = pd.Series([0] * i, name="x", dtype=int)
267267
ds = dd.from_pandas(s, npartitions=5, sort=sort)
268268
assert_eq(s, ds)
269269

0 commit comments

Comments
 (0)