Fixed duplicate index

TomAugspurger · TomAugspurger · commit e931248fbe11 · 2020-01-13T08:04:32.000-06:00
xref pandas-dev/pandas#30965
diff --git a/dask/dataframe/io/parquet/arrow.py b/dask/dataframe/io/parquet/arrow.py
@@ -326,7 +326,12 @@ def read_partition(
         fs, piece, columns, index, categories=(), partitions=(), **kwargs
     ):
         if isinstance(index, list):
-            columns += index
+            for level in index:
+                # unclear if we can use set ops here. I think the order matters.
+                # Need the membership test to avoid duplicating index when
+                # we slice with `columns` later on.
+                if level not in columns:
+                    columns.append(level)
         if isinstance(piece, str):
             # `piece` is a file-path string
             piece = pq.ParquetDatasetPiece(
diff --git a/dask/dataframe/io/tests/test_io.py b/dask/dataframe/io/tests/test_io.py
@@ -263,7 +263,7 @@ def test_from_pandas_small():
             ddf = dd.from_pandas(df, npartitions=5, sort=sort)
             assert_eq(df, ddf)
 
-            s = pd.Series([0] * i, name="x")
+            s = pd.Series([0] * i, name="x", dtype=int)
             ds = dd.from_pandas(s, npartitions=5, sort=sort)
             assert_eq(s, ds)