Skip to content

Commit 0b9a62b

Browse files
Pandas 1.0 compat (dask#5782)
* Use pytest warns
* Fixed duplicate index: xref pandas-dev/pandas#30965
1 parent d3c3ed9 commit 0b9a62b

File tree

5 files changed

+11 -7 lines changed

5 files changed

+11
-7
lines changed

dask/dataframe/io/parquet/arrow.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,12 @@ def read_partition(
326326
fs, piece, columns, index, categories=(), partitions=(), **kwargs
327327
):
328328
if isinstance(index, list):
329-
columns += index
329+
for level in index:
330+
# unclear if we can use set ops here. I think the order matters.
331+
# Need the membership test to avoid duplicating index when
332+
# we slice with `columns` later on.
333+
if level not in columns:
334+
columns.append(level)
330335
if isinstance(piece, str):
331336
# `piece` is a file-path string
332337
piece = pq.ParquetDatasetPiece(

dask/dataframe/io/tests/test_csv.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def test_compression_multiple_files():
672672
f.write(csv_text.encode())
673673
f.close()
674674

675-
with tm.assert_produces_warning(UserWarning):
675+
with pytest.warns(UserWarning):
676676
df = dd.read_csv(os.path.join(tdir, "*.csv.gz"), compression="gzip")
677677

678678
assert len(df.compute()) == (len(csv_text.split("\n")) - 1) * 2

dask/dataframe/io/tests/test_io.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ def test_from_pandas_small():
263263
ddf = dd.from_pandas(df, npartitions=5, sort=sort)
264264
assert_eq(df, ddf)
265265

266-
s = pd.Series([0] * i, name="x")
266+
s = pd.Series([0] * i, name="x", dtype=int)
267267
ds = dd.from_pandas(s, npartitions=5, sort=sort)
268268
assert_eq(s, ds)
269269

dask/dataframe/io/tests/test_parquet.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
import dask
1010
import dask.multiprocessing
1111
import dask.dataframe as dd
12-
from dask.dataframe._compat import tm
1312
from dask.dataframe.utils import assert_eq, PANDAS_VERSION
1413
from dask.dataframe.io.parquet.utils import _parse_pandas_metadata
1514
from dask.dataframe.optimize import optimize_read_parquet_getitem
@@ -908,7 +907,7 @@ def test_empty_partition(tmpdir, engine):
908907

909908
def test_timestamp_index(tmpdir, engine):
910909
fn = str(tmpdir)
911-
df = tm.makeTimeDataFrame()
910+
df = dd._compat.makeTimeDataFrame()
912911
df.index.name = "foo"
913912
ddf = dd.from_pandas(df, npartitions=5)
914913
ddf.to_parquet(fn, engine=engine)

dask/tests/test_base.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,8 @@ def test_compute_dataframe():
617617
ddf1 = ddf.a + 1
618618
ddf2 = ddf.a + ddf.b
619619
out1, out2 = compute(ddf1, ddf2)
620-
pd.util.testing.assert_series_equal(out1, df.a + 1)
621-
pd.util.testing.assert_series_equal(out2, df.a + df.b)
620+
pd.testing.assert_series_equal(out1, df.a + 1)
621+
pd.testing.assert_series_equal(out2, df.a + df.b)
622622

623623

624624
@pytest.mark.skipif("not dd or not da")

0 commit comments

Comments
 (0)