Skip to content

Commit 0dc9db8

Browse files
authored
Avoid pandas 2.2 DeprecationWarning in test_hdf (#15044)
The `DeprecationWarning` was from integer data potentially being downcast (e.g. large ints to int8).

Additionally did some cleanup in this file:
* Used `pytest.importorskip`
* Removed testing unsigned ints as they were raising a `NotImplementedError` in tables
* Only tested 1 `datetime64` type as the column naming format would conflict with how resolutions were dropped
* Made testing data deterministic

Authors:
- Matthew Roeschke (https://github.com/mroeschke)

Approvers:
- GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #15044
1 parent 093fe6a commit 0dc9db8

File tree

1 file changed

+15
-24
lines changed

1 file changed

+15
-24
lines changed

python/cudf/cudf/tests/test_hdf.py

Lines changed: 15 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -8,43 +8,35 @@
88
import pytest
99

1010
import cudf
11-
from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, assert_eq
12-
13-
try:
14-
import tables # noqa F401
15-
except ImportError:
16-
pytest.skip(
17-
"PyTables is not installed and is required for HDF reading/writing",
18-
allow_module_level=True,
19-
)
11+
from cudf.testing._utils import NUMERIC_TYPES, UNSIGNED_TYPES, assert_eq
12+
13+
pytest.importorskip("tables")
2014

2115

2216
@pytest.fixture(params=[0, 1, 10, 100])
2317
def pdf(request):
24-
types = NUMERIC_TYPES + DATETIME_TYPES + ["bool"]
18+
types = set(NUMERIC_TYPES + ["datetime64[ns]"] + ["bool"]) - set(
19+
UNSIGNED_TYPES
20+
)
2521
typer = {"col_" + val: val for val in types}
2622
ncols = len(types)
2723
nrows = request.param
2824

25+
rng = np.random.default_rng(1)
2926
# Create a pandas dataframe with random data of mixed types
3027
test_pdf = pd.DataFrame(
31-
[list(range(ncols * i, ncols * (i + 1))) for i in range(nrows)],
32-
columns=pd.Index([f"col_{typ}" for typ in types], name="foo"),
28+
rng.integers(0, 50, size=(nrows, ncols)),
29+
columns=pd.Index([f"col_{typ}" for typ in types]),
30+
index=pd.RangeIndex(nrows, name="test_index"),
3331
)
34-
# Delete the name of the column index, and rename the row index
35-
test_pdf.columns.name = None
36-
test_pdf.index.name = "test_index"
37-
3832
# Cast all the column dtypes to objects, rename them, and then cast to
3933
# appropriate types
40-
test_pdf = (
41-
test_pdf.astype("object")
42-
.astype(typer)
43-
.rename({"col_datetime64[ms]": "col_datetime64"}, axis=1)
34+
test_pdf = test_pdf.astype(typer).rename(
35+
{"col_datetime64[ns]": "col_datetime64"}, axis=1
4436
)
4537

4638
# Create non-numeric categorical data otherwise may be typecasted
47-
data = [ascii_letters[np.random.randint(0, 52)] for i in range(nrows)]
39+
data = rng.choice(list(ascii_letters), size=nrows)
4840
test_pdf["col_category"] = pd.Series(data, dtype="category")
4941

5042
return (test_pdf, nrows)
@@ -107,6 +99,8 @@ def test_hdf_reader(hdf_files, columns):
10799
@pytest.mark.filterwarnings("ignore:Using CPU")
108100
def test_hdf_writer(tmpdir, pdf, gdf, complib, format):
109101
pdf, nrows = pdf
102+
if format == "table" and nrows == 0:
103+
pytest.skip("Can't read 0 row table with format 'table'")
110104
gdf, _ = gdf
111105

112106
if format == "fixed":
@@ -122,9 +116,6 @@ def test_hdf_writer(tmpdir, pdf, gdf, complib, format):
122116
assert os.path.exists(pdf_df_fname)
123117
assert os.path.exists(gdf_df_fname)
124118

125-
if format == "table" and nrows == 0:
126-
pytest.skip("Can't read 0 row table with format 'table'")
127-
128119
expect = pd.read_hdf(pdf_df_fname)
129120
got = pd.read_hdf(gdf_df_fname)
130121

0 commit comments

Comments
 (0)