Skip to content

Commit b26b1d2

Browse files
TST (string dtype): resolve xfails in common IO tests (#60320)
1 parent 9bc88c7 commit b26b1d2

File tree

4 files changed

+29
-37
lines changed

4 files changed

+29
-37
lines changed

pandas/tests/io/test_clipboard.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.errors import (
97
PyperclipException,
108
PyperclipWindowsException,
@@ -26,10 +24,6 @@
2624
init_qt_clipboard,
2725
)
2826

29-
pytestmark = pytest.mark.xfail(
30-
using_string_dtype(), reason="TODO(infer_string)", strict=False
31-
)
32-
3327

3428
def build_kwargs(sep, excel):
3529
kwargs = {}
@@ -351,7 +345,7 @@ def test_raw_roundtrip(self, data):
351345

352346
@pytest.mark.parametrize("engine", ["c", "python"])
353347
def test_read_clipboard_dtype_backend(
354-
self, clipboard, string_storage, dtype_backend, engine
348+
self, clipboard, string_storage, dtype_backend, engine, using_infer_string
355349
):
356350
# GH#50502
357351
if dtype_backend == "pyarrow":
@@ -396,6 +390,11 @@ def test_read_clipboard_dtype_backend(
396390
)
397391
expected["g"] = ArrowExtensionArray(pa.array([None, None]))
398392

393+
if using_infer_string:
394+
expected.columns = expected.columns.astype(
395+
pd.StringDtype(string_storage, na_value=np.nan)
396+
)
397+
399398
tm.assert_frame_equal(result, expected)
400399

401400
def test_invalid_dtype_backend(self):

pandas/tests/io/test_common.py

+15-18
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,6 @@ def test_bytesiowrapper_returns_correct_bytes(self):
140140
assert result == data.encode("utf-8")
141141

142142
# Test that pyarrow can handle a file opened with get_handle
143-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
144143
def test_get_handle_pyarrow_compat(self):
145144
pa_csv = pytest.importorskip("pyarrow.csv")
146145

@@ -155,6 +154,8 @@ def test_get_handle_pyarrow_compat(self):
155154
s = StringIO(data)
156155
with icom.get_handle(s, "rb", is_text=False) as handles:
157156
df = pa_csv.read_csv(handles.handle).to_pandas()
157+
# TODO will have to update this when pyarrow's to_pandas() is fixed
158+
expected = expected.astype("object")
158159
tm.assert_frame_equal(df, expected)
159160
assert not s.closed
160161

@@ -338,7 +339,6 @@ def test_read_fspath_all(self, reader, module, path, datapath):
338339
("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
339340
],
340341
)
341-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
342342
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
343343
if writer_name in ["to_latex"]: # uses Styler implementation
344344
pytest.importorskip("jinja2")
@@ -365,7 +365,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
365365
expected = f_path.read()
366366
assert result == expected
367367

368-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
368+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
369369
def test_write_fspath_hdf5(self):
370370
# Same test as write_fspath_all, except HDF5 files aren't
371371
# necessarily byte-for-byte identical for a given dataframe, so we'll
@@ -438,14 +438,13 @@ def test_unknown_engine(self):
438438
with tm.ensure_clean() as path:
439439
df = pd.DataFrame(
440440
1.1 * np.arange(120).reshape((30, 4)),
441-
columns=pd.Index(list("ABCD"), dtype=object),
442-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
441+
columns=pd.Index(list("ABCD")),
442+
index=pd.Index([f"i-{i}" for i in range(30)]),
443443
)
444444
df.to_csv(path)
445445
with pytest.raises(ValueError, match="Unknown engine"):
446446
pd.read_csv(path, engine="pyt")
447447

448-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
449448
def test_binary_mode(self):
450449
"""
451450
'encoding' shouldn't be passed to 'open' in binary mode.
@@ -455,8 +454,8 @@ def test_binary_mode(self):
455454
with tm.ensure_clean() as path:
456455
df = pd.DataFrame(
457456
1.1 * np.arange(120).reshape((30, 4)),
458-
columns=pd.Index(list("ABCD"), dtype=object),
459-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
457+
columns=pd.Index(list("ABCD")),
458+
index=pd.Index([f"i-{i}" for i in range(30)]),
460459
)
461460
df.to_csv(path, mode="w+b")
462461
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@@ -473,8 +472,8 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
473472
"""
474473
df = pd.DataFrame(
475474
1.1 * np.arange(120).reshape((30, 4)),
476-
columns=pd.Index(list("ABCD"), dtype=object),
477-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
475+
columns=pd.Index(list("ABCD")),
476+
index=pd.Index([f"i-{i}" for i in range(30)]),
478477
)
479478
with tm.ensure_clean() as path:
480479
with tm.assert_produces_warning(UnicodeWarning, match="byte order mark"):
@@ -504,15 +503,14 @@ def test_is_fsspec_url():
504503
assert icom.is_fsspec_url("RFC-3986+compliant.spec://something")
505504

506505

507-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
508506
@pytest.mark.parametrize("encoding", [None, "utf-8"])
509507
@pytest.mark.parametrize("format", ["csv", "json"])
510508
def test_codecs_encoding(encoding, format):
511509
# GH39247
512510
expected = pd.DataFrame(
513511
1.1 * np.arange(120).reshape((30, 4)),
514-
columns=pd.Index(list("ABCD"), dtype=object),
515-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
512+
columns=pd.Index(list("ABCD")),
513+
index=pd.Index([f"i-{i}" for i in range(30)]),
516514
)
517515
with tm.ensure_clean() as path:
518516
with codecs.open(path, mode="w", encoding=encoding) as handle:
@@ -525,13 +523,12 @@ def test_codecs_encoding(encoding, format):
525523
tm.assert_frame_equal(expected, df)
526524

527525

528-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
529526
def test_codecs_get_writer_reader():
530527
# GH39247
531528
expected = pd.DataFrame(
532529
1.1 * np.arange(120).reshape((30, 4)),
533-
columns=pd.Index(list("ABCD"), dtype=object),
534-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
530+
columns=pd.Index(list("ABCD")),
531+
index=pd.Index([f"i-{i}" for i in range(30)]),
535532
)
536533
with tm.ensure_clean() as path:
537534
with open(path, "wb") as handle:
@@ -556,8 +553,8 @@ def test_explicit_encoding(io_class, mode, msg):
556553
# wrong mode is requested
557554
expected = pd.DataFrame(
558555
1.1 * np.arange(120).reshape((30, 4)),
559-
columns=pd.Index(list("ABCD"), dtype=object),
560-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
556+
columns=pd.Index(list("ABCD")),
557+
index=pd.Index([f"i-{i}" for i in range(30)]),
561558
)
562559
with io_class() as buffer:
563560
with pytest.raises(TypeError, match=msg):

pandas/tests/io/test_compression.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
import numpy as np
1313
import pytest
1414

15-
from pandas._config import using_string_dtype
16-
1715
from pandas.compat import is_platform_windows
1816

1917
import pandas as pd
@@ -139,7 +137,6 @@ def test_compression_warning(compression_only):
139137
df.to_csv(handles.handle, compression=compression_only)
140138

141139

142-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
143140
def test_compression_binary(compression_only):
144141
"""
145142
Binary file handles support compression.
@@ -148,8 +145,8 @@ def test_compression_binary(compression_only):
148145
"""
149146
df = pd.DataFrame(
150147
1.1 * np.arange(120).reshape((30, 4)),
151-
columns=pd.Index(list("ABCD"), dtype=object),
152-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
148+
columns=pd.Index(list("ABCD")),
149+
index=pd.Index([f"i-{i}" for i in range(30)]),
153150
)
154151

155152
# with a file
@@ -180,8 +177,8 @@ def test_gzip_reproducibility_file_name():
180177
"""
181178
df = pd.DataFrame(
182179
1.1 * np.arange(120).reshape((30, 4)),
183-
columns=pd.Index(list("ABCD"), dtype=object),
184-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
180+
columns=pd.Index(list("ABCD")),
181+
index=pd.Index([f"i-{i}" for i in range(30)]),
185182
)
186183
compression_options = {"method": "gzip", "mtime": 1}
187184

@@ -203,8 +200,8 @@ def test_gzip_reproducibility_file_object():
203200
"""
204201
df = pd.DataFrame(
205202
1.1 * np.arange(120).reshape((30, 4)),
206-
columns=pd.Index(list("ABCD"), dtype=object),
207-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
203+
columns=pd.Index(list("ABCD")),
204+
index=pd.Index([f"i-{i}" for i in range(30)]),
208205
)
209206
compression_options = {"method": "gzip", "mtime": 1}
210207

pandas/tests/io/test_gcs.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,6 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
158158
assert result == expected
159159

160160

161-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
162161
@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"])
163162
def test_to_csv_compression_encoding_gcs(
164163
gcs_buffer, compression_only, encoding, compression_to_extension
@@ -171,8 +170,8 @@ def test_to_csv_compression_encoding_gcs(
171170
"""
172171
df = DataFrame(
173172
1.1 * np.arange(120).reshape((30, 4)),
174-
columns=Index(list("ABCD"), dtype=object),
175-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
173+
columns=Index(list("ABCD")),
174+
index=Index([f"i-{i}" for i in range(30)]),
176175
)
177176

178177
# reference of compressed and encoded file

0 commit comments

Comments
 (0)