Skip to content

Commit e37ffb3

Browse files
Backport PR #60320 on branch 2.3.x (TST (string dtype): resolve xfails in common IO tests) (#60325)
Backport PR #60320: TST (string dtype): resolve xfails in common IO tests Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent fe1f4f9 commit e37ffb3

File tree

4 files changed

+29
-37
lines changed

4 files changed

+29
-37
lines changed

pandas/tests/io/test_clipboard.py

+6-7
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,6 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._config import using_string_dtype
7-
86
from pandas.errors import (
97
PyperclipException,
108
PyperclipWindowsException,
@@ -26,10 +24,6 @@
2624
init_qt_clipboard,
2725
)
2826

29-
pytestmark = pytest.mark.xfail(
30-
using_string_dtype(), reason="TODO(infer_string)", strict=False
31-
)
32-
3327

3428
def build_kwargs(sep, excel):
3529
kwargs = {}
@@ -351,7 +345,7 @@ def test_raw_roundtrip(self, data):
351345

352346
@pytest.mark.parametrize("engine", ["c", "python"])
353347
def test_read_clipboard_dtype_backend(
354-
self, clipboard, string_storage, dtype_backend, engine
348+
self, clipboard, string_storage, dtype_backend, engine, using_infer_string
355349
):
356350
# GH#50502
357351
if dtype_backend == "pyarrow":
@@ -396,6 +390,11 @@ def test_read_clipboard_dtype_backend(
396390
)
397391
expected["g"] = ArrowExtensionArray(pa.array([None, None]))
398392

393+
if using_infer_string:
394+
expected.columns = expected.columns.astype(
395+
pd.StringDtype(string_storage, na_value=np.nan)
396+
)
397+
399398
tm.assert_frame_equal(result, expected)
400399

401400
def test_invalid_dtype_backend(self):

pandas/tests/io/test_common.py

+15-18
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,6 @@ def test_bytesiowrapper_returns_correct_bytes(self):
154154
assert result == data.encode("utf-8")
155155

156156
# Test that pyarrow can handle a file opened with get_handle
157-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
158157
def test_get_handle_pyarrow_compat(self):
159158
pa_csv = pytest.importorskip("pyarrow.csv")
160159

@@ -169,6 +168,8 @@ def test_get_handle_pyarrow_compat(self):
169168
s = StringIO(data)
170169
with icom.get_handle(s, "rb", is_text=False) as handles:
171170
df = pa_csv.read_csv(handles.handle).to_pandas()
171+
# TODO will have to update this when pyarrow's to_pandas() is fixed
172+
expected = expected.astype("object")
172173
tm.assert_frame_equal(df, expected)
173174
assert not s.closed
174175

@@ -352,7 +353,6 @@ def test_read_fspath_all(self, reader, module, path, datapath):
352353
("to_stata", {"time_stamp": pd.to_datetime("2019-01-01 00:00")}, "os"),
353354
],
354355
)
355-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
356356
def test_write_fspath_all(self, writer_name, writer_kwargs, module):
357357
if writer_name in ["to_latex"]: # uses Styler implementation
358358
pytest.importorskip("jinja2")
@@ -379,7 +379,7 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
379379
expected = f_path.read()
380380
assert result == expected
381381

382-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
382+
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
383383
def test_write_fspath_hdf5(self):
384384
# Same test as write_fspath_all, except HDF5 files aren't
385385
# necessarily byte-for-byte identical for a given dataframe, so we'll
@@ -450,14 +450,13 @@ def test_unknown_engine(self):
450450
with tm.ensure_clean() as path:
451451
df = pd.DataFrame(
452452
1.1 * np.arange(120).reshape((30, 4)),
453-
columns=pd.Index(list("ABCD"), dtype=object),
454-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
453+
columns=pd.Index(list("ABCD")),
454+
index=pd.Index([f"i-{i}" for i in range(30)]),
455455
)
456456
df.to_csv(path)
457457
with pytest.raises(ValueError, match="Unknown engine"):
458458
pd.read_csv(path, engine="pyt")
459459

460-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
461460
def test_binary_mode(self):
462461
"""
463462
'encoding' shouldn't be passed to 'open' in binary mode.
@@ -467,8 +466,8 @@ def test_binary_mode(self):
467466
with tm.ensure_clean() as path:
468467
df = pd.DataFrame(
469468
1.1 * np.arange(120).reshape((30, 4)),
470-
columns=pd.Index(list("ABCD"), dtype=object),
471-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
469+
columns=pd.Index(list("ABCD")),
470+
index=pd.Index([f"i-{i}" for i in range(30)]),
472471
)
473472
df.to_csv(path, mode="w+b")
474473
tm.assert_frame_equal(df, pd.read_csv(path, index_col=0))
@@ -485,8 +484,8 @@ def test_warning_missing_utf_bom(self, encoding, compression_):
485484
"""
486485
df = pd.DataFrame(
487486
1.1 * np.arange(120).reshape((30, 4)),
488-
columns=pd.Index(list("ABCD"), dtype=object),
489-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
487+
columns=pd.Index(list("ABCD")),
488+
index=pd.Index([f"i-{i}" for i in range(30)]),
490489
)
491490
with tm.ensure_clean() as path:
492491
with tm.assert_produces_warning(UnicodeWarning):
@@ -516,15 +515,14 @@ def test_is_fsspec_url():
516515
assert icom.is_fsspec_url("RFC-3986+compliant.spec://something")
517516

518517

519-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
520518
@pytest.mark.parametrize("encoding", [None, "utf-8"])
521519
@pytest.mark.parametrize("format", ["csv", "json"])
522520
def test_codecs_encoding(encoding, format):
523521
# GH39247
524522
expected = pd.DataFrame(
525523
1.1 * np.arange(120).reshape((30, 4)),
526-
columns=pd.Index(list("ABCD"), dtype=object),
527-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
524+
columns=pd.Index(list("ABCD")),
525+
index=pd.Index([f"i-{i}" for i in range(30)]),
528526
)
529527
with tm.ensure_clean() as path:
530528
with codecs.open(path, mode="w", encoding=encoding) as handle:
@@ -537,13 +535,12 @@ def test_codecs_encoding(encoding, format):
537535
tm.assert_frame_equal(expected, df)
538536

539537

540-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
541538
def test_codecs_get_writer_reader():
542539
# GH39247
543540
expected = pd.DataFrame(
544541
1.1 * np.arange(120).reshape((30, 4)),
545-
columns=pd.Index(list("ABCD"), dtype=object),
546-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
542+
columns=pd.Index(list("ABCD")),
543+
index=pd.Index([f"i-{i}" for i in range(30)]),
547544
)
548545
with tm.ensure_clean() as path:
549546
with open(path, "wb") as handle:
@@ -568,8 +565,8 @@ def test_explicit_encoding(io_class, mode, msg):
568565
# wrong mode is requested
569566
expected = pd.DataFrame(
570567
1.1 * np.arange(120).reshape((30, 4)),
571-
columns=pd.Index(list("ABCD"), dtype=object),
572-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
568+
columns=pd.Index(list("ABCD")),
569+
index=pd.Index([f"i-{i}" for i in range(30)]),
573570
)
574571
with io_class() as buffer:
575572
with pytest.raises(TypeError, match=msg):

pandas/tests/io/test_compression.py

+6-9
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,6 @@
1212
import numpy as np
1313
import pytest
1414

15-
from pandas._config import using_string_dtype
16-
1715
from pandas.compat import is_platform_windows
1816

1917
import pandas as pd
@@ -139,7 +137,6 @@ def test_compression_warning(compression_only):
139137
df.to_csv(handles.handle, compression=compression_only)
140138

141139

142-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
143140
def test_compression_binary(compression_only):
144141
"""
145142
Binary file handles support compression.
@@ -148,8 +145,8 @@ def test_compression_binary(compression_only):
148145
"""
149146
df = pd.DataFrame(
150147
1.1 * np.arange(120).reshape((30, 4)),
151-
columns=pd.Index(list("ABCD"), dtype=object),
152-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
148+
columns=pd.Index(list("ABCD")),
149+
index=pd.Index([f"i-{i}" for i in range(30)]),
153150
)
154151

155152
# with a file
@@ -180,8 +177,8 @@ def test_gzip_reproducibility_file_name():
180177
"""
181178
df = pd.DataFrame(
182179
1.1 * np.arange(120).reshape((30, 4)),
183-
columns=pd.Index(list("ABCD"), dtype=object),
184-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
180+
columns=pd.Index(list("ABCD")),
181+
index=pd.Index([f"i-{i}" for i in range(30)]),
185182
)
186183
compression_options = {"method": "gzip", "mtime": 1}
187184

@@ -203,8 +200,8 @@ def test_gzip_reproducibility_file_object():
203200
"""
204201
df = pd.DataFrame(
205202
1.1 * np.arange(120).reshape((30, 4)),
206-
columns=pd.Index(list("ABCD"), dtype=object),
207-
index=pd.Index([f"i-{i}" for i in range(30)], dtype=object),
203+
columns=pd.Index(list("ABCD")),
204+
index=pd.Index([f"i-{i}" for i in range(30)]),
208205
)
209206
compression_options = {"method": "gzip", "mtime": 1}
210207

pandas/tests/io/test_gcs.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,6 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str):
147147
assert result == expected
148148

149149

150-
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
151150
@pytest.mark.parametrize("encoding", ["utf-8", "cp1251"])
152151
def test_to_csv_compression_encoding_gcs(
153152
gcs_buffer, compression_only, encoding, compression_to_extension
@@ -160,8 +159,8 @@ def test_to_csv_compression_encoding_gcs(
160159
"""
161160
df = DataFrame(
162161
1.1 * np.arange(120).reshape((30, 4)),
163-
columns=Index(list("ABCD"), dtype=object),
164-
index=Index([f"i-{i}" for i in range(30)], dtype=object),
162+
columns=Index(list("ABCD")),
163+
index=Index([f"i-{i}" for i in range(30)]),
165164
)
166165

167166
# reference of compressed and encoded file

0 commit comments

Comments
 (0)