Skip to content

Commit d7509e9

Browse files
Debian Science Team, rebecca-palmer
Debian Science Team
authored and committed
HDF5 and Stata I/O are broken on some architectures
Warn on use, and xfail tests.
HDF5 is known to fail on s390x, Stata on s390x and ppc64el (in 1.0.0), but not all architectures have been checked yet.
In 0.25.3 HDF5 _crashes_ on armhf, so skip (pytest-forked allows continuing past a crash, but still seems to fail on xfailed crashes).
Author: Andreas Tille <[email protected]>, Graham Inggs <[email protected]>, Yaroslav Halchenko <[email protected]>, Rebecca N. Palmer <[email protected]>
Bug-Debian: https://bugs.debian.org/877419
Gbp-Pq: Name xfail_tests_nonintel_io.patch
1 parent 2473ed6 commit d7509e9

File tree

8 files changed

+46
-6
lines changed

8 files changed

+46
-6
lines changed

pandas/io/clipboards.py

+6
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
""" io on the clipboard """
22
from io import StringIO
33
import warnings
4+
from pandas.compat import is_platform_little_endian
5+
warn_clipboard_platform="Non-x86 system detected, clipboard I/O may give wrong results - https://bugs.debian.org/877419" if not is_platform_little_endian() else False
46

57
from pandas.core.dtypes.generic import ABCDataFrame
68

@@ -22,6 +24,8 @@ def read_clipboard(sep=r"\s+", **kwargs): # pragma: no cover
2224
-------
2325
parsed : DataFrame
2426
"""
27+
if warn_clipboard_platform:
28+
warnings.warn(warn_clipboard_platform)
2529
encoding = kwargs.pop("encoding", "utf-8")
2630

2731
# only utf-8 is valid for passed value because that's what clipboard
@@ -96,6 +100,8 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover
96100
- Windows:
97101
- OS X:
98102
"""
103+
if warn_clipboard_platform:
104+
warnings.warn(warn_clipboard_platform)
99105
encoding = kwargs.pop("encoding", "utf-8")
100106

101107
# testing if an invalid encoding is passed to clipboard

pandas/io/pytables.py

+5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
import time
1212
from typing import List, Optional, Type, Union
1313
import warnings
14+
import platform
15+
import re
16+
warn_hdf_platform = "Non-x86 system detected, HDF(5) format I/O may give wrong results - https://bugs.debian.org/877419" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
1417

1518
import numpy as np
1619

@@ -477,6 +480,8 @@ class HDFStore:
477480
def __init__(
478481
self, path, mode=None, complevel=None, complib=None, fletcher32=False, **kwargs
479482
):
483+
if warn_hdf_platform:
484+
warnings.warn(warn_hdf_platform)
480485

481486
if "format" in kwargs:
482487
raise ValueError("format is not a defined argument for HDFStore")

pandas/io/stata.py

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
import struct
1818
import sys
1919
import warnings
20+
import platform
21+
import re
22+
warn_stata_platform = "Non-x86 system detected, Stata format I/O may give wrong results - https://bugs.debian.org/877419" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
2023

2124
from dateutil.relativedelta import relativedelta
2225
import numpy as np
@@ -911,6 +914,8 @@ def __init__(self):
911914
# NOTE: the byte type seems to be reserved for categorical variables
912915
# with a label, but the underlying variable is -127 to 100
913916
# we're going to drop the label and cast to int
917+
if warn_stata_platform:
918+
warnings.warn(warn_stata_platform)
914919
self.DTYPE_MAP = dict(
915920
list(zip(range(1, 245), ["a" + str(i) for i in range(1, 245)]))
916921
+ [

pandas/tests/io/pytables/test_pytables.py

+9
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,11 @@
4747
read_hdf,
4848
)
4949
from pandas.io.pytables import TableIterator # noqa:E402
50+
import platform
51+
import re
52+
is_intel=bool(re.match('i.?86|x86',platform.uname()[4]))
53+
from pandas.compat import is_platform_little_endian
54+
pytestmark = [pytest.mark.xfail(condition=not is_intel,reason="known failure of hdf on some non-x86",strict=False),pytest.mark.forked]
5055

5156
tables = pytest.importorskip("tables")
5257

@@ -1097,6 +1102,7 @@ def check(format, index):
10971102
check("table", index)
10981103
check("fixed", index)
10991104

1105+
@pytest.mark.skipif(condition=not is_intel,reason="crashes on armhf, https://bugs.debian.org/877419")
11001106
@pytest.mark.skipif(
11011107
not is_platform_little_endian(), reason="reason platform is not little endian"
11021108
)
@@ -1129,6 +1135,7 @@ def test_encoding(self):
11291135
],
11301136
)
11311137
@pytest.mark.parametrize("dtype", ["category", object])
1138+
@pytest.mark.skipif(condition=not is_intel,reason="similar to tests crashing on armhf, https://bugs.debian.org/877419")
11321139
def test_latin_encoding(self, dtype, val):
11331140
enc = "latin-1"
11341141
nan_rep = ""
@@ -1308,6 +1315,7 @@ def test_read_missing_key_close_store(self):
13081315
# read with KeyError before another write
13091316
df.to_hdf(path, "k2")
13101317

1318+
@pytest.mark.skipif(condition=not is_intel,reason="crashes on armhf, https://bugs.debian.org/877419")
13111319
def test_append_frame_column_oriented(self):
13121320

13131321
with ensure_clean_store(self.path) as store:
@@ -3935,6 +3943,7 @@ def test_start_stop_fixed(self):
39353943
with pytest.raises(NotImplementedError):
39363944
store.select("dfs", start=0, stop=5)
39373945

3946+
@pytest.mark.skipif(condition=not is_intel,reason="crashes on armhf, https://bugs.debian.org/877419")
39383947
def test_select_filter_corner(self):
39393948

39403949
df = DataFrame(np.random.randn(50, 100))

pandas/tests/io/test_clipboard.py

+2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from pandas import DataFrame, get_option, read_clipboard
99
from pandas.util import testing as tm
1010
from pandas.util.testing import makeCustomDataframe as mkdf
11+
from pandas.compat import is_platform_little_endian
1112

1213
from pandas.io.clipboard import clipboard_get, clipboard_set
1314
from pandas.io.clipboard.exceptions import PyperclipException
@@ -258,6 +259,7 @@ def test_round_trip_valid_encodings(self, enc, df):
258259

259260
@pytest.mark.single
260261
@pytest.mark.clipboard
262+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="https://bugs.debian.org/877419",strict=False)
261263
@pytest.mark.skipif(not _DEPS_INSTALLED, reason="clipboard primitives not installed")
262264
@pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."])
263265
def test_raw_roundtrip(data):

pandas/tests/io/test_common.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import pytest
99

1010
from pandas.compat import is_platform_windows
11+
from pandas.compat import is_platform_little_endian
1112
import pandas.util._test_decorators as td
1213

1314
import pandas as pd
@@ -213,10 +214,10 @@ def test_read_expands_user_home_dir(
213214
(pd.read_fwf, "os", ("io", "data", "fixed_width_format.txt")),
214215
(pd.read_excel, "xlrd", ("io", "data", "test1.xlsx")),
215216
(pd.read_feather, "feather", ("io", "data", "feather-0_3_1.feather")),
216-
(
217+
pytest.param(
217218
pd.read_hdf,
218219
"tables",
219-
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
220+
("io", "data", "legacy_hdf", "datetimetz_object.h5"),marks=pytest.mark.xfail(condition=not is_platform_little_endian(),reason="https://bugs.debian.org/877419",strict=False)
220221
),
221222
(pd.read_stata, "os", ("io", "data", "stata10_115.dta")),
222223
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),

pandas/tests/io/test_stata.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,12 @@
2424
StataReader,
2525
read_stata,
2626
)
27+
import platform
28+
import re
29+
is_intel=bool(re.match('i.?86|x86',platform.uname()[4]))
2730

31+
from pandas.compat import is_platform_little_endian
32+
pytestmark = pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of test_stata on non-little endian",strict=False)
2833

2934
@pytest.fixture
3035
def dirpath(datapath):
@@ -196,7 +201,7 @@ def test_read_dta2(self):
196201
# parsed_113 = self.read_dta(self.dta2_113)
197202

198203
# Remove resource warnings
199-
w = [x for x in w if x.category is UserWarning]
204+
w = [x for x in w if x.category is UserWarning and not "Non-x86 system detected" in str(x.message)]
200205

201206
# should get warning for each call to read_dta
202207
assert len(w) == 3
@@ -453,7 +458,7 @@ def test_read_write_dta12(self, version):
453458
warnings.simplefilter("always", InvalidColumnName)
454459
original.to_stata(path, None, version=version)
455460
# should get a warning for that format.
456-
assert len(w) == 1
461+
assert len([x for x in w if not "Non-x86 system detected" in str(x.message)]) == 1
457462

458463
written_and_read_again = self.read_dta(path)
459464
tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted)
@@ -496,6 +501,7 @@ def test_read_write_reread_dta14(self, file, parsed_114, version):
496501
written_and_read_again = self.read_dta(path)
497502
tm.assert_frame_equal(written_and_read_again.set_index("index"), parsed_114)
498503

504+
@pytest.mark.xfail(condition=not is_intel,reason="https://bugs.debian.org/877419",strict=False)
499505
@pytest.mark.parametrize(
500506
"file", ["dta15_113", "dta15_114", "dta15_115", "dta15_117"]
501507
)
@@ -1264,6 +1270,7 @@ def test_write_variable_labels(self, version):
12641270
read_labels = sr.variable_labels()
12651271
assert read_labels == variable_labels
12661272

1273+
@pytest.mark.xfail(condition=not is_intel,reason="https://bugs.debian.org/877419",strict=False)
12671274
@pytest.mark.parametrize("version", [114, 117])
12681275
def test_invalid_variable_labels(self, version):
12691276
original = pd.DataFrame(
@@ -1330,6 +1337,7 @@ def test_write_variable_label_errors(self):
13301337
with tm.ensure_clean() as path:
13311338
original.to_stata(path, variable_labels=variable_labels_long)
13321339

1340+
@pytest.mark.xfail(condition=not is_intel,reason="https://bugs.debian.org/877419",strict=False)
13331341
def test_default_date_conversion(self):
13341342
# GH 12259
13351343
dates = [
@@ -1775,8 +1783,9 @@ def test_encoding_latin1_118(self):
17751783
the string values returned are correct."""
17761784
with tm.assert_produces_warning(UnicodeWarning) as w:
17771785
encoded = read_stata(self.dta_encoding_118)
1778-
assert len(w) == 151
1779-
assert w[0].message.args[0] == msg
1786+
w2 = [x for x in w if not "Non-x86 system detected" in str(x.message)]
1787+
assert len(w2) == 151
1788+
assert w2[0].message.args[0] == msg
17801789

17811790
expected = pd.DataFrame([["Düsseldorf"]] * 151, columns=["kreis1849"])
17821791
tm.assert_frame_equal(encoded, expected)

pandas/util/testing.py

+3
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import http.client
88
import os
99
import re
10+
import platform
1011
from shutil import rmtree
1112
import string
1213
import tempfile
@@ -2692,6 +2693,8 @@ class for all warnings. To check that no warning is returned,
26922693
)
26932694
assert actual_warning.filename == caller.filename, msg
26942695
else:
2696+
if actual_warning.category==UserWarning and "Non-x86 system detected" in str(actual_warning.message) and not bool(re.match('i.?86|x86',platform.uname()[4])):
2697+
continue
26952698
extra_warnings.append(
26962699
(
26972700
actual_warning.category.__name__,

0 commit comments

Comments
 (0)