Skip to content

Commit 38cc5a4

Browse files
Debian Science Team, rebecca-palmer
Debian Science Team
authored and committed
HDF5 and Stata I/O are broken on some architectures
Fix some issues, warn on use, and xfail tests for the remainder.
armhf: TestHDF5Store::test*encoding only sometimes crashes (1.1.3+dfsg-1 passed on build but failed autopkgtest).
HDF5 and Stata are known to fail on big-endian architectures.
Stata also fails on qemu-ppc64el, but not on real ppc64el.
Author: Andreas Tille <[email protected]>, Graham Inggs <[email protected]>, Yaroslav Halchenko <[email protected]>, Rebecca N. Palmer <[email protected]>
Bug-Debian: https://bugs.debian.org/877419
Forwarded: no
Gbp-Pq: Name xfail_tests_nonintel_io.patch
1 parent 33298fb commit 38cc5a4

10 files changed

+51
-9
lines changed

pandas/_testing/_warnings.py

+3
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
cast,
99
)
1010
import warnings
11+
import platform
1112

1213

1314
@contextmanager
@@ -149,6 +150,8 @@ def _assert_caught_no_extra_warnings(
149150
# FIXME: kludge because pytest.filterwarnings does not
150151
# suppress these, xref GH#38630
151152
continue
153+
if actual_warning.category==UserWarning and "Non-x86 system detected" in str(actual_warning.message) and not bool(re.match('i.?86|x86',platform.uname()[4])):
154+
continue
152155

153156
extra_warnings.append(
154157
(

pandas/io/pytables.py

+10-1
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,10 @@
2323
cast,
2424
)
2525
import warnings
26+
import platform
27+
import re
28+
from pandas.compat import is_platform_little_endian
29+
warn_hdf_platform = "Non-x86 system detected, HDF(5) format I/O may give wrong results (particularly on files created with older versions) or crash - https://bugs.debian.org/877419" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
2630

2731
import numpy as np
2832

@@ -565,6 +569,8 @@ def __init__(
565569
fletcher32: bool = False,
566570
**kwargs,
567571
):
572+
if warn_hdf_platform:
573+
warnings.warn(warn_hdf_platform)
568574

569575
if "format" in kwargs:
570576
raise ValueError("format is not a defined argument for HDFStore")
@@ -776,7 +782,10 @@ def flush(self, fsync: bool = False):
776782
self._handle.flush()
777783
if fsync:
778784
with suppress(OSError):
779-
os.fsync(self._handle.fileno())
785+
if is_platform_little_endian():
786+
os.fsync(self._handle.fileno())
787+
else:
788+
os.sync() # due to a pytables bad-cast bug, fileno is invalid on 64-bit big-endian
780789

781790
def get(self, key: str):
782791
"""

pandas/io/stata.py

+5
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,9 @@
2525
cast,
2626
)
2727
import warnings
28+
import platform
29+
import re
30+
warn_stata_platform = "Non-x86 system detected, Stata format I/O may give wrong results (particularly on strings) - https://bugs.debian.org/877419" if not bool(re.match('i.?86|x86',platform.uname()[4])) else False
2831

2932
from dateutil.relativedelta import relativedelta
3033
import numpy as np
@@ -896,6 +899,8 @@ def __init__(self):
896899
# NOTE: the byte type seems to be reserved for categorical variables
897900
# with a label, but the underlying variable is -127 to 100
898901
# we're going to drop the label and cast to int
902+
if warn_stata_platform:
903+
warnings.warn(warn_stata_platform)
899904
self.DTYPE_MAP = dict(
900905
list(zip(range(1, 245), [np.dtype("a" + str(i)) for i in range(1, 245)]))
901906
+ [

pandas/tests/io/pytables/test_append.py

+5
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,10 @@
2424
ensure_clean_path,
2525
ensure_clean_store,
2626
)
27+
import platform
28+
import re
29+
import sys
30+
is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
2731

2832
pytestmark = pytest.mark.single
2933

@@ -277,6 +281,7 @@ def test_append_all_nans(setup_path):
277281
tm.assert_frame_equal(store["df2"], df)
278282

279283

284+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
280285
def test_append_frame_column_oriented(setup_path):
281286
with ensure_clean_store(setup_path) as store:
282287

pandas/tests/io/pytables/test_file_handling.py

+6
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@
2525
PossibleDataLossError,
2626
Term,
2727
)
28+
import platform
29+
import re
30+
import sys
31+
is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
2832

2933
pytestmark = pytest.mark.single
3034

@@ -270,6 +274,7 @@ def test_complibs(setup_path):
270274
h5table.close()
271275

272276

277+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
273278
@pytest.mark.skipif(
274279
not is_platform_little_endian(), reason="reason platform is not little endian"
275280
)
@@ -303,6 +308,7 @@ def test_encoding(setup_path):
303308
],
304309
)
305310
@pytest.mark.parametrize("dtype", ["category", object])
311+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
306312
def test_latin_encoding(setup_path, dtype, val):
307313
enc = "latin-1"
308314
nan_rep = ""

pandas/tests/io/pytables/test_read.py

+4-1
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
import pytest
66

77
from pandas._libs.tslibs import Timestamp
8-
from pandas.compat import is_platform_windows
8+
from pandas.compat import is_platform_windows, is_platform_little_endian
99

1010
import pandas as pd
1111
from pandas import (
@@ -155,6 +155,7 @@ def test_pytables_native2_read(datapath, setup_path):
155155
assert isinstance(d1, DataFrame)
156156

157157

158+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
158159
def test_legacy_table_fixed_format_read_py2(datapath, setup_path):
159160
# GH 24510
160161
# legacy table with fixed format written in Python 2
@@ -170,6 +171,7 @@ def test_legacy_table_fixed_format_read_py2(datapath, setup_path):
170171
tm.assert_frame_equal(expected, result)
171172

172173

174+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
173175
def test_legacy_table_fixed_format_read_datetime_py2(datapath, setup_path):
174176
# GH 31750
175177
# legacy table with fixed format and datetime64 column written in Python 2
@@ -319,6 +321,7 @@ def test_read_hdf_series_mode_r(format, setup_path):
319321
tm.assert_series_equal(result, series)
320322

321323

324+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
322325
def test_read_py2_hdf_file_in_py3(datapath):
323326
# GH 16781
324327

pandas/tests/io/pytables/test_store.py

+5
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,10 @@
3939
HDFStore,
4040
read_hdf,
4141
)
42+
import platform
43+
import re
44+
import sys
45+
is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
4246

4347
pytestmark = pytest.mark.single
4448

@@ -789,6 +793,7 @@ def test_start_stop_fixed(setup_path):
789793
df.iloc[8:10, -2] = np.nan
790794

791795

796+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
792797
def test_select_filter_corner(setup_path):
793798

794799
df = DataFrame(np.random.randn(50, 100))

pandas/tests/io/pytables/test_timezones.py

+3
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
from pandas._libs.tslibs.timezones import maybe_get_tz
1010
import pandas.util._test_decorators as td
11+
from pandas.compat import is_platform_little_endian
1112

1213
import pandas as pd
1314
from pandas import (
@@ -304,6 +305,7 @@ def test_store_timezone(setup_path):
304305
tm.assert_frame_equal(result, df)
305306

306307

308+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
307309
def test_legacy_datetimetz_object(datapath, setup_path):
308310
# legacy from < 0.17.0
309311
# 8260
@@ -356,6 +358,7 @@ def test_read_with_where_tz_aware_index(setup_path):
356358
tm.assert_frame_equal(result, expected)
357359

358360

361+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
359362
def test_py2_created_with_datetimez(datapath, setup_path):
360363
# The test HDF5 file was created in Python 2, but could not be read in
361364
# Python 3.

pandas/tests/io/test_common.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515

1616
import pytest
1717

18-
from pandas.compat import is_platform_windows
18+
from pandas.compat import is_platform_windows, is_platform_little_endian
1919
import pandas.util._test_decorators as td
2020

2121
import pandas as pd
@@ -244,11 +244,11 @@ def test_read_expands_user_home_dir(
244244
"pyarrow",
245245
("io", "data", "feather", "feather-0_3_1.feather"),
246246
),
247-
(
247+
pytest.param(
248248
pd.read_hdf,
249249
"tables",
250250
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
251-
),
251+
marks=pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)),
252252
(pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
253253
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
254254
(pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),

pandas/tests/io/test_stata.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,8 @@
3333
read_stata,
3434
)
3535

36+
from pandas.compat import is_platform_little_endian
37+
pytestmark = pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of test_stata on non-little endian",strict=False)
3638

3739
@pytest.fixture()
3840
def mixed_frame():
@@ -207,7 +209,7 @@ def test_read_dta2(self):
207209
# parsed_113 = self.read_dta(self.dta2_113)
208210

209211
# Remove resource warnings
210-
w = [x for x in w if x.category is UserWarning]
212+
w = [x for x in w if x.category is UserWarning and not "Non-x86 system detected" in str(x.message)]
211213

212214
# should get warning for each call to read_dta
213215
assert len(w) == 3
@@ -469,7 +471,7 @@ def test_read_write_dta12(self, version):
469471
warnings.simplefilter("always", InvalidColumnName)
470472
original.to_stata(path, None, version=version)
471473
# should get a warning for that format.
472-
assert len(w) == 1
474+
assert len([x for x in w if not "Non-x86 system detected" in str(x.message)]) == 1
473475

474476
written_and_read_again = self.read_dta(path)
475477
tm.assert_frame_equal(written_and_read_again.set_index("index"), formatted)
@@ -1793,8 +1795,9 @@ def test_encoding_latin1_118(self):
17931795
the string values returned are correct."""
17941796
with tm.assert_produces_warning(UnicodeWarning) as w:
17951797
encoded = read_stata(self.dta_encoding_118)
1796-
assert len(w) == 151
1797-
assert w[0].message.args[0] == msg
1798+
w2 = [x for x in w if not "Non-x86 system detected" in str(x.message)]
1799+
assert len(w2) == 151
1800+
assert w2[0].message.args[0] == msg
17981801

17991802
expected = DataFrame([["Düsseldorf"]] * 151, columns=["kreis1849"])
18001803
tm.assert_frame_equal(encoded, expected)

0 commit comments

Comments
 (0)