Skip to content

Commit ea51da3

Browse files
Debian Science Team, rebecca-palmer
Debian Science Team
authored and committed
HDF5 and Stata I/O are broken on some architectures
Fix some issues, warn on use and xfail tests for the remainder.

Everything that has a run=False xfail in here should also be in the run-and-ignore set in debian/tests/numbatests.

armhf TestHDF5Store::test*encoding only sometimes crashes (1.1.3+dfsg-1 passed on build but failed autopkgtest).

HDF5 and Stata are known to fail on big-endian architectures.
Stata was previously seen to fail on qemu-ppc64el, but not real ppc64el.

Author: Andreas Tille <[email protected]>, Graham Inggs <[email protected]>, Yaroslav Halchenko <[email protected]>, Rebecca N. Palmer <[email protected]>
Bug-Debian: https://bugs.debian.org/877419
Bug: partly pandas-dev/pandas#54396
Forwarded: no
Gbp-Pq: Name xfail_tests_nonintel_io.patch
1 parent e6ed276 commit ea51da3

11 files changed

+46
-5
lines changed

pandas/_testing/_warnings.py

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
cast,
1313
)
1414
import warnings
15+
import platform
1516

1617
from pandas.compat import PY311
1718

@@ -186,6 +187,8 @@ def _assert_caught_no_extra_warnings(
186187
# pyproject.toml errors on EncodingWarnings in pandas
187188
# Ignore EncodingWarnings from other libraries
188189
continue
190+
if (actual_warning.category==UserWarning and "Non-x86 system detected" in str(actual_warning.message) and not bool(re.match('i.?86|x86',platform.uname()[4]))) or (actual_warning.category==RuntimeWarning and "invalid value encountered" in str(actual_warning.message) and 'mips' in platform.uname()[4]):
191+
continue
189192
extra_warnings.append(
190193
(
191194
actual_warning.category.__name__,

pandas/io/pytables.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
overload,
2525
)
2626
import warnings
27+
import platform
28+
import sys
29+
from pandas.compat import is_platform_little_endian
30+
warn_hdf_platform = "Non-x86 system detected, HDF(5) format I/O may give wrong results (particularly on files created with older versions) or crash - https://bugs.debian.org/877419" if (((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) or not is_platform_little_endian()) else False
2731

2832
import numpy as np
2933

@@ -559,6 +563,8 @@ def __init__(
559563
fletcher32: bool = False,
560564
**kwargs,
561565
) -> None:
566+
if warn_hdf_platform:
567+
warnings.warn(warn_hdf_platform)
562568
if "format" in kwargs:
563569
raise ValueError("format is not a defined argument for HDFStore")
564570

@@ -780,7 +786,10 @@ def flush(self, fsync: bool = False) -> None:
780786
self._handle.flush()
781787
if fsync:
782788
with suppress(OSError):
783-
os.fsync(self._handle.fileno())
789+
if is_platform_little_endian():
790+
os.fsync(self._handle.fileno())
791+
else:
792+
os.sync() # due to a pytables bad-cast bug, fileno is invalid on 64-bit big-endian#
784793

785794
def get(self, key: str):
786795
"""

pandas/io/stata.py

+4
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
cast,
3131
)
3232
import warnings
33+
from pandas.compat import is_platform_little_endian
34+
warn_stata_platform = "Non-x86 system detected, Stata format I/O may give wrong results (particularly on strings) - https://bugs.debian.org/877419" if not is_platform_little_endian() else False
3335

3436
import numpy as np
3537

@@ -976,6 +978,8 @@ def __init__(self) -> None:
976978
# NOTE: the byte type seems to be reserved for categorical variables
977979
# with a label, but the underlying variable is -127 to 100
978980
# we're going to drop the label and cast to int
981+
if warn_stata_platform:
982+
warnings.warn(warn_stata_platform)
979983
self.DTYPE_MAP = dict(
980984
[(i, np.dtype(f"S{i}")) for i in range(1, 245)]
981985
+ [

pandas/tests/io/pytables/test_append.py

+5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@
2121
_maybe_remove,
2222
ensure_clean_store,
2323
)
24+
import platform
25+
import re
26+
import sys
27+
is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
2428

2529
pytestmark = pytest.mark.single_cpu
2630

@@ -275,6 +279,7 @@ def test_append_all_nans(setup_path):
275279
tm.assert_frame_equal(store["df2"], df, check_index_type=True)
276280

277281

282+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
278283
def test_append_frame_column_oriented(setup_path):
279284
with ensure_clean_store(setup_path) as store:
280285
# column oriented

pandas/tests/io/pytables/test_file_handling.py

+6
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,10 @@
2626
ensure_clean_store,
2727
tables,
2828
)
29+
import platform
30+
import re
31+
import sys
32+
is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
2933

3034
from pandas.io import pytables
3135
from pandas.io.pytables import Term
@@ -267,6 +271,7 @@ def test_complibs(tmp_path, lvl, lib):
267271
assert node.filters.complib == lib
268272

269273

274+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
270275
@pytest.mark.skipif(
271276
not is_platform_little_endian(), reason="reason platform is not little endian"
272277
)
@@ -299,6 +304,7 @@ def test_encoding(setup_path):
299304
],
300305
)
301306
@pytest.mark.parametrize("dtype", ["category", object])
307+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
302308
def test_latin_encoding(tmp_path, setup_path, dtype, val):
303309
enc = "latin-1"
304310
nan_rep = ""

pandas/tests/io/pytables/test_read.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
from pandas._libs.tslibs import Timestamp
9-
from pandas.compat import is_platform_windows
9+
from pandas.compat import is_platform_windows, is_platform_little_endian
1010

1111
import pandas as pd
1212
from pandas import (
@@ -167,6 +167,7 @@ def test_pytables_native2_read(datapath):
167167
assert isinstance(d1, DataFrame)
168168

169169

170+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
170171
def test_legacy_table_fixed_format_read_py2(datapath):
171172
# GH 24510
172173
# legacy table with fixed format written in Python 2
@@ -182,6 +183,7 @@ def test_legacy_table_fixed_format_read_py2(datapath):
182183
tm.assert_frame_equal(expected, result)
183184

184185

186+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
185187
def test_legacy_table_fixed_format_read_datetime_py2(datapath):
186188
# GH 31750
187189
# legacy table with fixed format and datetime64 column written in Python 2
@@ -364,6 +366,7 @@ def test_read_hdf_series_mode_r(tmp_path, format, setup_path):
364366

365367
@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
366368
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
369+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
367370
def test_read_py2_hdf_file_in_py3(datapath):
368371
# GH 16781
369372

pandas/tests/io/pytables/test_store.py

+5
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,10 @@
2929
HDFStore,
3030
read_hdf,
3131
)
32+
import platform
33+
import re
34+
import sys
35+
is_crashing_arch=bool((platform.uname()[4].startswith('arm') or platform.uname()[4].startswith('aarch')) and sys.maxsize<2**33) # meant for armhf, though this form will also skip on armel - uname = kernel arch
3236

3337
pytestmark = pytest.mark.single_cpu
3438

@@ -759,6 +763,7 @@ def test_start_stop_fixed(setup_path):
759763
df.iloc[8:10, -2] = np.nan
760764

761765

766+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
762767
def test_select_filter_corner(setup_path):
763768
df = DataFrame(np.random.default_rng(2).standard_normal((50, 100)))
764769
df.index = [f"{c:3d}" for c in df.index]

pandas/tests/io/pytables/test_timezones.py

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pandas._libs.tslibs.timezones import maybe_get_tz
1010
import pandas.util._test_decorators as td
11+
from pandas.compat import is_platform_little_endian
1112

1213
import pandas as pd
1314
from pandas import (
@@ -308,6 +309,7 @@ def test_store_timezone(setup_path):
308309
tm.assert_frame_equal(result, df)
309310

310311

312+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
311313
def test_legacy_datetimetz_object(datapath):
312314
# legacy from < 0.17.0
313315
# 8260
@@ -360,6 +362,7 @@ def test_read_with_where_tz_aware_index(tmp_path, setup_path):
360362
tm.assert_frame_equal(result, expected)
361363

362364

365+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
363366
def test_py2_created_with_datetimez(datapath):
364367
# The test HDF5 file was created in Python 2, but could not be read in
365368
# Python 3.

pandas/tests/io/test_common.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
import pytest
1919

20-
from pandas.compat import is_platform_windows
20+
from pandas.compat import is_platform_windows, is_platform_little_endian
2121
import pandas.util._test_decorators as td
2222

2323
import pandas as pd
@@ -300,11 +300,11 @@ def test_read_expands_user_home_dir(
300300
"pyarrow",
301301
("io", "data", "feather", "feather-0_3_1.feather"),
302302
),
303-
(
303+
pytest.param(
304304
pd.read_hdf,
305305
"tables",
306306
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
307-
),
307+
marks=pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)),
308308
(pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
309309
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
310310
(pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),

pandas/tests/io/test_stata.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@
3232
read_stata,
3333
)
3434

35+
from pandas.compat import is_platform_little_endian
36+
pytestmark = pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of test_stata on non-little endian",strict=False)
3537

3638
@pytest.fixture
3739
def mixed_frame():

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ filterwarnings = [
480480
"error:::pandas",
481481
"error::ResourceWarning",
482482
"error::pytest.PytestUnraisableExceptionWarning",
483+
"ignore:Non-x86 system detected:UserWarning:pandas",
483484
# TODO(PY311-minimum): Specify EncodingWarning
484485
# Ignore 3rd party EncodingWarning but raise on pandas'
485486
"ignore:.*encoding.* argument not specified",

0 commit comments

Comments
 (0)