Skip to content

Commit a9422d8

Browse files
Debian Science Team authored and rebecca-palmer committed
HDF5 and Stata I/O are broken on some architectures
Fix some issues, warn on use and xfail tests for the remainder

Everything that has a run=False xfail in here should also be in the run-and-ignore set in debian/tests/numbatests

armhf TestHDF5Store::test*encoding only sometimes crashes (1.1.3+dfsg-1 passed on build but failed autopkgtest)

HDF5 and Stata are known to fail on big-endian architectures

Stata was previously seen to fail on qemu-ppc64el, but not real ppc64el

Author: Andreas Tille <[email protected]>, Graham Inggs <[email protected]>, Yaroslav Halchenko <[email protected]>, Rebecca N. Palmer <[email protected]>
Bug-Debian: https://bugs.debian.org/877419
Bug: partly pandas-dev/pandas#54396
Forwarded: no
Gbp-Pq: Name xfail_tests_nonintel_io.patch
1 parent f198119 commit a9422d8

11 files changed

+46
-5
lines changed

pandas/_testing/_warnings.py

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
cast,
1414
)
1515
import warnings
16+
import platform
1617

1718
from pandas.compat import PY311
1819

@@ -187,6 +188,8 @@ def _assert_caught_no_extra_warnings(
187188
# pyproject.toml errors on EncodingWarnings in pandas
188189
# Ignore EncodingWarnings from other libraries
189190
continue
191+
if (actual_warning.category==UserWarning and "Non-x86 system detected" in str(actual_warning.message) and not bool(re.match('i.?86|x86',platform.uname()[4]))) or (actual_warning.category==RuntimeWarning and "invalid value encountered" in str(actual_warning.message) and 'mips' in platform.uname()[4]):
192+
continue
190193
extra_warnings.append(
191194
(
192195
actual_warning.category.__name__,

pandas/io/pytables.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
overload,
2525
)
2626
import warnings
27+
import platform
28+
import sys
29+
from pandas.compat import is_platform_little_endian
30+
# Warning text for HDF(5) I/O on affected platforms, or False elsewhere.
# Affected means either an ARM machine (uname machine field starting with
# "arm" or "aarch") whose sys.maxsize is below 2**33, or any platform that
# is not little-endian.
if (
    platform.uname()[4].startswith(("arm", "aarch")) and sys.maxsize < 2**33
) or not is_platform_little_endian():
    warn_hdf_platform = "Non-x86 system detected, HDF(5) format I/O may give wrong results (particularly on files created with older versions) or crash - https://bugs.debian.org/877419"
else:
    warn_hdf_platform = False
2731

2832
import numpy as np
2933

@@ -560,6 +564,8 @@ def __init__(
560564
fletcher32: bool = False,
561565
**kwargs,
562566
) -> None:
567+
if warn_hdf_platform:
568+
warnings.warn(warn_hdf_platform)
563569
if "format" in kwargs:
564570
raise ValueError("format is not a defined argument for HDFStore")
565571

@@ -781,7 +787,10 @@ def flush(self, fsync: bool = False) -> None:
781787
self._handle.flush()
782788
if fsync:
783789
with suppress(OSError):
784-
os.fsync(self._handle.fileno())
790+
if is_platform_little_endian():
791+
os.fsync(self._handle.fileno())
792+
else:
793+
os.sync() # due to a pytables bad-cast bug, fileno is invalid on 64-bit big-endian#
785794

786795
def get(self, key: str):
787796
"""

pandas/io/stata.py

+4
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
cast,
3030
)
3131
import warnings
32+
from pandas.compat import is_platform_little_endian
33+
# Warning text for Stata I/O on platforms that are not little-endian;
# False on little-endian platforms, so the value can be used as a flag.
if is_platform_little_endian():
    warn_stata_platform = False
else:
    warn_stata_platform = "Non-x86 system detected, Stata format I/O may give wrong results (particularly on strings) - https://bugs.debian.org/877419"
3234

3335
import numpy as np
3436

@@ -971,6 +973,8 @@ def __init__(self) -> None:
971973
# NOTE: the byte type seems to be reserved for categorical variables
972974
# with a label, but the underlying variable is -127 to 100
973975
# we're going to drop the label and cast to int
976+
if warn_stata_platform:
977+
warnings.warn(warn_stata_platform)
974978
self.DTYPE_MAP = dict(
975979
[(i, np.dtype(f"S{i}")) for i in range(1, 245)]
976980
+ [

pandas/tests/io/pytables/test_append.py

+5
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@
2222
_maybe_remove,
2323
ensure_clean_store,
2424
)
25+
import platform
26+
import re
27+
import sys
28+
# True when the uname machine field starts with "arm" or "aarch" and
# sys.maxsize is below 2**33.
# Meant for armhf, though this form will also skip on armel - uname = kernel arch
is_crashing_arch = (
    platform.uname()[4].startswith(("arm", "aarch"))
    and sys.maxsize < 2**33
)
2529

2630
pytestmark = pytest.mark.single_cpu
2731

@@ -282,6 +286,7 @@ def test_append_all_nans(setup_path):
282286
tm.assert_frame_equal(store["df2"], df, check_index_type=True)
283287

284288

289+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
285290
def test_append_frame_column_oriented(setup_path):
286291
with ensure_clean_store(setup_path) as store:
287292
# column oriented

pandas/tests/io/pytables/test_file_handling.py

+6
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828
ensure_clean_store,
2929
tables,
3030
)
31+
import platform
32+
import re
33+
import sys
34+
# True when the uname machine field starts with "arm" or "aarch" and
# sys.maxsize is below 2**33.
# Meant for armhf, though this form will also skip on armel - uname = kernel arch
is_crashing_arch = (
    platform.uname()[4].startswith(("arm", "aarch"))
    and sys.maxsize < 2**33
)
3135

3236
from pandas.io import pytables
3337
from pandas.io.pytables import Term
@@ -297,6 +301,7 @@ def test_complibs(tmp_path, lvl, lib, request):
297301
assert node.filters.complib == lib
298302

299303

304+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
300305
@pytest.mark.skipif(
301306
not is_platform_little_endian(), reason="reason platform is not little endian"
302307
)
@@ -329,6 +334,7 @@ def test_encoding(setup_path):
329334
],
330335
)
331336
@pytest.mark.parametrize("dtype", ["category", object])
337+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
332338
def test_latin_encoding(tmp_path, setup_path, dtype, val):
333339
enc = "latin-1"
334340
nan_rep = ""

pandas/tests/io/pytables/test_read.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import pytest
77

88
from pandas._libs.tslibs import Timestamp
9-
from pandas.compat import is_platform_windows
9+
from pandas.compat import is_platform_windows, is_platform_little_endian
1010

1111
import pandas as pd
1212
from pandas import (
@@ -172,6 +172,7 @@ def test_pytables_native2_read(datapath):
172172
assert isinstance(d1, DataFrame)
173173

174174

175+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
175176
def test_legacy_table_fixed_format_read_py2(datapath):
176177
# GH 24510
177178
# legacy table with fixed format written in Python 2
@@ -187,6 +188,7 @@ def test_legacy_table_fixed_format_read_py2(datapath):
187188
tm.assert_frame_equal(expected, result)
188189

189190

191+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
190192
def test_legacy_table_fixed_format_read_datetime_py2(datapath):
191193
# GH 31750
192194
# legacy table with fixed format and datetime64 column written in Python 2
@@ -370,6 +372,7 @@ def test_read_hdf_series_mode_r(tmp_path, format, setup_path):
370372

371373
@pytest.mark.filterwarnings(r"ignore:Period with BDay freq is deprecated:FutureWarning")
372374
@pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
375+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
373376
def test_read_py2_hdf_file_in_py3(datapath):
374377
# GH 16781
375378

pandas/tests/io/pytables/test_store.py

+5
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@
3030
HDFStore,
3131
read_hdf,
3232
)
33+
import platform
34+
import re
35+
import sys
36+
# True when the uname machine field starts with "arm" or "aarch" and
# sys.maxsize is below 2**33.
# Meant for armhf, though this form will also skip on armel - uname = kernel arch
is_crashing_arch = (
    platform.uname()[4].startswith(("arm", "aarch"))
    and sys.maxsize < 2**33
)
3337

3438
pytestmark = pytest.mark.single_cpu
3539

@@ -880,6 +884,7 @@ def test_start_stop_fixed(setup_path):
880884
df.iloc[8:10, -2] = np.nan
881885

882886

887+
@pytest.mark.xfail(condition=is_crashing_arch,reason="https://bugs.debian.org/790925",strict=False,run=False)
883888
def test_select_filter_corner(setup_path):
884889
df = DataFrame(np.random.default_rng(2).standard_normal((50, 100)))
885890
df.index = [f"{c:3d}" for c in df.index]

pandas/tests/io/pytables/test_timezones.py

+3
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from pandas._libs.tslibs.timezones import maybe_get_tz
1010
import pandas.util._test_decorators as td
11+
from pandas.compat import is_platform_little_endian
1112

1213
import pandas as pd
1314
from pandas import (
@@ -312,6 +313,7 @@ def test_store_timezone(setup_path):
312313
tm.assert_frame_equal(result, df)
313314

314315

316+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
315317
def test_legacy_datetimetz_object(datapath):
316318
# legacy from < 0.17.0
317319
# 8260
@@ -364,6 +366,7 @@ def test_read_with_where_tz_aware_index(tmp_path, setup_path):
364366
tm.assert_frame_equal(result, expected)
365367

366368

369+
@pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)
367370
def test_py2_created_with_datetimez(datapath):
368371
# The test HDF5 file was created in Python 2, but could not be read in
369372
# Python 3.

pandas/tests/io/test_common.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
import numpy as np
1919
import pytest
2020

21-
from pandas.compat import is_platform_windows
21+
from pandas.compat import is_platform_windows, is_platform_little_endian
2222
import pandas.util._test_decorators as td
2323

2424
import pandas as pd
@@ -305,11 +305,11 @@ def test_read_expands_user_home_dir(
305305
"pyarrow",
306306
("io", "data", "feather", "feather-0_3_1.feather"),
307307
),
308-
(
308+
pytest.param(
309309
pd.read_hdf,
310310
"tables",
311311
("io", "data", "legacy_hdf", "datetimetz_object.h5"),
312-
),
312+
marks=pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of hdf on non-little endian",strict=False,raises=AttributeError)),
313313
(pd.read_stata, "os", ("io", "data", "stata", "stata10_115.dta")),
314314
(pd.read_sas, "os", ("io", "sas", "data", "test1.sas7bdat")),
315315
(pd.read_json, "os", ("io", "json", "data", "tsframe_v012.json")),

pandas/tests/io/test_stata.py

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434
read_stata,
3535
)
3636

37+
from pandas.compat import is_platform_little_endian
38+
pytestmark = pytest.mark.xfail(condition=not is_platform_little_endian(),reason="known failure of test_stata on non-little endian",strict=False)
3739

3840
@pytest.fixture
3941
def mixed_frame():

pyproject.toml

+1
Original file line numberDiff line numberDiff line change
@@ -495,6 +495,7 @@ filterwarnings = [
495495
"error:::pandas",
496496
"error::ResourceWarning",
497497
"error::pytest.PytestUnraisableExceptionWarning",
498+
"ignore:Non-x86 system detected:UserWarning:pandas",
498499
# TODO(PY311-minimum): Specify EncodingWarning
499500
# Ignore 3rd party EncodingWarning but raise on pandas'
500501
"ignore:.*encoding.* argument not specified",

0 commit comments

Comments
 (0)