Skip to content

Fix a few test failures on big-endian systems #46681

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/_testing/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import os
import re
import string
from sys import byteorder
from typing import (
TYPE_CHECKING,
Callable,
Expand Down Expand Up @@ -168,6 +169,8 @@
np.uint32,
]

ENDIAN = {"little": "<", "big": ">"}[byteorder]

NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
NP_NAT_OBJECTS = [
cls("NaT", unit)
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_astype():
tm.assert_numpy_array_equal(result, expected)

result = arr.astype("str")
expected = np.array(["True", "False", "<NA>"], dtype="<U5")
expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
tm.assert_numpy_array_equal(result, expected)

# no missing values
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/boolean/test_construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ def test_to_numpy(box):

arr = con([True, False, None], dtype="boolean")
result = arr.to_numpy(dtype="str")
expected = np.array([True, False, pd.NA], dtype="<U5")
expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
tm.assert_numpy_array_equal(result, expected)

# no missing values -> can convert to bool, otherwise raises
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/floating/test_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_to_numpy_string(box, dtype):
arr = con([0.0, 1.0, None], dtype="Float64")

result = arr.to_numpy(dtype="str")
expected = np.array([0.0, 1.0, pd.NA], dtype="<U32")
expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
tm.assert_numpy_array_equal(result, expected)


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/arrays/integer/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ def test_to_numpy_na_raises(dtype):

def test_astype_str():
a = pd.array([1, 2, None], dtype="Int64")
expected = np.array(["1", "2", "<NA>"], dtype="<U21")
expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")

tm.assert_numpy_array_equal(a.astype(str), expected)
tm.assert_numpy_array_equal(a.astype("str"), expected)
Expand Down
147 changes: 116 additions & 31 deletions pandas/tests/frame/methods/test_to_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,106 +151,176 @@ def test_to_records_with_categorical(self):
{},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Should have no effect in this case.
(
{"index": True},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Column dtype applied across the board. Index unaffected.
(
{"column_dtypes": "<U4"},
{"column_dtypes": f"{tm.ENDIAN}U4"},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "<U4"), ("B", "<U4"), ("C", "<U4")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}U4"),
("B", f"{tm.ENDIAN}U4"),
("C", f"{tm.ENDIAN}U4"),
],
),
),
# Index dtype applied across the board. Columns unaffected.
(
{"index_dtypes": "<U1"},
{"index_dtypes": f"{tm.ENDIAN}U1"},
np.rec.array(
[("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
dtype=[("index", "<U1"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}U1"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Pass in a type instance.
(
{"column_dtypes": str},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}U"),
("B", f"{tm.ENDIAN}U"),
("C", f"{tm.ENDIAN}U"),
],
),
),
# Pass in a dtype instance.
(
{"column_dtypes": np.dtype("unicode")},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", f"{tm.ENDIAN}U"),
("B", f"{tm.ENDIAN}U"),
("C", f"{tm.ENDIAN}U"),
],
),
),
# Pass in a dictionary (name-only).
(
{"column_dtypes": {"A": np.int8, "B": np.float32, "C": "<U2"}},
{
"column_dtypes": {
"A": np.int8,
"B": np.float32,
"C": f"{tm.ENDIAN}U2",
}
},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "<U2")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", f"{tm.ENDIAN}U2"),
],
),
),
# Pass in a dictionary (indices-only).
(
{"index_dtypes": {0: "int16"}},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", "i2"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Ignore index mappings if index is not True.
(
{"index": False, "index_dtypes": "<U2"},
{"index": False, "index_dtypes": f"{tm.ENDIAN}U2"},
np.rec.array(
[(1, 0.2, "a"), (2, 1.5, "bc")],
dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Non-existent names / indices in mapping should not error.
(
{"index_dtypes": {0: "int16", "not-there": "float32"}},
np.rec.array(
[(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
dtype=[
("index", "i2"),
("A", f"{tm.ENDIAN}i8"),
("B", f"{tm.ENDIAN}f8"),
("C", "O"),
],
),
),
# Names / indices not in mapping default to array dtype.
(
{"column_dtypes": {"A": np.int8, "B": np.float32}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
),
),
# Names / indices not in dtype mapping default to array dtype.
(
{"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}i8"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
),
),
# Mixture of everything.
(
{
"column_dtypes": {"A": np.int8, "B": np.float32},
"index_dtypes": "<U2",
"index_dtypes": f"{tm.ENDIAN}U2",
},
np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}U2"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
),
),
# Invalid dtype values.
Expand Down Expand Up @@ -299,7 +369,11 @@ def test_to_records_dtype(self, kwargs, expected):
{"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}},
np.rec.array(
[(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)],
dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")],
dtype=[
("a", f"{tm.ENDIAN}i4"),
("b", "i1"),
("c", f"{tm.ENDIAN}f8"),
],
),
),
# MultiIndex in the columns.
Expand All @@ -310,14 +384,17 @@ def test_to_records_dtype(self, kwargs, expected):
[("a", "d"), ("b", "e"), ("c", "f")]
),
),
{"column_dtypes": {0: "<U1", 2: "float32"}, "index_dtypes": "float32"},
{
"column_dtypes": {0: f"{tm.ENDIAN}U1", 2: "float32"},
"index_dtypes": "float32",
},
np.rec.array(
[(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)],
dtype=[
("index", "<f4"),
("('a', 'd')", "<U1"),
("('b', 'e')", "<i8"),
("('c', 'f')", "<f4"),
("index", f"{tm.ENDIAN}f4"),
("('a', 'd')", f"{tm.ENDIAN}U1"),
("('b', 'e')", f"{tm.ENDIAN}i8"),
("('c', 'f')", f"{tm.ENDIAN}f4"),
],
),
),
Expand All @@ -332,19 +409,22 @@ def test_to_records_dtype(self, kwargs, expected):
[("d", -4), ("d", -5), ("f", -6)], names=list("cd")
),
),
{"column_dtypes": "float64", "index_dtypes": {0: "<U2", 1: "int8"}},
{
"column_dtypes": "float64",
"index_dtypes": {0: f"{tm.ENDIAN}U2", 1: "int8"},
},
np.rec.array(
[
("d", -4, 1.0, 2.0, 3.0),
("d", -5, 4.0, 5.0, 6.0),
("f", -6, 7, 8, 9.0),
],
dtype=[
("c", "<U2"),
("c", f"{tm.ENDIAN}U2"),
("d", "i1"),
("('a', 'd')", "<f8"),
("('b', 'e')", "<f8"),
("('c', 'f')", "<f8"),
("('a', 'd')", f"{tm.ENDIAN}f8"),
("('b', 'e')", f"{tm.ENDIAN}f8"),
("('c', 'f')", f"{tm.ENDIAN}f8"),
],
),
),
Expand Down Expand Up @@ -374,13 +454,18 @@ def keys(self):

dtype_mappings = {
"column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}),
"index_dtypes": "<U2",
"index_dtypes": f"{tm.ENDIAN}U2",
}

result = df.to_records(**dtype_mappings)
expected = np.rec.array(
[("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
dtype=[
("index", f"{tm.ENDIAN}U2"),
("A", "i1"),
("B", f"{tm.ENDIAN}f4"),
("C", "O"),
],
)
tm.assert_almost_equal(result, expected)

Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/io/parser/test_c_parser_only.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,9 +144,12 @@ def test_dtype_and_names_error(c_parser_only):
"the dtype timedelta64 is not supported for parsing",
{"dtype": {"A": "timedelta64", "B": "float64"}},
),
("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
(
f"the dtype {tm.ENDIAN}U8 is not supported for parsing",
{"dtype": {"A": "U8"}},
),
],
ids=["dt64-0", "dt64-1", "td64", "<U8"],
ids=["dt64-0", "dt64-1", "td64", f"{tm.ENDIAN}U8"],
)
def test_unsupported_dtype(c_parser_only, match, kwargs):
parser = c_parser_only
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/tools/test_to_timedelta.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ def test_to_timedelta_on_missing_values(self):

actual = to_timedelta(Series(["00:00:01", np.nan]))
expected = Series(
[np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"
[np.timedelta64(1000000000, "ns"), timedelta_NaT],
dtype=f"{tm.ENDIAN}m8[ns]",
)
tm.assert_series_equal(actual, expected)

Expand Down