Fix a few test failures on big-endian systems (pandas-dev#46681)

musicinmybrain · web-flow · commit 40e9cbe90a7c · 2022-04-07T12:35:15.000-04:00
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
@@ -8,6 +8,7 @@
 import os
 import re
 import string
+from sys import byteorder
 from typing import (
     TYPE_CHECKING,
     Callable,
@@ -168,6 +169,8 @@
     np.uint32,
 ]
 
+ENDIAN = {"little": "<", "big": ">"}[byteorder]
+
 NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA, Decimal("NaN")]
 NP_NAT_OBJECTS = [
     cls("NaT", unit)
diff --git a/pandas/tests/arrays/boolean/test_astype.py b/pandas/tests/arrays/boolean/test_astype.py
@@ -20,7 +20,7 @@ def test_astype():
     tm.assert_numpy_array_equal(result, expected)
 
     result = arr.astype("str")
-    expected = np.array(["True", "False", "<NA>"], dtype="<U5")
+    expected = np.array(["True", "False", "<NA>"], dtype=f"{tm.ENDIAN}U5")
     tm.assert_numpy_array_equal(result, expected)
 
     # no missing values
diff --git a/pandas/tests/arrays/boolean/test_construction.py b/pandas/tests/arrays/boolean/test_construction.py
@@ -273,7 +273,7 @@ def test_to_numpy(box):
 
     arr = con([True, False, None], dtype="boolean")
     result = arr.to_numpy(dtype="str")
-    expected = np.array([True, False, pd.NA], dtype="<U5")
+    expected = np.array([True, False, pd.NA], dtype=f"{tm.ENDIAN}U5")
     tm.assert_numpy_array_equal(result, expected)
 
     # no missing values -> can convert to bool, otherwise raises
diff --git a/pandas/tests/arrays/floating/test_to_numpy.py b/pandas/tests/arrays/floating/test_to_numpy.py
@@ -115,7 +115,7 @@ def test_to_numpy_string(box, dtype):
     arr = con([0.0, 1.0, None], dtype="Float64")
 
     result = arr.to_numpy(dtype="str")
-    expected = np.array([0.0, 1.0, pd.NA], dtype="<U32")
+    expected = np.array([0.0, 1.0, pd.NA], dtype=f"{tm.ENDIAN}U32")
     tm.assert_numpy_array_equal(result, expected)
 
 
diff --git a/pandas/tests/arrays/integer/test_dtypes.py b/pandas/tests/arrays/integer/test_dtypes.py
@@ -283,7 +283,7 @@ def test_to_numpy_na_raises(dtype):
 
 def test_astype_str():
     a = pd.array([1, 2, None], dtype="Int64")
-    expected = np.array(["1", "2", "<NA>"], dtype="<U21")
+    expected = np.array(["1", "2", "<NA>"], dtype=f"{tm.ENDIAN}U21")
 
     tm.assert_numpy_array_equal(a.astype(str), expected)
     tm.assert_numpy_array_equal(a.astype("str"), expected)
diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py
@@ -151,106 +151,176 @@ def test_to_records_with_categorical(self):
                 {},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", f"{tm.ENDIAN}i8"),
+                        ("B", f"{tm.ENDIAN}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Should have no effect in this case.
             (
                 {"index": True},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "<i8"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", f"{tm.ENDIAN}i8"),
+                        ("B", f"{tm.ENDIAN}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Column dtype applied across the board. Index unaffected.
             (
-                {"column_dtypes": "<U4"},
+                {"column_dtypes": f"{tm.ENDIAN}U4"},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U4"), ("B", "<U4"), ("C", "<U4")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", f"{tm.ENDIAN}U4"),
+                        ("B", f"{tm.ENDIAN}U4"),
+                        ("C", f"{tm.ENDIAN}U4"),
+                    ],
                 ),
             ),
             # Index dtype applied across the board. Columns unaffected.
             (
-                {"index_dtypes": "<U1"},
+                {"index_dtypes": f"{tm.ENDIAN}U1"},
                 np.rec.array(
                     [("0", 1, 0.2, "a"), ("1", 2, 1.5, "bc")],
-                    dtype=[("index", "<U1"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}U1"),
+                        ("A", f"{tm.ENDIAN}i8"),
+                        ("B", f"{tm.ENDIAN}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Pass in a type instance.
             (
                 {"column_dtypes": str},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", f"{tm.ENDIAN}U"),
+                        ("B", f"{tm.ENDIAN}U"),
+                        ("C", f"{tm.ENDIAN}U"),
+                    ],
                 ),
             ),
             # Pass in a dtype instance.
             (
                 {"column_dtypes": np.dtype("unicode")},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "<U"), ("B", "<U"), ("C", "<U")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", f"{tm.ENDIAN}U"),
+                        ("B", f"{tm.ENDIAN}U"),
+                        ("C", f"{tm.ENDIAN}U"),
+                    ],
                 ),
             ),
             # Pass in a dictionary (name-only).
             (
-                {"column_dtypes": {"A": np.int8, "B": np.float32, "C": "<U2"}},
+                {
+                    "column_dtypes": {
+                        "A": np.int8,
+                        "B": np.float32,
+                        "C": f"{tm.ENDIAN}U2",
+                    }
+                },
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "<U2")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", "i1"),
+                        ("B", f"{tm.ENDIAN}f4"),
+                        ("C", f"{tm.ENDIAN}U2"),
+                    ],
                 ),
             ),
             # Pass in a dictionary (indices-only).
             (
                 {"index_dtypes": {0: "int16"}},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", "i2"),
+                        ("A", f"{tm.ENDIAN}i8"),
+                        ("B", f"{tm.ENDIAN}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Ignore index mappings if index is not True.
             (
-                {"index": False, "index_dtypes": "<U2"},
+                {"index": False, "index_dtypes": f"{tm.ENDIAN}U2"},
                 np.rec.array(
                     [(1, 0.2, "a"), (2, 1.5, "bc")],
-                    dtype=[("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("A", f"{tm.ENDIAN}i8"),
+                        ("B", f"{tm.ENDIAN}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Non-existent names / indices in mapping should not error.
             (
                 {"index_dtypes": {0: "int16", "not-there": "float32"}},
                 np.rec.array(
                     [(0, 1, 0.2, "a"), (1, 2, 1.5, "bc")],
-                    dtype=[("index", "i2"), ("A", "<i8"), ("B", "<f8"), ("C", "O")],
+                    dtype=[
+                        ("index", "i2"),
+                        ("A", f"{tm.ENDIAN}i8"),
+                        ("B", f"{tm.ENDIAN}f8"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Names / indices not in mapping default to array dtype.
             (
                 {"column_dtypes": {"A": np.int8, "B": np.float32}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", "i1"),
+                        ("B", f"{tm.ENDIAN}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Names / indices not in dtype mapping default to array dtype.
             (
                 {"column_dtypes": {"A": np.dtype("int8"), "B": np.dtype("float32")}},
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<i8"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}i8"),
+                        ("A", "i1"),
+                        ("B", f"{tm.ENDIAN}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Mixture of everything.
             (
                 {
                     "column_dtypes": {"A": np.int8, "B": np.float32},
-                    "index_dtypes": "<U2",
+                    "index_dtypes": f"{tm.ENDIAN}U2",
                 },
                 np.rec.array(
                     [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-                    dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+                    dtype=[
+                        ("index", f"{tm.ENDIAN}U2"),
+                        ("A", "i1"),
+                        ("B", f"{tm.ENDIAN}f4"),
+                        ("C", "O"),
+                    ],
                 ),
             ),
             # Invalid dtype values.
@@ -299,7 +369,11 @@ def test_to_records_dtype(self, kwargs, expected):
                 {"column_dtypes": "float64", "index_dtypes": {0: "int32", 1: "int8"}},
                 np.rec.array(
                     [(1, 2, 3.0), (4, 5, 6.0), (7, 8, 9.0)],
-                    dtype=[("a", "<i4"), ("b", "i1"), ("c", "<f8")],
+                    dtype=[
+                        ("a", f"{tm.ENDIAN}i4"),
+                        ("b", "i1"),
+                        ("c", f"{tm.ENDIAN}f8"),
+                    ],
                 ),
             ),
             # MultiIndex in the columns.
@@ -310,14 +384,17 @@ def test_to_records_dtype(self, kwargs, expected):
                         [("a", "d"), ("b", "e"), ("c", "f")]
                     ),
                 ),
-                {"column_dtypes": {0: "<U1", 2: "float32"}, "index_dtypes": "float32"},
+                {
+                    "column_dtypes": {0: f"{tm.ENDIAN}U1", 2: "float32"},
+                    "index_dtypes": "float32",
+                },
                 np.rec.array(
                     [(0.0, "1", 2, 3.0), (1.0, "4", 5, 6.0), (2.0, "7", 8, 9.0)],
                     dtype=[
-                        ("index", "<f4"),
-                        ("('a', 'd')", "<U1"),
-                        ("('b', 'e')", "<i8"),
-                        ("('c', 'f')", "<f4"),
+                        ("index", f"{tm.ENDIAN}f4"),
+                        ("('a', 'd')", f"{tm.ENDIAN}U1"),
+                        ("('b', 'e')", f"{tm.ENDIAN}i8"),
+                        ("('c', 'f')", f"{tm.ENDIAN}f4"),
                     ],
                 ),
             ),
@@ -332,19 +409,22 @@ def test_to_records_dtype(self, kwargs, expected):
                         [("d", -4), ("d", -5), ("f", -6)], names=list("cd")
                     ),
                 ),
-                {"column_dtypes": "float64", "index_dtypes": {0: "<U2", 1: "int8"}},
+                {
+                    "column_dtypes": "float64",
+                    "index_dtypes": {0: f"{tm.ENDIAN}U2", 1: "int8"},
+                },
                 np.rec.array(
                     [
                         ("d", -4, 1.0, 2.0, 3.0),
                         ("d", -5, 4.0, 5.0, 6.0),
                         ("f", -6, 7, 8, 9.0),
                     ],
                     dtype=[
-                        ("c", "<U2"),
+                        ("c", f"{tm.ENDIAN}U2"),
                         ("d", "i1"),
-                        ("('a', 'd')", "<f8"),
-                        ("('b', 'e')", "<f8"),
-                        ("('c', 'f')", "<f8"),
+                        ("('a', 'd')", f"{tm.ENDIAN}f8"),
+                        ("('b', 'e')", f"{tm.ENDIAN}f8"),
+                        ("('c', 'f')", f"{tm.ENDIAN}f8"),
                     ],
                 ),
             ),
@@ -374,13 +454,18 @@ def keys(self):
 
         dtype_mappings = {
             "column_dtypes": DictLike(**{"A": np.int8, "B": np.float32}),
-            "index_dtypes": "<U2",
+            "index_dtypes": f"{tm.ENDIAN}U2",
         }
 
         result = df.to_records(**dtype_mappings)
         expected = np.rec.array(
             [("0", "1", "0.2", "a"), ("1", "2", "1.5", "bc")],
-            dtype=[("index", "<U2"), ("A", "i1"), ("B", "<f4"), ("C", "O")],
+            dtype=[
+                ("index", f"{tm.ENDIAN}U2"),
+                ("A", "i1"),
+                ("B", f"{tm.ENDIAN}f4"),
+                ("C", "O"),
+            ],
         )
         tm.assert_almost_equal(result, expected)
 
diff --git a/pandas/tests/io/parser/test_c_parser_only.py b/pandas/tests/io/parser/test_c_parser_only.py
@@ -144,9 +144,12 @@ def test_dtype_and_names_error(c_parser_only):
             "the dtype timedelta64 is not supported for parsing",
             {"dtype": {"A": "timedelta64", "B": "float64"}},
         ),
-        ("the dtype <U8 is not supported for parsing", {"dtype": {"A": "U8"}}),
+        (
+            f"the dtype {tm.ENDIAN}U8 is not supported for parsing",
+            {"dtype": {"A": "U8"}},
+        ),
     ],
-    ids=["dt64-0", "dt64-1", "td64", "<U8"],
+    ids=["dt64-0", "dt64-1", "td64", f"{tm.ENDIAN}U8"],
 )
 def test_unsupported_dtype(c_parser_only, match, kwargs):
     parser = c_parser_only
diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py
@@ -198,7 +198,8 @@ def test_to_timedelta_on_missing_values(self):
 
         actual = to_timedelta(Series(["00:00:01", np.nan]))
         expected = Series(
-            [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"
+            [np.timedelta64(1000000000, "ns"), timedelta_NaT],
+            dtype=f"{tm.ENDIAN}m8[ns]",
         )
         tm.assert_series_equal(actual, expected)
 

Original file line number	Diff line number	Diff line change
`@@ -198,7 +198,8 @@ def test_to_timedelta_on_missing_values(self):`
`198`	`198`
`199`	`199`	`actual = to_timedelta(Series(["00:00:01", np.nan]))`
`200`	`200`	`expected = Series(`
`201`		`- [np.timedelta64(1000000000, "ns"), timedelta_NaT], dtype="<m8[ns]"`
	`201`	`+ [np.timedelta64(1000000000, "ns"), timedelta_NaT],`
	`202`	`+ dtype=f"{tm.ENDIAN}m8[ns]",`
`202`	`203`	`)`
`203`	`204`	`tm.assert_series_equal(actual, expected)`
`204`	`205`