pandas-dev · jbrockmendel · Dec 12, 2022
diff --git a/pandas/_libs/index_class_helper.pxi.in b/pandas/_libs/index_class_helper.pxi.in
@@ -63,3 +63,15 @@ cdef class {{name}}Engine(IndexEngine):
 
 
 {{endfor}}
+
+
+class Float16Engine(Float32Engine):
+    # Reuses Float32Engine by upcasting float16 data to float32; see GH#49536
+    def __init__(self, ndarray values):
+        values = values.astype(np.float32)  # parent engine is built on float32
+        super().__init__(values)
+
+    def get_indexer(self, ndarray values) -> np.ndarray:
+        if values.dtype == np.float16:
+            values = values.astype(np.float32)  # only float16 targets are upcast
+        return super().get_indexer(values)
diff --git a/pandas/conftest.py b/pandas/conftest.py
@@ -600,6 +600,7 @@ def _create_mi_with_dt64tz_level():
     "uint16": tm.makeUIntIndex(100, dtype="uint16"),
     "uint32": tm.makeUIntIndex(100, dtype="uint32"),
     "uint64": tm.makeUIntIndex(100, dtype="uint64"),
+    "float16": tm.makeNumericIndex(100, dtype="float32").astype("float16"),
     "float32": tm.makeFloatIndex(100, dtype="float32"),
     "float64": tm.makeFloatIndex(100, dtype="float64"),
     "bool-object": tm.makeBoolIndex(10).astype(object),

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -4720,6 +4720,11 @@ def _can_use_libjoin(self) -> bool:
         """
         Whether we can use the fastpaths implement in _libs.join
         """
+        if self.dtype == np.float16:
+            # Alternatively, we could have the _outer_indexer etc methods
+            #  cast np.float16 to np.float32 and then cast results back
+            #  to np.float16
+            return False
         if type(self) is Index:
             # excludes EAs
             return isinstance(self.dtype, np.dtype)

diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -99,6 +99,7 @@ class NumericIndex(Index):
         np.dtype(np.uint16): libindex.UInt16Engine,
         np.dtype(np.uint32): libindex.UInt32Engine,
         np.dtype(np.uint64): libindex.UInt64Engine,
+        np.dtype(np.float16): libindex.Float16Engine,
         np.dtype(np.float32): libindex.Float32Engine,
         np.dtype(np.float64): libindex.Float64Engine,
         np.dtype(np.complex64): libindex.Complex64Engine,

diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py
@@ -147,7 +147,7 @@ def test_copy_and_deepcopy(self, index_flat):
         new_copy = index.copy(deep=True, name="banana")
         assert new_copy.name == "banana"
 
-    def test_copy_name(self, index_flat):
+    def test_copy_name(self, index_flat, request):
         # GH#12309: Check that the "name" argument
         # passed at initialization is honored.
         index = index_flat
@@ -165,6 +165,15 @@ def test_copy_name(self, index_flat):
         assert first.name == "mario"
         assert second.name == "mario"
 
+        if index.dtype == np.float16:
+            mark = pytest.mark.xfail(
+                reason="Looks like alignment fails in "
+                "_convert_arrays_and_get_rizer_klass",
+                # xpasses about 1 time in 6 locally
+                strict=False,
+            )
+            request.node.add_marker(mark)
+
         # TODO: belongs in series arithmetic tests?
         s1 = pd.Series(2, index=first)
         s2 = pd.Series(3, index=second[:-1])

diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -268,7 +268,7 @@ def test_union_base(self, index):
     @pytest.mark.filterwarnings(
         "ignore:Falling back on a non-pyarrow:pandas.errors.PerformanceWarning"
     )
-    def test_difference_base(self, sort, index):
+    def test_difference_base(self, sort, index, request):
         first = index[2:]
         second = index[:4]
         if index.is_boolean():
@@ -280,6 +280,11 @@ def test_difference_base(self, sort, index):
         else:
             answer = index[4:]
         result = first.difference(second, sort)
+
+        if index.dtype == np.float16:
+            mark = pytest.mark.xfail(reason="Looks like it messes up on inf")
+            request.node.add_marker(mark)
+
         assert tm.equalContents(result, answer)
 
         # GH#10149