diff --git a/pandas/tests/groupby/test_bin_groupby.py b/pandas/tests/groupby/test_bin_groupby.py
index f20eed4575e91..aff9911961b25 100644
--- a/pandas/tests/groupby/test_bin_groupby.py
+++ b/pandas/tests/groupby/test_bin_groupby.py
@@ -1,12 +1,10 @@
 import numpy as np
 import pytest
 
-from pandas._libs import groupby, lib, reduction as libreduction
-
-from pandas.core.dtypes.common import ensure_int64
+from pandas._libs import lib, reduction as libreduction
 
 import pandas as pd
-from pandas import Series, isna
+from pandas import Series
 import pandas._testing as tm
 
 
@@ -103,36 +101,5 @@ def test_generate_bins(binner, closed, expected):
     tm.assert_numpy_array_equal(result, expected)
 
 
-def test_group_ohlc():
-    def _check(dtype):
-        obj = np.array(np.random.randn(20), dtype=dtype)
-
-        bins = np.array([6, 12, 20])
-        out = np.zeros((3, 4), dtype)
-        counts = np.zeros(len(out), dtype=np.int64)
-        labels = ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))
-
-        func = getattr(groupby, f"group_ohlc_{dtype}")
-        func(out, counts, obj[:, None], labels)
-
-        def _ohlc(group):
-            if isna(group).all():
-                return np.repeat(np.nan, 4)
-            return [group[0], group.max(), group.min(), group[-1]]
-
-        expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])
-
-        tm.assert_almost_equal(out, expected)
-        tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))
-
-        obj[:6] = np.nan
-        func(out, counts, obj[:, None], labels)
-        expected[0] = np.nan
-        tm.assert_almost_equal(out, expected)
-
-    _check("float32")
-    _check("float64")
-
-
 class TestMoments:
     pass
diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py
new file mode 100644
index 0000000000000..28b740355f351
--- /dev/null
+++ b/pandas/tests/groupby/test_libgroupby.py
@@ -0,0 +1,237 @@
+import numpy as np
+
+from pandas._libs import groupby as libgroupby
+from pandas._libs.groupby import (
+    group_cumprod_float64,
+    group_cumsum,
+    group_var_float32,
+    group_var_float64,
+)
+
+from pandas.core.dtypes.common import ensure_int64
+
+from pandas import isna
+import pandas._testing as tm
+
+
+class GroupVarTestMixin:  # shared cases; subclasses set algo, dtype and rtol
+    def test_group_var_generic_1d(self):
+        prng = np.random.RandomState(1234)
+
+        out = (np.nan * np.ones((5, 1))).astype(self.dtype)
+        counts = np.zeros(5, dtype="int64")
+        values = 10 * prng.rand(15, 1).astype(self.dtype)
+        labels = np.tile(np.arange(5), (3,)).astype("int64")
+        # labels cycle 0..4 three times, so F-order row i holds group i's values
+        expected_out = (
+            np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2
+        )[:, np.newaxis]
+        expected_counts = counts + 3  # three rows per group
+
+        self.algo(out, counts, values, labels)
+        assert np.allclose(out, expected_out, self.rtol)
+        tm.assert_numpy_array_equal(counts, expected_counts)
+
+    def test_group_var_generic_1d_flat_labels(self):
+        prng = np.random.RandomState(1234)
+
+        out = (np.nan * np.ones((1, 1))).astype(self.dtype)
+        counts = np.zeros(1, dtype="int64")
+        values = 10 * prng.rand(5, 1).astype(self.dtype)
+        labels = np.zeros(5, dtype="int64")
+        # a single group: the result is the sample variance of all five values
+        expected_out = np.array([[values.std(ddof=1) ** 2]])
+        expected_counts = counts + 5  # all five rows land in group 0
+
+        self.algo(out, counts, values, labels)
+
+        assert np.allclose(out, expected_out, self.rtol)
+        tm.assert_numpy_array_equal(counts, expected_counts)
+
+    def test_group_var_generic_2d_all_finite(self):
+        prng = np.random.RandomState(1234)
+
+        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
+        counts = np.zeros(5, dtype="int64")
+        values = 10 * prng.rand(10, 2).astype(self.dtype)
+        labels = np.tile(np.arange(5), (2,)).astype("int64")
+        # row 5 * k + g carries label g, so axis 0 of the reshape spans each group
+        expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2
+        expected_counts = counts + 2  # two rows per group
+
+        self.algo(out, counts, values, labels)
+        assert np.allclose(out, expected_out, self.rtol)
+        tm.assert_numpy_array_equal(counts, expected_counts)
+
+    def test_group_var_generic_2d_some_nan(self):
+        prng = np.random.RandomState(1234)
+
+        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
+        counts = np.zeros(5, dtype="int64")
+        values = 10 * prng.rand(10, 2).astype(self.dtype)
+        values[:, 1] = np.nan  # second column is entirely missing
+        labels = np.tile(np.arange(5), (2,)).astype("int64")
+        # column 0 is checked against NumPy; column 1 is expected to stay NaN
+        expected_out = np.vstack(
+            [
+                values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2,
+                np.nan * np.ones(5),
+            ]
+        ).T.astype(self.dtype)
+        expected_counts = counts + 2
+
+        self.algo(out, counts, values, labels)
+        tm.assert_almost_equal(out, expected_out, rtol=0.5e-06)
+        tm.assert_numpy_array_equal(counts, expected_counts)
+
+    def test_group_var_constant(self):
+        # Regression test from GH 10448.
+
+        out = np.array([[np.nan]], dtype=self.dtype)
+        counts = np.array([0], dtype="int64")
+        values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype)
+        labels = np.zeros(3, dtype="int64")
+
+        self.algo(out, counts, values, labels)
+        # the variance of constant input must be (almost) zero and never negative
+        assert counts[0] == 3
+        assert out[0, 0] >= 0
+        tm.assert_almost_equal(out[0, 0], 0.0)
+
+
+class TestGroupVarFloat64(GroupVarTestMixin):  # mixin cases for the float64 kernel
+    __test__ = True  # NOTE(review): presumably forces test collection - confirm
+
+    algo = staticmethod(group_var_float64)  # staticmethod: self.algo() passes no self
+    dtype = np.float64
+    rtol = 1e-5
+
+    def test_group_var_large_inputs(self):
+        prng = np.random.RandomState(1234)
+        # uniform [0, 1) samples offset by 1e12; Var(U(0, 1)) == 1 / 12
+        out = np.array([[np.nan]], dtype=self.dtype)
+        counts = np.array([0], dtype="int64")
+        values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype)
+        values.shape = (10 ** 6, 1)  # in-place reshape to a single column
+        labels = np.zeros(10 ** 6, dtype="int64")
+
+        self.algo(out, counts, values, labels)
+
+        assert counts[0] == 10 ** 6
+        tm.assert_almost_equal(out[0, 0], 1.0 / 12, rtol=0.5e-3)
+
+
+class TestGroupVarFloat32(GroupVarTestMixin):  # mixin cases for the float32 kernel
+    __test__ = True  # NOTE(review): presumably forces test collection - confirm
+
+    algo = staticmethod(group_var_float32)  # staticmethod: self.algo() passes no self
+    dtype = np.float32
+    rtol = 1e-2  # looser than the 1e-5 used by the float64 class
+
+
+def test_group_ohlc():  # group_ohlc_<dtype> kernels vs. a NumPy reference
+    def _check(dtype):
+        obj = np.array(np.random.RandomState(1234).randn(20), dtype=dtype)  # seeded
+
+        bins = np.array([6, 12, 20])
+        out = np.zeros((3, 4), dtype)
+        counts = np.zeros(len(out), dtype=np.int64)
+        labels = ensure_int64(np.repeat(np.arange(3), np.diff(np.r_[0, bins])))
+        # labels: 6 rows for group 0, 6 for group 1 and 8 for group 2 (from bins)
+        func = getattr(libgroupby, f"group_ohlc_{dtype}")
+        func(out, counts, obj[:, None], labels)
+
+        def _ohlc(group):  # reference [open, high, low, close] for one group
+            if isna(group).all():
+                return np.repeat(np.nan, 4)
+            return [group[0], group.max(), group.min(), group[-1]]
+
+        expected = np.array([_ohlc(obj[:6]), _ohlc(obj[6:12]), _ohlc(obj[12:])])
+
+        tm.assert_almost_equal(out, expected)
+        tm.assert_numpy_array_equal(counts, np.array([6, 6, 8], dtype=np.int64))
+        # an all-NaN group must produce an all-NaN output row
+        obj[:6] = np.nan
+        func(out, counts, obj[:, None], labels)
+        expected[0] = np.nan
+        tm.assert_almost_equal(out, expected)
+
+    _check("float32")
+    _check("float64")
+
+
+def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
+    """
+    Check a cumulative group transform against its NumPy counterpart.
+
+    Parameters
+    ----------
+    pd_op : callable
+        Kernel called as ``pd_op(out, values, labels, ngroups, is_datetimelike)``.
+    np_op : callable
+        The analogous NumPy function, applied to the same input.
+    dtype : type
+        dtype of the single-column, single-group input data.
+    """
+    is_datetimelike = False
+    # one column, four rows, all of them in group 0
+    data = np.array([[1], [2], [3], [4]], dtype=dtype)
+    ans = np.zeros_like(data)  # buffer handed to pd_op as its first argument
+
+    labels = np.array([0, 0, 0, 0], dtype=np.int64)
+    ngroups = 1
+    pd_op(ans, data, labels, ngroups, is_datetimelike)
+    # np_op flattens the (4, 1) input, so compare with the only output column
+    tm.assert_numpy_array_equal(np_op(data), ans[:, 0], check_dtype=False)
+
+
+def test_cython_group_transform_cumsum(any_real_dtype):
+    # see gh-4095; any_real_dtype is presumably a shared fixture (not shown here)
+    dtype = np.dtype(any_real_dtype).type  # normalise to a NumPy scalar type
+    pd_op, np_op = group_cumsum, np.cumsum
+    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)
+
+
+def test_cython_group_transform_cumprod():
+    # see gh-4095; np.cumprod is canonical (np.cumproduct is a deprecated alias)
+    dtype = np.float64
+    pd_op, np_op = group_cumprod_float64, np.cumprod
+    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)
+
+
+def test_cython_group_transform_algos():
+    # see gh-4095
+    is_datetimelike = False
+
+    # with nans: the NaN row yields NaN, later rows keep accumulating past it
+    labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
+    ngroups = 1
+
+    data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64")
+    actual = np.zeros_like(data)
+    actual.fill(np.nan)
+    group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike)
+    expected = np.array([1, 2, 6, np.nan, 24], dtype="float64")
+    tm.assert_numpy_array_equal(actual[:, 0], expected)
+    # same input through the cumsum kernel, with a fresh NaN-filled buffer
+    actual = np.zeros_like(data)
+    actual.fill(np.nan)
+    group_cumsum(actual, data, labels, ngroups, is_datetimelike)
+    expected = np.array([1, 3, 6, np.nan, 10], dtype="float64")
+    tm.assert_numpy_array_equal(actual[:, 0], expected)
+
+    # timedelta: kernel gets the int64 view; the result is viewed back as m8[ns]
+    is_datetimelike = True
+    data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None]
+    actual = np.zeros_like(data, dtype="int64")
+    group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike)
+    expected = np.array(
+        [
+            np.timedelta64(1, "ns"),
+            np.timedelta64(2, "ns"),
+            np.timedelta64(3, "ns"),
+            np.timedelta64(4, "ns"),
+            np.timedelta64(5, "ns"),
+        ]
+    )
+    tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected)
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 946e60d17e0bb..cd3c2771db8a4 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -4,8 +4,6 @@
 import numpy as np
 import pytest
 
-from pandas._libs.groupby import group_cumprod_float64, group_cumsum
-
 from pandas.core.dtypes.common import ensure_platform_int, is_timedelta64_dtype
 
 import pandas as pd
@@ -515,83 +513,6 @@ def f(group):
             tm.assert_frame_equal(res, result.loc[key])
 
 
-def _check_cython_group_transform_cumulative(pd_op, np_op, dtype):
-    """
-    Check a group transform that executes a cumulative function.
-
-    Parameters
-    ----------
-    pd_op : callable
-        The pandas cumulative function.
-    np_op : callable
-        The analogous one in NumPy.
-    dtype : type
-        The specified dtype of the data.
-    """
-    is_datetimelike = False
-
-    data = np.array([[1], [2], [3], [4]], dtype=dtype)
-    ans = np.zeros_like(data)
-
-    labels = np.array([0, 0, 0, 0], dtype=np.int64)
-    ngroups = 1
-    pd_op(ans, data, labels, ngroups, is_datetimelike)
-
-    tm.assert_numpy_array_equal(np_op(data), ans[:, 0], check_dtype=False)
-
-
-def test_cython_group_transform_cumsum(any_real_dtype):
-    # see gh-4095
-    dtype = np.dtype(any_real_dtype).type
-    pd_op, np_op = group_cumsum, np.cumsum
-    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)
-
-
-def test_cython_group_transform_cumprod():
-    # see gh-4095
-    dtype = np.float64
-    pd_op, np_op = group_cumprod_float64, np.cumproduct
-    _check_cython_group_transform_cumulative(pd_op, np_op, dtype)
-
-
-def test_cython_group_transform_algos():
-    # see gh-4095
-    is_datetimelike = False
-
-    # with nans
-    labels = np.array([0, 0, 0, 0, 0], dtype=np.int64)
-    ngroups = 1
-
-    data = np.array([[1], [2], [3], [np.nan], [4]], dtype="float64")
-    actual = np.zeros_like(data)
-    actual.fill(np.nan)
-    group_cumprod_float64(actual, data, labels, ngroups, is_datetimelike)
-    expected = np.array([1, 2, 6, np.nan, 24], dtype="float64")
-    tm.assert_numpy_array_equal(actual[:, 0], expected)
-
-    actual = np.zeros_like(data)
-    actual.fill(np.nan)
-    group_cumsum(actual, data, labels, ngroups, is_datetimelike)
-    expected = np.array([1, 3, 6, np.nan, 10], dtype="float64")
-    tm.assert_numpy_array_equal(actual[:, 0], expected)
-
-    # timedelta
-    is_datetimelike = True
-    data = np.array([np.timedelta64(1, "ns")] * 5, dtype="m8[ns]")[:, None]
-    actual = np.zeros_like(data, dtype="int64")
-    group_cumsum(actual, data.view("int64"), labels, ngroups, is_datetimelike)
-    expected = np.array(
-        [
-            np.timedelta64(1, "ns"),
-            np.timedelta64(2, "ns"),
-            np.timedelta64(3, "ns"),
-            np.timedelta64(4, "ns"),
-            np.timedelta64(5, "ns"),
-        ]
-    )
-    tm.assert_numpy_array_equal(actual[:, 0].view("m8[ns]"), expected)
-
-
 @pytest.mark.parametrize(
     "op, args, targop",
     [
diff --git a/pandas/tests/libs/__init__.py b/pandas/tests/libs/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/pandas/tests/test_join.py b/pandas/tests/libs/test_join.py
similarity index 68%
rename from pandas/tests/test_join.py
rename to pandas/tests/libs/test_join.py
index 03198ec3289dd..95d6dcbaf3baf 100644
--- a/pandas/tests/test_join.py
+++ b/pandas/tests/libs/test_join.py
@@ -2,8 +2,8 @@
 import pytest
 
 from pandas._libs import join as libjoin
+from pandas._libs.join import inner_join, left_outer_join
 
-from pandas import Categorical, DataFrame, Index, merge
 import pandas._testing as tm
 
 
@@ -42,6 +42,98 @@ def test_outer_join_indexer(self, dtype):
         exp = np.array([-1, -1, -1], dtype=np.int64)
         tm.assert_numpy_array_equal(rindexer, exp)
 
+    def test_cython_left_outer_join(self):
+        left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
+        right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
+        max_group = 5
+
+        ls, rs = left_outer_join(left, right, max_group)
+        # stable sort order of each side; exp_li / exp_ri below index into it
+        exp_ls = left.argsort(kind="mergesort")
+        exp_rs = right.argsort(kind="mergesort")
+        # -1 in exp_ri marks the left rows (key 3) that have no match on the right
+        exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10])
+        exp_ri = np.array(
+            [0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5, -1, -1]
+        )
+        # map sorted positions back to original positions, keeping -1 as "missing"
+        exp_ls = exp_ls.take(exp_li)
+        exp_ls[exp_li == -1] = -1
+
+        exp_rs = exp_rs.take(exp_ri)
+        exp_rs[exp_ri == -1] = -1
+
+        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
+        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+
+    def test_cython_right_outer_join(self):
+        left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
+        right = np.array([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
+        max_group = 5
+
+        rs, ls = left_outer_join(right, left, max_group)  # operands swapped
+
+        exp_ls = left.argsort(kind="mergesort")
+        exp_rs = right.argsort(kind="mergesort")
+
+        # join key of each run of three in exp_li: 0, 1, 1, 1
+        exp_li = np.array(
+            [
+                0,
+                1,
+                2,
+                3,
+                4,
+                5,
+                3,
+                4,
+                5,
+                3,
+                4,
+                5,
+                # keys 2, 2 (runs of three), then right-only key 4 (no match: -1)
+                6,
+                7,
+                8,
+                6,
+                7,
+                8,
+                -1,
+            ]
+        )
+        exp_ri = np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6])
+        # map sorted positions back to original positions, keeping -1 as "missing"
+        exp_ls = exp_ls.take(exp_li)
+        exp_ls[exp_li == -1] = -1
+
+        exp_rs = exp_rs.take(exp_ri)
+        exp_rs[exp_ri == -1] = -1
+
+        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
+        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+
+    def test_cython_inner_join(self):
+        left = np.array([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
+        right = np.array([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64)
+        max_group = 5
+
+        ls, rs = inner_join(left, right, max_group)
+        # stable sort order of each side; exp_li / exp_ri below index into it
+        exp_ls = left.argsort(kind="mergesort")
+        exp_rs = right.argsort(kind="mergesort")
+
+        exp_li = np.array([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8])
+        exp_ri = np.array([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5])
+        # neither index array contains -1 here, so the masking below changes nothing
+        exp_ls = exp_ls.take(exp_li)
+        exp_ls[exp_li == -1] = -1
+
+        exp_rs = exp_rs.take(exp_ri)
+        exp_rs[exp_ri == -1] = -1
+
+        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
+        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
+
 
 def test_left_join_indexer_unique():
     a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
@@ -243,10 +335,10 @@ def test_left_join_indexer():
 
 
 def test_left_join_indexer2():
-    idx = Index([1, 1, 2, 5])
-    idx2 = Index([1, 2, 5, 7, 9])
+    idx = np.array([1, 1, 2, 5], dtype=np.int64)
+    idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)
 
-    res, lidx, ridx = libjoin.left_join_indexer(idx2.values, idx.values)
+    res, lidx, ridx = libjoin.left_join_indexer(idx2, idx)
 
     exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
     tm.assert_almost_equal(res, exp_res)
@@ -259,10 +351,10 @@ def test_left_join_indexer2():
 
 
 def test_outer_join_indexer2():
-    idx = Index([1, 1, 2, 5])
-    idx2 = Index([1, 2, 5, 7, 9])
+    idx = np.array([1, 1, 2, 5], dtype=np.int64)
+    idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)
 
-    res, lidx, ridx = libjoin.outer_join_indexer(idx2.values, idx.values)
+    res, lidx, ridx = libjoin.outer_join_indexer(idx2, idx)
 
     exp_res = np.array([1, 1, 2, 5, 7, 9], dtype=np.int64)
     tm.assert_almost_equal(res, exp_res)
@@ -275,10 +367,10 @@ def test_outer_join_indexer2():
 
 
 def test_inner_join_indexer2():
-    idx = Index([1, 1, 2, 5])
-    idx2 = Index([1, 2, 5, 7, 9])
+    idx = np.array([1, 1, 2, 5], dtype=np.int64)
+    idx2 = np.array([1, 2, 5, 7, 9], dtype=np.int64)
 
-    res, lidx, ridx = libjoin.inner_join_indexer(idx2.values, idx.values)
+    res, lidx, ridx = libjoin.inner_join_indexer(idx2, idx)
 
     exp_res = np.array([1, 1, 2, 5], dtype=np.int64)
     tm.assert_almost_equal(res, exp_res)
@@ -288,59 +380,3 @@ def test_inner_join_indexer2():
 
     exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64)
     tm.assert_almost_equal(ridx, exp_ridx)
-
-
-def test_merge_join_categorical_multiindex():
-    # From issue 16627
-    a = {
-        "Cat1": Categorical(["a", "b", "a", "c", "a", "b"], ["a", "b", "c"]),
-        "Int1": [0, 1, 0, 1, 0, 0],
-    }
-    a = DataFrame(a)
-
-    b = {
-        "Cat": Categorical(["a", "b", "c", "a", "b", "c"], ["a", "b", "c"]),
-        "Int": [0, 0, 0, 1, 1, 1],
-        "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
-    }
-    b = DataFrame(b).set_index(["Cat", "Int"])["Factor"]
-
-    expected = merge(
-        a,
-        b.reset_index(),
-        left_on=["Cat1", "Int1"],
-        right_on=["Cat", "Int"],
-        how="left",
-    )
-    result = a.join(b, on=["Cat1", "Int1"])
-    expected = expected.drop(["Cat", "Int"], axis=1)
-    tm.assert_frame_equal(expected, result)
-
-    # Same test, but with ordered categorical
-    a = {
-        "Cat1": Categorical(
-            ["a", "b", "a", "c", "a", "b"], ["b", "a", "c"], ordered=True
-        ),
-        "Int1": [0, 1, 0, 1, 0, 0],
-    }
-    a = DataFrame(a)
-
-    b = {
-        "Cat": Categorical(
-            ["a", "b", "c", "a", "b", "c"], ["b", "a", "c"], ordered=True
-        ),
-        "Int": [0, 0, 0, 1, 1, 1],
-        "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
-    }
-    b = DataFrame(b).set_index(["Cat", "Int"])["Factor"]
-
-    expected = merge(
-        a,
-        b.reset_index(),
-        left_on=["Cat1", "Int1"],
-        right_on=["Cat", "Int"],
-        how="left",
-    )
-    result = a.join(b, on=["Cat1", "Int1"])
-    expected = expected.drop(["Cat", "Int"], axis=1)
-    tm.assert_frame_equal(expected, result)
diff --git a/pandas/tests/test_lib.py b/pandas/tests/libs/test_lib.py
similarity index 100%
rename from pandas/tests/test_lib.py
rename to pandas/tests/libs/test_lib.py
diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
index 8108cd14b872a..af1e95313f365 100644
--- a/pandas/tests/reshape/merge/test_join.py
+++ b/pandas/tests/reshape/merge/test_join.py
@@ -2,8 +2,6 @@
 from numpy.random import randn
 import pytest
 
-from pandas._libs.join import inner_join, left_outer_join
-
 import pandas as pd
 from pandas import DataFrame, Index, MultiIndex, Series, concat, merge
 import pandas._testing as tm
@@ -43,96 +41,6 @@ def setup_method(self, method):
             {"MergedA": data["A"], "MergedD": data["D"]}, index=data["C"]
         )
 
-    def test_cython_left_outer_join(self):
-        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
-        right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
-        max_group = 5
-
-        ls, rs = left_outer_join(left, right, max_group)
-
-        exp_ls = left.argsort(kind="mergesort")
-        exp_rs = right.argsort(kind="mergesort")
-
-        exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8, 9, 10])
-        exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5, -1, -1])
-
-        exp_ls = exp_ls.take(exp_li)
-        exp_ls[exp_li == -1] = -1
-
-        exp_rs = exp_rs.take(exp_ri)
-        exp_rs[exp_ri == -1] = -1
-
-        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
-        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
-
-    def test_cython_right_outer_join(self):
-        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
-        right = a_([1, 1, 0, 4, 2, 2, 1], dtype=np.int64)
-        max_group = 5
-
-        rs, ls = left_outer_join(right, left, max_group)
-
-        exp_ls = left.argsort(kind="mergesort")
-        exp_rs = right.argsort(kind="mergesort")
-
-        #            0        1        1        1
-        exp_li = a_(
-            [
-                0,
-                1,
-                2,
-                3,
-                4,
-                5,
-                3,
-                4,
-                5,
-                3,
-                4,
-                5,
-                #            2        2        4
-                6,
-                7,
-                8,
-                6,
-                7,
-                8,
-                -1,
-            ]
-        )
-        exp_ri = a_([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6])
-
-        exp_ls = exp_ls.take(exp_li)
-        exp_ls[exp_li == -1] = -1
-
-        exp_rs = exp_rs.take(exp_ri)
-        exp_rs[exp_ri == -1] = -1
-
-        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
-        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
-
-    def test_cython_inner_join(self):
-        left = a_([0, 1, 2, 1, 2, 0, 0, 1, 2, 3, 3], dtype=np.int64)
-        right = a_([1, 1, 0, 4, 2, 2, 1, 4], dtype=np.int64)
-        max_group = 5
-
-        ls, rs = inner_join(left, right, max_group)
-
-        exp_ls = left.argsort(kind="mergesort")
-        exp_rs = right.argsort(kind="mergesort")
-
-        exp_li = a_([0, 1, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 8, 8])
-        exp_ri = a_([0, 0, 0, 1, 2, 3, 1, 2, 3, 1, 2, 3, 4, 5, 4, 5, 4, 5])
-
-        exp_ls = exp_ls.take(exp_li)
-        exp_ls[exp_li == -1] = -1
-
-        exp_rs = exp_rs.take(exp_ri)
-        exp_rs[exp_ri == -1] = -1
-
-        tm.assert_numpy_array_equal(ls, exp_ls, check_dtype=False)
-        tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False)
-
     def test_left_outer_join(self):
         joined_key2 = merge(self.df, self.df2, on="key2")
         _check_join(self.df, self.df2, joined_key2, ["key2"], how="left")
diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py
index 7d701d26185f1..c4c9b0e516192 100644
--- a/pandas/tests/reshape/merge/test_merge.py
+++ b/pandas/tests/reshape/merge/test_merge.py
@@ -2227,3 +2227,59 @@ def test_categorical_non_unique_monotonic(n_categories):
         index=left_index,
     )
     tm.assert_frame_equal(expected, result)
+
+
+def test_merge_join_categorical_multiindex():
+    # From issue 16627
+    a = {
+        "Cat1": Categorical(["a", "b", "a", "c", "a", "b"], ["a", "b", "c"]),
+        "Int1": [0, 1, 0, 1, 0, 0],
+    }
+    a = DataFrame(a)
+    # b ends up as the "Factor" column indexed by the ("Cat", "Int") pair
+    b = {
+        "Cat": Categorical(["a", "b", "c", "a", "b", "c"], ["a", "b", "c"]),
+        "Int": [0, 0, 0, 1, 1, 1],
+        "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
+    }
+    b = DataFrame(b).set_index(["Cat", "Int"])["Factor"]
+    # reference: explicit left merge on the reset index, then drop the right keys
+    expected = merge(
+        a,
+        b.reset_index(),
+        left_on=["Cat1", "Int1"],
+        right_on=["Cat", "Int"],
+        how="left",
+    )
+    expected = expected.drop(["Cat", "Int"], axis=1)
+    result = a.join(b, on=["Cat1", "Int1"])
+    tm.assert_frame_equal(expected, result)
+
+    # Same test, but with ordered categorical
+    a = {
+        "Cat1": Categorical(
+            ["a", "b", "a", "c", "a", "b"], ["b", "a", "c"], ordered=True
+        ),
+        "Int1": [0, 1, 0, 1, 0, 0],
+    }
+    a = DataFrame(a)
+    # categories are listed as b, a, c on both sides
+    b = {
+        "Cat": Categorical(
+            ["a", "b", "c", "a", "b", "c"], ["b", "a", "c"], ordered=True
+        ),
+        "Int": [0, 0, 0, 1, 1, 1],
+        "Factor": [1.1, 1.2, 1.3, 1.4, 1.5, 1.6],
+    }
+    b = DataFrame(b).set_index(["Cat", "Int"])["Factor"]
+    # reference: explicit left merge on the reset index, then drop the right keys
+    expected = merge(
+        a,
+        b.reset_index(),
+        left_on=["Cat1", "Int1"],
+        right_on=["Cat", "Int"],
+        how="left",
+    )
+    expected = expected.drop(["Cat", "Int"], axis=1)
+    result = a.join(b, on=["Cat1", "Int1"])
+    tm.assert_frame_equal(expected, result)
diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py
index 3a1279c481a1d..ee8e2385fe698 100644
--- a/pandas/tests/test_algos.py
+++ b/pandas/tests/test_algos.py
@@ -3,11 +3,9 @@
 import struct
 
 import numpy as np
-from numpy.random import RandomState
 import pytest
 
 from pandas._libs import algos as libalgos, hashtable as ht
-from pandas._libs.groupby import group_var_float32, group_var_float64
 from pandas.compat import IS64
 from pandas.compat.numpy import np_array_datetime64_compat
 import pandas.util._test_decorators as td
@@ -1409,122 +1407,6 @@ def test_unique_tuples(self, arr, unique):
         tm.assert_numpy_array_equal(result, expected)
 
 
-class GroupVarTestMixin:
-    def test_group_var_generic_1d(self):
-        prng = RandomState(1234)
-
-        out = (np.nan * np.ones((5, 1))).astype(self.dtype)
-        counts = np.zeros(5, dtype="int64")
-        values = 10 * prng.rand(15, 1).astype(self.dtype)
-        labels = np.tile(np.arange(5), (3,)).astype("int64")
-
-        expected_out = (
-            np.squeeze(values).reshape((5, 3), order="F").std(axis=1, ddof=1) ** 2
-        )[:, np.newaxis]
-        expected_counts = counts + 3
-
-        self.algo(out, counts, values, labels)
-        assert np.allclose(out, expected_out, self.rtol)
-        tm.assert_numpy_array_equal(counts, expected_counts)
-
-    def test_group_var_generic_1d_flat_labels(self):
-        prng = RandomState(1234)
-
-        out = (np.nan * np.ones((1, 1))).astype(self.dtype)
-        counts = np.zeros(1, dtype="int64")
-        values = 10 * prng.rand(5, 1).astype(self.dtype)
-        labels = np.zeros(5, dtype="int64")
-
-        expected_out = np.array([[values.std(ddof=1) ** 2]])
-        expected_counts = counts + 5
-
-        self.algo(out, counts, values, labels)
-
-        assert np.allclose(out, expected_out, self.rtol)
-        tm.assert_numpy_array_equal(counts, expected_counts)
-
-    def test_group_var_generic_2d_all_finite(self):
-        prng = RandomState(1234)
-
-        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
-        counts = np.zeros(5, dtype="int64")
-        values = 10 * prng.rand(10, 2).astype(self.dtype)
-        labels = np.tile(np.arange(5), (2,)).astype("int64")
-
-        expected_out = np.std(values.reshape(2, 5, 2), ddof=1, axis=0) ** 2
-        expected_counts = counts + 2
-
-        self.algo(out, counts, values, labels)
-        assert np.allclose(out, expected_out, self.rtol)
-        tm.assert_numpy_array_equal(counts, expected_counts)
-
-    def test_group_var_generic_2d_some_nan(self):
-        prng = RandomState(1234)
-
-        out = (np.nan * np.ones((5, 2))).astype(self.dtype)
-        counts = np.zeros(5, dtype="int64")
-        values = 10 * prng.rand(10, 2).astype(self.dtype)
-        values[:, 1] = np.nan
-        labels = np.tile(np.arange(5), (2,)).astype("int64")
-
-        expected_out = np.vstack(
-            [
-                values[:, 0].reshape(5, 2, order="F").std(ddof=1, axis=1) ** 2,
-                np.nan * np.ones(5),
-            ]
-        ).T.astype(self.dtype)
-        expected_counts = counts + 2
-
-        self.algo(out, counts, values, labels)
-        tm.assert_almost_equal(out, expected_out, rtol=0.5e-06)
-        tm.assert_numpy_array_equal(counts, expected_counts)
-
-    def test_group_var_constant(self):
-        # Regression test from GH 10448.
-
-        out = np.array([[np.nan]], dtype=self.dtype)
-        counts = np.array([0], dtype="int64")
-        values = 0.832845131556193 * np.ones((3, 1), dtype=self.dtype)
-        labels = np.zeros(3, dtype="int64")
-
-        self.algo(out, counts, values, labels)
-
-        assert counts[0] == 3
-        assert out[0, 0] >= 0
-        tm.assert_almost_equal(out[0, 0], 0.0)
-
-
-class TestGroupVarFloat64(GroupVarTestMixin):
-    __test__ = True
-
-    algo = staticmethod(group_var_float64)
-    dtype = np.float64
-    rtol = 1e-5
-
-    def test_group_var_large_inputs(self):
-
-        prng = RandomState(1234)
-
-        out = np.array([[np.nan]], dtype=self.dtype)
-        counts = np.array([0], dtype="int64")
-        values = (prng.rand(10 ** 6) + 10 ** 12).astype(self.dtype)
-        values.shape = (10 ** 6, 1)
-        labels = np.zeros(10 ** 6, dtype="int64")
-
-        self.algo(out, counts, values, labels)
-
-        assert counts[0] == 10 ** 6
-        tm.assert_almost_equal(out[0, 0], 1.0 / 12, rtol=0.5e-3)
-
-
-class TestGroupVarFloat32(GroupVarTestMixin):
-    __test__ = True
-
-    algo = staticmethod(group_var_float32)
-    dtype = np.float32
-    rtol = 1e-2
-
-
 class TestHashTable:
     def test_string_hashtable_set_item_signature(self):
         # GH#30419 fix typing in StringHashTable.set_item to prevent segfault