diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 14e9fda1b910c..42cd34c94b069 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -83,32 +83,44 @@ did not have the same index as the input.
 
 .. code-block:: ipython
 
-    In [3]: df.groupby('a', dropna=True).transform(lambda x: x.sum())
+    In [3]: # Value in the last row should be np.nan
+            df.groupby('a', dropna=True).transform('sum')
     Out[3]:
        b
     0  5
     1  5
+    2  5
 
-    In [3]: df.groupby('a', dropna=True).transform(lambda x: x)
+    In [3]: # Should have one additional row with the value np.nan
+            df.groupby('a', dropna=True).transform(lambda x: x.sum())
     Out[3]:
        b
-    0  2
-    1  3
+    0  5
+    1  5
+
+    In [3]: # The value in the last row is np.nan interpreted as an integer
+            df.groupby('a', dropna=True).transform('ffill')
+    Out[3]:
+       b
+    0  2
+    1  3
+    2 -9223372036854775808
 
-    In [3]: df.groupby('a', dropna=True).transform('sum')
+    In [3]: # Should have one additional row with the value np.nan
+            df.groupby('a', dropna=True).transform(lambda x: x)
     Out[3]:
        b
-    0  5
-    1  5
-    2  5
+    0  2
+    1  3
 
 *New behavior*:
 
 .. ipython:: python
 
+    df.groupby('a', dropna=True).transform('sum')
     df.groupby('a', dropna=True).transform(lambda x: x.sum())
+    df.groupby('a', dropna=True).transform('ffill')
     df.groupby('a', dropna=True).transform(lambda x: x)
-    df.groupby('a', dropna=True).transform('sum')
 
 
 .. _whatsnew_150.notable_bug_fixes.visualization:
diff --git a/pandas/core/groupby/base.py b/pandas/core/groupby/base.py
index 0c8474c9badc7..ec9a2e4a4b5c0 100644
--- a/pandas/core/groupby/base.py
+++ b/pandas/core/groupby/base.py
@@ -70,7 +70,6 @@ class OutputKey:
         "mean",
         "median",
         "min",
-        "ngroup",
         "nth",
         "nunique",
         "prod",
@@ -113,6 +112,7 @@ def maybe_normalize_deprecated_kernels(kernel):
         "diff",
         "ffill",
         "fillna",
+        "ngroup",
         "pad",
         "pct_change",
         "rank",
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 9f80c8f91ac6f..4a617d1b63639 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -62,6 +62,7 @@ class providing the base-class of operations.
 )
 from pandas.util._exceptions import find_stack_level
 
+from pandas.core.dtypes.cast import ensure_dtype_can_hold_na
 from pandas.core.dtypes.common import (
     is_bool_dtype,
     is_datetime64_dtype,
@@ -950,7 +951,13 @@ def curried(x):
             if name in base.plotting_methods:
                 return self.apply(curried)
 
-            return self._python_apply_general(curried, self._obj_with_exclusions)
+            result = self._python_apply_general(curried, self._obj_with_exclusions)
+
+            if self.grouper.has_dropped_na and name in base.transformation_kernels:
+                # result will have dropped rows due to nans, fill with null
+                # and ensure index is ordered same as the input
+                result = self._set_result_index_ordered(result)
+            return result
 
         wrapper.__name__ = name
         return wrapper
@@ -2608,7 +2615,11 @@ def blk_func(values: ArrayLike) -> ArrayLike:
                 #  then there will be no -1s in indexer, so we can use
                 #  the original dtype (no need to ensure_dtype_can_hold_na)
                 if isinstance(values, np.ndarray):
-                    out = np.empty(values.shape, dtype=values.dtype)
+                    dtype = values.dtype
+                    if self.grouper.has_dropped_na:
+                        # dropped null groups give rise to nan in the result
+                        dtype = ensure_dtype_can_hold_na(values.dtype)
+                    out = np.empty(values.shape, dtype=dtype)
                 else:
                     out = type(values)._empty(values.shape, dtype=values.dtype)
 
@@ -3114,9 +3125,16 @@ def ngroup(self, ascending: bool = True):
         """
         with self._group_selection_context():
             index = self._selected_obj.index
-            result = self._obj_1d_constructor(
-                self.grouper.group_info[0], index, dtype=np.int64
-            )
+            comp_ids = self.grouper.group_info[0]
+
+            dtype: type
+            if self.grouper.has_dropped_na:
+                comp_ids = np.where(comp_ids == -1, np.nan, comp_ids)
+                dtype = np.float64
+            else:
+                dtype = np.int64
+
+            result = self._obj_1d_constructor(comp_ids, index, dtype=dtype)
             if not ascending:
                 result = self.ngroups - 1 - result
             return result
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 15bc262997075..5f15e11c4740c 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -98,15 +98,25 @@ class WrappedCythonOp:
     """
     Dispatch logic for functions defined in _libs.groupby
+
+    Parameters
+    ----------
+    kind: str
+        Whether the operation is an aggregate or transform.
+    how: str
+        Operation name, e.g. "mean".
+    has_dropped_na: bool
+        True precisely when dropna=True and the grouper contains a null value.
     """
 
     # Functions for which we do _not_ attempt to cast the cython result
     #  back to the original dtype.
     cast_blocklist = frozenset(["rank", "count", "size", "idxmin", "idxmax"])
 
-    def __init__(self, kind: str, how: str) -> None:
+    def __init__(self, kind: str, how: str, has_dropped_na: bool) -> None:
         self.kind = kind
         self.how = how
+        self.has_dropped_na = has_dropped_na
 
     _CYTHON_FUNCTIONS = {
         "aggregate": {
@@ -194,7 +204,9 @@ def _get_cython_vals(self, values: np.ndarray) -> np.ndarray:
             values = ensure_float64(values)
 
         elif values.dtype.kind in ["i", "u"]:
-            if how in ["add", "var", "prod", "mean", "ohlc"]:
+            if how in ["add", "var", "prod", "mean", "ohlc"] or (
+                self.kind == "transform" and self.has_dropped_na
+            ):
                 # result may still include NaN, so we have to cast
                 values = ensure_float64(values)
 
@@ -582,6 +594,10 @@ def _call_cython_op(
 
         result = result.T
 
+        if self.how == "rank" and self.has_dropped_na:
+            # TODO: Wouldn't need this if group_rank supported mask
+            result = np.where(comp_ids < 0, np.nan, result)
+
         if self.how not in self.cast_blocklist:
             # e.g. if we are int64 and need to restore to datetime64/timedelta64
             #  "rank" is the only member of cast_blocklist we get here
@@ -959,7 +975,7 @@ def _cython_operation(
         """
         assert kind in ["transform", "aggregate"]
 
-        cy_op = WrappedCythonOp(kind=kind, how=how)
+        cy_op = WrappedCythonOp(kind=kind, how=how, has_dropped_na=self.has_dropped_na)
 
         ids, _, _ = self.group_info
         ngroups = self.ngroups
diff --git a/pandas/tests/apply/test_frame_transform.py b/pandas/tests/apply/test_frame_transform.py
index 3384f9d46029a..2d4deff8ebc4a 100644
--- a/pandas/tests/apply/test_frame_transform.py
+++ b/pandas/tests/apply/test_frame_transform.py
@@ -139,6 +139,10 @@ def test_transform_bad_dtype(op, frame_or_series, request):
                 raises=ValueError, reason="GH 40418: rank does not raise a TypeError"
             )
         )
+    elif op == "ngroup":
+        request.node.add_marker(
+            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
+        )
 
     obj = DataFrame({"A": 3 * [object]})  # DataFrame that will fail on most transforms
     obj = tm.get_obj(obj, frame_or_series)
@@ -157,9 +161,14 @@ def test_transform_bad_dtype(op, frame_or_series, request):
 
 
 @pytest.mark.parametrize("op", frame_kernels_raise)
-def test_transform_partial_failure_typeerror(op):
+def test_transform_partial_failure_typeerror(request, op):
     # GH 35964
 
+    if op == "ngroup":
+        request.node.add_marker(
+            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
+        )
+
     # Using object makes most transform kernels fail
     df = DataFrame({"A": 3 * [object], "B": [1, 2, 3]})
 
diff --git a/pandas/tests/apply/test_str.py b/pandas/tests/apply/test_str.py
index 554fd4174c7a4..2af063dee8b55 100644
--- a/pandas/tests/apply/test_str.py
+++ b/pandas/tests/apply/test_str.py
@@ -243,8 +243,12 @@ def test_agg_cython_table_transform_frame(df, func, expected, axis):
 
 
 @pytest.mark.parametrize("op", series_transform_kernels)
-def test_transform_groupby_kernel_series(string_series, op):
+def test_transform_groupby_kernel_series(request, string_series, op):
     # GH 35964
+    if op == "ngroup":
+        request.node.add_marker(
+            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
+        )
     # TODO(2.0) Remove after pad/backfill deprecation enforced
     op = maybe_normalize_deprecated_kernels(op)
     args = [0.0] if op == "fillna" else []
@@ -255,9 +259,15 @@ def test_transform_groupby_kernel_series(string_series, op):
 
 
 @pytest.mark.parametrize("op", frame_transform_kernels)
-def test_transform_groupby_kernel_frame(axis, float_frame, op):
+def test_transform_groupby_kernel_frame(request, axis, float_frame, op):
     # TODO(2.0) Remove after pad/backfill deprecation enforced
     op = maybe_normalize_deprecated_kernels(op)
+
+    if op == "ngroup":
+        request.node.add_marker(
+            pytest.mark.xfail(raises=ValueError, reason="ngroup not valid for NDFrame")
+        )
+
     # GH 35964
     args = [0.0] if op == "fillna" else []
 
diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py
index 83b8d5c29bbf0..7830c229ece2f 100644
--- a/pandas/tests/groupby/test_rank.py
+++ b/pandas/tests/groupby/test_rank.py
@@ -658,6 +658,8 @@ def test_non_unique_index():
     )
     result = df.groupby([df.index, "A"]).value.rank(ascending=True, pct=True)
     expected = Series(
-        [1.0] * 4, index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4, name="value"
+        [1.0, 1.0, 1.0, np.nan],
+        index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4,
+        name="value",
     )
     tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 0f450adf67b23..3042e38d9014c 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -168,6 +168,10 @@ def test_transform_axis_1(request, transformation_func):
 
     # TODO(2.0) Remove after pad/backfill deprecation enforced
     transformation_func = maybe_normalize_deprecated_kernels(transformation_func)
+    if transformation_func == "ngroup":
+        msg = "ngroup fails with axis=1: #45986"
+        request.node.add_marker(pytest.mark.xfail(reason=msg))
+
     warn = None
     if transformation_func == "tshift":
         warn = FutureWarning
@@ -383,6 +387,15 @@ def test_transform_transformation_func(request, transformation_func):
     elif transformation_func == "fillna":
         test_op = lambda x: x.transform("fillna", value=0)
         mock_op = lambda x: x.fillna(value=0)
+    elif transformation_func == "ngroup":
+        test_op = lambda x: x.transform("ngroup")
+        counter = -1
+
+        def mock_op(x):
+            nonlocal counter
+            counter += 1
+            return Series(counter, index=x.index)
+
     elif transformation_func == "tshift":
         msg = (
             "Current behavior of groupby.tshift is inconsistent with other "
@@ -394,10 +407,14 @@ def test_transform_transformation_func(request, transformation_func):
         mock_op = lambda x: getattr(x, transformation_func)()
 
     result = test_op(df.groupby("A"))
-    groups = [df[["B"]].iloc[:4], df[["B"]].iloc[4:6], df[["B"]].iloc[6:]]
-    expected = concat([mock_op(g) for g in groups])
+    # pass the group in same order as iterating `for ... in df.groupby(...)`
+    # but reorder to match df's index since this is a transform
+    groups = [df[["B"]].iloc[4:6], df[["B"]].iloc[6:], df[["B"]].iloc[:4]]
+    expected = concat([mock_op(g) for g in groups]).sort_index()
+    # sort_index does not preserve the freq
+    expected = expected.set_axis(df.index)
 
-    if transformation_func == "cumcount":
+    if transformation_func in ("cumcount", "ngroup"):
         tm.assert_series_equal(result, expected)
     else:
         tm.assert_frame_equal(result, expected)
@@ -1122,10 +1139,6 @@ def test_transform_agg_by_name(request, reduction_func, obj):
     func = reduction_func
     g = obj.groupby(np.repeat([0, 1], 3))
-    if func == "ngroup":  # GH#27468
-        request.node.add_marker(
-            pytest.mark.xfail(reason="TODO: g.transform('ngroup') doesn't work")
-        )
     if func == "corrwith" and isinstance(obj, Series):  # GH#32293
         request.node.add_marker(
             pytest.mark.xfail(reason="TODO: implement SeriesGroupBy.corrwith")
         )
@@ -1137,8 +1150,8 @@ def test_transform_agg_by_name(request, reduction_func, obj):
     # this is the *definition* of a transformation
     tm.assert_index_equal(result.index, obj.index)
 
-    if func != "size" and obj.ndim == 2:
-        # size returns a Series, unlike other transforms
+    if func not in ("ngroup", "size") and obj.ndim == 2:
+        # size/ngroup return a Series, unlike other transforms
         tm.assert_index_equal(result.columns, obj.columns)
 
     # verify that values were broadcasted across each group
@@ -1312,7 +1325,7 @@ def test_null_group_lambda_self(sort, dropna):
 
 def test_null_group_str_reducer(request, dropna, reduction_func):
     # GH 17093
-    if reduction_func in ("corrwith", "ngroup"):
+    if reduction_func == "corrwith":
         msg = "incorrectly raises"
         request.node.add_marker(pytest.mark.xfail(reason=msg))
     index = [1, 2, 3, 4]  # test transform preserves non-standard index
@@ -1358,31 +1371,11 @@ def test_null_group_str_reducer(request, dropna, reduction_func):
 
 
 @pytest.mark.filterwarnings("ignore:tshift is deprecated:FutureWarning")
-def test_null_group_str_transformer(
-    request, using_array_manager, dropna, transformation_func
-):
+def test_null_group_str_transformer(request, dropna, transformation_func):
     # GH 17093
-    xfails_block = (
-        "cummax",
-        "cummin",
-        "cumsum",
-        "fillna",
-        "rank",
-        "backfill",
-        "ffill",
-        "bfill",
-        "pad",
-    )
-    xfails_array = ("cummax", "cummin", "cumsum", "fillna", "rank")
     if transformation_func == "tshift":
         msg = "tshift requires timeseries"
         request.node.add_marker(pytest.mark.xfail(reason=msg))
-    elif dropna and (
-        (not using_array_manager and transformation_func in xfails_block)
-        or (using_array_manager and transformation_func in xfails_array)
-    ):
-        msg = "produces incorrect results when nans are present"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
     args = (0,) if transformation_func == "fillna" else ()
     df = DataFrame({"A": [1, 1, np.nan], "B": [1, 2, 2]}, index=[1, 2, 3])
     gb = df.groupby("A", dropna=dropna)
@@ -1420,10 +1413,6 @@ def test_null_group_str_reducer_series(request, dropna, reduction_func):
         msg = "corrwith not implemented for SeriesGroupBy"
         request.node.add_marker(pytest.mark.xfail(reason=msg))
 
-    if reduction_func == "ngroup":
-        msg = "ngroup fails"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
-
     # GH 17093
     index = [1, 2, 3, 4]  # test transform preserves non-standard index
     ser = Series([1, 2, 2, 3], index=index)
@@ -1470,15 +1459,6 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func)
    if transformation_func == "tshift":
        msg = "tshift requires timeseries"
        request.node.add_marker(pytest.mark.xfail(reason=msg))
-    elif dropna and transformation_func in (
-        "cummax",
-        "cummin",
-        "cumsum",
-        "fillna",
-        "rank",
-    ):
-        msg = "produces incorrect results when nans are present"
-        request.node.add_marker(pytest.mark.xfail(reason=msg))
     args = (0,) if transformation_func == "fillna" else ()
     ser = Series([1, 2, 2], index=[1, 2, 3])
     gb = ser.groupby([1, 1, np.nan], dropna=dropna)
@@ -1502,4 +1482,10 @@ def test_null_group_str_transformer_series(request, dropna, transformation_func)
         msg = f"{transformation_func} is deprecated"
     with tm.assert_produces_warning(warn, match=msg):
         result = gb.transform(transformation_func, *args)
-    tm.assert_equal(result, expected)
+    if dropna and transformation_func == "fillna":
+        # GH#46369 - result name is the group; remove this block when fixed.
+        tm.assert_equal(result, expected, check_names=False)
+        # This should be None
+        assert result.name == 1.0
+    else:
+        tm.assert_equal(result, expected)
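
For reference, a minimal sketch of the transform behavior change described in the whatsnew hunk above. The construction of df is an assumption inferred from the outputs shown there (its real definition sits above the changed lines in v1.5.0.rst); all that matters is that the last group key is null and dropna=True drops that group.

    import numpy as np
    import pandas as pd

    # assumed frame: column 'a' holds the group keys, the last key is NaN
    df = pd.DataFrame({"a": [1, 1, np.nan], "b": [2, 3, 4]})

    # After this change the null-group row is kept and filled with NaN instead of
    # being dropped (lambda case) or cast to a nonsense integer (ffill case).
    df.groupby("a", dropna=True).transform("sum")
    df.groupby("a", dropna=True).transform(lambda x: x.sum())
    df.groupby("a", dropna=True).transform("ffill")
    df.groupby("a", dropna=True).transform(lambda x: x)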
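
A similar sketch for the ngroup change in groupby.py: when dropna=True drops a null key, the -1 codes are mapped to NaN and the result dtype becomes float64, while dropna=False keeps the int64 result. Illustrative only, not taken from the tests in this patch.

    import numpy as np
    import pandas as pd

    ser = pd.Series([10, 20, 30])

    # null key dropped -> its row becomes NaN, so the result is float64
    ser.groupby([1, 1, np.nan], dropna=True).ngroup()
    # null key kept as its own group -> result stays int64
    ser.groupby([1, 1, np.nan], dropna=False).ngroup()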
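
Finally, a sketch of what the updated expectation in test_non_unique_index exercises. The frame construction below is an assumption (the real one is defined outside the hunk); it is chosen so that each non-null key forms a single-row group while the last row's key contains NaN, which now ranks as NaN under the default dropna=True.

    import numpy as np
    import pandas as pd

    df = pd.DataFrame(
        {"A": [1.0, 2.0, 3.0, np.nan], "value": 1.0},
        index=[pd.Timestamp("20170101", tz="US/Eastern")] * 4,
    )
    # expected under this patch: [1.0, 1.0, 1.0, NaN]
    df.groupby([df.index, "A"]).value.rank(ascending=True, pct=True)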