pandas-dev · TomAugspurger · Jul 15, 2020 · Jun 27, 2020 · Jun 27, 2020 · Jun 27, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -992,6 +992,7 @@ Missing
 - :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
 - Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`)
 - passing :class:`NA` will into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
+- :meth:`SeriesGroupBy.transform` now correctly handles missing values when the groupby is created with ``dropna=False`` (:issue:`35014`)
 
 MultiIndex
 ^^^^^^^^^^

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -548,8 +548,10 @@ def _transform_general(
         # we will only try to coerce the result type if
         # we have a numeric dtype, as these are *always* user-defined funcs
         # the cython take a different path (and casting)
+        # make sure we don't accidentally upcast (GH35014)
+        types = ["bool", "int32", "int64", "float32", "float64"]
         dtype = self._selected_obj.dtype
-        if is_numeric_dtype(dtype):
+        if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
             result = maybe_downcast_to_dtype(result, dtype)
 
         result.name = self._selected_obj.name

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -54,6 +54,7 @@ class providing the base-class of operations.
 )
 from pandas.core.dtypes.missing import isna, notna
 
+import pandas as pd
 from pandas.core import nanops
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, DatetimeArray
@@ -624,7 +625,10 @@ def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        return self._get_indices([name])[0]
+        if isna(name):
+            return self._get_indices([pd.NaT])[0]
+        else:
+            return self._get_indices([name])[0]
 
     @cache_readonly
     def _selected_obj(self):
@@ -802,7 +806,7 @@ def get_group(self, name, obj=None):
         if obj is None:
             obj = self._selected_obj
 
-        inds = self._get_index(name)
+        inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
         if not len(inds):
             raise KeyError(name)
 

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -20,6 +20,7 @@
 )
 from pandas.core.dtypes.generic import ABCSeries
 
+import pandas as pd
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, ExtensionArray
 import pandas.core.common as com
@@ -558,7 +559,16 @@ def indices(self):
             return self.grouper.indices
 
         values = Categorical(self.grouper)
-        return values._reverse_indexer()
+
+        # GH35014
+        reverse_indexer = values._reverse_indexer()
+        if not self.dropna and any(pd.isna(v) for v in values):
+            return {
+                **reverse_indexer,
+                pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
+            }
+        else:
+            return reverse_indexer
 
     @property
     def codes(self) -> np.ndarray:

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
@@ -162,6 +162,27 @@ def test_groupby_dropna_series_by(dropna, expected):
     tm.assert_series_equal(result, expected)
 
 
+def test_slice_groupby_then_transform():
+    # GH35014: with dropna=False the NaN key forms its own group, so transform
+    # must return the same sizes for the full groupby and for both slice forms.
+    df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]})
+    gb = df.groupby("A", dropna=False)
+
+    res = gb.transform(len)
+    expected = pd.DataFrame({"B": [2, 2, 1, 1]})
+    tm.assert_frame_equal(res, expected)
+
+    gb_slice = gb[["B"]]
+    res = gb_slice.transform(len)
+    expected = pd.DataFrame({"B": [2, 2, 1, 1]})
+    tm.assert_frame_equal(res, expected)
+
+    gb_slice = gb["B"]
+    res = gb_slice.transform(len)
+    expected = pd.Series(data=[2, 2, 1, 1], name="B")
+    tm.assert_series_equal(res, expected)
+
+
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
@@ -1250,23 +1250,31 @@ def test_to_json_large_numbers(self, bigNum):
         json = series.to_json()
         expected = '{"articleId":' + str(bigNum) + "}"
         assert json == expected
-        # GH 20599
+
+        df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
+        json = df.to_json()
+        expected = '{"0":{"articleId":' + str(bigNum) + "}}"
+        assert json == expected
+
+    @pytest.mark.parametrize("bigNum", [2 ** 64 + 1, -(2 ** 64 + 2)])
+    def test_read_json_large_numbers(self, bigNum):
+        # GH20599
+
+        series = Series(bigNum, dtype=object, index=["articleId"])
+        json = '{"articleId":' + str(bigNum) + "}"
         with pytest.raises(ValueError):
             json = StringIO(json)
             result = read_json(json)
             tm.assert_series_equal(series, result)
 
         df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
-        json = df.to_json()
-        expected = '{"0":{"articleId":' + str(bigNum) + "}}"
-        assert json == expected
-        # GH 20599
+        json = '{"0":{"articleId":' + str(bigNum) + "}}"
         with pytest.raises(ValueError):
             json = StringIO(json)
             result = read_json(json)
             tm.assert_frame_equal(df, result)
 
-    def test_read_json_large_numbers(self):
+    def test_read_json_large_numbers2(self):
         # GH18842
         json = '{"articleId": "1404366058080022500245"}'
         json = StringIO(json)