From 30c9b83447a6d5d578344ddfd48995ae0f3bb01f Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Sat, 27 Jun 2020 05:24:03 +0000
Subject: [PATCH 01/47] add values.dtype.kind==f branch to
 array_with_unit_to_datetime

---
 pandas/_libs/tslib.pyx | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 44693d60486a9..7ff309b3725d6 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -416,7 +416,6 @@ def array_with_unit_to_datetime(
     m = cast_from_unit(None, unit)
 
     if is_raise:
-
         # try a quick conversion to i8
         # if we have nulls that are not type-compat
         # then need to iterate
@@ -429,9 +428,17 @@ def array_with_unit_to_datetime(
             fvalues = iresult.astype('f8') * m
             need_to_iterate = False
 
+        # GH20445
+        if values.dtype.kind == "f":
+            fresult = values.astype('f8', casting='same_kind', copy=False)
+            # fill by comparing to NPY_NAT constant
+            mask = fresult == NPY_NAT
+            fresult[mask] = 0.0
+            fvalues = fvalues.astype('f8') * m  # FIXME: this line segfaults rn
+            need_to_iterate = False
+
         # check the bounds
         if not need_to_iterate:
-
             if ((fvalues < Timestamp.min.value).any()
                     or (fvalues > Timestamp.max.value).any()):
                 raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
@@ -599,7 +606,6 @@ cpdef array_to_datetime(
         float offset_seconds, tz_offset
         set out_tzoffset_vals = set()
         bint string_to_dts_failed
-
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 

From cf67f903456e8742381b0f83eaa312529602708d Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Sat, 27 Jun 2020 05:35:09 +0000
Subject: [PATCH 02/47] remove unnecessary styling changes

---
 pandas/_libs/tslib.pyx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 7ff309b3725d6..faeb5ac829ee6 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -416,6 +416,7 @@ def array_with_unit_to_datetime(
     m = cast_from_unit(None, unit)
 
     if is_raise:
+
         # try a quick conversion to i8
         # if we have nulls that are not type-compat
         # then need to iterate
@@ -439,6 +440,7 @@ def array_with_unit_to_datetime(
 
         # check the bounds
         if not need_to_iterate:
+
             if ((fvalues < Timestamp.min.value).any()
                     or (fvalues > Timestamp.max.value).any()):
                 raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
@@ -606,6 +608,7 @@ cpdef array_to_datetime(
         float offset_seconds, tz_offset
         set out_tzoffset_vals = set()
         bint string_to_dts_failed
+        
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 

From a69b28cf5ae65c1f4c9df2f80891eb7f1b2f4c94 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Sat, 27 Jun 2020 05:48:15 +0000
Subject: [PATCH 03/47] added cast_from_unit definition for float

---
 pandas/_libs/tslibs/conversion.pyx | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 0811ba22977fd..2f7c0dc0bbcc0 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -76,6 +76,15 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
         frac = round(frac, p)
     return <int64_t>(base * m) + <int64_t>(frac * m)
 
+cdef inline float cast_from_unit(object ts, str unit) except? -1:
+    """ return a casting of the unit represented to nanoseconds
+        round the fractional part of a float to our precision, p """
+    cdef:
+        float m
+        int p
+
+    # TO DO: fill in body
+
 
 cpdef inline object precision_from_unit(str unit):
     """

From 9df9d4d8dbe854636a66f017cc9811618db7bc81 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 29 Jun 2020 00:36:05 +0000
Subject: [PATCH 04/47] to_datetime: added astyping for floats

---
 pandas/_libs/tslib.pyx | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index faeb5ac829ee6..346c24b901aae 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -435,20 +435,25 @@ def array_with_unit_to_datetime(
             # fill by comparing to NPY_NAT constant
             mask = fresult == NPY_NAT
             fresult[mask] = 0.0
-            fvalues = fvalues.astype('f8') * m  # FIXME: this line segfaults rn
+            m_as_float = <float64_t> m
+            fvalues = fresult.astype('f8') * m_as_float
             need_to_iterate = False
 
         # check the bounds
         if not need_to_iterate:
-
             if ((fvalues < Timestamp.min.value).any()
                     or (fvalues > Timestamp.max.value).any()):
                 raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
-            result = (iresult * m).astype('M8[ns]')
-            iresult = result.view('i8')
-            iresult[mask] = NPY_NAT
-            return result, tz
-
+            if values.dtype.kind == 'i':
+                result = (iresult * m).astype('M8[ns]')
+                iresult = result.view('i8')
+                iresult[mask] = NPY_NAT
+                return result, tz
+            elif values.dtype.kind == 'f':
+                result = (fresult * m_as_float).astype('M8[ns]')
+                fresult = result.view('f8')
+                fresult[mask] = NPY_NAT
+                return result, tz
     result = np.empty(n, dtype='M8[ns]')
     iresult = result.view('i8')
 
@@ -608,7 +613,6 @@ cpdef array_to_datetime(
         float offset_seconds, tz_offset
         set out_tzoffset_vals = set()
         bint string_to_dts_failed
-        
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 

From 5746581165bf70fd263bc7b9a56da8ef4cba1ef9 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 29 Jun 2020 00:38:59 +0000
Subject: [PATCH 05/47] revert changes

---
 pandas/_libs/tslibs/conversion.pyx | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 2f7c0dc0bbcc0..e9c3ba912298f 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -76,16 +76,6 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
         frac = round(frac, p)
     return <int64_t>(base * m) + <int64_t>(frac * m)
 
-cdef inline float cast_from_unit(object ts, str unit) except? -1:
-    """ return a casting of the unit represented to nanoseconds
-        round the fractional part of a float to our precision, p """
-    cdef:
-        float m
-        int p
-
-    # TO DO: fill in body
-
-
 cpdef inline object precision_from_unit(str unit):
     """
     Return a casting of the unit represented to nanoseconds + the precision

From 6b9d4de824746ce64b03b6e0ddc8aee66f988240 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 29 Jun 2020 00:39:35 +0000
Subject: [PATCH 06/47] revert changes

---
 pandas/_libs/tslibs/conversion.pyx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index e9c3ba912298f..0811ba22977fd 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -76,6 +76,7 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
         frac = round(frac, p)
     return <int64_t>(base * m) + <int64_t>(frac * m)
 
+
 cpdef inline object precision_from_unit(str unit):
     """
     Return a casting of the unit represented to nanoseconds + the precision

From 0e3a8763389a549f35014f7374df37f938f15c7e Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 29 Jun 2020 00:40:45 +0000
Subject: [PATCH 07/47] revert styling change

---
 pandas/_libs/tslib.pyx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 346c24b901aae..9661dda105414 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -613,6 +613,7 @@ cpdef array_to_datetime(
         float offset_seconds, tz_offset
         set out_tzoffset_vals = set()
         bint string_to_dts_failed
+        
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 

From f1ae8f562db24046c1e75258cc1091ada507347a Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 29 Jun 2020 00:42:29 +0000
Subject: [PATCH 08/47] _libs/tslib.pyx added comments

---
 pandas/_libs/tslib.pyx | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 9661dda105414..387b12555c546 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -444,6 +444,7 @@ def array_with_unit_to_datetime(
             if ((fvalues < Timestamp.min.value).any()
                     or (fvalues > Timestamp.max.value).any()):
                 raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
+            # GH20445
             if values.dtype.kind == 'i':
                 result = (iresult * m).astype('M8[ns]')
                 iresult = result.view('i8')
@@ -613,7 +614,7 @@ cpdef array_to_datetime(
         float offset_seconds, tz_offset
         set out_tzoffset_vals = set()
         bint string_to_dts_failed
-        
+
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 

From 572363a2928fa1de6b2ca2789147d5a416710faa Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 29 Jun 2020 01:06:23 +0000
Subject: [PATCH 09/47] revert pandas/_libs/tslib.pyx

---
 pandas/_libs/tslib.pyx | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 7ff309b3725d6..44693d60486a9 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -416,6 +416,7 @@ def array_with_unit_to_datetime(
     m = cast_from_unit(None, unit)
 
     if is_raise:
+
         # try a quick conversion to i8
         # if we have nulls that are not type-compat
         # then need to iterate
@@ -428,17 +429,9 @@ def array_with_unit_to_datetime(
             fvalues = iresult.astype('f8') * m
             need_to_iterate = False
 
-        # GH20445
-        if values.dtype.kind == "f":
-            fresult = values.astype('f8', casting='same_kind', copy=False)
-            # fill by comparing to NPY_NAT constant
-            mask = fresult == NPY_NAT
-            fresult[mask] = 0.0
-            fvalues = fvalues.astype('f8') * m  # FIXME: this line segfaults rn
-            need_to_iterate = False
-
         # check the bounds
         if not need_to_iterate:
+
             if ((fvalues < Timestamp.min.value).any()
                     or (fvalues > Timestamp.max.value).any()):
                 raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
@@ -606,6 +599,7 @@ cpdef array_to_datetime(
         float offset_seconds, tz_offset
         set out_tzoffset_vals = set()
         bint string_to_dts_failed
+
     # specify error conditions
     assert is_raise or is_ignore or is_coerce
 

From 38bac1ada3eadac144c4cbd5e6f61fb4138d05fc Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 1 Jul 2020 02:43:36 +0000
Subject: [PATCH 10/47] update Grouping.indices to return for nan values

---
 pandas/core/groupby/grouper.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 67003dffb90bb..3aaea43af3623 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -20,6 +20,7 @@
 )
 from pandas.core.dtypes.generic import ABCSeries
 
+import pandas as pd
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, ExtensionArray
 import pandas.core.common as com
@@ -558,7 +559,12 @@ def indices(self):
             return self.grouper.indices
 
         values = Categorical(self.grouper)
-        return values._reverse_indexer()
+
+        # GH35014
+        res = values._reverse_indexer()
+        res[np.nan] = [i for i, v in enumerate(values) if pd.isna(v)]
+        print(res)
+        return res
 
     @property
     def codes(self) -> np.ndarray:

From 65a29637e03da98f2485cc70aed9df26fc37a54e Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 1 Jul 2020 02:46:43 +0000
Subject: [PATCH 11/47] updated _GroupBy._get_index to return for nan values

---
 pandas/core/groupby/groupby.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d039b715b3c08..929efdecdcd1b 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -624,7 +624,10 @@ def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        return self._get_indices([name])[0]
+        if isna(name):
+            return [i for i, v in enumerate(self.indices) if isna(v)]
+        else:
+            return self._get_indices([name])[0]
 
     @cache_readonly
     def _selected_obj(self):
@@ -896,6 +899,7 @@ def _iterate_slices(self) -> Iterable[Series]:
         raise AbstractMethodError(self)
 
     def transform(self, func, *args, **kwargs):
+        print(f"name={name}, group={group}")
         raise AbstractMethodError(self)
 
     def _cumcount_array(self, ascending: bool = True):

From 7df44d10f08d1458f449208ffcdb5f74387a5fda Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 1 Jul 2020 02:51:08 +0000
Subject: [PATCH 12/47] revert accidental changes

---
 pandas/_libs/tslib.pyx | 27 ++++++---------------------
 1 file changed, 6 insertions(+), 21 deletions(-)

diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
index 387b12555c546..44693d60486a9 100644
--- a/pandas/_libs/tslib.pyx
+++ b/pandas/_libs/tslib.pyx
@@ -429,32 +429,17 @@ def array_with_unit_to_datetime(
             fvalues = iresult.astype('f8') * m
             need_to_iterate = False
 
-        # GH20445
-        if values.dtype.kind == "f":
-            fresult = values.astype('f8', casting='same_kind', copy=False)
-            # fill by comparing to NPY_NAT constant
-            mask = fresult == NPY_NAT
-            fresult[mask] = 0.0
-            m_as_float = <float64_t> m
-            fvalues = fresult.astype('f8') * m_as_float
-            need_to_iterate = False
-
         # check the bounds
         if not need_to_iterate:
+
             if ((fvalues < Timestamp.min.value).any()
                     or (fvalues > Timestamp.max.value).any()):
                 raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
-            # GH20445
-            if values.dtype.kind == 'i':
-                result = (iresult * m).astype('M8[ns]')
-                iresult = result.view('i8')
-                iresult[mask] = NPY_NAT
-                return result, tz
-            elif values.dtype.kind == 'f':
-                result = (fresult * m_as_float).astype('M8[ns]')
-                fresult = result.view('f8')
-                fresult[mask] = NPY_NAT
-                return result, tz
+            result = (iresult * m).astype('M8[ns]')
+            iresult = result.view('i8')
+            iresult[mask] = NPY_NAT
+            return result, tz
+
     result = np.empty(n, dtype='M8[ns]')
     iresult = result.view('i8')
 

From 4eb8a17c99a0253e5f5af2d1688c920fc25893b9 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 1 Jul 2020 02:52:48 +0000
Subject: [PATCH 13/47] revert accidental changes

---
 pandas/core/groupby/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 929efdecdcd1b..195bd1422c1b3 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -899,7 +899,7 @@ def _iterate_slices(self) -> Iterable[Series]:
         raise AbstractMethodError(self)
 
     def transform(self, func, *args, **kwargs):
-        print(f"name={name}, group={group}")
+
         raise AbstractMethodError(self)
 
     def _cumcount_array(self, ascending: bool = True):

From 21bb8e745e91e42c031b5cf0a58a3fe85900d608 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 1 Jul 2020 02:53:20 +0000
Subject: [PATCH 14/47] revert accidental changes

---
 pandas/core/groupby/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 195bd1422c1b3..58bfd73f55fd6 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -899,7 +899,6 @@ def _iterate_slices(self) -> Iterable[Series]:
         raise AbstractMethodError(self)
 
     def transform(self, func, *args, **kwargs):
-
         raise AbstractMethodError(self)
 
     def _cumcount_array(self, ascending: bool = True):

From 0daca6677d741f0e89dd22d85008936b47b1de66 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 1 Jul 2020 03:00:02 +0000
Subject: [PATCH 15/47] styling change

---
 pandas/core/groupby/grouper.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 3aaea43af3623..550d736329ae5 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -563,7 +563,6 @@ def indices(self):
         # GH35014
         res = values._reverse_indexer()
         res[np.nan] = [i for i, v in enumerate(values) if pd.isna(v)]
-        print(res)
         return res
 
     @property

From 0c0e28935e453f28b01791c52bb922179bdb6cc7 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Thu, 2 Jul 2020 18:27:58 +0000
Subject: [PATCH 16/47] added tests

---
 pandas/tests/groupby/test_groupby_dropna.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 1a525d306e9f5..4e65760df50aa 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -162,6 +162,27 @@ def test_groupby_dropna_series_by(dropna, expected):
     tm.assert_series_equal(result, expected)
 
 
+def test_slice_groupby_then_transform():
+    # GH35014
+
+    df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]})
+    gb = df.groupby("A", dropna=False)
+
+    res = gb.transform(len)
+    expected = pd.DataFrame({"B": [2, 2, 1, 1]})
+    tm.assert_frame_equal(res, expected)
+
+    gb_slice = gb[["B"]]
+    res = gb_slice.transform(len)
+    expected = pd.DataFrame({"B": [2, 2, 1, 1]})
+    tm.assert_frame_equal(res, expected)
+
+    gb_slice = gb["B"]
+    res = gb["B"].transform(len)
+    expected = pd.Series([2, 2, 1, 1])
+    tm.assert_series_equal(res, expected)
+
+
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [

From a804909bea0ffe0220be30eed6af53268dae29fd Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 6 Jul 2020 20:27:22 +0000
Subject: [PATCH 17/47] fixed groupby/groupby.py's _get_indices

---
 pandas/core/groupby/groupby.py | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 58bfd73f55fd6..98a7aba3430ca 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -575,6 +575,7 @@ def _get_indices(self, names):
         Safe get multiple indices, translate keys for
         datelike to underlying repr.
         """
+        print(f"names={names}")
 
         def get_converter(s):
             # possibly convert to the actual key types
@@ -618,16 +619,20 @@ def get_converter(s):
             converter = get_converter(index_sample)
             names = (converter(name) for name in names)
 
-        return [self.indices.get(name, []) for name in names]
+        res = []
+        for name in names:
+            if isna(name):
+                res += [v for k, v in self.indices.items() if isna(k)]
+            else:
+                res += [self.indices.get(name, [])]
+
+        return res
 
     def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        if isna(name):
-            return [i for i, v in enumerate(self.indices) if isna(v)]
-        else:
-            return self._get_indices([name])[0]
+        return self._get_indices([name])[0]
 
     @cache_readonly
     def _selected_obj(self):

From 5e4419e34f5a0a7ea1071055b9ab63b987e3bad8 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Mon, 6 Jul 2020 20:28:36 +0000
Subject: [PATCH 18/47] removed debug statement

---
 pandas/core/groupby/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 98a7aba3430ca..9be80c24ea167 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -575,7 +575,6 @@ def _get_indices(self, names):
         Safe get multiple indices, translate keys for
         datelike to underlying repr.
         """
-        print(f"names={names}")
 
         def get_converter(s):
             # possibly convert to the actual key types

From 1e694c97824a58bb4f8356ea04083389172b18f0 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 04:52:45 +0000
Subject: [PATCH 19/47] fixed naming error in test

---
 pandas/tests/groupby/test_groupby_dropna.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 4e65760df50aa..f81f6a1bf70c6 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -179,7 +179,9 @@ def test_slice_groupby_then_transform():
 
     gb_slice = gb["B"]
     res = gb["B"].transform(len)
-    expected = pd.Series([2, 2, 1, 1])
+    expected = pd.Series(data=[2, 2, 1, 1], name="B")
+    print(f"res={res}")
+    print(f"expected={expected}")
     tm.assert_series_equal(res, expected)
 
 

From d5e1a3b46f88484be5a1b0edab901d6b3e60ebe2 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 05:14:06 +0000
Subject: [PATCH 20/47] remove type coercion block

---
 pandas/core/groupby/generic.py | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index dab8475d9580c..618460ecd3026 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -38,7 +38,6 @@
     maybe_cast_result_dtype,
     maybe_convert_objects,
     maybe_downcast_numeric,
-    maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
     ensure_int64,
@@ -46,7 +45,6 @@
     is_bool,
     is_integer_dtype,
     is_interval_dtype,
-    is_numeric_dtype,
     is_object_dtype,
     is_scalar,
     needs_i8_conversion,
@@ -528,13 +526,6 @@ def _transform_general(
         else:
             result = self.obj._constructor(dtype=np.float64)
 
-        # we will only try to coerce the result type if
-        # we have a numeric dtype, as these are *always* user-defined funcs
-        # the cython take a different path (and casting)
-        dtype = self._selected_obj.dtype
-        if is_numeric_dtype(dtype):
-            result = maybe_downcast_to_dtype(result, dtype)
-
         result.name = self._selected_obj.name
         result.index = self._selected_obj.index
         return result

From 91947c5a6e1a842e697dbed3ab74ea91155e4453 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 05:41:55 +0000
Subject: [PATCH 21/47] added missing values handling for _GroupBy.get_group
 method

---
 pandas/core/groupby/groupby.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 9be80c24ea167..8006bd5eb04af 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -54,6 +54,7 @@ class providing the base-class of operations.
 )
 from pandas.core.dtypes.missing import isna, notna
 
+import pandas as pd
 from pandas.core import nanops
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, DatetimeArray
@@ -631,7 +632,8 @@ def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        return self._get_indices([name])[0]
+        res = self._get_indices([name])
+        return res[0] if res else []
 
     @cache_readonly
     def _selected_obj(self):
@@ -809,7 +811,10 @@ def get_group(self, name, obj=None):
         if obj is None:
             obj = self._selected_obj
 
-        inds = self._get_index(name)
+        if pd.isna(name):
+            inds = self._get_index(np.nan)
+        else:
+            inds = self._get_index(name)
         if not len(inds):
             raise KeyError(name)
 

From ce80f7cf01d7f8537b8619523e645568032adb2d Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 05:43:44 +0000
Subject: [PATCH 22/47] updated indices for case dropna=True

---
 pandas/core/groupby/grouper.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 550d736329ae5..7c01f3a58093f 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -562,7 +562,10 @@ def indices(self):
 
         # GH35014
         res = values._reverse_indexer()
-        res[np.nan] = [i for i, v in enumerate(values) if pd.isna(v)]
+        if self.dropna is False:
+            nan_locs = [i for i, v in enumerate(values) if pd.isna(v)]
+            if nan_locs:
+                res[np.nan] = nan_locs
         return res
 
     @property

From 5c992d2981d62aa1ead140d1613fe31d5544931b Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 05:45:49 +0000
Subject: [PATCH 23/47] cleaned up syntax

---
 pandas/core/groupby/grouper.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 7c01f3a58093f..65bb5508934c3 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -562,10 +562,8 @@ def indices(self):
 
         # GH35014
         res = values._reverse_indexer()
-        if self.dropna is False:
-            nan_locs = [i for i, v in enumerate(values) if pd.isna(v)]
-            if nan_locs:
-                res[np.nan] = nan_locs
+        if self.dropna is False and any(pd.isna(v) for v in values):
+            res[np.nan] = [i for i, v in enumerate(values) if pd.isna(v)]
         return res
 
     @property

From 15215fe58a5dbe761da7fe7c230fc5d3923591e3 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 05:48:12 +0000
Subject: [PATCH 24/47] cleaned up syntax

---
 pandas/core/groupby/groupby.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 8006bd5eb04af..823dd75cfa2eb 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -811,10 +811,7 @@ def get_group(self, name, obj=None):
         if obj is None:
             obj = self._selected_obj
 
-        if pd.isna(name):
-            inds = self._get_index(np.nan)
-        else:
-            inds = self._get_index(name)
+        inds = self._get_index(np.nan) if pd.isna(name) else self._get_index(name)
         if not len(inds):
             raise KeyError(name)
 

From 30c2fb598f95e7d390aa03786c47c0c33229ba75 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 06:07:23 +0000
Subject: [PATCH 25/47] removed print statements

---
 pandas/tests/groupby/test_groupby_dropna.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index f81f6a1bf70c6..3f158e99fab31 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -180,8 +180,6 @@ def test_slice_groupby_then_transform():
     gb_slice = gb["B"]
     res = gb["B"].transform(len)
     expected = pd.Series(data=[2, 2, 1, 1], name="B")
-    print(f"res={res}")
-    print(f"expected={expected}")
     tm.assert_series_equal(res, expected)
 
 

From 0746a76606d9952240276279826e1e9ab3f0e131 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 06:36:42 +0000
Subject: [PATCH 26/47] _transform_general: add a check that we don't
 accidentally upcast

---
 pandas/core/groupby/generic.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 618460ecd3026..8d64c7279285d 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -38,6 +38,7 @@
     maybe_cast_result_dtype,
     maybe_convert_objects,
     maybe_downcast_numeric,
+    maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
     ensure_int64,
@@ -45,6 +46,7 @@
     is_bool,
     is_integer_dtype,
     is_interval_dtype,
+    is_numeric_dtype,
     is_object_dtype,
     is_scalar,
     needs_i8_conversion,
@@ -526,6 +528,15 @@ def _transform_general(
         else:
             result = self.obj._constructor(dtype=np.float64)
 
+        # we will only try to coerce the result type if
+        # we have a numeric dtype, as these are *always* user-defined funcs
+        # the cython take a different path (and casting)
+        # make sure we don't accidentally upcast (GH35014)
+        types = ["bool", "int64", "float64"]
+        dtype = self._selected_obj.dtype
+        if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
+            result = maybe_downcast_to_dtype(result, dtype)
+
         result.name = self._selected_obj.name
         result.index = self._selected_obj.index
         return result

From d4316cde2cf0b41c1da9d2781559c7b2c06dc7bf Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 06:51:39 +0000
Subject: [PATCH 27/47] _transform_general: add int32, float32 to upcasting
 check

---
 pandas/core/groupby/generic.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index 8d64c7279285d..21b5670747a27 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -532,7 +532,7 @@ def _transform_general(
         # we have a numeric dtype, as these are *always* user-defined funcs
         # the cython take a different path (and casting)
         # make sure we don't accidentally upcast (GH35014)
-        types = ["bool", "int64", "float64"]
+        types = ["bool", "int32", "int64", "float32", "float64"]
         dtype = self._selected_obj.dtype
         if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
             result = maybe_downcast_to_dtype(result, dtype)

From 7a4315574eb7e5b3084938cb1ed0a3e643e763a0 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 06:55:35 +0000
Subject: [PATCH 28/47] rewrite for loop as list comprehension

---
 pandas/core/groupby/groupby.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 823dd75cfa2eb..3ce0154105d23 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -619,12 +619,12 @@ def get_converter(s):
             converter = get_converter(index_sample)
             names = (converter(name) for name in names)
 
-        res = []
-        for name in names:
-            if isna(name):
-                res += [v for k, v in self.indices.items() if isna(k)]
-            else:
-                res += [self.indices.get(name, [])]
+        res = [
+            [v for k, v in self.indices.items() if isna(k)]
+            if isna(name)
+            else self.indices.get(name, [])
+            for name in names
+        ]
 
         return res
 

From 2bd58859d5ca7fbed6cd03190161d4a5dff6c69c Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 07:05:14 +0000
Subject: [PATCH 29/47] rewrote if statement as dict comp + ternary

---
 pandas/core/groupby/grouper.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 65bb5508934c3..14ccffba0dad9 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -561,10 +561,12 @@ def indices(self):
         values = Categorical(self.grouper)
 
         # GH35014
-        res = values._reverse_indexer()
-        if self.dropna is False and any(pd.isna(v) for v in values):
-            res[np.nan] = [i for i, v in enumerate(values) if pd.isna(v)]
-        return res
+        reverse_indexer = values._reverse_indexer()
+        return (
+            {**reverse_indexer, pd.NaT: [i for i, v in enumerate(values) if pd.isna(v)]}
+            if not self.dropna and any(pd.isna(v) for v in values)
+            else reverse_indexer
+        )
 
     @property
     def codes(self) -> np.ndarray:

From 550985fc94b64da80ece71dc36ddbf4a7e37f61f Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 07:37:36 +0000
Subject: [PATCH 30/47] fixed small bug in list comp in groupby/groupby.py

---
 pandas/core/groupby/groupby.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 3ce0154105d23..d3a4008d0f4be 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -622,10 +622,11 @@ def get_converter(s):
         res = [
             [v for k, v in self.indices.items() if isna(k)]
             if isna(name)
-            else self.indices.get(name, [])
+            else [self.indices.get(name, [])]
             for name in names
         ]
 
+        print(f"groupby.py res={res}")
         return res
 
     def _get_index(self, name):
@@ -811,7 +812,7 @@ def get_group(self, name, obj=None):
         if obj is None:
             obj = self._selected_obj
 
-        inds = self._get_index(np.nan) if pd.isna(name) else self._get_index(name)
+        inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
         if not len(inds):
             raise KeyError(name)
 

From 57a8da40620f929140b2deb5c92c53a7a642c6e1 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 07:40:17 +0000
Subject: [PATCH 31/47] deleted debug statement in groupby/groupby.py

---
 pandas/core/groupby/groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index d3a4008d0f4be..6200335854cdc 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -626,7 +626,6 @@ def get_converter(s):
             for name in names
         ]
 
-        print(f"groupby.py res={res}")
         return res
 
     def _get_index(self, name):

From c1e7bcec4796ec348a544679656b2e7a8f142c8b Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 07:52:44 +0000
Subject: [PATCH 32/47] rewrite _get_index using next_iter to set default value

---
 pandas/core/groupby/groupby.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 6200335854cdc..f9ee765b8ab42 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -632,8 +632,7 @@ def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        res = self._get_indices([name])
-        return res[0] if res else []
+        return next(iter(self._get_indices([name])), [])
 
     @cache_readonly
     def _selected_obj(self):

From 62f52b8f44c98b4029f3a60044cc1bbe454d4b47 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 15:30:21 +0000
Subject: [PATCH 33/47] update expected test_groupby_nat_exclude for new
 missing values handling

---
 pandas/tests/groupby/test_groupby.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0d040b8e6955a..0299356216c50 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1252,13 +1252,15 @@ def test_groupby_nat_exclude():
     }
 
     for k in grouped.indices:
+        if pd.isna(k):
+            continue  # GH 35014
         tm.assert_numpy_array_equal(grouped.indices[k], expected[k])
 
     tm.assert_frame_equal(grouped.get_group(Timestamp("2013-01-01")), df.iloc[[1, 7]])
     tm.assert_frame_equal(grouped.get_group(Timestamp("2013-02-01")), df.iloc[[3, 5]])
 
-    with pytest.raises(KeyError, match=r"^NaT$"):
-        grouped.get_group(pd.NaT)
+    # GH35014
+    grouped.get_group(pd.NaT)
 
     nan_df = DataFrame(
         {"nan": [np.nan, np.nan, np.nan], "nat": [pd.NaT, pd.NaT, pd.NaT]}
@@ -1268,6 +1270,7 @@ def test_groupby_nat_exclude():
 
     for key in ["nan", "nat"]:
         grouped = nan_df.groupby(key)
+        print(f"grouped.__dict__={grouped.__dict__}")
         assert grouped.groups == {}
         assert grouped.ngroups == 0
         assert grouped.indices == {}

From ef3c1992d41af5b00614be6aebe04c7fe83bcd2a Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 7 Jul 2020 15:46:14 +0000
Subject: [PATCH 34/47] remove print statement

---
 pandas/tests/groupby/test_groupby.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 0299356216c50..9f0da6b01383a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1270,7 +1270,6 @@ def test_groupby_nat_exclude():
 
     for key in ["nan", "nat"]:
         grouped = nan_df.groupby(key)
-        print(f"grouped.__dict__={grouped.__dict__}")
         assert grouped.groups == {}
         assert grouped.ngroups == 0
         assert grouped.indices == {}

From b55021dc79ac4bd0fcdfac029a7e8712a838cd86 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Wed, 8 Jul 2020 17:53:03 +0000
Subject: [PATCH 35/47] removed xfail tests

---
 pandas/tests/io/json/test_pandas.py | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 10f49b9b81528..aef55f63e866e 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1250,21 +1250,11 @@ def test_to_json_large_numbers(self, bigNum):
         json = series.to_json()
         expected = '{"articleId":' + str(bigNum) + "}"
         assert json == expected
-        # GH 20599
-        with pytest.raises(ValueError):
-            json = StringIO(json)
-            result = read_json(json)
-            tm.assert_series_equal(series, result)
 
         df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
         json = df.to_json()
         expected = '{"0":{"articleId":' + str(bigNum) + "}}"
         assert json == expected
-        # GH 20599
-        with pytest.raises(ValueError):
-            json = StringIO(json)
-            result = read_json(json)
-            tm.assert_frame_equal(df, result)
 
     def test_read_json_large_numbers(self):
         # GH18842

From 15c1c333f9acbde47554594c644ee4f1ee2536df Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Thu, 9 Jul 2020 17:16:40 +0000
Subject: [PATCH 36/47] reworked solution

---
 pandas/core/groupby/groupby.py       | 15 +++++----------
 pandas/core/groupby/grouper.py       |  8 +++-----
 pandas/tests/groupby/test_groupby.py |  5 ++---
 3 files changed, 10 insertions(+), 18 deletions(-)

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index f9ee765b8ab42..c29ad20ed60a7 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -619,21 +619,16 @@ def get_converter(s):
             converter = get_converter(index_sample)
             names = (converter(name) for name in names)
 
-        res = [
-            [v for k, v in self.indices.items() if isna(k)]
-            if isna(name)
-            else [self.indices.get(name, [])]
-            for name in names
-        ]
-
-        return res
+        return [self.indices.get(name, []) for name in names]
 
     def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        return next(iter(self._get_indices([name])), [])
-
+        if isna(name):
+            return self._get_indices([pd.NaT])[0]
+        else:
+            return self._get_indices([name])[0]
     @cache_readonly
     def _selected_obj(self):
         # Note: _selected_obj is always just `self.obj` for SeriesGroupBy
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 14ccffba0dad9..57b7afabfb075 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -562,11 +562,9 @@ def indices(self):
 
         # GH35014
         reverse_indexer = values._reverse_indexer()
-        return (
-            {**reverse_indexer, pd.NaT: [i for i, v in enumerate(values) if pd.isna(v)]}
-            if not self.dropna and any(pd.isna(v) for v in values)
-            else reverse_indexer
-        )
+        res = {**reverse_indexer, pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)])} if not self.dropna and any(pd.isna(v) for v in values) else reverse_indexer
+        print(f"grouper.py Grouping.indices returns {res}")
+        return res
 
     @property
     def codes(self) -> np.ndarray:
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 9f0da6b01383a..5bf2411cf5a0d 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1252,15 +1252,14 @@ def test_groupby_nat_exclude():
     }
 
     for k in grouped.indices:
-        if pd.isna(k):
-            continue  # GH 35014
         tm.assert_numpy_array_equal(grouped.indices[k], expected[k])
 
     tm.assert_frame_equal(grouped.get_group(Timestamp("2013-01-01")), df.iloc[[1, 7]])
     tm.assert_frame_equal(grouped.get_group(Timestamp("2013-02-01")), df.iloc[[3, 5]])
 
     # GH35014
-    grouped.get_group(pd.NaT)
+    with pytest.raises(KeyError):
+        grouped.get_group(pd.NaT)
 
     nan_df = DataFrame(
         {"nan": [np.nan, np.nan, np.nan], "nat": [pd.NaT, pd.NaT, pd.NaT]}

From 657c13bf9a12759f11becc52ae58b4d6c7793683 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Thu, 9 Jul 2020 17:20:38 +0000
Subject: [PATCH 37/47] fixed PEP8 issue

---
 pandas/core/groupby/grouper.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 57b7afabfb075..efdaa4afe3511 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -562,9 +562,13 @@ def indices(self):
 
         # GH35014
         reverse_indexer = values._reverse_indexer()
-        res = {**reverse_indexer, pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)])} if not self.dropna and any(pd.isna(v) for v in values) else reverse_indexer
-        print(f"grouper.py Grouping.indices returns {res}")
-        return res
+        if not self.dropna and any(pd.isna(v) for v in values):
+            return {
+                **reverse_indexer, 
+                pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)])
+            } 
+        else:
+            return  reverse_indexer
 
     @property
     def codes(self) -> np.ndarray:

From 70e3a19995819e9f18fabb66a42b4ae63e28ea94 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Thu, 9 Jul 2020 17:55:42 +0000
Subject: [PATCH 38/47] run pre-commit checks

---
 pandas/tests/groupby/test_groupby.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index 5bf2411cf5a0d..0d040b8e6955a 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -1257,8 +1257,7 @@ def test_groupby_nat_exclude():
     tm.assert_frame_equal(grouped.get_group(Timestamp("2013-01-01")), df.iloc[[1, 7]])
     tm.assert_frame_equal(grouped.get_group(Timestamp("2013-02-01")), df.iloc[[3, 5]])
 
-    # GH35014
-    with pytest.raises(KeyError):
+    with pytest.raises(KeyError, match=r"^NaT$"):
         grouped.get_group(pd.NaT)
 
     nan_df = DataFrame(

From 5ace6acde2f81693754cf5fd49bbe4e86498eecc Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Thu, 9 Jul 2020 18:25:52 +0000
Subject: [PATCH 39/47] styling fix

---
 pandas/core/groupby/grouper.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index efdaa4afe3511..0b5593c3e0a5a 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -564,11 +564,11 @@ def indices(self):
         reverse_indexer = values._reverse_indexer()
         if not self.dropna and any(pd.isna(v) for v in values):
             return {
-                **reverse_indexer, 
-                pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)])
-            } 
+                **reverse_indexer,
+                pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
+            }
         else:
-            return  reverse_indexer
+            return reverse_indexer
 
     @property
     def codes(self) -> np.ndarray:

From 90e9b6a108d8b1da13183233aa1e642ee8a47fcc Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Thu, 9 Jul 2020 19:10:14 +0000
Subject: [PATCH 40/47] update whatsnew + styling improvements

---
 doc/source/whatsnew/v1.1.0.rst | 1 +
 pandas/core/groupby/groupby.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 986ee371566cd..86915248cbc66 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -990,6 +990,7 @@ Missing
 - :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
 - Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`)
 - passing :class:`NA` will into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
+- Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`)
 
 MultiIndex
 ^^^^^^^^^^
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index c29ad20ed60a7..e206782a948d1 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -629,6 +629,7 @@ def _get_index(self, name):
             return self._get_indices([pd.NaT])[0]
         else:
             return self._get_indices([name])[0]
+
     @cache_readonly
     def _selected_obj(self):
         # Note: _selected_obj is always just `self.obj` for SeriesGroupBy

From be0557581e43517ca7b0e3afe2e0f98cddecf8da Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Sat, 11 Jul 2020 07:42:12 +0000
Subject: [PATCH 41/47] add read_json tests

---
 pandas/tests/io/json/test_pandas.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index aef55f63e866e..17f13e0f0050b 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1256,6 +1256,24 @@ def test_to_json_large_numbers(self, bigNum):
         expected = '{"0":{"articleId":' + str(bigNum) + "}}"
         assert json == expected
 
+    @pytest.mark.parametrize("bigNum", [2**64 + 1, -(2**64 + 2)])
+    def test_read_json_large_numbers(self, bigNum):
+        # GH20599
+
+        series = Series(bigNum, dtype=object, index=["articleId"])
+        json = '{"articleId":' + str(bigNum) + "}"
+        with pytest.raises(ValueError):	
+            json = StringIO(json)	
+            result = read_json(json)	
+            tm.assert_series_equal(series, result)
+
+        df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
+        json = '{"0":{"articleId":' + str(bigNum) + "}}"
+        with pytest.raises(ValueError):	
+            json = StringIO(json)	
+            result = read_json(json)	
+            tm.assert_frame_equal(df, result)
+
     def test_read_json_large_numbers(self):
         # GH18842
         json = '{"articleId": "1404366058080022500245"}'

From 319ae661eb7898b2b8a0e0c78a64f4f303de8d31 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 14 Jul 2020 08:24:13 -0500
Subject: [PATCH 42/47] Fixups

---
 pandas/tests/io/json/test_pandas.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 17f13e0f0050b..052aa03f78912 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1256,25 +1256,25 @@ def test_to_json_large_numbers(self, bigNum):
         expected = '{"0":{"articleId":' + str(bigNum) + "}}"
         assert json == expected
 
-    @pytest.mark.parametrize("bigNum", [2**64 + 1, -(2**64 + 2)])
+    @pytest.mark.parametrize("bigNum", [2 ** 64 + 1, -(2 ** 64 + 2)])
     def test_read_json_large_numbers(self, bigNum):
         # GH20599
 
         series = Series(bigNum, dtype=object, index=["articleId"])
         json = '{"articleId":' + str(bigNum) + "}"
-        with pytest.raises(ValueError):	
-            json = StringIO(json)	
-            result = read_json(json)	
+        with pytest.raises(ValueError):
+            json = StringIO(json)
+            result = read_json(json)
             tm.assert_series_equal(series, result)
 
         df = DataFrame(bigNum, dtype=object, index=["articleId"], columns=[0])
         json = '{"0":{"articleId":' + str(bigNum) + "}}"
-        with pytest.raises(ValueError):	
-            json = StringIO(json)	
-            result = read_json(json)	
+        with pytest.raises(ValueError):
+            json = StringIO(json)
+            result = read_json(json)
             tm.assert_frame_equal(df, result)
 
-    def test_read_json_large_numbers(self):
+    def test_read_json_large_numbers2(self):
         # GH18842
         json = '{"articleId": "1404366058080022500245"}'
         json = StringIO(json)

From 7a78da5c83dab468901b8fa861e08172083d63e3 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 14 Jul 2020 19:24:36 +0000
Subject: [PATCH 43/47] fixed git mistake

---
 doc/source/whatsnew/v1.1.0.rst              |  1 -
 pandas/core/groupby/generic.py              |  4 +---
 pandas/core/groupby/groupby.py              |  8 ++------
 pandas/core/groupby/grouper.py              | 11 +----------
 pandas/tests/groupby/test_groupby_dropna.py | 20 --------------------
 5 files changed, 4 insertions(+), 40 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 64e473acf37c0..a4c107ddefd7b 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -992,7 +992,6 @@ Missing
 - :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
 - Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`)
 - passing :class:`NA` will into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
-- Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`)
 
 MultiIndex
 ^^^^^^^^^^
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index e9558878f3fbb..1f49ee2b0b665 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -548,10 +548,8 @@ def _transform_general(
         # we will only try to coerce the result type if
         # we have a numeric dtype, as these are *always* user-defined funcs
         # the cython take a different path (and casting)
-        # make sure we don't accidentally upcast (GH35014)
-        types = ["bool", "int32", "int64", "float32", "float64"]
         dtype = self._selected_obj.dtype
-        if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
+        if is_numeric_dtype(dtype):
             result = maybe_downcast_to_dtype(result, dtype)
 
         result.name = self._selected_obj.name
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index e206782a948d1..d039b715b3c08 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -54,7 +54,6 @@ class providing the base-class of operations.
 )
 from pandas.core.dtypes.missing import isna, notna
 
-import pandas as pd
 from pandas.core import nanops
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, DatetimeArray
@@ -625,10 +624,7 @@ def _get_index(self, name):
         """
         Safe get index, translate keys for datelike to underlying repr.
         """
-        if isna(name):
-            return self._get_indices([pd.NaT])[0]
-        else:
-            return self._get_indices([name])[0]
+        return self._get_indices([name])[0]
 
     @cache_readonly
     def _selected_obj(self):
@@ -806,7 +802,7 @@ def get_group(self, name, obj=None):
         if obj is None:
             obj = self._selected_obj
 
-        inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
+        inds = self._get_index(name)
         if not len(inds):
             raise KeyError(name)
 
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 0b5593c3e0a5a..76a7c1ccb0b6a 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -20,7 +20,6 @@
 )
 from pandas.core.dtypes.generic import ABCSeries
 
-import pandas as pd
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, ExtensionArray
 import pandas.core.common as com
@@ -560,15 +559,7 @@ def indices(self):
 
         values = Categorical(self.grouper)
 
-        # GH35014
-        reverse_indexer = values._reverse_indexer()
-        if not self.dropna and any(pd.isna(v) for v in values):
-            return {
-                **reverse_indexer,
-                pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
-            }
-        else:
-            return reverse_indexer
+        return values._reverse_indexer()
 
     @property
     def codes(self) -> np.ndarray:
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 3f158e99fab31..bbf71d59be140 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -162,26 +162,6 @@ def test_groupby_dropna_series_by(dropna, expected):
     tm.assert_series_equal(result, expected)
 
 
-def test_slice_groupby_then_transform():
-    # GH35014
-
-    df = pd.DataFrame({"A": [0, 0, 1, None], "B": [1, 2, 3, None]})
-    gb = df.groupby("A", dropna=False)
-
-    res = gb.transform(len)
-    expected = pd.DataFrame({"B": [2, 2, 1, 1]})
-    tm.assert_frame_equal(res, expected)
-
-    gb_slice = gb[["B"]]
-    res = gb_slice.transform(len)
-    expected = pd.DataFrame({"B": [2, 2, 1, 1]})
-    tm.assert_frame_equal(res, expected)
-
-    gb_slice = gb["B"]
-    res = gb["B"].transform(len)
-    expected = pd.Series(data=[2, 2, 1, 1], name="B")
-    tm.assert_series_equal(res, expected)
-
 
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",

From 128173c30d127661f22993d48c7acfc25ce72a64 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 14 Jul 2020 19:28:15 +0000
Subject: [PATCH 44/47] minimize diff

---
 pandas/core/groupby/grouper.py              | 1 -
 pandas/tests/groupby/test_groupby_dropna.py | 1 -
 2 files changed, 2 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 76a7c1ccb0b6a..67003dffb90bb 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -558,7 +558,6 @@ def indices(self):
             return self.grouper.indices
 
         values = Categorical(self.grouper)
-
         return values._reverse_indexer()
 
     @property
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index bbf71d59be140..1a525d306e9f5 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -162,7 +162,6 @@ def test_groupby_dropna_series_by(dropna, expected):
     tm.assert_series_equal(result, expected)
 
 
-
 @pytest.mark.parametrize(
     "dropna, tuples, outputs",
     [

From fc5bce69c53109e2f43b2f8d84be12e08d37a3f5 Mon Sep 17 00:00:00 2001
From: arw2019 <andrew.r.wieteska@gmail.com>
Date: Tue, 14 Jul 2020 22:54:01 +0000
Subject: [PATCH 45/47] fix input to test

---
 pandas/tests/io/json/test_pandas.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 052aa03f78912..d0fb504c9635f 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1256,7 +1256,7 @@ def test_to_json_large_numbers(self, bigNum):
         expected = '{"0":{"articleId":' + str(bigNum) + "}}"
         assert json == expected
 
-    @pytest.mark.parametrize("bigNum", [2 ** 64 + 1, -(2 ** 64 + 2)])
+    @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
     def test_read_json_large_numbers(self, bigNum):
         # GH20599
 

From 1f2fa9e1b26258e6d7f67e554a0978adbfd4acf4 Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 14 Jul 2020 19:41:20 -0500
Subject: [PATCH 46/47] xfail

---
 pandas/tests/io/json/test_pandas.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index d0fb504c9635f..37ce92047ba1a 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1256,7 +1256,8 @@ def test_to_json_large_numbers(self, bigNum):
         expected = '{"0":{"articleId":' + str(bigNum) + "}}"
         assert json == expected
 
-    @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
+    @pytest.mark.parametrize("bigNum", [2**64 + 1, -(2**64 + 2)])
+    @pytest.mark.xfail(sys.maxsize <= 2**32, reason="GH-35279")
     def test_read_json_large_numbers(self, bigNum):
         # GH20599
 

From 72a612f5e6321ba5c805110fe905571713d26cfd Mon Sep 17 00:00:00 2001
From: Tom Augspurger <tom.w.augspurger@gmail.com>
Date: Tue, 14 Jul 2020 20:42:09 -0500
Subject: [PATCH 47/47] fixup

---
 pandas/tests/io/json/test_pandas.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 37ce92047ba1a..97b53a6e66575 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -1256,8 +1256,8 @@ def test_to_json_large_numbers(self, bigNum):
         expected = '{"0":{"articleId":' + str(bigNum) + "}}"
         assert json == expected
 
-    @pytest.mark.parametrize("bigNum", [2**64 + 1, -(2**64 + 2)])
-    @pytest.mark.xfail(sys.maxsize <= 2**32, reason="GH-35279")
+    @pytest.mark.parametrize("bigNum", [sys.maxsize + 1, -(sys.maxsize + 2)])
+    @pytest.mark.skipif(sys.maxsize <= 2 ** 32, reason="GH-35279")
     def test_read_json_large_numbers(self, bigNum):
         # GH20599