redo solution - modify SeriesGroupBy._transform_general only

arw2019 · arw2019 · commit 516d4740ce61 · 2020-08-07T01:13:07.000Z
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -17,7 +17,11 @@ Enhancements
 
 Other enhancements
 ^^^^^^^^^^^^^^^^^^
+<<<<<<< HEAD
 - :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
+=======
+
+>>>>>>> 860bd139f... whatsnew
 -
 -
 
@@ -59,20 +63,32 @@ Categorical
 
 Datetimelike
 ^^^^^^^^^^^^
+<<<<<<< HEAD
 - Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`)
 - Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`)
+=======
+-
+>>>>>>> 860bd139f... whatsnew
 -
 
 Timedelta
 ^^^^^^^^^
+<<<<<<< HEAD
 - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`)
+=======
+
+>>>>>>> 860bd139f... whatsnew
 -
 -
 
 Timezones
 ^^^^^^^^^
 
+<<<<<<< HEAD
 - Bug in :func:`date_range` was raising AmbiguousTimeError for valid input with `ambiguous=False` (:issue:`35297`)
+=======
+-
+>>>>>>> 860bd139f... whatsnew
 -
 
 
@@ -109,7 +125,11 @@ Indexing
 Missing
 ^^^^^^^
 
+<<<<<<< HEAD
 -
+=======
+- Bug in :meth:`SeriesGroupBy.transform` where missing values were handled incorrectly with ``dropna=False`` (:issue:`35014`)
+>>>>>>> 860bd139f... whatsnew
 -
 
 MultiIndex
@@ -133,7 +153,10 @@ Plotting
 Groupby/resample/rolling
 ^^^^^^^^^^^^^^^^^^^^^^^^
 
+<<<<<<< HEAD
 - Bug in :meth:`DataFrameGroupBy.apply` that would sometimes throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`)
+=======
+>>>>>>> 860bd139f... whatsnew
 -
 -
 
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
@@ -120,11 +120,18 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
     return <int64_t>(base * m) + <int64_t>(frac * m)
 
 
+<<<<<<< HEAD
+<<<<<<< HEAD
 <<<<<<< HEAD
 cpdef inline (int64_t, int) precision_from_unit(str unit):
 =======
+=======
+>>>>>>> 04e46f888... fix merge conflict
 cpdef inline object precision_from_unit(str unit):
 >>>>>>> 6b9d4de82... revert changes
+=======
+cpdef inline (int64_t, int) precision_from_unit(str unit):
+>>>>>>> 165d091d8... fix merge conflict
     """
     Return a casting of the unit represented to nanoseconds + the precision
     to round the fractional part.
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -39,7 +39,6 @@
     maybe_cast_result_dtype,
     maybe_convert_objects,
     maybe_downcast_numeric,
-    maybe_downcast_to_dtype,
 )
 from pandas.core.dtypes.common import (
     ensure_int64,
@@ -535,26 +534,25 @@ def _transform_general(
             if isinstance(res, (ABCDataFrame, ABCSeries)):
                 res = res._values
 
-            indexer = self._get_index(name)
-            ser = klass(res, indexer)
-            results.append(ser)
+            results.append(klass(res, index=group.index))
 
         # check for empty "results" to avoid concat ValueError
         if results:
             from pandas.core.reshape.concat import concat
 
-            result = concat(results).sort_index()
+            concatenated = concat(results)
+            result = self._set_result_index_ordered(concatenated)
         else:
             result = self.obj._constructor(dtype=np.float64)
-
         # we will only try to coerce the result type if
         # we have a numeric dtype, as these are *always* user-defined funcs
         # the cython take a different path (and casting)
-        # make sure we don't accidentally upcast (GH35014)
-        types = ["bool", "int32", "int64", "float32", "float64"]
-        dtype = self._selected_obj.dtype
-        if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
-            result = maybe_downcast_to_dtype(result, dtype)
+        if is_numeric_dtype(result.dtype):
+            common_dtype = np.find_common_type(
+                [self._selected_obj.dtype, result.dtype], []
+            )
+            if common_dtype is result.dtype:
+                result = maybe_downcast_numeric(result, self._selected_obj.dtype)
 
         result.name = self._selected_obj.name
         result.index = self._selected_obj.index
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -54,7 +54,6 @@ class providing the base-class of operations.
 )
 from pandas.core.dtypes.missing import isna, notna
 
-import pandas as pd
 from pandas.core import nanops
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, DatetimeArray
@@ -624,12 +623,7 @@ def get_converter(s):
             converter = get_converter(index_sample)
             names = (converter(name) for name in names)
 
-        return [
-            self.indices.get(name, [])
-            if not isna(name)
-            else self.indices.get(pd.NaT, [])
-            for name in names
-        ]
+        return [self.indices.get(name, []) for name in names]
 
     def _get_index(self, name):
         """
@@ -813,7 +807,7 @@ def get_group(self, name, obj=None):
         if obj is None:
             obj = self._selected_obj
 
-        inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
+        inds = self._get_index(name)
         if not len(inds):
             raise KeyError(name)
 
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
@@ -20,7 +20,6 @@
 )
 from pandas.core.dtypes.generic import ABCSeries
 
-import pandas as pd
 import pandas.core.algorithms as algorithms
 from pandas.core.arrays import Categorical, ExtensionArray
 import pandas.core.common as com
@@ -558,16 +557,7 @@ def indices(self):
             return self.grouper.indices
 
         values = Categorical(self.grouper)
-
-        # GH35014
-        reverse_indexer = values._reverse_indexer()
-        if not self.dropna and any(pd.isna(v) for v in values):
-            return {
-                **reverse_indexer,
-                pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
-            }
-        else:
-            return reverse_indexer
+        return values._reverse_indexer()
 
     @property
     def codes(self) -> np.ndarray:

Original file line number	Diff line number	Diff line change
`@@ -17,7 +17,11 @@ Enhancements`
`17`	`17`
`18`	`18`	`Other enhancements`
`19`	`19`	`^^^^^^^^^^^^^^^^^^`
	`20`	`+<<<<<<< HEAD`
`20`	`21`	- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
	`22`	`+=======`
	`23`	`+`
	`24`	`+>>>>>>> 860bd139f... whatsnew`
`21`	`25`	`-`
`22`	`26`	`-`
`23`	`27`
`@@ -59,20 +63,32 @@ Categorical`
`59`	`63`
`60`	`64`	`Datetimelike`
`61`	`65`	`^^^^^^^^^^^^`
	`66`	`+<<<<<<< HEAD`
`62`	`67`	- Bug in :attr:`DatetimeArray.date` where a ``ValueError`` would be raised with a read-only backing array (:issue:`33530`)
`63`	`68`	- Bug in ``NaT`` comparisons failing to raise ``TypeError`` on invalid inequality comparisons (:issue:`35046`)
	`69`	`+=======`
	`70`	`+-`
	`71`	`+>>>>>>> 860bd139f... whatsnew`
`64`	`72`	`-`
`65`	`73`
`66`	`74`	`Timedelta`
`67`	`75`	`^^^^^^^^^`
	`76`	`+<<<<<<< HEAD`
`68`	`77`	- Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`)
	`78`	`+=======`
	`79`	`+`
	`80`	`+>>>>>>> 860bd139f... whatsnew`
`69`	`81`	`-`
`70`	`82`	`-`
`71`	`83`
`72`	`84`	`Timezones`
`73`	`85`	`^^^^^^^^^`
`74`	`86`
	`87`	`+<<<<<<< HEAD`
`75`	`88`	- Bug in :func:`date_range` was raising AmbiguousTimeError for valid input with `ambiguous=False` (:issue:`35297`)
	`89`	`+=======`
	`90`	`+-`
	`91`	`+>>>>>>> 860bd139f... whatsnew`
`76`	`92`	`-`
`77`	`93`
`78`	`94`
`@@ -109,7 +125,11 @@ Indexing`
`109`	`125`	`Missing`
`110`	`126`	`^^^^^^^`
`111`	`127`
	`128`	`+<<<<<<< HEAD`
`112`	`129`	`-`
	`130`	`+=======`
	`131`	+- Bug in :meth:`SeriesGroupBy.transform` where missing values were handled incorrectly with ``dropna=False`` (:issue:`35014`)
	`132`	`+>>>>>>> 860bd139f... whatsnew`
`113`	`133`	`-`
`114`	`134`
`115`	`135`	`MultiIndex`
`@@ -133,7 +153,10 @@ Plotting`
`133`	`153`	`Groupby/resample/rolling`
`134`	`154`	`^^^^^^^^^^^^^^^^^^^^^^^^`
`135`	`155`
	`156`	`+<<<<<<< HEAD`
`136`	`157`	- Bug in :meth:`DataFrameGroupBy.apply` that would sometimes throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`)
	`158`	`+=======`
	`159`	`+>>>>>>> 860bd139f... whatsnew`
`137`	`160`	`-`
`138`	`161`	`-`
`139`	`162`