Skip to content

Commit fa2d90a

Browse files
committed
redo solution - modify SeriesGroupBy._transform_general only
1 parent c9f6f7e commit fa2d90a

File tree

6 files changed

+20
-45
lines changed

6 files changed

+20
-45
lines changed

doc/source/whatsnew/v1.1.0.rst

-14
Original file line numberDiff line numberDiff line change
@@ -1017,7 +1017,6 @@ Indexing
10171017

10181018
Missing
10191019
^^^^^^^
1020-
<<<<<<< HEAD
10211020
- Calling :meth:`fillna` on an empty :class:`Series` now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
10221021
- Bug in :meth:`Series.replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
10231022
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable Boolean dtype and with ``skipna=False`` (:issue:`33253`)
@@ -1026,19 +1025,6 @@ Missing
10261025
- Bug in :meth:`DataFrame.interpolate` when called on a :class:`DataFrame` with column names of string type was throwing a ValueError. The method is now independent of the type of the column names (:issue:`33956`)
10271026
- Passing :class:`NA` into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
10281027
- Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`)
1029-
=======
1030-
- Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
1031-
- Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
1032-
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nulllable boolean dtype and with ``skipna=False`` (:issue:`33253`)
1033-
- Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`)
1034-
- :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns lead to interpolation along indices and vice versa. Furthermore interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` are identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
1035-
- Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is no independing of the type of column names (:issue:`33956`)
1036-
- passing :class:`NA` will into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
1037-
<<<<<<< HEAD
1038-
- Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`)
1039-
>>>>>>> 90e9b6a10... update whatnew + styling improvements
1040-
=======
1041-
>>>>>>> 8c11b6072... removed 1.1 release note
10421028

10431029
MultiIndex
10441030
^^^^^^^^^^

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ Indexing
109109
Missing
110110
^^^^^^^
111111

112-
-
112+
- Bug in :meth:`SeriesGroupBy.transform` incorrectly handling missing values when ``dropna=False`` (:issue:`35014`)
113113
-
114114

115115
MultiIndex

pandas/_libs/tslibs/conversion.pyx

+7
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,18 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
120120
return <int64_t>(base * m) + <int64_t>(frac * m)
121121

122122

123+
<<<<<<< HEAD
124+
<<<<<<< HEAD
123125
<<<<<<< HEAD
124126
cpdef inline (int64_t, int) precision_from_unit(str unit):
125127
=======
128+
=======
129+
>>>>>>> 04e46f888... fix merge conflict
126130
cpdef inline object precision_from_unit(str unit):
127131
>>>>>>> 6b9d4de82... revert changes
132+
=======
133+
cpdef inline (int64_t, int) precision_from_unit(str unit):
134+
>>>>>>> 165d091d8... fix merge conflict
128135
"""
129136
Return a casting of the unit represented to nanoseconds + the precision
130137
to round the fractional part.

pandas/core/groupby/generic.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
maybe_cast_result_dtype,
4040
maybe_convert_objects,
4141
maybe_downcast_numeric,
42-
maybe_downcast_to_dtype,
4342
)
4443
from pandas.core.dtypes.common import (
4544
ensure_int64,
@@ -535,26 +534,25 @@ def _transform_general(
535534
if isinstance(res, (ABCDataFrame, ABCSeries)):
536535
res = res._values
537536

538-
indexer = self._get_index(name)
539-
ser = klass(res, indexer)
540-
results.append(ser)
537+
results.append(klass(res, index=group.index))
541538

542539
# check for empty "results" to avoid concat ValueError
543540
if results:
544541
from pandas.core.reshape.concat import concat
545542

546-
result = concat(results).sort_index()
543+
concatenated = concat(results)
544+
result = self._set_result_index_ordered(concatenated)
547545
else:
548546
result = self.obj._constructor(dtype=np.float64)
549-
550547
# we will only try to coerce the result type if
551548
# we have a numeric dtype, as these are *always* user-defined funcs
552549
# the cython take a different path (and casting)
553-
# make sure we don't accidentally upcast (GH35014)
554-
types = ["bool", "int32", "int64", "float32", "float64"]
555-
dtype = self._selected_obj.dtype
556-
if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
557-
result = maybe_downcast_to_dtype(result, dtype)
550+
if is_numeric_dtype(result.dtype):
551+
common_dtype = np.find_common_type(
552+
[self._selected_obj.dtype, result.dtype], []
553+
)
554+
if common_dtype is result.dtype:
555+
result = maybe_downcast_numeric(result, self._selected_obj.dtype)
558556

559557
result.name = self._selected_obj.name
560558
result.index = self._selected_obj.index

pandas/core/groupby/groupby.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ class providing the base-class of operations.
5454
)
5555
from pandas.core.dtypes.missing import isna, notna
5656

57-
import pandas as pd
5857
from pandas.core import nanops
5958
import pandas.core.algorithms as algorithms
6059
from pandas.core.arrays import Categorical, DatetimeArray
@@ -624,12 +623,7 @@ def get_converter(s):
624623
converter = get_converter(index_sample)
625624
names = (converter(name) for name in names)
626625

627-
return [
628-
self.indices.get(name, [])
629-
if not isna(name)
630-
else self.indices.get(pd.NaT, [])
631-
for name in names
632-
]
626+
return [self.indices.get(name, []) for name in names]
633627

634628
def _get_index(self, name):
635629
"""
@@ -813,7 +807,7 @@ def get_group(self, name, obj=None):
813807
if obj is None:
814808
obj = self._selected_obj
815809

816-
inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
810+
inds = self._get_index(name)
817811
if not len(inds):
818812
raise KeyError(name)
819813

pandas/core/groupby/grouper.py

+1-11
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
)
2121
from pandas.core.dtypes.generic import ABCSeries
2222

23-
import pandas as pd
2423
import pandas.core.algorithms as algorithms
2524
from pandas.core.arrays import Categorical, ExtensionArray
2625
import pandas.core.common as com
@@ -558,16 +557,7 @@ def indices(self):
558557
return self.grouper.indices
559558

560559
values = Categorical(self.grouper)
561-
562-
# GH35014
563-
reverse_indexer = values._reverse_indexer()
564-
if not self.dropna and any(pd.isna(v) for v in values):
565-
return {
566-
**reverse_indexer,
567-
pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
568-
}
569-
else:
570-
return reverse_indexer
560+
return values._reverse_indexer()
571561

572562
@property
573563
def codes(self) -> np.ndarray:

0 commit comments

Comments
 (0)