Skip to content

Commit 9b536dd

Browse files
committed
redo solution - modify SeriesGroupBy._transform_general only
1 parent c9f6f7e commit 9b536dd

File tree

7 files changed

+13
-69
lines changed

7 files changed

+13
-69
lines changed

doc/source/whatsnew/v1.1.0.rst

-14
Original file line numberDiff line numberDiff line change
@@ -1017,7 +1017,6 @@ Indexing
10171017

10181018
Missing
10191019
^^^^^^^
1020-
<<<<<<< HEAD
10211020
- Calling :meth:`fillna` on an empty :class:`Series` now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
10221021
- Bug in :meth:`Series.replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
10231022
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable Boolean dtype and with ``skipna=False`` (:issue:`33253`)
@@ -1026,19 +1025,6 @@ Missing
10261025
- Bug in :meth:`DataFrame.interpolate` when called on a :class:`DataFrame` with column names of string type was throwing a ValueError. The method is now independent of the type of the column names (:issue:`33956`)
10271026
- Passing :class:`NA` into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
10281027
- Bug in :meth:`Series.map` not raising on invalid ``na_action`` (:issue:`32815`)
1029-
=======
1030-
- Calling :meth:`fillna` on an empty Series now correctly returns a shallow copied object. The behaviour is now consistent with :class:`Index`, :class:`DataFrame` and a non-empty :class:`Series` (:issue:`32543`).
1031-
- Bug in :meth:`replace` when argument ``to_replace`` is of type dict/list and is used on a :class:`Series` containing ``<NA>`` was raising a ``TypeError``. The method now handles this by ignoring ``<NA>`` values when doing the comparison for the replacement (:issue:`32621`)
1032-
- Bug in :meth:`~Series.any` and :meth:`~Series.all` incorrectly returning ``<NA>`` for all ``False`` or all ``True`` values using the nullable boolean dtype and with ``skipna=False`` (:issue:`33253`)
1033-
- Clarified documentation on interpolate with method =akima. The ``der`` parameter must be scalar or None (:issue:`33426`)
1034-
- :meth:`DataFrame.interpolate` uses the correct axis convention now. Previously interpolating along columns led to interpolation along indices and vice versa. Furthermore, interpolating with methods ``pad``, ``ffill``, ``bfill`` and ``backfill`` is identical to using these methods with :meth:`fillna` (:issue:`12918`, :issue:`29146`)
1035-
- Bug in :meth:`DataFrame.interpolate` when called on a DataFrame with column names of string type was throwing a ValueError. The method is now independent of the type of column names (:issue:`33956`)
1036-
- Passing :class:`NA` into a format string using format specs will now work. For example ``"{:.1f}".format(pd.NA)`` would previously raise a ``ValueError``, but will now return the string ``"<NA>"`` (:issue:`34740`)
1037-
<<<<<<< HEAD
1038-
- Bug in :meth:`SeriesGroupBy.transform` now correctly handles missing values for `dropna=False` (:issue:`35014`)
1039-
>>>>>>> 90e9b6a10... update whatnew + styling improvements
1040-
=======
1041-
>>>>>>> 8c11b6072... removed 1.1 release note
10421028

10431029
MultiIndex
10441030
^^^^^^^^^^

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ Indexing
109109
Missing
110110
^^^^^^^
111111

112-
-
112+
- Bug in :meth:`SeriesGroupBy.transform` incorrectly handling missing values when ``dropna=False`` (:issue:`35014`)
113113
-
114114

115115
MultiIndex

pandas/_libs/tslib.pyx

-20
Original file line numberDiff line numberDiff line change
@@ -246,31 +246,11 @@ def array_with_unit_to_datetime(
246246
if ((fvalues < Timestamp.min.value).any()
247247
or (fvalues > Timestamp.max.value).any()):
248248
raise OutOfBoundsDatetime(f"cannot convert input with unit '{unit}'")
249-
<<<<<<< HEAD
250-
<<<<<<< HEAD
251-
=======
252-
>>>>>>> 7df44d10f... revert accidental changes
253249
result = (iresult * m).astype('M8[ns]')
254250
iresult = result.view('i8')
255251
iresult[mask] = NPY_NAT
256252
return result, tz
257253

258-
<<<<<<< HEAD
259-
=======
260-
# GH20445
261-
if values.dtype.kind == 'i':
262-
result = (iresult * m).astype('M8[ns]')
263-
iresult = result.view('i8')
264-
iresult[mask] = NPY_NAT
265-
return result, tz
266-
elif values.dtype.kind == 'f':
267-
result = (fresult * m_as_float).astype('M8[ns]')
268-
fresult = result.view('f8')
269-
fresult[mask] = NPY_NAT
270-
return result, tz
271-
>>>>>>> f1ae8f562... _libs/tslib.pyx added comments
272-
=======
273-
>>>>>>> 7df44d10f... revert accidental changes
274254
result = np.empty(n, dtype='M8[ns]')
275255
iresult = result.view('i8')
276256

pandas/_libs/tslibs/conversion.pyx

-4
Original file line numberDiff line numberDiff line change
@@ -120,11 +120,7 @@ cdef inline int64_t cast_from_unit(object ts, str unit) except? -1:
120120
return <int64_t>(base * m) + <int64_t>(frac * m)
121121

122122

123-
<<<<<<< HEAD
124123
cpdef inline (int64_t, int) precision_from_unit(str unit):
125-
=======
126-
cpdef inline object precision_from_unit(str unit):
127-
>>>>>>> 6b9d4de82... revert changes
128124
"""
129125
Return a casting of the unit represented to nanoseconds + the precision
130126
to round the fractional part.

pandas/core/groupby/generic.py

+9-11
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@
3939
maybe_cast_result_dtype,
4040
maybe_convert_objects,
4141
maybe_downcast_numeric,
42-
maybe_downcast_to_dtype,
4342
)
4443
from pandas.core.dtypes.common import (
4544
ensure_int64,
@@ -535,26 +534,25 @@ def _transform_general(
535534
if isinstance(res, (ABCDataFrame, ABCSeries)):
536535
res = res._values
537536

538-
indexer = self._get_index(name)
539-
ser = klass(res, indexer)
540-
results.append(ser)
537+
results.append(klass(res, index=group.index))
541538

542539
# check for empty "results" to avoid concat ValueError
543540
if results:
544541
from pandas.core.reshape.concat import concat
545542

546-
result = concat(results).sort_index()
543+
concatenated = concat(results)
544+
result = self._set_result_index_ordered(concatenated)
547545
else:
548546
result = self.obj._constructor(dtype=np.float64)
549-
550547
# we will only try to coerce the result type if
551548
# we have a numeric dtype, as these are *always* user-defined funcs
552549
# the cython take a different path (and casting)
553-
# make sure we don't accidentally upcast (GH35014)
554-
types = ["bool", "int32", "int64", "float32", "float64"]
555-
dtype = self._selected_obj.dtype
556-
if is_numeric_dtype(dtype) and types.index(dtype) < types.index(result.dtype):
557-
result = maybe_downcast_to_dtype(result, dtype)
550+
if is_numeric_dtype(result.dtype):
551+
common_dtype = np.find_common_type(
552+
[self._selected_obj.dtype, result.dtype], []
553+
)
554+
if common_dtype is result.dtype:
555+
result = maybe_downcast_numeric(result, self._selected_obj.dtype)
558556

559557
result.name = self._selected_obj.name
560558
result.index = self._selected_obj.index

pandas/core/groupby/groupby.py

+2-8
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ class providing the base-class of operations.
5454
)
5555
from pandas.core.dtypes.missing import isna, notna
5656

57-
import pandas as pd
5857
from pandas.core import nanops
5958
import pandas.core.algorithms as algorithms
6059
from pandas.core.arrays import Categorical, DatetimeArray
@@ -624,12 +623,7 @@ def get_converter(s):
624623
converter = get_converter(index_sample)
625624
names = (converter(name) for name in names)
626625

627-
return [
628-
self.indices.get(name, [])
629-
if not isna(name)
630-
else self.indices.get(pd.NaT, [])
631-
for name in names
632-
]
626+
return [self.indices.get(name, []) for name in names]
633627

634628
def _get_index(self, name):
635629
"""
@@ -813,7 +807,7 @@ def get_group(self, name, obj=None):
813807
if obj is None:
814808
obj = self._selected_obj
815809

816-
inds = self._get_index(pd.NaT) if pd.isna(name) else self._get_index(name)
810+
inds = self._get_index(name)
817811
if not len(inds):
818812
raise KeyError(name)
819813

pandas/core/groupby/grouper.py

+1-11
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
)
2121
from pandas.core.dtypes.generic import ABCSeries
2222

23-
import pandas as pd
2423
import pandas.core.algorithms as algorithms
2524
from pandas.core.arrays import Categorical, ExtensionArray
2625
import pandas.core.common as com
@@ -558,16 +557,7 @@ def indices(self):
558557
return self.grouper.indices
559558

560559
values = Categorical(self.grouper)
561-
562-
# GH35014
563-
reverse_indexer = values._reverse_indexer()
564-
if not self.dropna and any(pd.isna(v) for v in values):
565-
return {
566-
**reverse_indexer,
567-
pd.NaT: np.array([i for i, v in enumerate(values) if pd.isna(v)]),
568-
}
569-
else:
570-
return reverse_indexer
560+
return values._reverse_indexer()
571561

572562
@property
573563
def codes(self) -> np.ndarray:

0 commit comments

Comments
 (0)