
Commit 80d7f9d

Merge branch 'main' into reduction_dtypes_II
2 parents 52a7276 + 71cfd3a commit 80d7f9d

15 files changed: +138, -27 lines

ci/code_checks.sh (-7)

@@ -170,8 +170,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 pandas.Period.asfreq \
 pandas.Period.now \
 pandas.arrays.PeriodArray \
-pandas.arrays.IntervalArray.from_arrays \
-pandas.arrays.IntervalArray.to_tuples \
 pandas.Int8Dtype \
 pandas.Int16Dtype \
 pandas.Int32Dtype \
@@ -181,8 +179,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 pandas.UInt32Dtype \
 pandas.UInt64Dtype \
 pandas.NA \
-pandas.Float32Dtype \
-pandas.Float64Dtype \
 pandas.CategoricalDtype.categories \
 pandas.CategoricalDtype.ordered \
 pandas.Categorical.dtype \
@@ -258,9 +254,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
 pandas.util.hash_pandas_object \
 pandas_object \
 pandas.api.interchange.from_dataframe \
-pandas.Index.T \
-pandas.Index.memory_usage \
-pandas.Index.copy \
 pandas.Index.drop \
 pandas.Index.identical \
 pandas.Index.insert \

pandas/_libs/groupby.pyx (+7)

@@ -1075,6 +1075,13 @@ def group_mean(
 y = val - compensation[lab, j]
 t = sumx[lab, j] + y
 compensation[lab, j] = t - sumx[lab, j] - y
+if compensation[lab, j] != compensation[lab, j]:
+    # GH#50367
+    # If val is +/- infinity, compensation is NaN
+    # which would lead to results being NaN instead
+    # of +/-infinity. We cannot use util.is_nan
+    # because of no gil
+    compensation[lab, j] = 0.
 sumx[lab, j] = t

 for i in range(ncounts):
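
The inline comment above explains the intent; as a hedged illustration in plain Python (not the actual Cython kernel), this is how a +/-inf value turns the Kahan compensation term into NaN, and why resetting it keeps the running sum at +/-inf instead of NaN:

import numpy as np

# Kahan summation over values containing +inf (illustrative sketch only).
total, comp = 0.0, 0.0
for val in [np.inf, 2.0, 4.0]:
    y = val - comp
    t = total + y
    comp = t - total - y      # inf - inf == NaN on the first step
    if comp != comp:          # NaN check that needs no helper (mirrors the no-gil constraint)
        comp = 0.0
    total = t
print(total)  # inf; without the reset the NaN compensation would make this NaN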

pandas/_libs/tslibs/timedeltas.pyx (+1 -1)

@@ -1592,7 +1592,7 @@ cdef class _Timedelta(timedelta):
 
 def as_unit(self, str unit, bint round_ok=True):
     """
-    Convert the underlying int64 representaton to the given unit.
+    Convert the underlying int64 representation to the given unit.
 
     Parameters
     ----------

pandas/core/algorithms.py (+5 -2)

@@ -32,7 +32,10 @@
 from pandas.util._decorators import doc
 from pandas.util._exceptions import find_stack_level
 
-from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
+from pandas.core.dtypes.cast import (
+    construct_1d_object_array_from_listlike,
+    np_find_common_type,
+)
 from pandas.core.dtypes.common import (
     ensure_float64,
     ensure_object,
@@ -518,7 +521,7 @@ def f(c, v):
     f = np.in1d
 
 else:
-    common = np.find_common_type([values.dtype, comps_array.dtype], [])
+    common = np_find_common_type(values.dtype, comps_array.dtype)
     values = values.astype(common, copy=False)
     comps_array = comps_array.astype(common, copy=False)
     f = htable.ismember
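
The promotion step above is what lets isin compare values across dtypes. A hedged standalone sketch of the idea in plain NumPy (not the pandas internals), where both operands are cast to one common dtype before the membership check:

import numpy as np

# Cast both sides to a shared dtype so 1 (int64) and 1.0 (float64) compare equal.
values = np.array([1, 2, 3], dtype="int64")
comps = np.array([1.0, 2.5], dtype="float64")

common = np.result_type(values.dtype, comps.dtype)  # float64
values = values.astype(common, copy=False)
comps = comps.astype(common, copy=False)
print(np.isin(comps, values))  # [ True False]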

pandas/core/arrays/floating.py (+14)

@@ -134,6 +134,20 @@ class FloatingArray(NumericArray):
 Methods
 -------
 None
+
+Examples
+--------
+For Float32Dtype:
+
+>>> ser = pd.Series([2.25, pd.NA], dtype=pd.Float32Dtype())
+>>> ser.dtype
+Float32Dtype()
+
+For Float64Dtype:
+
+>>> ser = pd.Series([2.25, pd.NA], dtype=pd.Float64Dtype())
+>>> ser.dtype
+Float64Dtype()
 """
 
 # create the Dtype

pandas/core/arrays/interval.py (+23 -4)

@@ -509,6 +509,8 @@ def from_breaks(
     "name": "",
     "examples": textwrap.dedent(
         """\
+        Examples
+        --------
         >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
         <IntervalArray>
         [(0, 1], (1, 2], (2, 3]]
@@ -1635,9 +1637,8 @@ def __arrow_array__(self, type=None):
 
     return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
 
-_interval_shared_docs[
-    "to_tuples"
-] = """
+_interval_shared_docs["to_tuples"] = textwrap.dedent(
+    """
     Return an %(return_type)s of tuples of the form (left, right).
 
     Parameters
@@ -1651,9 +1652,27 @@ def __arrow_array__(self, type=None):
     tuples: %(return_type)s
     %(examples)s\
     """
+)
 
 @Appender(
-    _interval_shared_docs["to_tuples"] % {"return_type": "ndarray", "examples": ""}
+    _interval_shared_docs["to_tuples"]
+    % {
+        "return_type": "ndarray",
+        "examples": textwrap.dedent(
+            """\
+
+        Examples
+        --------
+        >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
+        >>> idx
+        <IntervalArray>
+        [(0, 1], (1, 2]]
+        Length: 2, dtype: interval[int64, right]
+        >>> idx.to_tuples()
+        array([(0, 1), (1, 2)], dtype=object)
+        """
+        ),
+    }
 )
 def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
     tuples = com.asarray_tuplesafe(zip(self._left, self._right))

pandas/core/base.py (+14)

@@ -300,6 +300,8 @@ def transpose(self, *args, **kwargs) -> Self:
 
 Examples
 --------
+For Series:
+
 >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
 >>> s
 0 Ant
@@ -311,6 +313,12 @@ def transpose(self, *args, **kwargs) -> Self:
 1 Bear
 2 Cow
 dtype: object
+
+For Index:
+
+>>> idx = pd.Index([1, 2, 3])
+>>> idx.T
+Index([1, 2, 3], dtype='int64')
 """,
 )
 
@@ -1088,6 +1096,12 @@ def _memory_usage(self, deep: bool = False) -> int:
 -----
 Memory usage does not include memory consumed by elements that
 are not components of the array if deep=False or if used on PyPy
+
+Examples
+--------
+>>> idx = pd.Index([1, 2, 3])
+>>> idx.memory_usage()
+24
 """
 if hasattr(self.array, "memory_usage"):
     return self.array.memory_usage(  # pyright: ignore[reportGeneralTypeIssues]

pandas/core/dtypes/cast.py (+27 -1)

@@ -1328,6 +1328,32 @@ def common_dtype_categorical_compat(
     return dtype
 
 
+def np_find_common_type(*dtypes: np.dtype) -> np.dtype:
+    """
+    np.find_common_type implementation pre-1.25 deprecation using np.result_type
+    https://github.com/pandas-dev/pandas/pull/49569#issuecomment-1308300065
+
+    Parameters
+    ----------
+    dtypes : np.dtypes
+
+    Returns
+    -------
+    np.dtype
+    """
+    try:
+        common_dtype = np.result_type(*dtypes)
+        if common_dtype.kind in "mMSU":
+            # NumPy promotion currently (1.25) misbehaves for times and strings,
+            # so fall back to object (find_common_dtype did unless there
+            # was only one dtype)
+            common_dtype = np.dtype("O")
+
+    except TypeError:
+        common_dtype = np.dtype("O")
+    return common_dtype
+
+
 @overload
 def find_common_type(types: list[np.dtype]) -> np.dtype:
     ...
@@ -1395,7 +1421,7 @@ def find_common_type(types):
     if t.kind in "iufc":
         return np.dtype("object")
 
-    return np.find_common_type(types, [])
+    return np_find_common_type(*types)
 
 
 def construct_2d_arraylike_from_scalar(
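
The new helper above is small enough to exercise on its own. A hedged sketch of its behaviour, reimplemented here so it runs without this pandas branch (the name np_find_common_type_sketch is illustrative, not a pandas API):

import numpy as np

def np_find_common_type_sketch(*dtypes: np.dtype) -> np.dtype:
    # Mirror of the helper added above: prefer np.result_type, but fall
    # back to object for datetime/timedelta/string kinds or when NumPy
    # cannot promote the inputs at all.
    try:
        common = np.result_type(*dtypes)
        if common.kind in "mMSU":
            common = np.dtype("O")
    except TypeError:
        common = np.dtype("O")
    return common

print(np_find_common_type_sketch(np.dtype("int64"), np.dtype("float32")))  # float64
print(np_find_common_type_sketch(np.dtype("int64"), np.dtype("U5")))  # object either way (string kind or TypeError)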

pandas/core/dtypes/concat.py (+4 -5)

@@ -17,6 +17,7 @@
 from pandas.core.dtypes.cast import (
     common_dtype_categorical_compat,
     find_common_type,
+    np_find_common_type,
 )
 from pandas.core.dtypes.dtypes import CategoricalDtype
 from pandas.core.dtypes.generic import (
@@ -156,11 +157,9 @@ def _get_result_dtype(
     target_dtype = np.dtype(object)
     kinds = {"o"}
 else:
-    # Argument 1 to "list" has incompatible type "Set[Union[ExtensionDtype,
-    # Any]]"; expected "Iterable[Union[dtype[Any], None, Type[Any],
-    # _SupportsDType[dtype[Any]], str, Tuple[Any, Union[SupportsIndex,
-    # Sequence[SupportsIndex]]], List[Any], _DTypeDict, Tuple[Any, Any]]]"
-    target_dtype = np.find_common_type(list(dtypes), [])  # type: ignore[arg-type]
+    # error: Argument 1 to "np_find_common_type" has incompatible type
+    # "*Set[Union[ExtensionDtype, Any]]"; expected "dtype[Any]"
+    target_dtype = np_find_common_type(*dtypes)  # type: ignore[arg-type]
 
 return any_ea, kinds, target_dtype

pandas/core/dtypes/dtypes.py (+4 -2)

@@ -1921,6 +1921,8 @@ def _subtype_with_str(self):
 def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
     # TODO for now only handle SparseDtypes and numpy dtypes => extend
     # with other compatible extension dtypes
+    from pandas.core.dtypes.cast import np_find_common_type
+
     if any(
         isinstance(x, ExtensionDtype) and not isinstance(x, SparseDtype)
         for x in dtypes
@@ -1943,8 +1945,8 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
             stacklevel=find_stack_level(),
         )
 
-    np_dtypes = [x.subtype if isinstance(x, SparseDtype) else x for x in dtypes]
-    return SparseDtype(np.find_common_type(np_dtypes, []), fill_value=fill_value)
+    np_dtypes = (x.subtype if isinstance(x, SparseDtype) else x for x in dtypes)
+    return SparseDtype(np_find_common_type(*np_dtypes), fill_value=fill_value)
 
 
 @register_extension_dtype

pandas/core/indexes/base.py (+7)

@@ -1221,6 +1221,13 @@ def copy(
 -----
 In most cases, there should be no functional difference from using
 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
+
+Examples
+--------
+>>> idx = pd.Index(['a', 'b', 'c'])
+>>> new_idx = idx.copy()
+>>> idx is new_idx
+False
 """
 
 name = self._validate_names(name=name, deep=deep)[0]

pandas/core/internals/array_manager.py (+2 -1)

@@ -29,6 +29,7 @@
     ensure_dtype_can_hold_na,
     find_common_type,
     infer_dtype_from_scalar,
+    np_find_common_type,
 )
 from pandas.core.dtypes.common import (
     ensure_platform_int,
@@ -1409,7 +1410,7 @@ def concat_arrays(to_concat: list) -> ArrayLike:
     target_dtype = to_concat_no_proxy[0].dtype
 elif all(x.kind in "iub" and isinstance(x, np.dtype) for x in dtypes):
     # GH#42092
-    target_dtype = np.find_common_type(list(dtypes), [])
+    target_dtype = np_find_common_type(*dtypes)
 else:
     target_dtype = find_common_type([arr.dtype for arr in to_concat_no_proxy])

pandas/tests/dtypes/test_inference.py (+1 -3)

@@ -997,9 +997,7 @@ def test_maybe_convert_objects_itemsize(self, data0, data1):
 data = [data0, data1]
 arr = np.array(data, dtype="object")
 
-common_kind = np.find_common_type(
-    [type(data0), type(data1)], scalar_types=[]
-).kind
+common_kind = np.result_type(type(data0), type(data1)).kind
 kind0 = "python" if not hasattr(data0, "dtype") else data0.dtype.kind
 kind1 = "python" if not hasattr(data1, "dtype") else data1.dtype.kind
 if kind0 != "python" and kind1 != "python":

pandas/tests/groupby/test_libgroupby.py (+20)

@@ -282,3 +282,23 @@ def test_cython_group_mean_not_datetimelike_but_has_NaT_values():
     tm.assert_numpy_array_equal(
         actual[:, 0], np.array(np.divide(np.add(data[0], data[1]), 2), dtype="float64")
     )
+
+
+def test_cython_group_mean_Inf_at_begining_and_end():
+    # GH 50367
+    actual = np.array([[np.nan, np.nan], [np.nan, np.nan]], dtype="float64")
+    counts = np.array([0, 0], dtype="int64")
+    data = np.array(
+        [[np.inf, 1.0], [1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0], [5, np.inf]],
+        dtype="float64",
+    )
+    labels = np.array([0, 1, 0, 1, 0, 1], dtype=np.intp)
+
+    group_mean(actual, counts, data, labels, is_datetimelike=False)
+
+    expected = np.array([[np.inf, 3], [3, np.inf]], dtype="float64")
+
+    tm.assert_numpy_array_equal(
+        actual,
+        expected,
+    )

web/pandas/community/ecosystem.md (+9 -1)

@@ -321,7 +321,14 @@ which support geometric operations. If your work entails maps and
 geographical coordinates, and you love pandas, you should take a close
 look at Geopandas.
 
-### [staricase](https://github.com/staircase-dev/staircase)
+### [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas)
+
+gurobipy-pandas provides a convenient accessor API to connect pandas with
+gurobipy. It enables users to more easily and efficiently build mathematical
+optimization models from data stored in DataFrames and Series, and to read
+solutions back directly as pandas objects.
+
+### [staircase](https://github.com/staircase-dev/staircase)
 
 staircase is a data analysis package, built upon pandas and numpy, for modelling and
 manipulation of mathematical step functions. It provides a rich variety of arithmetic
@@ -546,6 +553,7 @@ authors to coordinate on the namespace.
 | [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` |
 | [datatest](https://datatest.readthedocs.io/en/stable/) | `validate` | `Series`, `DataFrame` |
 | [composeml](https://github.com/alteryx/compose) | `slice` | `DataFrame` |
+| [gurobipy-pandas](https://github.com/Gurobi/gurobipy-pandas) | `gppd` | `Series`, `DataFrame` |
 | [staircase](https://www.staircase.dev/) | `sc` | `Series`, `DataFrame` |
 | [woodwork](https://github.com/alteryx/woodwork) | `slice` | `Series`, `DataFrame` |
