Skip to content

Commit 1d0ba61

Browse files
committed
merge with master
2 parents 9e4ac71 + 42fd7e7 commit 1d0ba61

File tree

17 files changed

+442
-77
lines changed

17 files changed

+442
-77
lines changed

doc/source/development/code_style.rst

+15
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,18 @@ For example:
159159

160160
# wrong
161161
from common import test_base
162+
163+
164+
Miscellaneous
165+
=============
166+
167+
Reading from a url
168+
------------------
169+
170+
**Good:**
171+
172+
.. code-block:: python
173+
174+
from pandas.io.common import urlopen
175+
with urlopen('http://www.google.com') as url:
176+
raw_text = url.read()

doc/source/whatsnew/v1.1.0.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ Other enhancements
280280
- :meth:`Styler.highlight_null` now accepts ``subset`` argument (:issue:`31345`)
281281
- When writing directly to a sqlite connection :func:`to_sql` now supports the ``multi`` method (:issue:`29921`)
282282
- `OptionError` is now exposed in `pandas.errors` (:issue:`27553`)
283+
- Add :meth:`ExtensionArray.argmax` and :meth:`ExtensionArray.argmin` (:issue:`24382`)
283284
- :func:`timedelta_range` will now infer a frequency when passed ``start``, ``stop``, and ``periods`` (:issue:`32377`)
284285
- Positional slicing on a :class:`IntervalIndex` now supports slices with ``step > 1`` (:issue:`31658`)
285286
- :class:`Series.str` now has a `fullmatch` method that matches a regular expression against the entire string in each row of the series, similar to `re.fullmatch` (:issue:`32806`).
@@ -321,7 +322,7 @@ Other enhancements
321322
- :meth:`DataFrame.hist`, :meth:`Series.hist`, :meth:`core.groupby.DataFrameGroupBy.hist`, and :meth:`core.groupby.SeriesGroupBy.hist` have gained the ``legend`` argument. Set to True to show a legend in the histogram. (:issue:`6279`)
322323
- :func:`concat` and :meth:`~DataFrame.append` now preserve extension dtypes, for example
323324
combining a nullable integer column with a numpy integer column will no longer
324-
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`).
325+
result in object dtype but preserve the integer dtype (:issue:`33607`, :issue:`34339`, :issue:`34095`).
325326
- :meth:`~pandas.io.gbq.read_gbq` now allows disabling the progress bar (:issue:`33360`).
326327
- :meth:`~pandas.io.gbq.read_gbq` now supports the ``max_results`` kwarg from ``pandas-gbq`` (:issue:`34639`).
327328
- :meth:`DataFrame.cov` and :meth:`Series.cov` now accept a new parameter ``ddof`` to specify the delta degrees of freedom, as in the corresponding numpy methods (:issue:`34611`).
@@ -1124,6 +1125,7 @@ Sparse
11241125
- Bug where :class:`DataFrame` containing :class:`SparseArray` filled with ``NaN`` when indexed by a list-like (:issue:`27781`, :issue:`29563`)
11251126
- The repr of :class:`SparseDtype` now includes the repr of its ``fill_value`` attribute. Previously it used ``fill_value``'s string representation (:issue:`34352`)
11261127
- Bug where empty :class:`DataFrame` could not be cast to :class:`SparseDtype` (:issue:`33113`)
1128+
- Bug where :class:`arrays.SparseArray` returned the incorrect type when indexing a sparse :class:`DataFrame` with an iterable (:issue:`34526`, :issue:`34540`)
11271129

11281130
ExtensionArray
11291131
^^^^^^^^^^^^^^

pandas/core/apply.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -291,16 +291,14 @@ def apply_series_generator(self, partial_result=None) -> Tuple[ResType, "Index"]
291291
res_index = res_index.take(successes)
292292

293293
else:
294-
for i, v in series_gen_enumeration:
295-
296-
with option_context("mode.chained_assignment", None):
294+
with option_context("mode.chained_assignment", None):
295+
for i, v in series_gen_enumeration:
297296
# ignore SettingWithCopy here in case the user mutates
298297
results[i] = self.f(v)
299-
300-
if isinstance(results[i], ABCSeries):
301-
# If we have a view on v, we need to make a copy because
302-
# series_generator will swap out the underlying data
303-
results[i] = results[i].copy(deep=False)
298+
if isinstance(results[i], ABCSeries):
299+
# If we have a view on v, we need to make a copy because
300+
# series_generator will swap out the underlying data
301+
results[i] = results[i].copy(deep=False)
304302

305303
return results, res_index
306304

pandas/core/arrays/base.py

+35-1
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from pandas.core import ops
2929
from pandas.core.algorithms import _factorize_array, unique
3030
from pandas.core.missing import backfill_1d, pad_1d
31-
from pandas.core.sorting import nargsort
31+
from pandas.core.sorting import nargminmax, nargsort
3232

3333
_extension_array_shared_docs: Dict[str, str] = dict()
3434

@@ -533,6 +533,40 @@ def argsort(
533533
result = nargsort(self, kind=kind, ascending=ascending, na_position="last")
534534
return result
535535

536+
def argmin(self):
537+
"""
538+
Return the index of minimum value.
539+
540+
In case of multiple occurrences of the minimum value, the index
541+
corresponding to the first occurrence is returned.
542+
543+
Returns
544+
-------
545+
int
546+
547+
See Also
548+
--------
549+
ExtensionArray.argmax
550+
"""
551+
return nargminmax(self, "argmin")
552+
553+
def argmax(self):
554+
"""
555+
Return the index of maximum value.
556+
557+
In case of multiple occurrences of the maximum value, the index
558+
corresponding to the first occurrence is returned.
559+
560+
Returns
561+
-------
562+
int
563+
564+
See Also
565+
--------
566+
ExtensionArray.argmin
567+
"""
568+
return nargminmax(self, "argmax")
569+
536570
def fillna(self, value=None, method=None, limit=None):
537571
"""
538572
Fill NA/NaN values using the specified method.

pandas/core/arrays/integer.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,13 @@ def construct_array_type(cls) -> Type["IntegerArray"]:
9292
return IntegerArray
9393

9494
def _get_common_dtype(self, dtypes: List[DtypeObj]) -> Optional[DtypeObj]:
95-
# for now only handle other integer types
95+
# we only handle nullable EA dtypes and numeric numpy dtypes
9696
if not all(
97-
isinstance(t, _IntegerDtype)
98-
or (isinstance(t, np.dtype) and np.issubdtype(t, np.integer))
97+
isinstance(t, BaseMaskedDtype)
98+
or (
99+
isinstance(t, np.dtype)
100+
and (np.issubdtype(t, np.number) or np.issubdtype(t, np.bool_))
101+
)
99102
for t in dtypes
100103
):
101104
return None

pandas/core/arrays/sparse/array.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -866,11 +866,8 @@ def _take_with_fill(self, indices, fill_value=None) -> np.ndarray:
866866

867867
if self.sp_index.npoints == 0:
868868
# Avoid taking from the empty self.sp_values
869-
taken = np.full(
870-
sp_indexer.shape,
871-
fill_value=fill_value,
872-
dtype=np.result_type(type(fill_value)),
873-
)
869+
_dtype = np.result_type(self.dtype.subtype, type(fill_value))
870+
taken = np.full(sp_indexer.shape, fill_value=fill_value, dtype=_dtype)
874871
else:
875872
taken = self.sp_values.take(sp_indexer)
876873

pandas/core/indexing.py

+4
Original file line numberDiff line numberDiff line change
@@ -1165,6 +1165,10 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
11651165
if len(key) == labels.nlevels:
11661166
return {"key": key}
11671167
raise
1168+
except InvalidIndexError:
1169+
# GH35015, using datetime as column indices raises exception
1170+
if not isinstance(labels, ABCMultiIndex):
1171+
raise
11681172
except TypeError:
11691173
pass
11701174
except ValueError:

pandas/core/sorting.py

+27
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,33 @@ def nargsort(
319319
return indexer
320320

321321

322+
def nargminmax(values, method: str):
323+
"""
324+
Implementation of np.argmin/argmax but for ExtensionArray and which
325+
handles missing values.
326+
327+
Parameters
328+
----------
329+
values : ExtensionArray
330+
method : {"argmax", "argmin"}
331+
332+
Returns
333+
-------
334+
int
335+
"""
336+
assert method in {"argmax", "argmin"}
337+
func = np.argmax if method == "argmax" else np.argmin
338+
339+
mask = np.asarray(isna(values))
340+
values = values._values_for_argsort()
341+
342+
idx = np.arange(len(values))
343+
non_nans = values[~mask]
344+
non_nan_idx = idx[~mask]
345+
346+
return non_nan_idx[func(non_nans)]
347+
348+
322349
def ensure_key_mapped_multiindex(index, key: Callable, level=None):
323350
"""
324351
Returns a new MultiIndex in which key has been applied

pandas/tests/arrays/integer/test_concat.py

+43-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import numpy as np
12
import pytest
23

34
import pandas as pd
@@ -15,12 +16,52 @@
1516
(["Int32", "UInt32"], "Int64"),
1617
# this still gives object (awaiting float extension dtype)
1718
(["Int64", "UInt64"], "object"),
19+
(["Int64", "boolean"], "Int64"),
20+
(["UInt8", "boolean"], "UInt8"),
1821
],
1922
)
2023
def test_concat_series(to_concat_dtypes, result_dtype):
2124

22-
result = pd.concat([pd.Series([1, 2, pd.NA], dtype=t) for t in to_concat_dtypes])
23-
expected = pd.concat([pd.Series([1, 2, pd.NA], dtype=object)] * 2).astype(
25+
result = pd.concat([pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes])
26+
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
2427
result_dtype
2528
)
2629
tm.assert_series_equal(result, expected)
30+
31+
# order doesn't matter for result
32+
result = pd.concat(
33+
[pd.Series([0, 1, pd.NA], dtype=t) for t in to_concat_dtypes[::-1]]
34+
)
35+
expected = pd.concat([pd.Series([0, 1, pd.NA], dtype=object)] * 2).astype(
36+
result_dtype
37+
)
38+
tm.assert_series_equal(result, expected)
39+
40+
41+
@pytest.mark.parametrize(
42+
"to_concat_dtypes, result_dtype",
43+
[
44+
(["Int64", "int64"], "Int64"),
45+
(["UInt64", "uint64"], "UInt64"),
46+
(["Int8", "int8"], "Int8"),
47+
(["Int8", "int16"], "Int16"),
48+
(["UInt8", "int8"], "Int16"),
49+
(["Int32", "uint32"], "Int64"),
50+
# this still gives object (awaiting float extension dtype)
51+
(["Int64", "uint64"], "object"),
52+
(["Int64", "bool"], "Int64"),
53+
(["UInt8", "bool"], "UInt8"),
54+
],
55+
)
56+
def test_concat_series_with_numpy(to_concat_dtypes, result_dtype):
57+
58+
s1 = pd.Series([0, 1, pd.NA], dtype=to_concat_dtypes[0])
59+
s2 = pd.Series(np.array([0, 1], dtype=to_concat_dtypes[1]))
60+
result = pd.concat([s1, s2], ignore_index=True)
61+
expected = pd.Series([0, 1, pd.NA, 0, 1], dtype=object).astype(result_dtype)
62+
tm.assert_series_equal(result, expected)
63+
64+
# order doesn't matter for result
65+
result = pd.concat([s2, s1], ignore_index=True)
66+
expected = pd.Series([0, 1, 0, 1, pd.NA], dtype=object).astype(result_dtype)
67+
tm.assert_series_equal(result, expected)

pandas/tests/extension/base/methods.py

+36
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,42 @@ def test_argsort_missing(self, data_missing_for_sorting):
7575
expected = pd.Series(np.array([1, -1, 0], dtype=np.int64))
7676
self.assert_series_equal(result, expected)
7777

78+
def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting, na_value):
79+
# GH 24382
80+
81+
# data_for_sorting -> [B, C, A] with A < B < C
82+
assert data_for_sorting.argmax() == 1
83+
assert data_for_sorting.argmin() == 2
84+
85+
# with repeated values -> first occurrence
86+
data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
87+
assert data.argmax() == 3
88+
assert data.argmin() == 0
89+
90+
# with missing values
91+
# data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
92+
assert data_missing_for_sorting.argmax() == 0
93+
assert data_missing_for_sorting.argmin() == 2
94+
95+
@pytest.mark.parametrize(
96+
"method", ["argmax", "argmin"],
97+
)
98+
def test_argmin_argmax_empty_array(self, method, data):
99+
# GH 24382
100+
err_msg = "attempt to get"
101+
with pytest.raises(ValueError, match=err_msg):
102+
getattr(data[:0], method)()
103+
104+
@pytest.mark.parametrize(
105+
"method", ["argmax", "argmin"],
106+
)
107+
def test_argmin_argmax_all_na(self, method, data, na_value):
108+
# all missing with skipna=True is the same as empty
109+
err_msg = "attempt to get"
110+
data_na = type(data)._from_sequence([na_value, na_value], dtype=data.dtype)
111+
with pytest.raises(ValueError, match=err_msg):
112+
getattr(data_na, method)()
113+
78114
@pytest.mark.parametrize(
79115
"na_position, expected",
80116
[

pandas/tests/extension/test_boolean.py

+17
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,23 @@ def test_searchsorted(self, data_for_sorting, as_series):
235235
def test_value_counts(self, all_data, dropna):
236236
return super().test_value_counts(all_data, dropna)
237237

238+
def test_argmin_argmax(self, data_for_sorting, data_missing_for_sorting):
239+
# override because there are only 2 unique values
240+
241+
# data_for_sorting -> [B, C, A] with A < B < C -> here True, True, False
242+
assert data_for_sorting.argmax() == 0
243+
assert data_for_sorting.argmin() == 2
244+
245+
# with repeated values -> first occurrence
246+
data = data_for_sorting.take([2, 0, 0, 1, 1, 2])
247+
assert data.argmax() == 1
248+
assert data.argmin() == 0
249+
250+
# with missing values
251+
# data_missing_for_sorting -> [B, NA, A] with A < B and NA missing.
252+
assert data_missing_for_sorting.argmax() == 0
253+
assert data_missing_for_sorting.argmin() == 2
254+
238255

239256
class TestCasting(base.BaseCastingTests):
240257
pass

pandas/tests/extension/test_sparse.py

+8
Original file line numberDiff line numberDiff line change
@@ -321,6 +321,14 @@ def test_shift_0_periods(self, data):
321321
data._sparse_values[0] = data._sparse_values[1]
322322
assert result._sparse_values[0] != result._sparse_values[1]
323323

324+
@pytest.mark.parametrize(
325+
"method", ["argmax", "argmin"],
326+
)
327+
def test_argmin_argmax_all_na(self, method, data, na_value):
328+
# overriding because Sparse[int64, 0] cannot handle na_value
329+
self._check_unsupported(data)
330+
super().test_argmin_argmax_all_na(method, data, na_value)
331+
324332
@pytest.mark.parametrize("box", [pd.array, pd.Series, pd.DataFrame])
325333
def test_equals(self, data, na_value, as_series, box):
326334
self._check_unsupported(data)

pandas/tests/frame/indexing/test_indexing.py

-15
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
notna,
2222
)
2323
import pandas._testing as tm
24-
from pandas.arrays import SparseArray
2524
import pandas.core.common as com
2625
from pandas.core.indexing import IndexingError
2726

@@ -1907,20 +1906,6 @@ def test_getitem_ix_float_duplicates(self):
19071906
expect = df.iloc[[1, -1], 0]
19081907
tm.assert_series_equal(df.loc[0.2, "a"], expect)
19091908

1910-
def test_getitem_sparse_column(self):
1911-
# https://github.com/pandas-dev/pandas/issues/23559
1912-
data = SparseArray([0, 1])
1913-
df = pd.DataFrame({"A": data})
1914-
expected = pd.Series(data, name="A")
1915-
result = df["A"]
1916-
tm.assert_series_equal(result, expected)
1917-
1918-
result = df.iloc[:, 0]
1919-
tm.assert_series_equal(result, expected)
1920-
1921-
result = df.loc[:, "A"]
1922-
tm.assert_series_equal(result, expected)
1923-
19241909
def test_setitem_with_unaligned_tz_aware_datetime_column(self):
19251910
# GH 12981
19261911
# Assignment of unaligned offset-aware datetime series.

0 commit comments

Comments
 (0)