Skip to content

Commit f44d489

Browse files
author
MomIsBestFriend
committed
Merge remote-tracking branch 'upstream/master' into STY-repr-batch-3
2 parents 2d0dd2b + 40fdbea commit f44d489

27 files changed

+220
-296
lines changed

ci/print_skipped.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
def main(filename):
77
if not os.path.isfile(filename):
8-
raise RuntimeError(f"Could not find junit file {filename!r}")
8+
raise RuntimeError(f"Could not find junit file {repr(filename)}")
99

1010
tree = et.parse(filename)
1111
root = tree.getroot()

doc/redirects.csv

-3
Original file line numberDiff line numberDiff line change
@@ -360,7 +360,6 @@ generated/pandas.DataFrame.from_records,../reference/api/pandas.DataFrame.from_r
360360
generated/pandas.DataFrame.ge,../reference/api/pandas.DataFrame.ge
361361
generated/pandas.DataFrame.get,../reference/api/pandas.DataFrame.get
362362
generated/pandas.DataFrame.get_value,../reference/api/pandas.DataFrame.get_value
363-
generated/pandas.DataFrame.get_values,../reference/api/pandas.DataFrame.get_values
364363
generated/pandas.DataFrame.groupby,../reference/api/pandas.DataFrame.groupby
365364
generated/pandas.DataFrame.gt,../reference/api/pandas.DataFrame.gt
366365
generated/pandas.DataFrame.head,../reference/api/pandas.DataFrame.head
@@ -643,7 +642,6 @@ generated/pandas.Index.get_level_values,../reference/api/pandas.Index.get_level_
643642
generated/pandas.Index.get_loc,../reference/api/pandas.Index.get_loc
644643
generated/pandas.Index.get_slice_bound,../reference/api/pandas.Index.get_slice_bound
645644
generated/pandas.Index.get_value,../reference/api/pandas.Index.get_value
646-
generated/pandas.Index.get_values,../reference/api/pandas.Index.get_values
647645
generated/pandas.Index.groupby,../reference/api/pandas.Index.groupby
648646
generated/pandas.Index.has_duplicates,../reference/api/pandas.Index.has_duplicates
649647
generated/pandas.Index.hasnans,../reference/api/pandas.Index.hasnans
@@ -1044,7 +1042,6 @@ generated/pandas.Series.from_csv,../reference/api/pandas.Series.from_csv
10441042
generated/pandas.Series.ge,../reference/api/pandas.Series.ge
10451043
generated/pandas.Series.get,../reference/api/pandas.Series.get
10461044
generated/pandas.Series.get_value,../reference/api/pandas.Series.get_value
1047-
generated/pandas.Series.get_values,../reference/api/pandas.Series.get_values
10481045
generated/pandas.Series.groupby,../reference/api/pandas.Series.groupby
10491046
generated/pandas.Series.gt,../reference/api/pandas.Series.gt
10501047
generated/pandas.Series.hasnans,../reference/api/pandas.Series.hasnans

doc/source/reference/frame.rst

-1
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ Attributes and underlying data
3030
DataFrame.dtypes
3131
DataFrame.select_dtypes
3232
DataFrame.values
33-
DataFrame.get_values
3433
DataFrame.axes
3534
DataFrame.ndim
3635
DataFrame.size

doc/source/reference/indexing.rst

-1
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,6 @@ Selecting
159159
Index.get_loc
160160
Index.get_slice_bound
161161
Index.get_value
162-
Index.get_values
163162
Index.isin
164163
Index.slice_indexer
165164
Index.slice_locs

doc/source/reference/series.rst

-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ Conversion
5353
Series.to_period
5454
Series.to_timestamp
5555
Series.to_list
56-
Series.get_values
5756
Series.__array__
5857

5958
Indexing, iteration

pandas/_libs/missing.pyx

+2-1
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,8 @@ cdef inline bint is_null_period(v):
289289
def _create_binary_propagating_op(name, divmod=False):
290290

291291
def method(self, other):
292-
if other is C_NA or isinstance(other, str) or isinstance(other, numbers.Number):
292+
if (other is C_NA or isinstance(other, str)
293+
or isinstance(other, (numbers.Number, np.bool_))):
293294
if divmod:
294295
return NA, NA
295296
else:

pandas/_libs/reduction.pyx

+1-10
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,6 @@ cdef _check_result_array(object obj, Py_ssize_t cnt):
2626
raise ValueError('Function does not reduce')
2727

2828

29-
cdef bint _is_sparse_array(object obj):
30-
# TODO can be removed once SparseArray.values is removed (GH26421)
31-
if hasattr(obj, '_subtyp'):
32-
if obj._subtyp == 'sparse_array':
33-
return True
34-
return False
35-
36-
3729
cdef class Reducer:
3830
"""
3931
Performs generic reduction operation on a C or Fortran-contiguous ndarray
@@ -404,8 +396,7 @@ cdef class SeriesGrouper(_BaseGrouper):
404396
cdef inline _extract_result(object res, bint squeeze=True):
405397
""" extract the result object, it might be a 0-dim ndarray
406398
or a len-1 0-dim, or a scalar """
407-
if (not _is_sparse_array(res) and hasattr(res, 'values')
408-
and util.is_array(res.values)):
399+
if hasattr(res, 'values') and util.is_array(res.values):
409400
res = res.values
410401
if util.is_array(res):
411402
if res.ndim == 0:

pandas/compat/__init__.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,11 @@ def is_platform_32bit() -> bool:
9898

9999
def _import_lzma():
100100
"""
101-
Attempts to import the lzma module.
101+
Importing the `lzma` module.
102102
103103
Warns
104104
-----
105-
When the lzma module is not available.
105+
When the `lzma` module is not available.
106106
"""
107107
try:
108108
import lzma
@@ -119,22 +119,22 @@ def _import_lzma():
119119

120120
def _get_lzma_file(lzma):
121121
"""
122-
Attempting to get the lzma.LZMAFile class.
122+
Importing the `LZMAFile` class from the `lzma` module.
123123
124124
Returns
125125
-------
126126
class
127-
The lzma.LZMAFile class.
127+
The `LZMAFile` class from the `lzma` module.
128128
129129
Raises
130130
------
131131
RuntimeError
132-
If the module lzma was not imported correctly, or didn't exist.
132+
If the `lzma` module was not imported correctly, or didn't exist.
133133
"""
134134
if lzma is None:
135135
raise RuntimeError(
136136
"lzma module not available. "
137-
"A Python re-install with the proper "
138-
"dependencies might be required to solve this issue."
137+
"A Python re-install with the proper dependencies, "
138+
"might be required to solve this issue."
139139
)
140140
return lzma.LZMAFile

pandas/core/arrays/boolean.py

+52-43
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import numbers
2-
from typing import TYPE_CHECKING, Type
2+
from typing import TYPE_CHECKING, Any, Tuple, Type
33
import warnings
44

55
import numpy as np
66

7-
from pandas._libs import lib
7+
from pandas._libs import lib, missing as libmissing
88
from pandas.compat import set_function_name
99

1010
from pandas.core.dtypes.base import ExtensionDtype
@@ -61,13 +61,13 @@ class BooleanDtype(ExtensionDtype):
6161
@property
6262
def na_value(self) -> "Scalar":
6363
"""
64-
BooleanDtype uses :attr:`numpy.nan` as the missing NA value.
64+
BooleanDtype uses :attr:`pandas.NA` as the missing NA value.
6565
6666
.. warning::
6767
6868
`na_value` may change in a future release.
6969
"""
70-
return np.nan
70+
return libmissing.NA
7171

7272
@property
7373
def type(self) -> Type:
@@ -223,7 +223,7 @@ class BooleanArray(ExtensionArray, ExtensionOpsMixin):
223223
224224
>>> pd.array([True, False, None], dtype="boolean")
225225
<BooleanArray>
226-
[True, False, NaN]
226+
[True, False, NA]
227227
Length: 3, dtype: boolean
228228
"""
229229

@@ -262,17 +262,17 @@ def _from_sequence(cls, scalars, dtype=None, copy: bool = False):
262262
values, mask = coerce_to_array(scalars, copy=copy)
263263
return BooleanArray(values, mask)
264264

265+
def _values_for_factorize(self) -> Tuple[np.ndarray, Any]:
266+
data = self._data.astype("int8")
267+
data[self._mask] = -1
268+
return data, -1
269+
265270
@classmethod
266271
def _from_factorized(cls, values, original: "BooleanArray"):
267272
return cls._from_sequence(values, dtype=original.dtype)
268273

269274
def _formatter(self, boxed=False):
270-
def fmt(x):
271-
if isna(x):
272-
return "NaN"
273-
return str(x)
274-
275-
return fmt
275+
return str
276276

277277
def __getitem__(self, item):
278278
if is_integer(item):
@@ -281,25 +281,29 @@ def __getitem__(self, item):
281281
return self._data[item]
282282
return type(self)(self._data[item], self._mask[item])
283283

284-
def _coerce_to_ndarray(self, force_bool: bool = False):
284+
def _coerce_to_ndarray(self, dtype=None, na_value: "Scalar" = libmissing.NA):
285285
"""
286286
Coerce to an ndarary of object dtype or bool dtype (if force_bool=True).
287287
288288
Parameters
289289
----------
290-
force_bool : bool, default False
291-
If True, return bool array or raise error if not possible (in
292-
presence of missing values)
290+
dtype : dtype, default object
291+
The numpy dtype to convert to
292+
na_value : scalar, optional
293+
Scalar missing value indicator to use in numpy array. Defaults
294+
to the native missing value indicator of this array (pd.NA).
293295
"""
294-
if force_bool:
296+
if dtype is None:
297+
dtype = object
298+
if is_bool_dtype(dtype):
295299
if not self.isna().any():
296300
return self._data
297301
else:
298302
raise ValueError(
299303
"cannot convert to bool numpy array in presence of missing values"
300304
)
301-
data = self._data.astype(object)
302-
data[self._mask] = self._na_value
305+
data = self._data.astype(dtype)
306+
data[self._mask] = na_value
303307
return data
304308

305309
__array_priority__ = 1000 # higher than ndarray so ops dispatch to us
@@ -309,15 +313,8 @@ def __array__(self, dtype=None):
309313
the array interface, return my values
310314
We return an object array here to preserve our scalar values
311315
"""
312-
if dtype is not None:
313-
if is_bool_dtype(dtype):
314-
return self._coerce_to_ndarray(force_bool=True)
315-
# TODO can optimize this to not go through object dtype for
316-
# numeric dtypes
317-
arr = self._coerce_to_ndarray()
318-
return arr.astype(dtype, copy=False)
319316
# by default (no dtype specified), return an object array
320-
return self._coerce_to_ndarray()
317+
return self._coerce_to_ndarray(dtype=dtype)
321318

322319
def __arrow_array__(self, type=None):
323320
"""
@@ -483,8 +480,17 @@ def astype(self, dtype, copy=True):
483480
return IntegerArray(
484481
self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
485482
)
483+
# for integer, error if there are missing values
484+
if is_integer_dtype(dtype):
485+
if self.isna().any():
486+
raise ValueError("cannot convert NA to integer")
487+
# for float dtype, ensure we use np.nan before casting (numpy cannot
488+
# deal with pd.NA)
489+
na_value = self._na_value
490+
if is_float_dtype(dtype):
491+
na_value = np.nan
486492
# coerce
487-
data = self._coerce_to_ndarray()
493+
data = self._coerce_to_ndarray(na_value=na_value)
488494
return astype_nansafe(data, dtype, copy=None)
489495

490496
def value_counts(self, dropna=True):
@@ -594,8 +600,6 @@ def logical_method(self, other):
594600

595601
@classmethod
596602
def _create_comparison_method(cls, op):
597-
op_name = op.__name__
598-
599603
def cmp_method(self, other):
600604

601605
if isinstance(other, (ABCDataFrame, ABCSeries, ABCIndexClass)):
@@ -617,21 +621,26 @@ def cmp_method(self, other):
617621
if len(self) != len(other):
618622
raise ValueError("Lengths must match to compare")
619623

620-
# numpy will show a DeprecationWarning on invalid elementwise
621-
# comparisons, this will raise in the future
622-
with warnings.catch_warnings():
623-
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
624-
with np.errstate(all="ignore"):
625-
result = op(self._data, other)
626-
627-
# nans propagate
628-
if mask is None:
629-
mask = self._mask
624+
if other is libmissing.NA:
625+
# numpy does not handle pd.NA well as "other" scalar (it returns
626+
# a scalar False instead of an array)
627+
result = np.zeros_like(self._data)
628+
mask = np.ones_like(self._data)
630629
else:
631-
mask = self._mask | mask
630+
# numpy will show a DeprecationWarning on invalid elementwise
631+
# comparisons, this will raise in the future
632+
with warnings.catch_warnings():
633+
warnings.filterwarnings("ignore", "elementwise", FutureWarning)
634+
with np.errstate(all="ignore"):
635+
result = op(self._data, other)
636+
637+
# nans propagate
638+
if mask is None:
639+
mask = self._mask.copy()
640+
else:
641+
mask = self._mask | mask
632642

633-
result[mask] = op_name == "ne"
634-
return BooleanArray(result, np.zeros(len(result), dtype=bool), copy=False)
643+
return BooleanArray(result, mask, copy=False)
635644

636645
name = "__{name}__".format(name=op.__name__)
637646
return set_function_name(cmp_method, name, cls)
@@ -643,7 +652,7 @@ def _reduce(self, name, skipna=True, **kwargs):
643652
# coerce to a nan-aware float if needed
644653
if mask.any():
645654
data = self._data.astype("float64")
646-
data[mask] = self._na_value
655+
data[mask] = np.nan
647656

648657
op = getattr(nanops, "nan" + name)
649658
result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

pandas/core/arrays/categorical.py

+3-16
Original file line numberDiff line numberDiff line change
@@ -302,9 +302,7 @@ class Categorical(ExtensionArray, PandasObject):
302302
__array_priority__ = 1000
303303
_dtype = CategoricalDtype(ordered=False)
304304
# tolist is not actually deprecated, just suppressed in the __dir__
305-
_deprecations = PandasObject._deprecations | frozenset(
306-
["tolist", "itemsize", "get_values"]
307-
)
305+
_deprecations = PandasObject._deprecations | frozenset(["tolist", "itemsize"])
308306
_typ = "categorical"
309307

310308
def __init__(
@@ -1461,29 +1459,18 @@ def value_counts(self, dropna=True):
14611459

14621460
return Series(count, index=CategoricalIndex(ix), dtype="int64")
14631461

1464-
def get_values(self):
1462+
def _internal_get_values(self):
14651463
"""
14661464
Return the values.
14671465
1468-
.. deprecated:: 0.25.0
1469-
14701466
For internal compatibility with pandas formatting.
14711467
14721468
Returns
14731469
-------
1474-
numpy.array
1470+
np.ndarray or Index
14751471
A numpy array of the same dtype as categorical.categories.dtype or
14761472
Index if datetime / periods.
14771473
"""
1478-
warn(
1479-
"The 'get_values' method is deprecated and will be removed in a "
1480-
"future version",
1481-
FutureWarning,
1482-
stacklevel=2,
1483-
)
1484-
return self._internal_get_values()
1485-
1486-
def _internal_get_values(self):
14871474
# if we are a datetime and period index, return Index to keep metadata
14881475
if needs_i8_conversion(self.categories):
14891476
return self.categories.take(self._codes, fill_value=np.nan)

0 commit comments

Comments
 (0)