Skip to content

Commit 81b1016

Browse files
Merge remote-tracking branch 'upstream/master' into refactop-manager-ops
2 parents c454025 + 9f792cd commit 81b1016

31 files changed

+453
-376
lines changed

doc/make.py

+14
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,14 @@ def __init__(
4141
self,
4242
num_jobs=0,
4343
include_api=True,
44+
whatsnew=False,
4445
single_doc=None,
4546
verbosity=0,
4647
warnings_are_errors=False,
4748
):
4849
self.num_jobs = num_jobs
4950
self.include_api = include_api
51+
self.whatsnew = whatsnew
5052
self.verbosity = verbosity
5153
self.warnings_are_errors = warnings_are_errors
5254

@@ -56,6 +58,8 @@ def __init__(
5658
os.environ["SPHINX_PATTERN"] = single_doc
5759
elif not include_api:
5860
os.environ["SPHINX_PATTERN"] = "-api"
61+
elif whatsnew:
62+
os.environ["SPHINX_PATTERN"] = "whatsnew"
5963

6064
self.single_doc_html = None
6165
if single_doc and single_doc.endswith(".rst"):
@@ -235,6 +239,9 @@ def html(self):
235239
self._open_browser(self.single_doc_html)
236240
else:
237241
self._add_redirects()
242+
if self.whatsnew:
243+
self._open_browser(os.path.join("whatsnew", "index.html"))
244+
238245
return ret_code
239246

240247
def latex(self, force=False):
@@ -302,6 +309,12 @@ def main():
302309
argparser.add_argument(
303310
"--no-api", default=False, help="omit api and autosummary", action="store_true"
304311
)
312+
argparser.add_argument(
313+
"--whatsnew",
314+
default=False,
315+
help="only build whatsnew (and api for links)",
316+
action="store_true",
317+
)
305318
argparser.add_argument(
306319
"--single",
307320
metavar="FILENAME",
@@ -353,6 +366,7 @@ def main():
353366
builder = DocBuilder(
354367
args.num_jobs,
355368
not args.no_api,
369+
args.whatsnew,
356370
args.single,
357371
args.verbosity,
358372
args.warnings_are_errors,

doc/source/conf.py

+9-3
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@
9191
# (e.g. '10min.rst' or 'pandas.DataFrame.head')
9292
source_path = os.path.dirname(os.path.abspath(__file__))
9393
pattern = os.environ.get("SPHINX_PATTERN")
94-
single_doc = pattern is not None and pattern != "-api"
95-
include_api = pattern != "-api"
94+
single_doc = pattern is not None and pattern not in ("-api", "whatsnew")
95+
include_api = pattern is None or pattern == "whatsnew"
9696
if pattern:
9797
for dirname, dirs, fnames in os.walk(source_path):
9898
reldir = os.path.relpath(dirname, source_path)
@@ -104,7 +104,13 @@
104104
continue
105105
elif pattern == "-api" and reldir.startswith("reference"):
106106
exclude_patterns.append(fname)
107-
elif pattern != "-api" and fname != pattern:
107+
elif (
108+
pattern == "whatsnew"
109+
and not reldir.startswith("reference")
110+
and reldir != "whatsnew"
111+
):
112+
exclude_patterns.append(fname)
113+
elif single_doc and fname != pattern:
108114
exclude_patterns.append(fname)
109115

110116
with open(os.path.join(source_path, "index.rst.template")) as f:

doc/source/development/contributing.rst

+4
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,10 @@ reducing the turn-around time for checking your changes.
604604
python make.py clean
605605
python make.py --single pandas.DataFrame.join
606606

607+
# compile whatsnew and API section (to resolve links in the whatsnew)
608+
python make.py clean
609+
python make.py --whatsnew
610+
607611
For comparison, a full documentation build may take 15 minutes, but a single
608612
section may take 15 seconds. Subsequent builds, which only process portions
609613
you have changed, will be faster.

doc/source/user_guide/visualization.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -665,7 +665,7 @@ given by column ``z``. The bins are aggregated with NumPy's ``max`` function.
665665
.. ipython:: python
666666
667667
df = pd.DataFrame(np.random.randn(1000, 2), columns=["a", "b"])
668-
df["b"] = df["b"] = df["b"] + np.arange(1000)
668+
df["b"] = df["b"] + np.arange(1000)
669669
df["z"] = np.random.uniform(0, 3, 1000)
670670
671671
@savefig hexbin_plot_agg.png

doc/source/whatsnew/v1.3.0.rst

+4
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ I/O
390390
- Bug in :func:`read_json` when ``orient="split"`` does not maintain numeric string index (:issue:`28556`)
391391
- :meth:`read_sql` returned an empty generator if ``chunksize`` was non-zero and the query returned no results. Now returns a generator with a single empty dataframe (:issue:`34411`)
392392
- Bug in :func:`read_hdf` returning unexpected records when filtering on categorical string columns using ``where`` parameter (:issue:`39189`)
393+
- Bug in :func:`read_sas` raising ``ValueError`` when ``datetimes`` were null (:issue:`39725`)
393394

394395
Period
395396
^^^^^^
@@ -430,6 +431,7 @@ Reshaping
430431
- Bug in :meth:`DataFrame.apply` would give incorrect results when used with a string argument and ``axis=1`` when the axis argument was not supported and now raises a ``ValueError`` instead (:issue:`39211`)
431432
- Bug in :meth:`DataFrame.sort_values` not reshaping index correctly after sorting on columns, when ``ignore_index=True`` (:issue:`39464`)
432433
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``ExtensionDtype`` dtypes (:issue:`39454`)
434+
- Bug in :meth:`DataFrame.append` returning incorrect dtypes with combinations of ``datetime64`` and ``timedelta64`` dtypes (:issue:`39574`)
433435

434436
Sparse
435437
^^^^^^
@@ -448,6 +450,8 @@ ExtensionArray
448450
Other
449451
^^^^^
450452
- Bug in :class:`Index` constructor sometimes silently ignoring a specified ``dtype`` (:issue:`38879`)
453+
- Bug in :func:`pandas.api.types.infer_dtype` not recognizing Series, Index or array with a period dtype (:issue:`23553`)
454+
- Bug in :func:`pandas.api.types.infer_dtype` raising an error for general :class:`.ExtensionArray` objects. It will now return ``"unknown-array"`` instead of raising (:issue:`37367`)
451455
- Bug in constructing a :class:`Series` from a list and a :class:`PandasDtype` (:issue:`39357`)
452456
- Bug in :class:`Styler` which caused CSS to duplicate on multiple renders. (:issue:`39395`)
453457
- ``inspect.getmembers(Series)`` no longer raises an ``AbstractMethodError`` (:issue:`38782`)

pandas/_libs/lib.pyx

+12-8
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ from pandas._libs cimport util
6969
from pandas._libs.util cimport INT64_MAX, INT64_MIN, UINT64_MAX, is_nan
7070

7171
from pandas._libs.tslib import array_to_datetime
72+
from pandas._libs.tslibs.period import Period
7273

7374
from pandas._libs.missing cimport (
7475
C_NA,
@@ -1082,6 +1083,7 @@ _TYPE_MAP = {
10821083
"timedelta64[ns]": "timedelta64",
10831084
"m": "timedelta64",
10841085
"interval": "interval",
1086+
Period: "period",
10851087
}
10861088

10871089
# types only exist on certain platform
@@ -1233,8 +1235,8 @@ cdef object _try_infer_map(object dtype):
12331235
cdef:
12341236
object val
12351237
str attr
1236-
for attr in ["name", "kind", "base"]:
1237-
val = getattr(dtype, attr)
1238+
for attr in ["name", "kind", "base", "type"]:
1239+
val = getattr(dtype, attr, None)
12381240
if val in _TYPE_MAP:
12391241
return _TYPE_MAP[val]
12401242
return None
@@ -1275,6 +1277,7 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
12751277
- time
12761278
- period
12771279
- mixed
1280+
- unknown-array
12781281

12791282
Raises
12801283
------
@@ -1287,6 +1290,9 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
12871290
specialized
12881291
- 'mixed-integer-float' are floats and integers
12891292
- 'mixed-integer' are integers mixed with non-integers
1293+
- 'unknown-array' is the catchall for something that *is* an array (has
1294+
a dtype attribute), but has a dtype unknown to pandas (e.g. external
1295+
extension array)
12901296

12911297
Examples
12921298
--------
@@ -1355,12 +1361,10 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
13551361
# e.g. categoricals
13561362
dtype = value.dtype
13571363
if not isinstance(dtype, np.dtype):
1358-
value = _try_infer_map(value.dtype)
1359-
if value is not None:
1360-
return value
1361-
1362-
# its ndarray-like but we can't handle
1363-
raise ValueError(f"cannot infer type for {type(value)}")
1364+
inferred = _try_infer_map(value.dtype)
1365+
if inferred is not None:
1366+
return inferred
1367+
return "unknown-array"
13641368

13651369
# Unwrap Series/Index
13661370
values = np.asarray(value)

pandas/core/array_algos/putmask.py

+58-3
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,24 @@
11
"""
22
EA-compatible analogue to np.putmask
33
"""
4-
from typing import Any
4+
from typing import Any, Tuple
55
import warnings
66

77
import numpy as np
88

99
from pandas._libs import lib
1010
from pandas._typing import ArrayLike
1111

12-
from pandas.core.dtypes.cast import convert_scalar_for_putitemlike, find_common_type
12+
from pandas.core.dtypes.cast import (
13+
convert_scalar_for_putitemlike,
14+
find_common_type,
15+
infer_dtype_from,
16+
)
1317
from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype, is_list_like
1418
from pandas.core.dtypes.missing import isna_compat
1519

20+
from pandas.core.arrays import ExtensionArray
21+
1622

1723
def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
1824
"""
@@ -22,7 +28,7 @@ def putmask_inplace(values: ArrayLike, mask: np.ndarray, value: Any) -> None:
2228
Parameters
2329
----------
2430
mask : np.ndarray[bool]
25-
We assume _extract_bool_array has already been called.
31+
We assume extract_bool_array has already been called.
2632
value : Any
2733
"""
2834

@@ -152,3 +158,52 @@ def putmask_without_repeat(values: np.ndarray, mask: np.ndarray, new: Any) -> No
152158
raise ValueError("cannot assign mismatch length to masked array")
153159
else:
154160
np.putmask(values, mask, new)
161+
162+
163+
def validate_putmask(values: ArrayLike, mask: np.ndarray) -> Tuple[np.ndarray, bool]:
164+
"""
165+
Validate mask and check if this putmask operation is a no-op.
166+
"""
167+
mask = extract_bool_array(mask)
168+
if mask.shape != values.shape:
169+
raise ValueError("putmask: mask and data must be the same size")
170+
171+
noop = not mask.any()
172+
return mask, noop
173+
174+
175+
def extract_bool_array(mask: ArrayLike) -> np.ndarray:
176+
"""
177+
If we have a SparseArray or BooleanArray, convert it to ndarray[bool].
178+
"""
179+
if isinstance(mask, ExtensionArray):
180+
# We could have BooleanArray, Sparse[bool], ...
181+
# Except for BooleanArray, this is equivalent to just
182+
# np.asarray(mask, dtype=bool)
183+
mask = mask.to_numpy(dtype=bool, na_value=False)
184+
185+
mask = np.asarray(mask, dtype=bool)
186+
return mask
187+
188+
189+
def setitem_datetimelike_compat(values: np.ndarray, num_set: int, other):
190+
"""
191+
Parameters
192+
----------
193+
values : np.ndarray
194+
num_set : int
195+
For putmask, this is mask.sum()
196+
other : Any
197+
"""
198+
if values.dtype == object:
199+
dtype, _ = infer_dtype_from(other, pandas_dtype=True)
200+
201+
if isinstance(dtype, np.dtype) and dtype.kind in ["m", "M"]:
202+
# https://github.com/numpy/numpy/issues/12550
203+
# timedelta64 will incorrectly cast to int
204+
if not is_list_like(other):
205+
other = [other] * num_set
206+
else:
207+
other = list(other)
208+
209+
return other

pandas/core/dtypes/cast.py

+22-21
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424

2525
import numpy as np
2626

27-
from pandas._libs import lib, missing as libmissing, tslib
27+
from pandas._libs import lib, tslib
2828
from pandas._libs.tslibs import (
2929
NaT,
3030
OutOfBoundsDatetime,
@@ -86,7 +86,12 @@
8686
ABCSeries,
8787
)
8888
from pandas.core.dtypes.inference import is_list_like
89-
from pandas.core.dtypes.missing import is_valid_na_for_dtype, isna, notna
89+
from pandas.core.dtypes.missing import (
90+
is_valid_na_for_dtype,
91+
isna,
92+
na_value_for_dtype,
93+
notna,
94+
)
9095

9196
if TYPE_CHECKING:
9297
from pandas import Series
@@ -529,16 +534,26 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
529534
dtype = np.dtype(object)
530535
return dtype, fill_value
531536

537+
kinds = ["i", "u", "f", "c", "m", "M"]
538+
if is_valid_na_for_dtype(fill_value, dtype) and dtype.kind in kinds:
539+
dtype = ensure_dtype_can_hold_na(dtype)
540+
fv = na_value_for_dtype(dtype)
541+
return dtype, fv
542+
543+
elif isna(fill_value):
544+
dtype = np.dtype(object)
545+
if fill_value is None:
546+
# but we retain e.g. pd.NA
547+
fill_value = np.nan
548+
return dtype, fill_value
549+
532550
# returns tuple of (dtype, fill_value)
533551
if issubclass(dtype.type, np.datetime64):
534552
if isinstance(fill_value, datetime) and fill_value.tzinfo is not None:
535553
# Trying to insert tzaware into tznaive, have to cast to object
536554
dtype = np.dtype(np.object_)
537-
elif is_integer(fill_value) or (is_float(fill_value) and not isna(fill_value)):
555+
elif is_integer(fill_value) or is_float(fill_value):
538556
dtype = np.dtype(np.object_)
539-
elif is_valid_na_for_dtype(fill_value, dtype):
540-
# e.g. pd.NA, which is not accepted by Timestamp constructor
541-
fill_value = np.datetime64("NaT", "ns")
542557
else:
543558
try:
544559
fill_value = Timestamp(fill_value).to_datetime64()
@@ -547,14 +562,11 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
547562
elif issubclass(dtype.type, np.timedelta64):
548563
if (
549564
is_integer(fill_value)
550-
or (is_float(fill_value) and not np.isnan(fill_value))
565+
or is_float(fill_value)
551566
or isinstance(fill_value, str)
552567
):
553568
# TODO: What about str that can be a timedelta?
554569
dtype = np.dtype(np.object_)
555-
elif is_valid_na_for_dtype(fill_value, dtype):
556-
# e.g pd.NA, which is not accepted by the Timedelta constructor
557-
fill_value = np.timedelta64("NaT", "ns")
558570
else:
559571
try:
560572
fv = Timedelta(fill_value)
@@ -615,17 +627,6 @@ def maybe_promote(dtype: np.dtype, fill_value=np.nan):
615627
# e.g. mst is np.complex128 and dtype is np.complex64
616628
dtype = mst
617629

618-
elif fill_value is None or fill_value is libmissing.NA:
619-
# Note: we already excluded dt64/td64 dtypes above
620-
if is_float_dtype(dtype) or is_complex_dtype(dtype):
621-
fill_value = np.nan
622-
elif is_integer_dtype(dtype):
623-
dtype = np.dtype(np.float64)
624-
fill_value = np.nan
625-
else:
626-
dtype = np.dtype(np.object_)
627-
if fill_value is not libmissing.NA:
628-
fill_value = np.nan
629630
else:
630631
dtype = np.dtype(np.object_)
631632

pandas/core/dtypes/concat.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def _cast_to_common_type(arr: ArrayLike, dtype: DtypeObj) -> ArrayLike:
6161
return arr.astype(dtype, copy=False)
6262

6363

64-
def concat_compat(to_concat, axis: int = 0):
64+
def concat_compat(to_concat, axis: int = 0, ea_compat_axis: bool = False):
6565
"""
6666
provide concatenation of an array of arrays each of which is a single
6767
'normalized' dtypes (in that for example, if it's object, then it is a
@@ -72,6 +72,9 @@ def concat_compat(to_concat, axis: int = 0):
7272
----------
7373
to_concat : array of arrays
7474
axis : axis to provide concatenation
75+
ea_compat_axis : bool, default False
76+
For ExtensionArray compat, behave as if axis == 1 when determining
77+
whether to drop empty arrays.
7578
7679
Returns
7780
-------
@@ -91,7 +94,8 @@ def is_nonempty(x) -> bool:
9194
# marginal given that it would still require shape & dtype calculation and
9295
# np.concatenate which has them both implemented is compiled.
9396
non_empties = [x for x in to_concat if is_nonempty(x)]
94-
if non_empties and axis == 0:
97+
if non_empties and axis == 0 and not ea_compat_axis:
98+
# ea_compat_axis see GH#39574
9599
to_concat = non_empties
96100

97101
kinds = {obj.dtype.kind for obj in to_concat}

0 commit comments

Comments
 (0)