Skip to content

Commit 31bef27

Browse files
committed
Merge branch 'main' into bug-gb-cat
2 parents 2c6e10d + a5d1bb4 commit 31bef27

30 files changed

+411
-137
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ repos:
9292
args: [--disable=all, --enable=redefined-outer-name]
9393
stages: [manual]
9494
- repo: https://github.com/PyCQA/isort
95-
rev: 5.11.4
95+
rev: 5.12.0
9696
hooks:
9797
- id: isort
9898
- repo: https://github.com/asottile/pyupgrade

ci/code_checks.sh

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8383
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8484
RET=$(($RET + $?)) ; echo $MSG "DONE"
8585

86-
MSG='Partially validate docstrings (EX01)' ; echo $MSG
86+
MSG='Partially validate docstrings (EX01)' ; echo $MSG
8787
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
8888
pandas.Series.index \
8989
pandas.Series.dtype \
@@ -574,7 +574,73 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
574574
pandas.DataFrame.sparse.to_coo \
575575
pandas.DataFrame.to_gbq \
576576
pandas.DataFrame.style \
577-
pandas.DataFrame.__dataframe__ \
577+
pandas.DataFrame.__dataframe__
578+
RET=$(($RET + $?)) ; echo $MSG "DONE"
579+
580+
MSG='Partially validate docstrings (EX02)' ; echo $MSG
581+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX02 --ignore_functions \
582+
pandas.DataFrame.copy \
583+
pandas.DataFrame.plot.line \
584+
pandas.DataFrame.std \
585+
pandas.DataFrame.var \
586+
pandas.Index.factorize \
587+
pandas.Period.strftime \
588+
pandas.Series.copy \
589+
pandas.Series.factorize \
590+
pandas.Series.floordiv \
591+
pandas.Series.plot.line \
592+
pandas.Series.rfloordiv \
593+
pandas.Series.sparse.density \
594+
pandas.Series.sparse.npoints \
595+
pandas.Series.sparse.sp_values \
596+
pandas.Series.std \
597+
pandas.Series.var \
598+
pandas.Timestamp.fromtimestamp \
599+
pandas.api.types.infer_dtype \
600+
pandas.api.types.is_bool_dtype \
601+
pandas.api.types.is_categorical_dtype \
602+
pandas.api.types.is_complex_dtype \
603+
pandas.api.types.is_datetime64_any_dtype \
604+
pandas.api.types.is_datetime64_dtype \
605+
pandas.api.types.is_datetime64_ns_dtype \
606+
pandas.api.types.is_datetime64tz_dtype \
607+
pandas.api.types.is_dict_like \
608+
pandas.api.types.is_file_like \
609+
pandas.api.types.is_float_dtype \
610+
pandas.api.types.is_hashable \
611+
pandas.api.types.is_int64_dtype \
612+
pandas.api.types.is_integer_dtype \
613+
pandas.api.types.is_interval_dtype \
614+
pandas.api.types.is_iterator \
615+
pandas.api.types.is_list_like \
616+
pandas.api.types.is_named_tuple \
617+
pandas.api.types.is_numeric_dtype \
618+
pandas.api.types.is_object_dtype \
619+
pandas.api.types.is_period_dtype \
620+
pandas.api.types.is_re \
621+
pandas.api.types.is_re_compilable \
622+
pandas.api.types.is_signed_integer_dtype \
623+
pandas.api.types.is_sparse \
624+
pandas.api.types.is_string_dtype \
625+
pandas.api.types.is_timedelta64_dtype \
626+
pandas.api.types.is_timedelta64_ns_dtype \
627+
pandas.api.types.is_unsigned_integer_dtype \
628+
pandas.core.groupby.DataFrameGroupBy.take \
629+
pandas.core.groupby.SeriesGroupBy.take \
630+
pandas.factorize \
631+
pandas.io.formats.style.Styler.concat \
632+
pandas.io.formats.style.Styler.export \
633+
pandas.io.formats.style.Styler.set_td_classes \
634+
pandas.io.formats.style.Styler.use \
635+
pandas.io.json.build_table_schema \
636+
pandas.merge_ordered \
637+
pandas.option_context \
638+
pandas.plotting.andrews_curves \
639+
pandas.plotting.autocorrelation_plot \
640+
pandas.plotting.lag_plot \
641+
pandas.plotting.parallel_coordinates \
642+
pandas.plotting.radviz \
643+
pandas.tseries.frequencies.to_offset
578644
RET=$(($RET + $?)) ; echo $MSG "DONE"
579645

580646
fi

doc/source/reference/frame.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ Binary operator functions
8383
.. autosummary::
8484
:toctree: api/
8585

86+
DataFrame.__add__
8687
DataFrame.add
8788
DataFrame.sub
8889
DataFrame.mul

doc/source/whatsnew/v2.0.0.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -639,6 +639,7 @@ Deprecations
639639
- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
640640
- :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`)
641641
- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
642+
- Deprecated ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes, use e.g. ``(obj != pd.Timestamp(0, tz=obj.tz)).all()`` instead (:issue:`34479`)
642643
-
643644

644645
.. ---------------------------------------------------------------------------
@@ -1020,6 +1021,7 @@ Conversion
10201021
- Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
10211022
- Bug in :meth:`Series.astype` raising ``pyarrow.ArrowInvalid`` when converting from a non-pyarrow string dtype to a pyarrow numeric type (:issue:`50430`)
10221023
- Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`)
1024+
- Bug in :meth:`DataFrame.astype` not copying data when converting to pyarrow dtype (:issue:`50984`)
10231025
- Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
10241026
- Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`)
10251027
-

pandas/core/arraylike.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,92 @@ def _arith_method(self, other, op):
9797

9898
@unpack_zerodim_and_defer("__add__")
9999
def __add__(self, other):
100+
"""
101+
Get Addition of DataFrame and other, column-wise.
102+
103+
Equivalent to ``DataFrame.add(other)``.
104+
105+
Parameters
106+
----------
107+
other : scalar, sequence, Series, dict or DataFrame
108+
Object to be added to the DataFrame.
109+
110+
Returns
111+
-------
112+
DataFrame
113+
The result of adding ``other`` to DataFrame.
114+
115+
See Also
116+
--------
117+
DataFrame.add : Add a DataFrame and another object, with option for index-
118+
or column-oriented addition.
119+
120+
Examples
121+
--------
122+
>>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
123+
... index=['elk', 'moose'])
124+
>>> df
125+
height weight
126+
elk 1.5 500
127+
moose 2.6 800
128+
129+
Adding a scalar affects all rows and columns.
130+
131+
>>> df[['height', 'weight']] + 1.5
132+
height weight
133+
elk 3.0 501.5
134+
moose 4.1 801.5
135+
136+
Each element of a list is added to a column of the DataFrame, in order.
137+
138+
>>> df[['height', 'weight']] + [0.5, 1.5]
139+
height weight
140+
elk 2.0 501.5
141+
moose 3.1 801.5
142+
143+
Keys of a dictionary are aligned to the DataFrame, based on column names;
144+
each value in the dictionary is added to the corresponding column.
145+
146+
>>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
147+
height weight
148+
elk 2.0 501.5
149+
moose 3.1 801.5
150+
151+
When `other` is a :class:`Series`, the index of `other` is aligned with the
152+
columns of the DataFrame.
153+
154+
>>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
155+
>>> df[['height', 'weight']] + s1
156+
height weight
157+
elk 3.0 500.5
158+
moose 4.1 800.5
159+
160+
Even when the index of `other` is the same as the index of the DataFrame,
161+
the :class:`Series` will not be reoriented. If index-wise alignment is desired,
162+
:meth:`DataFrame.add` should be used with `axis='index'`.
163+
164+
>>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
165+
>>> df[['height', 'weight']] + s2
166+
elk height moose weight
167+
elk NaN NaN NaN NaN
168+
moose NaN NaN NaN NaN
169+
170+
>>> df[['height', 'weight']].add(s2, axis='index')
171+
height weight
172+
elk 2.0 500.5
173+
moose 4.1 801.5
174+
175+
When `other` is a :class:`DataFrame`, both column names and the
176+
index are aligned.
177+
178+
>>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
179+
... index=['elk', 'moose', 'deer'])
180+
>>> df[['height', 'weight']] + other
181+
height weight
182+
deer NaN NaN
183+
elk 1.7 NaN
184+
moose 3.0 NaN
185+
"""
100186
return self._arith_method(other, operator.add)
101187

102188
@unpack_zerodim_and_defer("__radd__")

pandas/core/arrays/arrow/array.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from copy import deepcopy
34
from typing import (
45
TYPE_CHECKING,
56
Any,
@@ -220,6 +221,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
220221
if isinstance(scalars, cls):
221222
scalars = scalars._data
222223
elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):
224+
if copy and is_array_like(scalars):
225+
# pa array should not get updated when numpy array is updated
226+
scalars = deepcopy(scalars)
223227
try:
224228
scalars = pa.array(scalars, type=pa_dtype, from_pandas=True)
225229
except pa.ArrowInvalid:

pandas/core/arrays/datetimelike.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2034,11 +2034,12 @@ def ceil(
20342034
# Reductions
20352035

20362036
def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
2037-
# GH#34479 discussion of desired behavior long-term
2037+
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
20382038
return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
20392039

20402040
def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
2041-
# GH#34479 discussion of desired behavior long-term
2041+
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
2042+
20422043
return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
20432044

20442045
# --------------------------------------------------------------

pandas/core/nanops.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
npt,
3333
)
3434
from pandas.compat._optional import import_optional_dependency
35+
from pandas.util._exceptions import find_stack_level
3536

3637
from pandas.core.dtypes.common import (
3738
is_any_int_dtype,
@@ -529,6 +530,15 @@ def nanany(
529530
>>> nanops.nanany(s)
530531
False
531532
"""
533+
if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
534+
# GH#34479
535+
warnings.warn(
536+
"'any' with datetime64 dtypes is deprecated and will raise in a "
537+
"future version. Use (obj != pd.Timestamp(0)).any() instead.",
538+
FutureWarning,
539+
stacklevel=find_stack_level(),
540+
)
541+
532542
values, _, _, _, _ = _get_values(values, skipna, fill_value=False, mask=mask)
533543

534544
# For object type, any won't necessarily return
@@ -575,6 +585,15 @@ def nanall(
575585
>>> nanops.nanall(s)
576586
False
577587
"""
588+
if needs_i8_conversion(values.dtype) and values.dtype.kind != "m":
589+
# GH#34479
590+
warnings.warn(
591+
"'all' with datetime64 dtypes is deprecated and will raise in a "
592+
"future version. Use (obj != pd.Timestamp(0)).all() instead.",
593+
FutureWarning,
594+
stacklevel=find_stack_level(),
595+
)
596+
578597
values, _, _, _, _ = _get_values(values, skipna, fill_value=True, mask=mask)
579598

580599
# For object type, all won't necessarily return

pandas/core/shared_docs.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -444,8 +444,8 @@
444444
a reproducible gzip archive:
445445
``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
446446
447-
.. versionadded:: 1.5.0
448-
Added support for `.tar` files."""
447+
.. versionadded:: 1.5.0
448+
Added support for `.tar` files."""
449449

450450
_shared_docs[
451451
"decompression_options"
@@ -466,8 +466,8 @@
466466
custom compression dictionary:
467467
``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
468468
469-
.. versionadded:: 1.5.0
470-
Added support for `.tar` files."""
469+
.. versionadded:: 1.5.0
470+
Added support for `.tar` files."""
471471

472472
_shared_docs[
473473
"replace"

pandas/core/tools/numeric.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,16 @@ def to_numeric(
9191
set to True, nullable dtypes are used for all dtypes that have a nullable
9292
implementation, even if no nulls are present.
9393
94+
.. note::
95+
96+
The nullable dtype implementation can be configured by calling
97+
``pd.set_option("mode.dtype_backend", "pandas")`` to use
98+
numpy-backed nullable dtypes or
99+
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
100+
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
101+
102+
.. versionadded:: 2.0.0
103+
94104
Returns
95105
-------
96106
ret

pandas/io/clipboards.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,15 @@ def read_clipboard(
3636
set to True, nullable dtypes are used for all dtypes that have a nullable
3737
implementation, even if no nulls are present.
3838
39-
The nullable dtype implementation can be configured by calling
40-
``pd.set_option("mode.dtype_backend", "pandas")`` to use
41-
numpy-backed nullable dtypes or
42-
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
43-
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
44-
This is only implemented for the ``python``
45-
engine.
39+
.. note::
40+
41+
The nullable dtype implementation can be configured by calling
42+
``pd.set_option("mode.dtype_backend", "pandas")`` to use
43+
numpy-backed nullable dtypes or
44+
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
45+
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
46+
This is only implemented for the ``python``
47+
engine.
4648
4749
.. versionadded:: 2.0
4850

pandas/io/excel/_base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,14 @@
278278
set to True, nullable dtypes are used for all dtypes that have a nullable
279279
implementation, even if no nulls are present. Dtype takes precedence if given.
280280
281+
.. note::
282+
283+
The nullable dtype implementation can be configured by calling
284+
``pd.set_option("mode.dtype_backend", "pandas")`` to use
285+
numpy-backed nullable dtypes or
286+
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
287+
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
288+
281289
.. versionadded:: 2.0
282290
283291
Returns

pandas/io/feather_format.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -131,11 +131,13 @@ def read_feather(
131131
set to True, nullable dtypes are used for all dtypes that have a nullable
132132
implementation, even if no nulls are present.
133133
134-
The nullable dtype implementation can be configured by calling
135-
``pd.set_option("mode.dtype_backend", "pandas")`` to use
136-
numpy-backed nullable dtypes or
137-
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
138-
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
134+
.. note::
135+
136+
The nullable dtype implementation can be configured by calling
137+
``pd.set_option("mode.dtype_backend", "pandas")`` to use
138+
numpy-backed nullable dtypes or
139+
``pd.set_option("mode.dtype_backend", "pyarrow")`` to use
140+
pyarrow-backed nullable dtypes (using ``pd.ArrowDtype``).
139141
140142
.. versionadded:: 2.0
141143

pandas/io/formats/format.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2174,6 +2174,8 @@ def set_eng_float_format(accuracy: int = 3, use_eng_prefix: bool = False) -> Non
21742174
2 1.0
21752175
3 1.0k
21762176
4 1.0M
2177+
2178+
>>> pd.set_option("display.float_format", None) # unset option
21772179
"""
21782180
set_option("display.float_format", EngFormatter(accuracy, use_eng_prefix))
21792181

0 commit comments

Comments
 (0)