Skip to content

Commit 3c3f908

Browse files
author
luke
committed
Merge branch 'add-FutureWarning-for-pandas.io.sql.execute' of https://github.com/luke396/pandas into add-FutureWarning-for-pandas.io.sql.execute
2 parents d055ee0 + 8705d6e commit 3c3f908

File tree

13 files changed

+253
-60
lines changed

13 files changed

+253
-60
lines changed

asv_bench/benchmarks/array.py

+9
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ def time_from_integer_array(self):
4444
pd.array(self.values_integer, dtype="Int64")
4545

4646

47+
class IntervalArray:
48+
def setup(self):
49+
N = 10_000
50+
self.tuples = [(i, i + 1) for i in range(N)]
51+
52+
def time_from_tuples(self):
53+
pd.arrays.IntervalArray.from_tuples(self.tuples)
54+
55+
4756
class StringArray:
4857
def setup(self):
4958
N = 100_000

ci/code_checks.sh

+30
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,36 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8383
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8484
RET=$(($RET + $?)) ; echo $MSG "DONE"
8585

86+
MSG='Partially validate docstrings (RT02)' ; echo $MSG
87+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=RT02 --ignore_functions \
88+
pandas.Series.align \
89+
pandas.Series.dt.total_seconds \
90+
pandas.Series.cat.rename_categories \
91+
pandas.Series.cat.reorder_categories \
92+
pandas.Series.cat.add_categories \
93+
pandas.Series.cat.remove_categories \
94+
pandas.Series.cat.remove_unused_categories \
95+
pandas.Index.all \
96+
pandas.Index.any \
97+
pandas.CategoricalIndex.rename_categories \
98+
pandas.CategoricalIndex.reorder_categories \
99+
pandas.CategoricalIndex.add_categories \
100+
pandas.CategoricalIndex.remove_categories \
101+
pandas.CategoricalIndex.remove_unused_categories \
102+
pandas.MultiIndex.drop \
103+
pandas.DatetimeIndex.to_pydatetime \
104+
pandas.TimedeltaIndex.to_pytimedelta \
105+
pandas.core.groupby.SeriesGroupBy.apply \
106+
pandas.core.groupby.DataFrameGroupBy.apply \
107+
pandas.io.formats.style.Styler.export \
108+
pandas.api.extensions.ExtensionArray.astype \
109+
pandas.api.extensions.ExtensionArray.dropna \
110+
pandas.api.extensions.ExtensionArray.isna \
111+
pandas.api.extensions.ExtensionArray.repeat \
112+
pandas.api.extensions.ExtensionArray.unique \
113+
pandas.DataFrame.align
114+
RET=$(($RET + $?)) ; echo $MSG "DONE"
115+
86116
fi
87117

88118
### DOCUMENTATION NOTEBOOKS ###

doc/source/whatsnew/v2.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,7 @@ Removal of prior version deprecations/changes
628628
- Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`)
629629
- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`)
630630
- Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`)
631+
- Disallow passing 2 non-keyword arguments to :meth:`DataFrame.reindex` (:issue:`17966`)
631632
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
632633
- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`)
633634
- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`)
@@ -752,6 +753,7 @@ Removal of prior version deprecations/changes
752753
Performance improvements
753754
~~~~~~~~~~~~~~~~~~~~~~~~
754755
- Performance improvement in :meth:`.DataFrameGroupBy.median` and :meth:`.SeriesGroupBy.median` and :meth:`.GroupBy.cumprod` for nullable dtypes (:issue:`37493`)
756+
- Performance improvement in :meth:`.DataFrameGroupBy.all`, :meth:`.DataFrameGroupBy.any`, :meth:`.SeriesGroupBy.all`, and :meth:`.SeriesGroupBy.any` for object dtype (:issue:`50623`)
755757
- Performance improvement in :meth:`MultiIndex.argsort` and :meth:`MultiIndex.sort_values` (:issue:`48406`)
756758
- Performance improvement in :meth:`MultiIndex.size` (:issue:`48723`)
757759
- Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`, :issue:`48752`)
@@ -775,6 +777,7 @@ Performance improvements
775777
- Performance improvement for :func:`concat` with extension array backed indexes (:issue:`49128`, :issue:`49178`)
776778
- Reduce memory usage of :meth:`DataFrame.to_pickle`/:meth:`Series.to_pickle` when using BZ2 or LZMA (:issue:`49068`)
777779
- Performance improvement for :class:`~arrays.StringArray` constructor passing a numpy array with type ``np.str_`` (:issue:`49109`)
780+
- Performance improvement in :meth:`~arrays.IntervalArray.from_tuples` (:issue:`50620`)
778781
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.factorize` (:issue:`49177`)
779782
- Performance improvement in :meth:`~arrays.ArrowExtensionArray.__setitem__` when key is a null slice (:issue:`50248`)
780783
- Performance improvement in :class:`~arrays.ArrowExtensionArray` comparison methods when array contains NA (:issue:`50524`)

pandas/_libs/interval.pyx

+68
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,21 @@ cdef class IntervalMixin:
5858
-------
5959
bool
6060
True if the Interval is closed on the left-side.
61+
62+
See Also
63+
--------
64+
Interval.closed_right : Check if the interval is closed on the right side.
65+
Interval.open_left : Boolean inverse of closed_left.
66+
67+
Examples
68+
--------
69+
>>> iv = pd.Interval(0, 5, closed='left')
70+
>>> iv.closed_left
71+
True
72+
73+
>>> iv = pd.Interval(0, 5, closed='right')
74+
>>> iv.closed_left
75+
False
6176
"""
6277
return self.closed in ("left", "both")
6378

@@ -72,6 +87,21 @@ cdef class IntervalMixin:
7287
-------
7388
bool
7489
True if the Interval is closed on the right-side.
90+
91+
See Also
92+
--------
93+
Interval.closed_left : Check if the interval is closed on the left side.
94+
Interval.open_right : Boolean inverse of closed_right.
95+
96+
Examples
97+
--------
98+
>>> iv = pd.Interval(0, 5, closed='both')
99+
>>> iv.closed_right
100+
True
101+
102+
>>> iv = pd.Interval(0, 5, closed='left')
103+
>>> iv.closed_right
104+
False
75105
"""
76106
return self.closed in ("right", "both")
77107

@@ -86,6 +116,21 @@ cdef class IntervalMixin:
86116
-------
87117
bool
88118
True if the Interval is not closed on the left-side.
119+
120+
See Also
121+
--------
122+
Interval.open_right : Check if the interval is open on the right side.
123+
Interval.closed_left : Boolean inverse of open_left.
124+
125+
Examples
126+
--------
127+
>>> iv = pd.Interval(0, 5, closed='neither')
128+
>>> iv.open_left
129+
True
130+
131+
>>> iv = pd.Interval(0, 5, closed='both')
132+
>>> iv.open_left
133+
False
89134
"""
90135
return not self.closed_left
91136

@@ -100,6 +145,21 @@ cdef class IntervalMixin:
100145
-------
101146
bool
102147
True if the Interval is not closed on the right-side.
148+
149+
See Also
150+
--------
151+
Interval.open_left : Check if the interval is open on the left side.
152+
Interval.closed_right : Boolean inverse of open_right.
153+
154+
Examples
155+
--------
156+
>>> iv = pd.Interval(0, 5, closed='left')
157+
>>> iv.open_right
158+
True
159+
160+
>>> iv = pd.Interval(0, 5)
161+
>>> iv.open_right
162+
False
103163
"""
104164
return not self.closed_right
105165

@@ -124,6 +184,10 @@ cdef class IntervalMixin:
124184
def length(self):
125185
"""
126186
Return the length of the Interval.
187+
188+
See Also
189+
--------
190+
Interval.is_empty : Indicates if an interval contains no points.
127191
"""
128192
return self.right - self.left
129193

@@ -140,6 +204,10 @@ cdef class IntervalMixin:
140204
an :class:`~arrays.IntervalArray` or :class:`IntervalIndex` is
141205
empty.
142206
207+
See Also
208+
--------
209+
Interval.length : Return the length of the Interval.
210+
143211
Examples
144212
--------
145213
An :class:`Interval` that contains points is not empty:

pandas/_libs/tslibs/timestamps.pyx

+42-4
Original file line numberDiff line numberDiff line change
@@ -579,7 +579,16 @@ cdef class _Timestamp(ABCTimestamp):
579579
@property
580580
def is_month_start(self) -> bool:
581581
"""
582-
Return True if date is first day of month.
582+
Check if the date is the first day of the month.
583+
584+
Returns
585+
-------
586+
bool
587+
True if the date is the first day of the month.
588+
589+
See Also
590+
--------
591+
Timestamp.is_month_end : Similar property indicating the last day of the month.
583592

584593
Examples
585594
--------
@@ -596,7 +605,16 @@ cdef class _Timestamp(ABCTimestamp):
596605
@property
597606
def is_month_end(self) -> bool:
598607
"""
599-
Return True if date is last day of month.
608+
Check if the date is the last day of the month.
609+
610+
Returns
611+
-------
612+
bool
613+
True if the date is the last day of the month.
614+
615+
See Also
616+
--------
617+
Timestamp.is_month_start : Similar property indicating month start.
600618

601619
Examples
602620
--------
@@ -613,7 +631,17 @@ cdef class _Timestamp(ABCTimestamp):
613631
@property
614632
def is_quarter_start(self) -> bool:
615633
"""
616-
Return True if date is first day of the quarter.
634+
Check if the date is the first day of the quarter.
635+
636+
Returns
637+
-------
638+
bool
639+
True if the date is the first day of the quarter.
640+
641+
See Also
642+
--------
643+
Timestamp.is_quarter_end : Similar property indicating the quarter end.
644+
Timestamp.quarter : Return the quarter of the date.
617645

618646
Examples
619647
--------
@@ -630,7 +658,17 @@ cdef class _Timestamp(ABCTimestamp):
630658
@property
631659
def is_quarter_end(self) -> bool:
632660
"""
633-
Return True if date is last day of the quarter.
661+
Check if the date is the last day of the quarter.
662+
663+
Returns
664+
-------
665+
bool
666+
True if the date is the last day of the quarter.
667+
668+
See Also
669+
--------
670+
Timestamp.is_quarter_start : Similar property indicating the quarter start.
671+
Timestamp.quarter : Return the quarter of the date.
634672

635673
Examples
636674
--------

pandas/core/arrays/interval.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,7 @@ def from_tuples(
608608
left = right = data
609609

610610
for d in data:
611-
if isna(d):
611+
if not isinstance(d, tuple) and isna(d):
612612
lhs = rhs = np.nan
613613
else:
614614
name = cls.__name__

pandas/core/groupby/groupby.py

+8-14
Original file line numberDiff line numberDiff line change
@@ -1774,22 +1774,16 @@ def _bool_agg(self, val_test: Literal["any", "all"], skipna: bool):
17741774
"""
17751775

17761776
def objs_to_bool(vals: ArrayLike) -> tuple[np.ndarray, type]:
1777-
if is_object_dtype(vals.dtype):
1777+
if is_object_dtype(vals.dtype) and skipna:
17781778
# GH#37501: don't raise on pd.NA when skipna=True
1779-
if skipna:
1780-
func = np.vectorize(
1781-
lambda x: bool(x) if not isna(x) else True, otypes=[bool]
1782-
)
1783-
vals = func(vals)
1784-
else:
1785-
vals = vals.astype(bool, copy=False)
1786-
1787-
vals = cast(np.ndarray, vals)
1779+
mask = isna(vals)
1780+
if mask.any():
1781+
# mask on original values computed separately
1782+
vals = vals.copy()
1783+
vals[mask] = True
17881784
elif isinstance(vals, BaseMaskedArray):
1789-
vals = vals._data.astype(bool, copy=False)
1790-
else:
1791-
vals = vals.astype(bool, copy=False)
1792-
1785+
vals = vals._data
1786+
vals = vals.astype(bool, copy=False)
17931787
return vals.view(np.int8), bool
17941788

17951789
def result_to_bool(

pandas/tests/frame/methods/test_describe.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,9 @@ def test_ea_with_na(self, any_numeric_ea_dtype):
388388
# GH#48778
389389

390390
df = DataFrame({"a": [1, pd.NA, pd.NA], "b": pd.NA}, dtype=any_numeric_ea_dtype)
391-
result = df.describe()
391+
# Warning from numpy for taking std of single element
392+
with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False):
393+
result = df.describe()
392394
expected = DataFrame(
393395
{"a": [1.0, 1.0, pd.NA] + [1.0] * 5, "b": [0.0] + [pd.NA] * 7},
394396
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],

pandas/tests/frame/methods/test_reindex.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -840,14 +840,12 @@ def test_reindex_axis_style(self):
840840
result = df.reindex([0, 1, 3], axis="index")
841841
tm.assert_frame_equal(result, expected)
842842

843-
def test_reindex_positional_warns(self):
843+
def test_reindex_positional_raises(self):
844844
# https://github.com/pandas-dev/pandas/issues/12392
845+
# Enforced in 2.0
845846
df = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
846-
expected = DataFrame({"A": [1.0, 2], "B": [4.0, 5], "C": [np.nan, np.nan]})
847-
with tm.assert_produces_warning(FutureWarning):
848-
result = df.reindex([0, 1], ["A", "B", "C"])
849-
850-
tm.assert_frame_equal(result, expected)
847+
with pytest.raises(TypeError, match=r".* is ambiguous."):
848+
df.reindex([0, 1], ["A", "B", "C"])
851849

852850
def test_reindex_axis_style_raises(self):
853851
# https://github.com/pandas-dev/pandas/issues/12392
@@ -914,9 +912,7 @@ def test_reindex_api_equivalence(self):
914912
for res in [res2, res3]:
915913
tm.assert_frame_equal(res1, res)
916914

917-
with tm.assert_produces_warning(FutureWarning) as m:
918-
res1 = df.reindex(["b", "a"], ["e", "d"])
919-
assert "reindex" in str(m[0].message)
915+
res1 = df.reindex(index=["b", "a"], columns=["e", "d"])
920916
res2 = df.reindex(columns=["e", "d"], index=["b", "a"])
921917
res3 = df.reindex(labels=["b", "a"], axis=0).reindex(labels=["e", "d"], axis=1)
922918
for res in [res2, res3]:

pandas/tests/frame/methods/test_shift.py

+14
Original file line numberDiff line numberDiff line change
@@ -397,6 +397,13 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager):
397397
result = df3.shift(2, axis=1)
398398

399399
expected = df3.take([-1, -1, 0, 1, 2], axis=1)
400+
# Explicit cast to float to avoid implicit cast when setting nan.
401+
# Column names aren't unique, so directly calling `expected.astype` won't work.
402+
expected = expected.pipe(
403+
lambda df: df.set_axis(range(df.shape[1]), axis=1)
404+
.astype({0: "float", 1: "float"})
405+
.set_axis(df.columns, axis=1)
406+
)
400407
expected.iloc[:, :2] = np.nan
401408
expected.columns = df3.columns
402409

@@ -410,6 +417,13 @@ def test_shift_axis1_multiple_blocks(self, using_array_manager):
410417
result = df3.shift(-2, axis=1)
411418

412419
expected = df3.take([2, 3, 4, -1, -1], axis=1)
420+
# Explicit cast to float to avoid implicit cast when setting nan.
421+
# Column names aren't unique, so directly calling `expected.astype` won't work.
422+
expected = expected.pipe(
423+
lambda df: df.set_axis(range(df.shape[1]), axis=1)
424+
.astype({3: "float", 4: "float"})
425+
.set_axis(df.columns, axis=1)
426+
)
413427
expected.iloc[:, -2:] = np.nan
414428
expected.columns = df3.columns
415429

0 commit comments

Comments
 (0)