Skip to content

Commit 8766cdf

Browse files
authored
Merge branch 'main' into gh52234-nan-not-in-index
2 parents 86f35d5 + 942bf3e commit 8766cdf

File tree

12 files changed

+242
-227
lines changed

12 files changed

+242
-227
lines changed

doc/source/whatsnew/v2.1.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ Enhancements
1919
Copy-on-Write improvements
2020
^^^^^^^^^^^^^^^^^^^^^^^^^^
2121

22+
- :meth:`Series.transform` now respects Copy-on-Write when ``func`` modifies the :class:`Series` inplace (:issue:`53747`)
2223
- Calling :meth:`Index.values` will now return a read-only NumPy array (:issue:`53704`)
2324
- Setting a :class:`Series` into a :class:`DataFrame` now creates a lazy copy instead of a deep copy (:issue:`53142`)
2425
- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
@@ -288,6 +289,7 @@ Deprecations
288289
- Deprecated :func:`value_counts`, use ``pd.Series(obj).value_counts()`` instead (:issue:`47862`)
289290
- Deprecated :meth:`Series.first` and :meth:`DataFrame.first` (please create a mask and filter using ``.loc`` instead) (:issue:`45908`)
290291
- Deprecated :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` for object-dtype (:issue:`53631`)
292+
- Deprecated :meth:`Series.last` and :meth:`DataFrame.last` (please create a mask and filter using ``.loc`` instead) (:issue:`53692`)
291293
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
292294
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`; in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
293295
- Deprecated behavior of :func:`assert_series_equal` and :func:`assert_frame_equal` considering NA-like values (e.g. ``NaN`` vs ``None`` as equivalent) (:issue:`52081`)
@@ -491,7 +493,6 @@ Reshaping
491493
- Bug in :meth:`DataFrame.idxmin` and :meth:`DataFrame.idxmax`, where the axis dtype would be lost for empty frames (:issue:`53265`)
492494
- Bug in :meth:`DataFrame.merge` not merging correctly when having ``MultiIndex`` with single level (:issue:`52331`)
493495
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
494-
- Bug in :meth:`DataFrame.stack` would incorrectly order results when ``sort=True`` and the input had :class:`MultiIndex` levels that were not sorted (:issue:`53636`)
495496
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
496497
- Bug in :meth:`Series.combine_first` converting ``int64`` dtype to ``float64`` and losing precision on very large integers (:issue:`51764`)
497498
-

pandas/conftest.py

+11
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from decimal import Decimal
3131
import operator
3232
import os
33+
from pathlib import Path
3334
from typing import (
3435
Callable,
3536
Hashable,
@@ -1167,6 +1168,16 @@ def strict_data_files(pytestconfig):
11671168
return pytestconfig.getoption("--strict-data-files")
11681169

11691170

1171+
@pytest.fixture
1172+
def tests_path() -> Path:
1173+
return Path(__file__).parent / "tests"
1174+
1175+
1176+
@pytest.fixture
1177+
def tests_io_data_path(tests_path) -> Path:
1178+
return tests_path / "io" / "data"
1179+
1180+
11701181
@pytest.fixture
11711182
def datapath(strict_data_files: str) -> Callable[..., str]:
11721183
"""

pandas/core/generic.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -9321,6 +9321,11 @@ def last(self, offset) -> Self:
93219321
at_time : Select values at a particular time of the day.
93229322
between_time : Select values between particular times of the day.
93239323
9324+
Notes
9325+
-----
9326+
.. deprecated:: 2.1.0
9327+
Please create a mask and filter using ``.loc`` instead.
9328+
93249329
Examples
93259330
--------
93269331
>>> i = pd.date_range('2018-04-09', periods=4, freq='2D')
@@ -9334,7 +9339,7 @@ def last(self, offset) -> Self:
93349339
93359340
Get the rows for the last 3 days:
93369341
9337-
>>> ts.last('3D')
9342+
>>> ts.last('3D') # doctest: +SKIP
93389343
A
93399344
2018-04-13 3
93409345
2018-04-15 4
@@ -9343,6 +9348,13 @@ def last(self, offset) -> Self:
93439348
3 observed days in the dataset, and therefore data for 2018-04-11 was
93449349
not returned.
93459350
"""
9351+
warnings.warn(
9352+
"last is deprecated and will be removed in a future version. "
9353+
"Please create a mask and filter using `.loc` instead",
9354+
FutureWarning,
9355+
stacklevel=find_stack_level(),
9356+
)
9357+
93469358
if not isinstance(self.index, DatetimeIndex):
93479359
raise TypeError("'last' only supports a DatetimeIndex index")
93489360

pandas/core/reshape/reshape.py

+1-10
Original file line numberDiff line numberDiff line change
@@ -756,16 +756,7 @@ def _convert_level_number(level_num: int, columns: Index):
756756
level_vals = mi_cols.levels[-1]
757757
level_codes = unique(mi_cols.codes[-1])
758758
if sort:
759-
_, index, inverse = np.unique(
760-
level_vals, return_index=True, return_inverse=True
761-
)
762-
sorted_level_vals = np.take(level_vals, index)
763759
level_codes = np.sort(level_codes)
764-
# Take level_codes according to where level_vals get sorted to, while
765-
# also allowing for NA (-1) values
766-
level_codes = np.where(level_codes == -1, -1, np.take(inverse, level_codes))
767-
else:
768-
sorted_level_vals = level_vals
769760
level_vals_nan = level_vals.insert(len(level_vals), None)
770761

771762
level_vals_used = np.take(level_vals_nan, level_codes)
@@ -827,7 +818,7 @@ def _convert_level_number(level_num: int, columns: Index):
827818
new_codes = [old_codes.repeat(levsize)]
828819
new_names = [this.index.name] # something better?
829820

830-
new_levels.append(sorted_level_vals)
821+
new_levels.append(level_vals)
831822
new_codes.append(np.tile(level_codes, N))
832823
new_names.append(frame.columns.names[level_num])
833824

pandas/core/series.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -4499,7 +4499,8 @@ def transform(
44994499
) -> DataFrame | Series:
45004500
# Validate axis argument
45014501
self._get_axis_number(axis)
4502-
result = SeriesApply(self, func=func, args=args, kwargs=kwargs).transform()
4502+
ser = self.copy(deep=False) if using_copy_on_write() else self
4503+
result = SeriesApply(ser, func=func, args=args, kwargs=kwargs).transform()
45034504
return result
45044505

45054506
def apply(

pandas/io/xml.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
import io
8+
from os import PathLike
89
from typing import (
910
TYPE_CHECKING,
1011
Any,
@@ -326,10 +327,13 @@ def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]:
326327
)
327328

328329
if (not hasattr(self.path_or_buffer, "read")) and (
329-
not isinstance(self.path_or_buffer, str)
330+
not isinstance(self.path_or_buffer, (str, PathLike))
330331
or is_url(self.path_or_buffer)
331332
or is_fsspec_url(self.path_or_buffer)
332-
or self.path_or_buffer.startswith(("<?xml", "<"))
333+
or (
334+
isinstance(self.path_or_buffer, str)
335+
and self.path_or_buffer.startswith(("<?xml", "<"))
336+
)
333337
or infer_compression(self.path_or_buffer, "infer") is not None
334338
):
335339
raise ParserError(

pandas/tests/copy_view/test_methods.py

+26
Original file line numberDiff line numberDiff line change
@@ -1764,6 +1764,32 @@ def test_transpose_ea_single_column(using_copy_on_write):
17641764
assert not np.shares_memory(get_array(df, "a"), get_array(result, 0))
17651765

17661766

1767+
def test_transform_frame(using_copy_on_write):
1768+
df = DataFrame({"a": [1, 2, 3], "b": 1})
1769+
df_orig = df.copy()
1770+
1771+
def func(ser):
1772+
ser.iloc[0] = 100
1773+
return ser
1774+
1775+
df.transform(func)
1776+
if using_copy_on_write:
1777+
tm.assert_frame_equal(df, df_orig)
1778+
1779+
1780+
def test_transform_series(using_copy_on_write):
1781+
ser = Series([1, 2, 3])
1782+
ser_orig = ser.copy()
1783+
1784+
def func(ser):
1785+
ser.iloc[0] = 100
1786+
return ser
1787+
1788+
ser.transform(func)
1789+
if using_copy_on_write:
1790+
tm.assert_series_equal(ser, ser_orig)
1791+
1792+
17671793
def test_count_read_only_array():
17681794
df = DataFrame({"a": [1, 2], "b": 3})
17691795
result = df.count()

pandas/tests/frame/methods/test_first_and_last.py

+18-7
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import pandas._testing as tm
1212

1313
deprecated_msg = "first is deprecated"
14+
last_deprecated_msg = "last is deprecated"
1415

1516

1617
class TestFirst:
@@ -55,29 +56,38 @@ def test_first_last_raises(self, frame_or_series):
5556
obj.first("1D")
5657

5758
msg = "'last' only supports a DatetimeIndex index"
58-
with pytest.raises(TypeError, match=msg): # index is not a DatetimeIndex
59+
with tm.assert_produces_warning(
60+
FutureWarning, match=last_deprecated_msg
61+
), pytest.raises(
62+
TypeError, match=msg
63+
): # index is not a DatetimeIndex
5964
obj.last("1D")
6065

6166
def test_last_subset(self, frame_or_series):
6267
ts = tm.makeTimeDataFrame(freq="12h")
6368
ts = tm.get_obj(ts, frame_or_series)
64-
result = ts.last("10d")
69+
with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
70+
result = ts.last("10d")
6571
assert len(result) == 20
6672

6773
ts = tm.makeTimeDataFrame(nper=30, freq="D")
6874
ts = tm.get_obj(ts, frame_or_series)
69-
result = ts.last("10d")
75+
with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
76+
result = ts.last("10d")
7077
assert len(result) == 10
7178

72-
result = ts.last("21D")
79+
with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
80+
result = ts.last("21D")
7381
expected = ts["2000-01-10":]
7482
tm.assert_equal(result, expected)
7583

76-
result = ts.last("21D")
84+
with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
85+
result = ts.last("21D")
7786
expected = ts[-21:]
7887
tm.assert_equal(result, expected)
7988

80-
result = ts[:0].last("3M")
89+
with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
90+
result = ts[:0].last("3M")
8191
tm.assert_equal(result, ts[:0])
8292

8393
@pytest.mark.parametrize("start, periods", [("2010-03-31", 1), ("2010-03-30", 2)])
@@ -104,7 +114,8 @@ def test_first_with_first_day_end_of_frq_n_greater_one(self, frame_or_series):
104114
def test_empty_not_input(self):
105115
# GH#51032
106116
df = DataFrame(index=pd.DatetimeIndex([]))
107-
result = df.last(offset=1)
117+
with tm.assert_produces_warning(FutureWarning, match=last_deprecated_msg):
118+
result = df.last(offset=1)
108119

109120
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
110121
result = df.first(offset=1)

pandas/tests/frame/test_stack_unstack.py

+8-30
Original file line numberDiff line numberDiff line change
@@ -2000,20 +2000,18 @@ def __init__(self, *args, **kwargs) -> None:
20002000
),
20012001
)
20022002
@pytest.mark.parametrize("stack_lev", range(2))
2003-
@pytest.mark.parametrize("sort", [True, False])
2004-
def test_stack_order_with_unsorted_levels(self, levels, stack_lev, sort):
2003+
def test_stack_order_with_unsorted_levels(self, levels, stack_lev):
20052004
# GH#16323
20062005
# deep check for 1-row case
20072006
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
20082007
df = DataFrame(columns=columns, data=[range(4)])
2009-
df_stacked = df.stack(stack_lev, sort=sort)
2010-
for row in df.index:
2011-
for col in df.columns:
2012-
expected = df.loc[row, col]
2013-
result_row = row, col[stack_lev]
2014-
result_col = col[1 - stack_lev]
2015-
result = df_stacked.loc[result_row, result_col]
2016-
assert result == expected
2008+
df_stacked = df.stack(stack_lev)
2009+
assert all(
2010+
df.loc[row, col]
2011+
== df_stacked.loc[(row, col[stack_lev]), col[1 - stack_lev]]
2012+
for row in df.index
2013+
for col in df.columns
2014+
)
20172015

20182016
def test_stack_order_with_unsorted_levels_multi_row(self):
20192017
# GH#16323
@@ -2032,26 +2030,6 @@ def test_stack_order_with_unsorted_levels_multi_row(self):
20322030
for col in df.columns
20332031
)
20342032

2035-
def test_stack_order_with_unsorted_levels_multi_row_2(self):
2036-
# GH#53636
2037-
levels = ((0, 1), (1, 0))
2038-
stack_lev = 1
2039-
columns = MultiIndex(levels=levels, codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2040-
df = DataFrame(columns=columns, data=[range(4)], index=[1, 0, 2, 3])
2041-
result = df.stack(stack_lev, sort=True)
2042-
expected_index = MultiIndex(
2043-
levels=[[0, 1, 2, 3], [0, 1]],
2044-
codes=[[1, 1, 0, 0, 2, 2, 3, 3], [1, 0, 1, 0, 1, 0, 1, 0]],
2045-
)
2046-
expected = DataFrame(
2047-
{
2048-
0: [0, 1, 0, 1, 0, 1, 0, 1],
2049-
1: [2, 3, 2, 3, 2, 3, 2, 3],
2050-
},
2051-
index=expected_index,
2052-
)
2053-
tm.assert_frame_equal(result, expected)
2054-
20552033
def test_stack_unstack_unordered_multiindex(self):
20562034
# GH# 18265
20572035
values = np.arange(5)

pandas/tests/generic/test_finalize.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,8 @@ def ndframe_method(request):
395395

396396

397397
@pytest.mark.filterwarnings(
398-
"ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning"
398+
"ignore:DataFrame.fillna with 'method' is deprecated:FutureWarning",
399+
"ignore:last is deprecated:FutureWarning",
399400
)
400401
def test_finalize_called(ndframe_method):
401402
cls, init_args, method = ndframe_method
@@ -423,6 +424,23 @@ def test_finalize_first(data):
423424
assert result.attrs == {"a": 1}
424425

425426

427+
@pytest.mark.parametrize(
428+
"data",
429+
[
430+
pd.Series(1, pd.date_range("2000", periods=4)),
431+
pd.DataFrame({"A": [1, 1, 1, 1]}, pd.date_range("2000", periods=4)),
432+
],
433+
)
434+
def test_finalize_last(data):
435+
# GH 53710
436+
deprecated_msg = "last is deprecated"
437+
438+
data.attrs = {"a": 1}
439+
with tm.assert_produces_warning(FutureWarning, match=deprecated_msg):
440+
result = data.last("3D")
441+
assert result.attrs == {"a": 1}
442+
443+
426444
@not_implemented_mark
427445
def test_finalize_called_eval_numexpr():
428446
pytest.importorskip("numexpr")

pandas/tests/io/xml/conftest.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
import pytest
2+
3+
4+
@pytest.fixture
5+
def xml_data_path(tests_io_data_path):
6+
return tests_io_data_path / "xml"
7+
8+
9+
@pytest.fixture
10+
def xml_books(xml_data_path):
11+
return xml_data_path / "books.xml"
12+
13+
14+
@pytest.fixture
15+
def xml_doc_ch_utf(xml_data_path):
16+
return xml_data_path / "doc_ch_utf.xml"
17+
18+
19+
@pytest.fixture
20+
def xml_baby_names(xml_data_path):
21+
return xml_data_path / "baby_names.xml"
22+
23+
24+
@pytest.fixture
25+
def kml_cta_rail_lines(xml_data_path):
26+
return xml_data_path / "cta_rail_lines.kml"
27+
28+
29+
@pytest.fixture
30+
def xsl_flatten_doc(xml_data_path):
31+
return xml_data_path / "flatten_doc.xsl"

0 commit comments

Comments
 (0)