Skip to content

Commit 392d2b5

Browse files
Merge branch 'main' into plot
2 parents 950441b + b4d851c commit 392d2b5

File tree

7 files changed

+110
-54
lines changed

7 files changed

+110
-54
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Other enhancements
6868
- :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
6969
- :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`)
7070
- :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
71+
- :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
7172
- :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now supports positional arguments passed as kwargs (:issue:`58995`)
7273
- :meth:`Rolling.agg`, :meth:`Expanding.agg` and :meth:`ExponentialMovingWindow.agg` now accept :class:`NamedAgg` aggregations through ``**kwargs`` (:issue:`28333`)
7374
- :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
@@ -761,8 +762,9 @@ Plotting
761762
- Bug in :meth:`.DataFrameGroupBy.boxplot` failed when there were multiple groupings (:issue:`14701`)
762763
- Bug in :meth:`DataFrame.plot.bar` with ``stacked=True`` where labels on stacked bars with zero-height segments were incorrectly positioned at the base instead of the label position of the previous segment (:issue:`59429`)
763764
- Bug in :meth:`DataFrame.plot.line` raising ``ValueError`` when set both color and a ``dict`` style (:issue:`59461`)
764-
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
765-
- Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`)
765+
- Bug in :meth:`DataFrame.plot` that causes a shift to the right when the frequency multiplier is greater than one. (:issue:`57587`)
766+
- Bug in :meth:`Series.plot` preventing a line and bar from being aligned on the same plot (:issue:`61161`)
767+
- Bug in :meth:`Series.plot` preventing a line and scatter plot from being aligned (:issue:`61005`)
766768
- Bug in :meth:`Series.plot` with ``kind="pie"`` with :class:`ArrowDtype` (:issue:`59192`)
767769

768770
Groupby/resample/rolling

pandas/core/indexing.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1582,11 +1582,7 @@ def _validate_key(self, key, axis: AxisInt) -> None:
15821582
if com.is_bool_indexer(key):
15831583
if hasattr(key, "index") and isinstance(key.index, Index):
15841584
if key.index.inferred_type == "integer":
1585-
raise NotImplementedError(
1586-
"iLocation based boolean "
1587-
"indexing on an integer type "
1588-
"is not available"
1589-
)
1585+
return
15901586
raise ValueError(
15911587
"iLocation based boolean indexing cannot use an indexable as a mask"
15921588
)

pandas/plotting/_matplotlib/core.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -67,10 +67,9 @@
6767
from pandas.plotting._matplotlib.misc import unpack_single_str_list
6868
from pandas.plotting._matplotlib.style import get_standard_colors
6969
from pandas.plotting._matplotlib.timeseries import (
70-
decorate_axes,
7170
format_dateaxis,
7271
maybe_convert_index,
73-
maybe_resample,
72+
prepare_ts_data,
7473
use_dynamic_x,
7574
)
7675
from pandas.plotting._matplotlib.tools import (
@@ -304,7 +303,7 @@ def _is_ts_plot(self) -> bool:
304303

305304
@final
306305
def _use_dynamic_x(self) -> bool:
307-
return use_dynamic_x(self._get_ax(0), self.data)
306+
return use_dynamic_x(self._get_ax(0), self.data.index)
308307

309308
@final
310309
@staticmethod
@@ -1342,10 +1341,20 @@ def __init__(
13421341
c = self.data.columns[c]
13431342
self.c = c
13441343

1344+
@register_pandas_matplotlib_converters
13451345
def _make_plot(self, fig: Figure) -> None:
13461346
x, y, c, data = self.x, self.y, self.c, self.data
13471347
ax = self.axes[0]
13481348

1349+
from pandas import Series
1350+
1351+
x_data = data[x]
1352+
s = Series(index=x_data)
1353+
if use_dynamic_x(ax, s.index):
1354+
s = maybe_convert_index(ax, s)
1355+
freq, s = prepare_ts_data(s, ax, self.kwds)
1356+
x_data = s.index
1357+
13491358
c_is_column = is_hashable(c) and c in self.data.columns
13501359

13511360
color_by_categorical = c_is_column and isinstance(
@@ -1362,7 +1371,7 @@ def _make_plot(self, fig: Figure) -> None:
13621371
else:
13631372
label = None
13641373

1365-
# if a list of non color strings is passed in as c, color points
1374+
# if a list of non-color strings is passed in as c, color points
13661375
# by uniqueness of the strings, such same strings get same color
13671376
create_colors = not self._are_valid_colors(c_values)
13681377
if create_colors:
@@ -1378,7 +1387,7 @@ def _make_plot(self, fig: Figure) -> None:
13781387
)
13791388

13801389
scatter = ax.scatter(
1381-
data[x].values,
1390+
x_data.values,
13821391
data[y].values,
13831392
c=c_values,
13841393
label=label,
@@ -1630,15 +1639,8 @@ def _ts_plot(self, ax: Axes, x, data: Series, style=None, **kwds):
16301639
# accept x to be consistent with normal plot func,
16311640
# x is not passed to tsplot as it uses data.index as x coordinate
16321641
# column_num must be in kwds for stacking purpose
1633-
freq, data = maybe_resample(data, ax, kwds)
1642+
freq, data = prepare_ts_data(data, ax, kwds)
16341643

1635-
# Set ax with freq info
1636-
decorate_axes(ax, freq)
1637-
# digging deeper
1638-
if hasattr(ax, "left_ax"):
1639-
decorate_axes(ax.left_ax, freq)
1640-
if hasattr(ax, "right_ax"):
1641-
decorate_axes(ax.right_ax, freq)
16421644
# TODO #54485
16431645
ax._plot_data.append((data, self._kind, kwds)) # type: ignore[attr-defined]
16441646

pandas/plotting/_matplotlib/timeseries.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,6 @@
4848
from pandas._typing import NDFrameT
4949

5050
from pandas import (
51-
DataFrame,
5251
DatetimeIndex,
5352
Index,
5453
PeriodIndex,
@@ -231,8 +230,8 @@ def _get_freq(ax: Axes, series: Series):
231230
return freq, ax_freq
232231

233232

234-
def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool:
235-
freq = _get_index_freq(data.index)
233+
def use_dynamic_x(ax: Axes, index: Index) -> bool:
234+
freq = _get_index_freq(index)
236235
ax_freq = _get_ax_freq(ax)
237236

238237
if freq is None: # convert irregular if axes has freq info
@@ -250,16 +249,15 @@ def use_dynamic_x(ax: Axes, data: DataFrame | Series) -> bool:
250249
return False
251250

252251
# FIXME: hack this for 0.10.1, creating more technical debt...sigh
253-
if isinstance(data.index, ABCDatetimeIndex):
252+
if isinstance(index, ABCDatetimeIndex):
254253
# error: "BaseOffset" has no attribute "_period_dtype_code"
255254
freq_str = OFFSET_TO_PERIOD_FREQSTR.get(freq_str, freq_str)
256255
base = to_offset(freq_str, is_period=True)._period_dtype_code # type: ignore[attr-defined]
257-
x = data.index
258256
if base <= FreqGroup.FR_DAY.value:
259-
return x[:1].is_normalized
260-
period = Period(x[0], freq_str)
257+
return index[:1].is_normalized
258+
period = Period(index[0], freq_str)
261259
assert isinstance(period, Period)
262-
return period.to_timestamp().tz_localize(x.tz) == x[0]
260+
return period.to_timestamp().tz_localize(index.tz) == index[0]
263261
return True
264262

265263

@@ -366,3 +364,19 @@ def format_dateaxis(
366364
raise TypeError("index type not supported")
367365

368366
plt.draw_if_interactive()
367+
368+
369+
def prepare_ts_data(
370+
series: Series, ax: Axes, kwargs: dict[str, Any]
371+
) -> tuple[BaseOffset | str, Series]:
372+
freq, data = maybe_resample(series, ax, kwargs)
373+
374+
# Set ax with freq info
375+
decorate_axes(ax, freq)
376+
# digging deeper
377+
if hasattr(ax, "left_ax"):
378+
decorate_axes(ax.left_ax, freq)
379+
if hasattr(ax, "right_ax"):
380+
decorate_axes(ax.right_ax, freq)
381+
382+
return freq, data

pandas/tests/indexing/test_iloc.py

Lines changed: 50 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -726,15 +726,16 @@ def test_iloc_setitem_with_scalar_index(self, indexer, value):
726726

727727
@pytest.mark.filterwarnings("ignore::UserWarning")
728728
def test_iloc_mask(self):
729-
# GH 3631, iloc with a mask (of a series) should raise
729+
# GH 60994, iloc with a mask (of a series) should return accordingly
730730
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
731731
mask = df.a % 2 == 0
732732
msg = "iLocation based boolean indexing cannot use an indexable as a mask"
733733
with pytest.raises(ValueError, match=msg):
734734
df.iloc[mask]
735+
735736
mask.index = range(len(mask))
736-
msg = "iLocation based boolean indexing on an integer type is not available"
737-
with pytest.raises(NotImplementedError, match=msg):
737+
msg = "Unalignable boolean Series provided as indexer"
738+
with pytest.raises(IndexingError, match=msg):
738739
df.iloc[mask]
739740

740741
# ndarray ok
@@ -753,18 +754,13 @@ def test_iloc_mask(self):
753754
(None, ".iloc"): "0b1100",
754755
("index", ""): "0b11",
755756
("index", ".loc"): "0b11",
756-
("index", ".iloc"): (
757-
"iLocation based boolean indexing cannot use an indexable as a mask"
758-
),
759-
("locs", ""): "Unalignable boolean Series provided as indexer "
760-
"(index of the boolean Series and of the indexed "
761-
"object do not match).",
762-
("locs", ".loc"): "Unalignable boolean Series provided as indexer "
763-
"(index of the boolean Series and of the "
764-
"indexed object do not match).",
765-
("locs", ".iloc"): (
766-
"iLocation based boolean indexing on an integer type is not available"
767-
),
757+
(
758+
"index",
759+
".iloc",
760+
): "iLocation based boolean indexing cannot use an indexable as a mask",
761+
("locs", ""): "Unalignable boolean Series provided as indexer",
762+
("locs", ".loc"): "Unalignable boolean Series provided as indexer",
763+
("locs", ".iloc"): "Unalignable boolean Series provided as indexer",
768764
}
769765

770766
# UserWarnings from reindex of a boolean mask
@@ -780,18 +776,52 @@ def test_iloc_mask(self):
780776
else:
781777
accessor = df
782778
answer = str(bin(accessor[mask]["nums"].sum()))
783-
except (ValueError, IndexingError, NotImplementedError) as err:
779+
except (ValueError, IndexingError) as err:
784780
answer = str(err)
785781

786782
key = (
787783
idx,
788784
method,
789785
)
790-
r = expected.get(key)
791-
if r != answer:
792-
raise AssertionError(
793-
f"[{key}] does not match [{answer}], received [{r}]"
786+
expected_result = expected.get(key)
787+
788+
# Fix the assertion to check for substring match
789+
if (
790+
idx is None or (idx == "index" and method != ".iloc")
791+
) and "0b" in expected_result:
792+
# For successful numeric results, exact match is needed
793+
assert expected_result == answer, (
794+
f"[{key}] does not match [{answer}]"
794795
)
796+
else:
797+
# For error messages, substring match is sufficient
798+
assert expected_result in answer, f"[{key}] not found in [{answer}]"
799+
800+
def test_iloc_with_numpy_bool_array(self):
801+
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
802+
result = df.iloc[np.array([True, False, True, False, True], dtype=bool)]
803+
expected = DataFrame({"a": [0, 2, 4]}, index=["A", "C", "E"])
804+
tm.assert_frame_equal(result, expected)
805+
806+
def test_iloc_series_mask_with_index_mismatch_raises(self):
807+
df = DataFrame(list(range(5)), index=list("ABCDE"), columns=["a"])
808+
mask = df.a % 2 == 0
809+
msg = "Unalignable boolean Series provided as indexer"
810+
with pytest.raises(IndexingError, match=msg):
811+
df.iloc[Series([True] * len(mask), dtype=bool)]
812+
813+
def test_iloc_series_mask_all_true(self):
814+
df = DataFrame(list(range(5)), columns=["a"])
815+
mask = Series([True] * len(df), dtype=bool)
816+
result = df.iloc[mask]
817+
tm.assert_frame_equal(result, df)
818+
819+
def test_iloc_series_mask_alternate_true(self):
820+
df = DataFrame(list(range(5)), columns=["a"])
821+
mask = Series([True, False, True, False, True], dtype=bool)
822+
result = df.iloc[mask]
823+
expected = DataFrame({"a": [0, 2, 4]}, index=[0, 2, 4])
824+
tm.assert_frame_equal(result, expected)
795825

796826
def test_iloc_non_unique_indexing(self):
797827
# GH 4017, non-unique indexing (on the axis)

pandas/tests/plotting/frame/test_frame.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -840,14 +840,26 @@ def test_plot_scatter_shape(self):
840840
axes = df.plot(x="x", y="y", kind="scatter", subplots=True)
841841
_check_axes_shape(axes, axes_num=1, layout=(1, 1))
842842

843-
def test_raise_error_on_datetime_time_data(self):
844-
# GH 8113, datetime.time type is not supported by matplotlib in scatter
843+
def test_scatter_on_datetime_time_data(self):
844+
# datetime.time type is now supported in scatter, since a converter
845+
# is implemented in ScatterPlot
845846
df = DataFrame(np.random.default_rng(2).standard_normal(10), columns=["a"])
846847
df["dtime"] = date_range(start="2014-01-01", freq="h", periods=10).time
847-
msg = "must be a string or a (real )?number, not 'datetime.time'"
848+
df.plot(kind="scatter", x="dtime", y="a")
848849

849-
with pytest.raises(TypeError, match=msg):
850-
df.plot(kind="scatter", x="dtime", y="a")
850+
def test_scatter_line_xticks(self):
851+
# GH#61005
852+
df = DataFrame(
853+
[(datetime(year=2025, month=1, day=1, hour=n), n) for n in range(3)],
854+
columns=["datetime", "y"],
855+
)
856+
fig, ax = plt.subplots(2, sharex=True)
857+
df.plot.scatter(x="datetime", y="y", ax=ax[0])
858+
scatter_xticks = ax[0].get_xticks()
859+
df.plot(x="datetime", y="y", ax=ax[1])
860+
line_xticks = ax[1].get_xticks()
861+
assert scatter_xticks[0] == line_xticks[0]
862+
assert scatter_xticks[-1] == line_xticks[-1]
851863

852864
@pytest.mark.parametrize("x, y", [("dates", "vals"), (0, 1)])
853865
def test_scatterplot_datetime_data(self, x, y):

web/pandas/community/ecosystem.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -469,7 +469,7 @@ read_record.data
469469
df.dtypes
470470
```
471471

472-
ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/query_builder/).
472+
ArcticDB also supports appending, updating, and querying data from storage to a pandas DataFrame. Please find more information [here](https://docs.arcticdb.io/latest/api/processing/#arcticdb.QueryBuilder).
473473

474474
### [Hugging Face](https://huggingface.co/datasets)
475475

0 commit comments

Comments
 (0)