Skip to content

Commit ec2cce8

Browse files
authored
Merge pull request #245 from pandas-dev/master
Sync Fork from Upstream Repo
2 parents 17ceeac + 91872e7 commit ec2cce8

File tree

16 files changed

+107
-52
lines changed

16 files changed

+107
-52
lines changed

asv_bench/benchmarks/groupby.py

+12
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,18 @@ def time_category_size(self):
369369
self.draws.groupby(self.cats).size()
370370

371371

372+
class Shift:
373+
def setup(self):
374+
N = 18
375+
self.df = DataFrame({"g": ["a", "b"] * 9, "v": list(range(N))})
376+
377+
def time_defaults(self):
378+
self.df.groupby("g").shift()
379+
380+
def time_fill_value(self):
381+
self.df.groupby("g").shift(fill_value=99)
382+
383+
372384
class FillNA:
373385
def setup(self):
374386
N = 100

doc/source/whatsnew/v1.3.2.rst

+1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ Fixed regressions
1818
- Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
1919
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
2020
- Fixed regression in :meth:`DataFrame.shift` where a ``TypeError`` occurred when shifting a :class:`DataFrame` created by concatenation of slices and filling with values (:issue:`42719`)
21+
- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
2122
-
2223

2324
.. ---------------------------------------------------------------------------

doc/source/whatsnew/v1.4.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ Other enhancements
3535
- Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
3636
- :meth:`Styler.bar` now validates the input argument ``width`` and ``height`` (:issue:`42511`)
3737
- :meth:`Series.ewm` and :meth:`DataFrame.ewm` now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
38+
- Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`)
3839
- Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
3940
-
4041

@@ -168,6 +169,7 @@ Performance improvements
168169
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
169170
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
170171
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
172+
- Performance improvement in :meth:`.GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`)
171173

172174
.. ---------------------------------------------------------------------------
173175
@@ -262,6 +264,7 @@ Groupby/resample/rolling
262264
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
263265
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
264266
- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
267+
- Bug in :meth:`.GroupBy.shift` that would return the grouping columns if ``fill_value`` was not ``None`` (:issue:`41556`)
265268

266269
Reshaping
267270
^^^^^^^^^

pandas/core/apply.py

+18-11
Original file line numberDiff line numberDiff line change
@@ -690,21 +690,28 @@ def agg(self):
690690
obj = self.obj
691691
axis = self.axis
692692

693+
# TODO: Avoid having to change state
694+
self.obj = self.obj if self.axis == 0 else self.obj.T
695+
self.axis = 0
696+
697+
result = None
698+
try:
699+
result = super().agg()
700+
except TypeError as err:
701+
exc = TypeError(
702+
"DataFrame constructor called with "
703+
f"incompatible data and dtype: {err}"
704+
)
705+
raise exc from err
706+
finally:
707+
self.obj = obj
708+
self.axis = axis
709+
693710
if axis == 1:
694-
result = FrameRowApply(
695-
obj.T,
696-
self.orig_f,
697-
self.raw,
698-
self.result_type,
699-
self.args,
700-
self.kwargs,
701-
).agg()
702711
result = result.T if result is not None else result
703-
else:
704-
result = super().agg()
705712

706713
if result is None:
707-
result = obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
714+
result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
708715

709716
return result
710717

pandas/core/construction.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -420,9 +420,9 @@ def extract_array(
420420
return obj._values
421421
return obj
422422

423-
obj = obj.array
423+
obj = obj._values
424424

425-
if extract_numpy and isinstance(obj, ABCPandasArray):
425+
elif extract_numpy and isinstance(obj, ABCPandasArray):
426426
obj = obj.to_numpy()
427427

428428
return obj

pandas/core/groupby/groupby.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -2822,6 +2822,7 @@ def _get_cythonized_result(
28222822
result_is_index: bool = False,
28232823
pre_processing=None,
28242824
post_processing=None,
2825+
fill_value=None,
28252826
**kwargs,
28262827
):
28272828
"""
@@ -2872,6 +2873,8 @@ def _get_cythonized_result(
28722873
second argument, i.e. the signature should be
28732874
(ndarray, Type). If `needs_nullable=True`, a third argument should be
28742875
`nullable`, to allow for processing specific to nullable values.
2876+
fill_value : any, default None
2877+
The scalar value to use for newly introduced missing values.
28752878
**kwargs : dict
28762879
Extra arguments to be passed back to Cython funcs
28772880
@@ -2896,7 +2899,7 @@ def _get_cythonized_result(
28962899
grouper = self.grouper
28972900

28982901
ids, _, ngroups = grouper.group_info
2899-
output: dict[base.OutputKey, np.ndarray] = {}
2902+
output: dict[base.OutputKey, ArrayLike] = {}
29002903

29012904
base_func = getattr(libgroupby, how)
29022905
base_func = partial(base_func, labels=ids)
@@ -2911,6 +2914,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
29112914
else:
29122915
result_sz = len(values)
29132916

2917+
result: ArrayLike
29142918
result = np.zeros(result_sz, dtype=cython_dtype)
29152919
if needs_2d:
29162920
result = result.reshape((-1, 1))
@@ -2946,7 +2950,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
29462950
result = result.reshape(-1)
29472951

29482952
if result_is_index:
2949-
result = algorithms.take_nd(values, result)
2953+
result = algorithms.take_nd(values, result, fill_value=fill_value)
29502954

29512955
if post_processing:
29522956
pp_kwargs = {}
@@ -3022,7 +3026,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
30223026
tshift : Shift the time index, using the index’s frequency
30233027
if available.
30243028
"""
3025-
if freq is not None or axis != 0 or not isna(fill_value):
3029+
if freq is not None or axis != 0:
30263030
return self.apply(lambda x: x.shift(periods, freq, axis, fill_value))
30273031

30283032
return self._get_cythonized_result(
@@ -3032,6 +3036,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
30323036
needs_ngroups=True,
30333037
result_is_index=True,
30343038
periods=periods,
3039+
fill_value=fill_value,
30353040
)
30363041

30373042
@final

pandas/core/internals/blocks.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1920,11 +1920,11 @@ def get_block_type(values, dtype: DtypeObj | None = None):
19201920

19211921

19221922
def new_block(values, placement, *, ndim: int, klass=None) -> Block:
1923+
# caller is responsible for ensuring values is NOT a PandasArray
19231924

19241925
if not isinstance(placement, BlockPlacement):
19251926
placement = BlockPlacement(placement)
19261927

1927-
values, _ = extract_pandas_array(values, None, ndim)
19281928
check_ndim(values, placement, ndim)
19291929

19301930
if klass is None:

pandas/core/internals/managers.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -1779,11 +1779,6 @@ def create_block_manager_from_blocks(
17791779
return mgr
17801780

17811781

1782-
# We define this here so we can override it in tests.extension.test_numpy
1783-
def _extract_array(obj):
1784-
return extract_array(obj, extract_numpy=True)
1785-
1786-
17871782
def create_block_manager_from_arrays(
17881783
arrays,
17891784
names: Index,
@@ -1795,7 +1790,7 @@ def create_block_manager_from_arrays(
17951790
# assert isinstance(axes, list)
17961791
# assert all(isinstance(x, Index) for x in axes)
17971792

1798-
arrays = [_extract_array(x) for x in arrays]
1793+
arrays = [extract_array(x, extract_numpy=True) for x in arrays]
17991794

18001795
try:
18011796
blocks = _form_blocks(arrays, names, axes, consolidate)

pandas/core/reshape/tile.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,11 @@ def _bins_to_cuts(
418418
bins = unique_bins
419419

420420
side = "left" if right else "right"
421-
ids = ensure_platform_int(bins.searchsorted(x, side=side))
421+
# error: No overload variant of "searchsorted" of "ndarray" matches
422+
# argument types "Any", "str"
423+
ids = ensure_platform_int(
424+
bins.searchsorted(x, side=side) # type: ignore[call-overload]
425+
)
422426

423427
if include_lowest:
424428
ids[np.asarray(x) == bins[0]] = 1

pandas/core/strings/accessor.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pandas.core.dtypes.missing import isna
3535

3636
from pandas.core.base import NoNewAttributesMixin
37+
from pandas.core.construction import extract_array
3738

3839
if TYPE_CHECKING:
3940
from pandas import (
@@ -213,10 +214,7 @@ def _validate(data):
213214
# see _libs/lib.pyx for list of inferred types
214215
allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"]
215216

216-
# TODO: avoid kludge for tests.extension.test_numpy
217-
from pandas.core.internals.managers import _extract_array
218-
219-
data = _extract_array(data)
217+
data = extract_array(data)
220218

221219
values = getattr(data, "categories", data) # categorical / normal
222220

pandas/io/formats/style.py

+24-3
Original file line numberDiff line numberDiff line change
@@ -476,8 +476,8 @@ def to_latex(
476476
Defaults to ``pandas.options.styler.sparse.index`` value.
477477
sparse_columns : bool, optional
478478
Whether to sparsify the display of a hierarchical index. Setting to False
479-
will display each explicit level element in a hierarchical key for each row.
480-
Defaults to ``pandas.options.styler.sparse.columns`` value.
479+
will display each explicit level element in a hierarchical key for each
480+
column. Defaults to ``pandas.options.styler.sparse.columns`` value.
481481
multirow_align : {"c", "t", "b"}
482482
If sparsifying hierarchical MultiIndexes whether to align text centrally,
483483
at the top or bottom.
@@ -815,6 +815,8 @@ def to_html(
815815
*,
816816
table_uuid: str | None = None,
817817
table_attributes: str | None = None,
818+
sparse_index: bool | None = None,
819+
sparse_columns: bool | None = None,
818820
encoding: str | None = None,
819821
doctype_html: bool = False,
820822
exclude_styles: bool = False,
@@ -840,6 +842,18 @@ def to_html(
840842
``<table .. <table_attributes> >``
841843
842844
If not given defaults to Styler's preexisting value.
845+
sparse_index : bool, optional
846+
Whether to sparsify the display of a hierarchical index. Setting to False
847+
will display each explicit level element in a hierarchical key for each row.
848+
Defaults to ``pandas.options.styler.sparse.index`` value.
849+
850+
.. versionadded:: 1.4.0
851+
sparse_columns : bool, optional
852+
Whether to sparsify the display of a hierarchical index. Setting to False
853+
will display each explicit level element in a hierarchical key for each
854+
column. Defaults to ``pandas.options.styler.sparse.columns`` value.
855+
856+
.. versionadded:: 1.4.0
843857
encoding : str, optional
844858
Character encoding setting for file output, and HTML meta tags,
845859
defaults to "utf-8" if None.
@@ -866,8 +880,15 @@ def to_html(
866880
if table_attributes:
867881
self.set_table_attributes(table_attributes)
868882

883+
if sparse_index is None:
884+
sparse_index = get_option("styler.sparse.index")
885+
if sparse_columns is None:
886+
sparse_columns = get_option("styler.sparse.columns")
887+
869888
# Build HTML string..
870-
html = self.render(
889+
html = self._render_html(
890+
sparse_index=sparse_index,
891+
sparse_columns=sparse_columns,
871892
exclude_styles=exclude_styles,
872893
encoding=encoding if encoding else "utf-8",
873894
doctype_html=doctype_html,

pandas/tests/apply/test_frame_apply.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -644,13 +644,14 @@ def test_apply_dup_names_multi_agg():
644644
tm.assert_frame_equal(result, expected)
645645

646646

647-
def test_apply_nested_result_axis_1():
647+
@pytest.mark.parametrize("op", ["apply", "agg"])
648+
def test_apply_nested_result_axis_1(op):
648649
# GH 13820
649650
def apply_list(row):
650651
return [2 * row["A"], 2 * row["C"], 2 * row["B"]]
651652

652653
df = DataFrame(np.zeros((4, 4)), columns=list("ABCD"))
653-
result = df.apply(apply_list, axis=1)
654+
result = getattr(df, op)(apply_list, axis=1)
654655
expected = Series(
655656
[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
656657
)

pandas/tests/extension/test_numpy.py

+1-17
Original file line numberDiff line numberDiff line change
@@ -23,32 +23,17 @@
2323
ExtensionDtype,
2424
PandasDtype,
2525
)
26-
from pandas.core.dtypes.generic import ABCPandasArray
2726

2827
import pandas as pd
2928
import pandas._testing as tm
3029
from pandas.core.arrays.numpy_ import PandasArray
31-
from pandas.core.internals import (
32-
blocks,
33-
managers,
34-
)
30+
from pandas.core.internals import blocks
3531
from pandas.tests.extension import base
3632

3733
# TODO(ArrayManager) PandasArray
3834
pytestmark = td.skip_array_manager_not_yet_implemented
3935

4036

41-
def _extract_array_patched(obj):
42-
if isinstance(obj, (pd.Index, pd.Series)):
43-
obj = obj._values
44-
if isinstance(obj, ABCPandasArray):
45-
# TODO for reasons unclear, we get here in a couple of tests
46-
# with PandasArray._typ *not* patched
47-
obj = obj.to_numpy()
48-
49-
return obj
50-
51-
5237
def _can_hold_element_patched(obj, element) -> bool:
5338
if isinstance(element, PandasArray):
5439
element = element.to_numpy()
@@ -98,7 +83,6 @@ def allow_in_pandas(monkeypatch):
9883
"""
9984
with monkeypatch.context() as m:
10085
m.setattr(PandasArray, "_typ", "extension")
101-
m.setattr(managers, "_extract_array", _extract_array_patched)
10286
m.setattr(blocks, "can_hold_element", _can_hold_element_patched)
10387
m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
10488
yield

pandas/tests/groupby/test_groupby_shift_diff.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def test_group_shift_with_fill_value():
5555
columns=["Z"],
5656
index=None,
5757
)
58-
result = g.shift(-1, fill_value=0)[["Z"]]
58+
result = g.shift(-1, fill_value=0)
5959

6060
tm.assert_frame_equal(result, expected)
6161

pandas/tests/internals/test_internals.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -1376,9 +1376,11 @@ def test_make_block_no_pandas_array(block_maker):
13761376
# PandasArray, no dtype
13771377
result = block_maker(arr, slice(len(arr)), ndim=arr.ndim)
13781378
assert result.dtype.kind in ["i", "u"]
1379-
assert result.is_extension is False
13801379

13811380
if block_maker is make_block:
1381+
# new_block requires caller to unwrap PandasArray
1382+
assert result.is_extension is False
1383+
13821384
# PandasArray, PandasDtype
13831385
result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim)
13841386
assert result.dtype.kind in ["i", "u"]

pandas/tests/io/formats/style/test_html.py

+22
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from pandas import (
77
DataFrame,
88
MultiIndex,
9+
option_context,
910
)
1011

1112
jinja2 = pytest.importorskip("jinja2")
@@ -429,3 +430,24 @@ def test_sticky_levels(styler_mi, index, columns):
429430
def test_sticky_raises(styler):
430431
with pytest.raises(ValueError, match="`axis` must be"):
431432
styler.set_sticky(axis="bad")
433+
434+
435+
@pytest.mark.parametrize(
436+
"sparse_index, sparse_columns",
437+
[(True, True), (True, False), (False, True), (False, False)],
438+
)
439+
def test_sparse_options(sparse_index, sparse_columns):
440+
cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
441+
ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
442+
df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx)
443+
styler = df.style
444+
445+
default_html = styler.to_html() # defaults under pd.options to (True , True)
446+
447+
with option_context(
448+
"styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns
449+
):
450+
html1 = styler.to_html()
451+
assert (html1 == default_html) is (sparse_index and sparse_columns)
452+
html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns)
453+
assert html1 == html2

0 commit comments

Comments
 (0)