Skip to content

Sync Fork from Upstream Repo #245

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jul 29, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,18 @@ def time_category_size(self):
self.draws.groupby(self.cats).size()


class Shift:
    """ASV benchmarks for ``DataFrameGroupBy.shift``.

    ``time_defaults`` exercises shifting with the default (missing-value)
    fill, while ``time_fill_value`` exercises the explicit ``fill_value``
    code path.
    """

    def setup(self):
        size = 18
        groups = ["a", "b"] * (size // 2)
        self.df = DataFrame({"g": groups, "v": list(range(size))})

    def time_defaults(self):
        # shift with no arguments: newly introduced slots become NaN
        self.df.groupby("g").shift()

    def time_fill_value(self):
        # shift with an explicit scalar fill value
        self.df.groupby("g").shift(fill_value=99)


class FillNA:
def setup(self):
N = 100
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Fixed regressions
- Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`)
- Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`)
- Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`)
- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`)
-

.. ---------------------------------------------------------------------------
Expand Down
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Other enhancements
- Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`)
- :meth:`Styler.bar` now validates the input argument ``width`` and ``height`` (:issue:`42511`)
- :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview <window.overview>` for performance and functional benefits (:issue:`42273`)
- Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`)
- Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`)
-

Expand Down Expand Up @@ -168,6 +169,7 @@ Performance improvements
- Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`)
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`)
- Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`)
- Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`)

.. ---------------------------------------------------------------------------

Expand Down Expand Up @@ -262,6 +264,7 @@ Groupby/resample/rolling
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
- Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`)
- Bug in :meth:`GroupBy.shift` where the grouping columns were incorrectly included in the result when ``fill_value`` was not ``None`` (:issue:`41556`)

Reshaping
^^^^^^^^^
Expand Down
29 changes: 18 additions & 11 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,21 +690,28 @@ def agg(self):
obj = self.obj
axis = self.axis

# TODO: Avoid having to change state
self.obj = self.obj if self.axis == 0 else self.obj.T
self.axis = 0

result = None
try:
result = super().agg()
except TypeError as err:
exc = TypeError(
"DataFrame constructor called with "
f"incompatible data and dtype: {err}"
)
raise exc from err
finally:
self.obj = obj
self.axis = axis

if axis == 1:
result = FrameRowApply(
obj.T,
self.orig_f,
self.raw,
self.result_type,
self.args,
self.kwargs,
).agg()
result = result.T if result is not None else result
else:
result = super().agg()

if result is None:
result = obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)
result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs)

return result

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,9 +420,9 @@ def extract_array(
return obj._values
return obj

obj = obj.array
obj = obj._values

if extract_numpy and isinstance(obj, ABCPandasArray):
elif extract_numpy and isinstance(obj, ABCPandasArray):
obj = obj.to_numpy()

return obj
Expand Down
11 changes: 8 additions & 3 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -2822,6 +2822,7 @@ def _get_cythonized_result(
result_is_index: bool = False,
pre_processing=None,
post_processing=None,
fill_value=None,
**kwargs,
):
"""
Expand Down Expand Up @@ -2872,6 +2873,8 @@ def _get_cythonized_result(
second argument, i.e. the signature should be
(ndarray, Type). If `needs_nullable=True`, a third argument should be
`nullable`, to allow for processing specific to nullable values.
fill_value : any, default None
The scalar value to use for newly introduced missing values.
**kwargs : dict
Extra arguments to be passed back to Cython funcs

Expand All @@ -2896,7 +2899,7 @@ def _get_cythonized_result(
grouper = self.grouper

ids, _, ngroups = grouper.group_info
output: dict[base.OutputKey, np.ndarray] = {}
output: dict[base.OutputKey, ArrayLike] = {}

base_func = getattr(libgroupby, how)
base_func = partial(base_func, labels=ids)
Expand All @@ -2911,6 +2914,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
else:
result_sz = len(values)

result: ArrayLike
result = np.zeros(result_sz, dtype=cython_dtype)
if needs_2d:
result = result.reshape((-1, 1))
Expand Down Expand Up @@ -2946,7 +2950,7 @@ def blk_func(values: ArrayLike) -> ArrayLike:
result = result.reshape(-1)

if result_is_index:
result = algorithms.take_nd(values, result)
result = algorithms.take_nd(values, result, fill_value=fill_value)

if post_processing:
pp_kwargs = {}
Expand Down Expand Up @@ -3022,7 +3026,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
tshift : Shift the time index, using the index’s frequency
if available.
"""
if freq is not None or axis != 0 or not isna(fill_value):
if freq is not None or axis != 0:
return self.apply(lambda x: x.shift(periods, freq, axis, fill_value))

return self._get_cythonized_result(
Expand All @@ -3032,6 +3036,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None):
needs_ngroups=True,
result_is_index=True,
periods=periods,
fill_value=fill_value,
)

@final
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1920,11 +1920,11 @@ def get_block_type(values, dtype: DtypeObj | None = None):


def new_block(values, placement, *, ndim: int, klass=None) -> Block:
# caller is responsible for ensuring values is NOT a PandasArray

if not isinstance(placement, BlockPlacement):
placement = BlockPlacement(placement)

values, _ = extract_pandas_array(values, None, ndim)
check_ndim(values, placement, ndim)

if klass is None:
Expand Down
7 changes: 1 addition & 6 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -1779,11 +1779,6 @@ def create_block_manager_from_blocks(
return mgr


# We define this here so we can override it in tests.extension.test_numpy
def _extract_array(obj):
return extract_array(obj, extract_numpy=True)


def create_block_manager_from_arrays(
arrays,
names: Index,
Expand All @@ -1795,7 +1790,7 @@ def create_block_manager_from_arrays(
# assert isinstance(axes, list)
# assert all(isinstance(x, Index) for x in axes)

arrays = [_extract_array(x) for x in arrays]
arrays = [extract_array(x, extract_numpy=True) for x in arrays]

try:
blocks = _form_blocks(arrays, names, axes, consolidate)
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/reshape/tile.py
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,11 @@ def _bins_to_cuts(
bins = unique_bins

side = "left" if right else "right"
ids = ensure_platform_int(bins.searchsorted(x, side=side))
# error: No overload variant of "searchsorted" of "ndarray" matches
# argument types "Any", "str"
ids = ensure_platform_int(
bins.searchsorted(x, side=side) # type: ignore[call-overload]
)

if include_lowest:
ids[np.asarray(x) == bins[0]] = 1
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/strings/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
from pandas.core.dtypes.missing import isna

from pandas.core.base import NoNewAttributesMixin
from pandas.core.construction import extract_array

if TYPE_CHECKING:
from pandas import (
Expand Down Expand Up @@ -213,10 +214,7 @@ def _validate(data):
# see _libs/lib.pyx for list of inferred types
allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"]

# TODO: avoid kludge for tests.extension.test_numpy
from pandas.core.internals.managers import _extract_array

data = _extract_array(data)
data = extract_array(data)

values = getattr(data, "categories", data) # categorical / normal

Expand Down
27 changes: 24 additions & 3 deletions pandas/io/formats/style.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,8 +476,8 @@ def to_latex(
Defaults to ``pandas.options.styler.sparse.index`` value.
sparse_columns : bool, optional
Whether to sparsify the display of a hierarchical index. Setting to False
will display each explicit level element in a hierarchical key for each row.
Defaults to ``pandas.options.styler.sparse.columns`` value.
will display each explicit level element in a hierarchical key for each
column. Defaults to ``pandas.options.styler.sparse.columns`` value.
multirow_align : {"c", "t", "b"}
If sparsifying hierarchical MultiIndexes whether to align text centrally,
at the top or bottom.
Expand Down Expand Up @@ -815,6 +815,8 @@ def to_html(
*,
table_uuid: str | None = None,
table_attributes: str | None = None,
sparse_index: bool | None = None,
sparse_columns: bool | None = None,
encoding: str | None = None,
doctype_html: bool = False,
exclude_styles: bool = False,
Expand All @@ -840,6 +842,18 @@ def to_html(
``<table .. <table_attributes> >``

If not given defaults to Styler's preexisting value.
sparse_index : bool, optional
Whether to sparsify the display of a hierarchical index. Setting to False
will display each explicit level element in a hierarchical key for each row.
Defaults to ``pandas.options.styler.sparse.index`` value.

.. versionadded:: 1.4.0
sparse_columns : bool, optional
Whether to sparsify the display of a hierarchical index. Setting to False
will display each explicit level element in a hierarchical key for each
column. Defaults to ``pandas.options.styler.sparse.columns`` value.

.. versionadded:: 1.4.0
encoding : str, optional
Character encoding setting for file output, and HTML meta tags,
defaults to "utf-8" if None.
Expand All @@ -866,8 +880,15 @@ def to_html(
if table_attributes:
self.set_table_attributes(table_attributes)

if sparse_index is None:
sparse_index = get_option("styler.sparse.index")
if sparse_columns is None:
sparse_columns = get_option("styler.sparse.columns")

# Build HTML string..
html = self.render(
html = self._render_html(
sparse_index=sparse_index,
sparse_columns=sparse_columns,
exclude_styles=exclude_styles,
encoding=encoding if encoding else "utf-8",
doctype_html=doctype_html,
Expand Down
5 changes: 3 additions & 2 deletions pandas/tests/apply/test_frame_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,13 +644,14 @@ def test_apply_dup_names_multi_agg():
tm.assert_frame_equal(result, expected)


def test_apply_nested_result_axis_1():
@pytest.mark.parametrize("op", ["apply", "agg"])
def test_apply_nested_result_axis_1(op):
# GH 13820
def apply_list(row):
return [2 * row["A"], 2 * row["C"], 2 * row["B"]]

df = DataFrame(np.zeros((4, 4)), columns=list("ABCD"))
result = df.apply(apply_list, axis=1)
result = getattr(df, op)(apply_list, axis=1)
expected = Series(
[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]
)
Expand Down
18 changes: 1 addition & 17 deletions pandas/tests/extension/test_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,17 @@
ExtensionDtype,
PandasDtype,
)
from pandas.core.dtypes.generic import ABCPandasArray

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays.numpy_ import PandasArray
from pandas.core.internals import (
blocks,
managers,
)
from pandas.core.internals import blocks
from pandas.tests.extension import base

# TODO(ArrayManager) PandasArray
pytestmark = td.skip_array_manager_not_yet_implemented


def _extract_array_patched(obj):
if isinstance(obj, (pd.Index, pd.Series)):
obj = obj._values
if isinstance(obj, ABCPandasArray):
# TODO for reasons unclear, we get here in a couple of tests
# with PandasArray._typ *not* patched
obj = obj.to_numpy()

return obj


def _can_hold_element_patched(obj, element) -> bool:
if isinstance(element, PandasArray):
element = element.to_numpy()
Expand Down Expand Up @@ -98,7 +83,6 @@ def allow_in_pandas(monkeypatch):
"""
with monkeypatch.context() as m:
m.setattr(PandasArray, "_typ", "extension")
m.setattr(managers, "_extract_array", _extract_array_patched)
m.setattr(blocks, "can_hold_element", _can_hold_element_patched)
m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal)
yield
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_groupby_shift_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def test_group_shift_with_fill_value():
columns=["Z"],
index=None,
)
result = g.shift(-1, fill_value=0)[["Z"]]
result = g.shift(-1, fill_value=0)

tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/internals/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1376,9 +1376,11 @@ def test_make_block_no_pandas_array(block_maker):
# PandasArray, no dtype
result = block_maker(arr, slice(len(arr)), ndim=arr.ndim)
assert result.dtype.kind in ["i", "u"]
assert result.is_extension is False

if block_maker is make_block:
# new_block requires caller to unwrap PandasArray
assert result.is_extension is False

# PandasArray, PandasDtype
result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim)
assert result.dtype.kind in ["i", "u"]
Expand Down
22 changes: 22 additions & 0 deletions pandas/tests/io/formats/style/test_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pandas import (
DataFrame,
MultiIndex,
option_context,
)

jinja2 = pytest.importorskip("jinja2")
Expand Down Expand Up @@ -429,3 +430,24 @@ def test_sticky_levels(styler_mi, index, columns):
def test_sticky_raises(styler):
with pytest.raises(ValueError, match="`axis` must be"):
styler.set_sticky(axis="bad")


@pytest.mark.parametrize(
    "sparse_index, sparse_columns",
    [(True, True), (True, False), (False, True), (False, False)],
)
def test_sparse_options(sparse_index, sparse_columns):
    # ``Styler.to_html`` accepts ``sparse_index`` / ``sparse_columns`` keywords;
    # each must behave exactly like the corresponding ``styler.sparse.*`` option.
    cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")])
    ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")])
    df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx)
    styler = df.style

    default_html = styler.to_html()  # defaults under pd.options to (True, True)

    with option_context(
        "styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns
    ):
        # option-driven output equals the default only when both options are True
        html1 = styler.to_html()
        assert (html1 == default_html) is (sparse_index and sparse_columns)
        # explicit keyword arguments must reproduce the option-driven output
        html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns)
        assert html1 == html2