From 1e88d66ef1edfecd3896e9ba0e82479d4b412287 Mon Sep 17 00:00:00 2001 From: Krishna Chivukula <63070026+KrishnaSai2020@users.noreply.github.com> Date: Fri, 30 Jul 2021 16:51:30 +0100 Subject: [PATCH 01/10] TST: fixed eng_formatter doctest for #42671 (#42705) --- ci/code_checks.sh | 1 + pandas/io/formats/format.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 9f59958b4e827..f481ecf7a97ed 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -121,6 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/io/parsers/ \ pandas/io/sas/ \ pandas/io/sql.py \ + pandas/io/formats/format.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 83e0086958b9a..3fd3d84f90161 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1956,16 +1956,14 @@ def __call__(self, num: int | float) -> str: """ Formats a number in engineering notation, appending a letter representing the power of 1000 of the original number. Some examples: - - >>> format_eng(0) # for self.accuracy = 0 + >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True) + >>> format_eng(0) ' 0' - - >>> format_eng(1000000) # for self.accuracy = 1, - # self.use_eng_prefix = True + >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True) + >>> format_eng(1_000_000) ' 1.0M' - - >>> format_eng("-1e-6") # for self.accuracy = 2 - # self.use_eng_prefix = False + >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False) + >>> format_eng("-1e-6") '-1.00E-06' @param num: the value to represent From 2131433d7cc21832b6db7673a09af9fcf377032d Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 30 Jul 2021 11:56:56 -0700 Subject: [PATCH 02/10] CI: catch warning produced in resample (#42820) --- pandas/tests/resample/test_resample_api.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 76ac86d798086..addd6c17809a2 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -349,7 +349,12 @@ def test_agg(): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) for t in cases: - result = t.aggregate([np.mean, np.std]) + warn = FutureWarning if t in cases[1:3] else None + with tm.assert_produces_warning( + warn, match="Dropping invalid columns", check_stacklevel=False + ): + # .var on dt64 column raises and is dropped + result = t.aggregate([np.mean, np.std]) tm.assert_frame_equal(result, expected) expected = pd.concat([a_mean, b_std], axis=1) From 722e789e1f7e64d8b244584a986ec19d26c695cc Mon Sep 17 00:00:00 2001 From: Rahul Gaikwad Date: Sat, 31 Jul 2021 03:45:03 +0530 Subject: [PATCH 03/10] Fixed bug: Dataframe.sort_values not raising ValueError for ascending-incompatible value and Series.sort_values raising ValueError for int value (#42684) --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/frame.py | 3 ++- pandas/core/series.py | 5 ++-- .../tests/frame/methods/test_sort_values.py | 20 +++++++++++++++ .../tests/series/methods/test_sort_values.py | 25 +++++++++++++++++-- 5 files changed, 49 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e42360558d284..ce0158b05c2ab 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ 
b/doc/source/whatsnew/v1.4.0.rst @@ -229,6 +229,8 @@ Indexing - Bug in :meth:`Series.loc` when with a :class:`MultiIndex` whose first level contains only ``np.nan`` values (:issue:`42055`) - Bug in indexing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` when passing a string, the return type depended on whether the index was monotonic (:issue:`24892`) - Bug in indexing on a :class:`MultiIndex` failing to drop scalar levels when the indexer is a tuple containing a datetime-like string (:issue:`42476`) +- Bug in :meth:`DataFrame.sort_values` and :meth:`Series.sort_values` when passing an ascending value, failed to raise or incorrectly raising ``ValueError`` (:issue:`41634`) +- Bug in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`) Missing ^^^^^^^ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 27aa2ed939c1a..48b18a33f9c9f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -85,6 +85,7 @@ rewrite_axis_style_signature, ) from pandas.util._validators import ( + validate_ascending, validate_axis_style_args, validate_bool_kwarg, validate_percentile, @@ -6202,7 +6203,7 @@ def sort_values( # type: ignore[override] ): inplace = validate_bool_kwarg(inplace, "inplace") axis = self._get_axis_number(axis) - + ascending = validate_ascending(ascending) if not isinstance(by, list): by = [by] if is_sequence(ascending) and len(by) != len(ascending): diff --git a/pandas/core/series.py b/pandas/core/series.py index 32b56462788e5..ca71eb1f9043c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -57,6 +57,7 @@ doc, ) from pandas.util._validators import ( + validate_ascending, validate_bool_kwarg, validate_percentile, ) @@ -69,7 +70,6 @@ ) from pandas.core.dtypes.common import ( ensure_platform_int, - is_bool, is_dict_like, is_integer, is_iterator, @@ -3438,8 +3438,7 @@ def sort_values( ) ascending = ascending[0] - if not is_bool(ascending): - raise ValueError("ascending must be boolean") + ascending = validate_ascending(ascending) if na_position not in ["first", "last"]: raise ValueError(f"invalid na_position: {na_position}") diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index d46796bcd978b..e104617552efc 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -868,3 +868,23 @@ def test_sort_values_pos_args_deprecation(self): result = df.sort_values("a", 0) expected = DataFrame({"a": [1, 2, 3]}) tm.assert_frame_equal(result, expected) + + def test_sort_values_validate_ascending_for_value_error(self): + # GH41634 + df = DataFrame({"D": [23, 7, 21]}) + + msg = 'For argument "ascending" expected type bool, received type str.' 
+ with pytest.raises(ValueError, match=msg): + df.sort_values(by="D", ascending="False") + + @pytest.mark.parametrize("ascending", [False, 0, 1, True]) + def test_sort_values_validate_ascending_functional(self, ascending): + df = DataFrame({"D": [23, 7, 21]}) + indexer = df["D"].argsort().values + + if not ascending: + indexer = indexer[::-1] + + expected = df.loc[df.index[indexer]] + result = df.sort_values(by="D", ascending=ascending) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_sort_values.py b/pandas/tests/series/methods/test_sort_values.py index 67f986c0949ca..adc578d948163 100644 --- a/pandas/tests/series/methods/test_sort_values.py +++ b/pandas/tests/series/methods/test_sort_values.py @@ -51,7 +51,7 @@ def test_sort_values(self, datetime_series): expected = ts.sort_values(ascending=False, na_position="first") tm.assert_series_equal(expected, ordered) - msg = "ascending must be boolean" + msg = 'For argument "ascending" expected type bool, received type NoneType.' with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=None) msg = r"Length of ascending \(0\) must be 1 for Series" @@ -63,7 +63,7 @@ def test_sort_values(self, datetime_series): msg = r"Length of ascending \(2\) must be 1 for Series" with pytest.raises(ValueError, match=msg): ts.sort_values(ascending=[False, False]) - msg = "ascending must be boolean" + msg = 'For argument "ascending" expected type bool, received type str.' with pytest.raises(ValueError, match=msg): ts.sort_values(ascending="foobar") @@ -206,6 +206,27 @@ def test_mergesort_decending_stability(self): expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"]) tm.assert_series_equal(result, expected) + def test_sort_values_validate_ascending_for_value_error(self): + # GH41634 + ser = Series([23, 7, 21]) + + msg = 'For argument "ascending" expected type bool, received type str.' 
+ with pytest.raises(ValueError, match=msg): + ser.sort_values(ascending="False") + + @pytest.mark.parametrize("ascending", [False, 0, 1, True]) + def test_sort_values_validate_ascending_functional(self, ascending): + # GH41634 + ser = Series([23, 7, 21]) + expected = np.sort(ser.values) + + sorted_ser = ser.sort_values(ascending=ascending) + if not ascending: + expected = expected[::-1] + + result = sorted_ser.values + tm.assert_numpy_array_equal(result, expected) + class TestSeriesSortingKey: def test_sort_values_key(self): From ce830831350c77f1ae9472f2f3b022e4af0ccc8b Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 30 Jul 2021 16:08:32 -0700 Subject: [PATCH 04/10] CLN: collected cleanups (#42827) --- pandas/core/base.py | 4 ++-- pandas/core/construction.py | 2 +- pandas/core/indexes/datetimelike.py | 3 ++- pandas/core/indexing.py | 24 +++++++++++------------- pandas/core/internals/concat.py | 1 + pandas/core/series.py | 2 +- 6 files changed, 18 insertions(+), 18 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 4d380c6831071..7d51b50f783a5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1221,7 +1221,7 @@ def factorize(self, sort: bool = False, na_sentinel: int | None = -1): """ @doc(_shared_docs["searchsorted"], klass="Index") - def searchsorted(self, value, side="left", sorter=None) -> np.ndarray: + def searchsorted(self, value, side="left", sorter=None) -> npt.NDArray[np.intp]: return algorithms.searchsorted(self._values, value, side=side, sorter=sorter) def drop_duplicates(self, keep="first"): @@ -1232,5 +1232,5 @@ def drop_duplicates(self, keep="first"): @final def _duplicated( self, keep: Literal["first", "last", False] = "first" - ) -> np.ndarray: + ) -> npt.NDArray[np.bool_]: return duplicated(self._values, keep=keep) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 89591f27e9092..f84aaa907f3fc 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -402,7 +402,7 @@ def extract_array( >>> extract_array([1, 2, 3]) [1, 2, 3] - For an ndarray-backed Series / Index a PandasArray is returned. + For an ndarray-backed Series / Index the ndarray is returned. >>> extract_array(pd.Series([1, 2, 3])) array([1, 2, 3]) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 07c6a84f75302..7dc59bdb1e840 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -238,10 +238,11 @@ def _format_attrs(self): """ attrs = super()._format_attrs() for attrib in self._attributes: + # iterating over _attributes prevents us from doing this for PeriodIndex if attrib == "freq": freq = self.freqstr if freq is not None: - freq = repr(freq) + freq = repr(freq) # e.g. 
D -> 'D' # Argument 1 to "append" of "list" has incompatible type # "Tuple[str, Optional[str]]"; expected "Tuple[str, Union[str, int]]" attrs.append(("freq", freq)) # type: ignore[arg-type] diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3b42d1c4505da..60179b69f56a4 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -642,7 +642,7 @@ def _get_setitem_indexer(self, key): self._ensure_listlike_indexer(key) if self.axis is not None: - return self._convert_tuple(key, is_setter=True) + return self._convert_tuple(key) ax = self.obj._get_axis(0) @@ -653,12 +653,12 @@ def _get_setitem_indexer(self, key): if isinstance(key, tuple): with suppress(IndexingError): - return self._convert_tuple(key, is_setter=True) + return self._convert_tuple(key) if isinstance(key, range): return list(key) - return self._convert_to_indexer(key, axis=0, is_setter=True) + return self._convert_to_indexer(key, axis=0) def _ensure_listlike_indexer(self, key, axis=None, value=None): """ @@ -755,21 +755,19 @@ def _is_nested_tuple_indexer(self, tup: tuple) -> bool: return any(is_nested_tuple(tup, ax) for ax in self.obj.axes) return False - def _convert_tuple(self, key, is_setter: bool = False): + def _convert_tuple(self, key): keyidx = [] if self.axis is not None: axis = self.obj._get_axis_number(self.axis) for i in range(self.ndim): if i == axis: - keyidx.append( - self._convert_to_indexer(key, axis=axis, is_setter=is_setter) - ) + keyidx.append(self._convert_to_indexer(key, axis=axis)) else: keyidx.append(slice(None)) else: self._validate_key_length(key) for i, k in enumerate(key): - idx = self._convert_to_indexer(k, axis=i, is_setter=is_setter) + idx = self._convert_to_indexer(k, axis=i) keyidx.append(idx) return tuple(keyidx) @@ -867,8 +865,8 @@ def _getitem_nested_tuple(self, tup: tuple): # a tuple passed to a series with a multi-index if len(tup) > self.ndim: if self.name != "loc": - # This should never be reached, but lets be explicit about it - raise ValueError("Too many indices") + # This should never be reached, but let's be explicit about it + raise ValueError("Too many indices") # pragma: no cover if all(is_hashable(x) or com.is_null_slice(x) for x in tup): # GH#10521 Series should reduce MultiIndex dimensions instead of # DataFrame, IndexingError is not raised when slice(None,None,None) @@ -911,7 +909,7 @@ def _getitem_nested_tuple(self, tup: tuple): return obj - def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): + def _convert_to_indexer(self, key, axis: int): raise AbstractMethodError(self) def __getitem__(self, key): @@ -1176,7 +1174,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): # return a DatetimeIndex instead of a slice object. return self.obj.take(indexer, axis=axis) - def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): + def _convert_to_indexer(self, key, axis: int): """ Convert indexing key into something we can use to do actual fancy indexing on a ndarray. @@ -1486,7 +1484,7 @@ def _get_slice_axis(self, slice_obj: slice, axis: int): labels._validate_positional_slice(slice_obj) return self.obj._slice(slice_obj, axis=axis) - def _convert_to_indexer(self, key, axis: int, is_setter: bool = False): + def _convert_to_indexer(self, key, axis: int): """ Much simpler as we only have to deal with our valid types. 
""" diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 34d0137c26fda..9bc2404cefcfa 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -672,6 +672,7 @@ def _combine_concat_plans(plans, concat_axis: int): offset += last_plc.as_slice.stop else: + # singleton list so we can modify it as a side-effect within _next_or_none num_ended = [0] def _next_or_none(seq): diff --git a/pandas/core/series.py b/pandas/core/series.py index ca71eb1f9043c..ce986f2dd8038 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4385,7 +4385,7 @@ def _reduce( return op(delegate, skipna=skipna, **kwds) def _reindex_indexer( - self, new_index: Index | None, indexer: np.ndarray | None, copy: bool + self, new_index: Index | None, indexer: npt.NDArray[np.intp] | None, copy: bool ) -> Series: # Note: new_index is None iff indexer is None # if not None, indexer is np.intp From a9996505859ef7c679ac6eaea584a1df71ae9f07 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Fri, 30 Jul 2021 16:09:32 -0700 Subject: [PATCH 05/10] REF: de-duplicate consolidation methods (#42822) --- pandas/core/internals/array_manager.py | 12 ------ pandas/core/internals/base.py | 16 ++++++++ pandas/core/internals/managers.py | 52 ++++++++++++-------------- 3 files changed, 39 insertions(+), 41 deletions(-) diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index bd049b80b281c..79c0aad66229c 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -167,15 +167,6 @@ def set_axis(self, axis: int, new_labels: Index) -> None: axis = self._normalize_axis(axis) self._axes[axis] = new_labels - def consolidate(self: T) -> T: - return self - - def is_consolidated(self) -> bool: - return True - - def _consolidate_inplace(self) -> None: - pass - def get_dtypes(self): return np.array([arr.dtype for arr in self.arrays], dtype="object") @@ -1262,9 +1253,6 @@ def _can_hold_na(self) -> bool: def is_single_block(self) -> bool: return True - def _consolidate_check(self): - pass - def fast_xs(self, loc: int) -> ArrayLike: raise NotImplementedError("Use series._values[loc] instead") diff --git a/pandas/core/internals/base.py b/pandas/core/internals/base.py index 0ee22200ed495..e65318dd29c52 100644 --- a/pandas/core/internals/base.py +++ b/pandas/core/internals/base.py @@ -33,6 +33,7 @@ class DataManager(PandasObject): def items(self) -> Index: raise AbstractMethodError(self) + @final def __len__(self) -> int: return len(self.items) @@ -105,6 +106,7 @@ def _equal_values(self: T, other: T) -> bool: """ raise AbstractMethodError(self) + @final def equals(self, other: object) -> bool: """ Implementation for DataFrame.equals @@ -129,13 +131,27 @@ def apply( ) -> T: raise AbstractMethodError(self) + @final def isna(self: T, func) -> T: return self.apply("apply", func=func) + # -------------------------------------------------------------------- + # Consolidation: No-ops for all but BlockManager + + def is_consolidated(self) -> bool: + return True + + def consolidate(self: T) -> T: + return self + + def _consolidate_inplace(self) -> None: + return + class SingleDataManager(DataManager): ndim = 1 + @final @property def array(self): """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8937c2c107c62..9d35e1e8d4929 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -465,19 +465,6 @@ def to_native_types(self: T, **kwargs) -> T: """ return 
self.apply("to_native_types", **kwargs) - def is_consolidated(self) -> bool: - """ - Return True if more than one block with the same dtype - """ - if not self._known_consolidated: - self._consolidate_check() - return self._is_consolidated - - def _consolidate_check(self) -> None: - dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] - self._is_consolidated = len(dtypes) == len(set(dtypes)) - self._known_consolidated = True - @property def is_numeric_mixed_type(self) -> bool: return all(block.is_numeric for block in self.blocks) @@ -623,13 +610,6 @@ def consolidate(self: T) -> T: bm._consolidate_inplace() return bm - def _consolidate_inplace(self) -> None: - if not self.is_consolidated(): - self.blocks = tuple(_consolidate(self.blocks)) - self._is_consolidated = True - self._known_consolidated = True - self._rebuild_blknos_and_blklocs() - def reindex_indexer( self: T, new_axis: Index, @@ -1551,6 +1531,29 @@ def _interleave( return result + # ---------------------------------------------------------------- + # Consolidation + + def is_consolidated(self) -> bool: + """ + Return True if more than one block with the same dtype + """ + if not self._known_consolidated: + self._consolidate_check() + return self._is_consolidated + + def _consolidate_check(self) -> None: + dtypes = [blk.dtype for blk in self.blocks if blk._can_consolidate] + self._is_consolidated = len(dtypes) == len(set(dtypes)) + self._known_consolidated = True + + def _consolidate_inplace(self) -> None: + if not self.is_consolidated(): + self.blocks = tuple(_consolidate(self.blocks)) + self._is_consolidated = True + self._known_consolidated = True + self._rebuild_blknos_and_blklocs() + class SingleBlockManager(BaseBlockManager, SingleDataManager): """manage a single block with""" @@ -1710,15 +1713,6 @@ def array_values(self): def _can_hold_na(self) -> bool: return self._block._can_hold_na - def is_consolidated(self) -> bool: - return True - - def _consolidate_check(self): - pass - - def _consolidate_inplace(self): - pass - def idelete(self, indexer) -> SingleBlockManager: """ Delete single location from SingleBlockManager. 
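For context on the consolidation refactor above, the following is a minimal sketch of what these methods do on the default block-based backend. It is illustrative only and not part of the patch; it pokes at private pandas internals (``DataFrame._mgr``, ``nblocks``) that ordinary code should not rely on.

import numpy as np
import pandas as pd

# Adding a column via __setitem__ appends a new block without
# consolidating, leaving two separate float64 blocks.
df = pd.DataFrame({"a": np.arange(3.0)})
df["b"] = np.arange(3.0)

mgr = df._mgr                 # BlockManager on the default backend
print(mgr.nblocks)            # 2
print(mgr.is_consolidated())  # False

# After this refactor the real implementation lives only on BlockManager;
# ArrayManager inherits the no-op versions from the DataManager base class.
mgr._consolidate_inplace()
print(mgr.nblocks)            # 1 -- the two float64 blocks are merged
print(mgr.is_consolidated())  # True
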
From d4eb66797ac0434dbcb04732d5db7823c2a3b4b7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Sat, 31 Jul 2021 02:52:08 +0200 Subject: [PATCH 06/10] DataFrame.drop silently does nothing if MultiIndex has duplicates (#42801) --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/core/generic.py | 13 +++++++++++-- pandas/tests/frame/methods/test_drop.py | 11 +++++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 116bdd6e1d98f..df35a2c08a25e 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) +- Regression in :meth:`DataFrame.drop` does nothing if :class:`MultiIndex` has duplicates and indexer is a tuple or list of tuples (:issue:`42771`) - Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - diff --git a/pandas/core/generic.py b/pandas/core/generic.py index bf79e58077179..19dd06074bf78 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -99,7 +99,10 @@ ABCDataFrame, ABCSeries, ) -from pandas.core.dtypes.inference import is_hashable +from pandas.core.dtypes.inference import ( + is_hashable, + is_nested_list_like, +) from pandas.core.dtypes.missing import ( isna, notna, @@ -4182,6 +4185,7 @@ def _drop_axis( # Case for non-unique axis else: + is_tuple_labels = is_nested_list_like(labels) or isinstance(labels, tuple) labels = ensure_object(com.index_labels_to_array(labels)) if level is not None: if not isinstance(axis, MultiIndex): @@ -4191,9 +4195,14 @@ def _drop_axis( # GH 18561 MultiIndex.drop should raise if label is absent if errors == "raise" and indexer.all(): raise KeyError(f"{labels} not found in axis") - elif isinstance(axis, MultiIndex) and labels.dtype == "object": + elif ( + isinstance(axis, MultiIndex) + and labels.dtype == "object" + and not is_tuple_labels + ): # Set level to zero in case of MultiIndex and label is string, # because isin can't handle strings for MultiIndexes GH#36293 + # In case of tuples we get dtype object but have to use isin GH#42771 indexer = ~axis.get_level_values(0).isin(labels) else: indexer = ~axis.isin(labels) diff --git a/pandas/tests/frame/methods/test_drop.py b/pandas/tests/frame/methods/test_drop.py index be29a3e50b9fa..fa658d87c3ca0 100644 --- a/pandas/tests/frame/methods/test_drop.py +++ b/pandas/tests/frame/methods/test_drop.py @@ -479,6 +479,17 @@ def test_drop_with_non_unique_multiindex(self): expected = DataFrame([2], index=MultiIndex.from_arrays([["y"], ["j"]])) tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize("indexer", [("a", "a"), [("a", "a")]]) + def test_drop_tuple_with_non_unique_multiindex(self, indexer): + # GH#42771 + idx = MultiIndex.from_product([["a", "b"], ["a", "a"]]) + df = DataFrame({"x": range(len(idx))}, index=idx) + result = df.drop(index=[("a", "a")]) + expected = 
DataFrame( + {"x": [2, 3]}, index=MultiIndex.from_tuples([("b", "a"), ("b", "a")]) + ) + tm.assert_frame_equal(result, expected) + def test_drop_with_duplicate_columns(self): df = DataFrame( [[1, 5, 7.0], [1, 5, 7.0], [1, 5, 7.0]], columns=["bar", "a", "a"] From d0eba0a0f2466187e8a5e802588b7680be25ec97 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> Date: Fri, 30 Jul 2021 20:58:16 -0400 Subject: [PATCH 07/10] BUG/ENH: group cummin/max handle skipna (#41854) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/_libs/groupby.pyx | 86 +++++++++++++++++++-------- pandas/core/groupby/groupby.py | 6 +- pandas/tests/groupby/test_function.py | 33 ++++++++++ 4 files changed, 99 insertions(+), 27 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index ce0158b05c2ab..e74fb5602fc90 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -37,6 +37,7 @@ Other enhancements - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`42273`) - Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`) - Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`) +- :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` now support the argument ``skipna`` (:issue:`34047`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index 354b87e03e6c4..91921ba0e64c2 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -1317,6 +1317,7 @@ cdef group_cummin_max(groupby_t[:, ::1] out, const intp_t[:] labels, int ngroups, bint is_datetimelike, + bint skipna, bint compute_max): """ Cumulative minimum/maximum of columns of `values`, in row groups `labels`. @@ -1336,6 +1337,8 @@ cdef group_cummin_max(groupby_t[:, ::1] out, Number of groups, larger than all entries of `labels`. is_datetimelike : bool True if `values` contains datetime-like entries. + skipna : bool + If True, ignore nans in `values`. 
compute_max : bool True if cumulative maximum should be computed, False if cumulative minimum should be computed @@ -1356,9 +1359,9 @@ cdef group_cummin_max(groupby_t[:, ::1] out, accum[:] = -np.inf if compute_max else np.inf if mask is not None: - masked_cummin_max(out, values, mask, labels, accum, compute_max) + masked_cummin_max(out, values, mask, labels, accum, skipna, compute_max) else: - cummin_max(out, values, labels, accum, is_datetimelike, compute_max) + cummin_max(out, values, labels, accum, skipna, is_datetimelike, compute_max) @cython.boundscheck(False) @@ -1367,6 +1370,7 @@ cdef cummin_max(groupby_t[:, ::1] out, ndarray[groupby_t, ndim=2] values, const intp_t[:] labels, groupby_t[:, ::1] accum, + bint skipna, bint is_datetimelike, bint compute_max): """ @@ -1375,8 +1379,24 @@ cdef cummin_max(groupby_t[:, ::1] out, """ cdef: Py_ssize_t i, j, N, K - groupby_t val, mval + groupby_t val, mval, na_val + uint8_t[:, ::1] seen_na intp_t lab + bint na_possible + + if groupby_t is float64_t or groupby_t is float32_t: + na_val = NaN + na_possible = True + elif is_datetimelike: + na_val = NPY_NAT + na_possible = True + # Will never be used, just to avoid uninitialized warning + else: + na_val = 0 + na_possible = False + + if na_possible: + seen_na = np.zeros((accum).shape, dtype=np.uint8) N, K = (values).shape with nogil: @@ -1385,18 +1405,22 @@ cdef cummin_max(groupby_t[:, ::1] out, if lab < 0: continue for j in range(K): - val = values[i, j] - if not _treat_as_na(val, is_datetimelike): - mval = accum[lab, j] - if compute_max: - if val > mval: - accum[lab, j] = mval = val - else: - if val < mval: - accum[lab, j] = mval = val - out[i, j] = mval + if not skipna and na_possible and seen_na[lab, j]: + out[i, j] = na_val else: - out[i, j] = val + val = values[i, j] + if not _treat_as_na(val, is_datetimelike): + mval = accum[lab, j] + if compute_max: + if val > mval: + accum[lab, j] = mval = val + else: + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval + else: + seen_na[lab, j] = 1 + out[i, j] = val @cython.boundscheck(False) @@ -1406,6 +1430,7 @@ cdef masked_cummin_max(groupby_t[:, ::1] out, uint8_t[:, ::1] mask, const intp_t[:] labels, groupby_t[:, ::1] accum, + bint skipna, bint compute_max): """ Compute the cumulative minimum/maximum of columns of `values`, in row groups @@ -1414,25 +1439,32 @@ cdef masked_cummin_max(groupby_t[:, ::1] out, cdef: Py_ssize_t i, j, N, K groupby_t val, mval + uint8_t[:, ::1] seen_na intp_t lab N, K = (values).shape + seen_na = np.zeros((accum).shape, dtype=np.uint8) with nogil: for i in range(N): lab = labels[i] if lab < 0: continue for j in range(K): - if not mask[i, j]: - val = values[i, j] - mval = accum[lab, j] - if compute_max: - if val > mval: - accum[lab, j] = mval = val + if not skipna and seen_na[lab, j]: + mask[i, j] = 1 + else: + if not mask[i, j]: + val = values[i, j] + mval = accum[lab, j] + if compute_max: + if val > mval: + accum[lab, j] = mval = val + else: + if val < mval: + accum[lab, j] = mval = val + out[i, j] = mval else: - if val < mval: - accum[lab, j] = mval = val - out[i, j] = mval + seen_na[lab, j] = 1 @cython.boundscheck(False) @@ -1442,7 +1474,8 @@ def group_cummin(groupby_t[:, ::1] out, const intp_t[:] labels, int ngroups, bint is_datetimelike, - uint8_t[:, ::1] mask=None) -> None: + uint8_t[:, ::1] mask=None, + bint skipna=True) -> None: """See group_cummin_max.__doc__""" group_cummin_max( out, @@ -1451,6 +1484,7 @@ def group_cummin(groupby_t[:, ::1] out, labels, ngroups, is_datetimelike, + skipna, compute_max=False 
) @@ -1462,7 +1496,8 @@ def group_cummax(groupby_t[:, ::1] out, const intp_t[:] labels, int ngroups, bint is_datetimelike, - uint8_t[:, ::1] mask=None) -> None: + uint8_t[:, ::1] mask=None, + bint skipna=True) -> None: """See group_cummin_max.__doc__""" group_cummin_max( out, @@ -1471,5 +1506,6 @@ def group_cummax(groupby_t[:, ::1] out, labels, ngroups, is_datetimelike, + skipna, compute_max=True ) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 939cff16bf1ae..e57e48cb3ab11 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2784,10 +2784,11 @@ def cummin(self, axis=0, **kwargs): ------- Series or DataFrame """ + skipna = kwargs.get("skipna", True) if axis != 0: return self.apply(lambda x: np.minimum.accumulate(x, axis)) - return self._cython_transform("cummin", numeric_only=False) + return self._cython_transform("cummin", numeric_only=False, skipna=skipna) @final @Substitution(name="groupby") @@ -2800,10 +2801,11 @@ def cummax(self, axis=0, **kwargs): ------- Series or DataFrame """ + skipna = kwargs.get("skipna", True) if axis != 0: return self.apply(lambda x: np.maximum.accumulate(x, axis)) - return self._cython_transform("cummax", numeric_only=False) + return self._cython_transform("cummax", numeric_only=False, skipna=skipna) @final def _get_cythonized_result( diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 5434fc49e2174..77e5e9ba133f5 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -803,6 +803,39 @@ def test_cummax(dtypes_for_minmax): tm.assert_series_equal(result, expected) +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"]) +@pytest.mark.parametrize( + "groups,expected_data", + [ + ([1, 1, 1], [1, None, None]), + ([1, 2, 3], [1, None, 2]), + ([1, 3, 3], [1, None, None]), + ], +) +def test_cummin_max_skipna(method, dtype, groups, expected_data): + # GH-34047 + df = DataFrame({"a": Series([1, None, 2], dtype=dtype)}) + gb = df.groupby(groups)["a"] + + result = getattr(gb, method)(skipna=False) + expected = Series(expected_data, dtype=dtype, name="a") + + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("method", ["cummin", "cummax"]) +def test_cummin_max_skipna_multiple_cols(method): + # Ensure missing value in "a" doesn't cause "b" to be nan-filled + df = DataFrame({"a": [np.nan, 2.0, 2.0], "b": [2.0, 2.0, 2.0]}) + gb = df.groupby([1, 1, 1])[["a", "b"]] + + result = getattr(gb, method)(skipna=False) + expected = DataFrame({"a": [np.nan, np.nan, np.nan], "b": [2.0, 2.0, 2.0]}) + + tm.assert_frame_equal(result, expected) + + @td.skip_if_32bit @pytest.mark.parametrize("method", ["cummin", "cummax"]) @pytest.mark.parametrize( From 13560bbc699d795b49b41a5a42a8141c6eb0c77d Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> Date: Fri, 30 Jul 2021 20:59:44 -0400 Subject: [PATCH 08/10] PERF: nancorr pearson (#42761) --- doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/_libs/algos.pyx | 62 +++++++++++++++++++++++++++------- 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index e74fb5602fc90..ad6a9d994bf7b 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -171,6 +171,8 @@ Performance improvements - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions 
(:issue:`41598`) - Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`) - Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`) +- Performance improvement in :meth:`DataFrame.corr` for ``method=pearson`` on data without missing values (:issue:`40956`) +- .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index ff46c699c71e7..6c5388a38c345 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -326,8 +326,12 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): Py_ssize_t i, j, xi, yi, N, K bint minpv float64_t[:, ::1] result + # Initialize to None since we only use in the no missing value case + float64_t[::1] means=None, ssqds=None ndarray[uint8_t, ndim=2] mask + bint no_nans int64_t nobs = 0 + float64_t mean, ssqd, val float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy N, K = (mat).shape @@ -339,25 +343,57 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None): result = np.empty((K, K), dtype=np.float64) mask = np.isfinite(mat).view(np.uint8) + no_nans = mask.all() + + # Computing the online means and variances is expensive - so if possible we can + # precompute these and avoid repeating the computations each time we handle + # an (xi, yi) pair + if no_nans: + means = np.empty(K, dtype=np.float64) + ssqds = np.empty(K, dtype=np.float64) + + with nogil: + for j in range(K): + ssqd = mean = 0 + for i in range(N): + val = mat[i, j] + dx = val - mean + mean += 1 / (i + 1) * dx + ssqd += (val - mean) * dx + + means[j] = mean + ssqds[j] = ssqd with nogil: for xi in range(K): for yi in range(xi + 1): - # Welford's method for the variance-calculation - # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0 - for i in range(N): - if mask[i, xi] and mask[i, yi]: + covxy = 0 + if no_nans: + for i in range(N): vx = mat[i, xi] vy = mat[i, yi] - nobs += 1 - dx = vx - meanx - dy = vy - meany - meanx += 1 / nobs * dx - meany += 1 / nobs * dy - ssqdmx += (vx - meanx) * dx - ssqdmy += (vy - meany) * dy - covxy += (vx - meanx) * dy + covxy += (vx - means[xi]) * (vy - means[yi]) + + ssqdmx = ssqds[xi] + ssqdmy = ssqds[yi] + nobs = N + + else: + nobs = ssqdmx = ssqdmy = covxy = meanx = meany = 0 + for i in range(N): + # Welford's method for the variance-calculation + # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + if mask[i, xi] and mask[i, yi]: + vx = mat[i, xi] + vy = mat[i, yi] + nobs += 1 + dx = vx - meanx + dy = vy - meany + meanx += 1 / nobs * dx + meany += 1 / nobs * dy + ssqdmx += (vx - meanx) * dx + ssqdmy += (vy - meany) * dy + covxy += (vx - meanx) * dy if nobs < minpv: result[xi, yi] = result[yi, xi] = NaN From 26e7c0f840197259f3af06a397df99b9b9f638a3 Mon Sep 17 00:00:00 2001 From: Fred Reiss Date: Fri, 30 Jul 2021 18:01:47 -0700 Subject: [PATCH 09/10] BUG: 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (#42787) --- doc/source/whatsnew/v1.3.2.rst | 2 +- pandas/core/internals/blocks.py | 8 +++----- pandas/tests/extension/base/getitem.py | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index df35a2c08a25e..8723b1b766485 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -30,7 +30,7 @@ Fixed regressions Bug 
fixes ~~~~~~~~~ -- +- 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index adfecb946d822..953ccedaa5222 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1552,12 +1552,10 @@ def _slice(self, slicer): def getitem_block_index(self, slicer: slice) -> ExtensionBlock: """ Perform __getitem__-like specialized to slicing along index. - - Assumes self.ndim == 2 """ - # error: Invalid index type "Tuple[ellipsis, slice]" for - # "Union[ndarray, ExtensionArray]"; expected type "Union[int, slice, ndarray]" - new_values = self.values[..., slicer] # type: ignore[index] + # GH#42787 in principle this is equivalent to values[..., slicer], but we don't + # require subclasses of ExtensionArray to support that form (for now). + new_values = self.values[slicer] return type(self)(new_values, self._mgr_locs, ndim=self.ndim) def fillna( diff --git a/pandas/tests/extension/base/getitem.py b/pandas/tests/extension/base/getitem.py index 96833a2e49fa1..ac181af7875b5 100644 --- a/pandas/tests/extension/base/getitem.py +++ b/pandas/tests/extension/base/getitem.py @@ -425,3 +425,23 @@ def test_item(self, data): with pytest.raises(ValueError, match=msg): s.item() + + def test_ellipsis_index(self): + # GH42430 1D slices over extension types turn into N-dimensional slices over + # ExtensionArrays + class CapturingStringArray(pd.arrays.StringArray): + """Extend StringArray to capture arguments to __getitem__""" + + def __getitem__(self, item): + self.last_item_arg = item + return super().__getitem__(item) + + df = pd.DataFrame( + {"col1": CapturingStringArray(np.array(["hello", "world"], dtype=object))} + ) + _ = df.iloc[:1] + + # String comparison because there's no native way to compare slices. 
+ # Before the fix for GH42430, last_item_arg would get set to the 2D slice + # (Ellipsis, slice(None, 1, None)) + self.assert_equal(str(df["col1"].array.last_item_arg), "slice(None, 1, None)") From 796342238c56147f11690a8135d29edd7462eddb Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Sat, 31 Jul 2021 03:38:46 -0300 Subject: [PATCH 10/10] TST: Fix doctests in ``style.py`` (#42783) * TST: Fix doctests for pandas.io.formats.style * Modified: pandas/io/formats/style.py * Added some expected results * Skipped some tests * TST: Add link to redirect to Table Visualization user guide * Modified style.py * Updated the doctest of the apply() * Updated the doctest of the applymap() * Updated the doctest of the set_table_styles() * Updated the doctest of the set_properties() * TST: Add image to pipe function result * Modified style.py * Updated the doctest of the pipe() * TST: Remove unnecessary outputs * Modified pandas/io/formats/style.py * Updated the doctests of the set_tooltips() * Updated the doctests of the to_latex() * Updated the doctests of the set_td_classes() * Updated the doctests of the set_table_attributes() * TST: Add the output to the Styler.format doctest in to_latex() * REG: DataFrame.agg where func returns lists and axis=1 (#42762) * Fix typing issues for CI (#42770) * BUG: groupby.shift returns different columns when fill_value is specified (#41858) * PERF: extract_array earlier in DataFrame construction (#42774) * ENH: `sparse_columns` and `sparse_index` added to `Styler.to_html` (#41946) * TYP: Fix typing for searchsorted (#42788) * DOC GH42756 Update documentation for pandas.DataFrame.drop to clarify tuples. (#42789) * CI: Fix doctests (#42790) * REGR: nanosecond timestamp comparisons to OOB datetimes (#42796) * COMPAT: MPL 3.4.0 (#42803) * Delete duplicates and unused code from reshape tests (#42802) * REGR: ValueError raised when both prefix and names are set to None (#42690) * REGR: ValueError raised when both prefix and names are set to None * Update readers.py * whitespace * Update v1.3.1.rst * Update v1.3.2.rst * Update readers.py * Update readers.py Co-authored-by: Jeff Reback * TST: Add style.py to the doctest check * TST: fixed eng_formatter doctest for #42671 (#42705) * TST: Revert x and y position in some doctests * Updated the doctest of the hide_columns() Co-authored-by: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Co-authored-by: Irv Lustig Co-authored-by: Thomas Smith Co-authored-by: jbrockmendel Co-authored-by: attack68 <24256554+attack68@users.noreply.github.com> Co-authored-by: Mike Phung Co-authored-by: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> Co-authored-by: Thomas Li <47963215+lithomas1@users.noreply.github.com> Co-authored-by: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Co-authored-by: Jeff Reback Co-authored-by: Krishna Chivukula <63070026+KrishnaSai2020@users.noreply.github.com> --- ci/code_checks.sh | 1 + doc/source/_static/style/df_pipe.png | Bin 0 -> 8673 bytes pandas/io/formats/style.py | 139 +++++++++++++++++---------- 3 files changed, 89 insertions(+), 51 deletions(-) create mode 100644 doc/source/_static/style/df_pipe.png diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f481ecf7a97ed..d04d0eaee6ec4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,6 +122,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/io/sas/ \ pandas/io/sql.py \ pandas/io/formats/format.py \ + pandas/io/formats/style.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff 
--git a/doc/source/_static/style/df_pipe.png b/doc/source/_static/style/df_pipe.png
new file mode 100644
index 0000000000000000000000000000000000000000..071a481ad5acc154ffa90b340f1c24cad2bee958
GIT binary patch
literal 8673
[base85-encoded binary image data for df_pipe.png omitted]

diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
--- a/pandas/io/formats/style.py
+++ b/pandas/io/formats/style.py
@@ ... @@ def set_tooltips(
         >>> df.style.set_tooltips(ttips, css_class='tt-add', props=[
         ...     ('visibility', 'hidden'),
         ...     ('position', 'absolute'),
-        ...     ('z-index', 1)])
+        ...     ('z-index', 1)])  # doctest: +SKIP
         >>> df.style.set_tooltips(ttips, css_class='tt-add',
         ...     props='visibility:hidden; position:absolute; z-index:1;')
+        ...     # doctest: +SKIP
         """
         if not self.cell_ids:
             # tooltips not optimised for individual cell check. requires reasonable
@@ -553,7 +554,7 @@ def to_latex(

         >>> s = df.style.highlight_max(axis=None,
         ...                            props='cellcolor:{red}; bfseries: ;')
-        >>> s.to_latex()
+        >>> s.to_latex()  # doctest: +SKIP

         Internally these structured LaTeX ``(<command>, <options>)`` pairs
         are translated to the
@@ -592,7 +593,7 @@
         ...     props='cellcolor:[HTML]{FFFF00}; color:{red};'
         ...           'textit:--rwrap; textbf:--rwrap;'
         ... )
-        >>> s.to_latex()
+        >>> s.to_latex()  # doctest: +SKIP

         ..
figure:: ../../_static/style/latex_1.png @@ -653,7 +654,7 @@ def to_latex( ... column_format="rrrrr", position="h", position_float="centering", ... hrules=True, label="table:5", caption="Styled LaTeX Table", ... multirow_align="t", multicol_align="r" - ... ) + ... ) # doctest: +SKIP .. figure:: ../../_static/style/latex_2.png @@ -670,8 +671,14 @@ def to_latex( ... ("Numeric", "Integers"): '\${}', ... ("Numeric", "Floats"): '{:.3f}', ... ("Non-Numeric", "Strings"): str.upper - ... }) - >>> s.to_latex() + ... }) # doctest: +SKIP + Numeric Non-Numeric + Integers Floats Strings + L0 ix1 $1 2.200 DOGS + ix2 $3 4.400 CATS + L1 ix3 $2 6.600 COWS + + >>> s.to_latex() # doctest: +SKIP \begin{tabular}{llrrl} {} & {} & \multicolumn{2}{r}{Numeric} & {Non-Numeric} \\ {} & {} & {Integers} & {Floats} & {Strings} \\ @@ -713,7 +720,7 @@ def to_latex( >>> df = pd.DataFrame([[1]]) >>> df.style.set_properties( ... **{"font-weight": "bold /* --dwrap */", "Huge": "--latex--rwrap"} - ... ).to_latex(convert_css=True) + ... ).to_latex(convert_css=True) # doctest: +SKIP \begin{tabular}{lr} {} & {0} \\ 0 & {\bfseries}{\Huge{1}} \\ @@ -934,7 +941,7 @@ def set_td_classes(self, classes: DataFrame) -> Styler: ... ["min-val red", "", "blue"], ... ["red", None, "blue max-val"] ... ], index=df.index, columns=df.columns) - >>> df.style.set_td_classes(classes) + >>> df.style.set_td_classes(classes) # doctest: +SKIP Using `MultiIndex` columns and a `classes` `DataFrame` as a subset of the underlying, @@ -943,14 +950,14 @@ def set_td_classes(self, classes: DataFrame) -> Styler: ... columns=[["level0", "level0"], ["level1a", "level1b"]]) >>> classes = pd.DataFrame(["min-val"], index=["a"], ... columns=[["level0"],["level1a"]]) - >>> df.style.set_td_classes(classes) + >>> df.style.set_td_classes(classes) # doctest: +SKIP Form of the output with new additional css classes, >>> df = pd.DataFrame([[1]]) >>> css = pd.DataFrame([["other-class"]]) >>> s = Styler(df, uuid="_", cell_ids=False).set_td_classes(css) - >>> s.hide_index().render() + >>> s.hide_index().render() # doctest: +SKIP '' '' ' ' @@ -1178,19 +1185,26 @@ def apply( >>> def highlight_max(x, color): ... return np.where(x == np.nanmax(x.to_numpy()), f"color: {color};", None) >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) - >>> df.style.apply(highlight_max, color='red') - >>> df.style.apply(highlight_max, color='blue', axis=1) - >>> df.style.apply(highlight_max, color='green', axis=None) + >>> df.style.apply(highlight_max, color='red') # doctest: +SKIP + >>> df.style.apply(highlight_max, color='blue', axis=1) # doctest: +SKIP + >>> df.style.apply(highlight_max, color='green', axis=None) # doctest: +SKIP Using ``subset`` to restrict application to a single column or multiple columns >>> df.style.apply(highlight_max, color='red', subset="A") + ... # doctest: +SKIP >>> df.style.apply(highlight_max, color='red', subset=["A", "B"]) + ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None)) - >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A") + >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None))) + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) + ... # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. 
""" self._todo.append( (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) @@ -1246,17 +1260,24 @@ def applymap( >>> def color_negative(v, color): ... return f"color: {color};" if v < 0 else None >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) - >>> df.style.applymap(color_negative, color='red') + >>> df.style.applymap(color_negative, color='red') # doctest: +SKIP Using ``subset`` to restrict application to a single column or multiple columns >>> df.style.applymap(color_negative, color='red', subset="A") + ... # doctest: +SKIP >>> df.style.applymap(color_negative, color='red', subset=["A", "B"]) + ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.applymap(color_negative, color='red', subset=([0,1,2], slice(None)) - >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A") + >>> df.style.applymap(color_negative, color='red', + ... subset=([0,1,2], slice(None))) # doctest: +SKIP + >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A")) + ... # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ self._todo.append( (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) @@ -1317,6 +1338,7 @@ def where( >>> def cond(v, limit=4): ... return v > 1 and v != limit >>> df.style.where(cond, value='color:green;', other='color:red;') + ... # doctest: +SKIP should be refactored to: @@ -1324,6 +1346,7 @@ def where( ... cond = v > 1 and v != limit ... return value if cond else other >>> df.style.applymap(style_func, value='color:green;', other='color:red;') + ... # doctest: +SKIP """ warnings.warn( "this method is deprecated in favour of `Styler.applymap()`", @@ -1389,7 +1412,7 @@ def set_table_attributes(self, attributes: str) -> Styler: Examples -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) - >>> df.style.set_table_attributes('class="pure-table"') + >>> df.style.set_table_attributes('class="pure-table"') # doctest: +SKIP # ...
... """ self.table_attributes = attributes @@ -1637,14 +1660,14 @@ def set_table_styles( >>> df.style.set_table_styles( ... [{'selector': 'tr:hover', ... 'props': [('background-color', 'yellow')]}] - ... ) + ... ) # doctest: +SKIP Or with CSS strings >>> df.style.set_table_styles( ... [{'selector': 'tr:hover', - ... 'props': 'background-color: yellow; font-size: 1em;']}] - ... ) + ... 'props': 'background-color: yellow; font-size: 1em;'}] + ... ) # doctest: +SKIP Adding column styling by name @@ -1652,15 +1675,18 @@ def set_table_styles( ... 'A': [{'selector': '', ... 'props': [('color', 'red')]}], ... 'B': [{'selector': 'td', - ... 'props': 'color: blue;']}] - ... }, overwrite=False) + ... 'props': 'color: blue;'}] + ... }, overwrite=False) # doctest: +SKIP Adding row styling >>> df.style.set_table_styles({ ... 0: [{'selector': 'td:hover', ... 'props': [('font-size', '25px')]}] - ... }, axis=1, overwrite=False) + ... }, axis=1, overwrite=False) # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ if isinstance(table_styles, dict): if axis in [0, "index"]: @@ -1753,7 +1779,7 @@ def hide_index(self, subset: Subset | None = None) -> Styler: Simple application hiding specific rows: >>> df = pd.DataFrame([[1,2], [3,4], [5,6]], index=["a", "b", "c"]) - >>> df.style.hide_index(["a", "b"]) + >>> df.style.hide_index(["a", "b"]) # doctest: +SKIP 0 1 c 5 6 @@ -1761,7 +1787,7 @@ def hide_index(self, subset: Subset | None = None) -> Styler: >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) - >>> df.style.format("{:.1f}").hide_index() + >>> df.style.format("{:.1f}").hide_index() # doctest: +SKIP x y a b c a b c 0.1 0.0 0.4 1.3 0.6 -1.4 @@ -1774,6 +1800,7 @@ def hide_index(self, subset: Subset | None = None) -> Styler: Hide specific rows but retain the index: >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"])) + ... # doctest: +SKIP x y a b c a b c x b 0.7 1.0 1.3 1.5 -0.0 -0.2 @@ -1781,8 +1808,8 @@ def hide_index(self, subset: Subset | None = None) -> Styler: Hide specific rows and the index: - >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"])) - ... .hide_index() + >>> df.style.format("{:.1f}").hide_index( + ... 
subset=(slice(None), ["a", "c"])).hide_index() # doctest: +SKIP x y a b c a b c 0.7 1.0 1.3 1.5 -0.0 -0.2 @@ -1833,7 +1860,7 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: Simple application hiding specific columns: >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) - >>> df.style.hide_columns(["a", "b"]) + >>> df.style.hide_columns(["a", "b"]) # doctest: +SKIP c 0 3 1 6 @@ -1842,17 +1869,18 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) - >>> df.style.format("{:.1f}").hide_columns() - x d 0.1 0.0 0.4 1.3 0.6 -1.4 - e 0.7 1.0 1.3 1.5 -0.0 -0.2 - f 1.4 -0.8 1.6 -0.2 -0.4 -0.3 - y d 0.4 1.0 -0.2 -0.8 -1.2 1.1 - e -0.6 1.2 1.8 1.9 0.3 0.3 - f 0.8 0.5 -0.3 1.2 2.2 -0.8 + >>> df.style.format("{:.1f}").hide_columns() # doctest: +SKIP + x a 0.1 0.0 0.4 1.3 0.6 -1.4 + b 0.7 1.0 1.3 1.5 -0.0 -0.2 + c 1.4 -0.8 1.6 -0.2 -0.4 -0.3 + y a 0.4 1.0 -0.2 -0.8 -1.2 1.1 + b -0.6 1.2 1.8 1.9 0.3 0.3 + c 0.8 0.5 -0.3 1.2 2.2 -0.8 Hide specific columns but retain the column headers: >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"])) + ... # doctest: +SKIP x y b b x a 0.0 0.6 @@ -1864,8 +1892,8 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: Hide specific columns and the column headers: - >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"])) - ... .hide_columns() + >>> df.style.format("{:.1f}").hide_columns( + ... subset=(slice(None), ["a", "c"])).hide_columns() # doctest: +SKIP x a 0.0 0.6 b 1.0 -0.0 c -0.8 -0.4 @@ -1995,31 +2023,32 @@ def background_gradient( Shading the values column-wise, with ``axis=0``, preselecting numeric columns - >>> df.style.{name}_gradient(axis=0) + >>> df.style.{name}_gradient(axis=0) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_ax0.png Shading all values collectively using ``axis=None`` - >>> df.style.{name}_gradient(axis=None) + >>> df.style.{name}_gradient(axis=None) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone.png Compress the color map from the both ``low`` and ``high`` ends - >>> df.style.{name}_gradient(axis=None, low=0.75, high=1.0) + >>> df.style.{name}_gradient(axis=None, low=0.75, high=1.0) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone_lowhigh.png Manually setting ``vmin`` and ``vmax`` gradient thresholds - >>> df.style.{name}_gradient(axis=None, vmin=6.7, vmax=21.6) + >>> df.style.{name}_gradient(axis=None, vmin=6.7, vmax=21.6) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone_vminvmax.png Setting a ``gmap`` and applying to all columns with another ``cmap`` >>> df.style.{name}_gradient(axis=0, gmap=df['Temp (c)'], cmap='YlOrRd') + ... # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_gmap.png @@ -2029,7 +2058,7 @@ def background_gradient( >>> gmap = np.array([[1,2,3], [2,3,4], [3,4,5]]) >>> df.style.{name}_gradient(axis=None, gmap=gmap, ... cmap='YlOrRd', subset=['Temp (c)', 'Rain (mm)', 'Wind (m/s)'] - ... ) + ... ) # doctest: +SKIP .. 
figure:: ../../_static/style/{image_prefix}_axNone_gmap.png """ @@ -2111,8 +2140,11 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: Examples -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) - >>> df.style.set_properties(color="white", align="right") - >>> df.style.set_properties(**{'background-color': 'yellow'}) + >>> df.style.set_properties(color="white", align="right") # doctest: +SKIP + >>> df.style.set_properties(**{'background-color': 'yellow'}) # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ values = "".join([f"{p}: {v};" for p, v in kwargs.items()]) return self.applymap(lambda x: values, subset=subset) @@ -2447,7 +2479,7 @@ def highlight_between( ... 'Two': [2.9, 2.1, 2.5], ... 'Three': [3.1, 3.2, 3.8], ... }) - >>> df.style.highlight_between(left=2.1, right=2.9) + >>> df.style.highlight_between(left=2.1, right=2.9) # doctest: +SKIP .. figure:: ../../_static/style/hbetw_basic.png @@ -2455,7 +2487,7 @@ def highlight_between( and ``right`` for each column individually >>> df.style.highlight_between(left=[1.4, 2.4, 3.4], right=[1.6, 2.6, 3.6], - ... axis=1, color="#fffd75") + ... axis=1, color="#fffd75") # doctest: +SKIP .. figure:: ../../_static/style/hbetw_seq.png @@ -2463,14 +2495,14 @@ def highlight_between( matches the input DataFrame, with a constant ``right`` >>> df.style.highlight_between(left=[[2,2,3],[2,2,3],[3,3,3]], right=3.5, - ... axis=None, color="#fffd75") + ... axis=None, color="#fffd75") # doctest: +SKIP .. figure:: ../../_static/style/hbetw_axNone.png Using ``props`` instead of default background coloring >>> df.style.highlight_between(left=1.5, right=3.5, - ... props='font-weight:bold;color:#e83e8c') + ... props='font-weight:bold;color:#e83e8c') # doctest: +SKIP .. figure:: ../../_static/style/hbetw_props.png """ @@ -2547,19 +2579,21 @@ def highlight_quantile( >>> df = pd.DataFrame(np.arange(10).reshape(2,5) + 1) >>> df.style.highlight_quantile(axis=None, q_left=0.8, color="#fffd75") + ... # doctest: +SKIP .. figure:: ../../_static/style/hq_axNone.png Or highlight quantiles row-wise or column-wise, in this case by row-wise >>> df.style.highlight_quantile(axis=1, q_left=0.8, color="#fffd75") + ... # doctest: +SKIP .. figure:: ../../_static/style/hq_ax1.png Use ``props`` instead of default background coloring >>> df.style.highlight_quantile(axis=None, q_left=0.2, q_right=0.8, - ... props='font-weight:bold;color:#e83e8c') + ... props='font-weight:bold;color:#e83e8c') # doctest: +SKIP .. figure:: ../../_static/style/hq_props.png """ @@ -2703,6 +2737,9 @@ def pipe(self, func: Callable, *args, **kwargs): ... .highlight_min(subset=['conversion'], color='yellow') ... .pipe(format_conversion) ... .set_caption("Results with minimum conversion highlighted.")) + ... # doctest: +SKIP + + .. figure:: ../../_static/style/df_pipe.png """ return com.pipe(self, func, *args, **kwargs)
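
As a closing illustration of the ``Styler.pipe`` pattern documented in patch 10 (the behaviour the new ``df_pipe.png`` figure depicts), here is a small runnable sketch. The helper name, column, and threshold are invented for the example and are not taken from the patch:

import pandas as pd

def highlight_low_conversion(styler, threshold):
    # Hypothetical helper: shade conversion rates below `threshold`.
    return styler.applymap(
        lambda v: "background-color: yellow;" if v < threshold else "",
        subset=["conversion"],
    )

df = pd.DataFrame({"trial": [1, 2, 3], "conversion": [0.75, 0.85, 0.50]})
html = (
    df.style.format({"conversion": "{:.1%}"})
    .pipe(highlight_low_conversion, threshold=0.6)
    .set_caption("Results with low conversion highlighted.")
    .render()  # Styler.render() in pandas 1.3; later versions use to_html()
)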