From 58519d050b411a1e5df55401bb3837af5bc8765b Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Wed, 28 Jul 2021 11:59:36 -0300 Subject: [PATCH 01/20] TST: Fix doctests for pandas.io.formats.style * Modified: pandas/io/formats/style.py * Added some expected results * Skipped some tests --- pandas/io/formats/style.py | 184 +++++++++++++++++++++++++------------ 1 file changed, 123 insertions(+), 61 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 9dade82e9809c..22c5939ae28f4 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -341,9 +341,16 @@ def set_tooltips( >>> df.style.set_tooltips(ttips, css_class='tt-add', props=[ ... ('visibility', 'hidden'), ... ('position', 'absolute'), - ... ('z-index', 1)]) + ... ('z-index', 1)]) # doctest: +SKIP + 0 1 + 0 0 1 + 1 2 3 >>> df.style.set_tooltips(ttips, css_class='tt-add', ... props='visibility:hidden; position:absolute; z-index:1;') + ... # doctest: +SKIP + 0 1 + 0 0 1 + 1 2 3 """ if not self.cell_ids: # tooltips not optimised for individual cell check. requires reasonable @@ -547,13 +554,33 @@ def to_latex( >>> df = pd.DataFrame([[1,2], [3,4]]) >>> s = df.style.highlight_max(axis=None, ... props='background-color:red; font-weight:bold;') - >>> s.render() # doctest: +SKIP + >>> s.render() # doctest: +ELLIPSIS + '\n\n \n + \n \n + \n + \n + \n \n \n \n + \n + \n + \n \n + \n \n + \n + \n \n + \n
 01
012
134
\n...' The equivalent using LaTeX only commands is the following: >>> s = df.style.highlight_max(axis=None, ... props='cellcolor:{red}; bfseries: ;') - >>> s.to_latex() + >>> s.to_latex() # doctest: +SKIP + \begin{tabular}{lrr} + {} & {0} & {1} \\ + 0 & 1 & 2 \\ + 1 & 3 & \\ + cellcolor{red} \\ + bfseries 4 \\ + \end{tabular} Internally these structured LaTeX ``(, )`` pairs are translated to the @@ -592,7 +619,7 @@ def to_latex( ... props='cellcolor:[HTML]{FFFF00}; color:{red};' ... 'textit:--rwrap; textbf:--rwrap;' ... ) - >>> s.to_latex() + >>> s.to_latex() # doctest: +SKIP .. figure:: ../../_static/style/latex_1.png @@ -653,7 +680,7 @@ def to_latex( ... column_format="rrrrr", position="h", position_float="centering", ... hrules=True, label="table:5", caption="Styled LaTeX Table", ... multirow_align="t", multicol_align="r" - ... ) + ... ) # doctest: +SKIP .. figure:: ../../_static/style/latex_2.png @@ -670,8 +697,9 @@ def to_latex( ... ("Numeric", "Integers"): '\${}', ... ("Numeric", "Floats"): '{:.3f}', ... ("Non-Numeric", "Strings"): str.upper - ... }) - >>> s.to_latex() + ... }) # doctest: +ELLIPSIS + + >>> s.to_latex() # doctest: +SKIP \begin{tabular}{llrrl} {} & {} & \multicolumn{2}{r}{Numeric} & {Non-Numeric} \\ {} & {} & {Integers} & {Floats} & {Strings} \\ @@ -713,7 +741,7 @@ def to_latex( >>> df = pd.DataFrame([[1]]) >>> df.style.set_properties( ... **{"font-weight": "bold /* --dwrap */", "Huge": "--latex--rwrap"} - ... ).to_latex(convert_css=True) + ... ).to_latex(convert_css=True) # doctest: +SKIP \begin{tabular}{lr} {} & {0} \\ 0 & {\bfseries}{\Huge{1}} \\ @@ -913,7 +941,10 @@ def set_td_classes(self, classes: DataFrame) -> Styler: ... ["min-val red", "", "blue"], ... ["red", None, "blue max-val"] ... ], index=df.index, columns=df.columns) - >>> df.style.set_td_classes(classes) + >>> df.style.set_td_classes(classes) # doctest: +SKIP + A B C + 0 1 2 3 + 1 4 5 6 Using `MultiIndex` columns and a `classes` `DataFrame` as a subset of the underlying, @@ -922,14 +953,18 @@ def set_td_classes(self, classes: DataFrame) -> Styler: ... columns=[["level0", "level0"], ["level1a", "level1b"]]) >>> classes = pd.DataFrame(["min-val"], index=["a"], ... columns=[["level0"],["level1a"]]) - >>> df.style.set_td_classes(classes) + >>> df.style.set_td_classes(classes) # doctest: +SKIP + level0 + level1a level1b + a 1 2 + b 3 4 Form of the output with new additional css classes, >>> df = pd.DataFrame([[1]]) >>> css = pd.DataFrame([["other-class"]]) >>> s = Styler(df, uuid="_", cell_ids=False).set_td_classes(css) - >>> s.hide_index().render() + >>> s.hide_index().render() # doctest: +SKIP '' '' ' ' @@ -1157,19 +1192,23 @@ def apply( >>> def highlight_max(x, color): ... return np.where(x == np.nanmax(x.to_numpy()), f"color: {color};", None) >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) - >>> df.style.apply(highlight_max, color='red') - >>> df.style.apply(highlight_max, color='blue', axis=1) - >>> df.style.apply(highlight_max, color='green', axis=None) + >>> df.style.apply(highlight_max, color='red') # doctest: +SKIP + >>> df.style.apply(highlight_max, color='blue', axis=1) # doctest: +SKIP + >>> df.style.apply(highlight_max, color='green', axis=None) # doctest: +SKIP Using ``subset`` to restrict application to a single column or multiple columns >>> df.style.apply(highlight_max, color='red', subset="A") + ... # doctest: +SKIP >>> df.style.apply(highlight_max, color='red', subset=["A", "B"]) + ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None)) - >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A") + >>> df.style.apply(highlight_max, color='red', subset=([0,1,2], slice(None))) + ... # doctest: +SKIP + >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) + ... # doctest: +SKIP """ self._todo.append( (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) @@ -1225,17 +1264,21 @@ def applymap( >>> def color_negative(v, color): ... return f"color: {color};" if v < 0 else None >>> df = pd.DataFrame(np.random.randn(5, 2), columns=["A", "B"]) - >>> df.style.applymap(color_negative, color='red') + >>> df.style.applymap(color_negative, color='red') # doctest: +SKIP Using ``subset`` to restrict application to a single column or multiple columns >>> df.style.applymap(color_negative, color='red', subset="A") + ... # doctest: +SKIP >>> df.style.applymap(color_negative, color='red', subset=["A", "B"]) + ... # doctest: +SKIP Using a 2d input to ``subset`` to select rows in addition to columns - >>> df.style.applymap(color_negative, color='red', subset=([0,1,2], slice(None)) - >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A") + >>> df.style.applymap(color_negative, color='red', + ... subset=([0,1,2], slice(None))) # doctest: +SKIP + >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A")) + ... # doctest: +SKIP """ self._todo.append( (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) @@ -1296,6 +1339,7 @@ def where( >>> def cond(v, limit=4): ... return v > 1 and v != limit >>> df.style.where(cond, value='color:green;', other='color:red;') + ... # doctest: +SKIP should be refactored to: @@ -1303,6 +1347,7 @@ def where( ... cond = v > 1 and v != limit ... return value if cond else other >>> df.style.applymap(style_func, value='color:green;', other='color:red;') + ... # doctest: +SKIP """ warnings.warn( "this method is deprecated in favour of `Styler.applymap()`", @@ -1368,8 +1413,19 @@ def set_table_attributes(self, attributes: str) -> Styler: Examples -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) - >>> df.style.set_table_attributes('class="pure-table"') - # ...
... + >>> df.style.set_table_attributes('class="pure-table"') # doctest: +SKIP + 0 1 2 3 + 0 0.4 1.0 -1.1 0.5 + 1 -0.6 1.2 1.8 -1.4 + 2 1.2 -0.6 -0.2 -0.2 + 3 -0.5 -0.8 -0.2 1.8 + 4 -0.5 -0.8 0.2 -1.7 + 5 0.1 1.5 0.3 -1.3 + 6 1.5 -0.6 -1.3 -0.1 + 7 -0.1 -0.8 -0.7 0.3 + 8 1.7 -0.5 -1.4 0.8 + 9 1.5 0.4 -0.8 0.7 + """ self.table_attributes = attributes return self @@ -1616,14 +1672,14 @@ def set_table_styles( >>> df.style.set_table_styles( ... [{'selector': 'tr:hover', ... 'props': [('background-color', 'yellow')]}] - ... ) + ... ) # doctest: +SKIP Or with CSS strings >>> df.style.set_table_styles( ... [{'selector': 'tr:hover', - ... 'props': 'background-color: yellow; font-size: 1em;']}] - ... ) + ... 'props': 'background-color: yellow; font-size: 1em;'}] + ... ) # doctest: +SKIP Adding column styling by name @@ -1631,15 +1687,15 @@ def set_table_styles( ... 'A': [{'selector': '', ... 'props': [('color', 'red')]}], ... 'B': [{'selector': 'td', - ... 'props': 'color: blue;']}] - ... }, overwrite=False) + ... 'props': 'color: blue;'}] + ... }, overwrite=False) # doctest: +SKIP Adding row styling >>> df.style.set_table_styles({ ... 0: [{'selector': 'td:hover', ... 'props': [('font-size', '25px')]}] - ... }, axis=1, overwrite=False) + ... }, axis=1, overwrite=False) # doctest: +SKIP """ if isinstance(table_styles, dict): if axis in [0, "index"]: @@ -1732,7 +1788,7 @@ def hide_index(self, subset: Subset | None = None) -> Styler: Simple application hiding specific rows: >>> df = pd.DataFrame([[1,2], [3,4], [5,6]], index=["a", "b", "c"]) - >>> df.style.hide_index(["a", "b"]) + >>> df.style.hide_index(["a", "b"]) # doctest: +SKIP 0 1 c 5 6 @@ -1740,7 +1796,7 @@ def hide_index(self, subset: Subset | None = None) -> Styler: >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) - >>> df.style.format("{:.1f}").hide_index() + >>> df.style.format("{:.1f}").hide_index() # doctest: +SKIP x y a b c a b c 0.1 0.0 0.4 1.3 0.6 -1.4 @@ -1753,6 +1809,7 @@ def hide_index(self, subset: Subset | None = None) -> Styler: Hide specific rows but retain the index: >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"])) + ... # doctest: +SKIP x y a b c a b c x b 0.7 1.0 1.3 1.5 -0.0 -0.2 @@ -1760,8 +1817,8 @@ def hide_index(self, subset: Subset | None = None) -> Styler: Hide specific rows and the index: - >>> df.style.format("{:.1f}").hide_index(subset=(slice(None), ["a", "c"])) - ... .hide_index() + >>> df.style.format("{:.1f}").hide_index( + ... subset=(slice(None), ["a", "c"])).hide_index() # doctest: +SKIP x y a b c a b c 0.7 1.0 1.3 1.5 -0.0 -0.2 @@ -1812,7 +1869,7 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: Simple application hiding specific columns: >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=["a", "b", "c"]) - >>> df.style.hide_columns(["a", "b"]) + >>> df.style.hide_columns(["a", "b"]) # doctest: +SKIP c 0 3 1 6 @@ -1821,35 +1878,36 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) - >>> df.style.format("{:.1f}").hide_columns() - x d 0.1 0.0 0.4 1.3 0.6 -1.4 - e 0.7 1.0 1.3 1.5 -0.0 -0.2 - f 1.4 -0.8 1.6 -0.2 -0.4 -0.3 - y d 0.4 1.0 -0.2 -0.8 -1.2 1.1 - e -0.6 1.2 1.8 1.9 0.3 0.3 - f 0.8 0.5 -0.3 1.2 2.2 -0.8 + >>> df.style.format("{:.1f}").hide_columns() # doctest: +SKIP + a 0.1 0.0 0.4 1.3 0.6 -1.4 + x b 0.7 1.0 1.3 1.5 -0.0 -0.2 + c 1.4 -0.8 1.6 -0.2 -0.4 -0.3 + a 0.4 1.0 -0.2 -0.8 -1.2 1.1 + y b -0.6 1.2 1.8 1.9 0.3 0.3 + c 0.8 0.5 -0.3 1.2 2.2 -0.8 Hide specific columns but retain the column headers: >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"])) + ... # doctest: +SKIP x y b b - x a 0.0 0.6 - b 1.0 -0.0 + a 0.0 0.6 + x b 1.0 -0.0 c -0.8 -0.4 - y a 1.0 -1.2 - b 1.2 0.3 + a 1.0 -1.2 + y b 1.2 0.3 c 0.5 2.2 Hide specific columns and the column headers: - >>> df.style.format("{:.1f}").hide_columns(subset=(slice(None), ["a", "c"])) - ... .hide_columns() - x a 0.0 0.6 - b 1.0 -0.0 + >>> df.style.format("{:.1f}").hide_columns( + ... subset=(slice(None), ["a", "c"])).hide_columns() # doctest: +SKIP + a 0.0 0.6 + x b 1.0 -0.0 c -0.8 -0.4 - y a 1.0 -1.2 - b 1.2 0.3 + a 1.0 -1.2 + y b 1.2 0.3 c 0.5 2.2 """ if subset is None: @@ -1974,31 +2032,32 @@ def background_gradient( Shading the values column-wise, with ``axis=0``, preselecting numeric columns - >>> df.style.{name}_gradient(axis=0) + >>> df.style.{name}_gradient(axis=0) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_ax0.png Shading all values collectively using ``axis=None`` - >>> df.style.{name}_gradient(axis=None) + >>> df.style.{name}_gradient(axis=None) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone.png Compress the color map from the both ``low`` and ``high`` ends - >>> df.style.{name}_gradient(axis=None, low=0.75, high=1.0) + >>> df.style.{name}_gradient(axis=None, low=0.75, high=1.0) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone_lowhigh.png Manually setting ``vmin`` and ``vmax`` gradient thresholds - >>> df.style.{name}_gradient(axis=None, vmin=6.7, vmax=21.6) + >>> df.style.{name}_gradient(axis=None, vmin=6.7, vmax=21.6) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone_vminvmax.png Setting a ``gmap`` and applying to all columns with another ``cmap`` >>> df.style.{name}_gradient(axis=0, gmap=df['Temp (c)'], cmap='YlOrRd') + ... # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_gmap.png @@ -2008,7 +2067,7 @@ def background_gradient( >>> gmap = np.array([[1,2,3], [2,3,4], [3,4,5]]) >>> df.style.{name}_gradient(axis=None, gmap=gmap, ... cmap='YlOrRd', subset=['Temp (c)', 'Rain (mm)', 'Wind (m/s)'] - ... ) + ... ) # doctest: +SKIP .. figure:: ../../_static/style/{image_prefix}_axNone_gmap.png """ @@ -2090,8 +2149,8 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: Examples -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) - >>> df.style.set_properties(color="white", align="right") - >>> df.style.set_properties(**{'background-color': 'yellow'}) + >>> df.style.set_properties(color="white", align="right") # doctest: +SKIP + >>> df.style.set_properties(**{'background-color': 'yellow'}) # doctest: +SKIP """ values = "".join([f"{p}: {v};" for p, v in kwargs.items()]) return self.applymap(lambda x: values, subset=subset) @@ -2426,7 +2485,7 @@ def highlight_between( ... 'Two': [2.9, 2.1, 2.5], ... 'Three': [3.1, 3.2, 3.8], ... }) - >>> df.style.highlight_between(left=2.1, right=2.9) + >>> df.style.highlight_between(left=2.1, right=2.9) # doctest: +SKIP .. figure:: ../../_static/style/hbetw_basic.png @@ -2434,7 +2493,7 @@ def highlight_between( and ``right`` for each column individually >>> df.style.highlight_between(left=[1.4, 2.4, 3.4], right=[1.6, 2.6, 3.6], - ... axis=1, color="#fffd75") + ... axis=1, color="#fffd75") # doctest: +SKIP .. figure:: ../../_static/style/hbetw_seq.png @@ -2442,14 +2501,14 @@ def highlight_between( matches the input DataFrame, with a constant ``right`` >>> df.style.highlight_between(left=[[2,2,3],[2,2,3],[3,3,3]], right=3.5, - ... axis=None, color="#fffd75") + ... axis=None, color="#fffd75") # doctest: +SKIP .. figure:: ../../_static/style/hbetw_axNone.png Using ``props`` instead of default background coloring >>> df.style.highlight_between(left=1.5, right=3.5, - ... props='font-weight:bold;color:#e83e8c') + ... props='font-weight:bold;color:#e83e8c') # doctest: +SKIP .. figure:: ../../_static/style/hbetw_props.png """ @@ -2526,19 +2585,21 @@ def highlight_quantile( >>> df = pd.DataFrame(np.arange(10).reshape(2,5) + 1) >>> df.style.highlight_quantile(axis=None, q_left=0.8, color="#fffd75") + ... # doctest: +SKIP .. figure:: ../../_static/style/hq_axNone.png Or highlight quantiles row-wise or column-wise, in this case by row-wise >>> df.style.highlight_quantile(axis=1, q_left=0.8, color="#fffd75") + ... # doctest: +SKIP .. figure:: ../../_static/style/hq_ax1.png Use ``props`` instead of default background coloring >>> df.style.highlight_quantile(axis=None, q_left=0.2, q_right=0.8, - ... props='font-weight:bold;color:#e83e8c') + ... props='font-weight:bold;color:#e83e8c') # doctest: +SKIP .. figure:: ../../_static/style/hq_props.png """ @@ -2682,6 +2743,7 @@ def pipe(self, func: Callable, *args, **kwargs): ... .highlight_min(subset=['conversion'], color='yellow') ... .pipe(format_conversion) ... .set_caption("Results with minimum conversion highlighted.")) + ... # doctest: +SKIP """ return com.pipe(self, func, *args, **kwargs) From bb7c3b2e23fdcc2b280e3e9436203020e7c72da0 Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Wed, 28 Jul 2021 16:15:39 -0300 Subject: [PATCH 02/20] TST: Add link to redirect to Table Visualization user guide * Modified style.py * Updated the doctest of the apply() * Updated the doctest of the applymap() * Updated the doctest of the set_table_styles() * Updated the doctest of the set_properties() --- pandas/io/formats/style.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 22c5939ae28f4..859e4a16a968c 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1209,6 +1209,9 @@ def apply( ... # doctest: +SKIP >>> df.style.apply(highlight_max, color='red', subset=(slice(0,5,2), "A")) ... # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ self._todo.append( (lambda instance: getattr(instance, "_apply"), (func, axis, subset), kwargs) @@ -1279,6 +1282,9 @@ def applymap( ... subset=([0,1,2], slice(None))) # doctest: +SKIP >>> df.style.applymap(color_negative, color='red', subset=(slice(0,5,2), "A")) ... # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ self._todo.append( (lambda instance: getattr(instance, "_applymap"), (func, subset), kwargs) @@ -1696,6 +1702,9 @@ def set_table_styles( ... 0: [{'selector': 'td:hover', ... 'props': [('font-size', '25px')]}] ... }, axis=1, overwrite=False) # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ if isinstance(table_styles, dict): if axis in [0, "index"]: @@ -2151,6 +2160,9 @@ def set_properties(self, subset: Subset | None = None, **kwargs) -> Styler: >>> df = pd.DataFrame(np.random.randn(10, 4)) >>> df.style.set_properties(color="white", align="right") # doctest: +SKIP >>> df.style.set_properties(**{'background-color': 'yellow'}) # doctest: +SKIP + + See `Table Visualization <../../user_guide/style.ipynb>`_ user guide for + more details. """ values = "".join([f"{p}: {v};" for p, v in kwargs.items()]) return self.applymap(lambda x: values, subset=subset) From fa5a615ade5ca710ba7666474eeb8d9990b1194e Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Wed, 28 Jul 2021 16:19:41 -0300 Subject: [PATCH 03/20] TST: Add image to pipe function result * Modified style.py * Updated the doctest of the pipe() --- doc/source/_static/style/df_pipe.png | Bin 0 -> 8673 bytes pandas/io/formats/style.py | 2 ++ 2 files changed, 2 insertions(+) create mode 100644 doc/source/_static/style/df_pipe.png diff --git a/doc/source/_static/style/df_pipe.png b/doc/source/_static/style/df_pipe.png new file mode 100644 index 0000000000000000000000000000000000000000..071a481ad5acc154ffa90b340f1c24cad2bee958 GIT binary patch literal 8673 zcmZ{K1yCK$w)NnWAVGtl5L|-=J2(UjuE7ZqTte^z90=|X0fM_b1lI%$PH=bU;Bc^? z+`9F@dtZI8YI?eBrl)77cJIAPh}b%@#!qR&!+!W zVK~Tq_yPdfJ%0;A0y8!l0Du5lNilV|?}us7_iC1NSM6m|iyb~^P>3Lt9 zuvhZ#)l5V97(Bi{Ay_;6&%;eRp5t35`k$7HBsx2Z;v0*@y677S(3hZ76Kjpvb^1N^aVO{-<`g%xA-Ee zxAXM#8C2K_NqY&K84t-w>LXAp^W29p1fi0e1JI;F?Fj7wI1NO&=cirnckdigR6%1q zSb3xf=C=mfY7GzE>5skZq>4RUJgIpqpX^4kdA#?>CrJgziXc#rd_FbZzI)oKw+!c3 z`t>Vkft)9oI&j5VQ5ql@KDd=52Z(Z?w_GLObaKhw8+5X}O1mFK38B@q;%RyH#K(2s z{B)W6FpnPcFgZOFc)Dw35~JDqV#A6s`D#nt2N;`G3EO0+B>f)F;#{j?8Hk&Y= z^V-estjvx^;GDx<=J)*u{f3n@)cZR!)^I4)QZDsn*yQ-~;MCpOD~5CU;!#25>^nsuv$BtZ(A_%my55(Hmu6 zfB;l!ztfC7dM4pb2OB?(?7O%@#0EYaw`H0P;GiTYzeu5Ul|9{DTc~DtM7UNv86=F3 zIr$tU!X-_;1evVdqTZA{c|AgY>e}=PMThm*J^L%@W;#CeCOLS%W6Dk;%m0~CgCs8G zk_ojmxwD5K69MoA;eWuCq62*SxZYk_*#=<{kYb>T=8HzZ2$8IGer8?T|D~EAkOqNJ z1UF}8n+c>$bh&muS&wfINhgR-F1y|v&=@k0AOixD6BAp5#Btj_AMsMwESRh=qNqe3 z#)OFpH^|?-#8lI@M1izlBff2}>n2?rIFrXRD@i0S*K0cqc{KC??nh%7c_J+$?MPiC zxQX^-26nV*8qKVAHYYQ@V#-43$d@{NM6zHyhAOXaMKqAXfp$cOZgsC@PCAP1l0UYsl)9DL*BJi3(ph<7hI^CLIr&*H*3Y?T>&+K`}sm-$5qAcX5;qw_MQMJ2QgR4j}S!1jDk^o93vn*JX@SUf>qTC zCcnt{`W4{bbt@9(a8Jz zK#~IxDfyM(HBqta1CL?p%vf7<+l&BtH^?Oco+IODT3+#Z-v=U zb8~Jw&zu#)-SYKfBK9kOnl=6?+8mPEJ?VpNM0`fmm4-F1W6iq{=W~Cf(d9o*>(R3r zct@?0?p@>TnTW`%tLBmWJ~cZxNDQUvMaRH$DZXFYSF3a=0wyJYDb@ zf!`7GesT5o{#En&bD2tm$~~Y@3&}!OEr!g^Dl`ilw4J&bI>L} zpia!z&~@82%)-Bi3Lit>u<|(#m~!4PRnFViE2@zFL_v4tMtqDH!~|5X;zi_hUihbj zD~Gqd?F6jh_em-4C1%`+07tvXS@dqH7=N>6uEeB0*F2Y#Ve0$*WoYv4xD^Q&LSILV zxuS7l8E-lhAs7m7^PUU71E+cwb~@Fs8f~SF7nY3d4GYg1SzYT$D=0{FaxUpXlV^LU zKC9Y;t>I1TE9oiHr#;`yvEda8<_dv}54}ovB_G*-Ove8Kr#Tm{miC|5u}#@~!|_y- zSa%oQ7$Dw@`1OY&=V!G+B{IVOV;P(j{hJTq!&RnQM>Zgm#xQ2rl|DVSH0oL3@R+Gb zzK{xNFZmRmyYXwVwTGf=A>kK5(Z##X&1FskfdqHMs)wA0n&NI98ydV8d_~E#Q2~ae zL61TP&c1LWI8^hbl+Id02>_W!oqm)KLQ}KxtcwzWab$!T=v!#5cUs~?b8q|p%z*}S zMr&WzF4?mlCBx014AASAXp}xjlx=&PisbMU4}mbGR3{@lol!}%O>r0{IyomnI=xxq zj8bI7il5j=v~>V7(DL>=n?TAeIBtR?s2e4lfN-!+Cnd8HG4@hf#ep~YU%#`I{=m-^ zv~KRZl?f0i`~-X165rx)a+oq(m?G+NzDR(1$ID(~aslp3!E1_f64g5{r1SF*$OW4q>Tn89${ z1_3y@N@?=65V9bFkO>}HhzD=S4gEITwvdUsFM<@%X#qYZzxz|X-cp7lX5J}LY8Ons zXS5G?!rf-#OaJVOEb8Q9Wn~b{{D`94#^SE|n`<))actH+r-m_u9yhG=~E7n%`VNFIF{5GdmQOb{`4hn1sdlT1Y4CBh3|xt=`?46F3mKxaRAFjTdmUsAHBLSgrt zy(&J;!y-i5|q*<8Ahx2LZ`5}9vao^tt4vRNYWHNvZlsE+m9 zeiM3T4<*Ens0pcfXR5modfZeiR0`4?GR~Yo@(ywk*VRFANbWwJZH5ta^bfQ0nEJ(e z!A8%81~9x2GGn>1LZ6qLR&-{@Gg9?@cRnr};3mw{Ts`o8;XRo}PFJx^R8_W6AKHhoKjV zt;nKj#n?2#=`>*<(;P%yxqp1`zBQ+Kw|4K^eV_d@315lK&$%lXCwt*N))Yo(iA|#p z^=85XcB{o0u7CZe6qphRt3KY^A6X0*FZJu8(G5&#sF8v>8U9B*cjE=JQgK-e|BKMx z^%FOX4XZrOf!FBi>S{}zd<~P-2Ac{cRWbvw-3A8c*$dtJ^oEft#^rt{fh8N~-nI*>!4> zvv_o*=}_±&V1L3MJu%FsFj|7y6)V`g2RB)a$nXO&G>k0}K=84q%BcIuZRJ!4WS zIX$i{S*Ocj6-)*N{E%2e5(Xv$OqCGQ0n?fWnsLc6)M!%awG+4RDI~kcRLNOec0?}* zyZ!bmi0Gb2z0A|L8u6EnyMr9LV8pz6<62|!A!qYf+mq#59Jvy9)^bz&v6B+N*^*vMy(c zkD-y>D{lmhO4up`&j%*o!!ITaCs)N~wQl2h2%hc~)5`h0|5t!?V66~*xC0n^r6*xfcB>cRPb z!X?K^km$zq1rgHH8^E6)a_n>&9*Dn)hdv63q9OWjw#}l)w8=@k9SXT7awBH2BgWH1 z(i96OcE2ZozD!pyvlnTHIgOZtz`Pu8HK8KzWkp_&8mW@hcut_`nQfGoRVi_r`N>CJ z*$Nq&5UGmR>9-^^2kBXsq@sK)wG9pPUE0sz+Zni;oED#xc1=$#%vm2>78k**|0u>~ zhmum*cTRc0BNvw@CKsAJCbV{Wk7;7svT@_m%3K7pUYs(W%XL}rS3!`Lmhy{>>(-G0 zi+Nd+qT(hlQh*WvL@fGFKLrHV=j~f(nhh7y8zHdg7wr=2CioedPi!tzZ3%@Bd@K^1-5nAI?;*-#7S~B~P~-%!uYf z@7Ae~^EgF=J#3bc`VoRkxKx0W>FWU?kSd$)SN*IR5TPwVvkvI!9;3=>&Y^R{C^2Wr>fIlG%Z z9rL5AfTLTcCe#YF>97$m>8MvMbm5Sd`5@jZa1B$p4SKDyuiG`I;$?Z_Kg1j^sDn7i zxx=^1o3G$J$r;Up@bob{Gt7>b_*J{ehbsEl_I6|P(W;IGO^bYC9t?dKhKO(7QI z!FZ}zoQO=Z3#(?;TU?Gf6#4QV6L{7c5YTQgetdI2p(HYqp~K63yCB@A8WSH~p?vCP zM}^^|=x-bg9PSJAD(kU(Y3myp7Y-99$W0y!3Sz@bSazuxM)XT617Z|x;CG09@C*q=``T?H8ROkU7WbF;n zUx|yBtDHK8gA7xM*mccI?X`7U>@88U{K`3|L!Wn~+-fi-jI%-CoV?m`3CP1_1hF)q znx`HXFoyJnq?58>RPLz^^#SX6kV`MFfjM$ZLa7HhY<-T;znEd?cyTuuP#xy!m@dN zPn+fHz{tq6HWT9K;y&yNU7PtYoSLr()Q@`N>sJQg&C@u~C@8j}4$s>6_TY3vtp%mW z0^R#lgFhX;8ermD_tIG}z8-;;njFvKerh!-j@k+|YPS@?W8ede9<)9y9sKJ!Ccw4X zbkhL2Go)ut=$+w*>4Z(=^{45e?(wSXN>t2CNZ=eka61raQlz(uYW(Z*Zd+uz^JkS! zlHvWH)_!EA;j{lQ4gN!@IR4EExX}C`JwQ@17Um=U;t!D&*`&Gxx)Mq*nnl;u6PHc` zvxw)W<{DXY`LX?{=LL{fK=evITCk7yTBCJeJO#0yu2rsQJ4_u;2LT!9w#pf*5r)?p7_!E)QMq zS1*ow2LfepM7X?v z9t&7_@PGX?KWjRXk$9oLdjT)LjBw0v$8P@YhE$z0HQ^K|M}yaY-da3GM}Yx*cH!Zr zux$ZaIT#a6El$xgyFjMM*Fsr)S{(#gs-!c41xWJg4-G!Nnw_7Fn=Y>5iPAiu5pZr* zaBX3%^&_wtn(Cxqt3W=B*;YD*7V zr`UX1VcETu)ax`t_$T;=Wv$C;YKpo6J?7bU^yX#&z1j^4f@bbe@7VA1W96%sZU{c>$F`%U5UrsZrl+3T(WX}X>4i9VBP}A zY`uJHG@q@b=P7YjgFm=U3d;vndb*rvfWSiJG6?Ge*GQk0I z%jLEYY^bo1j3L$qYKA$wQx1P^GND6@c+E-jU6B-_ATDE$T`ac}>ASpXjJ`2{`+=ZY zPa#ck4}=g|4410=o#!trmNA(;xjgkEh5SK<2OFfQUVNso@GTN1%BWt$S_`*?DYxn$ zl8hTC{sX6poui?7skF3ONnDS^-%7%2rRMT7yxhnRBt%d<%4Po#2>q+clWiE88TQ^) zR#upB;{98d|8Lg=@xEOaRXqngqug-@gBx+(E%cZZF48|v@w z!7o$o**){mZivTM%^=9l7Z5A^g+f;LY2qWs(z zxRgvxu!(svqT6)0F5@U&#-zRjrmd^7*kNCB;jIf(2?#&}x$Sd~jF-2aZ43VT3g9~B zfExW{<^RMQJ8V2~VMQrwXpDy9n~%DrPmkun{$#CaQ=?fokVUVt=F+HER+NwP{eFbH zLfY9OB|i@(1nt!SU{SQqL&Wc^7;6#Q>`Zd@D;HhF<+`TJ9-~$jtT6}6?;4+H_L4HA zwl(Sg4l)ZXe}YuXrpfD3rMs=l@oCp}R6r)Ioe{F#9pa)_+*;B0$=qKYAo^;o%0L(^ z>bA2gK|BPJr?ivqbziP3XBG!k8BrIwukDBpZ!|EU2D9uueufwZg}5!9;{u}bm`d@C z%g$N?MHcpP z?K3HPI+t?J^g=e4I-#wfNJdQ;@iHGXAz0~D%AK(6(#c(xg5Ak8dSkquqHqtz#|O)U z@Y=KR+^sdCB56FXa6CsYO7m7V?~Rp{TLZtg`4byku`JQm38z){(XzvCz0YOEuhzum z`OA~Pr*LS=LOt?w(W$4C79;*8)~entPQ%g z&&rzU@`?T!S(50?b*i@A<0Vo*D8#{$a#(b0!AhBh)sK!44|V)h{*6sLe#doK!0 zzMqF*du$fj3*V#I2uET$d?LX9D&zE5C!|2`Vz$%8-ry|llm6V-@-A1VJ_?gklL)=_t0u_(*}jM!}zN%h6B!>#A>%e)66vOfoe?imnHa4c6vK`4(D4;XW zs>EK=<_GV?Pvs|#L~6j)WkTRAETYxZ$15%0Y$#?FBp1JOlF8C*gnq)e#-6#_+4RAZ z{DjXgab+(oFN&*>O+1F^*ONTwdLYprg?pJrg=GAXdD_1&vhG;6{dUtB>i?tjBNf&z2)t^Yy%%ky#@a?F{A?bHARG%^xhswd^q6 zD$K5W`BXF?#pnF6h95&S+H|+@;E2K<>|WDkD4S63t}7c$1GJfVePEmBE_0mPjK)SvH<`>(BHiPDR7oL_%9HF@@Nv{_*2$Od3FD!0-r$;kkjVhUn8%EAQ)GWj?5pE-W+u2(5 z3j5u^-5pfimVQmIY+XNcy2ARQFLqyG3lI?-j3Z?v+T)>Oem=OOqlcLCSF#AFNn1t=#x(brIIVv7808ehs8z?}_N0ke z7Y#WzT(V6A)q_n2p3gB6*n*HSF9xtkYQI}O_x^GPCk^O|L>N@Y1zk}r9UrxE5TSNS z*e)KencODQi~I{_o}s?#NRnH1oIr2V_cUV?tRi@aXZS%m`he~91qO@&WiQHhQHDy) zHJV=kgKL+>71}~K62v7<&Xev+KjV5=+im6h*ymrB?cs(lWrO Date: Fri, 30 Jul 2021 13:05:09 -0300 Subject: [PATCH 04/20] TST: Remove unnecessary outputs * Modified pandas/io/formats/style.py * Updated the doctests of the set_tooltips() * Updated the doctests of the to_latex() * Updated the doctests of the set_td_classes() * Updated the doctests of the set_table_attributes() --- pandas/io/formats/style.py | 48 ++------------------------------------ 1 file changed, 2 insertions(+), 46 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 301b4745706c2..704a9d94e96c6 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -342,15 +342,9 @@ def set_tooltips( ... ('visibility', 'hidden'), ... ('position', 'absolute'), ... ('z-index', 1)]) # doctest: +SKIP - 0 1 - 0 0 1 - 1 2 3 >>> df.style.set_tooltips(ttips, css_class='tt-add', ... props='visibility:hidden; position:absolute; z-index:1;') ... # doctest: +SKIP - 0 1 - 0 0 1 - 1 2 3 """ if not self.cell_ids: # tooltips not optimised for individual cell check. requires reasonable @@ -554,33 +548,13 @@ def to_latex( >>> df = pd.DataFrame([[1,2], [3,4]]) >>> s = df.style.highlight_max(axis=None, ... props='background-color:red; font-weight:bold;') - >>> s.render() # doctest: +ELLIPSIS - '\n
\n \n - \n \n - \n - \n - \n \n \n \n - \n - \n - \n \n - \n \n - \n - \n \n - \n
 01
012
134
\n...' + >>> s.render() # doctest: +SKIP The equivalent using LaTeX only commands is the following: >>> s = df.style.highlight_max(axis=None, ... props='cellcolor:{red}; bfseries: ;') >>> s.to_latex() # doctest: +SKIP - \begin{tabular}{lrr} - {} & {0} & {1} \\ - 0 & 1 & 2 \\ - 1 & 3 & \\ - cellcolor{red} \\ - bfseries 4 \\ - \end{tabular} Internally these structured LaTeX ``(, )`` pairs are translated to the @@ -942,9 +916,6 @@ def set_td_classes(self, classes: DataFrame) -> Styler: ... ["red", None, "blue max-val"] ... ], index=df.index, columns=df.columns) >>> df.style.set_td_classes(classes) # doctest: +SKIP - A B C - 0 1 2 3 - 1 4 5 6 Using `MultiIndex` columns and a `classes` `DataFrame` as a subset of the underlying, @@ -954,10 +925,6 @@ def set_td_classes(self, classes: DataFrame) -> Styler: >>> classes = pd.DataFrame(["min-val"], index=["a"], ... columns=[["level0"],["level1a"]]) >>> df.style.set_td_classes(classes) # doctest: +SKIP - level0 - level1a level1b - a 1 2 - b 3 4 Form of the output with new additional css classes, @@ -1420,18 +1387,7 @@ def set_table_attributes(self, attributes: str) -> Styler: -------- >>> df = pd.DataFrame(np.random.randn(10, 4)) >>> df.style.set_table_attributes('class="pure-table"') # doctest: +SKIP - 0 1 2 3 - 0 0.4 1.0 -1.1 0.5 - 1 -0.6 1.2 1.8 -1.4 - 2 1.2 -0.6 -0.2 -0.2 - 3 -0.5 -0.8 -0.2 1.8 - 4 -0.5 -0.8 0.2 -1.7 - 5 0.1 1.5 0.3 -1.3 - 6 1.5 -0.6 -1.3 -0.1 - 7 -0.1 -0.8 -0.7 0.3 - 8 1.7 -0.5 -1.4 0.8 - 9 1.5 0.4 -0.8 0.7 - + # ... ... """ self.table_attributes = attributes return self From 9dc52e466e1264e5428f75debd9faac1d0acfde9 Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Fri, 30 Jul 2021 13:09:11 -0300 Subject: [PATCH 05/20] TST: Add the output to the Styler.format doctest in to_latex() --- pandas/io/formats/style.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 704a9d94e96c6..c53a808148639 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -671,8 +671,13 @@ def to_latex( ... ("Numeric", "Integers"): '\${}', ... ("Numeric", "Floats"): '{:.3f}', ... ("Non-Numeric", "Strings"): str.upper - ... }) # doctest: +ELLIPSIS - + ... }) # doctest: +SKIP + Numeric Non-Numeric + Integers Floats Strings + L0 ix1 $1 2.200 DOGS + ix2 $3 4.400 CATS + L1 ix3 $2 6.600 COWS + >>> s.to_latex() # doctest: +SKIP \begin{tabular}{llrrl} {} & {} & \multicolumn{2}{r}{Numeric} & {Non-Numeric} \\ From 63535ce4a6363aa910688178456c95ad0ee35916 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Wed, 28 Jul 2021 18:49:26 -0400 Subject: [PATCH 06/20] REG: DataFrame.agg where func returns lists and axis=1 (#42762) --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/core/apply.py | 29 ++++++++++++++++---------- pandas/tests/apply/test_frame_apply.py | 5 +++-- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index f4804215db8c1..94d62ae988f0c 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -18,6 +18,7 @@ Fixed regressions - Regression in updating values of :class:`pandas.Series` using boolean index, created by using :meth:`pandas.DataFrame.pop` (:issue:`42530`) - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) +- Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/apply.py b/pandas/core/apply.py index ff3fc30b870dc..bb39e18caeaa2 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -690,21 +690,28 @@ def agg(self): obj = self.obj axis = self.axis + # TODO: Avoid having to change state + self.obj = self.obj if self.axis == 0 else self.obj.T + self.axis = 0 + + result = None + try: + result = super().agg() + except TypeError as err: + exc = TypeError( + "DataFrame constructor called with " + f"incompatible data and dtype: {err}" + ) + raise exc from err + finally: + self.obj = obj + self.axis = axis + if axis == 1: - result = FrameRowApply( - obj.T, - self.orig_f, - self.raw, - self.result_type, - self.args, - self.kwargs, - ).agg() result = result.T if result is not None else result - else: - result = super().agg() if result is None: - result = obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) + result = self.obj.apply(self.orig_f, axis, args=self.args, **self.kwargs) return result diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 801cbdf3d0a87..62983b5327a26 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -644,13 +644,14 @@ def test_apply_dup_names_multi_agg(): tm.assert_frame_equal(result, expected) -def test_apply_nested_result_axis_1(): +@pytest.mark.parametrize("op", ["apply", "agg"]) +def test_apply_nested_result_axis_1(op): # GH 13820 def apply_list(row): return [2 * row["A"], 2 * row["C"], 2 * row["B"]] df = DataFrame(np.zeros((4, 4)), columns=list("ABCD")) - result = df.apply(apply_list, axis=1) + result = getattr(df, op)(apply_list, axis=1) expected = Series( [[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]] ) From 9d72fe0102b867a440042bad687603c653ab3dcf Mon Sep 17 00:00:00 2001 From: Irv Lustig Date: Wed, 28 Jul 2021 18:53:53 -0400 Subject: [PATCH 07/20] Fix typing issues for CI (#42770) --- pandas/core/groupby/groupby.py | 3 ++- pandas/core/reshape/tile.py | 6 +++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 9c695148a75c0..97f49eabcc5c3 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2896,7 +2896,7 @@ def _get_cythonized_result( grouper = self.grouper ids, _, ngroups = grouper.group_info - output: dict[base.OutputKey, np.ndarray] = {} + output: dict[base.OutputKey, ArrayLike] = {} base_func = getattr(libgroupby, how) base_func = partial(base_func, labels=ids) @@ -2911,6 +2911,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: else: result_sz = len(values) + result: ArrayLike result = np.zeros(result_sz, dtype=cython_dtype) if needs_2d: result = result.reshape((-1, 1)) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index c5d06bcef72a4..656d38a50f77f 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -418,7 +418,11 @@ def _bins_to_cuts( bins = unique_bins side = "left" if right else "right" - ids = ensure_platform_int(bins.searchsorted(x, side=side)) + # error: No overload variant of "searchsorted" of "ndarray" matches + # argument types "Any", "str" + ids = ensure_platform_int( + bins.searchsorted(x, side=side) # type: ignore[call-overload] + ) if include_lowest: ids[np.asarray(x) == bins[0]] = 1 From 5ba05f5daa7d4b178951095a6d9041f813c0eb7e Mon Sep 17 00:00:00 2001 From: Thomas Smith Date: Wed, 28 Jul 2021 23:55:13 +0100 Subject: [PATCH 08/20] BUG: groupby.shift returns different columns when fill_value is specified (#41858) --- asv_bench/benchmarks/groupby.py | 12 ++++++++++++ doc/source/whatsnew/v1.4.0.rst | 2 ++ pandas/core/groupby/groupby.py | 8 ++++++-- pandas/tests/groupby/test_groupby_shift_diff.py | 2 +- 4 files changed, 21 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 6ca951e946bad..48380bd9b46b8 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -369,6 +369,18 @@ def time_category_size(self): self.draws.groupby(self.cats).size() +class Shift: + def setup(self): + N = 18 + self.df = DataFrame({"g": ["a", "b"] * 9, "v": list(range(N))}) + + def time_defaults(self): + self.df.groupby("g").shift() + + def time_fill_value(self): + self.df.groupby("g").shift(fill_value=99) + + class FillNA: def setup(self): N = 100 diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 432dd46000eb3..1ca9104d3adf3 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -168,6 +168,7 @@ Performance improvements - Performance improvement in :meth:`.GroupBy.sample`, especially when ``weights`` argument provided (:issue:`34483`) - Performance improvement in :meth:`.GroupBy.transform` for user-defined functions (:issue:`41598`) - Performance improvement in constructing :class:`DataFrame` objects (:issue:`42631`) +- Performance improvement in :meth:`GroupBy.shift` when ``fill_value`` argument is provided (:issue:`26615`) .. --------------------------------------------------------------------------- @@ -262,6 +263,7 @@ Groupby/resample/rolling - Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`) - Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`) - Bug in :meth:`DataFrame.groupby.rolling.var` would calculate the rolling variance only on the first group (:issue:`42442`) +- Bug in :meth:`GroupBy.shift` that would return the grouping columns if ``fill_value`` was not None (:issue:`41556`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 97f49eabcc5c3..939cff16bf1ae 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -2822,6 +2822,7 @@ def _get_cythonized_result( result_is_index: bool = False, pre_processing=None, post_processing=None, + fill_value=None, **kwargs, ): """ @@ -2872,6 +2873,8 @@ def _get_cythonized_result( second argument, i.e. the signature should be (ndarray, Type). If `needs_nullable=True`, a third argument should be `nullable`, to allow for processing specific to nullable values. + fill_value : any, default None + The scalar value to use for newly introduced missing values. **kwargs : dict Extra arguments to be passed back to Cython funcs @@ -2947,7 +2950,7 @@ def blk_func(values: ArrayLike) -> ArrayLike: result = result.reshape(-1) if result_is_index: - result = algorithms.take_nd(values, result) + result = algorithms.take_nd(values, result, fill_value=fill_value) if post_processing: pp_kwargs = {} @@ -3023,7 +3026,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): tshift : Shift the time index, using the index’s frequency if available. """ - if freq is not None or axis != 0 or not isna(fill_value): + if freq is not None or axis != 0: return self.apply(lambda x: x.shift(periods, freq, axis, fill_value)) return self._get_cythonized_result( @@ -3033,6 +3036,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None): needs_ngroups=True, result_is_index=True, periods=periods, + fill_value=fill_value, ) @final diff --git a/pandas/tests/groupby/test_groupby_shift_diff.py b/pandas/tests/groupby/test_groupby_shift_diff.py index c6f3e7618e3f7..e9517b4544f0b 100644 --- a/pandas/tests/groupby/test_groupby_shift_diff.py +++ b/pandas/tests/groupby/test_groupby_shift_diff.py @@ -55,7 +55,7 @@ def test_group_shift_with_fill_value(): columns=["Z"], index=None, ) - result = g.shift(-1, fill_value=0)[["Z"]] + result = g.shift(-1, fill_value=0) tm.assert_frame_equal(result, expected) From 994ff25df047e0084533c3e2956b0b389f8477ea Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 28 Jul 2021 16:00:46 -0700 Subject: [PATCH 09/20] PERF: extract_array earlier in DataFrame construction (#42774) --- pandas/core/construction.py | 4 ++-- pandas/core/internals/blocks.py | 2 +- pandas/core/internals/managers.py | 7 +------ pandas/core/strings/accessor.py | 6 ++---- pandas/tests/extension/test_numpy.py | 18 +----------------- pandas/tests/internals/test_internals.py | 4 +++- 6 files changed, 10 insertions(+), 31 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 68d7f6c6f8a22..387df6c6a6b70 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -420,9 +420,9 @@ def extract_array( return obj._values return obj - obj = obj.array + obj = obj._values - if extract_numpy and isinstance(obj, ABCPandasArray): + elif extract_numpy and isinstance(obj, ABCPandasArray): obj = obj.to_numpy() return obj diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 5d63fcdf7b0dc..adfecb946d822 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1920,11 +1920,11 @@ def get_block_type(values, dtype: DtypeObj | None = None): def new_block(values, placement, *, ndim: int, klass=None) -> Block: + # caller is responsible for ensuring values is NOT a PandasArray if not isinstance(placement, BlockPlacement): placement = BlockPlacement(placement) - values, _ = extract_pandas_array(values, None, ndim) check_ndim(values, placement, ndim) if klass is None: diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 03bb47f3a6b6e..8937c2c107c62 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1779,11 +1779,6 @@ def create_block_manager_from_blocks( return mgr -# We define this here so we can override it in tests.extension.test_numpy -def _extract_array(obj): - return extract_array(obj, extract_numpy=True) - - def create_block_manager_from_arrays( arrays, names: Index, @@ -1795,7 +1790,7 @@ def create_block_manager_from_arrays( # assert isinstance(axes, list) # assert all(isinstance(x, Index) for x in axes) - arrays = [_extract_array(x) for x in arrays] + arrays = [extract_array(x, extract_numpy=True) for x in arrays] try: blocks = _form_blocks(arrays, names, axes, consolidate) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 717287360df8f..ef01602be7654 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -34,6 +34,7 @@ from pandas.core.dtypes.missing import isna from pandas.core.base import NoNewAttributesMixin +from pandas.core.construction import extract_array if TYPE_CHECKING: from pandas import ( @@ -213,10 +214,7 @@ def _validate(data): # see _libs/lib.pyx for list of inferred types allowed_types = ["string", "empty", "bytes", "mixed", "mixed-integer"] - # TODO: avoid kludge for tests.extension.test_numpy - from pandas.core.internals.managers import _extract_array - - data = _extract_array(data) + data = extract_array(data) values = getattr(data, "categories", data) # categorical / normal diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index a680ae5cd695c..ed26bf6481bd9 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -23,32 +23,17 @@ ExtensionDtype, PandasDtype, ) -from pandas.core.dtypes.generic import ABCPandasArray import pandas as pd import pandas._testing as tm from pandas.core.arrays.numpy_ import PandasArray -from pandas.core.internals import ( - blocks, - managers, -) +from pandas.core.internals import blocks from pandas.tests.extension import base # TODO(ArrayManager) PandasArray pytestmark = td.skip_array_manager_not_yet_implemented -def _extract_array_patched(obj): - if isinstance(obj, (pd.Index, pd.Series)): - obj = obj._values - if isinstance(obj, ABCPandasArray): - # TODO for reasons unclear, we get here in a couple of tests - # with PandasArray._typ *not* patched - obj = obj.to_numpy() - - return obj - - def _can_hold_element_patched(obj, element) -> bool: if isinstance(element, PandasArray): element = element.to_numpy() @@ -98,7 +83,6 @@ def allow_in_pandas(monkeypatch): """ with monkeypatch.context() as m: m.setattr(PandasArray, "_typ", "extension") - m.setattr(managers, "_extract_array", _extract_array_patched) m.setattr(blocks, "can_hold_element", _can_hold_element_patched) m.setattr(tm.asserters, "assert_attr_equal", _assert_attr_equal) yield diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 38a6209283080..362252e1a6b72 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -1376,9 +1376,11 @@ def test_make_block_no_pandas_array(block_maker): # PandasArray, no dtype result = block_maker(arr, slice(len(arr)), ndim=arr.ndim) assert result.dtype.kind in ["i", "u"] - assert result.is_extension is False if block_maker is make_block: + # new_block requires caller to unwrap PandasArray + assert result.is_extension is False + # PandasArray, PandasDtype result = block_maker(arr, slice(len(arr)), dtype=arr.dtype, ndim=arr.ndim) assert result.dtype.kind in ["i", "u"] From 6a70067c24b6b5be604a5214b0641dcff058c838 Mon Sep 17 00:00:00 2001 From: attack68 <24256554+attack68@users.noreply.github.com> Date: Thu, 29 Jul 2021 01:01:04 +0200 Subject: [PATCH 10/20] ENH: `sparse_columns` and `sparse_index` added to `Styler.to_html` (#41946) --- doc/source/whatsnew/v1.4.0.rst | 1 + pandas/io/formats/style.py | 27 +++++++++++++++++++--- pandas/tests/io/formats/style/test_html.py | 22 ++++++++++++++++++ 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 1ca9104d3adf3..e42360558d284 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -35,6 +35,7 @@ Other enhancements - Additional options added to :meth:`.Styler.bar` to control alignment and display, with keyword only arguments (:issue:`26070`, :issue:`36419`) - :meth:`Styler.bar` now validates the input argument ``width`` and ``height`` (:issue:`42511`) - :meth:`Series.ewm`, :meth:`DataFrame.ewm`, now support a ``method`` argument with a ``'table'`` option that performs the windowing operation over an entire :class:`DataFrame`. See :ref:`Window Overview ` for performance and functional benefits (:issue:`42273`) +- Added ``sparse_index`` and ``sparse_columns`` keyword arguments to :meth:`.Styler.to_html` (:issue:`41946`) - Added keyword argument ``environment`` to :meth:`.Styler.to_latex` also allowing a specific "longtable" entry with a separate jinja2 template (:issue:`41866`) - diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index c53a808148639..24d1fce2a4679 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -477,8 +477,8 @@ def to_latex( Defaults to ``pandas.options.styler.sparse.index`` value. sparse_columns : bool, optional Whether to sparsify the display of a hierarchical index. Setting to False - will display each explicit level element in a hierarchical key for each row. - Defaults to ``pandas.options.styler.sparse.columns`` value. + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns`` value. multirow_align : {"c", "t", "b"} If sparsifying hierarchical MultiIndexes whether to align text centrally, at the top or bottom. @@ -822,6 +822,8 @@ def to_html( *, table_uuid: str | None = None, table_attributes: str | None = None, + sparse_index: bool | None = None, + sparse_columns: bool | None = None, encoding: str | None = None, doctype_html: bool = False, exclude_styles: bool = False, @@ -847,6 +849,18 @@ def to_html( ``
>`` If not given defaults to Styler's preexisting value. + sparse_index : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each row. + Defaults to ``pandas.options.styler.sparse.index`` value. + + .. versionadded:: 1.4.0 + sparse_columns : bool, optional + Whether to sparsify the display of a hierarchical index. Setting to False + will display each explicit level element in a hierarchical key for each + column. Defaults to ``pandas.options.styler.sparse.columns`` value. + + .. versionadded:: 1.4.0 encoding : str, optional Character encoding setting for file output, and HTML meta tags, defaults to "utf-8" if None. @@ -873,8 +887,15 @@ def to_html( if table_attributes: self.set_table_attributes(table_attributes) + if sparse_index is None: + sparse_index = get_option("styler.sparse.index") + if sparse_columns is None: + sparse_columns = get_option("styler.sparse.columns") + # Build HTML string.. - html = self.render( + html = self._render_html( + sparse_index=sparse_index, + sparse_columns=sparse_columns, exclude_styles=exclude_styles, encoding=encoding if encoding else "utf-8", doctype_html=doctype_html, diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py index 4e71cb4c46626..2657370bf8258 100644 --- a/pandas/tests/io/formats/style/test_html.py +++ b/pandas/tests/io/formats/style/test_html.py @@ -6,6 +6,7 @@ from pandas import ( DataFrame, MultiIndex, + option_context, ) jinja2 = pytest.importorskip("jinja2") @@ -429,3 +430,24 @@ def test_sticky_levels(styler_mi, index, columns): def test_sticky_raises(styler): with pytest.raises(ValueError, match="`axis` must be"): styler.set_sticky(axis="bad") + + +@pytest.mark.parametrize( + "sparse_index, sparse_columns", + [(True, True), (True, False), (False, True), (False, False)], +) +def test_sparse_options(sparse_index, sparse_columns): + cidx = MultiIndex.from_tuples([("Z", "a"), ("Z", "b"), ("Y", "c")]) + ridx = MultiIndex.from_tuples([("A", "a"), ("A", "b"), ("B", "c")]) + df = DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], index=ridx, columns=cidx) + styler = df.style + + default_html = styler.to_html() # defaults under pd.options to (True , True) + + with option_context( + "styler.sparse.index", sparse_index, "styler.sparse.columns", sparse_columns + ): + html1 = styler.to_html() + assert (html1 == default_html) is (sparse_index and sparse_columns) + html2 = styler.to_html(sparse_index=sparse_index, sparse_columns=sparse_columns) + assert html1 == html2 From 26e2cd9e2bf6e7f40f62e93f299c40449543ff71 Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 29 Jul 2021 08:10:47 -0400 Subject: [PATCH 11/20] TYP: Fix typing for searchsorted (#42788) --- pandas/core/reshape/tile.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 656d38a50f77f..03dc124807f09 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -4,6 +4,7 @@ from typing import ( Any, Callable, + Literal, ) import numpy as np @@ -417,12 +418,8 @@ def _bins_to_cuts( else: bins = unique_bins - side = "left" if right else "right" - # error: No overload variant of "searchsorted" of "ndarray" matches - # argument types "Any", "str" - ids = ensure_platform_int( - bins.searchsorted(x, side=side) # type: ignore[call-overload] - ) + side: Literal["left", "right"] = "left" if right else "right" + ids = ensure_platform_int(bins.searchsorted(x, side=side)) if include_lowest: ids[np.asarray(x) == bins[0]] = 1 From 05e0c248ba6786c2840d72b97fae69ffc506ed40 Mon Sep 17 00:00:00 2001 From: Mike Phung Date: Thu, 29 Jul 2021 13:26:49 -0700 Subject: [PATCH 12/20] DOC GH42756 Update documentation for pandas.DataFrame.drop to clarify tuples. (#42789) --- pandas/core/frame.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8b82021375a28..27aa2ed939c1a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4754,7 +4754,8 @@ def drop( Parameters ---------- labels : single label or list-like - Index or column labels to drop. + Index or column labels to drop. A tuple will be used as a single + label and not treated as a list-like. axis : {0 or 'index', 1 or 'columns'}, default 0 Whether to drop labels from the index (0 or 'index') or columns (1 or 'columns'). @@ -4845,6 +4846,17 @@ def drop( weight 1.0 0.8 length 0.3 0.2 + >>> df.drop(index=('falcon', 'weight')) + big small + lama speed 45.0 30.0 + weight 200.0 100.0 + length 1.5 1.0 + cow speed 30.0 20.0 + weight 250.0 150.0 + length 1.5 0.8 + falcon speed 320.0 250.0 + length 0.3 0.2 + >>> df.drop(index='cow', columns='small') big lama speed 45.0 From a5d951e48a7daae03812f667de209354e628fbfd Mon Sep 17 00:00:00 2001 From: Richard Shadrach <45562402+rhshadrach@users.noreply.github.com> Date: Thu, 29 Jul 2021 17:53:13 -0400 Subject: [PATCH 13/20] CI: Fix doctests (#42790) --- pandas/core/construction.py | 4 +--- pandas/core/indexes/base.py | 3 +++ pandas/tests/indexing/test_loc.py | 12 ++++++++++++ 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 387df6c6a6b70..89591f27e9092 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -405,9 +405,7 @@ def extract_array( For an ndarray-backed Series / Index a PandasArray is returned. >>> extract_array(pd.Series([1, 2, 3])) - - [1, 2, 3] - Length: 3, dtype: int64 + array([1, 2, 3]) To extract all the way down to the ndarray, pass ``extract_numpy=True``. diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 5f12a918c0520..54271f0f9b492 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5406,6 +5406,9 @@ def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray] self._raise_if_missing(keyarr, indexer, axis_name) keyarr = self.take(indexer) + if isinstance(key, Index): + # GH 42790 - Preserve name from an Index + keyarr.name = key.name if keyarr.dtype.kind in ["m", "M"]: # DTI/TDI.take can infer a freq in some cases when we dont want one if isinstance(key, list) or ( diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 9d52e8ab25306..6692a06c79d45 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -2432,6 +2432,18 @@ def test_loc_getitem_listlike_of_datetimelike_keys(self, to_period): with pytest.raises(KeyError, match="not in index"): ser.loc[keys] + def test_loc_named_index(self): + # GH 42790 + df = DataFrame( + [[1, 2], [4, 5], [7, 8]], + index=["cobra", "viper", "sidewinder"], + columns=["max_speed", "shield"], + ) + expected = df.iloc[:2] + expected.index.name = "foo" + result = df.loc[Index(["cobra", "viper"], name="foo")] + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize( "columns, column_key, expected_columns", From ebaa9c1fc0dc5a511c98838718cfa41584b89cb8 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin <37011898+mzeitlin11@users.noreply.github.com> Date: Thu, 29 Jul 2021 19:23:17 -0400 Subject: [PATCH 14/20] REGR: nanosecond timestamp comparisons to OOB datetimes (#42796) --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/_libs/tslibs/timestamps.pyx | 4 ++-- pandas/tests/scalar/timestamp/test_comparisons.py | 13 +++++++++++++ pandas/tests/series/methods/test_clip.py | 11 +++++++++++ 4 files changed, 27 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 94d62ae988f0c..3350d9469a9ee 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) +- Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index e4e9df5176459..fa86b7d9899af 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -270,9 +270,9 @@ cdef class _Timestamp(ABCTimestamp): if op == Py_EQ: return False if op == Py_LE or op == Py_LT: - return other.year <= self.year + return self.year <= other.year if op == Py_GE or op == Py_GT: - return other.year >= self.year + return self.year >= other.year cdef bint _can_compare(self, datetime other): if self.tzinfo is not None: diff --git a/pandas/tests/scalar/timestamp/test_comparisons.py b/pandas/tests/scalar/timestamp/test_comparisons.py index 555067f2aba1a..ee36223eb2496 100644 --- a/pandas/tests/scalar/timestamp/test_comparisons.py +++ b/pandas/tests/scalar/timestamp/test_comparisons.py @@ -266,6 +266,19 @@ def test_timestamp_compare_oob_dt64(self): assert Timestamp.max < other + us # Note: numpy gets the reversed comparison wrong + # GH-42794 + other = datetime(9999, 9, 9) + assert Timestamp.min < other + assert other > Timestamp.min + assert Timestamp.max < other + assert other > Timestamp.max + + other = datetime(1, 1, 1) + assert Timestamp.max > other + assert other < Timestamp.max + assert Timestamp.min > other + assert other < Timestamp.min + def test_compare_zerodim_array(self): # GH#26916 ts = Timestamp.now() diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index e4803a9cd3038..620f529b522ae 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -1,3 +1,5 @@ +from datetime import datetime + import numpy as np import pytest @@ -128,6 +130,15 @@ def test_clip_with_datetimes(self): ) tm.assert_series_equal(result, expected) + def test_clip_with_timestamps_and_oob_datetimes(self): + # GH-42794 + ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)]) + + result = ser.clip(lower=Timestamp.min, upper=Timestamp.max) + expected = Series([Timestamp.min, Timestamp.max], dtype="object") + + tm.assert_series_equal(result, expected) + def test_clip_pos_args_deprecation(self): # https://github.com/pandas-dev/pandas/issues/41485 ser = Series([1, 2, 3]) From 84f3302860564fc80d7c7a45535b65ccaf4b53ce Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Thu, 29 Jul 2021 16:30:51 -0700 Subject: [PATCH 15/20] COMPAT: MPL 3.4.0 (#42803) --- pandas/plotting/_matplotlib/tools.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 9d509d02c2e4f..6d27cc8686b65 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -417,8 +417,12 @@ def handle_shared_axes( except IndexError: # if gridspec is used, ax.rowNum and ax.colNum may different # from layout shape. in this case, use last_row logic + if compat.mpl_ge_3_4_0(): + is_last_row = lambda x: x.get_subplotspec().is_last_row() + else: + is_last_row = lambda x: x.is_last_row() for ax in axarr: - if ax.is_last_row(): + if is_last_row(ax): continue if sharex or _has_externally_shared_axis(ax, "x"): _remove_labels_from_axis(ax.xaxis) From 76162a483cb7742f5100f34e5575f8ce43a9c893 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Fri, 30 Jul 2021 02:48:30 +0200 Subject: [PATCH 16/20] Delete duplicates and unused code from reshape tests (#42802) --- pandas/tests/reshape/concat/test_append.py | 1 - .../reshape/concat/test_append_common.py | 2 +- pandas/tests/reshape/concat/test_concat.py | 3 --- pandas/tests/reshape/concat/test_dataframe.py | 6 ++--- pandas/tests/reshape/concat/test_index.py | 22 +++++++++---------- pandas/tests/reshape/test_cut.py | 17 +++----------- 6 files changed, 18 insertions(+), 33 deletions(-) diff --git a/pandas/tests/reshape/concat/test_append.py b/pandas/tests/reshape/concat/test_append.py index 43fe72b0776ed..ea766089f880d 100644 --- a/pandas/tests/reshape/concat/test_append.py +++ b/pandas/tests/reshape/concat/test_append.py @@ -1,5 +1,4 @@ import datetime as dt -from datetime import datetime from itertools import combinations import dateutil diff --git a/pandas/tests/reshape/concat/test_append_common.py b/pandas/tests/reshape/concat/test_append_common.py index 9bd098a9e4e72..b8b254e786194 100644 --- a/pandas/tests/reshape/concat/test_append_common.py +++ b/pandas/tests/reshape/concat/test_append_common.py @@ -371,7 +371,7 @@ def test_concatlike_datetimetz_to_object(self, tz_aware_fixture): ) res = dti1.append(dti3) - # tm.assert_index_equal(res, exp) + tm.assert_index_equal(res, exp) dts1 = Series(dti1) dts3 = Series(dti3) diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 17a7089f0ac85..a4c9e333f4d9c 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -79,9 +79,6 @@ def test_concat_copy(self): assert b.values.base is not None def test_concat_with_group_keys(self): - df = DataFrame(np.random.randn(4, 3)) - df2 = DataFrame(np.random.randn(4, 4)) - # axis=0 df = DataFrame(np.random.randn(3, 4)) df2 = DataFrame(np.random.randn(4, 4)) diff --git a/pandas/tests/reshape/concat/test_dataframe.py b/pandas/tests/reshape/concat/test_dataframe.py index 460546f4b478a..dde8c0c19165f 100644 --- a/pandas/tests/reshape/concat/test_dataframe.py +++ b/pandas/tests/reshape/concat/test_dataframe.py @@ -15,9 +15,9 @@ class TestDataFrameConcat: def test_concat_multiple_frames_dtypes(self): # GH#2759 - A = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) - B = DataFrame(data=np.ones((10, 2)), dtype=np.float32) - results = concat((A, B), axis=1).dtypes + df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64) + df2 = DataFrame(data=np.ones((10, 2)), dtype=np.float32) + results = concat((df1, df2), axis=1).dtypes expected = Series( [np.dtype("float64")] * 2 + [np.dtype("float32")] * 2, index=["foo", "bar", 0, 1], diff --git a/pandas/tests/reshape/concat/test_index.py b/pandas/tests/reshape/concat/test_index.py index bd845f73c7c69..f8ad9d1084c53 100644 --- a/pandas/tests/reshape/concat/test_index.py +++ b/pandas/tests/reshape/concat/test_index.py @@ -96,18 +96,18 @@ def test_concat_rename_index(self): tm.assert_frame_equal(result, exp) assert result.index.names == exp.index.names - @pytest.mark.parametrize("test_series", [True, False]) - def test_concat_copy_index(self, test_series, axis): + def test_concat_copy_index_series(self, axis): # GH 29879 - if test_series: - ser = Series([1, 2]) - comb = concat([ser, ser], axis=axis, copy=True) - assert comb.index is not ser.index - else: - df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) - comb = concat([df, df], axis=axis, copy=True) - assert comb.index is not df.index - assert comb.columns is not df.columns + ser = Series([1, 2]) + comb = concat([ser, ser], axis=axis, copy=True) + assert comb.index is not ser.index + + def test_concat_copy_index_frame(self, axis): + # GH 29879 + df = DataFrame([[1, 2], [3, 4]], columns=["a", "b"]) + comb = concat([df, df], axis=axis, copy=True) + assert comb.index is not df.index + assert comb.columns is not df.columns def test_default_index(self): # is_series and ignore_index diff --git a/pandas/tests/reshape/test_cut.py b/pandas/tests/reshape/test_cut.py index 127be504e82d5..c221a3a18911e 100644 --- a/pandas/tests/reshape/test_cut.py +++ b/pandas/tests/reshape/test_cut.py @@ -32,8 +32,9 @@ def test_simple(): tm.assert_numpy_array_equal(result, expected, check_dtype=False) -def test_bins(): - data = np.array([0.2, 1.4, 2.5, 6.2, 9.7, 2.1]) +@pytest.mark.parametrize("func", [list, np.array]) +def test_bins(func): + data = func([0.2, 1.4, 2.5, 6.2, 9.7, 2.1]) result, bins = cut(data, 3, retbins=True) intervals = IntervalIndex.from_breaks(bins.round(3)) @@ -68,18 +69,6 @@ def test_no_right(): tm.assert_almost_equal(bins, np.array([0.2, 2.575, 4.95, 7.325, 9.7095])) -def test_array_like(): - data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1] - result, bins = cut(data, 3, retbins=True) - - intervals = IntervalIndex.from_breaks(bins.round(3)) - intervals = intervals.take([0, 0, 0, 1, 2, 0]) - expected = Categorical(intervals, ordered=True) - - tm.assert_categorical_equal(result, expected) - tm.assert_almost_equal(bins, np.array([0.1905, 3.36666667, 6.53333333, 9.7])) - - def test_bins_from_interval_index(): c = cut(range(5), 3) expected = c From 2c21525318ba24dc7c77e51ae490c5e76af3bea1 Mon Sep 17 00:00:00 2001 From: Thomas Li <47963215+lithomas1@users.noreply.github.com> Date: Fri, 30 Jul 2021 01:07:42 -0700 Subject: [PATCH 17/20] REGR: ValueError raised when both prefix and names are set to None (#42690) * REGR: ValueError raised when both prefix and names are set to None * Update readers.py * whitespace * Update v1.3.1.rst * Update v1.3.2.rst * Update readers.py * Update readers.py Co-authored-by: Jeff Reback --- doc/source/whatsnew/v1.3.2.rst | 1 + pandas/io/parsers/readers.py | 7 ++++++- .../tests/io/parser/common/test_common_basic.py | 17 +++++++++++++---- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 3350d9469a9ee..116bdd6e1d98f 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -19,6 +19,7 @@ Fixed regressions - Regression in :meth:`DataFrame.from_records` with empty records (:issue:`42456`) - Fixed regression in :meth:`DataFrame.shift` where TypeError occurred when shifting DataFrame created by concatenation of slices and fills with values (:issue:`42719`) - Regression in :meth:`DataFrame.agg` when the ``func`` argument returned lists and ``axis=1`` (:issue:`42727`) +- Fixed regression where :meth:`pandas.read_csv` raised a ``ValueError`` when parameters ``names`` and ``prefix`` were both set to None (:issue:`42387`) - Fixed regression in comparisons between :class:`Timestamp` object and ``datetime64`` objects outside the implementation bounds for nanosecond ``datetime64`` (:issue:`42794`) - diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 06bdbe3054a15..c639a4a9d494e 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -1302,7 +1302,12 @@ def _refine_defaults_read( if delimiter and (sep is not lib.no_default): raise ValueError("Specified a sep and a delimiter; you can only specify one.") - if names is not lib.no_default and prefix is not lib.no_default: + if ( + names is not None + and names is not lib.no_default + and prefix is not None + and prefix is not lib.no_default + ): raise ValueError("Specified named and prefix; you can only specify one.") kwds["names"] = None if names is lib.no_default else names diff --git a/pandas/tests/io/parser/common/test_common_basic.py b/pandas/tests/io/parser/common/test_common_basic.py index a1c76e2740dbe..b2e528aa5f8d5 100644 --- a/pandas/tests/io/parser/common/test_common_basic.py +++ b/pandas/tests/io/parser/common/test_common_basic.py @@ -764,15 +764,24 @@ def test_read_table_delim_whitespace_non_default_sep(all_parsers, delimiter): @pytest.mark.parametrize("func", ["read_csv", "read_table"]) -@pytest.mark.parametrize("prefix", [None, "x"]) -@pytest.mark.parametrize("names", [None, ["a"]]) -def test_names_and_prefix_not_lib_no_default(all_parsers, names, prefix, func): +def test_names_and_prefix_not_None_raises(all_parsers, func): # GH#39123 f = StringIO("a,b\n1,2") parser = all_parsers msg = "Specified named and prefix; you can only specify one." with pytest.raises(ValueError, match=msg): - getattr(parser, func)(f, names=names, prefix=prefix) + getattr(parser, func)(f, names=["a", "b"], prefix="x") + + +@pytest.mark.parametrize("func", ["read_csv", "read_table"]) +@pytest.mark.parametrize("prefix, names", [(None, ["x0", "x1"]), ("x", None)]) +def test_names_and_prefix_explicit_None(all_parsers, names, prefix, func): + # GH42387 + f = StringIO("a,b\n1,2") + expected = DataFrame({"x0": ["a", "1"], "x1": ["b", "2"]}) + parser = all_parsers + result = getattr(parser, func)(f, names=names, sep=",", prefix=prefix, header=None) + tm.assert_frame_equal(result, expected) def test_dict_keys_as_names(all_parsers): From 13100d818ae59ad05a3ea6dea792d66193705fd9 Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Fri, 30 Jul 2021 13:28:39 -0300 Subject: [PATCH 18/20] TST: Add style.py to the doctest check --- ci/code_checks.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index f481ecf7a97ed..d04d0eaee6ec4 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,6 +122,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/io/sas/ \ pandas/io/sql.py \ pandas/io/formats/format.py \ + pandas/io/formats/style.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" From adc540f4806a1dda413805a0466b1b02c397facc Mon Sep 17 00:00:00 2001 From: Krishna Chivukula <63070026+KrishnaSai2020@users.noreply.github.com> Date: Fri, 30 Jul 2021 16:51:30 +0100 Subject: [PATCH 19/20] TST: fixed eng_formatter doctest for #42671 (#42705) --- ci/code_checks.sh | 1 + pandas/io/formats/format.py | 14 ++++++-------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 9f59958b4e827..f481ecf7a97ed 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -121,6 +121,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/io/parsers/ \ pandas/io/sas/ \ pandas/io/sql.py \ + pandas/io/formats/format.py \ pandas/tseries/ RET=$(($RET + $?)) ; echo $MSG "DONE" diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 83e0086958b9a..3fd3d84f90161 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1956,16 +1956,14 @@ def __call__(self, num: int | float) -> str: """ Formats a number in engineering notation, appending a letter representing the power of 1000 of the original number. Some examples: - - >>> format_eng(0) # for self.accuracy = 0 + >>> format_eng = EngFormatter(accuracy=0, use_eng_prefix=True) + >>> format_eng(0) ' 0' - - >>> format_eng(1000000) # for self.accuracy = 1, - # self.use_eng_prefix = True + >>> format_eng = EngFormatter(accuracy=1, use_eng_prefix=True) + >>> format_eng(1_000_000) ' 1.0M' - - >>> format_eng("-1e-6") # for self.accuracy = 2 - # self.use_eng_prefix = False + >>> format_eng = EngFormatter(accuracy=2, use_eng_prefix=False) + >>> format_eng("-1e-6") '-1.00E-06' @param num: the value to represent From 4654dfca4b7a77e280ef60f8aed8f79f2bb4f96b Mon Sep 17 00:00:00 2001 From: Leonardo Freua Date: Fri, 30 Jul 2021 15:19:26 -0300 Subject: [PATCH 20/20] TST: Revert x and y position in some doctests * Updated the doctest of the hide_columns() --- pandas/io/formats/style.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 24d1fce2a4679..3d6705ed593d2 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1870,11 +1870,11 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: >>> midx = pd.MultiIndex.from_product([["x", "y"], ["a", "b", "c"]]) >>> df = pd.DataFrame(np.random.randn(6,6), index=midx, columns=midx) >>> df.style.format("{:.1f}").hide_columns() # doctest: +SKIP - a 0.1 0.0 0.4 1.3 0.6 -1.4 - x b 0.7 1.0 1.3 1.5 -0.0 -0.2 + x a 0.1 0.0 0.4 1.3 0.6 -1.4 + b 0.7 1.0 1.3 1.5 -0.0 -0.2 c 1.4 -0.8 1.6 -0.2 -0.4 -0.3 - a 0.4 1.0 -0.2 -0.8 -1.2 1.1 - y b -0.6 1.2 1.8 1.9 0.3 0.3 + y a 0.4 1.0 -0.2 -0.8 -1.2 1.1 + b -0.6 1.2 1.8 1.9 0.3 0.3 c 0.8 0.5 -0.3 1.2 2.2 -0.8 Hide specific columns but retain the column headers: @@ -1883,22 +1883,22 @@ def hide_columns(self, subset: Subset | None = None) -> Styler: ... # doctest: +SKIP x y b b - a 0.0 0.6 - x b 1.0 -0.0 + x a 0.0 0.6 + b 1.0 -0.0 c -0.8 -0.4 - a 1.0 -1.2 - y b 1.2 0.3 + y a 1.0 -1.2 + b 1.2 0.3 c 0.5 2.2 Hide specific columns and the column headers: >>> df.style.format("{:.1f}").hide_columns( ... subset=(slice(None), ["a", "c"])).hide_columns() # doctest: +SKIP - a 0.0 0.6 - x b 1.0 -0.0 + x a 0.0 0.6 + b 1.0 -0.0 c -0.8 -0.4 - a 1.0 -1.2 - y b 1.2 0.3 + y a 1.0 -1.2 + b 1.2 0.3 c 0.5 2.2 """ if subset is None: