From ce4cb0074bdfb26771ef78b881c8fbb3b62024b4 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 20 Feb 2021 13:35:12 +0100 Subject: [PATCH 01/26] add a gradient map to background gradient --- pandas/io/formats/style.py | 74 ++++++++++++++++----------- pandas/tests/io/formats/test_style.py | 49 ++++++++++++++---- 2 files changed, 83 insertions(+), 40 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 877e146fd8681..70701b4fd0879 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1308,21 +1308,27 @@ def background_gradient( text_color_threshold: float = 0.408, vmin: Optional[float] = None, vmax: Optional[float] = None, + gmap: Optional[Sequence] = None, ) -> Styler: """ Color the background in a gradient style. The background color is determined according - to the data in each column (optionally row). Requires matplotlib. + to the data in each column, row or frame, or by a given + gradient map. Requires matplotlib. Parameters ---------- cmap : str or colormap Matplotlib colormap. low : float - Compress the range by the low. + Compress the color range at the low end. This is a multiple of the data + range to extend below the minimum; sound values usually in [0, 0.5], + defaults to 0. high : float - Compress the range by the high. + Compress the color range at the high end. This is a multiple of the data + range to extend above the maximum; sound values usually in [0, 0.5], + defaults to 0. axis : {0 or 'index', 1 or 'columns', None}, default 0 Apply to each column (``axis=0`` or ``'index'``), to each row (``axis=1`` or ``'columns'``), or to the entire DataFrame at once @@ -1330,41 +1336,50 @@ def background_gradient( subset : IndexSlice A valid slice for ``data`` to limit the style application to. text_color_threshold : float or int - Luminance threshold for determining text color. Facilitates text - visibility across varying background colors. From 0 to 1. 
- 0 = all text is dark colored, 1 = all text is light colored. + Luminance threshold for determining text color in [0, 1]. Facilitates text + visibility across varying background colors. All text is dark if 0, and + light if 1, defaults to 0.408. .. versionadded:: 0.24.0 vmin : float, optional Minimum data value that corresponds to colormap minimum value. - When None (default): the minimum value of the data will be used. + If not specified the minimum value of the data will be used. .. versionadded:: 1.0.0 vmax : float, optional Maximum data value that corresponds to colormap maximum value. - When None (default): the maximum value of the data will be used. + If not specified the maximum value of the data will be used. .. versionadded:: 1.0.0 + gmap : array-like, optional + Gradient map for determining the background colors. If not supplied + will use the input data from rows, columns or frame. Must be an + identical shape for sampling columns, rows or DataFrame based on ``axis``. + + .. versionadded:: 1.3.0 + Returns ------- self : Styler - Raises - ------ - ValueError - If ``text_color_threshold`` is not a value from 0 to 1. - Notes ----- - Set ``text_color_threshold`` or tune ``low`` and ``high`` to keep the - text legible by not using the entire range of the color map. The range - of the data is extended by ``low * (x.max() - x.min())`` and ``high * - (x.max() - x.min())`` before normalizing. + When using ``low`` and ``high`` the range + of the data is extended at the low end effectively by + `data.min - low * data.range` and at the high end by + `data.max + high * data.range` before the colors are normalized and determined. + + If combining with ``vmin`` and ``vmax`` the `data.min`, `data.max` and + `data.range` are replaced by values according to the values derived from + ``vmin`` and ``vmax``. + + This method will preselect numeric columns and ignore non-numeric columns + unless a ``gmap`` is supplied in which case no preselection occurs. 
""" - if subset is None: + if subset is None and gmap is None: subset = self.data.select_dtypes(include=np.number).columns self.apply( @@ -1377,6 +1392,7 @@ def background_gradient( text_color_threshold=text_color_threshold, vmin=vmin, vmax=vmax, + gmap=gmap, ) return self @@ -1389,26 +1405,22 @@ def _background_gradient( text_color_threshold: float = 0.408, vmin: Optional[float] = None, vmax: Optional[float] = None, + gmap: Optional[Sequence] = None, ): """ - Color background in a range according to the data. + Color background in a range according to the data or a gradient map """ - if ( - not isinstance(text_color_threshold, (float, int)) - or not 0 <= text_color_threshold <= 1 - ): - msg = "`text_color_threshold` must be a value from 0 to 1." - raise ValueError(msg) - + if gmap is None: + gmap = s.to_numpy(dtype=float) + else: + gmap = np.asarray(gmap, dtype=float).reshape(s.shape) with _mpl(Styler.background_gradient) as (plt, colors): - smin = np.nanmin(s.to_numpy()) if vmin is None else vmin - smax = np.nanmax(s.to_numpy()) if vmax is None else vmax + smin = np.nanmin(gmap) if vmin is None else vmin + smax = np.nanmax(gmap) if vmax is None else vmax rng = smax - smin # extend lower / upper bounds, compresses color range norm = colors.Normalize(smin - (rng * low), smax + (rng * high)) - # matplotlib colors.Normalize modifies inplace? 
- # https://github.com/matplotlib/matplotlib/issues/5427 - rgbas = plt.cm.get_cmap(cmap)(norm(s.to_numpy(dtype=float))) + rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) def relative_luminance(rgba) -> float: """ diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index d134f33e15525..1aad3fae473d2 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -1966,15 +1966,6 @@ def test_text_color_threshold(self, c_map, expected): result = df.style.background_gradient(cmap=c_map)._compute().ctx assert result == expected - @pytest.mark.parametrize("text_color_threshold", [1.1, "1", -1, [2, 2]]) - def test_text_color_threshold_raises(self, text_color_threshold): - df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) - msg = "`text_color_threshold` must be a value from 0 to 1." - with pytest.raises(ValueError, match=msg): - df.style.background_gradient( - text_color_threshold=text_color_threshold - )._compute() - @td.skip_if_no_mpl def test_background_gradient_axis(self): df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) @@ -2017,6 +2008,46 @@ def test_background_gradient_int64(self): assert ctx2[(1, 0)] == ctx1[(1, 0)] assert ctx2[(2, 0)] == ctx1[(2, 0)] + @pytest.mark.parametrize( + "axis, gmap, expected", + [ + ( + 0, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + 1, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + None, + np.array([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", 
"#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ], + ) + def test_background_gradient_gmap(self, axis, gmap, expected): + df = DataFrame([[1, 2], [2, 1]]) + result = df.style.background_gradient(axis=axis, gmap=gmap)._compute().ctx + assert result == expected + def test_block_names(): # catch accidental removal of a block From d12d0afafec160ff6556a0a1ae2b34169d1d050d Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sat, 20 Feb 2021 15:28:37 +0100 Subject: [PATCH 02/26] whats new --- doc/source/whatsnew/v1.3.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 388c5dbf6a7ee..ffe2b867b9e41 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -67,6 +67,7 @@ Other enhancements - :meth:`.Styler.set_tooltips_class` and :meth:`.Styler.set_table_styles` amended to optionally allow certain css-string input arguments (:issue:`39564`) - :meth:`.Styler.apply` now more consistently accepts ndarray function returns, i.e. in all cases for ``axis`` is ``0, 1 or None`` (:issue:`39359`) - :meth:`.Styler.apply` and :meth:`.Styler.applymap` now raise errors if wrong format CSS is passed on render (:issue:`39660`) +- :meth:`.Styler.background_gradient` now allows the ability to supply a specific gradient map (:issue:`22727`) - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - :meth:`pandas.read_stata` and :class:`StataReader` support reading data from compressed files. 
- Add support for parsing ``ISO 8601``-like timestamps with negative signs to :meth:`pandas.Timedelta` (:issue:`37172`) From ea7b5ee379bf7e7a5f809d32a711704b081143b4 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 22 Feb 2021 16:04:31 +0100 Subject: [PATCH 03/26] add examples --- doc/source/_static/style/bg_ax0.png | Bin 0 -> 13699 bytes doc/source/_static/style/bg_axNone.png | Bin 0 -> 14203 bytes doc/source/_static/style/bg_axNone_gmap.png | Bin 0 -> 13629 bytes .../_static/style/bg_axNone_lowhigh.png | Bin 0 -> 14041 bytes .../_static/style/bg_axNone_vminvmax.png | Bin 0 -> 13030 bytes doc/source/_static/style/bg_gmap.png | Bin 0 -> 12925 bytes pandas/io/formats/style.py | 66 ++++++++++++++++-- 7 files changed, 59 insertions(+), 7 deletions(-) create mode 100644 doc/source/_static/style/bg_ax0.png create mode 100644 doc/source/_static/style/bg_axNone.png create mode 100644 doc/source/_static/style/bg_axNone_gmap.png create mode 100644 doc/source/_static/style/bg_axNone_lowhigh.png create mode 100644 doc/source/_static/style/bg_axNone_vminvmax.png create mode 100644 doc/source/_static/style/bg_gmap.png diff --git a/doc/source/_static/style/bg_ax0.png b/doc/source/_static/style/bg_ax0.png new file mode 100644 index 0000000000000000000000000000000000000000..1767d34136a02482983199f701bde9d21eba35f8 GIT binary patch literal 13699 zcmZX)b95y^xBneYY}@upGO=@F+qP{_%!zH=nAo;$PCOI)mwE1e@4aii=Z~{?*RI;T zs=KTEbH4jTD9DK;z~aJ!fPf%KN&uBWKtL(KY!hh6uWvfCY#0y_*kVf&5d}#R5n=@= zdoxQLQxFh|h~yL~6%|R$(DSb8h{ahG3#aJ>iV;d_NJY!`d`sa4Ha1!_QwIofbQBZ? 
zF_UoESSfrJAUb$Z0PHa+j>@7VGchggYg}>q8efO^Lyq&#>$GpH-ByLV>ogL`GiKt9 zd>{pg#E~iNk1n0x@rrXUn-BpYxTC~~teQAJsd>Z#0jvA3*AIT!V`3L2b3WDnpC9_N z2?V))U?8x0{^)AjGmHj$W8iL5n10mETHu*;B*nSo>qEfRuQ9)zEk)B_&h0j=`=6 zA4*}jtr?C588VdOgK*Z*W*+W#kOCoXAD^4CpSI;R{X<-A$W&CYok1rjnLDkAZ2Bm- z`(R*L=@WXVmN{r4+lEn-;|@`JChQAa>TK*3z4q~FzjELevt`6iN#W*yh~6Kt@(N?7 z(`aN!r{N!yW^m+?RHSEP#E(H(b08~WC7pg02qCO?O(%=*#5$S9ID zW5jr$n2vqcwP;>c2Ibb-5k@@?hjw(!sw;Z`Oe0kA_Jq!)$V)~CWMfC;vXM3p0=^~= z-oa>@^nUvYfYC_*bIx*t=O%qni{-!-k?w+ZI6Sz3!<$l1vbDLm2_NwM+Ir)?GvB>M z7w%?Dld}CSz?Km8kbWp_l*_^&4CMY@N#+2-iR}T*!7YFS-GpIq_7(}A7|f9v;V=-^ z$b)Q%r!r>2}IxKvU;yf^X@aA0z&Tm>m zRKGwNgU$^=HX$GTsMA20deJ?Q%t)a7h0w!c$3!aPFik+xMcCuW4nd!V`V`R7;T(wx z$%EO2dGdV9kZD6=g$0(A}aInnjL zIq$k~;!OuW_EI=d_~6?GGWD+PZQM}5f#rySqQi)al;=k)uv2rW5Yyr&2cYJ$7yeZk zDw9$ns{mh$YLCDb?E?A&jptO@2yBB}#Jvj#X6aqHy;!|C+R=5R?1Uck>m*kZR<~9MRw-?; z+>m)(FCiF)X z2=?uN-O|iW-uS+ClcG5sI5laY!_fvPq#% zv`EE{PfID1%8-PVDv`QMsY>mneU^cxDrPt$8A=(C=caU3w91Msuu|A6qd!Bm!nj9u z6~?8jCY$=2*mNpC$rckI1sO%745f})woLk@X1XG7WQnZttdhmICgmo>bt+9$O{_JK zHQk1o29X9hS3Zl$_;$YNtDU#W$0+Ht*cpCgfRtYrO)S(Pl+1eHjY>2jG0B~=2| zwhGaT6V==W(^JMfbT62$F#h6_qT^~x6`qRaTK3ACLc)@6-7d-Zd`QIsF*2b-i9&5- zDP`3Kedj{6pkqFUA1N_v1-2g|tQwd-7o!b4DJs(b6WJ+SDf(4a)eveiYKhAk%k|4g%Vg~H>_Y4vwl%9pHoCU0 zt8VLlO^po`O+2QLrCss94ZdCPjYr%Q2dU}|MD7ai2Hg0r*seu9!`vUIoM%PHgDw>= zUKdq+lWsL`erE+I35QObN6D4tmA%btuESH)wu#!QJmXsvd^24;BI9I}g^LIArR`KB zjeXAJ6Z6(V>Zu&V8!O(qkI*9NeMXCb5}ztx_UF5&1YZ#!X)!l+;4{dM4#^; zNMKE19-!5M#BnIU1a1uDQ|MKvV@z8ye3HAmW4q6mR9+r%rE5a&nmbF;uxOlj@-rY(4OomdE8`Ky+9EPu* z*KD?1T3UjwR94H?CF>4tJ9eR`WG8Lme_&;e(;Ul+H6BbJk*(jZEpHAGT zxb>P>nO`nO_>1t=_BQ9zaR>fe8BAZu6*8%wwva&?7lqqbSa;98zn*@is-X(WdSE3- zkE$)UwZ^{d<3;y8*Ua1Ovk9~-usPqmaDCVoZ-l+ghPxeS9ly2BJ8C#3ivdl$t7X}a z%uagy;oNQEGGt@tGJJI|zA&rtyZIYW?KQ5`kkhv|GM-C!HJ_g!Ij_E-j8VE_ZoX5- zC%O|HQ%uvJote$A>quq-NpH-pREF~YfvSD+U8`N4T?4OK4@;O*m}Xe9*N;0pNagq%u9U-{th|I~3nmY%M)lv#)5?YU|qnZoIJmSo*ti z>{t1bg15}E4rv3$4Vj9RgXe_X?y&HFe54(%y2wY7Sl>ND^7Hy<T7Mm8|~UpKBZ#T;Km_-FRJ1UA6wTx87#( 
zUg0BPdq4F(dtFQKMEXl)C9v*u@s9rI_wS4KnO`#$`gh$fKKJi028^zDu`}q*J(8g4 z`wqfjAof-uc|ekN(7CC>WVn~7Od+_854^z7K{(u(OifUQ19-tzHIO&t7$*ovhTySL zZ#~EmZt&H9;lt0SXb8C55G@nncO(AEsV-GWkQLBgtiH|7i@mlsCRK?>chFNQ{n?}2 zP|>?r$w}V(avR7`t4|Pqt#5w`c#xRC2sYbNMZ-lyR)*Wy-j>nO#NNo1(Zkl^3ur+= zcs;nkOj}bILt+nG8#`xi4?fcW^x*!o|1~p_691=*i!~pqhO7dyh`p03@pncxMrKle zSYl#gUMCYXZY7}De-HmU;v=!DXUmT#Rv$2zS6i6ne3eZ zds$x#Wcue}0x&W&{ZH(#p}hZExfLuuOl>rPmbRvL&R;qBS-9AE|I`2f=lS1^|7WD; z|BeJOGymU_|Ht#+BYBzrE#Utw=)ca^e_Fpz7e6d7)Bl`$eprEK#jguReIp4JQt<%2 z^o7$=8Cm-{P-lu-{o!vciiWnR!WPj-5d#<$`w@dK!&b8-R;~4;)k|6-2-Sibvsq{{ zX_r8R4HO~eJ|R3AqCl)HwOs;Tp;Hb0hV?Db!!YKBXNTwHLenjkudd?)c9=`R{V~hk zY}<3k+$@LH9C3O>Di8c^7uo@W6N~{m@Y7PL#X*@93?DQ$ARL5>avSB#4#W%tp(?># zA>Sm91rZNGL%Rn5@(V_Z1fVHLS)^UeF0xG1xE2Pf{! zw8L=UEZSCp(Efz{2e)qwOr)NcO0F|@q8xSHWlgw(UA?T`IH)a@7pD+rWmiQp)sjZs zvF!U(y6==q=2+X;QdD|@7y{o#cV$BBjgjvqMqINw6&Ck!gfE+Q4D}qskz4tsab;`- z>*a5FelWd@Sq$uh*EN}5xcf>vLc3LDMw8^S|*j*q8A0locB6DF}LhEMR2O&?** zU!FKxKua<qk_1xjTMeNldMmIjfC#ii>$Um3NEr+l7jaV)2=ZCo z=1~bXM@yVV&Gal+C0w6OoUvI?lNRFF32uKYwHAQ>nfq}g;g**h6%`A)V>P2Dtw6}z zwDfmm^xF-C%w!&yAiaq};ES@mDco9zQp8(&7cdx@bFx6rB0EZpIv#$rHYuB3h8Arr zWUJb=CMcntQ0CLXMtr`eNJq}vRiHDB^1}=Tg&n~Q1ub~4ihM1A03|Jk6O~q}W3jSP zXl7{8roJ|Kj-FS$io>MIM7p9zq9IkHQfgTq`?p02^;UQ|Z}S>kVv3pO*rHhiqnq;@ zg9$kJEG|7A2LVhUhF?#0kpBcjXru2gtNkTR3k-Cc`yr5)o8{;@(VL!z)56 zSb^~78lGJGNon#@&3l;+=tM`@ni;;~2AY3(ntw2cLE`cWkAz4E#qHUzW3m7wKVX(6 zf0&`|2T%&vG%hjU@{&zC9JR0|^u%!DO5(V^$fc+pN&HMDDNMIt^pAAimzCqw$`}eo zhRIx`+W-cW5khVx_H+=FGrz(A46KUg1aBml7I(Rl9wM&YYpx)7tTVoDw`i_7VO#QMM=b_hZl68k4e^S zp~nXO(>KI{VYFt2p8msS#N&8K-!9FJ2!;zK{?U@yi>z(>Y+=s70N z7K4_ySC|!4bNKQ;VsKvy%hPz1&q@*dxtZ)%)l6Ls@SWri$!rzb2KVNA!X$+-7Hb!D zzz@lOU%sRfWt)?FX;MbY9}-`v`*~Of+$}57A1=+|Q=!Qsk0bqdt(vo+cGU6W2Ur3ip6wt@PA@CeFFvaiRQN)TIVbC@T;I~h2Q{>2R@5_iPwv4bG zYXec{WuU@&UxsiCFu}!3qirua(&rXN70<6EYlN(CV$kO!p1vYn$dkZ>y38UuU2dZq zW?~ROi%&nTP|9agHlCx|g0C1#50d`zVRcV=EI0l%b5K&@vyPJM@*U)Gqa%Tie$F@?C?5{SY*JApV!bOqJ3cRMwI&Ui7h 
z_qKhc!W(IJ94gf%oVS4=5~KXSyIg{2h5|}yLLs!(9^u=2BQfHSD;%)QZdJQ5lCsiR zB%Yc=KFQjN;`nlY8hT-3X+qYxv@N1nDv7aE7FR1p3x95DW-Sj-lnACOUc_Np!03%& zNiT3KKh`olo>4UM(GX%}PRMw_1uBAf4(E0txVxg|9Z%K*XK0B{COZ_!U3#B^J z%Dyi{Mz=#cxB%q2JAcn=j@}ywv!U^+lb%zQVhG#FVhPD$XLFkLj<$7`35fy(qurp_^=s z=Jf~LVXhpm7h$&AU4C?Q2+YG-0WGXxAB?W+VZ#jOvFv`lj=CK1^CzJ4vB%(tf$=D_KSzaSyO|&>=Moun- z`f$h@K6^--SN@?pcWKw!E^A~r`_({1SK1Z$6ry~y-Lx$+#iib3aLIKMdl};xco*@G zt!{;&nIWyW(_bQ8Lpbspw4OgTvtjsjfv(fEj8OL3DIhg|`*zXHN%46mGK$j`wG3htc?;L#}HBzm%%);*? zju~zvm7`=zPimwzA^YR|>`6%!j~Y=nk^XC@@iPvZhMVpyUhT&7R`d^kIL{(Td1vbs zK=+KG(vM%FDyq75P*LXmNIjw(7#xNZ+RHcxGcm=scD02TXv|fgxbeZw3onL#wYo*O zxNeV;%x5mmn*jK;)OMvN1aT;a!K%}v@bHPDI?#PYdo!G$!%$@Ra|jaK??gb}7xzxQ z*O0Vr9VgaYSDBmry<$w4Dtw)3vvUo2yxON7#>|{tj>sL>CkDsgX;IrDddLmjP@pH61w#KFHA` zT>7~iJE9_0PNm(H+6CNFHcQ@X?pNc{EoYTEctlJ=B+0>sE6&*qgco+Fi|@`>y;sr}U}Gmj3dy14~RUze5@Gp6CpmX5;msx1)!w zjp_#$q1a~?1E-vCZ?O~9OMvcR5z|_fhOoQRxDxP<3ye;Qa!!>V5uID15uy5S@TM60 zMIuNp+Qw{%l<$O151Su5Xj=!Cfvlv*#AwQbXc0fQltB_M?pYUycESZajTx~I4>lm< z{S*T`Qj+bcM^icU8=>=`yLl0ijd1@mJlgOi?(z}C^*}!CxlHcon67Yv%3-dIe2mD( zMuNrN?-D+nMBY&51yv0RX13s#%b3w%QR0g&Qrte@|r~0CM zvyTZ%0XwSi`5khDB&ZF9>;5tjW{)W55;^>}f~@q|fg)aPdAFRQykb9X^s@Js+gQ~I zYBQH{g1~3h>Xqt1$FS8W@SNNJ+z~k(<%&w=;)NF-F&X}?+m`rzG_4$3|Cl8H(B$#R zIJ97Fi-3_8u-$_C17*CY+lF*4j~_TP;uAioYM(ZfQ~rrxO*#*_%e|CjlQWlm55w7| zJTP8^(xGo1@$3kfRKuVBg!!}B;UDl+d80sX?*62He`;W1uePt_M_AHz7*bvPiY7TF z*e8qR;SJTdG5|*i;TK{DRmTY#<{B>sdL%J; z4(S9+-XFuvt1b%61U!I=EzM1Z``IE!BCtwWdlEt8Od)#Lu0}6?(h3dkNXKQN2N5Xx zk6>6g4ny_lQ{t|b3k-J=#10C8I;UKhMIDk=e!t6o^uG!O?Dz#p`@_X46Nc-Dh5!EW zC>fd7yjyjNWtX|WjLNHD048V!%C7p)8L4PRd#mDU5`T_k?hN}gsbVuylGx%YPWogZ z=mNsqqXT9No%F=0Fz-htqwDAqmlW&|wltUID851G@xtQLORe-&@u&Py+nyP?ye&jV z4A~%@4UqT?_5;8_s;zSN3=apRg2>0WoSdmTP$TIyGvw6Bo#`ahm^&@sw{W8mxA6*J z6qY>k%UDY0(M}s)!Ob_u$A;2Vc~g~E+Y{e<`Kl1Fn(BoYD?p%@UhwmhZYqUZLvh)E zeO4<;d);u8>nV28zs!E|8k~)=>H(B$JlV%m@Cq4l$-{_n6;(=Y6Fz~6VYwYO``pB6 z;$btd&n1Hz&Wh6Vp`xHnv3&Ihh+NH4U1;52WPT$HXEtk`!^F$G2J~ZmNU4J`_}ppl 
zyEcBnj5+q`2E|5Uf$@ato}J}RPZF6l`zSv9L7YKSy`f4xZXN3zi{o-GzRT`2CoDoO46Q>LQ-uYS8K3fzij`_YInw@Qh{j z7lV>($3aU9&w4kQZ)=`#zIy_(#ag+6gfwnMUdTE=4v~x^EN_%3TnAX)=T7ac$u$e zQPx?%?^8+K3&?$LIp_jP?z=v)d%LIRv+TtH@;;DaqnO{p>Q#vrFOq)|N{80H2r11@ z+RjVPzAxA-H^W&t7~9o3A#bY?<8(|lFQQ06!w zp5dOl41e}ELD1q6off~61UGPx7QUh2==~c#7p-pY6bo=a*+z~}B41IH zKS9}nIOxiNTKIq{L^sXe49zhR1NF9#?TE!gLX89}#Zlr!9*l$5-K@x!+ao zy1c7jf{echtpT>EfiYG4&svf_{R~DB%4WOk1a0a$+ycN(u#D3gbc#YLw#Z;|fl%$1 zTXu9=e3cuKVL6bGhmIiW=a%>(b^EZ5j2zL#llHI+p8Oq~t>SmHpqTbUo0`oBVAD$E z!W2cSuUU`m+QT7Y*QM3MsLkQ@&sG|Fo(0;W+v9{x-2En7`N3(sinWK^lvyY(cLh6O zM}(xC?ihe8`*S@sfG%Q;9WH}PT{*nmlip3QT8@nJBz=)S1DBLCY&k6rieNjQZ4N8k+lCvYyHZz6xer=ZvuZbn0V9(|#w7Ccy1nwE^RgUCP7 z#rZ;D7Sy-YFW|HQKvRFT2}af5P}z;{y-f%>B2{-h{*`&?sUw;`aGN$qDm_TiTX_u{ zXovS1RtftiaPTi2^uKTtA`YzovT{Rk0+tN>X8tpSAOB`BkGu*0kFOEG61i&Ju>P|{ zhBr+09u(w=-aqPc-Q{pj% z7izd#I<(xxMOpcVv^;$k8|b$M0N9~1j(5ybk5w-@os(m}$PS5mD!*x)$F~U1b2;?A zaA((~Xg5wlYF>;Gk(%qc8&5NW_KehTojYFYpDVsRgLSH=E_&o_i^OM=BzL8tMBQxD zs~{58q)0b&K4#js@hZR$e77-!Wt8w9)~qhnOJhHg8{ z$9D;=JZiX{eybjKT6b#mm{wd=(-ld3Rk`!|%=E&b4taKREn}p3zlXO08gKwq7u?#{ ze;ywcLo@u9|1P)`qq1p~$YgHx#{S)Dp?|hH$fWpeOc1sPJ0vDU^_SpJ*P#upLRGp9 z#X0j;Z!HyV-ZNDq*#Tw|ugbWg(bm*Dzs_KM-k{%w{xP|&pA=wX{t$=DyW$jW7|atN z7%_`f0{w65e8h5TSJ^`~wK;vEtzeQTOuGWENM0v);*!--M}KXmB4eaTeWw_bd`bsk zVCPRMukRm5(QbY{@}+cs%4bpY1^Q#on~f3@dTgphDCLOj^%J}A9t>IkaUFvR7Bin? z@8m;ysERsLGjl09=5}EWwT}e$W7GI7Ld1%g16gDH^o;7_?g+3DYPS}12ZZfgXBX?PKgz`e0*hF(LqVp z!{bN*z3F#f%p9DQZ>qh(zrVlnUTs5C@yi1vvZnnoZtu{Xmqr(A*R{=9ebpyqWo`a? 
zp7Tp^HieU(`UIV}(ba2oKs60D3{9dS5BNOK%HCPQUOwCT{o+pjxsA%@J2NI$s6s}_v}u%>m@YMjG0nAexwqDQ<1ox=mYoLrVgDt znYm`b|87i2ZoMRi{aM={;1ohD@vDvLBc|)7K8mN(PZ8B&Xsw!|S;=0+7T~~_v{xMc z^>m3jXDDOkV9;Wu)V?NnNlw|2I90#ey&^CtA{w!0$PZ?5+@)=x(;t4r7o zJ0GHqLw?=_tZNCy)X?#@=~#0 zGDENMelp(dfEm?=pcl{{F{+sMg$dW3#l=kdt6X47%LmJMvFD_Pq{X|ZJNbNw{c$(W z3SwLJNv2(?n4mm=-`Msvvqm)AGLrdZsiB=WIj4q*YgTM}`vScNRa z-pfGPLo-yi{<2*yPn-+1uUxp=_8Vc9j{A7le88wn#>~<;ziFHr4@oTF#45E$hC#d}TnIDBm{eAbhG0!Fbo^oa7`4B%;Cf;{A~uCG#R)SNHl0J#>OT zI2BNp>X}xXg5E#DnOBv?`Rfv>)^|vtrbv9ni-&DC zlhbXcf+{uJ+0*Av>o$cZpT2klD<+a-rU|J2!CX7B_jrdk?Y z(h$u%u-|E*{VazgI#xfv1p9naw%5yi>&R|Z1rX%|XoLsQ{!zp6S1M&^l9}^=tb+bU z1VzvY^8XR_;JyfADulQ8AFC(;za~V=#vAyLTNoiSQqxZTuNT-cd3lwboB6-G%nGAe zup4nFp~ATTlsjFn#^d(s} zjrGt$q#YTlgF|sc3A@sI5lU%r>Q;t@AC)+yt>D4jH;?T=18zL$yVYG9OW>`7~fpx)>gNgGhzMqesF`cOJ*N0I3idcNka@Wq6rLbc<+m<7sqairfGg&6!OB92 z`uMxvIbYyS+=`s5c&G$R;^lgom`^O_l}wjDPyD4HM?y!0l~pm7zBEpd;BGZuMZbn= zGs4lS>x#ARGs{rR^&Z&_`|tzq0ELSLbAP%7IA8w($@5O8NW7n?WADq1PQK+GVHmMY zFE!AIN}9H~fNodD=Vv+kv7GP?Tg6!m-u_TiUKEe_Rl{3{+Y|HVui(_s!01!#gT3FZ z2sTX!Axri%?U5oZ-PgV=x4mB<6Y3)q0aW1KtTmWqoj$?&ypQ2Vzgy!@r&TPZ9vCyk zP}ySg*PMo~v~Tr||0deq1z&`^AftR2wK3AE<3s{n0}ZBfu-mD7%O~8LdbcV$V~9He zz59Njrv@^(2XKoQ;;pG7C^sR9$;qa(-NPiyNZ64_RPgc3ck^Y5DWX&?nqmT{AM4zxG=e_^dn#T31YK8U#9I-v} zWU_`#r?-RSn8K0Ej$Qe}@^b8{Xi4Tg{SvsF;|BO|^L)R_&0_qmUXj|g z=8|+J_$V+vhzgJrR4;lPgL?WrGCEQICNl!%=g$0IZL)~{V=UwRB4Ya!$;0QAvuH9S zoFSspfu6QEZD=V!GU*Nv!D2sxAwMwbFI#G71fw>3YRQZL+c`^Cbgnok?(tqwKn*63 z)3~SwB|2IF?iM>eV9j5rJVu8hwYuSGh2Ka?v1q}EU^)afFb_4e(R#`Jx-;G!!obd<-Y`unhu_Tg zN6BCpt96Jxe>I+pvRAb|@S2kZD16{~%NV-AcYw&OnTVQKrj(DpS<)xW*dy^d3E}<< z(PP9&72)v!O=MO>V%x&X{$3aWu+rq@5*9Fi>EE*v=4!pXH|zPA=3K;)lH$CzO1)yM z#s2-#oj7#xL!I95Trt+ct7xGTlGr_;2$6D#BxteD_rrdoe%_?nommo?%%I!CVW5$?8qm0dVf=-K+ z<`pAbDg8JJxUX{X23xI%oa=I;B$z6X{yY#0CGOgp|HiW&snZuMZ>vBwj-zkE6vBtF znyxD0nr`{eaa4B%+L5PUx<*5hZvIXPDjDyYk z<$SX4x6SP0H%>{CdPnL+Fuvhk)(pP;Wr&EM9zkXdg`zX2*n&UZ_i{BE13m*-DW0Du z9m3wYpP4v;B5KLu?J${Z2I{hL9Y^x zEDzb>{ifkcTPdUNfGSb2>2V-L%6_or0Go 
zo2@6!2L~;q@Xy@|kEM=zT-A02Ug=t*`64A*Bm!8B+MZyt5-P z_r6tSeaGt%DOXks^R-um*3^!y)gh>mkn9;F1EPJ^1rcHM!^-qkCJdvJ{(4X+xi)9r zYnl_GE8IggpHGU~hm+rqwAIdDGpOjW{suS7*8O80nXA`uoyiR+rHfxP{#6(3HdGOb z>g|g!C=_DyI52YKj$Qa+{|PYO-6)^_9G>AEXmE|mxP|hM<`J)Y`B_)OR6}=R=Xg+R< z=9eu`}2nN?llFBVKxjMK&hYsbZHqWv;$6{ z5c|v61q{5_5rfU|2gU^^hnofJqFci|6KD?IxVB$LP!6XN^4BooEaZw(fN*7usjPTe zjtc0f(ex(#&%9b80N=h*ejHlFX*eiS=EDa_=$Sk9(L$BwB`5PI+wudp;Tp~#p9(xF zk(bc;i+;gB%h7`IUVCA@*5;7-1;AtVp8J48zrh!vF4iA_`f0Ip;N(ioe=sY5su;~L zzy@uz6&`U-S>3U{Q>v8_9ZSftBKQOS{SW1BZef;|l)b1cu}lK_yy~g@9_FgsR3W{A z1d~n67!vXlVKYfwq|ay4(BSuor&ja3#%1C(hcRtN=jt^Z55Xbi1+C8L$Kh>??q5In z;uAi&;Z`-3Tlng~*%$LpQE=REb8X~EaIPp&(`t>?-6~j-ePQL4jl~99NBEe|<$E`& zk<)c>MyJ%|AJ+hgOar^V6r8)^n3P`27L_vSJLvN*h!|Hm`g6wPJN}L^P!T;?5hfgJCldYuNlaj zGfprh955tgfZ_xj=JIlKjJSXMt|*b;zv^m@c<)uKQw>MNO7EBa#e4AupI)opAdk6Z zvtVD`xCn+9k~xUtvHE4-)jfh-35e%?Bh7m{0b`g|vqSwt)SEcsUUL~FNfBobE-)n2 zFCB}pXnDWI34##yOUB|ON0{0=gyxm_7dK8TYGWM5L*k2e@8a0l+!M)ei2v1dg{&Ct h=)`wPgmync>;*`1#e^lt{{0nIQdACDBW&>F{{gi+8|451 literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/bg_axNone.png b/doc/source/_static/style/bg_axNone.png new file mode 100644 index 0000000000000000000000000000000000000000..8882c6f689773e9cf8a52bc479fb03b64f00e56e GIT binary patch literal 14203 zcmZv?b9AJ^xAz;{wr$(CGttDhZEIrNnb@8<>2P9qY-8dHZ{~N-d){-`z5Pe8-BtD7 zh4nmDpRT>5Rg|O=;qc%>WhB%YLVCWRCaKQ3Z9oKp;EjNWQ}7ufsiPYI{RS7hAxtlS3{NyaAia`un)%9NKoPL*23VImzPi7IfuFL zf2c*?wP!mQXDd)i55wF3wea$EfD#Vx{P^66|Fo~99R%=lpioo8b%!1w=WMqha2TTA z?SX+|XH6QMSm&dMZyCq_o^XmaFymU-)Z*Zx>~~B;@5o0`%~Oy%A%mX}k$l)^7ZAhB zqSelp&m=e^%jV7}t;)*7!D}4T*|-UkoR&fQqd(z{4uBjqQk{VXQ=Y?9Xa8toViHfC zF=2X0$-+77S+p#zfcEJA6-hG2Xbe0?T!N~9eu0VPt>E_1b213x^*Y+FV?fKqK z`X~>3+Vm~RAbTRT0|r3m7_U_z7|6rBn!-M!3&$gvlSdFGh8g4V>>V-!379hp;z2N+ zOMqK2Dhu?tej*-lJAX5&~qb@4XCF9noJPpehe>U3sRUtQH&_Kaq+4|EHjWSajr!21JJ*s11cC8@XjPe z6k%Lq{DpoMD0JcRV(zmuD*SGsU2uqEo<*!yXg=WnA*w96=@Dth_Nx$W!TLsqJQxO$ zt~+i#_|w5p{gh6WegqD|%>667>o+uSVEIy@7_gG!l|?^PxM;XFNa*m=g3t=NN^VsE 
z6>=KnRp3jpUD0@wJre#BrgIt`g!W-=(!M1_vkY#0KI}f+T^RbY4x&$mj>V(1>L>L$ za8j_QL5)K!EJ-*>7K~Kb>OU$+I82$09ZacMt{b%(5UWF5M|@068e~@yS2tIOR;lc; zJy7`L^G0@^Zh63S!I@A-ht2m~oFkn^osXTxJ771U1z_uAxJP~WO8yAEdHAvR5(Od& zhXuwIP|BdRz<7j-faNsgtB6#PxuH0qP{A}y$QC2b1>?vmkU5a2qu@tpL?K5x%2Fir zjiw`8VP7MleuZlddHG8uJ`?q{WEg0%V~nj+b5(Evho9 z5vdAAsRDiga7^qP8BNSh7)``Y>W5+XboZF{ssYV_Sb%cuHcB9^XyR9=yACRuFjml;P!x@qd<^!6$ z7#@8s`PA3IrZa>{H<<+~$tjx^sJ>g|DP;ZD$x_9ODOWU|RkMO@QExE@QtO!OV6SU_`5Y`Z(KcTNt(XMuq*i9akm$a7j zl}tCu-=hWi$~7eSsx{0*m6^)<%PmaMO&Co09g0t2rx{kwYUFArYs6?wS1MGgX%cF7 zR!LSJYZfe+pD^8H_`vo=3YC?Y9@SE5@K-I@b5+-s5S91p_sG5%L8%T&k&Bkdl<1kt zscSA6x|Udk9tkprq{n?LUKXu2sKvD>yo9;T1J(j(+z#Au_}2Ik`HW|tE!wXJzfB)D8yC)jLQ@Yu_7 zUvV9|9XJEbi>=Y@n`TN^4yr55tYfUxPW{GkEeI_fe?Ls=9GM@rGtgk=W9cyf z84Q@&wXymx#+vxkHRJ;)^U^od4Qpy@A-=_ZOIgldZd^WGCg+;x66N}3U$<&vr*GfB z>H!RBX>OWq;WvN!*^}ho{jLG zb5XPV+oR4S;H>yK`M_o4Fs-_>y1#YJePn9dK1DBse`0e|aHeNle1iOU$>M&}&o1iG z<^k7<$$8sQtqksw^%dWOCm8Xp0h2}6a=#jXuD|!s$^PPg^8UoHr?0SYd~e2q#GmgU z$Y3pCUZAzXB#EdU!Z*f=N-93H%=8WPA>UGKz0mBix>-B8!|v#*U=tGNSACv@JW3 zJIHT6o_j1@hOcj5My<{zmE<;aTE6kuU*ov|Tp&Bi`7b@c`6YbhzxsbN#p*|T_)nQ0 z>rZk|F;9PX=d`{8ku8K%-&oqIjgCb5`g!2L|PqWAK!}IIrb>@nPa)-YrERmX#kqMoJ=CzBy zCmH@ZmA*>wjQ1h5uUxES5Q-2?cfXL;^wPXm<`Q1gq}Mps3as2+`mqeP^0=t8vb03M zgw4^9I2bX$&pHaN)KOq%%V8q`l(3x;hE;-77W(9UJl<9B{n@3wp6eh`;$GgJ1tg${d_KuP!TqiV-7{2BIc|DHKrB0;=C~Foh z3$h0;^PckZ-uNBfMPt8VH#56_Ccc%trCn9C2l9Qcabxmw{?Wbhxth9azjd_TV)R`R zB;04vHOow)}vAzYEtjli)NF`aRX72?eqO+K)Z3v2n56*~zRa)9eX)B4;>zco!jg z|0?@i;GxnE>eJ>EL`WC%mXIHr<%?i*tu?gWv=tTjOdai+jLjTP%$dCGoxXq;1Vq4# z@5{6|cQYpOvbS?^z5#zm7ALrA2YM3rzevqJCmb}B{M58FE29-8#5al<5vqtS8oS5 zV=qPrSMvXI@_+nDn7f*~SUb5{J35g3^J{G4=h5xJKzl8$K|Hl8n=JQ{R^`FwO)e?deVE&(#7lO0Ejy(nep*xk85Y_Mkz0`;O zt_i^VTwmyjf)l{TYIcHU!-kMYR76lLXwQHzpP}R&WL)_^h~O7CQ-)A0eGrp{ak2p2 zi9rQt4pn(b`CUXcRRWchXlI17<+YdhnCL1#W3oq#?e0~7I?K_b_wSJ~`{!L=XGRtj z9o$YZc|Yn7%pw|C!lGJFw{AR$bPzcSNxy6qGQ^i%_ziW3LZkZTZh{w#08~lL55Wn7 
z2dp&)gt|yKyg+OEOX`GHWrIMIMnkI#=2<%S)R^w9L7rRC;P0-j&}@q1o@$KV`jI-XOFTIH=wWrk&ahE!Z|C zSO;10Q+~9_4{m7i=Jm3q$r>v8<69_u)G4eeSE_Ezw;8UXQ{5j|IRaQUZWpM<{Dwym z=JL1Ib|**Hp`{ySr(Xgfnx?KCDEFjQalN})JlyfDSdqdE)h!|Xl=in}8epIw`ALad zP-)aD6`)=(QztB4O6f>hl`t>JN5J=+HLCU)G{yXK=@@@$msO3ajqCIxQnb5Q(5L_y z;4~Ta(dZV(FW?PYG+I;5X#bkG)#U;6%5RJKj;XOS>*)EF`Wbaq6@6hwjph*xj5HPO za_N}3y3p?E+^wI&t*hPCViw3D@zHPHhdC3pfsgctv8ML2*+!uzW#6Mroqog=Y3Fun zL@_Hee#4|u({PTquG)iR|E+l}izC~C6p!v^oza95$>z?lUT15&(A5qCZH0NAmxa{8=xcyJu#FY(IU1UWoEo#>>rzy4BERTwo{5~)SMl9EKS8)kuZWz8%GQeV8TD(A4M{AwMZXRnMqmK&}h>)4n79`M3!#vWu;(Ewy zLUJ&oAHB?mMnvV+4T?tslTetIe3q9_G|0ZQJGXRPnvJV1Qx#Y>RcK?@UeQ+uV$x^X zyJ+e2GjAbdG%g&+ngFLTguaq11%j0(#loR%VG#<+n_^fg%9<4kqz3nRryH;+YMcw7q+4XRwzj z<%Sl@y&ct!R1Up!-ST7^87H2)pHb*ms=0Ea%5qWyA{2s*&2-V$)_!1!3RITh$yB-w z| zNKKo=q3(}6SJn81^Eg0U*S_#Lq0Qry&zCMz7wyqQR)x@WU@TshC)Qo1_G|xfXRrmw z7vONdZ65x)zJVclxf~^zB25u?7kvj9XJ^yEB*d%?&`hbu;9jcd;h@##A!bGz>#!rG z@^R*{idnjZ-K-6mqEkYp>iixVR>rqhs!=)Y(ZmO_RJGSE0cNS=O>0<@sI0)b&%8_1 z9nWgA_!lLsX%Ew!PLfWM&Ii$ePb=36A@H&{QAWnDV1kDD(gh9Zy|Ka zyFUZcOwA!?xi4MRzf4Urd9T6&5O1keVV) z82(IZXfSbaPlyEN@NaDHAz(kZzUY!}8vl<(Hzhcmp!8@dwh^P0?fW?|r87u3K zLI(io{`KE5GR*p>19*3Y8Vw%d^JO8kr|gk20!34zN$N4B@&h**EJruL@eUr3gIq2a zal1FN)@^I3z!;;l^ibZu!97Uq1znO)%(h{QZz#M}5}FBarL&esdrvFbR~Nd;Qy{k< zrNUEOtP&zTt#>f0P3w~pB89AU4EGf+KGn*qCYgOVMMaXMs4Y|Nv4Y}bQ^9Kk%+u@0 ztmOp-Az?QdJ@P!aAq-){j@s<>%gVI-1|bC(hFcq5EAS-1iN75e7^xW=8ngUXPAzd! 
zX>#Gn(vcBJ4O=W=0;vNf@hp%+#^)=7xhPJ_F>kdoYW37mmdjGtdlt9`KU8+k8Q~psc<^&Pz*OL zz_LNq(TZ%4WxMgG8*hYVtxCB1d_-_aAgw4USaJc)?O9Ej;_~C*X9MYS!q{RK<{2Qp z7hyVd_7Gvprv5Hqg{(YklQf}akU4t%kk3|8 zIo(3uU!TbzrjQsWhwND9wWxtoYn3yi;%1zDgsYTFK$4#pFz=q$y(gWZ|44mvPTcGX z(!i^@9Ux>RC#}UZSCrtBeg6w~(7Z=O3H!czLJcM;gi2+%_wi$5eC$PRSz>`Z&E_7c z)VTwUQ+uLw+O0!SN@&b|k@0IwnNPfS2)JQpTVZG%b`sWspT-?GP5{8ZTj5f^Y@p)PK9hBF z7+~n^4Ag#Exj1)x@!9HHSl3qMOsag9)y}PpCfaNYjBm4D|L*Bz)&G0G!}=}$Kd2|5 z4e^{k8w$u-8Tq2O6Vk7j2N{Lv5XK3Dhyg4&jEG^Yo9m1GM3BF5&nM`H=8Fe!j8Ivg zDdE?tLZJV{heq@e_kHl}Bt`4(j_Xwpbz1PCE3(A4fU}v)Mo+_f;kYH+N>}wBL+rD% zl@*#m!s>d#ne{2u3i5xjNKQB~^#z<-5InF;EBVKlM=*=1|Lmmr9SbLjGy1PFoY~IY zdALHH!GO8vgq@7#&FhtWDp!)@xUlnp5d`CrsBp{5WJNyg6Yk7*^KR&84-E_;ud~aC zTclfD4fUFaV{z(u+rSe&kLlrPnLa^$G}|?4d+4)pA*dYRokTbaYn;D1aahRn*)wuG zm_L;D5%vGxeP{K))keVIg1h!lGp*QHLK4bbfXg?y&uWJ1^jy@jk$12%o}ye~YxFZ> zZ*g;sV08rSnRKG2t;8Gstv1OIYUd)6egRBfvTL(zrncw4{0U9LXH_*c6N1$j{1LSq zAOVAPgWn4S&8%g*e753K@SIh9$AX-rs^Wux;VBWaU zH=-oQ%_Cf>=JL)*?yO|t>7N3Y(3YPcVJ{G_exzq8?-5@Uir9F=GvqTz!k;4IaKQ*m z^#)fm5-*XfDwfF)9h_`F(@}N*LZolDc5T#xDr6@>;XmdT@IDrx>dpu&<)4N@bpz_w zHCC5XE{snk8+AQhXefI?$@GoFd$?C2^-tr|Q61L0*j8Ja@c6edM@`1_tINGM7 zw99V3M3z4Z2Fu|8;vBP}Z)y=NF6hP}|LTcC2Yq$EvO_w0cL()RF4I4v@#DPK3G;o+ zIQYAIhWgTf-~)vhmn|z+roC1t{Eg}tnE)F>-MDI^uhalr;BW>tkICWEZ9@rWVAk=52?C7MX2Q7Y zM;FCz!WR>?Ki|{RR&je|YNhwxi)d1!v=(=FP_$3uH1T|&5XeTPN$3Pr82E+Y*4yi9 zS}Z4%Y{(AIVz^5EhPuRMCi!D&4Gp32)`}>{CmhrE?WVnRDUi|xb*+PsMk?{aX!n<= zXnB@uSk*lJLt$-^c5_p(->fEkX1K1zZMNz&Z~ritEL?C?4p!yC4^2v1bm{WpJP zm4Y26<O=3)A9qj_z`VTax;4bfK=#SaXkH>5fG*v@?0=` zcV@ruVR1^7CO=P6@_*4tIml4dFs%G3wPWOFX$FR<9RzrQ5eGaXVE6}zZi744yyLop z6LHT{yvFJ-s!%x4irbHuz0|tcpM*yp0Qy@)wR@jU{W_>|uPF^fkqFnl$shJeTea$k zwa%>z&S-tF>tLhD{)K zTDG9EVxw>&o3I}dkMp968iVyfh8*-gZeJTqiWL)<;%?5pH5J6S0PFX`y%4->vxasuZ=J9cgHBbd%mDMLTf8C{ON=ss9ksq zJTx5k=vzZ8`O`NTIm3t);}3VD@>hgYV1r?_KKtnsbq3e|t2(5ZZX6>|#k&$f zVIgO~Nh(>SOYSjae5G5M5Q$V~87XI&9{#;kp$GeP2 z@av4IGmT9n`%p%yt3DTRAS&OCyr;soD`G3` 
zIAKXLGo7W~6A8r>|y*;7-xY!eJ_^QGHV{$uLsrs~yQ=Pj!T>HLAE4RD=V=tIaYwJF%|ke&Gi#;Ihv( zZ`>Dg{IQO*0kVnPfaz!O(s~?qd%6sUn%xIx{Y0!+A|K?yWTaf%ddoZMzr$}o23Y?(T1Ls9-k5&pxE^S_pyU|9**ts4@dIfY1YoXwq1=5pi48|mA3)ColZ1|Iuk@%P~Hn;{eG>#S|ki91QNF05R{n+mb>@m*Opgr-c~dY(ni1yyfwdAE(n;)h^4 zX3!}M(w}}*v}+sH$pIX+zX}kK=+uv!H@^`fj{Dlg;D-Zzm^x@?s5_q-(h%Cdc3A1| zWixu@M~(JhGYhS`Y-_mV8hS35%X;?o$OFITmhC?LdA+4QTI>pm?HE_;>cjQaFKK>L zkyfU%klON9Q<^_K+wh}Rtrsqbg`Ig!UODD1$((#>E}as*P-xV0k}k91QWx@V4s7tm z_ZJ`r+oa4E)CO-D{JTkYAn%v0aCky)k0-yG{jyJru1&T?;zc&4f>KmAuZSH8rFV*? zgHmS2w*lS31i@W#K?4bx_}mWP+Zh97YF_uJ<04BbmuyE~rKh#(`^8gV-?0kfHJXOlhS0Rm%li7RUnDdx!S$Sjt-dGDD?K7Er@ar5__YpopnKhdD7_PLjg~o^yW~PKWOb z*kIJIUK~VQH(hZvZ0RcPAM<=7DcgYj^s}U}mvfMFP%w7AfH@_#d!a1n8aO6po=p_=>Cy<%2kLi*4WG%kB`o-42eLUR`5}xkHvV34N?r#6k@!wi@ zek2LEOzw@F)fn6e7-)CuS%&u19|^?lO99LQ^*y@-EFYR z>%)IYU*}*uCqI^>x8=HCiCHGiB?-^*C%Fh=r;sHiD|vQ5rp6s6V_OVMsTarG3OhZt z<98?^EXwflid!K2YC84exhv^!k{Lh$85K4{rfmfw!$&IYg2UH+ULW9kf_LBbBs z00A9Ax@(6E zCK%ILlf=&z#}N8`LN6Kam}nm*D>ppscr)=OnB6ZgPo*YXwG*{Bb?KmW$G0UX1&NX! 
za;otUW^rjULoPeziaNzPC&DDZmeEL;eND`PN!?Z#9d32&8; z3!&~^+WFJ4f7#{zr3TGX`a_{0{Am;vzT^kNUvf>gDQ9ZT>bUBpD=uTGbJn(*TX8(I z78FD>#PBV8jqwE7F%fB=@eLGR)rh-M@!fC9NZWN++A4>-NeB*DMoYdI)nl1k;A79I zLZYlu*xh9Pso$&i#`_*n3=VzzxI(d7Eq8Ez;&SdBb#ns-hv$UcBv8&3<3?+Q;Kp=a z#vAYq@H^WE{Q|i;)VBtLQ?wfBvXGvv7jcwrdu}s-?=OmI^Urs4HuE6{?xr%v;>vL`aH7HQIgCaw&5%{qS1_zAB~7TsloteVwE6*;x-TXEwVk&*Jxo| zci--e=>CO;(8)2Ldh;}GFg4faLh#=4+04o}_uT?ns&19e532VUvE}t9AcIw}@gswM zFiBVx9k9=Tq+mhq{2j2;mX57$>7}vp4zIg_s#M=*DIydSF!*{)+#z}{McBP@Q>BHT z`CHuu-J5Q(f}UR{XL{$xoL*MW3=60opk6WE6?igM5Dt|7D8=Kw9o2|CBMXpq)%3P$skd=mr6sKu)deesC7~6vQ=f)l=l85!9 zgkQV@zNa4( z&`K|0GtXY8UM2jQjr7>oYb`f)Ve2$Kp2*lU`j`fXRF2bw1F|zok@o~;XuSSyk@0V| zA@+wh>I~f)KFodkqGG||Qf>~hOLAIcKoG6x730NLz5+c^rRgL5%G`` zcbByJZbzTITQUJn2*Vj)OJhgB!a8a;ywif1!W$0pg6EqmsMy4lYEMkxai@9wnNmK_ zDt=Kx^7vwXk3%ZQmME`S=5j>9zG$Xhp|J<5e`-bSDSd#A0Xtk;xw!12sCeOUA4S>) z)@+(ksIwsLv~NKvh)ZitH$(1Z4TSmprBm0;G9^I2KH;v|0{ljFL8v>#(v`MBZE;@W zuW-7;Rn1E%TSizVD+ZMO>#vC7bwi^Aj zpf&W5A|k#hV$x~9>>owY|Dy;w5f{#X;9>p`JP^1IV*a56+CT7sp!olj7~Plx_&ray z>%ALz!7x`&ggMWRqj1-Hh}v}RZj&wsebzlLBkt-?TIY72zWj>RlJ^^q+VsG8fD#11 z#C^q`Up7Cx9|(eVzfeh~))SOFh+L)bUy!=*wV*wpT;&{Q5earKureb({g&L_rsuWu zO9+(S2~L^aor>MrStYo-y0W@D3F^POq^Y&>gsL_!{jUqc(e@s>$yzo>bh6@ayKyhkoo*=E1J=hnOV4qrN!vZ zNN_h@HyW&o&Kwnj|2gYUd21#AeSJ*u>Ip-_BR(I<5!4Ne*XSv*NgxU58 zQ6p@02hiVim-T0^5${*)Ie1CWTHAJ}8*_Fm<^mz%f!9)h9?qY;{9*C~+^d77)V;qh zJ&g)6A*h8e!*1SoR|B63+8CjaARR#Wu@$YtNhkXN$$w)TN$hA1P7@REi$~|)RCj(w zVWTWrz(0OE!Bwxn!IxlPVZ^L`VbIilUKWK$%mec)DH$s|>Es|Qhb#QZ3*v~VHM#J@ z3n#6c+4u{q9!irZbk>^3aZzeYIevO2LY>07%C>O>kFu@=>klm85GbLIW&#zK$SXyR(R7gAHwo|qdK5| zmk-Xul1pE^mdV^ROs=u4#29LT4gWh9se1o_TMr(}mFr4*PlNJ3kn+znY2+Bi6$0VR z0#5BBz`(E<#b0h_XV@NRt+5fcOK2GfTR9*6JE3Z}Cj{(hJqV*uNdoxIX_GroqH72Y zU6xy$xRVzCWSp1HlXcIZgI_-zX&dq9#h`{NVF^iYrj>8j+A(o~M75Oy2avdowzi#Z z_9J?l{--^zQF`mRbFmRc*514uuS_E!i*qCab76g}1Dhq_0w?=h&veVR_Tj5DQYEx$ zn>~H(T)jj=<#9Q4S*E_bS&x)16JNJ}<}z`4KAuNOiEPMvtKEj_@i*M!4IYj>l5EZ| zYMisZzL=IQFhf2(XW6qrVsz=bw$w6c*j1AI^%4tyj|a 
z6E3*;5mR#%RCrXt?$`hfT|rEUTJq1nXp%l|r-&Y4-dc#{Pd|vim4qJ@+q73yUbe*F z^6x~M4p+wHQI|{^eEbbbyews#+eCUJsf3rhd-Ve`q<$7ww7^vN1c+)B(m6bhM{hlgr>fZ--ni2Nh&hILU>}~4mb6so+=$n&-;99gtTao$ z;^>%dq15(8Z_oXWs{*cIZBqUrrbX|=6fb&odEKfW(w3HzdXw~s6}yUujT|lOWjV0= z<+qFJUvV=|l!D>kAU=7w*I~W>e5AWAr|n7C=}3{Uw`DhrCbWEYiU@88$mR|pI518pgA$UJclJ%GsUKD1~u==fT|(Z4Vg;|Jp7!bVpIv{vk9a3g+c38MwX5vOPBV2YPe zHnyE>h{w27Vmqw~^Um5y3GeH|BVDJ)W4|m*SC@Lcd_Rp8eCKqpDw6nWXxI|MthlYU zl$m+nJrWJ{fT2pkQqM%$9oc}G*PO*!q`Cva%LP4oH4CXZvInf%5=#nAjh}ASo@r~| zDO7M0Tw9wxJ4>guUS1F4d&?3Cw}sY+Y#0W$oZ)8k&D)u|2I$*?-GW=I&Uj4v(4>Cn zc4Q=`MEPowV!rPFaLV3uqmD>uo;4BBz7seL^{hiRzl7AW_72v;gNI~K-qd({N$SVg zof2=dmo1N=Ta4a8eP-!_8_f6EKOJ}Ml%RUYu&a4dT*GuXahQ}6&Im7u_73F6?HIVL zTP@6rBNs9A>=^3kgDVA|6jEDTzmFQwOO73ZLF_p8NSas!i}>H15c}{+i*g-{gI1^q z#reMSzu~6c%V-$_5F1*%n|iApe^Zd&8uuoM-PO5V;NisI$F`%Q8O4w(XsphHyP8Ch zk&#rmOi6lkqTjtp8L21;^R#)zz)cT1aLSqo5C!|=Kq31KHm9vg+Iqs*ThNp}WI|?X z&kdn7xy2}!&%^e-ybNv6VE!6`C~oN5Zke;)6RI)+6j$C{U&XNV7fUoHb*fxuX)||+ z%vEoXJkd(wZ?E$v2TBM?p6h&oew>{8UWZ`@Hf(Z{=+C6t!u z(eF-J3Ce(l0P48(9@!6Q=q+8KQ?7jaCzEVmYE%OHURv0?L}K0YMO z=KKLgfK`VQsexaeMQZA3^{fSCm1aZ-`kdhrO!C7=Nd|6B;n=kcH6ql)U;8~=#C*F| zGv_vheYxY@$?nrQ1fna3CvL9F+ss_MLF6LY5x zBwv4%pF`Qwl|GnNB{segguh!N!dHnp2@O&#;U%hnlOp(eh^gWsGrJ4QRcV#8E_N88 zs~v;Z=g3r6cA$DI-!tk6z2j&|W2Ur|{7r-Y3X6dqRvMnbRiUy&JN^Tmx=jIF`@=u4 z*N0xIN~^6DnRNe-oD zSI}Q&`yW+rOM^HHOBbg9vnL7rtB+w8J^w}PpHTbPXHYB5ArJznH6|Z#&c5>q{UT1KtPGU%K9)+U;kOP#yKD$aK%<)V#+dN zV#LbM4(3+2W*{Ka5y>gg>gqC>p=X^_5eqY>md;a&6vLEqP%2h!`BtLy?CiAWW{wb2 z7^tYqlBVHsv9kE;;uzpTKjDr*anu)_n2Bj=U*n6@R|VR=A9Gx`U#EP5_M5-8T&Ivh zUNDoUl>#V0qz}#D0z36i6I5ma8xTK1a7T!d*mQ7wQuBy~ey;4jUOoC@k4m1G%=%RM ze|{J$BogHIf`Pz^a>r)(%1S&V>}At;5u<}(5JlKjP999Vp!Usy6^R7f9H%$*WZ&Tf zd%(AyMT0g`J_tbg(?>BJL5LMdoV+ljf)+@~Dj-YvIa9-G+4^ED#VPT2t6=a-N=hd0 zoPu5VKUBl+fEi8&8S<1;1MoI4<{s|$P(mTCAD^v9gg2O;}@s7%9xDb3=lv3=At zGK&40HfDTCO2_`wxnNOL2JP0-9!5O{kA8T^rZ4gELL*Y}_Kd-#!cRsg&d!O>V=HG8 zB>tK-a1Z<4vgupgJ*#O|N7v;z6KKEfzjVaUK{xc+(C9 
zH{^E_sumE&ptBz!8&FTZ)M+40Js2Ly<|HtEA{gOtqhi0~F-<|z#W>^14nSW-dX+IS z;GKvG$%8pX`SN_qP-sJ9MO|m6mH7amZE%R9?)fa1XkOsHfhx>6DWS(|t8U^$YY7_bsz<@wRdoYY+E#I(4{Khg3y z3vZPN%VgEbeuFPYwMF1cbc*|mo6M@S6W9eeOL-Ue&(H&Sz1Y0C+A#E^>_wjP914bK z)J|%!;Ur;Ae%AFfGbdmpnKMvgsYRC$v70a&*_%)@U)5>TBUS`84SAUq*2=6Pu57OK zuTa`zxuNjIW)JN;-g1Lyfit2E518#aJB2w8I~_ZTwZLvb^TXCeat(X$72fc_x%sek z5&9zu1^Y+lQb?mT!ng$sgJss{C<~X70#NKxC}A4JWeSjH1F&V~N$tr}Q1BvB!;!-s zWXKbFM^hRD2Po_)lH(Nvz~8H~k@YX)HVzV9*aRSY%^Mhz-OZKL?ph{VsNI41p& zwN0T;vP{KJNJ}Y_&5(hTEs?!UsZ8yleUXQwDrPt&8B7^V;H7j`vCfJtuvXqIqyK|s z{p|tGRTP)5ifr<0U{k4rB%4eE6l4?)@|1e!+4AWVI_WC7ktGTyGpd%5jcScXYg9UB zI#{b7tNQgZ^&!>3W;n2@?K-3eW#vUY`|_- zsu~0ut-mFHA8X{!o1HM;V|c-Kh6xsz6dhGjs`LF`s^P4tE+j1J((jac&xca!mn0J@ zlrGdYkyX=}H*_g94>}TH2uz95ELak$GN{6_Be;OM$X=^jn+6;Juz6Q`5qXgTG;NA) zr)|k?!24P+POnj~um`AnBVW09@psmDJgC}$WYFcE+TJ{pp_!$43vDZIV{uMrmN8aq zW?Z%soL3wNE_)6?vjQtLyZY&(<%5dyVyj52s8odD%n>J?m`A9uqin^Ttcy`KWieY7C6@+GtX3|o|Qr*(w5*g6v7vsvkK+RuO9%#0h9V{(e!MOF5MS=&uWj2-~ zC-2Og%tqSoEgl~79{SJn!QSFV1C~WH2PGqCDsU9JPT8C+6>YVh^ zv_s6t=xrV%EVwV&0v~$HZ_2bl&Dd#At{M;c>y>wuubkA1l83pK(vy*bw3y~wE^g-9 zRMH;Bo!6ZD+)^>(E#hQ^0YA%hw&DzC4o{;qpp9sO`+eI>F@ z|8kB2RckD;+M)B~ul`xCxwrXeBWPzpQ@(fM+K?UIFlVbRZyU}UKCsn0YA7X(0bRGV zdC8v4UT*90%x(T6WPST0d}TJFFsp&X;*GE73fFni8M2j(@4{WvC+;KX)%TMzN*rL&{6|26Az5mOe^94q$fX?t5$ zoN0{C%l*e>K*ivABpnhOG8}Sj%5!of!5n|%)6DVQz}&i7wVA@9>|y(u1yTbtGJ%ue zoOZrmg5k}n)McD|tQUb@`9d|lV5mTf>$!}EhsKo>r_iDXo%*qsfBEiW^b*wa8u>l!AByNYWVGJMOqc|DHEqWVQO zsHBm*B*5ms#B<8SbM13@7lHML)xhNP8UI%JmV8;k=Fj`N%Jq$helCQ7{k(Ymhu~k~PrT$$@0}zt5Q>@EIR?0iOf#xPLQsK$Z6qgjX~{-cVwkA)FY3 zM@PI3ph9@TSNcQ`J{zMU;O|1dn~HN73rLVa3)f(U+xyd~g6X8t1BEGuuZe>lqmikDu^FR>o#PkK zf`IUQ@O~BT%m7Bj9(K0&F1#KBr2nzt{VM;fW+Em2j|sp=fK*#SnOMxh*^HQjk)4s5 zR1l7sn3&($)SOpUT=IY1zpeyGEdc;WUM41YcXvj2Hbw_$3nmsG9v&uURwh@QdTf3>{IRvu=y+TvDrX7(;$eF(C0bMgPj{{PkTe|!92 zo;v^6lZWkpJN~bR|8eAJ`ZxanHJ|@3)_-chR!b0$pXvXsydc~ftN;cG2z7;wxQMz3 z=!Gw$p1Q{0Pb^TJKCq94$deSy2+c^bsy0*O7PHujBW$}r8RLoHzvId_2M-oJyJ(Ga 
z&P@P0?0WTI*t#f67}PP}7OcbNZzu-mK)7^-B|^Ebu6UlN2rdy^j684*&aC_WU>qU1!7ieOfo@+r$uR1_YjKE{>s|N&vL4hEl&|6`%8F+x zK4>fmmY}swkEy6mI&7H+-8Ea`G8$p(xH`05nJnzq@$Bl?{mzHdNqR1RVgW%mao_v z=jL7`4Ut-z;S%tI>cRB&{JC-&HjZzpK-vt2r{=Y9*`?LGOO~ULg=l&WCpEhDyFf*& zGM%YhX{-L8j8FAM-K3R9Td@h#TDy4GG?!q?lJY8+&EWm7m{v5Q)CQEkrS;!rE0rcL zLXa;=NsvQ z24w)ZEA~rU5jkH*R*c_I$+@I^%|k2lelWw-)_7~wM)S=WBxdWl@JBJt#aZyK)s@b& zE{WoV^GwOx>J$&T<|H=&3v-bh-z)spRssVJxQz^c6c1sDIP8)s zGS`QQ;PJ)=o2%O<){fBg7i!K88I-3!7sur2ki#Ui8!$8m3i8TgFxt{Ptw$qz&~Kni zJv0V(oG4-^(jPXV$^<4LCPA3u@0)0P1^tadGu=g3F|3(ukvw3dP2Sa^En%tj(^I4?TIm&J%P>l%#+Y-Hk6W4p4~5`;k3`19mGb)CxVoyn zY?_egiZYBDm(49>YBE<%5-S_Ranzc4yxfv_Q-WSEM;GEN_YfHC!LQ}1D2+dSL_bSf z)HF+@o+)|=OF|=xi#(*_x#T7;wmWZ&KY^as`fHb|sZzP?Y+G<@Mee73EaAn3w9`psyL=JIY)a{u2{mPuCeV!0vRcuW4L)1PrtE{#YMx`KP<3Ts?b|79EXuu{4E6jFCW{D$ z6;|fNNnH)?N7&O*b9r2tT+E#wZVnUF;b7-tl|B#6NXNZEmG4{y%}f0F zE!QfQ$ewI#aJh2@qWQJqDL+-Y)2YO|6?omUS9&lJ?b6_>ib{0=B?X7gkz<*XXb{H; zQAG;qs8p3QimXJVfWVvIFVte00U<8sbE5L2SZaw&l2OGA{5dTGNBinlB`vM4ntfWA zNp_MV@k673Q8F=Qr)Q3Y`u~g{?hQDrR)cQ`7kZo)0lqJl8Mb= z$IdA<)*2rY$1d30MNC{JywOMwIP>JNiMJqE(8!UC`st1$6}2P5HmsQS%UniEL>~eV zuPWE4@R@jCQhKc{7NY7~H>(o4`oNDP-+3-bQMCSPA5#-oafLBW;${eFuM=r0VXIA8 zj^kj7qt-bIdbGsE(_JdU!3-eOa|m0N2an}OQJRqjCjIirvNP%y9SCTrDz3rC=(kh% z8HhE}k-@Zx`MO|Z8}ndV=WKK{2Q2LwL?rPaxx(T&s!J_=_dCI9a^IT7OkXLVs%-$Z--5tJ659&fLVa49-L)NLl{ZD z)?5H6i?oorsNWhq3@lOH1yWPYs-nip#tch!GU!>L>yN@k0a;#RNKgC07U}X_ zt(b1<+7D|~BudlDLX9t03Bsnf50Iwk4VVx(h3RewVYw?MJBX#APILmGKr{OMQgZC-Odl{EXLM}#fGB7b2Hl10Y-kLP} zPH0Qsi2!pJ4EezUe_qnXsz12lrPY$mXdp+uD2y*`<8v9JQTKF&RHCwB3miKh5+VscU0>oA6483j= z-&NXW2xD3Hs*D&GY_>2vLB!(IvE)ETPdKlpgHxs@w*xwqf|MnxIP_3)?4vl59)fpf zUkOyz*eNll7^om^Hct5I`HjXhQ0lhXY1*DN5)ySG@V4Ug@W07%q*&KC7F9U$-U)kg z10-xba5yz_!<}M7J$W&rMp|ME)TpVzO}u6+ zK8$PzW2~R4m+N6y^(?&>relh6GV9c6o{@_;-X$>>tas+ak{c!ZRCUK(Tu_EJV>Y4l z6XaXwdA-FGMLQdJjd2@Fh?Rp2atu>U_2w~jAvuA&Q89T~2;u42f$w6CYj`z za+|N`lnc@BgBA$t!U?gLaI>~Syq#GT?m%l8V7uw4g zxF%Y+8$6DVM)Zpnru;+yLAEt$P`~4OLGC*PZZLdMyf1320}ZC)0mBb?6!{6C``DZQ 
z)c0k$LfjJ)he*sn`c-a32GjJ9>6_rV?)guMtdO6#? z!CAagM4{Z?wdngHXenE2{?RynAH;vuND%my5edNfu6})?;!a;!%@-p|<%yx}^p%SJ zIL>zdGSPd!=J*#L00Wk-^&EeeaclEuNsbqSVT(rUC)tcS3_x(#gg5Sitz+l#IP7PR z-}NorU~Gr8Q_rC%wBt_29wy=zz=y7b@~UZLQK$vv|%c z){!%xIO>wcyMO8bPDqPaHH}i#`R4m$(2^ap(y4e$xtDMAQRX?9qzm3DEHq+uMyT*n z`Y;km_8lmrK^VYf<3}m^+YZ^=1%j@htF2=hlQiemc7UB<#YS5hg(Jia(9|btW{Kf_ zvdN&VF4?eNC{iG4t}&TEEGs@nQI`)UKB!WZBkpO{@$Vpy_`V7JGI)WJv;Ty@d?#R& zfc|yp7klVZLP{>Ewy3N%6{(W;k&RTm9Jd3px{d39)h#Y!6Fx_FbO|h<6kBnBvX=Xr zKM<7I82!Z*$NAWSa8M|b(&UP7R{PnpUmtk4=<4w@RZh*)6^(rP2J_T?U!@H!Jh9_= z{OfiZM&kqKXvM^CMYrQc2D7-2wO|odheCVD9XyA;TX)9LvIJG|7g%R{Jb= zZ*fR&l~hJV{Haut2{BqOa;Q?=dcOaM|Hz-MZ!&6k=dmK)*0;8nJdOLwAc3U9)u z)dbK)YTv-9=*3U{b5N?_?G^FD&sP!_j;dxln+ew5aYiVrhRXqvTbP8p*fdj2{q@k& zJ9;%C$94iUXPlW-5}RhWA2UR`0`6Qs_Om}WI&@|Tc|-N)ym*5R7W)D?H7BF!IkujW zH-t7q?ad@x3`ogZafxR4x{5QD;?vwdE!p(^|Kbx$qV?DtpmY$55YPzQe=3r;p%T76 zsTzY=d-J4kUE>%TX~eSA_{f(c69>pnZ`6ZDYA(4f`-Kccj1y+^S9e6t9w>} z57#DOhanK>BfKHI=>5#Mr{9>FckjMKElEJ+zm3SU3Z;idJHV-^S`LI4Sz!ZNkf}d0 z-uuZ|*v{x_$KY=wpF}r+d(1u2r9wv$#%TP0YgTD4mO_+>*Tj6o(6zUG3{#0Eu6hgg zZnL3GQn%Up+$bpleTisuU6S#S97vwa7}2Wi)3s<4$d$q}qfP^O#;EDDS_><&rIW)O z*PDquwbcbVa|pq88NopD^GfLPI*W35@MUzR`cksLh@7*em zGEzo%m*3TWli*EVecNwJci4Zb>4xo&H}jZ0-<)rjQPN@? zoJ9PHoQN%;)3)?&G-l%GnhsjmpPs#;fm{seI(eMr)P84EUv=9^iIyug>pAK%MHOjlQ_3+TPS~b$Q%xT7meoiI?*>a-_9>Qb*EEDVm3}+^0D#?>s3|} z72U(Q?c$AoKo=D$fZz$a%5&JRgW-JX&ukGi+km7u46FUvTFB-)BGQiuHVRV_jC|oJWd(1_?W=BAE;J{7F~#gk4Fz|8;Of@@5r{_>nPke4_+m zvgu-UUGjUY?ZqPu;qz@D|;mc)nryz586bmBg=5yC*}Wm z!F-@x4Z~Fb>&mHLteZd&KQuJfXp#flZkR_lrNinhK#XltI)w^3YmMfXn*&_+l&4UJ zh-sIP_dlGV_4yR5<6%KBx4I%`3T@0Fnk>pmLRrvd9G{3@zbCVR98dUbIBDWbVv~xC z9AmnWU9a)ItfmZ&?$sDePx{)U#3Af@5n(uHW?3`AAtE77P!TtLdWYoTYL`_W06|y( zC#v|gFJ+7`*?!T&2CZoD{uT=o*=Eqf(J-0}T2R)VrL3x{73_zVSDQOVCr7_Vg*O)L zFRGj%0eX|>2uy7|Bjfq@M#hKS-*{3F}8 z^wJ?ed`!Vy9|pUfJ1Q4Eo0?=#c}ILaHRyS`6G7^l4)yc?*XlaJ zXG6~1uS$lYC9R{m1bXZ(L0d`8y&kI+P@gt>5EB2=VaWN#Kq-^N*v7J=A19|<&#cctTIP>N71nl_D$Om z<>68?aXO;~hsp-? 
z)_KDjFCNTe8zS}=fjdJ4CdvvQt7$uB1OlOcbTcurs;+Ct%2kM<9hAy`D0;Wsfycdo z#06jC=VvtKu-OYJyMzR_K}G96%k7!Mu?J|X`SF2E+;{8SmEuz_xTx}o1Wzs>nT3KyO&yQQIx(L*d-yrN~94HJpGXaGhX!yTx@G7q*+Y0#b)z$Pt2N+56TE5q;U5< zVrC>vem&XKR-CV4HMk$m#5RcxWA-3EdaotKxgQ!fD^ce@93g>dbCp`Kr|>0bRZ=S9 z%u>)|-pDAc5Ms0^2owCPkw($XhVuDgT2^LvMDz~RU7qo?4fNxApN}n`#fwbO&?w|` zs2drnn5E4XrIQ0(vt0L{u?-7B@*zCAMNtmFEc9%b!jWvUpM_6Ci%3K~u7CA-F_ z)NP6V%`D}!*^M|%Ex%;ey6fLd_A4)$$}S}`3#LkR-H)43eunJCUnj^#;ng*@>)4-L zyq(GK&ZisM#ay3G>^lbon^6U0?-yFfsYBxW-34u>aTz^$pC+6yxxw^d0?c7^zqIZt z+JB7t#mar9NlGXdab%7Sq&*?ile`T+diVi~{{+!&pFnPJ{D91Fpo~8DOX-LEdoTcJ zh)RPdcV`0!+(-K?ulEuhITxr~7r)InliR1sfIlT)#|ckV3?};-agcC?yI*?i8v2*_ z8oJ#U)>nT&zIdGo9)tfMt~XMB0dynPS_jh?Soi#+ZA@y9;4j#=RQgBw5}6-Sos|Wjqor`RjhwTVy8LP_&aaj{ObDn!R8a&EI_V z<~Gmp)BRHAIT4g6S1fZ!1z9D?od3yeubD^W;r_LBzZt*0+F`0YuXWotb71w}3+a3) zgTlvIQ$b`zR4i&TcwAh86`M%>G zHog;1p=QZlTeOONT+l4^?0(JsZenI29eSaPE*Avw%{RRX)YE#@x7Tpj_k^0=ee3tN ztrMS@tv1MV1=0T6f~C44V(_uILm~@{#e61VhFD)|w0tS8>$U zNvTyrHg1T4KvA95C6bh^CO+rca>38&#KS8+BrL4C=o7XN4e+t>5(G2DBI6F8Z3!TL z>*P0C4Kvfx8KUu$M-nV%0cOuN)17!&N{+UA7nhD7+hHCiJ*FM+Ev5HAPpE;c^-E@9 zQW}Y;xNYAEM7WO5l$)WC!pE_jShH8mqG_S5aRf&Bt?fu?4L#spAy>;d6AA2zgrY3q zS~HZrLZZj~qj~Eq&qy;n&UrxXfnXX@1|pNd)iRIRtTs=C(BUA(60U;u(YLXA`qGQH zjCjFtb`YN74xYn<$!i;0vy|Nel)ecey1?n0(s+&?3hMThDC&2#qTKS}E(e;FcIY22 z;uJVmTw(R}&q$fEML|^YAC63>Rksr1HOR6do|xcw&MJ$B$o@W_S?h z6G=gYd~bKat5?Vax?xkarfuJ9wb8;FFgr)Hn5j5VFB1$)AHO4&Kh_@Tm1nQUKGGVd zQMTEfl|B`AM3g=u-l2gap2{oU<|mQ=Eg*U>Gm0m|elS>}4|Gy0s0^1ka^V*#MWA&i zV*7fdiwo7}8%u~J_hAl=hspS-s8m-Urq!Vod^&Y!59cfHgf{>MNAE6xaY4^dilB4aHoaR8yY=Rx%htsvnERz;RA>XxjLWO&YVeeKX zhW?OGsC{th3F{DHKdo8T1+HY3uQz>j3Z;)Lhz6gKIDr72zCTddH*yeK+s50)&(~XR zvsSA)KAm7clil(p5Qm&Tnp3K{{!@WU2_|T#@HC_)LpJM`5~*{{8HJS`ge7JR(yB7+ zjt5S6wN9@kb^~S_Xpuj~4AwajB^|>EwBhwet5Ml|ic3Isz10#tn~~x7*oGddIUxqJ zvYJ%AyAGH|f5#CzRb3qH1DA70N@c-J!H;f(td`B4D$&{5j8Q(iu!eoYhfq^ltj%Pz&T%Ct zALQTbEgKj})bD?uy(jHF$fGo!f=<@U+)mfU9%Rih zQ<}S?QTIPG_1eF(5;tfobFPkc9d~9=OZg5%PZ;M1?D}CvYM+T3k+ix`fwGFHit1eP 
zh7cj@hPp{J;~I?p*6(Dmg!VV&*|<%5hU~77bok1r-5QfemE61^NC+SBLedPtC8`QhGyW$xh8S*iDaz5+v`yaSWDuoT+Qq9|GmEF50g%{VX091LGLLH;BzN@{`Dl9pmB1$4KS3z zF*%O-$^cCf1+vH1L&VslW6pDNv}me{$7D*jEC* z2;;bl_)4}z9^Z%`#KHLJ^K?)}gyikk3_^>&*&LAuY8 zrE|;wSk0$z$hspVe%OU@wMHASfyKJ6Cbq;{y)$w$$~|YoYhE%PmdiCe4j(UIMj3m6 z7W$oP{;ahD&qe-@X0(3{C;$5PHc;zC?|h05xUOw7aOZuLxbs4YQ!1ydq7o1(CmAgr zDc)1xAERNj&R+`Ox>Sn?rp67M^Z%y`!L8HzDi>$wN$WmoEPwjJEDMGWhdJcE9$)nd zaHYZlT%TC1FZF+!Xwl8904N;nu#i9R72a-}O;&jJtW5pEHj-K}x|KN%KfZPSy=&+G z)j}9%peWhv@7*(b!3sCM5j!e>t@nb>=Z_x4rUiC016?s2q<74nT5USzw*k7V7W?(; zo$zC%fA8=n?HBOdyy)=mJlZSbTJaL5$@@Z;c85PfpI4?v9HEG|4=?!D#xad1&8Ic= zy?jgE^g(j`K_+bbCr0GVs8@udCoJD>5u^2Z9hpH%m~ohPZ!m#>M%008-O(*;w*1l~ zv(a)!C}93(GmY};i~}n-heOx58-VRHc|RjmC4L?ak!7qy9*!zn>KJNx%702)5+jU)KfPd_MN zL6oqTJ5k}$BZV?(-gg{}_D~fY!_myZNlgjx5)+grZ(DnQzdxs8fdg8#E?#IK6}F89 zAci&TflSNAJ7+{-PD($%#YSg^z=NV+%guRo#lE4#IlI(y1?nAJLXmMVjNW_ynplnF zV(^hau1mgnI^+rWw_;hkh$|H*R5W0BP<_4lu4coOu((p^VT?WhijnV*Z7%pt{`XEP z46%i0;S;WkswzzXHAFB}CG>pFZ^c8Bt2xUohH~NES@p_j@&Hk=J;yn2am`5)Bc53o z15wkHJMO{zNi|(mZ#}xXxY7_*-_(QtKWm_sZ;DY3>RmYou?I%;%zMiDWie(QE#KNY z8w1LZAKr)ZJQOHp$K+~xPi)BX$i2R_CT0ef$U_$cTB(x;zM{v)zZwX@SP7Nt^;jxf^QXpjTF^5aVhf(tW@|oa*}iZx-+vB zfXp*v;ThgoW)0S6yph!hRS!<1s8c(?eA7C;8T>x4nqZcnl4G?Haj9a=*OSWrlH;se z{9&t@XdYE@TKt{UaQcAH9=)nRra8ziVv4;P-jF~}t{m-eFOSjy{Wp^XyQ0q;RJ8wjet;`Qo56clM=djCGt+bD#^@P`2jujhubx_AGavl z%`pG@S_@&jbJ|(PCbwe;T(Y{KTq7WEf$K{fGP6~SKm7pQ|1R}5`9QWn4Xvp&kz?KR z!oxmOAYciQ!ylN&7NJz*z-A%K5vRJ*(TG+mtGeobfjC>P8s2qbFA4HT=OAtMb@SKQ z7CdXk$uGby@ZQmJC{PsR3_N{?1SXUr5Gg)}8=~qLGgl3hvLBR`PurKW?j~Rc=r)V@ zkmbsoK?%lumnm~x0V^0-bE1sRy0GtB;)GR<6mye!K(PEZxCl&}1My#|m{6m3P?Og=GregU zm_Pc4I%KC5kEa0qx3lc`8=p0qdwf>RoAu+Qox0N5^h)vLa1wA9=5WD$_#e``F z8n0Cr#iQDr;o@lQ=+bRCJFPsp)kTo7K~#}_h4u$fdbv%YTqo3;Y%%y}FVvZIg@tP> zEg4Mt`pEv0=>|oJb>lK zY+4}>lVFFUbHhcQ9<^$oK`4KHs2xe*4KXts${2NX9+04PGLE)a@1e3d$hnDqq%C-U zA#{`Qp8L@g@(8K_aB9i0v|I_0b~=lK%%mhC;h%FZHE#PA7E*}D>^5)du`L)M$uL0J z;5g@i%5`PaO!oIIf>0^ZM@!jo%@+um>$BN+IH;mdL_jHu-| 
zTK*NXd8ccTS*?L7krnU{?&K7DSR4hVKLW^SgN2v`H=G~Is;0_Y{h9Y9iAn&&ZWpcw4l zdNuEbOW9Iryu2s22APS`-*1C5EergDA%3IYL0?az#k}3tKod6Z*YZKa{lmZiQuSWd zWPdu-4G|hPa)}@nTw>sOp~eCcL8{ddS&TXW6L=g zQoHAM)O&AS3qfaAeW!y8iy_{n#%r3=sAZ=*}_EDN4H--wXS_nN;okZvk?HfA@I@yY@kE=)*#8rOR(b*3OJ+7;`V|k{KPQM z#{3HE5d6Lx{w%O1>zrGts7J*Xe!G@4Yd><9G)D^?SzmZZ~Zs@_fVavR#-U z%=l0-cTQA?0_R)Ojj~RdIIrxbm!>{%mmw@-9`zBzq?%; zA=s|eR#y;Z>0ro16adR&eBJp`I4nx1UZEd?RKLi_yce{DPC*uX>=5}VXFH>xrIi8m zm1XHfb%;Y5U#m}kAv{z00#kCSPznov!H7MshHARxEWc1wLPNwuD-ST_(xE7AI>=n~ zjsCi|C=C9)$;VuhtPd20Lcd+Y3NNOE&3R|w6}26zFaQ0yW821&>tAj3)Szmq`aA;3 zca$28Y(;p;Ly<l32gLrv^VnSQDNv~$ ei-?|2P)I1gPE+VVQvW_8m61>suNM6g`2PU@q81DQ literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/bg_axNone_lowhigh.png b/doc/source/_static/style/bg_axNone_lowhigh.png new file mode 100644 index 0000000000000000000000000000000000000000..c37a707e73692e1cbbe6379fd853111c4ce2a869 GIT binary patch literal 14041 zcma*NV|1iZmj)W!Nq20s;*Qg?ZQHhO+qP}nwrx9|4sZIKxihoY{d4O_t%J=|>%9Bf z&s$-#(jxFMSTI09K=5Lsg7QE>z{KBUZ77KEZ_^9=U?3ovA~OL2Sup_t0$F<-V>3%5 zARy7Oq-01%MX{g37hTg~i?fEN_S5mCBV-Z~a%Sy$X8a4x%v8okwqPP?$jGw7hM_Rg z;y8+eXrKYUFvq}{ii>vi1XNUSu|;WX+#Mdz*$z8z(_SssTjeTF(}+NSelsM z9vQ*-cWIu*$;~-#g82erjS?U*sbPAiazTg6`P!2W%mI z;sSA{jiK2B5l9i*x&Mp^SR^E_059fZO$n)G>W{7zB*WgTg2FB?E}nX@3v@d8ln;4m zNw>>Smn0Jzgtho*;trsylZX2O($hxql!putAYZHgsnhhtHB`JJL1Uv69^mM?)#s4#nQZ-#7 z73Y{J{Z}?&d0G|*R^6!D=B=;Lv?#)z*0>$&5O}|?+zd2`%p8^i(`P*$oj~G@KHXD7 z8pe6oqDf&Xq;qFS2*ork>d^y}meA8*O1}K}7c_tz7csRUGb<{)rG!C%;9J7rBec3< zFZib~v})Sj1>*^}v&2CSx-ENHnj`w*@ZbU_S8^TU*5=|SobTVamRpaV`R*<1P-iR3 zLaCJWNRv@~7 z3tga1h(CQ4sX%~UG*?7pLa2T|v{0Baf%4d&hCpcotg*z0z<>GrWYN%I?FjHl0$KSv zb3IFusDh&Ton~ibIURx9Vc_{)@)%4}+(Etk<>)b!gOl{ER>7M6v~+dY(6qrFb{*NU zr~Ur)lG>7b;#m6udRO)~ZYkbDvW0=spoIj=@*-tfDSjyuP+=wcqU5p`+{+G?iYpSA zgDyq1hhYhI33>?{%qcSCS_L+XcoYoG(l~OsGr9k2N7IV1=KGUtlRq-6a9V=_BMfce zTQ@*YABTZpOiPBY5Lq_NY(S@HZ9qnUQ>RJ;UlGtW>~2s{E4B*1y0tp6N@j`fjKmq8 zHN0ng&jy+aN{2KuXtZx{7h*eNcVZ{d3cU%*1zi*VYs6!};EwCv*^{9g&j*1w&?h{H 
zR1~QZ$~o{iNJee8?C(+{MeyjX{4se$uX=3uU>cGe!j;&LQI|0rO0;m+p)?e!vBJx|ML9ZoJUO0F zxuM7*41LQwT74sZT73cinnCD&^?kbiilK&~h#{GX9V8!0zS!Ah+XP*4%VdfK(-e%j z)Z{|(bTJ6=V)5(b%9Kv3zmhQIMYKnRL&@WD9Ar*%=9%I7=CWI*H0KEBKb}yW__3&~ zh^M{-n@-^&+yZcu5|cJal4%-eNv2JzrO9E17fTt;%A0~WDm3b?ldBo2p|82FY1K#7 z3)C~&hucrv-`Ts_OWC&^@r*4TaUUie{TXW+=T-boeM(&^t6E_zxEJ3qA!H`xA(X72 zy-zvhAzmBbEnhniQDh+QCB86DHLgAGc_c87o}^PgtC*=2uNba4T_#yBuY{}ARxVV2 zqLi~>bV~P#<__Hz!c$aScw9xM$XUKz!&*^YfLGkD)g|_k2O&2gOw3mxTA*PduAsD_ z<4|B6aLi5XpB$x}zsy&qU4?0ddj)lswO+M8<9O(Z!Lh~x&w=Pj*)H9F)}GYf@>uK6 z>OSTk@&xgy=OytW_`&#r4N>ct1iZ3a+m}l?Ji8ogqH4ybFUV@oFwSUBkHu7s`G#rp z%bLa8DBldls(z+$<*=fx$Sm9}>CB6Xk_o@z${5$!X7XuD?bztJg@)p1_D>C(bsB8| zlj_f&%h7tyWJL*|iLB(UWSz>&Dlp|J<%H$*<+|mgWn$KORzB7atLjyKOD(IGRp)i@ z#)kTdMoyzYC0%h|^BvHpF0iT4ull<>lTJ9~qE=Xlri!T||*D6^K<&R>*VcRcc|%cltrUMX#so z((39e>8kZ28R#LXH)vWYc33=mAqOM3S=wx>>FDa{^!O+qlLV@d$dR0HVX^1d&)rRY0qNh0Ni_ueImWUq>@iSzn20F|d-YP2LvW zQf<@q`Koo1W9(u4)d<|>*Occ`us&>sJ;K^%$cS5iy*cNsFq{)x2y?Y%Q_< zeBr!s6|}K)6}mbXSCHAjV)D*ebAx3+WDnj(%z5Ra>>2Z!{pR&W7oip6>@{U@qBZer z3NZcEnbGvNj%duA_)gzKt|#N;r_>kMwc6F$HSm`Cy!2E2r!ji;&7Ykec|pK9wY!V% zlwZZrL^w493L*?*bn;75Bknv`C3NOq z`2OIr1BMYu>DC-m3ube!bwP`9UTA3;8J;ip=aW5!?vi$yjZAB<0;l56wDrstkfutT zG;=9ST&oCkY|B4pH`lc{rk_JEDe2Np)AbFECa}}UGe?;joH<_J?GZaq_aGPS&;(J8Eu>wlcbU zo`8?}wgtjMhGG)&ls=39Xe^X2ovtzbsp}pgjJb(v7}j~uzI!_f%Op=EACghZS>|T) zS!O?DXTSA4dI&>*M{fW)e8s*OyeC~(F!^wNt^NAJ&T^-I>wZ0T-EwbZu}$l-!i~%P zaq4ycwwBh3c#m(+yY6}Ufp&LxcDX*&Izy`S*zM@~^zo`o=VTo{gGS#Y28_0E%MSu% zV-A!nNVpC>H#L|9`}&f>2b=zh?e{eZi}jkJ1}u93_j^?t=p8A_9?Xt5aBS2=8zP7U zbhV%V@T)Nr4E7;N-B6H4pJ#HaO9=vK1-KWzZ*%ipLs)-y zuxau^_Q7M(Itlgn^+zVMQHB73#Q4->QYHko9fp`A(urEhp#tPIJ5k;rOqaeveA+Mx z2&qYh$`Pu*u6&AMok+Pv)cJ4R58WLT*Y$vUSn5Qko}Ky|E~bY`rZ1P{uj$DSt_6N6 zU@luQHV_;?GT~e!)Rmcv6R3Z45RlABSCQTCaXR?-Fn!2sSNPw0X5{$-q#{*fQkn1h ztD!g0y&@`}$^;2^dAl40gh~SZ@ygV`B)k>y5qpv}U>ol204Tjiz%TGL~b6@l*oqC3m9Jv3R+|<+RZi_Vj7zMo!p1 z`!bvO5TYI#x2NyO;UrjbpUtS5m&y{w5@^IY4!8bc&SnfLR3^57LPpGqvu(NtVou`u 
zCItBBY0ouB-~Xv|044jVykU2<4WbvmCs^>yp$?X=OLHRN<}L{NQ$`IeX(q6 zXV%EVz%SuRR47;8nO26Z_skHpzOwC*#cg1g#O-F3EKOKLZ)euTm9AfeO|d3zfm~{-C1n=qciv9(xF%;e`|CM zs^eKoJz9vk6sJPXsl`G^GZ5tIkCT*Fs7&!iCy0D(22B;10D*1-IW4!NSogV?-%c+n zl>0#X0ij;QJ>}0uH?@Uad6GwVcDlnjOJ;NC7Pl@q7rIc&CKeo;WsK6el?}v_Rff1h z5Uv<;rl<3wPekx(!NRk@W1xOs75U|;7b>gBdmP191fU1P7!ItXG;66%4EJ;EDN0u_ zJNI7}ifVcPO%!d4XGwr!id~lG!ANC-pO_ynyA8xQ9;?@?`Vxr@VRbrxogeB|k$0ZKG@sPp%G=J&yTg zKxn}{8dbwmoWkI%@#T z)C{QD!K87Fx_2CN7x&EIMAZ(K<}_E7l{Ir(%(%~1?y~bmZacx3No^utqRCFzxZlJI z^-Dp5_m;tAG4kb3fBdhNo}J;Lr^1Tw8TJV(jYNt#g3o>&s^jD#Xp50Zfok=Fk_2}a z3hfi5qZCI+hcD9Xfh>(*q!%H=BmBJIABx6IAs!r5!*$A*m-6U;Hci$kD(lxpWF|1M z%CfXAU()AP!}T|JI-?{CP^j!=)k%$&k*pfg>6cS2X7FczK`N`?Z=qfqXU5WOEI{flPTj3qi~pu63@^6LA@c|duP{SvxRJfmfo0j>CU-Xsx{&d!nOCT zLuXa0l&CEYsHkT(S{al(b6|0GFUFIw_T*F2L}J-z%4g z?7l1*EqyIUSWj7Og9F|-1JJv?Lq&)`bXW{xAxGcIt(u-qOH&SSn1I?*{L8<|-gGAw zi_|njT?a>xZv*#epie!ilPT>JqQ4^t$J?Ql{AyFYD@^^V^w$RqTxFAA%mPlF|MND? zihtp8_qZmQ-yVL1tt_D%@zZ$`U?9V9ejRmBXiao0A=G!b>>wbsw;VVBtnbD#GE3J`gV$}yB*`1_QDoSH8nJ*W^rKJNKJt(O$25iVDI zuw53zAD?w}s-D$cx5>i-qGWOo^?u|~_5&vbUAyqLOraJb0{f}LUhOyAy!XWK_i-bn z;6VbJE`P>R^4Uy~MdA62^J|fnG1X^nMtg4w0aJY5cdyWwW;u$(eZ$a%>|zc8%i!4k zi&QG5p%U^iJ7n{`6Twfp5S7-&D6_0GW?zui4@!D{oIJ8HUgLcziS_GMd_STZAXv2i zf!{pg+^eck!TcB>`jH<=&D-DuDe}QyOm}nJaE6yf-5oLe?ydFrk)o5sc_g?@E+7mG zEQqW7hO2GQBA#{R+S-^=roq%v@%prwX@1{#D>!S6K;Sl*e~`AQuf>7InCBOwhM6T` z-4iMqjs!c!$wt`izz#K(gBF(}Qn@Yx>r^CFOF zI!O%_MwnA~MGW}@{-r(Jp$_`tt_S0t9Tf?F~l8`3kvZ>B6K&ooS+5 z9lON%$hqpr1%E<(XwYA08*p2>hJW~%tBL!# zQPnJz@YKkQ#WC`zcZoib70l`2u9#7f#z##mI|MlIkV?Cv@>?az=dIy0txD*>oc^HT zrs@VpbxgAu?YeJx7F%HrVkt^lM)f%@r~of}2k>Oe_MVbp;^ zcKccN!B=;LIv+?^Yd5w%SKPHvuxgbzD`53-Ga1FTiqTl>XNZ5Lcd?eNyhd?ON-ZrS zr5*D?XRN}{8K^$b%sG2yWFR8YrmC?luzC+6{|CZB0C$@#GPgdcvxDFO6N9l~VgmOU z-~dPeULfclD>bt{X|n&ucEV7$jEo?GBHu!O#eBc-(dj3-!4bd3fJA!Lp)h(l6G#fe z!(lAedY{e->5mq4p7ar?k0f}u@G^(7nii{BbYe01hy>ZGie>j0zm;~oiF?ORy9K7H z55IAwB;og^MWfOj{tju=FN7! 
zsJLQi7Bgs?v90!Xvo6@pXwzu0#}E7SIe0k*uQ~$`FaNjwvFP9y^Wy~2+F_B4q)}0g z3j|fxJH5KgM_|=inS>exbU~h~xxbC*+4JX(&6}fK5e48odRM$;xd-3r!o{Sr)2EiWsYwI&|nnwIaV>K^^d8sDX z>sM?wAp_`o`JR%i$#3Mp^z!RQYeTS0(!+H)B7Y>H3Bo zFVH49_reH~vnA>Xy?j25mNpMeejQM<&Bwnz|5mI8WLxgQ6u%Nw01Pw9JuR(?_AfL& zr9HU?w{h`I14x0ZPnDH|LqAOH8pVT6z_ON;HG+N1vT+LSn zn(99qjcJ+tO=3h$a4w1XwyPOAE>)Nq|ya z)U&~_%*Z-Zm(7a!(V$FV?pCYntc-@@EO>eu#;s4rk9#?nmPqd7gRJJ0sQ+osUZCJFWD62MGn5uJpu;G>ks#@M(h%3{EG35uz<2_8tCZrL+{Gm#kGxP z-7!VLA?L{exgmJ>OnOWikEQ7^>r)0h_~B>xzy}}SIzkD0+80X6%u^X_DM-RIMH>IB z_&U3A^zj(M*p6vQ5`4hmtn^T*{lkE3ieupng+PrjQl$iEKnlc$-k%EVTeX9T_%g#2 z3{EsuN99 z%K6mOLEM=^(gU8sEFY6LUUUaybmj(vp>6g?aBDyo_8r~KZEJ-@y6l&eFO-b#=V=({ z%`w(`1Sh=rmXK;Dl>TFsl-E>?c>C{Y!<-4;qTW(0h>UGF?F{}ZM}Z# zu@#%(mD^BX+LqV5zJEnl^oC*wDDeBrwL{hfw7{_ams4^BJ@F3S-O`Byb5&q=vD2d0 z0@T=lPyv89h)TlA&A}uGRCf$WrbkR}hlal{5Fx3~k=XFDL*o&%<@73ZlRlvk{-*3E zg=vHAMQTY=#z>)AI-G9FWK2}vl#h4Vp6`iIG|`=Ussv4WOk>B6bbS`=j)N2B;@9+# zl%OZWI}RzZFS5bN|Jq!)Z@{vldce)y3Hcf#2Mrzw6V@KO$=aTByXyB_l~h-d!$|~7+*;^P8*o~L*|LKi z@i`V4pvxyM9+@uBv=sh2TR|E2B2K4w{(t{<4Ue--41MU4Lxh zhQZa{Pt7_NNV5a;QR~=fMKZ*z8w!vC;M?~>0mKC%L90IUo2NXj z{WG@{shfLVCho2%<@C0{miJU7g;ZE2#^0bv z9z?AdVEw%6<7^2uWVLJwu-pHd}-^f zGsQ;gm;yns;H%z7uwCt7t+e0XYNLb|>%6ctwo_Ms!QPcw=E=YYHkwj#{@B`3Q`45g zDKtLuuN(F;4GLOMhV+*&B0V8#E`76x5t%?-NC9i^$vH~`J5hIPV99-OTY ze=ZqU0ss>W6Sr2Nh!NHh|E==*y=M$KhSd&XiU@(WKsjnAy-nx{v)>^V-8n(j94~P# zqPMuqN(elP@X$NUuEU@@F8{lc)HDHJgD=w`>Guf1ASkKca0B;lM#H4O08Us#2eFYZ zp-=ZQ+Evr>WGd5uoojV-y*j6n@FfkNOqNNOUk2S6QU&X^nRWLk*##*}DKM0{@r=6> zR=;FnGMs-otUoxqCo;RI$EG}TaJ(>m4%5{dM(beX#L;t*}w~{Q7jUA1^ zlj5fVb-o9RK%Zvht+2>x>aQr$zyXyinFR>e+k?!!)ygWb7j@63Ys^Rw0D?$B_F!rf zjUF12GY|2ufmYjcXnPMS?)^@u+k&Ckk1p*j1`@&4yxC^PX51~|j1>g32iEl1D%#a< zxGXKwwzEE}0#8ftd~W#Uu)yBKM+P1p@eIu!n;#!9^3DM4^9sp?8FxBC^?A#2h+j2k z(osumq~kTQncOrM{!K?LEOiyZtw?$@ZW$h2Ru!hKx&{WfqDXM2RBhfD*i%7J9p5jv z`FgZJE{=T%4hFa$rTo_n3u$TLCWE7`tLhH{^i3>bbmMR@E z65@nBge!v>r3I&6g&hvqKzvxiXvhk9H3ZtazJcuT2Vvg?xrc7vO^)bZ&;TIv0?1?K 
zhhTfK^k4rd)IT44%XVOR7~g1@o4zIeS0eFjCvxojoWVx(rHKodDz@K-%i?l%zS{Xr zXZc?>uZG6yc42sjJe~0eSy*5mrgCs-^nVnrl;5}I;@?>-W8X-6k^kEzt!S$+aWAqd z%(rP4@bgaTe~|g-w@-7ve(-;RDg1-^bc<&_j}znLfnP5*)Yviy-!d z)uNQHf4R-tRZl_+`=^x(k$rNfu&!fL!x1!1ve5p8+O{xdvQQS+W}Uk$xuYK5tF@B8 z$2#RDC80_qihV*8TL+Z(x0zxXwC^W8GOig#xqe0Ni zU#ySH{&jnS2Q2d8%`rL>mwNKE!<5{n)RtS!R?$`z_K!Q>xT_vKR1dSwve<_Yz$!GM znW1=ZMyMJqc`ue?lS?*_|>31pu` z6qxIE{l(mA;p?ix`$l0T;{0=%%g-YeX_Xc~{_UjNx@Jua4+HO&lTxDlSJ!)>{S}_D zHjQouTenAaj&djpVwDE5FSW=XfEfR9n6P8hdg`AF!giQ=28R-ykbQwY@euo3f(u;V z)<$|u_)jPkGsi>EHY{58MkF4Y&a}Y#bY$W~z#BWBjOn}vSha9)8_AI_;&Fl9O&`d+ zHvcwe%mO0VSt*eSWt(ipUXESCn{ICvHk(8OOy8H$Ml{B6nHihznUMNJETVkUqFKMXmo6K(?_{zANpKVJp z=k9iD2)@)wyNy4?G}ii8#>VZV4&(+n7kX8_qN((ikbZ9AYX!Oi^RRgLe~F9yVBif8 z+h!J+-7mweTtC&>(@+|JHxKETxzN&CpcvUpARJ*#+#3CDddoZPY)mpIC*DdRIJayxsnWJJ{(-DO5Cb7J29vC<}5HE-JxJbqyWp;gp z!~&Kq8l3$l1YGr&-XsUd;8fNuFNtHl3ncMnj+>h9ieCry-I}GHb)SphziN4f79u#_ z6<@Wf1d8oqskH^Z7)jC2Xr#IvQ;Hd5;C6A*#FhM@6rmR4KHycOLzj`o%RXn!)n`Kn zs<2`G=U7^eB||CQ;j9p|w)bWR&MZ5TZCSjiCG~VVdst=r*mKBc?cSRAZG(~?G40Cg zbXcOM0=E=tX6>h($h-F=i8leE!-0>+j9PP|hL6RB({q~LT=;Wvq=1texHq8Pk9x6q zdW)lfiw$(B=-!lEZCI$1_E&X{+i>3|_7vyAD(BPNd2otYep^1*MK173h1qr>I+~5?|Olw}; z-r;ASDzX)<<=&g+tW5d?WRuk$`Mnbb(omTaOqwSFnXaN(!|Z3;AnS@O48Qd)tU<@B zN*Wv845^lDwLD4yUtLlY)*4tag<1uMs&U}aT)7p(;?$zC;%*( z%P;%tieOn>z6C!*?vx#!CU%e(YDX|TEC>_+c| z3Uv^vHVJ3HsjEslgHw?qQCY!>ZSpg474^!x{#uJQtAesWXv!6Rc?`Dp*??o2-K!#U zVoRtdM^lK(M8^x64wcLgdl^ODavn;Fs{R)DoIU+fwyEJF-1=0eltV=g9c#E+LR)` z#JZ6{3$` z0ju88=QVV{?N^P7M$|4~q2#=A+`djfbG!nT?0>$q&t$Mf>>jhH+VJ;L;C@=q$5 z36HUpd!q)mn0UHMXk|U$Luq>)B25cTEh5Nae6CfQ9I|$v_fI4AnbC~Qc0m{;mly>| zfm6{@GSD!BSyv!rl$8Omk^1N@0cAxgF9Kf(JMoRwPq=!y;6*F93}uqP_NtZaAlyW| zAj^Erj+xU5yTv;+WBceTjbm1FJ0fTVdMrXz40jZGC{b>&d%^H3ukE?0C=gtIXU#C7!JUeSSp&mYua#?HMt#JqHzV z#(kuI(-U((r=bDI-6*bp1cQW&_2p;IE0Y+8i&sp<)oN0!5b#mt^7msqAUDMs>-y7G z;nZ4`^jDv*+bbVAQ)5&SGy8}NSIXprjsDj*Up<=EmIAb1cp3w zzle%A;H+(v+;2ItyQGQ}A9k|86&Cqnh9-%#f*7Eqm9J(TaWgwrp0M^{Y%o1JBi!2mREWDG31-28_n4Ok16nom1!XQ53}a> 
zO!;2CTghGZx9^MGn_CvkhDu_!~bZKJQTY)BFnQ6N0+) z=-qJT3|*_X=Imha4Re#mDnoZBA%QA+w{&r9Q!RtR`Z{wI+6FQne>E8{|HOqAlBDl@ zi`7n}fq=*z6Z7u z2Ed&Z6;PWdWvvY>Ej!N9iLomr^1-Hix$pQw^z(1z&1lO*IxWCo>0}o*@Nud4HF3xG z6N=$*Nb+*M9=c=Rf#SgWa;7~8i`ISkRXO?j!ExH;0kzwXfzhd`*(XN%SQTsDJITDuQ2tVfss z65vXGLZ->pv1evCZ}tMV#8k|Vr&MTy*Tc-OJAcmXne2?BVX!@NQd89{l+|F{6{#X$ zd%cUnb#PNnZrPo39Ea@I;e2)hK2iZ`#=ZdS*v^uCQv|%Z6baGXTtq5L_KbbSQpQ+d z%Eb9oSi`O`^*m7)(C|bmg%;}Q3F*-{QG(CaDNJN0f->wrySA?tu)9=MTq~K`lA645 zf2Wnc{ONE4?TE|vbHZhkoQRR-Kmk+stk&;ib|!)+TqU;A0henvL8E*dOX1p$#nhB{ zFeX1arvwzY#b^&Zozw|`mq?dEbWZ|%qx)1wqUCHM>Q`Nb-JUsSHzjHXoBJbTI&hP} zf(G?BI(9mox%tW_*>Tp%FmUw1=U;_&y> z6d2VoF{c?g^^T_93^BN%z6ip(_T@+Kx_tmg5wUSR@|0P3jQ>OL?@PP}oU;?^T9V)4 zR3ubZSAHt`{n=OWsWO|GvW*kneKa9;?~!vwhkAf9@$18_{+7m;Dnyl(r5 z9i>hJxOs<@jAeh_Jp{=cF31U?{vhI%!S749{&g47bc7{uNkLM37Eto`5pA{eNmn}l z#TuUcwy-OcPd<>d-i5W+1J? zY^m-Mh^b{d3_F4v5>M28+L}rB7GMqD42jQjRiF3!;73Q$94$%NZB7#xgs)$U z_z#8=o_>3qgGA3An9nYj-B!6RDJ-~#QP-T|6Sr9sf;)s`c_MW=WgxOP2G&Mxx6g$H)m0?i?dw90dM2vk?LxjO`C~K`EbPgYKMa{p?Nmq3 zNrI83xa}_a{3V)|;`%{PVMxP>zL33mib69cF|?Uom{b0QJ3uiSaI8w_%1sQ-blBx+ z)5JRKogikOT)q-WDY@8H65vV_=OT0YXW6j4+pn#T+%~dR3~s5_P9WE)$*U8SkWmqT_MC?5Tb{1R2Eil0~%Ty0+5k^JvkY( z-C!1$L(9I_UtrsjFtsK%Bb1cBhE2HN7OVihV+y{TbILM7w8B9bDNiFBIrURE-zZ12 z^q@W{-M`FHflg3S4cZou2*uAk4PHcoenUh*_rip^yLc@k{5gD8UeZ`oQ?vjrE7Kdl zkRbSP;Q2WvU8^}R>R__cWuwLe#qumw6t_QuvHFERi6B|8#k8lNtr9sj=wCXDiG#hO zG7+JtadSA9MVFY>Q>l=fxPke@WafZyc!D=3y zr4?CSVafAUT(2MhN?vVd6QM1E|+qsQcQHLLo>pg>*Oh;D9vQo7r7JCN|}=l)6xCenoG zv3f_r!EyEPj`WGu{g7@tWv;zaS=SyS->$ZKvz2G!fb|K(_dnIh&7w%+TTZe-QO#na zNMoUvT~N!tDwb^E5=9;LJR#_ZIYZIB%PWMUqT}+w&f8J`R`uQ5? 
z=?RAwPp)Gy;Y(bVkh6}-@cC(DbH~X9@*xUo2nCDQJd2-mY-}{9CiYW@d`75bud35oM9paRt;>g)LbG#zvbPxt4ANK5wY{)8LtZ8uTMSM z1pJ&H5FnVJTrpWaQligryIFLdM5sV$gyB}@6Z=z6$i1^5g+f7AM`?B4S$B9%-Jn~J zKLa)g zQLCp*|HeBcN$1QaE=|kA!l@b7SiklYos>Yh(HV6>9RTmuSD1nZk(mt2Zg=xJh7k-w*Jr#>4|E$_2p#ZP*6m0@oCx zYzAToJktkShj{9t`VGX`jpl}EN(|L2gcb%fB3v4WVGNWe%n?Vr5Bwt3Baem#>p(<6 z7R2$BH`nVg5>0T-Pv_|=d0qf;D-8Tk*F0u(6c14E00k!Ol#pZtn`N*@e;s{2E;LZi?xg=yH@Q8z7oM#@WB1a|+BMZXNVXU-8nmczNnVsZ2NkC>5e-hVA4)Dq!L9tj zUnynMQqYCS)^Hrr4iRq=qZws3e4C&~anFLjX*vLp2df8XE1FKEt}$#K6DD&qzv2uBo+y=!r~L+Hq^h?nBphFwSOsL>)g2GUSuM+(pL<`oze z2^0jv6b7OOuneth=nYK_=?#SqtNNjLHFp_y%LeKOA_wFmw~&0Ph2o}D>=X5+tW&5G z%~P@Bf2R~mrAtCc6-!;Fl&7}SyvV>%7SSIN52TF7^H4Y|SY}4#Tgq?zr8`BiM1MeW z{)t0dK|1lB*kq~z@dhJ5IVpLa428C7mQ32XMw$XnM6s;Vw4yn9y;8lwDy4>r2Ih*} zicW2Gt#B=?V}#?R+GE;sdG#`Tk)4EIX;BMN zPtg>^>|N>sPpRsJPQ~h3h$15?Z>hOano-?RuLI#x%w)aNY2{3n1my_j$r71TMHPIN zmQvBuBbA&vlVgT^G!N*GP=TW2!ovy*W!}=oDvq+s0)paBoes&5JP3t8F;bxdi2^Mn zDJ7LTJ*NWGz(aoefRt#}{6(P(-3n|Q{0pdytksIuDZoAei)V!go(B;?-TJHbq&2y< z>Au>7!(+rF^a0}Dz+3u5Cx+j--aC$M$Ox=RZP=v#gd6dPH35T^9 z`wiQU)0W-GB;NwXrgo}uX}_$b$Rffb`NW%*nw7Bb!W7@sZv0_F&>1sH|(zm4YF05^d1{k>FdGNkISe)YlqLAa_} zjRxC=g$0;0CDj}q($0{ULjWBG2T7yABP(m1#zi|zo8E&OxK`wXD%9`r;BL{G>WBB_R^fL;j)naBI5&W>wu4c$Yxqo5J!tOPb>B+UQ>%_cz*z!z#L1rDh**kC56^`S8BX|ob?}e+XSL|o@oA(z(q)w=d_k_`r z&KTze6;paz}ed-&^M60)`ZZDQ3*o)7F-v2;(TN zhpYaCf7!rT1T6vzA`D_o%5!o({w!bp)AZ47|LmGcrHSl;)Ir;*8A2T*BEEybta_ey zyxz@;_+_kXj0e6=$$TZ9KnQ<|^SPvoo640Ohv0$=t@4qYZ^_O=)FQ;v|TLe_G|%GHv+^oQvDjRx_7C z8p`d`EM=|nZ6Ymkt)DEeE~~H1KL?&u(|Gi9b{(k=6L(GMs7Xaf*kd_ zJ*8j1VUjX#o@&g@`HnozCXZ#OGCsE6XLs$|7u}U^EIe9qENND0>DZmrom+h_+%6sZ zlzpb)E^@9ySVQtaq#|bHI^wk2&wU&oXhmr)*+H%o;3434vZAoEc!YW%t!yNWvq)eh5kW~r1xCn z$7lOE_C9@ENoz;ECA1V=^*aARyE!>IU!7{6BGv8)R8juh<(=0G1bGVG}f z5zGU+-1~F?t3C=0_AXe{ScKhBV0@xO1p;UZxEr%)ef@l=rG-&NqRtigSW0jD;4Vb; z{!Mb6@1evR;>+?2NI(<(7M~Z9=^MdjS}3an)MaIOjO=U}42A^tj4Ukl-xl;v?zR8} zH+owq(*GLd{~1Tb#L39f!X98@XG`>NTmwTpX8=D5$-hAV^ZTznP24R0ZzNl%|DM+O 
z1R4Kn7?~NE82@MNZ&$v5y*%<3ZYI|1A{I6#woczM1X#E@`2J)6|I+;5i2ut|9c(Z7d8m<;*7dtF~K7iQuN3L$v`^ zNJvmneS0HV)tU-QM&On1iZ2|0ouaRwOT?5E&KTLZ?KiGRq#QZwayg?fV>tiRaLqgA z;Pwh(5e(sOdw5r%xZA;t(h3R!j<-WqZ8QxIM@g=^P57%}Dl{8v(em^;%6^#8`W4@KBZS6`7VS zj{uDIG%d3yo5j!ZnXppY(&H)*{-O=P7vPaN2&saNt1pUR@%DJ^vo;lCngC{8W)9V+XU;_!PNj^pO!{ z4m`kh(#X`E#JxpeH5C}Yy>rxXXi2Ln3)Nt&Y6>{+{G~>#{#b6D!ojI=nmC5ZNgG{+ zTyd>kQRPY55t)|#D#NB~{-lykyOUyat4Pqx9OEWTI8?i|U%Sq(;WB=9aNx#RU*t%` zoN6q=a+u>p!pT3UzCp`5W@KYqLl$TuRG%~yXFaIEhFK)}QcuEK#)Mw0ZN$u~_wYko zE9-qG=qyeIeP z3Kv;=kuH`@ND$XZwY^Tl((iTS^5x$$ z7xyyQ_My?(kFT#Y*yH^RWP88I4}Sl?TI|#fFmw}JYA9njk#rn9aNwJu^R>;S<2(}V zH?>aothqtsH6eSshhXWK;^!Yz>FH$hH%c9(F{e4CJU-d^IhZ*)8yIpwJe5evv&3E& z7F8a~#i8k%Ko)&gI7JdZPB-RKmGow%*=-VOI&i4j=w9hBni%!RQm&?+_&vL51-@Q@ zI8{@rlz*H>Z0uDZYT>9|ybYJ2%AtCrySD|;H!WumYa20o;=q^&7Ynradf5-aPmQsW zTN9xG7qP=wG+s}ymG1Q#WNpghD3T5Ec#%}$QzKw1magPyW7iu)>m#M7W55_qG*x9_ zok5Mq#U%>NjtIRNq@ricfgt^JCnS+*14)!jczesZ)rC^9_Gp(O^h}fZ0su! zfaB3~KVg*}_hCHN#n&}r)pt=v%de=WF@moM9m(Ib(}m!3EV{a)(q22RMtrZI`1l4A%RjMKpuTrPvP0+q=^F^Xym zU>O@7ukHvDWt}5mJA;)4%l>sR63En~JvvgxjzeXdAuxh#Qzd`+)51`k7hAVTP5>D@ z@nPRvqCD+|OZI)Qy+e^vqm(k$<64?as!DSmiwEEMhx=Vw!?>~tYPhb$9Lm1r;EiiL zX2eeXgVCv&pDO5V5P6^$Wtq)^o3?Hq&C{&yI)DF+EnuhUk}SMM!sjEBV8XfJM#(mT z!0>hEmWC}osywkhLdICW&^VFb^UETo=ccZs_=aN^W^@6I0|L@xd$`K`w%JHcu0~|7 zBZ44nn1$t}hqA+GK@cLyDNU?dbG9L+WDs7ox-H6=T2w?H697Hx>^jTM1dS~=2U+B1 z`pi{0t+GhFgHY7jvag5i7I8U;ztQF*p2>{4LR(BST@1N8_MvH`cW7CqnBDm+jk~2I zM_Z!HXSAUbG=Qu$9deG&D8G(f>6J)%+%)v{mFRh8>|BSwm99GErU zw>#qYVGap{8LbRq{^u9lpEB~wcwpk5_HiMMY!#G|d4IjRRPH96(NyVF5K$F52N6;L z;U<`;dqO?qWYU;;+<^%0W7E76gw=snUQlc(7HO ze`ZcLyN!TYPB`ZwZBZ+7{pPee&?>=i@$ME*ic{d(9T=8k0~LqFfbH`!_f9;R2)r{Z zlj`)YruF)RPau-U8atFcLrS4FeDD%uly`e{&8c9A9bthStBGLPXXKHog2qp8I&iq` zG^WIfrswS({_JUa8E|N{1=FBlAVZdi{`$Mr z{$+LqQ5FJmX7pI@U0qE|_B+A`7De_TYiy9h8;Hmhy~w%|FhbEcQaCadIdO0G;lU10 zbD;XokA^643m5gug_eyAxzR93mEVKSPgdv=s#Mc&``2^?U>$c^HL~-HgW5%c-DR>3P$Yse zc;4u`KPIqfg|TtBzqw#G$f-{E4>TCqHw^#uU)hKxK;JV1vxbHeTR+SExowSl;>uiY 
z4NIq-8(m$r&}WmeN~p0^#Cl%^^suqmW3Scb+HG`W#pxjo*mXd~+2uW=;AeYvl8F_! z)1{cdCK!AOcRlr>4-B<@S;kx8i0V{rJUBwmH??M-*LYki`S-`uLkg`7Hl z)EisXLRl7FuN_ej7F-@tJ!8p0Q0F>4MjQWL`rGoo<&0FZCMY?(C|eaC@D4z zr(G8mU)pLskV*zVenpp-C0X&O<^8=d%6TWZa8KRg{-<73PBw4lGI3wu7KZSsku&r% zhNTG(p`sbeS(?#-{2wR-^NRJmzIi4vnhE@VCffmiOneM}Vdej1Pq3h!8~(w3$Zsgu z4GwJJrVWIw@lSq&GXD&Q-g5kJ{I?AY#s)uiFqY~)le2vi@FUc0s7>BB_B=UX&K6NL zIHeRSOpRVe9ZcaH9A1-y^j<%s==5lPH{?ZsU=jTPP;iFs)CHe6U-~G-zx| zMzobMariOT6TxUQ{Iq7oC4<2nHRdC6@cj`FLRNa zaMo{DmdvOhyg{*G;uu63m->Gy)e zV#8;f&v=`vWt%d0j2ZJ~YAa8oDvWk`4TvzGd{|!|hrCfyQ-hp~w!agRV<-F1SC*Hm zpX$(m&mJ(CoBO$@{u=6j{Bu3quRP8QJSCg%CvZKq`ofrd#7NPO_C?(JP7Y-!`8`uR zJP^x@QP+cRE<<2PwW^-O_d1Cv7?(2B%ymfCt1YOzfyiYqUlal__X_1dMaQ-v9BJ}_ zqCK&Bd_IVl6PUH-?Ttq&QvO1nh3832UA*AAx7zxu7}mS+ zx^XXsqKhKX`VntE)$*jYr|FOfVe`8ijQqQ9HQnPQ1)W)DW&j4Sv*e-9O%qtiG}xI@rY%J+1i#$AVr+5S?6vVx1X#YvAe}t(bIe7}F{7CREFM{--dg5XMmh zamglr2cGMM$dSME6Pv9|x-&jmuczO$fVPhLW)W3}I)8|NsLO12f|GhQ7I}v^8TLT) z|C)t4i>q&a&D0&D8*XRB(}G0L&2LRt)d4kMUvAfYD^!Q0X%2M*E#v9Fg!>%JL&nU@G^%bYf{P_$SmGQmF zS!@E6Cd`!GmpOm`xsW=5KRd9b7ga;At?6Ivh_7*CXjSu#OmgU(IS77=Yw#+(!>DWR zWYSp`UW>i`RnFvdO~cgD)G-!i)HGiNS za)e10e1M>h?HVl)fwV%%l^hS2mPOqY*}H`sr*NBm8tkQOs%R^5AJ zjZum#bVyrj<$w|5rbW@~5u8+AVPPKDlA|!Ds4(@ZkLQW~QRSK3npq(&D22=^!61o= z4QHYS(9z0j2j*Rv+1{RA<>$4Z&ER@IlF&B;CLl_2lP_*vi_UXo--+@TNq+ z7+#ZRsrT3G&O(t9QscMLWxRuSySIqobXcV>gpGYvIGKl^ zLOp20$mboIl}S7GRG+5>zX9l3!w&hF_n9pNS9lXC&QmPQ1LXHM;< ztZbLG^J1IB=RSp(R}Cv5?GqWJjW!puUgW3(4IXb@Cl&r}EvBjF>I)C4F^ zQYB(mBQDm3;4qR0vc~oSKAu5IM{IGZY42=`{E2~c*m;uWg}sN`2s$*p6_S6HEm7+h z`%}bX^vUy|IIUH*=(rL%}t50EdQ5RSxG+!qA| zXBeg7RUtC&PZ~$wIn0o8rzXGJU6ueG8M<0RPp`CSxii{|eCW9o&%?toh)1f-?7-jKLY8^GNWDVC?<^=TYTzg0n!S=t5eeL|Uq)+`ETIzZ}RUy^A zfsXi#iU3S}oPs8+3?<);GrvBhM)w?6god|fRtRWc56LA4f6D_#Ud)s4O zsVAu-Md_KW2D{=+wq@L*vLc7;)oUtCOX$j^$T~Bq=gDigMPKO!%ZMEd>NL@=iO?Sf zUVQDIS)+4>Fa0Q)U?3p=TG;+#xY=6RXn7g`fTz|!;7Q*Z(DvQP{f9_ta-kCbKNQ+u zy91kRI<6ZgT^N`k{)b}vH=N?>83(d`c7t^tb~qZ|!W{Kwe>wp%9a5oeIpQpP@mUjHT`BQdkR1INZI(BTKnLVNyeN!y5 
za#eB)(>|bgjf_gc-V1+FEV8TA9v;>Aoeg-v?DSD4s0B~!a~?9h@0EymEbQy^4-8@j z&!W=vrA~{@Zd;kbR@1~103A6)W>+hn18s9~QS#)SS>18k05l1gY+SAYUT&mU0V=}) zurgOP61NEiS3Zop297#uRBvzdWq&K&`oN_++W(MHKsjem_S`uF*88MwdLnlzgq=_xGsWYkhxR7`d@ z%QfGr>x2mouBYLvX5saMgyG?L(2iVi?n9RA-cxKp5M=2NSchuX)-Qvl7$h0OJF<6x zE9w4#bF=GJ>T}KF5TF4)t$3CIV|uYOIj=5`xtP&L7S(SEk{oEQY#kG*-bPm9UfUS^ zwO2}OQ!2bsM91Vja<`Be%AC;exQPm7V6lO!NF`40gvRve9b@W(K{bhBQ1-i2o{b^R ziq?)bdxm5*R}eUt73%df)#?fdID1zT$z)u>`(6VRSVsJ2F|2BIWl{8yO-V5n{%f@G zH?cSD%K{||%S%))3^Zdop_RC=fZV^@c*`sbRX%|SczvR8uhnY|s?1-dHpFhoUvA(V@k*#Di2I(V*aq6)ei_E6z4>D>c7# zVQNz}vFJ}H^U;KK5{||{CIVrVtcB(mGCc!hS0LJSH}@cXlhapCN(=uY(w-PYTi?`? zuu6l;V5T>x(|Si&)@ay#Bou^gsGs}s5q$=`P*CEbIK&EVyRHcz+@wIJreQ-RO7E_k z4V*pJy(HnawWGy$GMom-f<}?LRPYT(=WIWTp{Bu!Uo7c=UC4>a%R=`OoCKsjy!vDY zj=IBSsJh|OxArzsPDp&FbJ(x`jL7Ol`Zz|X-$8X`db)PN!mVK9j?kCo|I&`xmEE6f z*|P5X#9R^E|e^n_~tVaEmeX@`Gh=oI&0>{YuTc_Cii#VvC z4a+7bYc>;2^E4)*x7rOZ{$HLv%&NU>c&w&I#OrUQb z#^e}iGXodj-Ktf>dUEcGdmz;pcZW2Xx$N|(NgF4zogRB^hZ7t;x2pR%w8^&ztj35O zxd;^B9hf>%%Jr_9KddP3_$n__>%O|xv$ZB4^F&RQ0VRaw4{gkyN03)B6WbWOj=GB9 zMDPV|Y>cX6gzif5SC6E?M|GD(V;>_g0`HL}00$qT@IPJ2XJpGg#CM9l2yy~r=G5JM z97*^q;7QzRvG;>JTi{kVo-Uok z$l96|b3ex-V>pb?=|7`|+b6V){1hk}%jZ0+`kKqV<~W{tvR!+|37B|WBn~ole0e0} zb21@c?(@!SEzfw`ry1D_34kOJF5}WQfr3FCOz<6E0#G^>Mwuo-Kb6Lyz3%f3#-dG$ zUbNQO#?TL?b3U-s%et%MeOi_{vCoJcRZZCfL=AL*i3@w~Ho^NUSU6-&b|M%R3~Xt1 zyx0|Sp|+1WY{?0iK}Yi}eYyvCMzna=d&#;gSFJP-#6jeenoU(T1|3?nlI(^s1mI+8dc|D?O zl<#slhw?X4034)=`FPxL~x>#P&kL|;sKd`bh&U~OPzGH)d&|A43`}%6pVIT_}reI0^g!cD4f~x<$ zvy#RNd-48F2Bk6_A8#Rq%sW?4bD(s}sg@IPW0-Irr^H2k!iUs2!0QyS6*?Uq>Gp+B z(AwMXp=cN0^|;47mZ&~s>&@XCTg?`!nq!C;YdU|kH&j|qI(lV7o98=dJ|*z@iSqXI zCQN#~q`_U#o<3Y5w?^N`0#jn#RW5-=EX{`D4D9_vvZ{xaObWN(I~;Mtvc@%grH`4woMDN*z1C{`pFU z@8DJ5)et2)1i3JpJ7(VxG^K)?yJDIyhOgnY9&JOR@vxOaV;OqyS0`BA3kpINM98Dg zA&LnJl0z9H(bER_ux3Jd31W_zkcunVOpHD!90D4r6nngo{-rp$79NZkP;KG8GDpDV z4=P%&GF7!eQ|^&n_cwa<$@f_F>LPA(lnO&ngo2LtA|Ir*Q7Ia(D$5j3{-j_ru@S&z zpXyg_QNY2t@^K#BpP;)+Q|>vV<@YEzEpNzH;)s3MB0bZEwmWN{n@%J3o=;1SL8V~2 
zK?=F!du0qe3BzK`gcECM>DnFtX*X4po!o^g9s*Z^&#UnZth$sLS}ISo-`Xy6r?#Vn z#<8oXosTx;bxBvIyfic`@644KdZvU~jn$xMsWr#*h#G+LF_y1?9Lw&DZdAqR?}z?7 z%Ew7#!nuw8#5c1BwKTP%B2YRUZ;cyKE#5nV;vpnc7T)WbQYVFiimH>CTKBRlRqxrE zAK%n7Wk(r9?P?@u+RMAB&3C0(+L>2;{=2=s5%EnmmW>Uc7p4+DM3d32B}k>Q;BjBo z&X-Nq>$Uk6g!SHioc^4UT)4sy`pum8&65D!X)ppg)8ut@fQ`Pmp{{E2ymXuo%&|uq z;08xd$Nny7g@V`YsJqTaWMK;ot&NmVA)jtS0j6P;&))I$Q_o@d<0M zx39i0#bP=W*^$@OJj} zX}jh8hp&G{Xk~87=@D6!zhH+dxOjUQ6$1x(Fu}3Nllc2eB36*MdTL@gq5xR_ zdkLwnwPe7&8pJfmA09Y2^5k)f5%(gPE#xZS^Dl(4RQjPwrNX(!ryji~^v}xgdjtHk zr3ThGP&%YH6!E^xjiAt|lzD5{l5QtsPR&;%N6f)d$a0q%KDvah>yghk3}6XgE_n5Z zLGDvHg?yN?j_hB@vA9g5TqqVH+lQx(E6;9bZsy+vYIXbVI zth;8DC{aa1W9xHBxk=i-xh_0+h##ro=^XQ!eT5aSR3=rtKeTr}rU(B#Zchh9U-M$v zU;T8$Eb-i>4rh44t%pd~FJzppjw_$`&QB;FL6qS6hWnr92aA*J8d>9qe&QYlu8cgr zUm_0K7^J`7V(MR=O;qG`>7$}ek8%E(X9?~-*C=`!MqXaaJ~GU{G5`>oNBY*xgH0X! z<;y1RR99y=nQ!ThZU)bW%@=P$4J@yJ0d!CgZp~l0*2A=NI`x762Vw>`@ zLn^EU?tHavilq5RbMz@c-r!je+(8vnau0V;Xf`T%Tm%%sIhm#y_;h6-zoT_rYk)Fs zmk42@5ri@7vgAr#KzH-bN!`OVCyyRZrAGAd0 zbTvKvaYh>1?8Jl+h5Px{4a*d@8MAt5uW1!jGXtk1EG7nR7x?&bvsr(#jJFnAXcgFW zD3(}%blvcuvS0%t59<V)9It}dZe0B)k~Fn#Y7m2UNr%Z@`-nv z@fITG=jD^sMVjp6^HHH=*u#%wnA_4LY( zua+~L>@M?O-xj4}_HWJ*dFm^S35z<3XTE8UiOJQM&wc@^E&%{LeupC+ZOkI-+Hg&2 zls|F#^)~rPiHDnlUWmJ=F=Uz8KQ=@3@sKZ(0WEh}K-I~0c+!LtZ#Z5C*;@S*8uExY zbqAtHh;=k*wGnGk_-wjHk8Xnbs2kYg1Qy+ z)LmOe8dDC-yy1aeCw_wV8&6rLv0`Vt;2Thvh zIfv;o2WE8FRF+)9$>|Vs?y4xkO-28WjT_+wvnA;hj$j{(807UjwbY{jn{PMia{;om z)fuxsPix?o9+7MA%Stkxl$vl>WhZRF?M;pBy^2^U{O|Tv?g^CJhf3Lr4h#Yauz&YO z%r||}^ah3W3JazQ^bMvofdDf_Kek=Ozwy(WHyG3hI0(I;-=SIsMw}>s`uhNIodPNh meagnMAUGYw9~avH3!FyZ`@})gzV+X2KS|MFB9%Y&1O5*r7Xdo} literal 0 HcmV?d00001 diff --git a/doc/source/_static/style/bg_gmap.png b/doc/source/_static/style/bg_gmap.png new file mode 100644 index 0000000000000000000000000000000000000000..039ff6b78958ec0d20add9a0663cd275ca7270f1 GIT binary patch literal 12925 zcmajGb980Fvp*W!$;7r#Y)ow1wv!V~Y)-6+GqG*kwrz8g318-W@4LUd-dpRv^T%Gb ztE;NJYo9)!&#CSmp(rnb2!{&?0s?|4B`K;50s>0e& 
zM2Hog9n7t4%|JjTBa%~~RaK=hL(e;>A{J&$EuE(lD26Fzpp>lIep?C8v$4^dn>j*A zV4$KZikpVR#Yp3;iei8V`NJK7;;1e-F%i?!zQz@%t@5{f{mFLOex34Zw%;t%aGgR1 z`HPu2tq?!~B6(;A7uczL8m}}9+<@>0!5tw+V%5U&PR%72^k3O~z5e5iJt}@tH0xdI z_xWKUmq3uy3kCuw%oUT>D=qenu$M*OMT`!DK@?$AF?lfUg4#C+Rv;8?bDUP+lXZ{Z z+ylPtEF83n@<9N?mo|#w2tq7J;^c`L6|_J?Rt{Oj$B`OV!`c^9Axeq2TM2_#R8%y1 z?-cC1|Dhaq-<%Nm*JJHg4^R*2ay$*pwvFt=^au`XFSVq0%%gn8GZs3hPH5 zBcn*tv@zpjVjA{Y=YmB+3A9^Bdl>Z;Jo@21tDe~7UmBsjw`UB15+50zC>sYlx2=px zkmzgTzyqwdX%FOwKdffj?K#Uao}0{mHI^fHL>dt5U}#_-hcBg;WOHL-1Hu3AYx9lQ z_FUH{UAUVaP0ALeza1gk0sY{w5pGLAFp$T0W!Zg1XSP3Jj&A-G7^VyZGxx{{#9&Uu zhz9|1&c46^R3>Qk9zrf~R%v$9t;m;a}t<7A&hXiQIWDZOjD3F5so;r1JJ)hy^0tZ z@J_^pbD6}Cl!mcyZio8J3HaJ9K_utHxXrACcfl5p`DWSF=M49Mqhu#I(4{{%E-z z`FDzgCDN*7W#EfZZ4tO)ouWRXCbOz+1a`qq5?=ZJGxR_nPgYOPHVnNed!eUXhrHn# zm6K|0IB{4L|Jr^grg&^5a|TK*m7k?UY$l9G_9m1}*R`7Th~+_zL!Ku2HBu{xE1N6* zE0nfaZYaDlSwp*ycU<6^;EX8417>^9PGOG2PRC9nEwCHVe6ZD#oWovw`L}#;Zr;pY zgnmeZ!G4iB6p|T_k;#Qjyw0`&Sl@s*vH3WH4nco`=#^$vQJK&suS_g#HZ4 z`r9L#t1vEICE4WHz@}0KNHzid6l4_jvXr{!S+Z#pT4_qSkwtPQGs>2b4Jr*rYgAfh zT3D+dt9o_Obs}}F&XLYj&bQ7#oaLOG4+Tc&5BU!g51&Sx#{^Zs)1A;&C~B5FitZ-# z$%t8rd5NVMXYbJrdP&zLbSc-&K^2-v`$*4^(T?elc^`_5VI>=s&8TLoC8$QKPL;}* zDXS5vwU&vM9joQco1HK|V0glIh6xlF6&zJks`8dCRdbYA2*rI|AtcP7bg?S zm(15OkycTgH*m=}4?5y!2uz7q&s!3z)UU*`Be;aQ%v!5ln+6^Lv3XW`5P6V+G;Q*2 zr)|k?%?~x69G;_|VUJJ`Mm{p{qVFv4cu+L~$)L+SHNCkcLo-Wp7MfOE#-bd~%wsIp zOt`E?IIlPkoc8R#W_ea`a_=a{ZtJGB6mf1LmqrrY}W$bA)b#DuCs!p z0bm)>^P*yR!mY~9_bl%?;lO$0FuA<6yr*&1b!c+RE>S0ycWiT7`KkHK;Xcq#KuzS zT7# zLa3h8YP4TmT!brEQP0sM>k4f>0@71*kTwbYVP%ce8qJI`8Au+MtKHCq)&i_dU6B4U z?GW)cdYgv`3+@ZHz=xjloifc+F?QOMslsbcdgU4AEhV+0f%Ht!fx9i?lOFvvuSAlHP zznra4)f&@W<bYA7X>0bQrF zX~~|5V2-chY36utU~b*4%1rK1`mlY>0;wJunZQY4 zPV={Jyut0M#8s?&j3xGn>huXCQhv1?bo$9fMU+M1R&n2kkKMPvR zi;HxNSZqCreW9cK%)`*~EjgApY}R~hqBdiKu<~#U0-xM}j(1hMirW;{Gwu2EU5h%> z)-spD8Y>*qtmSM8?4qpkY@e*IuWGI>KL(#u)8!kd>gri6;HOZh4>L1(b9{W;qP8FJ zz>fPop3<*gvB&^hXIiuKexpxw$>Z6nfIn>y+1-1Nh4*Efi+^l5mbI&O^c+s>FKj** 
z@0O2z%Rf@^mN?g-Y@vCeQjxRqoN?P6=iiSGb$;qBJ3wy`;v?a8v7)iEc!v2LuWlx_ zS6`cLW^{Hxf*uKNiA01A#wHP}eH1ltTI2pV!9X0WL2^Y&)<9<`2a@4mo->5t(?9S6J_q1&Uox~n755Rouc(8(p+q}FI57l| zj(F)qh46r{^a&q)HvEKuzYozi6=gRTn3(KTg92Fw?ZN8Z*tpnjZ3U=F*1LnANE^%? z-iL}kyh=^*J(k)+eOiBl2xvpz5%3~2eKFWfD^*RPrkpH~iGv-Zk*R~R8KZ}t;}@X? z0pauD`4a8SfJVd~cDD8|JRba{|FPitlK-gzq{RO*0ow4BYRV}Ri#Rx&5wkP0F*1<~ zz!4J@^EsQE^C*jo|F`?s9Y3ih5a`GQ0JyumGrF@fIyhSZn7O&R0Zc3a78Zst3kH`T z_CO;K274E>{|fSd#}PGiF>$tX1X?-R6aR~AWbEJyNl*U|rc{%f3O9#;RSCwrIw zp4Qg{0smS6%#2Kc|B3zO%J)ypqiE$}W~(V`WoKsZ^3{g`3pW$rf9(HXE&r#-|K+Lm ze>}Na{vB`T!q0eb0! zsH?iT^t4?tPMqX%n{Mpc&k5fad-SW6BrUL$)|^t>H*R@YvEA2A>R~ju1viD}6y3Pc zsLMLY4e_UH0|py9ydd#CqDMil2HGTD032B4Q-@7X?Ct~;w0S5F=<4eeo_S8=lfGBw z=ayGi<1)@4D=I}W1$@vL5Q$tI)RuCb*CRa_2re*uP*hO+aNG=-#YJjBHIy}8`HJ|M z4Q%ZAVfvW5Mp97k;+_io=&7NQPW07TZ!K0B)X*v+ehY~*awLy|7pPQqv=A>RC-Ceb z)XlwF>H&R^65i_2a9_{=J=B-c}|(&fkGwWi#zIC1xkbGIZcuj%hN%%sjIAqDU64OR%^MGh7OF;o#z1;i|>8SUNNt+GAx~=lg)Vn-u#COWgby#7vTGJ1H zrLCoRZaW}qF4<}*)RG+X5G?G%_s>hv8^-tK`223B;h-ysa{8BsmIp z%Pr%p;o=Vp#d3UwP|73?_#cENMO!Q_e=#cQ)Sr)P-icu}&clyEOvpvBE-i-*!LG{6 zv7{);#%PT`u5n5#A}CqvwU=E$C{5jvhgzh?b@1sXWZV)l;2D_FcTr=&sYnX=z~Gn{ zX6rL)_6YC?8e!2T&CJQh%*;=Q{^a$E;v}3Sy;V?(@Sn*r1l$yH$7UbV%G4S3}GQ%S*;2Mi}o~VRyF4q zsnQ|bc1Vb@=iv~+FCxbisU~A&i;pGCZQ^SQm^v;RL=Ga)-Z*1`qu&|D#J#KKwO~J@ zMhLt4b~V5}&SlZNw6X=%RhPE%k{VD@he`tZK9U(3@(Q6yGOm72JcBp^mnYb9i&qLv z(=&fgQBp88RZf&B`dYQ+o{Uqr@0J5i`1_HI^yU7DyA@OJOB|G4h?c(7&%OQ<;*gYe z(Bgo1-+^M+KtR3a^p0mN@G~MdnLeQCml&#Jl48krGB|{zFx^SBD2koN{AnU&lH9Jw z@x30c#3!!-w4n3U{xoUw^>9R#odQ`~i zo-QmEfLHH=lS?&WFfNO3R7Si^c2@!0ERl+O(_=)ls35VJ{)bWon>iZ5jdhide0coV z0;{OoP7irCM~cu0!&J&q^vU3A3^=DK=23VZ(alUm)kT3mTG-84XhvwH;6#>NA^7%M-O7Pvz zSCf(Nr=@u-B{ozlnW~w%6Jz3ui>HDdvK88L*vABGqJItP^G2_>8;xY~=nDtYM~03y zSZ&Du6xbPyqyY`hbS+-_OVu2N9W=aKLVwF6CL| zo>7{rqXxeGDgI*xqn{mfGt946=x?Dh!~n_Nt?cW8F}L3z|7QrBZ;lg&=Ebo9D@;g3 zCm12UqBsBjf>pGP(3LPjYLrVWcn1=SR!T&X^W|?(&*NU69Cw_FZk#PWkkHk{#MGpS 
zcTsrRSG5g*C)UX0u{CX-PG)4wkld;nVUh@Z3jRbe|EdxqrqU~O`s8{0B9A2YbGv1705ib=o;Rd1@J2M%s9ugcz1F-?n(Yy21@rJx97}$0m77&RWRhU5K9VL9RF%JL z!fEQzZjTd2&*ys$GdjuevxiW|CSupv!x7ClRf@PbPB1`+xye@WxJMuVqh&EYmlFQN z;r4<@9TH!ptq0)?qb5LhQ~$dKFK<1T`#PSi2bD?0ACHvv`!j`vKbhkzYRxkpmfHZt ze^I?2Etp?|Wc8Nev~+YC8-CEu^>RnOIF4Vu&kh{4i`@oqf1>dKroj@Tg~WQczdvLj zrUCJv{Qs-h?jkAc3b>2JW55+<#Q*Afh7Cfyilj^+_A&9na@h!lXOkD^@+v*BAk!wV z>5y9@-fgc=FI^+&FaG7w1#7km8FOoZoC#@szR#L!XV;y8wmbV-mu3jkBtu3dO4uOKjJVD5` zf&}w!Ee$rSIp~XEk;H);{0grQmd;;ZX?OOVsK4Qq@2gxfKTcgRw9@Of?}yd@5~vw4 zbc=6sh&-&jYjSK6U0q$>5Orf1gooI`fl3WJT{VB~NHN{_qQsV`=6ZcO*-3C5A;h~kQK0quFI>1Ll`76(Q`pl{yx?k1!QmJNR6!CQm?=hbsZ?~sEHhm+h#5R$VjD;6SyUdOX=^3TFc;iP8{ z*d+`9`0RF)?=#&J@7t27dBPTOPBP#HO~0#QLK*H&1yImfktMxUxJ z6aWjpD3>on-;&FMmVSMJF(BdaiB)vF@>eW6`90Lo3S8)$3239ku5r>W$f3cYz2{9n zD*s?u#3kuyJW_Dk%?}AOwmont=g&K@>pfP`2fs#{+ZYI%)#2asR;msbl?-YS&XlBS8Jhf%@Km7w`KArp#9%;B!45=8ZUK zBW(YM@D8_+75RIM6U1yb-R$|~JP0qBZv!3UL2>_OyyV3AS~$wK;7se(f!*6d7}{MA zoY*y879VUIxNBNqgApD%GTEcj-IzJquy=U_D=HRLhp>AofB0rAl#y3kRJN|s8QTPY z-}6cwuxq5NHY;Y`)XtvR?9v7p%1i(&qmkVOj>^IT5b}wQX0*9R+ns;LSyZ6 zCEp#HnW~WA(GpJX`E6GLSQONEEj02%uiE)RZ8xcFW%CEOV-)6edtLsRWt;5O36+VS zkRhA-@*Y?NjN=|GMYtI(f&||!eFc9T`uG+xA|W0?E1)vo^n7$eZ$clB> zoIMiUgFj!Asv`8ga56Awp`6nIe7wvG78LyK34v=k%p$iV5+R~w;Q1Chna6SO*?^&k z4fW3$;l_c2b6NCkC_gN_N9yDp$$nX7o$a8}VreY)pDAULi&~!OqR+Wz;rR?^3myh@ zk5D)Xp*pEK-160+r2L~~@KhUl%`$iTTuDN|P%#NRnG|vq*2N>TC3Hsd`Je?AsOBf_ z(GYEB6|G1*h;XGHl;ek3EM7h5&begrY*e~bf2q0#^QTs@88)WtE=DW=~06Q=0lQvXu!n6Aq;=t?0=I9xGaJ$ z4>|Xurlx}|w4yEnc!pf1TpHih(?>yOl0Y~Z_FBUk1K`oB2R$*8inwAm0UF;1pdkO& z`K1OezF^bzxbOQvb1dB)V0czK#Kl!YEK%=Zv1pg6xVmM%9jQu%wg73KZ4uPV&+2lG zue^kYzSm)6YrH5!tK*Gm9M)O^MSM?2aFMRbJHbDziqJ+-1I zc)v>u(m}Up$vJ%25F@hbl9@A5+eITU$VoiFBC;+4I$#nsG6CbRti9n>&IyBS!l=Al z`|qm_y5((t`<8v&=KSJugzCbG5;wlTo%Hws5S(j}NA|~n z{(c6rw&Fb!)`z(dZ9rQzi3z$(Q8!^1FOL2Dn)CBe-Mnm+b(WnnD7nL4f?Y{J(k89= zC@*8&=$ic=2@h}lXillzz{}OHCW~touj02>3B?y^Qi3&_08-0kIB-Y{wb&=2^vLRa zjY1_)ll=bGyxft%%tJ?xuu(6K*gR0ZPA0e1GA9^v+-DRdL7;(0knu= 
zU5+H1DMQ%_Em1?n!b%}?v>@$4oXz5{(xr>}UM}p9m#5CItE(B)lNABNDe8XI;#|oE zRtfh-{Tf2ci!cwDtrdM_%E5~N%_8Q@oYaNfL2YmUuzc2cM%#lQ2l;7O-Q+e~=BQD& z>RvhFb|PCDT|1GHF=zPvUMkeLoZ0XP(p{2@R>MgOPrENH`fDMn0(bqTC(ROlibs=y zcV5g_@={Fe15@gmzg=V1uo@l!|Dx&V(2ssj+_6$CKPZo_Hmeg@oRV1xtAMPKF(f$xmscP@;6~OcNzQ~+f0nZ z>V&LksbZr(664*rSIx0P+`^3y&LmM$^0Z~l;~^OEqaV$;)_!=wKLMe#ja?1YE6dYk zCNgoFh8=Jr`SZX>e-Dukd>wLHGNB!QgY}qa2qwEpOh|Kj#qQ zvHKm%``(QRnCLfOdOnMuj^(=1t{EJE8N#d{Zka!)|lTZ=#=7b!cpT z`V;Byv}g%j%>|O?B#<-b8fhBo3j6bo6=-eGUev8}9BU804zZF3lOGQ@Aql z0Cz3Mr^oF12Y3>DL>hJ#EMKT|r%gX%Bkb%Nu&ttyaG`hBG}uh**z3-u9(x6rRZa&b z_`&5^H}@c3qE^g)Y7Z710?$A&S*|@WGip-Fo?4L!&Vik0P5$~g92{~jZSD2TX8WbO zyqaaKdB+!50+L(7(z-#11Y>ijz;bE=50&dO)qFdPWeP4m4)3rd8#mes2{ zRUO(LqqCFuMAlRmKO>U8^qAT6Shr(O=gEL&5lbu665n1+Xv_jx7i=(Fh)(`~2OUU~M}fQofvjDrdu?dZm-fb9G9 zaBU2fQEw60IMh};PbE009ZlupEL!NAZbst(l!&Nvoz$iAW$oK!S*l*v`IT8U@eAmx z`EB+c(MQ-;$_}K0hI@w_EtudX{!ja5X(r+aA9{NxH`B8pnslmvV=ZQ~jY;ioiuRXc zhG*sem^PfiZJ+hkdt@={W4DWK;7;*)VuBN{D+HYCw3Mi#b+58-O4)O#+0R*^GmZgv z-!G3x&YeQG4LptQ7oui}xa_TW8qXTsHWzEGPA#K*OI`0YZ6+xi-w+S%SVa+;+Q=fW z&O;QkyMn1wmH8xnmDTjUkpL4fNPw!@{;($fOClMuA5T}mSbVU%(%>s-sxlzk18HdiTwx-R>@$yN zL30k6a~3SLv6DmA+xlG3hcM5Z_YxD2ox$Ir3yS@I82HG9Hdf!CHl1vuP+peT?qn&{ zc1n6irVQ`tv?zE+$5ik(oB=Hl+sd70=i>1p%#cdTGY<50IFK-rr#Y2!x5U^JvfV{A zLqOH6ROFsc%i2B#Iz?Rtc28HHkgH$1S(_c?FH?dh<8udgf?U@71rA;{?xn2XC zdyGu+nP;axosXV?UyWH|swm^h;^`kO_N(Q`)VnV1IW$)xvMpc5F2vi{VO&2ew6^~hHLLtu3rGG8U{J-^d(OQf4|2ebs&0x!3F_b4}B!JrN!<3$Eq1I;N@6|tMhyc zW}_SOo$%WBqO|Fm+zmM`EKizjR4Ofu>F*mfH4bJl8KnGKAE+%qCH+vrbHvFAQa?R^ z6j`GAo+%RVlaTyN$S-$Ce0d}!;gmE&LRNL-m$6+-104~nUoClimV6xYOdu~zvI_bz z)iDG6(x@y{vJ#m4U!;fBMWws(K=ppHY+e`GXd5%u{z}H?LgD#f$aw>YAv)cAg*>(7 z^pqWblR0=Ixep*D({M?Qk!y{tFxrnloOM3_I$Wl7YE_Ybg#5L!+VVOrc6J(SP^%Iq zuUD0`tHL78lc~joXwv~j;w#EW>xIzsJTd5+oSB(lKdtR}H(U^nzMK7o;}+u)Bp>Mw zBl1)swAl{ljgpRE^n*P1+9kHX@mb~~`+;_jRDEY;&7{D_;-`i9$r;H7E^0tvxd1ll z9gt&FiOFWP^r%{`xb?((6+adE&6B}%!wae%NWyRZg*~G{SQ{O1(zMYBOU(1*1=d{_f1UXM~2-T+?pKt~} zsc?67epuHPUw3&^dm(_b<)@&={tJDBr 
z5ASZI=)eQ8Ti8bV$e=%V@L~ogkryBOs-YLw+Fo}V%8!?JSBX0z;UuY1od!30%?jBs z1GrK7eFYDgXVX4B%8U8N5eJ$^QgySoDU|`I*NI^t$#sP7qel9YWw>c}S!(Z!G)UCZ3`c44=1MZX!?d32{W za}qG<1ig8%VYAL0@0O<8BXa19(V0T3zat9l$62iWHC~3F?k@k zb^0I>giZE%uwd>H>>&@=)E_Heyj&-q+&*zKk7S&=J@{)(8`P9fW1AM}Q@$YZd}O0_ z>gDhIbQ4bo3xJrUzU1!(!Q9Oa5r1+bKX-8@&?5J^9Aq^J-r_VpWXghj0r+^fWPpCb zIOe(UTOI={?-ltx2rCWZd)148^YwJq8xVKH^U+{z%=r|>>6PkP zM|tIDHl82Iq)`H)oxif$2m$!Iu|09nvO{%M$anrt@}iTw9;RhY*9_mF&k^IVj0z4+ z_m6R66RNOk_w&v2rn>a(ew2p+yUaS#)i-^1&~H#WpkC3Y>sMIU{pY~iTm;+Gs)&RY z%++Yp)!IC1rZ9BHm2@HMoC64r5_cA($0VCbOlzR{2kB98;%@}=`4<7cyij5kskiUz z26-tQ6BPy_*$%>j5!3zZJ5&e~Cm#@el2NTq38t6)-+^8Voh`V9z~WJlTXfuq_T9Ul z{=8*p_id$L-i{33AKsj!Odwt4#eLfy24`z$syf13yAYoFR;NhM9^ZCB>}IVuC5&H& zcBx}BW4!kUfThiYK*$1E zbm`S6{KJsdz{LlS!f4`#s7_^6g3U#X#e;9e)m_H9(qSm$G6c>_TbJgx7@utUf}rMDM0WNFT>ZUofl%TL^bhwyipdd^VLS5A|CpG2q7 z4zg-)K=@F#1pd1ERY>y`c+gZq)cOFAc_H<(AWRsU%KQWixk&L3st<$|2{ z#8YW^x%Z_6l}U|qd6(J6I&NBpwwAe=z0a|sLar;6Xh1Z{rvF6ZCqA4KhQ&_7iI$2L z{BZ9V=kuM3_waZ`6hHfn>8*81)bjqH>ez}EjFqEdA5jVZF>@b&*UT^RZ!Oo>ZK5eg78Z}n6Pj&L{ z#go+03(U?B&5sHjS$pB-x5HVTx}LbOl8pKw@(`TWV&Q@mGu(alJvX0Ww*zmk*v2roJBHm4P%y{8L-X0RSEa`=%vt?5w-CSgLQE$FMafHN6tgJ)pck<>x z1aa25Z1COQkHXx|jG!F@eg&xn%f*gRLrBaGoO7Wm3|9`Cq6%stNO{H|2t?(9<4}{l zy$rs~UHHxL6b)-(aAh#!4GbmQ7l;RhzFGfcH2;3`V&?P^(O|>mehQuawX?|`$X>^H%yQ6fGFN$|{;mmu@wkM6iJa~Ql z;G=~yI0Kq z2dz85=g5|xoE4&Or>+t{KTbQN+<*)Q71}%5?IOX!)tt$FASxX*w_A&8jbs>FB;Yc< z%l73S#H7345=<+#UW8ZTC7%_(ozUD~ecQB^6E zNDDPh2MUj(#4_uqb3 zt4Tk_*WMS-Kbciz^4sD6Q&0a9Q!;5e{ng+Ip$!4HL1vBG-xKoXZuie+eSf;~7^VT{ epZq`i55dd#-u1Ll#D6cBfJlkSi&hC62L4})u9*M; literal 0 HcmV?d00001 diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f0dca5b65c9c0..f8b80d8d7a04c 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1324,11 +1324,11 @@ def background_gradient( Matplotlib colormap. low : float Compress the color range at the low end. 
This is a multiple of the data - range to extend below the minimum; sound values usually in [0, 0.5], + range to extend below the minimum; good values usually in [0, 1], defaults to 0. high : float Compress the color range at the high end. This is a multiple of the data - range to extend above the maximum; sound values usually in [0, 0.5], + range to extend above the maximum; good values usually in [0, 1], defaults to 0. axis : {0 or 'index', 1 or 'columns', None}, default 0 Apply to each column (``axis=0`` or ``'index'``), to each row @@ -1359,6 +1359,8 @@ def background_gradient( Gradient map for determining the background colors. If not supplied will use the input data from rows, columns or frame. Must be an identical shape for sampling columns, rows or DataFrame based on ``axis``. + If supplied ``vmin`` and ``vmax`` should be given relative to this + gradient map. .. versionadded:: 1.3.0 @@ -1369,16 +1371,66 @@ def background_gradient( Notes ----- When using ``low`` and ``high`` the range - of the data is extended at the low end effectively by - `data.min - low * data.range` and at the high end by - `data.max + high * data.range` before the colors are normalized and determined. + of the gradient, given by the data if ``gmap`` is not given or by ``gmap``, + is extended at the low end effectively by + `map.min - low * map.range` and at the high end by + `map.max + high * map.range` before the colors are normalized and determined. - If combining with ``vmin`` and ``vmax`` the `data.min`, `data.max` and - `data.range` are replaced by values according to the values derived from + If combining with ``vmin`` and ``vmax`` the `map.min`, `map.max` and + `map.range` are replaced by values according to the values derived from ``vmin`` and ``vmax``. This method will preselect numeric columns and ignore non-numeric columns unless a ``gmap`` is supplied in which case no preselection occurs. + + Examples + -------- + >>> df = pd.DataFrame({ + ... 
'City': ['Stockholm', 'Oslo', 'Copenhagen'], + ... 'Temp (c)': [21.6, 22.4, 24.5], + ... 'Rain (mm)': [5.0, 13.3, 0.0], + ... 'Wind (m/s)': [3.2, 3.1, 6.7] + ... }) + + Shading the values column-wise + + >>> df.style.background_gradient(axis=0) + + .. figure:: ../../_static/style/bg_ax0.png + + Shading all values collectively + + >>> df.style.background_gradient(axis=None) + + .. figure:: ../../_static/style/bg_axNone.png + + Compress the color map from the both the low and high ends + + >>> df.style.background_gradient(axis=None, low=0.75, high=1.0) + + .. figure:: ../../_static/style/bg_axNone_lowhigh.png + + Manually setting minimum and maximum gradient thresholds + + >>> df.style.background_gradient(axis=None, vmin=6.7, vmax=21.6) + + .. figure:: ../../_static/style/bg_axNone_vminvmax.png + + Setting the gradient map and applying to all columns with a new colormap + + >>> df.style.background_gradient(axis=0, gmap=df['Temp (c)'], cmap='YlOrRd') + + .. figure:: ../../_static/style/bg_gmap.png + + Setting the gradient map for a dataframe (i.e. ``axis=None``), we need to + explicitly state the numeric columns here to match the gmap shape + + >>> gmap = np.array([[1,2,3], [2,3,4], [3,4,5]]) + >>> df.style.background_gradient(axis=None, gmap=gmap, + ... cmap='YlOrRd', subset=['Temp (c)', 'Rain (mm)', 'Wind (m/s)'] + ... ) + + .. figure:: ../../_static/style/bg_axNone_gmap.png """ if subset is None and gmap is None: subset = self.data.select_dtypes(include=np.number).columns From d0a3b9ef09639bb7342aa101cab613ac3ce93874 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 22 Feb 2021 16:05:34 +0100 Subject: [PATCH 04/26] add examples --- pandas/io/formats/style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index f8b80d8d7a04c..887143ca6f0f0 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1423,7 +1423,7 @@ def background_gradient( .. 
figure:: ../../_static/style/bg_gmap.png Setting the gradient map for a dataframe (i.e. ``axis=None``), we need to - explicitly state the numeric columns here to match the gmap shape + explicitly state the numeric columns here to match the ``gmap`` shape >>> gmap = np.array([[1,2,3], [2,3,4], [3,4,5]]) >>> df.style.background_gradient(axis=None, gmap=gmap, From ddaa18a0508ca904b0262c556ca47c04ca555120 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 22 Feb 2021 16:07:25 +0100 Subject: [PATCH 05/26] add examples --- pandas/io/formats/style.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 887143ca6f0f0..3f18b9bf60ae7 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1392,31 +1392,31 @@ def background_gradient( ... 'Wind (m/s)': [3.2, 3.1, 6.7] ... }) - Shading the values column-wise + Shading the values column-wise, with ``axis=0`` >>> df.style.background_gradient(axis=0) .. figure:: ../../_static/style/bg_ax0.png - Shading all values collectively + Shading all values collectively using ``axis=None`` >>> df.style.background_gradient(axis=None) .. figure:: ../../_static/style/bg_axNone.png - Compress the color map from the both the low and high ends + Compress the color map from the both ``low`` and ``high`` ends >>> df.style.background_gradient(axis=None, low=0.75, high=1.0) .. figure:: ../../_static/style/bg_axNone_lowhigh.png - Manually setting minimum and maximum gradient thresholds + Manually setting ``vmin`` and ``vmax`` gradient thresholds >>> df.style.background_gradient(axis=None, vmin=6.7, vmax=21.6) .. 
figure:: ../../_static/style/bg_axNone_vminvmax.png - Setting the gradient map and applying to all columns with a new colormap + Setting a ``gmap`` and applying to all columns with another ``cmap`` >>> df.style.background_gradient(axis=0, gmap=df['Temp (c)'], cmap='YlOrRd') From 72c082596bd63c05370830c67d1eedebc1fb0e37 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 22 Feb 2021 16:47:48 +0100 Subject: [PATCH 06/26] shape validation --- pandas/io/formats/style.py | 9 ++++++++- pandas/tests/io/formats/test_style.py | 9 +++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3f18b9bf60ae7..3480bc5682b78 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1466,7 +1466,14 @@ def _background_gradient( if gmap is None: gmap = s.to_numpy(dtype=float) else: - gmap = np.asarray(gmap, dtype=float).reshape(s.shape) + try: + gmap = np.asarray(gmap, dtype=float).reshape(s.shape) + except ValueError: + raise ValueError( + "supplied 'gmap' is not right shape for data over " + f"selected 'axis': got {np.asarray(gmap).shape}, " + f"expected {s.shape}" + ) with _mpl(Styler.background_gradient) as (plt, colors): smin = np.nanmin(gmap) if vmin is None else vmin smax = np.nanmax(gmap) if vmax is None else vmax diff --git a/pandas/tests/io/formats/test_style.py b/pandas/tests/io/formats/test_style.py index c848a36e2baa2..4387dc8e5b3b5 100644 --- a/pandas/tests/io/formats/test_style.py +++ b/pandas/tests/io/formats/test_style.py @@ -2056,6 +2056,15 @@ def test_background_gradient_gmap(self, axis, gmap, expected): result = df.style.background_gradient(axis=axis, gmap=gmap)._compute().ctx assert result == expected + @pytest.mark.parametrize( + "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] + ) + def test_background_gradient_gmap_raises(self, gmap, axis): + df = DataFrame([[1, 2, 3], [1, 2, 3]]) + msg = "supplied 'gmap' is not right shape" 
+ with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() + def test_block_names(): # catch accidental removal of a block From c6adc82ebeb0daedbd0a9aef1758119f650a68d3 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 22 Feb 2021 17:07:03 +0100 Subject: [PATCH 07/26] update docs --- pandas/io/formats/style.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 3480bc5682b78..af33b31194563 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1345,13 +1345,13 @@ def background_gradient( vmin : float, optional Minimum data value that corresponds to colormap minimum value. - If not specified the minimum value of the data will be used. + If not specified the minimum value of the data (or gmap) will be used. .. versionadded:: 1.0.0 vmax : float, optional Maximum data value that corresponds to colormap maximum value. - If not specified the maximum value of the data will be used. + If not specified the maximum value of the data (or gmap) will be used. .. versionadded:: 1.0.0 From aa3a83e893f43c5264702eab0c4b2c55ddfaaa14 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 23 Feb 2021 07:43:46 +0100 Subject: [PATCH 08/26] add tests to new modules. 
--- .../tests/io/formats/style/test_matplotlib.py | 58 ++++++++++++++++--- 1 file changed, 49 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index f01e818e40b22..058de101096da 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -57,15 +57,6 @@ def test_text_color_threshold(self, cmap, expected): for k in expected.keys(): assert result[k] == expected[k] - @pytest.mark.parametrize("text_color_threshold", [1.1, "1", -1, [2, 2]]) - def test_text_color_threshold_raises(self, text_color_threshold): - df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) - msg = "`text_color_threshold` must be a value from 0 to 1." - with pytest.raises(ValueError, match=msg): - df.style.background_gradient( - text_color_threshold=text_color_threshold - )._compute() - def test_background_gradient_axis(self): df = DataFrame([[1, 2], [2, 4]], columns=["A", "B"]) @@ -106,3 +97,52 @@ def test_background_gradient_int64(self): assert ctx2[(0, 0)] == ctx1[(0, 0)] assert ctx2[(1, 0)] == ctx1[(1, 0)] assert ctx2[(2, 0)] == ctx1[(2, 0)] + + @pytest.mark.parametrize( + "axis, gmap, expected", + [ + ( + 0, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + 1, + [1, 2], + { + (0, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ( + None, + np.array([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), 
("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), + ], + ) + def test_background_gradient_gmap(self, axis, gmap, expected): + df = DataFrame([[1, 2], [2, 1]]) + result = df.style.background_gradient(axis=axis, gmap=gmap)._compute().ctx + assert result == expected + + @pytest.mark.parametrize( + "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] + ) + def test_background_gradient_gmap_raises(self, gmap, axis): + df = DataFrame([[1, 2, 3], [1, 2, 3]]) + msg = "supplied 'gmap' is not right shape" + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=axis)._compute() From af852d0d443dd392e4b32ee6dee241a0a0d1fbba Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 26 Feb 2021 07:44:16 +0100 Subject: [PATCH 09/26] update reshaping --- pandas/io/formats/style.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 983f1d52229f0..0ac60e3d70c8d 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1465,12 +1465,14 @@ def _background_gradient( if gmap is None: gmap = s.to_numpy(dtype=float) else: - try: - gmap = np.asarray(gmap, dtype=float).reshape(s.shape) - except ValueError: + if s.ndim == 1: # s is Series(n): gmap can be (n,), (n,1) or (1,n) + gmap = np.asarray(gmap, dtype=float).reshape(-1) + else: # s is DataFrame(n,m): gmap must be (n,m) + gmap = np.asarray(gmap, dtype=float) + if gmap.shape != s.shape: raise ValueError( "supplied 'gmap' is not right shape for data over " - f"selected 'axis': got {np.asarray(gmap).shape}, " + f"selected 'axis': got {gmap.shape}, " f"expected {s.shape}" ) with _mpl(Styler.background_gradient) as (plt, colors): From 3c06bd3badf432cae0e8af0785e76c7afafad053 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 26 Feb 2021 
08:05:07 +0100 Subject: [PATCH 10/26] update reshaping --- pandas/io/formats/style.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 0ac60e3d70c8d..47e8716578164 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1422,7 +1422,7 @@ def background_gradient( .. figure:: ../../_static/style/bg_gmap.png Setting the gradient map for a dataframe (i.e. ``axis=None``), we need to - explicitly state the numeric columns here to match the ``gmap`` shape + explicitly state ``subset`` to match the ``gmap`` shape >>> gmap = np.array([[1,2,3], [2,3,4], [3,4,5]]) >>> df.style.background_gradient(axis=None, gmap=gmap, From 233e4c3698f3a9fcfee466e9c50ad320e8b4443a Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Sat, 27 Feb 2021 20:43:59 +0100 Subject: [PATCH 11/26] mypy fix --- pandas/io/formats/style.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 47e8716578164..9cdbb743b9bab 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1469,6 +1469,7 @@ def _background_gradient( gmap = np.asarray(gmap, dtype=float).reshape(-1) else: # s is DataFrame(n,m): gmap must be (n,m) gmap = np.asarray(gmap, dtype=float) + assert isinstance(gmap, np.ndarray) # mypy fix for gmap.shape if gmap.shape != s.shape: raise ValueError( "supplied 'gmap' is not right shape for data over " From 1f44a5b2e1836e7ede2b6ad287607cbda030eefc Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (MBP)" Date: Thu, 4 Mar 2021 11:02:24 +0100 Subject: [PATCH 12/26] req changes --- pandas/tests/io/formats/style/test_matplotlib.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index 058de101096da..7be07a1516b2a 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ 
b/pandas/tests/io/formats/style/test_matplotlib.py @@ -131,6 +131,16 @@ def test_background_gradient_int64(self): (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], }, ), + ( + None, + DataFrame([[2, 1], [1, 2]]), + { + (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], + (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], + (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], + (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], + }, + ), ], ) def test_background_gradient_gmap(self, axis, gmap, expected): From a92e42d7c9b19c42600139bf2bdaaaf02baa735c Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 9 Mar 2021 10:30:49 +0100 Subject: [PATCH 13/26] html tests --- pandas/io/formats/style.py | 56 ++++++++---- .../tests/io/formats/style/test_matplotlib.py | 89 ++++++++++++++++--- 2 files changed, 114 insertions(+), 31 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 2ceff6fce9660..5da79e7a66164 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -43,7 +43,10 @@ from pandas.api.types import is_list_like from pandas.core import generic import pandas.core.common as com -from pandas.core.frame import DataFrame +from pandas.core.frame import ( + DataFrame, + Series, +) from pandas.core.generic import NDFrame from pandas.core.indexes.api import Index @@ -1411,9 +1414,11 @@ def background_gradient( gmap : array-like, optional Gradient map for determining the background colors. If not supplied - will use the input data from rows, columns or frame. Must be an - identical shape for sampling columns, rows or DataFrame based on ``axis``. - If supplied ``vmin`` and ``vmax`` should be given relative to this + will use the underlying data from rows, columns or frame. If given as an + ndarray or list-like must be an identical shape to the underlying data + considering ``axis`` and ``subset``. 
If given as DataFrame or Series must + have same index and column labels considering ``axis`` and ``subset``. + If supplied, ``vmin`` and ``vmax`` should be given relative to this gradient map. .. versionadded:: 1.3.0 @@ -1446,7 +1451,7 @@ def background_gradient( ... 'Wind (m/s)': [3.2, 3.1, 6.7] ... }) - Shading the values column-wise, with ``axis=0`` + Shading the values column-wise, with ``axis=0``, preselecting numeric columns >>> df.style.background_gradient(axis=0) @@ -1489,6 +1494,23 @@ def background_gradient( if subset is None and gmap is None: subset = self.data.select_dtypes(include=np.number).columns + if isinstance(gmap, DataFrame): # will align columns + if axis is None: + subset = slice(None) if subset is None else subset + subset = _non_reducing_slice(subset) + data_ = self.data.loc[subset] + try: + gmap = gmap.loc[:, data_.columns] + except KeyError as e: + raise KeyError( + f"`gmap` as DataFrame must contain at least the columns in the " + f"underlying data. {str(e)}" + ) + else: + raise ValueError( + "`gmap` as DataFrame can only be used with `axis` is `None`" + ) + self.apply( self._background_gradient, cmap=cmap, @@ -1517,20 +1539,20 @@ def _background_gradient( """ Color background in a range according to the data or a gradient map """ - if gmap is None: + if gmap is None: # the data is used the gmap gmap = s.to_numpy(dtype=float) - else: - if s.ndim == 1: # s is Series(n): gmap can be (n,), (n,1) or (1,n) - gmap = np.asarray(gmap, dtype=float).reshape(-1) - else: # s is DataFrame(n,m): gmap must be (n,m) + else: # gmap is conformed to the data shape + if isinstance(gmap, (Series, DataFrame)): + gmap = gmap.reindex_like(s, method=None).to_numpy() # align indexes + else: gmap = np.asarray(gmap, dtype=float) - assert isinstance(gmap, np.ndarray) # mypy fix for gmap.shape - if gmap.shape != s.shape: - raise ValueError( - "supplied 'gmap' is not right shape for data over " - f"selected 'axis': got {gmap.shape}, " - f"expected {s.shape}" - ) + if 
gmap.shape != s.shape: # check valid input + raise ValueError( + "supplied 'gmap' is not right shape for data over " + f"selected 'axis': got {gmap.shape}, " + f"expected {s.shape}" + ) + with _mpl(Styler.background_gradient) as (plt, colors): smin = np.nanmin(gmap) if vmin is None else vmin smax = np.nanmax(gmap) if vmax is None else vmax diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index 7be07a1516b2a..1d5582538e41f 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -131,28 +131,89 @@ def test_background_gradient_int64(self): (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], }, ), - ( - None, - DataFrame([[2, 1], [1, 2]]), - { - (0, 0): [("background-color", "#023858"), ("color", "#f1f1f1")], - (1, 0): [("background-color", "#fff7fb"), ("color", "#000000")], - (0, 1): [("background-color", "#fff7fb"), ("color", "#000000")], - (1, 1): [("background-color", "#023858"), ("color", "#f1f1f1")], - }, - ), ], ) - def test_background_gradient_gmap(self, axis, gmap, expected): - df = DataFrame([[1, 2], [2, 1]]) + def test_background_gradient_gmap_array(self, axis, gmap, expected): + # tests when gmap is given as a sequence and converted to ndarray + df = DataFrame([[0, 0], [0, 0]]) result = df.style.background_gradient(axis=axis, gmap=gmap)._compute().ctx assert result == expected @pytest.mark.parametrize( "gmap, axis", [([1, 2, 3], 0), ([1, 2], 1), (np.array([[1, 2], [1, 2]]), None)] ) - def test_background_gradient_gmap_raises(self, gmap, axis): - df = DataFrame([[1, 2, 3], [1, 2, 3]]) + def test_background_gradient_gmap_array_raises(self, gmap, axis): + # test when gmap as converted ndarray is bad shape + df = DataFrame([[0, 0, 0], [0, 0, 0]]) msg = "supplied 'gmap' is not right shape" with pytest.raises(ValueError, match=msg): df.style.background_gradient(gmap=gmap, axis=axis)._compute() + + @pytest.mark.parametrize( + 
"gmap", + [ + DataFrame( # reverse the columns + [[2, 1], [1, 2]], columns=["B", "A"], index=["X", "Y"] + ), + DataFrame( # reverse the index + [[2, 1], [1, 2]], columns=["A", "B"], index=["Y", "X"] + ), + DataFrame( # reverse the index and columns + [[1, 2], [2, 1]], columns=["B", "A"], index=["Y", "X"] + ), + DataFrame( # add unnecessary columns + [[1, 2, 3], [2, 1, 3]], columns=["A", "B", "C"], index=["X", "Y"] + ), + DataFrame( # add unnecessary index + [[1, 2], [2, 1], [3, 3]], columns=["A", "B"], index=["X", "Y", "Z"] + ), + ], + ) + @pytest.mark.parametrize( + "subset, exp_gmap", # exp_gmap is underlying map DataFrame should conform to + [ + (None, [[1, 2], [2, 1]]), + (["A"], [[1], [2]]), # slice only column "A" in data and gmap + (["B", "A"], [[2, 1], [1, 2]]), # reverse the columns in data + (IndexSlice["X", :], [[1, 2]]), # slice only index "X" in data and gmap + (IndexSlice[["Y", "X"], :], [[2, 1], [1, 2]]), # reverse the index in data + ], + ) + def test_background_gradient_gmap_dataframe_align(self, gmap, subset, exp_gmap): + # test gmap given as DataFrame that it aligns to the the data including subset + df = DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + + expected = df.style.background_gradient(axis=None, gmap=exp_gmap, subset=subset) + result = df.style.background_gradient(axis=None, gmap=gmap, subset=subset) + assert expected._compute().ctx == result._compute().ctx + + @pytest.mark.parametrize( + "gmap, axis, exp_gmap", + [ + (Series([2, 1], index=["Y", "X"]), 0, [[1, 1], [2, 2]]), # revrse the index + (Series([2, 1], index=["B", "A"]), 1, [[1, 2], [1, 2]]), # revrse the cols + (Series([1, 2, 3], index=["X", "Y", "Z"]), 0, [[1, 1], [2, 2]]), # add idx + (Series([1, 2, 3], index=["A", "B", "C"]), 1, [[1, 2], [1, 2]]), # add col + ], + ) + def test_background_gradient_gmap_series_align(self, gmap, axis, exp_gmap): + # test gmap given as Series that it aligns to the the data including subset + df = DataFrame([[0, 0], [0, 0]], 
columns=["A", "B"], index=["X", "Y"]) + + expected = df.style.background_gradient(axis=None, gmap=exp_gmap)._compute() + result = df.style.background_gradient(axis=axis, gmap=gmap)._compute() + assert expected.ctx == result.ctx + + def test_background_gradient_gmap_dataframe_raises(self): + df = DataFrame([[0, 0, 0], [0, 0, 0]], columns=["A", "B", "C"]) + + msg = "`gmap` as DataFrame must contain at least the columns" + gmap = DataFrame([[1, 2, 3], [1, 2, 3]], columns=["A", "B", "X"]) + with pytest.raises(KeyError, match=msg): + df.style.background_gradient(gmap=gmap, axis=None)._compute() + + msg = "`gmap` as DataFrame can only be used with `axis` is `None`" + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=1)._compute() + with pytest.raises(ValueError, match=msg): + df.style.background_gradient(gmap=gmap, axis=0)._compute() From 0ec339e193eed140c1982dff1d497580749c9f68 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 9 Mar 2021 11:23:46 +0100 Subject: [PATCH 14/26] align DataFrame and Series gmap with underlying data --- pandas/io/formats/style.py | 19 +------------------ .../tests/io/formats/style/test_matplotlib.py | 14 -------------- 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 5da79e7a66164..437542d05d8f2 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1494,23 +1494,6 @@ def background_gradient( if subset is None and gmap is None: subset = self.data.select_dtypes(include=np.number).columns - if isinstance(gmap, DataFrame): # will align columns - if axis is None: - subset = slice(None) if subset is None else subset - subset = _non_reducing_slice(subset) - data_ = self.data.loc[subset] - try: - gmap = gmap.loc[:, data_.columns] - except KeyError as e: - raise KeyError( - f"`gmap` as DataFrame must contain at least the columns in the " - f"underlying data. 
{str(e)}" - ) - else: - raise ValueError( - "`gmap` as DataFrame can only be used with `axis` is `None`" - ) - self.apply( self._background_gradient, cmap=cmap, @@ -1543,7 +1526,7 @@ def _background_gradient( gmap = s.to_numpy(dtype=float) else: # gmap is conformed to the data shape if isinstance(gmap, (Series, DataFrame)): - gmap = gmap.reindex_like(s, method=None).to_numpy() # align indexes + gmap = gmap.reindex_like(s, method=None).to_numpy() # align indx / cols else: gmap = np.asarray(gmap, dtype=float) if gmap.shape != s.shape: # check valid input diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index 1d5582538e41f..370d423ab82e8 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -203,17 +203,3 @@ def test_background_gradient_gmap_series_align(self, gmap, axis, exp_gmap): expected = df.style.background_gradient(axis=None, gmap=exp_gmap)._compute() result = df.style.background_gradient(axis=axis, gmap=gmap)._compute() assert expected.ctx == result.ctx - - def test_background_gradient_gmap_dataframe_raises(self): - df = DataFrame([[0, 0, 0], [0, 0, 0]], columns=["A", "B", "C"]) - - msg = "`gmap` as DataFrame must contain at least the columns" - gmap = DataFrame([[1, 2, 3], [1, 2, 3]], columns=["A", "B", "X"]) - with pytest.raises(KeyError, match=msg): - df.style.background_gradient(gmap=gmap, axis=None)._compute() - - msg = "`gmap` as DataFrame can only be used with `axis` is `None`" - with pytest.raises(ValueError, match=msg): - df.style.background_gradient(gmap=gmap, axis=1)._compute() - with pytest.raises(ValueError, match=msg): - df.style.background_gradient(gmap=gmap, axis=0)._compute() From b6c2fdae3945f8910df854be7007a28c5baac552 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 9 Mar 2021 11:25:50 +0100 Subject: [PATCH 15/26] align DataFrame and Series gmap with underlying data --- pandas/io/formats/style.py 
| 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 437542d05d8f2..86e5c49174c25 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1525,8 +1525,8 @@ def _background_gradient( if gmap is None: # the data is used the gmap gmap = s.to_numpy(dtype=float) else: # gmap is conformed to the data shape - if isinstance(gmap, (Series, DataFrame)): - gmap = gmap.reindex_like(s, method=None).to_numpy() # align indx / cols + if isinstance(gmap, (Series, DataFrame)): # align indx / cols + gmap = gmap.reindex_like(s, method=None).to_numpy(dtype=float) else: gmap = np.asarray(gmap, dtype=float) if gmap.shape != s.shape: # check valid input From bd025b87cc7e6c461ec3e20fe73e1287770534cb Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 9 Mar 2021 13:49:33 +0100 Subject: [PATCH 16/26] mypy fixup --- pandas/io/formats/style.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 86e5c49174c25..33e42659af269 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1529,6 +1529,7 @@ def _background_gradient( gmap = gmap.reindex_like(s, method=None).to_numpy(dtype=float) else: gmap = np.asarray(gmap, dtype=float) + assert isinstance(gmap, np.ndarray) # mypy requirement if gmap.shape != s.shape: # check valid input raise ValueError( "supplied 'gmap' is not right shape for data over " From 277e2d74a047359eb4898491626475e4fa625466 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 10 Mar 2021 07:29:40 +0100 Subject: [PATCH 17/26] make elif --- pandas/io/formats/style.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 33e42659af269..ec12aa5ab03bc 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1524,18 +1524,17 @@ def _background_gradient( """ if 
gmap is None: # the data is used the gmap gmap = s.to_numpy(dtype=float) - else: # gmap is conformed to the data shape - if isinstance(gmap, (Series, DataFrame)): # align indx / cols - gmap = gmap.reindex_like(s, method=None).to_numpy(dtype=float) - else: - gmap = np.asarray(gmap, dtype=float) - assert isinstance(gmap, np.ndarray) # mypy requirement - if gmap.shape != s.shape: # check valid input - raise ValueError( - "supplied 'gmap' is not right shape for data over " - f"selected 'axis': got {gmap.shape}, " - f"expected {s.shape}" - ) + elif isinstance(gmap, (Series, DataFrame)): # align indx / cols to data + gmap = gmap.reindex_like(s, method=None).to_numpy(dtype=float) + else: + gmap = np.asarray(gmap, dtype=float) + assert isinstance(gmap, np.ndarray) # mypy requirement + if gmap.shape != s.shape: # check valid input + raise ValueError( + "supplied 'gmap' is not right shape for data over " + f"selected 'axis': got {gmap.shape}, " + f"expected {s.shape}" + ) with _mpl(Styler.background_gradient) as (plt, colors): smin = np.nanmin(gmap) if vmin is None else vmin From 7141e4cbc2521946bb2c5e6ae6231181eb4313f7 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 10 Mar 2021 07:40:18 +0100 Subject: [PATCH 18/26] add tests --- pandas/tests/io/formats/style/test_matplotlib.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index 370d423ab82e8..b5c0a8bdeef1f 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -203,3 +203,18 @@ def test_background_gradient_gmap_series_align(self, gmap, axis, exp_gmap): expected = df.style.background_gradient(axis=None, gmap=exp_gmap)._compute() result = df.style.background_gradient(axis=axis, gmap=gmap)._compute() assert expected.ctx == result.ctx + + @pytest.mark.parametrize( + "gmap, axis", + [ + (DataFrame([[1, 2], [2, 1]], columns=["A", 
"B"], index=["X", "Y"]), 1), + (DataFrame([[1, 2], [2, 1]], columns=["A", "B"], index=["X", "Y"]), 0), + (Series([1, 2], index=["X", "Y"]), None), + (Series([1, 2], index=["X", "Y"]), 1), + (Series([1, 2], index=["A", "B"]), 0), + ], + ) + def test_background_gradient_gmap_wrong_series_dataframe(self, gmap, axis): + # test giving a gmap in DataFrame or Series form but with wrong axis + df = DataFrame([[0, 0], [0, 0]], columns=["A", "B"], index=["X", "Y"]) + df.style.background_gradient(axis=axis, gmap=gmap)._compute() From 439a8535b39700773ba3a72af32a0204ebe5f034 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 17 Mar 2021 15:50:12 +0100 Subject: [PATCH 19/26] add arg shape validation function --- pandas/io/formats/style.py | 71 ++++++++++++++++--- .../tests/io/formats/style/test_matplotlib.py | 19 +++-- 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index ec12aa5ab03bc..41d85695255cc 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1505,6 +1505,7 @@ def background_gradient( vmin=vmin, vmax=vmax, gmap=gmap, + axis_=axis, ) return self @@ -1518,23 +1519,15 @@ def _background_gradient( vmin: Optional[float] = None, vmax: Optional[float] = None, gmap: Optional[Sequence] = None, + axis_: Optional[Axis] = None, ): """ Color background in a range according to the data or a gradient map """ if gmap is None: # the data is used the gmap gmap = s.to_numpy(dtype=float) - elif isinstance(gmap, (Series, DataFrame)): # align indx / cols to data - gmap = gmap.reindex_like(s, method=None).to_numpy(dtype=float) - else: - gmap = np.asarray(gmap, dtype=float) - assert isinstance(gmap, np.ndarray) # mypy requirement - if gmap.shape != s.shape: # check valid input - raise ValueError( - "supplied 'gmap' is not right shape for data over " - f"selected 'axis': got {gmap.shape}, " - f"expected {s.shape}" - ) + else: # else validate gmap against the underlying data + gmap = 
def _validate_apply_axis_arg(
    arg: "Union[FrameOrSeries, Sequence]",
    arg_name: str,
    dtype: Optional[Any],
    axis: "Optional[Axis]",
    data: "FrameOrSeries",
) -> np.ndarray:
    """
    Validate a user supplied companion argument against apply-type data.

    For the apply-type methods, ``axis=None`` creates ``data`` as DataFrame,
    and for ``axis=[1,0]`` it creates a Series. Where ``arg`` is expected as
    an element of some operator with ``data`` we must make sure that the two
    are compatible shapes, or raise.

    Parameters
    ----------
    arg : sequence, Series or DataFrame
        The user input arg.
    arg_name : str
        Name of the arg, used in error messages.
    dtype : numpy dtype, optional
        Forced numpy dtype if given.
    axis : {0, 1, None}
        Axis over which the apply-type method is used.
    data : Series or DataFrame
        Underlying subset of Styler data on which operations are performed.

    Returns
    -------
    ndarray
    """
    # only force a dtype when one was requested
    kwargs = {"dtype": dtype} if dtype else {}

    # reject pandas inputs whose dimensionality contradicts the chosen axis
    if isinstance(arg, Series) and axis is None:
        raise ValueError(
            f"'{arg_name}' is a Series but underlying data for operations "
            f"is a DataFrame since 'axis=None'"
        )
    elif isinstance(arg, DataFrame) and axis in [0, 1]:
        raise ValueError(
            f"'{arg_name}' is a DataFrame but underlying data for "
            f"operations is a Series with 'axis={axis}'"
        )

    if isinstance(arg, (Series, DataFrame)):
        # pandas input: align index / columns to the data before converting
        arg = arg.reindex_like(data, method=None).to_numpy(**kwargs)
    else:
        arg = np.asarray(arg, **kwargs)

    assert isinstance(arg, np.ndarray)  # mypy requirement
    if arg.shape != data.shape:  # check valid input
        raise ValueError(
            f"supplied '{arg_name}' is not right shape for data over "
            f"selected 'axis': got {arg.shape}, "
            f"expected {data.shape}"
        )
    return arg
pandas/tests/io/formats/style/test_matplotlib.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 41d85695255cc..9ee8e41cb3383 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2367,7 +2367,7 @@ def _validate_apply_axis_arg( assert isinstance(arg, np.ndarray) # mypy requirement if arg.shape != data.shape: # check valid input raise ValueError( - f"supplied '{arg_name}' is not right shape for data over " + f"supplied '{arg_name}' is not correct shape for data over " f"selected 'axis': got {arg.shape}, " f"expected {data.shape}" ) diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py index b0ca555e0cdd2..f0158711664ce 100644 --- a/pandas/tests/io/formats/style/test_matplotlib.py +++ b/pandas/tests/io/formats/style/test_matplotlib.py @@ -145,7 +145,7 @@ def test_background_gradient_gmap_array(self, axis, gmap, expected): def test_background_gradient_gmap_array_raises(self, gmap, axis): # test when gmap as converted ndarray is bad shape df = DataFrame([[0, 0, 0], [0, 0, 0]]) - msg = "supplied 'gmap' is not right shape" + msg = "supplied 'gmap' is not correct shape" with pytest.raises(ValueError, match=msg): df.style.background_gradient(gmap=gmap, axis=axis)._compute() From b7b71792d4094368ca54ce275ad6e3aae0861380 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Wed, 17 Mar 2021 20:04:50 +0100 Subject: [PATCH 21/26] mypy updates --- pandas/io/formats/style.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 78eddc1e08e6c..8d52609fe0e3b 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1520,7 +1520,7 @@ def _background_gradient( text_color_threshold: float = 0.408, vmin: Optional[float] = None, vmax: Optional[float] = None, - gmap: Optional[Sequence] = None, + gmap: 
Optional[Union[Sequence, np.ndarray, FrameOrSeries]] = None, axis_: Optional[Axis] = None, ): """ @@ -2332,7 +2332,7 @@ def pred(part) -> bool: def _validate_apply_axis_arg( - arg: Union[FrameOrSeries, Sequence], + arg: Union[FrameOrSeries, Sequence, np.ndarray], arg_name: str, dtype: Optional[Any], axis: Optional[Axis], From 8db2b50a80e5773d1e16ca7417491c91d68ecf36 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Sun, 28 Mar 2021 23:32:03 +0200 Subject: [PATCH 22/26] partial out axis_ --- pandas/io/formats/style.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 2fb76c919062d..0b4e7a3b5c211 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1563,7 +1563,7 @@ def background_gradient( subset = self.data.select_dtypes(include=np.number).columns self.apply( - self._background_gradient, + partial(self._background_gradient, axis=axis), cmap=cmap, subset=subset, axis=axis, @@ -1573,7 +1573,6 @@ def background_gradient( vmin=vmin, vmax=vmax, gmap=gmap, - axis_=axis, ) return self @@ -1587,7 +1586,7 @@ def _background_gradient( vmin: Optional[float] = None, vmax: Optional[float] = None, gmap: Optional[Union[Sequence, np.ndarray, FrameOrSeries]] = None, - axis_: Optional[Axis] = None, + axis: Optional[Axis] = None, ): """ Color background in a range according to the data or a gradient map @@ -1595,7 +1594,7 @@ def _background_gradient( if gmap is None: # the data is used the gmap gmap = s.to_numpy(dtype=float) else: # else validate gmap against the underlying data - gmap = _validate_apply_axis_arg(gmap, "gmap", float, axis_, s) + gmap = _validate_apply_axis_arg(gmap, "gmap", float, axis, s) with _mpl(Styler.background_gradient) as (plt, colors): smin = np.nanmin(gmap) if vmin is None else vmin From 36124311879d1152acad53d63afa26c4fb2cdcbf Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Mon, 29 Mar 2021 16:29:19 +0200 Subject: [PATCH 
23/26] remove axis and remove staticmethod --- pandas/io/formats/style.py | 138 ++++++++++++++++++------------------- 1 file changed, 67 insertions(+), 71 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 0b4e7a3b5c211..562aa7a183459 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -1563,7 +1563,7 @@ def background_gradient( subset = self.data.select_dtypes(include=np.number).columns self.apply( - partial(self._background_gradient, axis=axis), + _background_gradient, cmap=cmap, subset=subset, axis=axis, @@ -1576,70 +1576,6 @@ def background_gradient( ) return self - @staticmethod - def _background_gradient( - s, - cmap="PuBu", - low: float = 0, - high: float = 0, - text_color_threshold: float = 0.408, - vmin: Optional[float] = None, - vmax: Optional[float] = None, - gmap: Optional[Union[Sequence, np.ndarray, FrameOrSeries]] = None, - axis: Optional[Axis] = None, - ): - """ - Color background in a range according to the data or a gradient map - """ - if gmap is None: # the data is used the gmap - gmap = s.to_numpy(dtype=float) - else: # else validate gmap against the underlying data - gmap = _validate_apply_axis_arg(gmap, "gmap", float, axis, s) - - with _mpl(Styler.background_gradient) as (plt, colors): - smin = np.nanmin(gmap) if vmin is None else vmin - smax = np.nanmax(gmap) if vmax is None else vmax - rng = smax - smin - # extend lower / upper bounds, compresses color range - norm = colors.Normalize(smin - (rng * low), smax + (rng * high)) - rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) - - def relative_luminance(rgba) -> float: - """ - Calculate relative luminance of a color. 
- - The calculation adheres to the W3C standards - (https://www.w3.org/WAI/GL/wiki/Relative_luminance) - - Parameters - ---------- - color : rgb or rgba tuple - - Returns - ------- - float - The relative luminance as a value from 0 to 1 - """ - r, g, b = ( - x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4 - for x in rgba[:3] - ) - return 0.2126 * r + 0.7152 * g + 0.0722 * b - - def css(rgba) -> str: - dark = relative_luminance(rgba) < text_color_threshold - text_color = "#f1f1f1" if dark else "#000000" - return f"background-color: {colors.rgb2hex(rgba)};color: {text_color};" - - if s.ndim == 1: - return [css(rgba) for rgba in rgbas] - else: - return pd.DataFrame( - [[css(rgba) for rgba in row] for row in rgbas], - index=s.index, - columns=s.columns, - ) - def set_properties(self, subset=None, **kwargs) -> Styler: """ Set defined CSS-properties to each ```` HTML element within the given @@ -2416,7 +2352,6 @@ def _validate_apply_axis_arg( arg: Union[FrameOrSeries, Sequence, np.ndarray], arg_name: str, dtype: Optional[Any], - axis: Optional[Axis], data: FrameOrSeries, ) -> np.ndarray: """ @@ -2433,8 +2368,6 @@ def _validate_apply_axis_arg( name of the arg for use in error messages dtype : numpy dtype, optional forced numpy dtype if given - axis : {0,1, None} - axis over which apply-type method is used data : Series or DataFrame underling subset of Styler data on which operations are performed @@ -2444,15 +2377,15 @@ def _validate_apply_axis_arg( """ dtype = {"dtype": dtype} if dtype else {} # raise if input is wrong for axis: - if isinstance(arg, Series) and axis is None: + if isinstance(arg, Series) and isinstance(data, DataFrame): raise ValueError( f"'{arg_name}' is a Series but underlying data for operations " f"is a DataFrame since 'axis=None'" ) - elif isinstance(arg, DataFrame) and axis in [0, 1]: + elif isinstance(arg, DataFrame) and isinstance(data, Series): raise ValueError( f"'{arg_name}' is a DataFrame but underlying data for " - f"operations is 
a Series with 'axis={axis}'" + f"operations is a Series with 'axis in [0,1]'" ) elif isinstance(arg, (Series, DataFrame)): # align indx / cols to data arg = arg.reindex_like(data, method=None).to_numpy(**dtype) @@ -2466,3 +2399,66 @@ def _validate_apply_axis_arg( f"expected {data.shape}" ) return arg + + +def _background_gradient( + data, + cmap="PuBu", + low: float = 0, + high: float = 0, + text_color_threshold: float = 0.408, + vmin: Optional[float] = None, + vmax: Optional[float] = None, + gmap: Optional[Union[Sequence, np.ndarray, FrameOrSeries]] = None, +): + """ + Color background in a range according to the data or a gradient map + """ + if gmap is None: # the data is used the gmap + gmap = data.to_numpy(dtype=float) + else: # else validate gmap against the underlying data + gmap = _validate_apply_axis_arg(gmap, "gmap", float, data) + + with _mpl(Styler.background_gradient) as (plt, colors): + smin = np.nanmin(gmap) if vmin is None else vmin + smax = np.nanmax(gmap) if vmax is None else vmax + rng = smax - smin + # extend lower / upper bounds, compresses color range + norm = colors.Normalize(smin - (rng * low), smax + (rng * high)) + rgbas = plt.cm.get_cmap(cmap)(norm(gmap)) + + def relative_luminance(rgba) -> float: + """ + Calculate relative luminance of a color. 
+ + The calculation adheres to the W3C standards + (https://www.w3.org/WAI/GL/wiki/Relative_luminance) + + Parameters + ---------- + color : rgb or rgba tuple + + Returns + ------- + float + The relative luminance as a value from 0 to 1 + """ + r, g, b = ( + x / 12.92 if x <= 0.04045 else ((x + 0.055) / 1.055) ** 2.4 + for x in rgba[:3] + ) + return 0.2126 * r + 0.7152 * g + 0.0722 * b + + def css(rgba) -> str: + dark = relative_luminance(rgba) < text_color_threshold + text_color = "#f1f1f1" if dark else "#000000" + return f"background-color: {colors.rgb2hex(rgba)};color: {text_color};" + + if data.ndim == 1: + return [css(rgba) for rgba in rgbas] + else: + return pd.DataFrame( + [[css(rgba) for rgba in row] for row in rgbas], + index=data.index, + columns=data.columns, + ) From aad0fa9b45cd632c2fc2979307c604ec2280e007 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 2 Apr 2021 20:08:13 +0200 Subject: [PATCH 24/26] Merge remote-tracking branch 'upstream/master' into background_gradient_gmap # Conflicts: # pandas/io/formats/style.py --- .pre-commit-config.yaml | 12 +- asv_bench/benchmarks/arithmetic.py | 20 +- asv_bench/benchmarks/sparse.py | 4 +- codecov.yml | 2 +- doc/source/ecosystem.rst | 23 +- doc/source/whatsnew/v1.2.4.rst | 1 + environment.yml | 1 + pandas/_libs/algos.pyx | 136 +++---- pandas/_libs/groupby.pyi | 168 ++++++++ pandas/_libs/groupby.pyx | 127 +++--- pandas/_libs/hashtable_class_helper.pxi.in | 30 +- pandas/_libs/index.pyi | 86 +++++ pandas/_libs/index.pyx | 25 +- pandas/_libs/internals.pyx | 2 +- pandas/_libs/lib.pyx | 5 +- pandas/_libs/ops.pyx | 2 +- pandas/_libs/reshape.pyx | 6 +- pandas/_libs/tslibs/conversion.pyi | 41 ++ pandas/_libs/tslibs/conversion.pyx | 4 +- pandas/_libs/tslibs/dtypes.pyi | 65 ++++ pandas/_libs/tslibs/fields.pyi | 69 ++++ pandas/_libs/tslibs/fields.pyx | 6 +- pandas/_libs/tslibs/nattype.pyx | 4 + pandas/_libs/tslibs/timedeltas.pyx | 10 +- pandas/_testing/__init__.py | 16 +- pandas/conftest.py | 36 +- 
pandas/core/aggregation.py | 2 +- pandas/core/algorithms.py | 14 +- pandas/core/array_algos/take.py | 2 +- pandas/core/arrays/_mixins.py | 5 +- pandas/core/arrays/categorical.py | 20 +- pandas/core/arrays/datetimelike.py | 30 +- pandas/core/arrays/datetimes.py | 47 ++- pandas/core/arrays/numpy_.py | 80 ++-- pandas/core/arrays/period.py | 18 +- pandas/core/arrays/string_.py | 7 +- pandas/core/arrays/string_arrow.py | 6 +- pandas/core/arrays/timedeltas.py | 42 +- pandas/core/base.py | 2 +- pandas/core/common.py | 2 +- pandas/core/computation/expressions.py | 19 +- pandas/core/dtypes/cast.py | 58 ++- pandas/core/dtypes/common.py | 4 +- pandas/core/frame.py | 63 ++- pandas/core/generic.py | 22 +- pandas/core/groupby/categorical.py | 8 +- pandas/core/groupby/groupby.py | 6 +- pandas/core/groupby/grouper.py | 4 +- pandas/core/groupby/ops.py | 363 +++++++++--------- pandas/core/indexes/base.py | 37 +- pandas/core/indexes/datetimelike.py | 3 +- pandas/core/indexes/multi.py | 9 +- pandas/core/indexes/range.py | 23 +- pandas/core/indexing.py | 4 +- pandas/core/internals/__init__.py | 4 - pandas/core/internals/array_manager.py | 2 +- pandas/core/internals/blocks.py | 140 +++---- pandas/core/internals/concat.py | 14 +- pandas/core/internals/construction.py | 34 +- pandas/core/internals/managers.py | 93 +++-- pandas/core/internals/ops.py | 4 +- pandas/core/missing.py | 2 +- pandas/core/ops/array_ops.py | 12 +- pandas/core/resample.py | 2 +- pandas/core/reshape/pivot.py | 2 +- pandas/core/series.py | 8 +- pandas/core/sorting.py | 2 +- pandas/core/strings/accessor.py | 4 +- pandas/core/tools/datetimes.py | 8 +- pandas/core/window/rolling.py | 2 +- pandas/io/clipboards.py | 2 +- pandas/io/common.py | 4 +- pandas/io/excel/_odfreader.py | 6 +- pandas/io/excel/_openpyxl.py | 2 +- pandas/io/excel/_xlrd.py | 2 +- pandas/io/feather_format.py | 2 +- pandas/io/formats/excel.py | 24 +- pandas/io/formats/printing.py | 2 +- pandas/io/formats/xml.py | 4 +- pandas/io/json/_table_schema.py | 2 
+- pandas/io/parsers/readers.py | 2 +- pandas/io/pytables.py | 6 +- pandas/io/sas/sas7bdat.py | 4 +- pandas/io/sas/sas_xport.py | 8 +- pandas/io/sql.py | 16 +- pandas/plotting/_matplotlib/boxplot.py | 2 +- pandas/plotting/_matplotlib/compat.py | 1 + pandas/plotting/_matplotlib/converter.py | 2 +- pandas/plotting/_matplotlib/tools.py | 7 +- pandas/tests/arithmetic/test_numeric.py | 10 +- pandas/tests/arrays/test_datetimelike.py | 4 +- pandas/tests/dtypes/test_common.py | 5 +- pandas/tests/extension/decimal/array.py | 2 +- .../tests/extension/decimal/test_decimal.py | 13 +- pandas/tests/extension/json/array.py | 3 +- pandas/tests/extension/test_numpy.py | 13 +- pandas/tests/extension/test_sparse.py | 2 +- .../frame/constructors/test_from_records.py | 2 +- .../tests/frame/methods/test_combine_first.py | 10 +- pandas/tests/frame/methods/test_quantile.py | 2 +- pandas/tests/frame/methods/test_to_csv.py | 5 +- .../frame/methods/test_to_dict_of_blocks.py | 4 +- pandas/tests/frame/test_constructors.py | 101 ++++- pandas/tests/frame/test_reductions.py | 8 +- pandas/tests/groupby/test_libgroupby.py | 7 +- pandas/tests/groupby/test_rank.py | 25 ++ .../indexes/categorical/test_indexing.py | 4 +- pandas/tests/indexes/common.py | 4 +- .../tests/indexes/datetimes/test_indexing.py | 2 +- pandas/tests/indexes/test_numpy_compat.py | 5 +- pandas/tests/indexing/test_loc.py | 32 ++ pandas/tests/indexing/test_partial.py | 3 +- pandas/tests/internals/test_api.py | 2 - pandas/tests/internals/test_internals.py | 18 + pandas/tests/io/formats/style/test_style.py | 5 + pandas/tests/io/sas/test_sas7bdat.py | 6 +- pandas/tests/plotting/frame/test_frame.py | 11 +- pandas/tests/plotting/test_datetimelike.py | 8 +- pandas/tests/resample/test_datetime_index.py | 2 +- pandas/tests/scalar/period/test_period.py | 2 +- pandas/tests/scalar/test_nat.py | 41 ++ pandas/tests/series/methods/test_dropna.py | 4 +- pandas/tests/series/methods/test_fillna.py | 4 +- pandas/tests/series/test_constructors.py | 7 
+- pandas/tests/tools/test_to_numeric.py | 4 +- pandas/tseries/frequencies.py | 8 +- requirements-dev.txt | 1 + ...check_for_inconsistent_pandas_namespace.py | 71 ++-- .../test_inconsistent_namespace_check.py | 61 ++- scripts/tests/test_use_pd_array_in_core.py | 26 ++ scripts/use_pd_array_in_core.py | 77 ++++ scripts/validate_docstrings.py | 2 +- setup.cfg | 12 +- web/pandas/community/ecosystem.md | 21 +- 134 files changed, 1942 insertions(+), 949 deletions(-) create mode 100644 pandas/_libs/groupby.pyi create mode 100644 pandas/_libs/index.pyi create mode 100644 pandas/_libs/tslibs/conversion.pyi create mode 100644 pandas/_libs/tslibs/dtypes.pyi create mode 100644 pandas/_libs/tslibs/fields.pyi create mode 100644 scripts/tests/test_use_pd_array_in_core.py create mode 100644 scripts/use_pd_array_in_core.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5bfceec6605c0..c63f50b3c1421 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -36,7 +36,7 @@ repos: rev: 3.9.0 hooks: - id: flake8 - additional_dependencies: [flake8-comprehensions>=3.1.0] + additional_dependencies: [flake8-comprehensions>=3.1.0, flake8-bugbear>=21.3.2] - id: flake8 name: flake8 (cython) types: [cython] @@ -86,11 +86,10 @@ repos: types: [python] exclude: ^pandas/_typing\.py$ - id: inconsistent-namespace-usage - name: 'Check for inconsistent use of pandas namespace in tests' + name: 'Check for inconsistent use of pandas namespace' entry: python scripts/check_for_inconsistent_pandas_namespace.py language: python types: [python] - files: ^pandas/tests/ - id: incorrect-code-directives name: Check for incorrect code block or IPython directives language: pygrep @@ -213,3 +212,10 @@ repos: |\#\ type:\s?ignore(?!\[) language: pygrep types: [python] + - id: use-pd_array-in-core + name: Import pandas.array as pd_array in core + language: python + entry: python scripts/use_pd_array_in_core.py + files: ^pandas/core/ + exclude: ^pandas/core/api\.py$ + types: [python] 
diff --git a/asv_bench/benchmarks/arithmetic.py b/asv_bench/benchmarks/arithmetic.py index 488237a6f5a8b..bfb1be8705495 100644 --- a/asv_bench/benchmarks/arithmetic.py +++ b/asv_bench/benchmarks/arithmetic.py @@ -140,9 +140,7 @@ def setup(self, op, shape): # construct dataframe with 2 blocks arr1 = np.random.randn(n_rows, n_cols // 2).astype("f8") arr2 = np.random.randn(n_rows, n_cols // 2).astype("f4") - df = pd.concat( - [pd.DataFrame(arr1), pd.DataFrame(arr2)], axis=1, ignore_index=True - ) + df = pd.concat([DataFrame(arr1), DataFrame(arr2)], axis=1, ignore_index=True) # should already be the case, but just to be sure df._consolidate_inplace() @@ -151,7 +149,7 @@ def setup(self, op, shape): arr2 = np.random.randn(n_rows, n_cols // 2).astype("i8") arr3 = np.random.randn(n_rows, n_cols // 4).astype("f8") df2 = pd.concat( - [pd.DataFrame(arr1), pd.DataFrame(arr2), pd.DataFrame(arr3)], + [DataFrame(arr1), DataFrame(arr2), DataFrame(arr3)], axis=1, ignore_index=True, ) @@ -459,9 +457,9 @@ class OffsetArrayArithmetic: def setup(self, offset): N = 10000 - rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + rng = date_range(start="1/1/2000", periods=N, freq="T") self.rng = rng - self.ser = pd.Series(rng) + self.ser = Series(rng) def time_add_series_offset(self, offset): with warnings.catch_warnings(record=True): @@ -478,7 +476,7 @@ class ApplyIndex: def setup(self, offset): N = 10000 - rng = pd.date_range(start="1/1/2000", periods=N, freq="T") + rng = date_range(start="1/1/2000", periods=N, freq="T") self.rng = rng def time_apply_index(self, offset): @@ -490,17 +488,17 @@ class BinaryOpsMultiIndex: param_names = ["func"] def setup(self, func): - date_range = pd.date_range("20200101 00:00", "20200102 0:00", freq="S") + array = date_range("20200101 00:00", "20200102 0:00", freq="S") level_0_names = [str(i) for i in range(30)] - index = pd.MultiIndex.from_product([level_0_names, date_range]) + index = pd.MultiIndex.from_product([level_0_names, array]) column_names 
= ["col_1", "col_2"] - self.df = pd.DataFrame( + self.df = DataFrame( np.random.rand(len(index), 2), index=index, columns=column_names ) - self.arg_df = pd.DataFrame( + self.arg_df = DataFrame( np.random.randint(1, 10, (len(level_0_names), 2)), index=level_0_names, columns=column_names, diff --git a/asv_bench/benchmarks/sparse.py b/asv_bench/benchmarks/sparse.py index 5006a0dbf1f98..35e5818cd3b2b 100644 --- a/asv_bench/benchmarks/sparse.py +++ b/asv_bench/benchmarks/sparse.py @@ -28,7 +28,7 @@ def setup(self): data = np.random.randn(N)[:-i] idx = rng[:-i] data[100:] = np.nan - self.series[i] = pd.Series(pd.SparseArray(data), index=idx) + self.series[i] = Series(SparseArray(data), index=idx) def time_series_to_frame(self): pd.DataFrame(self.series) @@ -63,7 +63,7 @@ def setup(self): ) def time_sparse_series_from_coo(self): - pd.Series.sparse.from_coo(self.matrix) + Series.sparse.from_coo(self.matrix) class ToCoo: diff --git a/codecov.yml b/codecov.yml index 893e40db004a6..3f3df474956da 100644 --- a/codecov.yml +++ b/codecov.yml @@ -8,7 +8,7 @@ coverage: status: project: default: - target: '82' + target: '72' patch: default: target: '50' diff --git a/doc/source/ecosystem.rst b/doc/source/ecosystem.rst index f569fe7451fa7..56aa734deddd6 100644 --- a/doc/source/ecosystem.rst +++ b/doc/source/ecosystem.rst @@ -475,7 +475,7 @@ arrays can be stored inside pandas' Series and DataFrame. `Pandas-Genomics`_ ~~~~~~~~~~~~~~~~~~ -Pandas-Genomics provides extension types and extension arrays for working with genomics data +Pandas-Genomics provides extension types, extension arrays, and extension accessors for working with genomics data `Pint-Pandas`_ ~~~~~~~~~~~~~~ @@ -502,16 +502,17 @@ A directory of projects providing :ref:`extension accessors `. This is for users to discover new accessors and for library authors to coordinate on the namespace. 
-=============== ============ ==================================== =============================================================== -Library Accessor Classes Description -=============== ============ ==================================== =============================================================== -`cyberpandas`_ ``ip`` ``Series`` Provides common operations for working with IP addresses. -`pdvega`_ ``vgplot`` ``Series``, ``DataFrame`` Provides plotting functions from the Altair_ library. -`pandas_path`_ ``path`` ``Index``, ``Series`` Provides `pathlib.Path`_ functions for Series. -`pint-pandas`_ ``pint`` ``Series``, ``DataFrame`` Provides units support for numeric Series and DataFrames. -`composeml`_ ``slice`` ``DataFrame`` Provides a generator for enhanced data slicing. -`datatest`_ ``validate`` ``Series``, ``DataFrame``, ``Index`` Provides validation, differences, and acceptance managers. -=============== ============ ==================================== =============================================================== +================== ============ ==================================== =============================================================================== +Library Accessor Classes Description +================== ============ ==================================== =============================================================================== +`cyberpandas`_ ``ip`` ``Series`` Provides common operations for working with IP addresses. +`pdvega`_ ``vgplot`` ``Series``, ``DataFrame`` Provides plotting functions from the Altair_ library. +`pandas-genomics`_ ``genomics`` ``Series``, ``DataFrame`` Provides common operations for quality control and analysis of genomics data +`pandas_path`_ ``path`` ``Index``, ``Series`` Provides `pathlib.Path`_ functions for Series. +`pint-pandas`_ ``pint`` ``Series``, ``DataFrame`` Provides units support for numeric Series and DataFrames. +`composeml`_ ``slice`` ``DataFrame`` Provides a generator for enhanced data slicing. 
+`datatest`_ ``validate`` ``Series``, ``DataFrame``, ``Index`` Provides validation, differences, and acceptance managers. +================== ============ ==================================== =============================================================================== .. _cyberpandas: https://cyberpandas.readthedocs.io/en/latest .. _pdvega: https://altair-viz.github.io/pdvega/ diff --git a/doc/source/whatsnew/v1.2.4.rst b/doc/source/whatsnew/v1.2.4.rst index 26d768f830830..9cef1307278e8 100644 --- a/doc/source/whatsnew/v1.2.4.rst +++ b/doc/source/whatsnew/v1.2.4.rst @@ -17,6 +17,7 @@ Fixed regressions - Fixed regression in :meth:`DataFrame.sum` when ``min_count`` greater than the :class:`DataFrame` shape was passed resulted in a ``ValueError`` (:issue:`39738`) - Fixed regression in :meth:`DataFrame.to_json` raising ``AttributeError`` when run on PyPy (:issue:`39837`) +- Fixed regression in (in)equality comparison of ``pd.NaT`` with a non-datetimelike numpy array returning a scalar instead of an array (:issue:`40722`) - Fixed regression in :meth:`DataFrame.where` not returning a copy in the case of an all True condition (:issue:`39595`) - Fixed regression in :meth:`DataFrame.replace` raising ``IndexError`` when ``regex`` was a multi-key dictionary (:issue:`39338`) - diff --git a/environment.yml b/environment.yml index 1259d0dd4ae44..feea3445cb4fe 100644 --- a/environment.yml +++ b/environment.yml @@ -21,6 +21,7 @@ dependencies: - black=20.8b1 - cpplint - flake8 + - flake8-bugbear>=21.3.2 # used by flake8, find likely bugs - flake8-comprehensions>=3.1.0 # used by flake8, linting of unnecessary comprehensions - isort>=5.2.1 # check that imports are in the right order - mypy=0.812 diff --git a/pandas/_libs/algos.pyx b/pandas/_libs/algos.pyx index a4bc2443e0eeb..a28f4929995c6 100644 --- a/pandas/_libs/algos.pyx +++ b/pandas/_libs/algos.pyx @@ -794,68 +794,14 @@ def backfill(ndarray[algos_t] old, ndarray[algos_t] new, limit=None) -> ndarray: return indexer 
-@cython.boundscheck(False) -@cython.wraparound(False) def backfill_inplace(algos_t[:] values, uint8_t[:] mask, limit=None): - cdef: - Py_ssize_t i, N - algos_t val - uint8_t prev_mask - int lim, fill_count = 0 - - N = len(values) - - # GH#2778 - if N == 0: - return + pad_inplace(values[::-1], mask[::-1], limit=limit) - lim = validate_limit(N, limit) - val = values[N - 1] - prev_mask = mask[N - 1] - for i in range(N - 1, -1, -1): - if mask[i]: - if fill_count >= lim: - continue - fill_count += 1 - values[i] = val - mask[i] = prev_mask - else: - fill_count = 0 - val = values[i] - prev_mask = mask[i] - - -@cython.boundscheck(False) -@cython.wraparound(False) def backfill_2d_inplace(algos_t[:, :] values, const uint8_t[:, :] mask, limit=None): - cdef: - Py_ssize_t i, j, N, K - algos_t val - int lim, fill_count = 0 - - K, N = (values).shape - - # GH#2778 - if N == 0: - return - - lim = validate_limit(N, limit) - - for j in range(K): - fill_count = 0 - val = values[j, N - 1] - for i in range(N - 1, -1, -1): - if mask[j, i]: - if fill_count >= lim: - continue - fill_count += 1 - values[j, i] = val - else: - fill_count = 0 - val = values[j, i] + pad_2d_inplace(values[:, ::-1], mask[:, ::-1], limit) @cython.boundscheck(False) @@ -987,10 +933,10 @@ def rank_1d( * max: highest rank in group * first: ranks assigned in order they appear in the array * dense: like 'min', but rank always increases by 1 between groups - ascending : boolean, default True + ascending : bool, default True False for ranks by high (1) to low (N) na_option : {'keep', 'top', 'bottom'}, default 'keep' - pct : boolean, default False + pct : bool, default False Compute percentage rank of data within each group na_option : {'keep', 'top', 'bottom'}, default 'keep' * keep: leave NA values where they are @@ -1001,12 +947,14 @@ def rank_1d( TiebreakEnumType tiebreak Py_ssize_t i, j, N, grp_start=0, dups=0, sum_ranks=0 Py_ssize_t grp_vals_seen=1, grp_na_count=0 - ndarray[int64_t, ndim=1] lexsort_indexer - 
ndarray[float64_t, ndim=1] grp_sizes, out + ndarray[int64_t, ndim=1] grp_sizes + ndarray[intp_t, ndim=1] lexsort_indexer + ndarray[float64_t, ndim=1] out ndarray[rank_t, ndim=1] masked_vals ndarray[uint8_t, ndim=1] mask bint keep_na, at_end, next_val_diff, check_labels, group_changed rank_t nan_fill_val + int64_t grp_size tiebreak = tiebreakers[ties_method] if tiebreak == TIEBREAK_FIRST: @@ -1019,7 +967,7 @@ def rank_1d( # TODO Cython 3.0: cast won't be necessary (#2992) assert len(labels) == N out = np.empty(N) - grp_sizes = np.ones(N) + grp_sizes = np.ones(N, dtype=np.int64) # If all 0 labels, can short-circuit later label # comparisons @@ -1076,7 +1024,7 @@ def rank_1d( # each label corresponds to a different group value, # the mask helps you differentiate missing values before # performing sort on the actual values - lexsort_indexer = np.lexsort(order).astype(np.int64, copy=False) + lexsort_indexer = np.lexsort(order).astype(np.intp, copy=False) if not ascending: lexsort_indexer = lexsort_indexer[::-1] @@ -1147,13 +1095,15 @@ def rank_1d( for j in range(i - dups + 1, i + 1): out[lexsort_indexer[j]] = grp_vals_seen - # Look forward to the next value (using the sorting in lexsort_indexer) - # if the value does not equal the current value then we need to - # reset the dups and sum_ranks, knowing that a new value is - # coming up. The conditional also needs to handle nan equality - # and the end of iteration - if next_val_diff or (mask[lexsort_indexer[i]] - ^ mask[lexsort_indexer[i+1]]): + # Look forward to the next value (using the sorting in + # lexsort_indexer). If the value does not equal the current + # value then we need to reset the dups and sum_ranks, knowing + # that a new value is coming up. The conditional also needs + # to handle nan equality and the end of iteration. 
If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff or + (mask[lexsort_indexer[i]] + ^ mask[lexsort_indexer[i+1]])): dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1164,14 +1114,21 @@ def rank_1d( # group encountered (used by pct calculations later). Also be # sure to reset any of the items helping to calculate dups if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group if tiebreak != TIEBREAK_DENSE: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (i - grp_start + 1 - grp_na_count) + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count else: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (grp_vals_seen - 1 - (grp_na_count > 0)) + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[lexsort_indexer[j]] = grp_size + dups = sum_ranks = 0 grp_na_count = 0 grp_start = i + 1 @@ -1238,12 +1195,14 @@ def rank_1d( out[lexsort_indexer[j]] = grp_vals_seen # Look forward to the next value (using the sorting in - # lexsort_indexer) if the value does not equal the current + # lexsort_indexer). If the value does not equal the current # value then we need to reset the dups and sum_ranks, knowing # that a new value is coming up. The conditional also needs - # to handle nan equality and the end of iteration - if next_val_diff or (mask[lexsort_indexer[i]] - ^ mask[lexsort_indexer[i+1]]): + # to handle nan equality and the end of iteration. 
If group + # changes we do not record seeing a new value in the group + if not group_changed and (next_val_diff or + (mask[lexsort_indexer[i]] + ^ mask[lexsort_indexer[i+1]])): dups = sum_ranks = 0 grp_vals_seen += 1 @@ -1254,14 +1213,21 @@ def rank_1d( # group encountered (used by pct calculations later). Also be # sure to reset any of the items helping to calculate dups if group_changed: + + # If not dense tiebreak, group size used to compute + # percentile will be # of non-null elements in group if tiebreak != TIEBREAK_DENSE: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (i - grp_start + 1 - grp_na_count) + grp_size = i - grp_start + 1 - grp_na_count + + # Otherwise, it will be the number of distinct values + # in the group, subtracting 1 if NaNs are present + # since that is a distinct value we shouldn't count else: - for j in range(grp_start, i + 1): - grp_sizes[lexsort_indexer[j]] = \ - (grp_vals_seen - 1 - (grp_na_count > 0)) + grp_size = grp_vals_seen - (grp_na_count > 0) + + for j in range(grp_start, i + 1): + grp_sizes[lexsort_indexer[j]] = grp_size + dups = sum_ranks = 0 grp_na_count = 0 grp_start = i + 1 diff --git a/pandas/_libs/groupby.pyi b/pandas/_libs/groupby.pyi new file mode 100644 index 0000000000000..8721624e9881c --- /dev/null +++ b/pandas/_libs/groupby.pyi @@ -0,0 +1,168 @@ +from typing import Literal + +import numpy as np + +def group_median_float64( + out: np.ndarray, # ndarray[float64_t, ndim=2] + counts: np.ndarray, # ndarray[int64_t] + values: np.ndarray, # ndarray[float64_t, ndim=2] + labels: np.ndarray, # ndarray[int64_t] + min_count: int = ..., # Py_ssize_t +) -> None: ... + +def group_cumprod_float64( + out: np.ndarray, # float64_t[:, ::1] + values: np.ndarray, # const float64_t[:, :] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + skipna: bool = ..., +) -> None: ... 
+ +def group_cumsum( + out: np.ndarray, # numeric[:, ::1] + values: np.ndarray, # ndarray[numeric, ndim=2] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + is_datetimelike: bool, + skipna: bool = ..., +) -> None: ... + + +def group_shift_indexer( + out: np.ndarray, # int64_t[::1] + labels: np.ndarray, # const int64_t[:] + ngroups: int, + periods: int, +) -> None: ... + + +def group_fillna_indexer( + out: np.ndarray, # ndarray[int64_t] + labels: np.ndarray, # ndarray[int64_t] + mask: np.ndarray, # ndarray[uint8_t] + direction: Literal["ffill", "bfill"], + limit: int, # int64_t + dropna: bool, +) -> None: ... + + +def group_any_all( + out: np.ndarray, # uint8_t[::1] + values: np.ndarray, # const uint8_t[::1] + labels: np.ndarray, # const int64_t[:] + mask: np.ndarray, # const uint8_t[::1] + val_test: Literal["any", "all"], + skipna: bool, +) -> None: ... + +def group_add( + out: np.ndarray, # complexfloating_t[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[complexfloating_t, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ... +) -> None: ... + +def group_prod( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ... +) -> None: ... + +def group_var( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ..., # Py_ssize_t + ddof: int = ..., # int64_t +) -> None: ... + +def group_mean( + out: np.ndarray, # floating[:, ::1] + counts: np.ndarray, # int64_t[::1] + values: np.ndarray, # ndarray[floating, ndim=2] + labels: np.ndarray, # const intp_t[:] + min_count: int = ... +) -> None: ... 
+
+def group_ohlc(
+    out: np.ndarray,  # floating[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[floating, ndim=2]
+    labels: np.ndarray,  # const intp_t[:]
+    min_count: int = ...
+) -> None: ...
+
+def group_quantile(
+    out: np.ndarray,  # ndarray[float64_t]
+    values: np.ndarray,  # ndarray[numeric, ndim=1]
+    labels: np.ndarray,  # ndarray[int64_t]
+    mask: np.ndarray,  # ndarray[uint8_t]
+    q: float,  # float64_t
+    interpolation: Literal["linear", "lower", "higher", "nearest", "midpoint"],
+) -> None: ...
+
+def group_last(
+    out: np.ndarray,  # rank_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[rank_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    min_count: int = ...,  # Py_ssize_t
+) -> None: ...
+
+def group_nth(
+    out: np.ndarray,  # rank_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[rank_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    min_count: int = ...,  # int64_t
+    rank: int = ...,  # int64_t
+) -> None: ...
+
+def group_rank(
+    out: np.ndarray,  # float64_t[:, ::1]
+    values: np.ndarray,  # ndarray[rank_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    ngroups: int,
+    is_datetimelike: bool,
+    ties_method: Literal["average", "min", "max", "first", "dense"] = ...,
+    ascending: bool = ...,
+    pct: bool = ...,
+    na_option: Literal["keep", "top", "bottom"] = ...,
+) -> None: ...
+
+def group_max(
+    out: np.ndarray,  # groupby_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    min_count: int = ...,
+) -> None: ...
+
+def group_min(
+    out: np.ndarray,  # groupby_t[:, ::1]
+    counts: np.ndarray,  # int64_t[::1]
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    min_count: int = ...,
+) -> None: ...
+
+def group_cummin(
+    out: np.ndarray,  # groupby_t[:, ::1]
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    ngroups: int,
+    is_datetimelike: bool,
+) -> None: ...
+
+def group_cummax(
+    out: np.ndarray,  # groupby_t[:, ::1]
+    values: np.ndarray,  # ndarray[groupby_t, ndim=2]
+    labels: np.ndarray,  # const int64_t[:]
+    ngroups: int,
+    is_datetimelike: bool,
+) -> None: ...
diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx
index e23fa9b82f12e..e7cd7cd898d5b 100644
--- a/pandas/_libs/groupby.pyx
+++ b/pandas/_libs/groupby.pyx
@@ -154,9 +154,9 @@ def group_cumprod_float64(float64_t[:, ::1] out,
 
     Parameters
     ----------
-    out : float64 array
+    out : np.ndarray[np.float64, ndim=2]
         Array to store cumprod in.
-    values : float64 array
+    values : np.ndarray[np.float64, ndim=2]
         Values to take cumprod of.
     labels : np.ndarray[np.intp]
         Labels to group by.
@@ -211,9 +211,9 @@ def group_cumsum(numeric[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[ndim=2]
         Array to store cumsum in.
-    values : array
+    values : np.ndarray[ndim=2]
         Values to take cumsum of.
     labels : np.ndarray[np.intp]
         Labels to group by.
@@ -329,12 +329,15 @@ def group_fillna_indexer(ndarray[int64_t] out, ndarray[intp_t] labels,
 
     Parameters
     ----------
-    out : array of int64_t values which this method will write its results to
-        Missing values will be written to with a value of -1
+    out : np.ndarray[np.int64]
+        Values into which this method will write its results.
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`.
-    mask : array of int64_t values where a 1 indicates a missing value
+    values : np.ndarray[np.uint8]
+        Containing the truth value of each element.
+    mask : np.ndarray[np.uint8]
+        Indicating whether a value is na or not.
direction : {'ffill', 'bfill'} Direction for fill to be applied (forwards or backwards, respectively) limit : Consecutive values to fill before stopping, or -1 for no limit @@ -396,15 +399,18 @@ def group_any_all(uint8_t[::1] out, Parameters ---------- - out : array of values which this method will write its results to + out : np.ndarray[np.uint8] + Values into which this method will write its results. labels : np.ndarray[np.intp] Array containing unique label for each group, with its ordering matching up to the corresponding record in `values` - values : array containing the truth value of each element - mask : array indicating whether a value is na or not - val_test : str {'any', 'all'} + values : np.ndarray[np.uint8] + Containing the truth value of each element. + mask : np.ndarray[np.uint8] + Indicating whether a value is na or not. + val_test : {'any', 'all'} String object dictating whether to use any or all truth testing - skipna : boolean + skipna : bool Flag to ignore nan values during truth testing Notes @@ -455,11 +461,11 @@ ctypedef fused complexfloating_t: @cython.wraparound(False) @cython.boundscheck(False) -def _group_add(complexfloating_t[:, ::1] out, - int64_t[::1] counts, - ndarray[complexfloating_t, ndim=2] values, - const intp_t[:] labels, - Py_ssize_t min_count=0): +def group_add(complexfloating_t[:, ::1] out, + int64_t[::1] counts, + ndarray[complexfloating_t, ndim=2] values, + const intp_t[:] labels, + Py_ssize_t min_count=0): """ Only aggregates on axis=0 using Kahan summation """ @@ -506,19 +512,13 @@ def _group_add(complexfloating_t[:, ::1] out, out[i, j] = sumx[i, j] -group_add_float32 = _group_add['float32_t'] -group_add_float64 = _group_add['float64_t'] -group_add_complex64 = _group_add['float complex'] -group_add_complex128 = _group_add['double complex'] - - @cython.wraparound(False) @cython.boundscheck(False) -def _group_prod(floating[:, ::1] out, - int64_t[::1] counts, - ndarray[floating, ndim=2] values, - const intp_t[:] labels, - 
Py_ssize_t min_count=0): +def group_prod(floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[:] labels, + Py_ssize_t min_count=0): """ Only aggregates on axis=0 """ @@ -560,19 +560,15 @@ def _group_prod(floating[:, ::1] out, out[i, j] = prodx[i, j] -group_prod_float32 = _group_prod['float'] -group_prod_float64 = _group_prod['double'] - - @cython.wraparound(False) @cython.boundscheck(False) @cython.cdivision(True) -def _group_var(floating[:, ::1] out, - int64_t[::1] counts, - ndarray[floating, ndim=2] values, - const intp_t[:] labels, - Py_ssize_t min_count=-1, - int64_t ddof=1): +def group_var(floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[:] labels, + Py_ssize_t min_count=-1, + int64_t ddof=1): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) floating val, ct, oldmean @@ -619,17 +615,13 @@ def _group_var(floating[:, ::1] out, out[i, j] /= (ct - ddof) -group_var_float32 = _group_var['float'] -group_var_float64 = _group_var['double'] - - @cython.wraparound(False) @cython.boundscheck(False) -def _group_mean(floating[:, ::1] out, - int64_t[::1] counts, - ndarray[floating, ndim=2] values, - const intp_t[::1] labels, - Py_ssize_t min_count=-1): +def group_mean(floating[:, ::1] out, + int64_t[::1] counts, + ndarray[floating, ndim=2] values, + const intp_t[::1] labels, + Py_ssize_t min_count=-1): cdef: Py_ssize_t i, j, N, K, lab, ncounts = len(counts) floating val, count, y, t @@ -675,10 +667,6 @@ def _group_mean(floating[:, ::1] out, out[i, j] = sumx[i, j] / count -group_mean_float32 = _group_mean['float'] -group_mean_float64 = _group_mean['double'] - - @cython.wraparound(False) @cython.boundscheck(False) def group_ohlc(floating[:, ::1] out, @@ -739,14 +727,17 @@ def group_quantile(ndarray[float64_t] out, Parameters ---------- - out : ndarray + out : np.ndarray[np.float64] Array of aggregated values that will be written to. 
+    values : np.ndarray
+        Array containing the values to apply the function against.
     labels : ndarray[np.intp]
         Array containing the unique group labels.
     values : ndarray
         Array containing the values to apply the function against.
     q : float
         The quantile value to search for.
+    interpolation : {'linear', 'lower', 'higher', 'nearest', 'midpoint'}
 
     Notes
     -----
@@ -1066,8 +1057,9 @@ def group_rank(float64_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array of float64_t values which this method will write its results to
-    values : array of rank_t values to be ranked
+    out : np.ndarray[np.float64, ndim=2]
+        Values to which this method will write its results.
+    values : np.ndarray of rank_t values to be ranked
     labels : np.ndarray[np.intp]
         Array containing unique label for each group, with its ordering
         matching up to the corresponding record in `values`
@@ -1076,17 +1068,16 @@ def group_rank(float64_t[:, ::1] out,
         groupby functions.
     is_datetimelike : bool
         True if `values` contains datetime-like entries.
-    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default
-        'average'
+    ties_method : {'average', 'min', 'max', 'first', 'dense'}, default 'average'
         * average: average rank of group
         * min: lowest rank in group
         * max: highest rank in group
         * first: ranks assigned in order they appear in the array
         * dense: like 'min', but rank always increases by 1 between groups
-    ascending : boolean, default True
+    ascending : bool, default True
        False for ranks by high (1) to low (N)
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
-    pct : boolean, default False
+    pct : bool, default False
         Compute percentage rank of data within each group
     na_option : {'keep', 'top', 'bottom'}, default 'keep'
         * keep: leave NA values where they are
@@ -1138,9 +1129,9 @@ cdef group_min_max(groupby_t[:, ::1] out,
 
     Parameters
     ----------
-    out : array
+    out : np.ndarray[groupby_t, ndim=2]
         Array to store result in.
- counts : int64 array + counts : np.ndarray[int64] Input as a zeroed array, populated by group sizes during algorithm values : array Values to find column-wise min/max of. @@ -1248,20 +1239,20 @@ def group_min(groupby_t[:, ::1] out, @cython.boundscheck(False) @cython.wraparound(False) -def group_cummin_max(groupby_t[:, ::1] out, - ndarray[groupby_t, ndim=2] values, - const intp_t[:] labels, - int ngroups, - bint is_datetimelike, - bint compute_max): +cdef group_cummin_max(groupby_t[:, ::1] out, + ndarray[groupby_t, ndim=2] values, + const intp_t[:] labels, + int ngroups, + bint is_datetimelike, + bint compute_max): """ Cumulative minimum/maximum of columns of `values`, in row groups `labels`. Parameters ---------- - out : array + out : np.ndarray[groupby_t, ndim=2] Array to store cummin/max in. - values : array + values : np.ndarray[groupby_t, ndim=2] Values to take cummin/max of. labels : np.ndarray[np.intp] Labels to group by. diff --git a/pandas/_libs/hashtable_class_helper.pxi.in b/pandas/_libs/hashtable_class_helper.pxi.in index 6ace327ca3599..301644274111b 100644 --- a/pandas/_libs/hashtable_class_helper.pxi.in +++ b/pandas/_libs/hashtable_class_helper.pxi.in @@ -411,7 +411,7 @@ cdef class {{name}}HashTable(HashTable): k = kh_get_{{dtype}}(self.table, ckey) return k != self.table.n_buckets - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the size of my table in bytes """ overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) @@ -419,7 +419,7 @@ cdef class {{name}}HashTable(HashTable): sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the state of the hashtable""" return { 'n_buckets' : self.table.n_buckets, @@ -523,7 +523,7 @@ cdef class {{name}}HashTable(HashTable): any value "val" satisfying val != val is considered missing. 
If na_value is not None, then _additionally_, any value "val" satisfying val == na_value is considered missing. - ignore_na : boolean, default False + ignore_na : bool, default False Whether NA-values should be ignored for calculating the uniques. If True, the labels corresponding to missing values will be set to na_sentinel. @@ -531,7 +531,7 @@ cdef class {{name}}HashTable(HashTable): If not None, the mask is used as indicator for missing values (True = missing, False = valid) instead of `na_value` or condition "val != val". - return_inverse : boolean, default False + return_inverse : bool, default False Whether the mapping of the original array values to their location in the vector of uniques should be returned. @@ -625,7 +625,7 @@ cdef class {{name}}HashTable(HashTable): ---------- values : ndarray[{{dtype}}] Array of values of which unique will be calculated - return_inverse : boolean, default False + return_inverse : bool, default False Whether the mapping of the original array values to their location in the vector of uniques should be returned. @@ -747,14 +747,14 @@ cdef class StringHashTable(HashTable): kh_destroy_str(self.table) self.table = NULL - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) for_pairs = self.table.n_buckets * (sizeof(char *) + # keys sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the state of the hashtable""" return { 'n_buckets' : self.table.n_buckets, @@ -906,11 +906,11 @@ cdef class StringHashTable(HashTable): that is not a string is considered missing. If na_value is not None, then _additionally_ any value "val" satisfying val == na_value is considered missing. 
- ignore_na : boolean, default False + ignore_na : bool, default False Whether NA-values should be ignored for calculating the uniques. If True, the labels corresponding to missing values will be set to na_sentinel. - return_inverse : boolean, default False + return_inverse : bool, default False Whether the mapping of the original array values to their location in the vector of uniques should be returned. @@ -998,7 +998,7 @@ cdef class StringHashTable(HashTable): ---------- values : ndarray[object] Array of values of which unique will be calculated - return_inverse : boolean, default False + return_inverse : bool, default False Whether the mapping of the original array values to their location in the vector of uniques should be returned. @@ -1079,7 +1079,7 @@ cdef class PyObjectHashTable(HashTable): k = kh_get_pymap(self.table, key) return k != self.table.n_buckets - def sizeof(self, deep=False): + def sizeof(self, deep: bool = False) -> int: """ return the size of my table in bytes """ overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*) for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t) @@ -1087,7 +1087,7 @@ cdef class PyObjectHashTable(HashTable): sizeof(Py_ssize_t)) # vals return overhead + for_flags + for_pairs - def get_state(self): + def get_state(self) -> dict[str, int]: """ returns infos about the current state of the hashtable like size, number of buckets and so on. @@ -1181,11 +1181,11 @@ cdef class PyObjectHashTable(HashTable): any value "val" satisfying val != val is considered missing. If na_value is not None, then _additionally_, any value "val" satisfying val == na_value is considered missing. - ignore_na : boolean, default False + ignore_na : bool, default False Whether NA-values should be ignored for calculating the uniques. If True, the labels corresponding to missing values will be set to na_sentinel. 
- return_inverse : boolean, default False + return_inverse : bool, default False Whether the mapping of the original array values to their location in the vector of uniques should be returned. @@ -1251,7 +1251,7 @@ cdef class PyObjectHashTable(HashTable): ---------- values : ndarray[object] Array of values of which unique will be calculated - return_inverse : boolean, default False + return_inverse : bool, default False Whether the mapping of the original array values to their location in the vector of uniques should be returned. diff --git a/pandas/_libs/index.pyi b/pandas/_libs/index.pyi new file mode 100644 index 0000000000000..979619c3d14c4 --- /dev/null +++ b/pandas/_libs/index.pyi @@ -0,0 +1,86 @@ +import numpy as np + +class IndexEngine: + over_size_threshold: bool + + def __init__(self, vgetter, n: int): ... + + def __contains__(self, val: object) -> bool: ... + + # -> int | slice | np.ndarray[bool] + def get_loc(self, val: object) -> int | slice | np.ndarray: ... + + def sizeof(self, deep: bool = False) -> int: ... + def __sizeof__(self) -> int: ... + + @property + def is_unique(self) -> bool: ... + + @property + def is_monotonic_increasing(self) -> bool: ... + + @property + def is_monotonic_decreasing(self) -> bool: ... + + def get_backfill_indexer(self, other: np.ndarray, limit: int | None =...) -> np.ndarray: ... + def get_pad_indexer(self, other: np.ndarray, limit: int | None =...) -> np.ndarray: ... + + @property + def is_mapping_populated(self) -> bool: ... + + def clear_mapping(self): ... + def get_indexer(self, values: np.ndarray) -> np.ndarray: ... # np.ndarray[np.intp] + def get_indexer_non_unique( + self, + targets: np.ndarray, + ) -> tuple[ + np.ndarray, # np.ndarray[np.intp] + np.ndarray, # np.ndarray[np.intp] + ]: ... + + +class Float64Engine(IndexEngine): ... +class Float32Engine(IndexEngine): ... + +class Int64Engine(IndexEngine): ... +class Int32Engine(IndexEngine): ... +class Int16Engine(IndexEngine): ... 
+class Int8Engine(IndexEngine): ... + +class UInt64Engine(IndexEngine): ... +class UInt32Engine(IndexEngine): ... +class UInt16Engine(IndexEngine): ... +class UInt8Engine(IndexEngine): ... + +class ObjectEngine(IndexEngine): ... + +class DatetimeEngine(Int64Engine): ... +class TimedeltaEngine(DatetimeEngine): ... +class PeriodEngine(Int64Engine): ... + + +class BaseMultiIndexCodesEngine: + levels: list[np.ndarray] + offsets: np.ndarray # ndarray[uint64_t, ndim=1] + + def __init__( + self, + levels: list[np.ndarray], # all entries hashable + labels: list[np.ndarray], # all entries integer-dtyped + offsets: np.ndarray, # np.ndarray[np.uint64, ndim=1] + ): ... + + def get_indexer( + self, + target: np.ndarray, # np.ndarray[object] + ) -> np.ndarray: ... # np.ndarray[np.intp] + + def _extract_level_codes(self, target: object): ... + + def get_indexer_with_fill( + self, + target: np.ndarray, # np.ndarray[object] of tuples + values: np.ndarray, # np.ndarray[object] of tuples + method: str, + limit: int | None, + ) -> np.ndarray: ... 
# np.ndarray[np.int64] diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx index 71f4b0c0ae18f..47e6d417bb925 100644 --- a/pandas/_libs/index.pyx +++ b/pandas/_libs/index.pyx @@ -259,7 +259,7 @@ cdef class IndexEngine: self.monotonic_inc = 0 self.monotonic_dec = 0 - def get_indexer(self, ndarray values): + def get_indexer(self, ndarray values) -> np.ndarray: self._ensure_mapping_populated() return self.mapping.lookup(values) @@ -269,6 +269,11 @@ cdef class IndexEngine: return the labels in the same order as the target and a missing indexer into the targets (which correspond to the -1 indices in the results + + Returns + ------- + indexer : np.ndarray[np.intp] + missing : np.ndarray[np.intp] """ cdef: ndarray values, x @@ -455,7 +460,7 @@ cdef class DatetimeEngine(Int64Engine): # we may get datetime64[ns] or timedelta64[ns], cast these to int64 return super().get_indexer_non_unique(targets.view("i8")) - def get_indexer(self, ndarray values): + def get_indexer(self, ndarray values) -> np.ndarray: self._ensure_mapping_populated() if values.dtype != self._get_box_dtype(): return np.repeat(-1, len(values)).astype(np.intp) @@ -572,17 +577,17 @@ cdef class BaseMultiIndexCodesEngine: # integers representing labels: we will use its get_loc and get_indexer self._base.__init__(self, lambda: lab_ints, len(lab_ints)) - def _codes_to_ints(self, codes): + def _codes_to_ints(self, ndarray[uint64_t] codes) -> np.ndarray: raise NotImplementedError("Implemented by subclass") - def _extract_level_codes(self, object target): + def _extract_level_codes(self, ndarray[object] target) -> np.ndarray: """ Map the requested list of (tuple) keys to their integer representations for searching in the underlying integer index. Parameters ---------- - target : list-like of keys + target : ndarray[object] Each key is a tuple, with a label for each level of the index. 
Returns @@ -607,7 +612,7 @@ cdef class BaseMultiIndexCodesEngine: Returns ------- - np.ndarray[int64_t, ndim=1] of the indexer of `target` into + np.ndarray[intp_t, ndim=1] of the indexer of `target` into `self.values` """ lab_ints = self._extract_level_codes(target) @@ -635,7 +640,7 @@ cdef class BaseMultiIndexCodesEngine: the same as the length of all tuples in `values` values : ndarray[object] of tuples must be sorted and all have the same length. Should be the set of - the MultiIndex's values. Needed only if `method` is not None + the MultiIndex's values. method: string "backfill" or "pad" limit: int or None @@ -643,7 +648,7 @@ cdef class BaseMultiIndexCodesEngine: Returns ------- - np.ndarray[int64_t, ndim=1] of the indexer of `target` into `values`, + np.ndarray[intp_t, ndim=1] of the indexer of `target` into `values`, filled with the `method` (and optionally `limit`) specified """ assert method in ("backfill", "pad") @@ -714,9 +719,7 @@ cdef class BaseMultiIndexCodesEngine: return self._base.get_loc(self, lab_int) - def get_indexer_non_unique(self, ndarray target): - # This needs to be overridden just because the default one works on - # target._values, and target can be itself a MultiIndex. + def get_indexer_non_unique(self, ndarray[object] target): lab_ints = self._extract_level_codes(target) indexer = self._base.get_indexer_non_unique(self, lab_ints) diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 31b6935e9b2ba..4295db9d1613c 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -385,7 +385,7 @@ def get_blkno_indexers(int64_t[:] blknos, bint group=True): Returns ------- - iter : iterator of (int, slice or array) + list[tuple[int, slice | np.ndarray]] """ # There's blkno in this function's name because it's used in block & # blockno handling. 
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
index 646b5a05afcad..4b423175172d2 100644
--- a/pandas/_libs/lib.pyx
+++ b/pandas/_libs/lib.pyx
@@ -916,7 +916,7 @@ def indices_fast(ndarray[intp_t] index, const int64_t[:] labels, list keys,
     """
     Parameters
     ----------
-    index : ndarray
+    index : ndarray[intp]
     labels : ndarray[int64]
     keys : list
     sorted_labels : list[ndarray[int64]]
@@ -2440,6 +2440,9 @@ class NoDefault(Enum):
     # 2) because mypy does not understand singletons
     no_default = "NO_DEFAULT"

+    def __repr__(self) -> str:
+        return "<no_default>"
+

# Note: no_default is exported to the public API in pandas.api.extensions
no_default = NoDefault.no_default  # Sentinel indicating the default value.
diff --git a/pandas/_libs/ops.pyx b/pandas/_libs/ops.pyx
index ecb7041fb2c5a..7951bb5c093ef 100644
--- a/pandas/_libs/ops.pyx
+++ b/pandas/_libs/ops.pyx
@@ -258,7 +258,7 @@ def vec_binop(object[:] left, object[:] right, object op) -> ndarray:


 def maybe_convert_bool(ndarray[object] arr,
-                       true_values=None, false_values=None):
+                       true_values=None, false_values=None) -> ndarray:
     cdef:
         Py_ssize_t i, n
         ndarray[uint8_t] result
diff --git a/pandas/_libs/reshape.pyx b/pandas/_libs/reshape.pyx
index 05b255c40f4b2..959d83a55d4f3 100644
--- a/pandas/_libs/reshape.pyx
+++ b/pandas/_libs/reshape.pyx
@@ -48,13 +48,13 @@ def unstack(reshape_t[:, :] values, const uint8_t[:] mask,
     Parameters
     ----------
     values : typed ndarray
-    mask : boolean ndarray
+    mask : np.ndarray[bool]
     stride : int
     length : int
     width : int
-    new_values : typed ndarray
+    new_values : np.ndarray[bool]
         result array
-    new_mask : boolean ndarray
+    new_mask : np.ndarray[bool]
         result mask
     """
     cdef:
diff --git a/pandas/_libs/tslibs/conversion.pyi b/pandas/_libs/tslibs/conversion.pyi
new file mode 100644
index 0000000000000..6470361542597
--- /dev/null
+++ b/pandas/_libs/tslibs/conversion.pyi
@@ -0,0 +1,41 @@
+from datetime import (
+    datetime,
+    tzinfo,
+)
+
+import numpy as np
+
+DT64NS_DTYPE: np.dtype
+TD64NS_DTYPE: np.dtype
+
+class OutOfBoundsTimedelta(ValueError): ... + +def precision_from_unit( + unit: str, +) -> tuple[ + int, # int64_t + int, +]: ... + + +def ensure_datetime64ns( + arr: np.ndarray, # np.ndarray[datetime64[ANY]] + copy: bool = ..., +) -> np.ndarray: ... # np.ndarray[datetime64ns] + + +def ensure_timedelta64ns( + arr: np.ndarray, # np.ndarray[timedelta64[ANY]] + copy: bool = ..., +) -> np.ndarray: ... # np.ndarray[timedelta64ns] + + +def datetime_to_datetime64( + values: np.ndarray, # np.ndarray[object] +) -> tuple[ + np.ndarray, # np.ndarray[dt64ns] + tzinfo | None, +]: ... + + +def localize_pydatetime(dt: datetime, tz: tzinfo | None) -> datetime: ... diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 56280d55e479d..1bda35206ccef 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -267,7 +267,7 @@ def ensure_timedelta64ns(arr: ndarray, copy: bool=True): Parameters ---------- arr : ndarray - copy : boolean, default True + copy : bool, default True Returns ------- @@ -316,7 +316,7 @@ def datetime_to_datetime64(ndarray[object] values): Returns ------- - result : ndarray[int64_t] + result : ndarray[datetime64ns] inferred_tz : tzinfo or None """ cdef: diff --git a/pandas/_libs/tslibs/dtypes.pyi b/pandas/_libs/tslibs/dtypes.pyi new file mode 100644 index 0000000000000..53752098bafe9 --- /dev/null +++ b/pandas/_libs/tslibs/dtypes.pyi @@ -0,0 +1,65 @@ +from enum import Enum + +from pandas._libs.tslibs.offsets import BaseOffset + +_attrname_to_abbrevs: dict[str, str] +_period_code_map: dict[str, int] + + +class PeriodDtypeBase: + # actually __cinit__ + def __new__(self, code: int): ... + + def freq_group_code(self) -> int: ... + def date_offset(self) -> BaseOffset: ... + + @classmethod + def from_date_offset(cls, offset: BaseOffset) -> PeriodDtypeBase: ... + + +class FreqGroup(Enum): + FR_ANN: int = ... + FR_QTR: int = ... + FR_MTH: int = ... + FR_WK: int = ... + FR_BUS: int = ... 
+ FR_DAY: int = ...
+ FR_HR: int = ...
+ FR_MIN: int = ...
+ FR_SEC: int = ...
+ FR_MS: int = ...
+ FR_US: int = ...
+ FR_NS: int = ...
+ FR_UND: int = ...
+
+ @staticmethod
+ def get_freq_group(code: int) -> FreqGroup: ...
+
+
+class Resolution(Enum):
+ RESO_NS: int = ...
+ RESO_US: int = ...
+ RESO_MS: int = ...
+ RESO_SEC: int = ...
+ RESO_MIN: int = ...
+ RESO_HR: int = ...
+ RESO_DAY: int = ...
+ RESO_MTH: int = ...
+ RESO_QTR: int = ...
+ RESO_YR: int = ...
+
+ def __lt__(self, other: Resolution) -> bool: ...
+
+ def __ge__(self, other: Resolution) -> bool: ...
+
+ @property
+ def freq_group(self) -> FreqGroup: ...
+
+ @property
+ def attrname(self) -> str: ...
+
+ @classmethod
+ def from_attrname(cls, attrname: str) -> Resolution: ...
+
+ @classmethod
+ def get_reso_from_freq(cls, freq: str) -> Resolution: ...
diff --git a/pandas/_libs/tslibs/fields.pyi b/pandas/_libs/tslibs/fields.pyi
new file mode 100644
index 0000000000000..22ae156d78b7d
--- /dev/null
+++ b/pandas/_libs/tslibs/fields.pyi
@@ -0,0 +1,69 @@
+import numpy as np
+
+def build_field_sarray(
+ dtindex: np.ndarray, # const int64_t[:]
+) -> np.ndarray: ...
+
+def month_position_check(fields, weekdays) -> str | None: ...
+
+def get_date_name_field(
+ dtindex: np.ndarray, # const int64_t[:]
+ field: str,
+ locale=...,
+) -> np.ndarray: ... # np.ndarray[object]
+
+def get_start_end_field(
+ dtindex: np.ndarray, # const int64_t[:]
+ field: str,
+ freqstr: str | None = ...,
+ month_kw: int = ...
+) -> np.ndarray: ... # np.ndarray[bool]
+
+
+def get_date_field(
+ dtindex: np.ndarray, # const int64_t[:]
+
+ field: str,
+) -> np.ndarray: ... # np.ndarray[int32]
+
+
+def get_timedelta_field(
+ tdindex: np.ndarray, # const int64_t[:]
+ field: str,
+) -> np.ndarray: ... # np.ndarray[int32]
+
+
+def isleapyear_arr(
+ years: np.ndarray,
+) -> np.ndarray: ... # np.ndarray[bool]
+
+def build_isocalendar_sarray(
+ dtindex: np.ndarray, # const int64_t[:]
+) -> np.ndarray: ...
+ + +def get_locale_names(name_type: str, locale: object = None): ... + + +class RoundTo: + @property + def MINUS_INFTY(self) -> int: ... + + @property + def PLUS_INFTY(self) -> int: ... + + @property + def NEAREST_HALF_EVEN(self) -> int: ... + + @property + def NEAREST_HALF_PLUS_INFTY(self) -> int: ... + + @property + def NEAREST_HALF_MINUS_INFTY(self) -> int: ... + + +def round_nsint64( + values: np.ndarray, # np.ndarray[np.int64] + mode: RoundTo, + nanos: int, +) -> np.ndarray: ... # np.ndarray[np.int64] diff --git a/pandas/_libs/tslibs/fields.pyx b/pandas/_libs/tslibs/fields.pyx index 79d6a42075e83..d6ca38e57d2d8 100644 --- a/pandas/_libs/tslibs/fields.pyx +++ b/pandas/_libs/tslibs/fields.pyx @@ -635,9 +635,9 @@ def get_locale_names(name_type: str, locale: object = None): Parameters ---------- - name_type : string, attribute of LocaleTime() in which to return localized - names - locale : string + name_type : str + Attribute of LocaleTime() in which to return localized names. + locale : str Returns ------- diff --git a/pandas/_libs/tslibs/nattype.pyx b/pandas/_libs/tslibs/nattype.pyx index d86d3261d404e..0c598beb6ad16 100644 --- a/pandas/_libs/tslibs/nattype.pyx +++ b/pandas/_libs/tslibs/nattype.pyx @@ -127,6 +127,10 @@ cdef class _NaT(datetime): result.fill(_nat_scalar_rules[op]) elif other.dtype.kind == "O": result = np.array([PyObject_RichCompare(self, x, op) for x in other]) + elif op == Py_EQ: + result = np.zeros(other.shape, dtype=bool) + elif op == Py_NE: + result = np.ones(other.shape, dtype=bool) else: return NotImplemented return result diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 3cdb654642b9c..f536c8dd76f0d 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -311,10 +311,16 @@ cdef convert_to_timedelta64(object ts, str unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"): 
+def array_to_timedelta64(
+    ndarray[object] values, str unit=None, str errors="raise"
+) -> ndarray:
     """
     Convert an ndarray to an array of timedeltas. If errors == 'coerce',
     coerce non-convertible objects to NaT. Otherwise, raise.
+
+    Returns
+    -------
+    np.ndarray[timedelta64ns]
     """

     cdef:
@@ -540,7 +546,7 @@ cdef inline int64_t timedelta_as_neg(int64_t value, bint neg):
     Parameters
     ----------
     value : int64_t of the timedelta value
-    neg : boolean if the a negative value
+    neg : bool if a negative value
     """
     if neg:
         return -value
diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py
index b38ca516c4393..9bacb30b78a64 100644
--- a/pandas/_testing/__init__.py
+++ b/pandas/_testing/__init__.py
@@ -207,12 +207,12 @@ def box_expected(expected, box_cls, transpose=True):
     """
     if box_cls is pd.array:
         expected = pd.array(expected)
-    elif box_cls is pd.Index:
-        expected = pd.Index(expected)
-    elif box_cls is pd.Series:
-        expected = pd.Series(expected)
-    elif box_cls is pd.DataFrame:
-        expected = pd.Series(expected).to_frame()
+    elif box_cls is Index:
+        expected = Index(expected)
+    elif box_cls is Series:
+        expected = Series(expected)
+    elif box_cls is DataFrame:
+        expected = Series(expected).to_frame()
     if transpose:
         # for vector operations, we need a DataFrame to be a single-row,
         # not a single-column, in order to operate against non-DataFrame
@@ -400,7 +400,7 @@ def _make_timeseries(start="2000-01-01", end="2000-12-31", freq="1D", seed=None)
         "x": state.rand(n) * 2 - 1,
         "y": state.rand(n) * 2 - 1,
     }
-    df = pd.DataFrame(columns, index=index, columns=sorted(columns))
+    df = DataFrame(columns, index=index, columns=sorted(columns))
     if df.index[-1] == end:
         df = df.iloc[:-1]
     return df
@@ -952,7 +952,7 @@ def get_op_from_name(op_name: str) -> Callable:

     Parameters
     ----------
-    op_name : string
+    op_name : str
         The op name, in form of "add" or "__add__".
Returns diff --git a/pandas/conftest.py b/pandas/conftest.py index f3356d2998ff8..3fdde3261bd68 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -328,7 +328,7 @@ def unique_nulls_fixture(request): # ---------------------------------------------------------------- -@pytest.fixture(params=[pd.DataFrame, pd.Series]) +@pytest.fixture(params=[DataFrame, Series]) def frame_or_series(request): """ Fixture to parametrize over DataFrame and Series. @@ -338,7 +338,7 @@ def frame_or_series(request): # error: List item 0 has incompatible type "Type[Index]"; expected "Type[IndexOpsMixin]" @pytest.fixture( - params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item] + params=[Index, Series], ids=["index", "series"] # type: ignore[list-item] ) def index_or_series(request): """ @@ -356,9 +356,7 @@ def index_or_series(request): index_or_series2 = index_or_series -@pytest.fixture( - params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"] -) +@pytest.fixture(params=[Index, Series, pd.array], ids=["index", "series", "array"]) def index_or_series_or_array(request): """ Fixture to parametrize over Index, Series, and ExtensionArray @@ -559,7 +557,7 @@ def index_with_missing(request): # ---------------------------------------------------------------- @pytest.fixture def empty_series(): - return pd.Series([], index=[], dtype=np.float64) + return Series([], index=[], dtype=np.float64) @pytest.fixture @@ -596,7 +594,7 @@ def _create_series(index): """ Helper for the _series dict """ size = len(index) data = np.random.randn(size) - return pd.Series(data, index=index, name="a") + return Series(data, index=index, name="a") _series = { @@ -1131,6 +1129,24 @@ def string_dtype(request): return request.param +@pytest.fixture( + params=[ + "string", + pytest.param( + "arrow_string", marks=td.skip_if_no("pyarrow", min_version="1.0.0") + ), + ] +) +def nullable_string_dtype(request): + """ + Parametrized fixture for string dtypes. 
+ + * 'string' + * 'arrow_string' + """ + return request.param + + @pytest.fixture(params=tm.BYTES_DTYPES) def bytes_dtype(request): """ @@ -1419,7 +1435,7 @@ def any_numpy_dtype(request): ("boolean", [True, np.nan, False]), ("boolean", [True, pd.NA, False]), ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), - ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]), + ("datetime", [Timestamp("20130101"), np.nan, Timestamp("20180101")]), ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), # The following two dtypes are commented out due to GH 23554 # ('complex', [1 + 1j, np.nan, 2 + 2j]), @@ -1427,8 +1443,8 @@ def any_numpy_dtype(request): # np.nan, np.timedelta64(2, 'D')]), ("timedelta", [timedelta(1), np.nan, timedelta(2)]), ("time", [time(1), np.nan, time(2)]), - ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]), - ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]), + ("period", [Period(2013), pd.NaT, Period(2018)]), + ("interval", [Interval(0, 1), np.nan, Interval(0, 2)]), ] ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index a8a761b5f4aac..7a1a5f5b30590 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -162,7 +162,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i order = [] columns, pairs = list(zip(*kwargs.items())) - for name, (column, aggfunc) in zip(columns, pairs): + for column, aggfunc in pairs: aggspec[column].append(aggfunc) order.append((column, com.get_callable_name(aggfunc) or aggfunc)) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index f52aff424eb0b..0c8a5bbc33c91 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -943,7 +943,7 @@ def duplicated(values: ArrayLike, keep: Union[str, bool] = "first") -> np.ndarra Returns ------- - duplicated : ndarray + duplicated : ndarray[bool] 
""" values, _ = _ensure_data(values) ndtype = values.dtype.name @@ -959,7 +959,7 @@ def mode(values, dropna: bool = True) -> Series: ---------- values : array-like Array over which to check for duplicate values. - dropna : boolean, default True + dropna : bool, default True Don't consider counts of NaN/NaT. .. versionadded:: 0.24.0 @@ -1025,9 +1025,9 @@ def rank( - ``keep``: rank each NaN value with a NaN ranking - ``top``: replace each NaN with either +/- inf so that they there are ranked at the top - ascending : boolean, default True + ascending : bool, default True Whether or not the elements should be ranked in ascending order. - pct : boolean, default False + pct : bool, default False Whether or not to the display the returned rankings in integer form (e.g. 1, 2, 3) or in percentile form (e.g. 0.333..., 0.666..., 1). """ @@ -1619,7 +1619,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: _diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"} -def diff(arr, n: int, axis: int = 0, stacklevel=3): +def diff(arr, n: int, axis: int = 0, stacklevel: int = 3): """ difference of n between self, analogous to s-s.shift(n) @@ -1631,7 +1631,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3): number of periods axis : {0, 1} axis to shift on - stacklevel : int + stacklevel : int, default 3 The stacklevel for the lost dtype warning. 
Returns @@ -1865,7 +1865,7 @@ def safe_sort( return ordered, ensure_platform_int(new_codes) -def _sort_mixed(values): +def _sort_mixed(values) -> np.ndarray: """ order ints before strings in 1d arrays, safe in py3 """ str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) nums = np.sort(values[~str_pos]) diff --git a/pandas/core/array_algos/take.py b/pandas/core/array_algos/take.py index 8318a02d5d214..d062b39725867 100644 --- a/pandas/core/array_algos/take.py +++ b/pandas/core/array_algos/take.py @@ -76,7 +76,7 @@ def take_nd( Axis to take from fill_value : any, default np.nan Fill value to replace -1 values with - allow_fill : boolean, default True + allow_fill : bool, default True If False, indexer is assumed to contain no -1 values so no filling will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index 427b3106ea10c..08061eb1ec28c 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -157,8 +157,7 @@ def ndim(self) -> int: @cache_readonly def size(self) -> int: - # error: Incompatible return value type (got "number", expected "int") - return np.prod(self.shape) # type: ignore[return-value] + return self._ndarray.size @cache_readonly def nbytes(self) -> int: @@ -190,7 +189,7 @@ def equals(self, other) -> bool: return False return bool(array_equivalent(self._ndarray, other._ndarray)) - def _values_for_argsort(self): + def _values_for_argsort(self) -> np.ndarray: return self._ndarray # Signature of "argmin" incompatible with supertype "ExtensionArray" diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 08646c4d25a50..32c3095c3e6ee 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -606,7 +606,9 @@ def _from_inferred_categories( if true_values is None: true_values = ["True", "TRUE", "true"] - cats = 
cats.isin(true_values) + # error: Incompatible types in assignment (expression has type + # "ndarray", variable has type "Index") + cats = cats.isin(true_values) # type: ignore[assignment] if known_categories: # Recode from observation order to dtype.categories order. @@ -1444,7 +1446,7 @@ def memory_usage(self, deep: bool = False) -> int: """ return self._codes.nbytes + self.dtype.categories.memory_usage(deep=deep) - def isna(self): + def isna(self) -> np.ndarray: """ Detect missing values @@ -1452,7 +1454,7 @@ def isna(self): Returns ------- - a boolean array of whether my values are null + np.ndarray[bool] of whether my values are null See Also -------- @@ -1465,7 +1467,7 @@ def isna(self): isnull = isna - def notna(self): + def notna(self) -> np.ndarray: """ Inverse of isna @@ -1474,7 +1476,7 @@ def notna(self): Returns ------- - a boolean array of whether my values are not null + np.ndarray[bool] of whether my values are not null See Also -------- @@ -1731,7 +1733,7 @@ def view(self, dtype=None): raise NotImplementedError(dtype) return self._from_backing_data(self._ndarray) - def to_dense(self): + def to_dense(self) -> np.ndarray: """ Return my 'dense' representation @@ -1804,14 +1806,14 @@ def __contains__(self, key) -> bool: """ # if key is a NaN, check if any NaN is in self. if is_valid_na_for_dtype(key, self.categories.dtype): - return self.isna().any() + return bool(self.isna().any()) return contains(self, key, container=self._codes) # ------------------------------------------------------------------ # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): # Defer to CategoricalFormatter's formatter. 
return None @@ -1889,7 +1891,7 @@ def _repr_footer(self) -> str: info = self._repr_categories_info() return f"Length: {len(self)}\n{info}" - def _get_repr(self, length=True, na_rep="NaN", footer=True) -> str: + def _get_repr(self, length: bool = True, na_rep="NaN", footer: bool = True) -> str: from pandas.io.formats import format as fmt formatter = fmt.CategoricalFormatter( diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 67241a866ef35..7be06fe92c418 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -327,7 +327,7 @@ def _format_native_types(self, na_rep="NaT", date_format=None): """ raise AbstractMethodError(self) - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): # TODO: Remove Datetime & DatetimeTZ formatters. return "'{}'".format @@ -354,7 +354,7 @@ def __getitem__( result._freq = self._get_getitem_freq(key) return result - def _get_getitem_freq(self, key): + def _get_getitem_freq(self, key) -> Optional[BaseOffset]: """ Find the `freq` attribute to assign to the result of a __getitem__ lookup. """ @@ -406,7 +406,7 @@ def _maybe_clear_freq(self): # DatetimeArray and TimedeltaArray pass - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # Some notes on cases we don't have to handle here in the base class: # 1. PeriodArray.astype handles period -> period # 2. DatetimeArray.astype handles conversion between tz. @@ -545,7 +545,7 @@ def _values_for_factorize(self): @classmethod def _from_factorized( - cls: Type[DatetimeLikeArrayT], values, original + cls: Type[DatetimeLikeArrayT], values, original: DatetimeLikeArrayT ) -> DatetimeLikeArrayT: return cls(values, dtype=original.dtype) @@ -939,7 +939,7 @@ def freq(self, value): self._freq = value @property - def freqstr(self): + def freqstr(self) -> Optional[str]: """ Return the frequency object as a string if its set, otherwise None. 
""" @@ -948,7 +948,7 @@ def freqstr(self): return self.freq.freqstr @property # NB: override with cache_readonly in immutable subclasses - def inferred_freq(self): + def inferred_freq(self) -> Optional[str]: """ Tries to return a string representing a frequency guess, generated by infer_freq. Returns None if it can't autodetect the @@ -963,8 +963,11 @@ def inferred_freq(self): @property # NB: override with cache_readonly in immutable subclasses def _resolution_obj(self) -> Optional[Resolution]: + freqstr = self.freqstr + if freqstr is None: + return None try: - return Resolution.get_reso_from_freq(self.freqstr) + return Resolution.get_reso_from_freq(freqstr) except KeyError: return None @@ -1241,7 +1244,7 @@ def _addsub_object_array(self, other: np.ndarray, op): ) return result - def _time_shift(self, periods, freq=None): + def _time_shift(self, periods: int, freq=None): """ Shift each value by `periods`. @@ -1440,7 +1443,7 @@ def __isub__(self, other): # -------------------------------------------------------------- # Reductions - def min(self, *, axis=None, skipna=True, **kwargs): + def min(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): """ Return the minimum value of the Array or minimum along an axis. @@ -1469,7 +1472,7 @@ def min(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def max(self, *, axis=None, skipna=True, **kwargs): + def max(self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs): """ Return the maximum value of the Array or maximum along an axis. @@ -1500,7 +1503,7 @@ def max(self, *, axis=None, skipna=True, **kwargs): result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def mean(self, *, skipna=True, axis: Optional[int] = 0): + def mean(self, *, skipna: bool = True, axis: Optional[int] = 0): """ Return the mean value of the Array. 
@@ -1568,7 +1571,7 @@ class DatelikeOps(DatetimeLikeArrayMixin): URL="https://docs.python.org/3/library/datetime.html" "#strftime-and-strptime-behavior" ) - def strftime(self, date_format): + def strftime(self, date_format: str) -> np.ndarray: """ Convert to Index using specified date_format. @@ -1727,6 +1730,7 @@ def _round(self, freq, mode, ambiguous, nonexistent): ) values = self.view("i8") + values = cast(np.ndarray, values) nanos = to_offset(freq).nanos result = round_nsint64(values, mode, nanos) result = self._maybe_mask_results(result, fill_value=iNaT) @@ -1759,7 +1763,7 @@ def all(self, *, axis: Optional[int] = None, skipna: bool = True): # -------------------------------------------------------------- # Frequency Methods - def _maybe_clear_freq(self): + def _maybe_clear_freq(self) -> None: self._freq = None def _with_freq(self, freq): diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c0a8c20832fa8..956a93a142afe 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -84,6 +84,11 @@ if TYPE_CHECKING: from typing import Literal + from pandas.core.arrays import ( + PeriodArray, + TimedeltaArray, + ) + _midnight = time(0, 0) @@ -244,7 +249,7 @@ class DatetimeArray(dtl.TimelikeOps, dtl.DatelikeOps): _dtype: Union[np.dtype, DatetimeTZDtype] _freq = None - def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): + def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy: bool = False): values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=iNaT) @@ -319,7 +324,7 @@ def __init__(self, values, dtype=DT64NS_DTYPE, freq=None, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE + cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=DT64NS_DTYPE ) -> DatetimeArray: assert isinstance(values, np.ndarray) assert values.dtype == DT64NS_DTYPE 
@@ -339,11 +344,11 @@ def _from_sequence_not_strict( cls, data, dtype=None, - copy=False, + copy: bool = False, tz=None, freq=lib.no_default, - dayfirst=False, - yearfirst=False, + dayfirst: bool = False, + yearfirst: bool = False, ambiguous="raise", ): explicit_none = freq is None @@ -492,7 +497,7 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64: self._check_compatible_with(value, setitem=setitem) return value.asm8 - def _scalar_from_string(self, value): + def _scalar_from_string(self, value) -> Union[Timestamp, NaTType]: return Timestamp(value, tz=self.tz) def _check_compatible_with(self, other, setitem: bool = False): @@ -536,7 +541,7 @@ def dtype(self) -> Union[np.dtype, DatetimeTZDtype]: # type: ignore[override] return self._dtype @property - def tz(self): + def tz(self) -> Optional[tzinfo]: """ Return timezone, if any. @@ -557,14 +562,14 @@ def tz(self, value): ) @property - def tzinfo(self): + def tzinfo(self) -> Optional[tzinfo]: """ Alias for tz attribute """ return self.tz @property # NB: override with cache_readonly in immutable subclasses - def is_normalized(self): + def is_normalized(self) -> bool: """ Returns True if all of the dates are at midnight ("no time") """ @@ -609,7 +614,7 @@ def __iter__(self): ) yield from converted - def astype(self, dtype, copy=True): + def astype(self, dtype, copy: bool = True): # We handle # --> datetime # --> period @@ -636,7 +641,9 @@ def astype(self, dtype, copy=True): # Rendering Methods @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: from pandas.io.formats.format import get_format_datetime64_from_values fmt = get_format_datetime64_from_values(self, date_format) @@ -660,7 +667,7 @@ def _has_same_tz(self, other) -> bool: other_tz = other.tzinfo return timezones.tz_compare(self.tzinfo, other_tz) - def _assert_tzawareness_compat(self, other): + def 
_assert_tzawareness_compat(self, other) -> None: # adapted from _Timestamp._assert_tzawareness_compat other_tz = getattr(other, "tzinfo", None) other_dtype = getattr(other, "dtype", None) @@ -708,7 +715,7 @@ def _sub_datetime_arraylike(self, other): np.putmask(new_values, arr_mask, iNaT) return new_values.view("timedelta64[ns]") - def _add_offset(self, offset): + def _add_offset(self, offset) -> DatetimeArray: if self.ndim == 2: return self.ravel()._add_offset(offset).reshape(self.shape) @@ -756,7 +763,7 @@ def _sub_datetimelike_scalar(self, other): # ----------------------------------------------------------------- # Timezone Conversion and Localization Methods - def _local_timestamps(self): + def _local_timestamps(self) -> np.ndarray: """ Convert to an i8 (unix-like nanosecond timestamp) representation while keeping the local timezone and not using UTC. @@ -767,7 +774,7 @@ def _local_timestamps(self): return self.asi8 return tzconversion.tz_convert_from_utc(self.asi8, self.tz) - def tz_convert(self, tz): + def tz_convert(self, tz) -> DatetimeArray: """ Convert tz-aware Datetime Array/Index from one time zone to another. @@ -844,7 +851,7 @@ def tz_convert(self, tz): return self._simple_new(self._ndarray, dtype=dtype, freq=self.freq) @dtl.ravel_compat - def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): + def tz_localize(self, tz, ambiguous="raise", nonexistent="raise") -> DatetimeArray: """ Localize tz-naive Datetime Array/Index to tz-aware Datetime Array/Index. @@ -1031,11 +1038,11 @@ def to_pydatetime(self) -> np.ndarray: Returns ------- - datetimes : ndarray + datetimes : ndarray[object] """ return ints_to_pydatetime(self.asi8, tz=self.tz) - def normalize(self): + def normalize(self) -> DatetimeArray: """ Convert times to midnight. 
@@ -1077,7 +1084,7 @@ def normalize(self): return type(self)(new_values)._with_freq("infer").tz_localize(self.tz) @dtl.ravel_compat - def to_period(self, freq=None): + def to_period(self, freq=None) -> PeriodArray: """ Cast to PeriodArray/Index at a particular frequency. @@ -1148,7 +1155,7 @@ def to_period(self, freq=None): return PeriodArray._from_datetime64(self._ndarray, freq, tz=self.tz) - def to_perioddelta(self, freq): + def to_perioddelta(self, freq) -> TimedeltaArray: """ Calculate TimedeltaArray of difference between index values and index converted to PeriodArray at specified diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 45656459792ba..a824e27e3e36a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -205,38 +205,60 @@ def _values_for_factorize(self) -> Tuple[np.ndarray, int]: # ------------------------------------------------------------------------ # Reductions - def any(self, *, axis=None, out=None, keepdims=False, skipna=True): + def any( + self, + *, + axis: Optional[int] = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_any((), {"out": out, "keepdims": keepdims}) result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def all(self, *, axis=None, out=None, keepdims=False, skipna=True): + def all( + self, + *, + axis: Optional[int] = None, + out=None, + keepdims: bool = False, + skipna: bool = True, + ): nv.validate_all((), {"out": out, "keepdims": keepdims}) result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) - def min(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: + def min( + self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_min((), kwargs) result = nanops.nanmin( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) 
- def max(self, *, axis=None, skipna: bool = True, **kwargs) -> Scalar: + def max( + self, *, axis: Optional[int] = None, skipna: bool = True, **kwargs + ) -> Scalar: nv.validate_max((), kwargs) result = nanops.nanmax( values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna ) return self._wrap_reduction_result(axis, result) - def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def sum( + self, *, axis: Optional[int] = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: nv.validate_sum((), kwargs) result = nanops.nansum( self._ndarray, axis=axis, skipna=skipna, min_count=min_count ) return self._wrap_reduction_result(axis, result) - def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: + def prod( + self, *, axis: Optional[int] = None, skipna: bool = True, min_count=0, **kwargs + ) -> Scalar: nv.validate_prod((), kwargs) result = nanops.nanprod( self._ndarray, axis=axis, skipna=skipna, min_count=min_count @@ -246,18 +268,24 @@ def prod(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: def mean( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims}) result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna) return self._wrap_reduction_result(axis, result) def median( - self, *, axis=None, out=None, overwrite_input=False, keepdims=False, skipna=True + self, + *, + axis: Optional[int] = None, + out=None, + overwrite_input: bool = False, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_median( (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims} @@ -268,12 +296,12 @@ def median( def std( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + 
skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std" @@ -284,12 +312,12 @@ def std( def var( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var" @@ -300,12 +328,12 @@ def var( def sem( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof=1, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem" @@ -316,11 +344,11 @@ def sem( def kurt( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt" @@ -331,11 +359,11 @@ def kurt( def skew( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, - keepdims=False, - skipna=True, + keepdims: bool = False, + skipna: bool = True, ): nv.validate_stat_ddof_func( (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew" @@ -368,7 +396,7 @@ def to_numpy( # type: ignore[override] # ------------------------------------------------------------------------ # Ops - def __invert__(self): + def __invert__(self) -> PandasArray: return type(self)(~self._ndarray) def _cmp_method(self, other, op): diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d91522a9e1bb6..2355999933a7a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -3,6 +3,7 @@ from datetime import timedelta import operator from typing import ( + TYPE_CHECKING, Any, Callable, List, @@ -76,6 +77,9 
@@ from pandas.core.arrays import datetimelike as dtl import pandas.core.common as com +if TYPE_CHECKING: + from pandas.core.arrays import DatetimeArray + _shared_doc_kwargs = { "klass": "PeriodArray", } @@ -186,7 +190,9 @@ class PeriodArray(PeriodMixin, dtl.DatelikeOps): # -------------------------------------------------------------------- # Constructors - def __init__(self, values, dtype: Optional[Dtype] = None, freq=None, copy=False): + def __init__( + self, values, dtype: Optional[Dtype] = None, freq=None, copy: bool = False + ): freq = validate_dtype_freq(dtype, freq) if freq is not None: @@ -250,7 +256,7 @@ def _from_sequence( @classmethod def _from_sequence_of_strings( - cls, strings, *, dtype: Optional[Dtype] = None, copy=False + cls, strings, *, dtype: Optional[Dtype] = None, copy: bool = False ) -> PeriodArray: return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -448,7 +454,7 @@ def is_leap_year(self) -> np.ndarray: """ return isleapyear_arr(np.asarray(self.year)) - def to_timestamp(self, freq=None, how="start"): + def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: """ Cast to DatetimeArray/Index. @@ -492,7 +498,7 @@ def to_timestamp(self, freq=None, how="start"): # -------------------------------------------------------------------- - def _time_shift(self, periods, freq=None): + def _time_shift(self, periods: int, freq=None) -> PeriodArray: """ Shift each value by `periods`. 
@@ -597,7 +603,9 @@ def _formatter(self, boxed: bool = False): return "'{}'".format @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: """ actually format my specific types """ diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 67cd6c63c1faa..0a2893ac49a49 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -405,12 +405,7 @@ def _cmp_method(self, other, op): _str_na_value = StringDtype.na_value def _str_map(self, f, na_value=None, dtype: Optional[Dtype] = None): - from pandas.arrays import ( - BooleanArray, - IntegerArray, - StringArray, - ) - from pandas.core.arrays.string_ import StringDtype + from pandas.arrays import BooleanArray if dtype is None: dtype = StringDtype() diff --git a/pandas/core/arrays/string_arrow.py b/pandas/core/arrays/string_arrow.py index 7251faee333bb..e1262d691128f 100644 --- a/pandas/core/arrays/string_arrow.py +++ b/pandas/core/arrays/string_arrow.py @@ -222,7 +222,7 @@ def _chk_pyarrow_available(cls) -> None: raise ImportError(msg) @classmethod - def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): + def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy: bool = False): cls._chk_pyarrow_available() # convert non-na-likes to str, and nan-likes to ArrowStringDtype.na_value scalars = lib.ensure_string_array(scalars, copy=False) @@ -230,7 +230,7 @@ def _from_sequence(cls, scalars, dtype: Optional[Dtype] = None, copy=False): @classmethod def _from_sequence_of_strings( - cls, strings, dtype: Optional[Dtype] = None, copy=False + cls, strings, dtype: Optional[Dtype] = None, copy: bool = False ): return cls._from_sequence(strings, dtype=dtype, copy=copy) @@ -431,7 +431,7 @@ def fillna(self, value=None, method=None, limit=None): new_values = self.copy() return new_values - def _reduce(self, name, skipna=True, 
**kwargs): + def _reduce(self, name: str, skipna: bool = True, **kwargs): if name in ["min", "max"]: return getattr(self, name)(skipna=skipna) diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index f3889ff360aa8..59077bfceaa4a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -2,6 +2,7 @@ from datetime import timedelta from typing import ( + TYPE_CHECKING, List, Optional, Tuple, @@ -35,7 +36,10 @@ ints_to_pytimedelta, parse_timedelta_unit, ) -from pandas._typing import NpDtype +from pandas._typing import ( + DtypeObj, + NpDtype, +) from pandas.compat.numpy import function as nv from pandas.core.dtypes.cast import astype_td64_unit_conversion @@ -70,6 +74,12 @@ from pandas.core.construction import extract_array from pandas.core.ops.common import unpack_zerodim_and_defer +if TYPE_CHECKING: + from pandas.core.arrays import ( + DatetimeArray, + PeriodArray, + ) + def _field_accessor(name: str, alias: str, docstring: str): def f(self) -> np.ndarray: @@ -171,7 +181,9 @@ def dtype(self) -> np.dtype: # type: ignore[override] _freq = None - def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): + def __init__( + self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy: bool = False + ): values = extract_array(values, extract_numpy=True) if isinstance(values, IntegerArray): values = values.to_numpy("int64", na_value=tslibs.iNaT) @@ -230,7 +242,7 @@ def __init__(self, values, dtype=TD64NS_DTYPE, freq=lib.no_default, copy=False): @classmethod def _simple_new( - cls, values, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE + cls, values: np.ndarray, freq: Optional[BaseOffset] = None, dtype=TD64NS_DTYPE ) -> TimedeltaArray: assert dtype == TD64NS_DTYPE, dtype assert isinstance(values, np.ndarray), type(values) @@ -331,10 +343,10 @@ def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64: self._check_compatible_with(value, setitem=setitem) return 
np.timedelta64(value.value, "ns") - def _scalar_from_string(self, value): + def _scalar_from_string(self, value) -> Union[Timedelta, NaTType]: return Timedelta(value) - def _check_compatible_with(self, other, setitem: bool = False): + def _check_compatible_with(self, other, setitem: bool = False) -> None: # we don't have anything to validate. pass @@ -375,7 +387,7 @@ def __iter__(self): def sum( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, keepdims: bool = False, @@ -395,7 +407,7 @@ def sum( def std( self, *, - axis=None, + axis: Optional[int] = None, dtype: Optional[NpDtype] = None, out=None, ddof: int = 1, @@ -414,13 +426,15 @@ def std( # ---------------------------------------------------------------- # Rendering Methods - def _formatter(self, boxed=False): + def _formatter(self, boxed: bool = False): from pandas.io.formats.format import get_format_timedelta64 return get_format_timedelta64(self, box=True) @dtl.ravel_compat - def _format_native_types(self, na_rep="NaT", date_format=None, **kwargs): + def _format_native_types( + self, na_rep="NaT", date_format=None, **kwargs + ) -> np.ndarray: from pandas.io.formats.format import get_format_timedelta64 formatter = get_format_timedelta64(self._ndarray, na_rep) @@ -435,7 +449,7 @@ def _add_offset(self, other): f"cannot add the type {type(other).__name__} to a {type(self).__name__}" ) - def _add_period(self, other: Period): + def _add_period(self, other: Period) -> PeriodArray: """ Add a Period object. 
""" @@ -459,7 +473,7 @@ def _add_datetime_arraylike(self, other): # defer to implementation in DatetimeArray return other + self - def _add_datetimelike_scalar(self, other): + def _add_datetimelike_scalar(self, other) -> DatetimeArray: # adding a timedeltaindex to a datetimelike from pandas.core.arrays import DatetimeArray @@ -853,7 +867,7 @@ def to_pytimedelta(self) -> np.ndarray: Returns ------- - datetimes : ndarray + timedeltas : ndarray[object] """ return tslibs.ints_to_pytimedelta(self.asi8) @@ -919,7 +933,7 @@ def f(x): def sequence_to_td64ns( - data, copy=False, unit=None, errors="raise" + data, copy: bool = False, unit=None, errors="raise" ) -> Tuple[np.ndarray, Optional[Tick]]: """ Parameters @@ -1095,7 +1109,7 @@ def objects_to_td64ns(data, unit=None, errors="raise"): return result.view("timedelta64[ns]") -def _validate_td64_dtype(dtype): +def _validate_td64_dtype(dtype) -> DtypeObj: dtype = pandas_dtype(dtype) if is_dtype_equal(dtype, np.dtype("timedelta64")): # no precision disallowed GH#24806 diff --git a/pandas/core/base.py b/pandas/core/base.py index 18fc76fe79a5a..b0c2af89ad0c7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1164,7 +1164,7 @@ def is_monotonic_decreasing(self) -> bool: return Index(self).is_monotonic_decreasing - def memory_usage(self, deep=False): + def _memory_usage(self, deep: bool = False) -> int: """ Memory usage of the values. 
diff --git a/pandas/core/common.py b/pandas/core/common.py index 6790a3e54192a..98606f5d3d240 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -336,7 +336,7 @@ def get_callable_name(obj): if isinstance(obj, partial): return get_callable_name(obj.func) # fall back to class name - if hasattr(obj, "__call__"): + if callable(obj): return type(obj).__name__ # everything failed (probably because the argument # wasn't actually callable); we return None diff --git a/pandas/core/computation/expressions.py b/pandas/core/computation/expressions.py index 8205534c9d48b..957a493925405 100644 --- a/pandas/core/computation/expressions.py +++ b/pandas/core/computation/expressions.py @@ -104,11 +104,20 @@ def _evaluate_numexpr(op, op_str, a, b): a_value = a b_value = b - result = ne.evaluate( - f"a_value {op_str} b_value", - local_dict={"a_value": a_value, "b_value": b_value}, - casting="safe", - ) + try: + result = ne.evaluate( + f"a_value {op_str} b_value", + local_dict={"a_value": a_value, "b_value": b_value}, + casting="safe", + ) + except TypeError: + # numexpr raises eg for array ** array with integers + # (https://github.com/pydata/numexpr/issues/379) + pass + + if is_reversed: + # reverse order to original for fallback + a, b = b, a if _TEST_MODE: _store_test_result(result is not None) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 7a2175a364a8a..edc43bc68b2a8 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -113,6 +113,9 @@ from pandas.core.arrays import ( DatetimeArray, ExtensionArray, + IntervalArray, + PeriodArray, + TimedeltaArray, ) _int8_max = np.iinfo(np.int8).max @@ -508,7 +511,7 @@ def maybe_upcast_putmask(result: np.ndarray, mask: np.ndarray) -> np.ndarray: result : ndarray The destination array. This will be mutated in-place if no upcasting is necessary. 
- mask : boolean ndarray + mask : np.ndarray[bool] Returns ------- @@ -1733,6 +1736,8 @@ def maybe_cast_to_datetime( except ValueError: # TODO(GH#40048): only catch dateutil's ParserError # once we can reliably import it in all supported versions + if is_timedelta64: + raise pass # coerce datetimelike to object @@ -2167,24 +2172,51 @@ def validate_numeric_casting(dtype: np.dtype, value: Scalar) -> None: raise ValueError(f"Cannot assign {type(value).__name__} to bool series") -def can_hold_element(dtype: np.dtype, element: Any) -> bool: +def can_hold_element(arr: ArrayLike, element: Any) -> bool: """ Can we do an inplace setitem with this element in an array with this dtype? Parameters ---------- - dtype : np.dtype + arr : np.ndarray or ExtensionArray element : Any Returns ------- bool """ + dtype = arr.dtype + if not isinstance(dtype, np.dtype) or dtype.kind in ["m", "M"]: + if isinstance(dtype, (PeriodDtype, IntervalDtype, DatetimeTZDtype, np.dtype)): + # np.dtype here catches datetime64ns and timedelta64ns; we assume + # in this case that we have DatetimeArray/TimedeltaArray + arr = cast( + "PeriodArray | DatetimeArray | TimedeltaArray | IntervalArray", arr + ) + try: + arr._validate_setitem_value(element) + return True + except (ValueError, TypeError): + return False + + # This is technically incorrect, but maintains the behavior of + # ExtensionBlock._can_hold_element + return True + tipo = maybe_infer_dtype_type(element) if dtype.kind in ["i", "u"]: if tipo is not None: - return tipo.kind in ["i", "u"] and dtype.itemsize >= tipo.itemsize + if tipo.kind not in ["i", "u"]: + # Anything other than integer we cannot hold + return False + elif dtype.itemsize < tipo.itemsize: + return False + elif not isinstance(tipo, np.dtype): + # i.e. 
nullable IntegerDtype; we can put this into an ndarray + # losslessly iff it has no NAs + return not element._mask.any() + return True # We have not inferred an integer from the dtype # check if we have a builtin int or a float equal to an int @@ -2192,7 +2224,16 @@ def can_hold_element(dtype: np.dtype, element: Any) -> bool: elif dtype.kind == "f": if tipo is not None: - return tipo.kind in ["f", "i", "u"] + # TODO: itemsize check? + if tipo.kind not in ["f", "i", "u"]: + # Anything other than float/integer we cannot hold + return False + elif not isinstance(tipo, np.dtype): + # i.e. nullable IntegerDtype or FloatingDtype; + # we can put this into an ndarray losslessly iff it has no NAs + return not element._mask.any() + return True + return lib.is_integer(element) or lib.is_float(element) elif dtype.kind == "c": @@ -2210,4 +2251,11 @@ def can_hold_element(dtype: np.dtype, element: Any) -> bool: elif dtype == object: return True + elif dtype.kind == "S": + # TODO: test tests.frame.methods.test_replace tests get here, + # need more targeted tests. 
xref phofl has a PR about this + if tipo is not None: + return tipo.kind == "S" and tipo.itemsize <= dtype.itemsize + return isinstance(element, bytes) and len(element) <= dtype.itemsize + raise NotImplementedError(dtype) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index b9e785ff2f887..3d8d189046d8a 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1576,7 +1576,7 @@ def _is_dtype(arr_or_dtype, condition) -> bool: return False try: dtype = get_dtype(arr_or_dtype) - except (TypeError, ValueError, UnicodeEncodeError): + except (TypeError, ValueError): return False return condition(dtype) @@ -1651,7 +1651,7 @@ def _is_dtype_type(arr_or_dtype, condition) -> bool: try: tipo = pandas_dtype(arr_or_dtype).type - except (TypeError, ValueError, UnicodeEncodeError): + except (TypeError, ValueError): if is_scalar(arr_or_dtype): return condition(type(None)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 510bdfcb0079f..484b01f2c04f0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -231,7 +231,7 @@ If 0 or 'index': apply function to each column. If 1 or 'columns': apply function to each row.""", "inplace": """ - inplace : boolean, default False + inplace : bool, default False If True, performs operation inplace and returns None.""", "optional_by": """ by : str or list of str @@ -251,7 +251,7 @@ you to specify a location to update with some value.""", } -_numeric_only_doc = """numeric_only : boolean, default None +_numeric_only_doc = """numeric_only : bool or None, default None Include only float, int, boolean data. If None, will attempt to use everything, then use only numeric data """ @@ -472,12 +472,17 @@ class DataFrame(NDFrame, OpsMixin): Index to use for resulting frame. Will default to RangeIndex if no indexing information part of input data and no index provided. columns : Index or array-like - Column labels to use for resulting frame. 
Will default to - RangeIndex (0, 1, 2, ..., n) if no column labels are provided. + Column labels to use for resulting frame when data does not have them, + defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels, + will perform column selection instead. dtype : dtype, default None Data type to force. Only a single dtype is allowed. If None, infer. - copy : bool, default False - Copy data from inputs. Only affects DataFrame / 2d ndarray input. + copy : bool or None, default None + Copy data from inputs. + For dict data, the default of None behaves like ``copy=True``. For DataFrame + or 2d ndarray input, the default of None behaves like ``copy=False``. + + .. versionchanged:: 1.3.0 See Also -------- @@ -523,6 +528,18 @@ class DataFrame(NDFrame, OpsMixin): 1 4 5 6 2 7 8 9 + Constructing DataFrame from a numpy ndarray that has labeled columns: + + >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)], + ... dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")]) + >>> df3 = pd.DataFrame(data, columns=['c', 'a']) + ... 
+ >>> df3 + c a + 0 3 1 + 1 6 4 + 2 9 7 + Constructing DataFrame from dataclass: >>> from dataclasses import make_dataclass @@ -555,8 +572,16 @@ def __init__( index: Optional[Axes] = None, columns: Optional[Axes] = None, dtype: Optional[Dtype] = None, - copy: bool = False, + copy: Optional[bool] = None, ): + + if copy is None: + if isinstance(data, dict) or data is None: + # retain pre-GH#38939 default behavior + copy = True + else: + copy = False + if data is None: data = {} if dtype is not None: @@ -565,18 +590,13 @@ def __init__( if isinstance(data, DataFrame): data = data._mgr - # first check if a Manager is passed without any other arguments - # -> use fastpath (without checking Manager type) - if ( - index is None - and columns is None - and dtype is None - and copy is False - and isinstance(data, (BlockManager, ArrayManager)) - ): - # GH#33357 fastpath - NDFrame.__init__(self, data) - return + if isinstance(data, (BlockManager, ArrayManager)): + # first check if a Manager is passed without any other arguments + # -> use fastpath (without checking Manager type) + if index is None and columns is None and dtype is None and not copy: + # GH#33357 fastpath + NDFrame.__init__(self, data) + return manager = get_option("mode.data_manager") @@ -586,7 +606,8 @@ def __init__( ) elif isinstance(data, dict): - mgr = dict_to_mgr(data, index, columns, dtype=dtype, typ=manager) + # GH#38939 de facto copy defaults to False only in non-dict cases + mgr = dict_to_mgr(data, index, columns, dtype=dtype, copy=copy, typ=manager) elif isinstance(data, ma.MaskedArray): import numpy.ma.mrecords as mrecords @@ -8170,7 +8191,7 @@ def _gotitem( Parameters ---------- key : string / list of selections - ndim : 1,2 + ndim : {1, 2} requested ndim of result subset : object, default None subset to act on diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e47fa0eb45d94..6b4e3c7caef50 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -181,7 +181,7 @@ 
"axes_single_arg": "int or labels for object", "args_transpose": "axes to permute (int or label for object)", "inplace": """ - inplace : boolean, default False + inplace : bool, default False If True, performs operation inplace and returns None.""", "optional_by": """ by : str or list of str @@ -1656,9 +1656,9 @@ def _is_label_reference(self, key, axis=0) -> bool_t: Parameters ---------- - key: str + key : str Potential label name - axis: int, default 0 + axis : int, default 0 Axis perpendicular to the axis that labels are associated with (0 means search for column labels, 1 means search for index labels) @@ -1687,14 +1687,14 @@ def _is_label_or_level_reference(self, key: str, axis: int = 0) -> bool_t: Parameters ---------- - key: str + key : str Potential label or level name - axis: int, default 0 + axis : int, default 0 Axis that levels are associated with (0 for index, 1 for columns) Returns ------- - is_label_or_level: bool + bool """ return self._is_level_reference(key, axis=axis) or self._is_label_reference( key, axis=axis @@ -1710,9 +1710,9 @@ def _check_label_or_level_ambiguity(self, key, axis: int = 0) -> None: Parameters ---------- - key: str or object + key : str or object Label or level name. - axis: int, default 0 + axis : int, default 0 Axis that levels are associated with (0 for index, 1 for columns). Raises @@ -1760,14 +1760,14 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray: Parameters ---------- - key: str + key : str Label or level name. 
- axis: int, default 0 + axis : int, default 0 Axis that levels are associated with (0 for index, 1 for columns) Returns ------- - values: np.ndarray + values : np.ndarray Raises ------ diff --git a/pandas/core/groupby/categorical.py b/pandas/core/groupby/categorical.py index c9dd420ec33df..6de8c1d789097 100644 --- a/pandas/core/groupby/categorical.py +++ b/pandas/core/groupby/categorical.py @@ -34,14 +34,14 @@ def recode_for_groupby( Parameters ---------- c : Categorical - sort : boolean + sort : bool The value of the sort parameter groupby was called with. - observed : boolean + observed : bool Account only for the observed values Returns ------- - New Categorical + Categorical If sort=False, the new categories are set to the order of appearance in codes (unless ordered=True, in which case the original order is preserved), followed by any unrepresented @@ -93,7 +93,7 @@ def recode_from_groupby( Parameters ---------- c : Categorical - sort : boolean + sort : bool The value of the sort parameter groupby was called with. ci : CategoricalIndex The codes / categories to recode diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 1ee38834c5758..a6c3cb3ff5d0b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1602,7 +1602,7 @@ def std(self, ddof: int = 1): Standard deviation of values within each group. 
""" return self._get_cythonized_result( - "group_var_float64", + "group_var", aggregate=True, needs_counts=True, needs_values=True, @@ -1807,7 +1807,9 @@ def describe(self, **kwargs): result = self.apply(lambda x: x.describe(**kwargs)) if self.axis == 1: return result.T - return result.unstack() + # FIXME: not being consolidated breaks + # test_describe_with_duplicate_output_column_names + return result._consolidate().unstack() @final def resample(self, rule, *args, **kwargs): diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index 51f7b44f6d69d..d9cda1b6cbe3f 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -293,7 +293,7 @@ def _get_grouper(self, obj, validate: bool = True): Parameters ---------- obj : the subject object - validate : boolean, default True + validate : bool, default True if True, validate the grouper Returns @@ -789,7 +789,7 @@ def is_in_obj(gpr) -> bool: # lambda here return False - for i, (gpr, level) in enumerate(zip(keys, levels)): + for gpr, level in zip(keys, levels): if is_in_obj(gpr): # df.groupby(df['name']) in_axis, name = True, gpr.name diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index 5442f90a25580..4b68717763d87 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -97,63 +97,172 @@ get_indexer_dict, ) -_CYTHON_FUNCTIONS = { - "aggregate": { - "add": "group_add", - "prod": "group_prod", - "min": "group_min", - "max": "group_max", - "mean": "group_mean", - "median": "group_median", - "var": "group_var", - "first": "group_nth", - "last": "group_last", - "ohlc": "group_ohlc", - }, - "transform": { - "cumprod": "group_cumprod", - "cumsum": "group_cumsum", - "cummin": "group_cummin", - "cummax": "group_cummax", - "rank": "group_rank", - }, -} - - -@functools.lru_cache(maxsize=None) -def _get_cython_function(kind: str, how: str, dtype: np.dtype, is_numeric: bool): - - dtype_str = dtype.name - ftype = _CYTHON_FUNCTIONS[kind][how] - - # 
see if there is a fused-type version of function - # only valid for numeric - f = getattr(libgroupby, ftype, None) - if f is not None and is_numeric: - return f - - # otherwise find dtype-specific version, falling back to object - for dt in [dtype_str, "object"]: - f2 = getattr(libgroupby, f"{ftype}_{dt}", None) - if f2 is not None: - return f2 - - if hasattr(f, "__signatures__"): - # inspect what fused types are implemented - if dtype_str == "object" and "object" not in f.__signatures__: - # disallow this function so we get a NotImplementedError below - # instead of a TypeError at runtime - f = None - - func = f - - if func is None: + +class WrappedCythonOp: + """ + Dispatch logic for functions defined in _libs.groupby + """ + + def __init__(self, kind: str, how: str): + self.kind = kind + self.how = how + + _CYTHON_FUNCTIONS = { + "aggregate": { + "add": "group_add", + "prod": "group_prod", + "min": "group_min", + "max": "group_max", + "mean": "group_mean", + "median": "group_median", + "var": "group_var", + "first": "group_nth", + "last": "group_last", + "ohlc": "group_ohlc", + }, + "transform": { + "cumprod": "group_cumprod", + "cumsum": "group_cumsum", + "cummin": "group_cummin", + "cummax": "group_cummax", + "rank": "group_rank", + }, + } + + _cython_arity = {"ohlc": 4} # OHLC + + # Note: we make this a classmethod and pass kind+how so that caching + # works at the class level and not the instance level + @classmethod + @functools.lru_cache(maxsize=None) + def _get_cython_function( + cls, kind: str, how: str, dtype: np.dtype, is_numeric: bool + ): + + dtype_str = dtype.name + ftype = cls._CYTHON_FUNCTIONS[kind][how] + + # see if there is a fused-type version of function + # only valid for numeric + f = getattr(libgroupby, ftype, None) + if f is not None: + if is_numeric: + return f + elif dtype == object: + if "object" not in f.__signatures__: + # raise NotImplementedError here rather than TypeError later + raise NotImplementedError( + f"function is not 
implemented for this dtype: " + f"[how->{how},dtype->{dtype_str}]" + ) + return f + raise NotImplementedError( f"function is not implemented for this dtype: " f"[how->{how},dtype->{dtype_str}]" ) - return func + def get_cython_func_and_vals(self, values: np.ndarray, is_numeric: bool): + """ + Find the appropriate cython function, casting if necessary. + + Parameters + ---------- + values : np.ndarray + is_numeric : bool + + Returns + ------- + func : callable + values : np.ndarray + """ + how = self.how + kind = self.kind + + if how in ["median", "cumprod"]: + # these two only have float64 implementations + if is_numeric: + values = ensure_float64(values) + else: + raise NotImplementedError( + f"function is not implemented for this dtype: " + f"[how->{how},dtype->{values.dtype.name}]" + ) + func = getattr(libgroupby, f"group_{how}_float64") + return func, values + + func = self._get_cython_function(kind, how, values.dtype, is_numeric) + + if values.dtype.kind in ["i", "u"]: + if how in ["add", "var", "prod", "mean", "ohlc"]: + # result may still include NaN, so we have to cast + values = ensure_float64(values) + + return func, values + + def disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False): + """ + Check if we can do this operation with our cython functions. + + Raises + ------ + NotImplementedError + This is either not a valid function for this dtype, or + valid but not implemented in cython. + """ + how = self.how + + if is_numeric: + # never an invalid op for those dtypes, so return early as fastpath + return + + if is_categorical_dtype(dtype) or is_sparse(dtype): + # categoricals are only 1d, so we + # are not setup for dim transforming + raise NotImplementedError(f"{dtype} dtype not supported") + elif is_datetime64_any_dtype(dtype): + # we raise NotImplemented if this is an invalid operation + # entirely, e.g. 
adding datetimes + if how in ["add", "prod", "cumsum", "cumprod"]: + raise NotImplementedError( + f"datetime64 type does not support {how} operations" + ) + elif is_timedelta64_dtype(dtype): + if how in ["prod", "cumprod"]: + raise NotImplementedError( + f"timedelta64 type does not support {how} operations" + ) + + def get_output_shape(self, ngroups: int, values: np.ndarray) -> Shape: + how = self.how + kind = self.kind + + arity = self._cython_arity.get(how, 1) + + out_shape: Shape + if how == "ohlc": + out_shape = (ngroups, 4) + elif arity > 1: + raise NotImplementedError( + "arity of more than 1 is not supported for the 'how' argument" + ) + elif kind == "transform": + out_shape = values.shape + else: + out_shape = (ngroups,) + values.shape[1:] + return out_shape + + def get_out_dtype(self, dtype: np.dtype) -> np.dtype: + how = self.how + + if how == "rank": + out_dtype = "float64" + else: + if is_numeric_dtype(dtype): + out_dtype = f"{dtype.kind}{dtype.itemsize}" + else: + out_dtype = "object" + return np.dtype(out_dtype) class BaseGrouper: @@ -446,8 +555,6 @@ def get_group_levels(self) -> List[Index]: # ------------------------------------------------------------ # Aggregation functions - _cython_arity = {"ohlc": 4} # OHLC - @final def _is_builtin_func(self, arg): """ @@ -456,81 +563,6 @@ def _is_builtin_func(self, arg): """ return SelectionMixin._builtin_table.get(arg, arg) - @final - def _get_cython_func_and_vals( - self, kind: str, how: str, values: np.ndarray, is_numeric: bool - ): - """ - Find the appropriate cython function, casting if necessary. 
- - Parameters - ---------- - kind : str - how : str - values : np.ndarray - is_numeric : bool - - Returns - ------- - func : callable - values : np.ndarray - """ - try: - func = _get_cython_function(kind, how, values.dtype, is_numeric) - except NotImplementedError: - if is_numeric: - try: - values = ensure_float64(values) - except TypeError: - if lib.infer_dtype(values, skipna=False) == "complex": - values = values.astype(complex) - else: - raise - func = _get_cython_function(kind, how, values.dtype, is_numeric) - else: - raise - else: - if values.dtype.kind in ["i", "u"]: - if how in ["ohlc"]: - # The output may still include nans, so we have to cast - values = ensure_float64(values) - - return func, values - - @final - def _disallow_invalid_ops( - self, dtype: DtypeObj, how: str, is_numeric: bool = False - ): - """ - Check if we can do this operation with our cython functions. - - Raises - ------ - NotImplementedError - This is either not a valid function for this dtype, or - valid but not implemented in cython. - """ - if is_numeric: - # never an invalid op for those dtypes, so return early as fastpath - return - - if is_categorical_dtype(dtype) or is_sparse(dtype): - # categoricals are only 1d, so we - # are not setup for dim transforming - raise NotImplementedError(f"{dtype} dtype not supported") - elif is_datetime64_any_dtype(dtype): - # we raise NotImplemented if this is an invalid operation - # entirely, e.g. 
adding datetimes - if how in ["add", "prod", "cumsum", "cumprod"]: - raise NotImplementedError( - f"datetime64 type does not support {how} operations" - ) - elif is_timedelta64_dtype(dtype): - if how in ["prod", "cumprod"]: - raise NotImplementedError( - f"timedelta64 type does not support {how} operations" - ) - @final def _ea_wrap_cython_operation( self, kind: str, values, how: str, axis: int, min_count: int = -1, **kwargs @@ -603,9 +635,11 @@ def _cython_operation( dtype = values.dtype is_numeric = is_numeric_dtype(dtype) + cy_op = WrappedCythonOp(kind=kind, how=how) + # can we do this operation with our cython functions # if not raise NotImplementedError - self._disallow_invalid_ops(dtype, how, is_numeric) + cy_op.disallow_invalid_ops(dtype, is_numeric) if is_extension_array_dtype(dtype): return self._ea_wrap_cython_operation( @@ -643,48 +677,27 @@ def _cython_operation( values = ensure_float64(values) else: values = ensure_int_or_float(values) - elif is_numeric and not is_complex_dtype(dtype): - values = ensure_float64(values) - else: - values = values.astype(object) + elif is_numeric: + if not is_complex_dtype(dtype): + values = ensure_float64(values) - arity = self._cython_arity.get(how, 1) ngroups = self.ngroups + comp_ids, _, _ = self.group_info assert axis == 1 values = values.T - if how == "ohlc": - out_shape = (ngroups, 4) - elif arity > 1: - raise NotImplementedError( - "arity of more than 1 is not supported for the 'how' argument" - ) - elif kind == "transform": - out_shape = values.shape - else: - out_shape = (ngroups,) + values.shape[1:] - func, values = self._get_cython_func_and_vals(kind, how, values, is_numeric) - - if how == "rank": - out_dtype = "float" - else: - if is_numeric: - out_dtype = f"{values.dtype.kind}{values.dtype.itemsize}" - else: - out_dtype = "object" - - codes, _, _ = self.group_info + out_shape = cy_op.get_output_shape(ngroups, values) + func, values = cy_op.get_cython_func_and_vals(values, is_numeric) + out_dtype = 
cy_op.get_out_dtype(values.dtype) result = maybe_fill(np.empty(out_shape, dtype=out_dtype)) if kind == "aggregate": - counts = np.zeros(self.ngroups, dtype=np.int64) - result = self._aggregate(result, counts, values, codes, func, min_count) + counts = np.zeros(ngroups, dtype=np.int64) + func(result, counts, values, comp_ids, min_count) elif kind == "transform": # TODO: min_count - result = self._transform( - result, values, codes, func, is_datetimelike, **kwargs - ) + func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) if is_integer_dtype(result.dtype) and not is_datetimelike: mask = result == iNaT @@ -708,28 +721,6 @@ def _cython_operation( return op_result - @final - def _aggregate( - self, result, counts, values, comp_ids, agg_func, min_count: int = -1 - ): - if agg_func is libgroupby.group_nth: - # different signature from the others - agg_func(result, counts, values, comp_ids, min_count, rank=1) - else: - agg_func(result, counts, values, comp_ids, min_count) - - return result - - @final - def _transform( - self, result, values, comp_ids, transform_func, is_datetimelike: bool, **kwargs - ): - - _, _, ngroups = self.group_info - transform_func(result, values, comp_ids, ngroups, is_datetimelike, **kwargs) - - return result - def agg_series(self, obj: Series, func: F): # Caller is responsible for checking ngroups != 0 assert self.ngroups != 0 @@ -817,9 +808,9 @@ class BinGrouper(BaseGrouper): ---------- bins : the split index of binlabels to group the item of axis binlabels : the label list - filter_empty : boolean, default False - mutated : boolean, default False - indexer : a intp array + filter_empty : bool, default False + mutated : bool, default False + indexer : np.ndarray[np.intp] Examples -------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index fc4eeebc86642..5163c55036fd0 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -12,7 +12,6 @@ FrozenSet, Hashable, List, - NewType, Optional, 
Sequence, Set, @@ -195,9 +194,6 @@ _o_dtype = np.dtype("object") -_Identity = NewType("_Identity", object) - - def disallow_kwargs(kwargs: Dict[str, Any]): if kwargs: raise TypeError(f"Unexpected keyword arguments {repr(set(kwargs))}") @@ -320,7 +316,7 @@ def _outer_indexer( # would we like our indexing holder to defer to us _defer_to_indexing = False - _engine_type = libindex.ObjectEngine + _engine_type: Type[libindex.IndexEngine] = libindex.ObjectEngine # whether we support partial string indexing. Overridden # in DatetimeIndex and PeriodIndex _supports_partial_string_indexing = False @@ -723,8 +719,8 @@ def _cleanup(self) -> None: self._engine.clear_mapping() @cache_readonly - def _engine(self) -> libindex.ObjectEngine: - # property, for now, slow to look up + def _engine(self) -> libindex.IndexEngine: + # For base class (object dtype) we get ObjectEngine # to avoid a reference cycle, bind `target_values` to a local variable, so # `self` is not passed into the lambda. @@ -2376,7 +2372,7 @@ def __reduce__(self): """The expected NA value to use with this index.""" @cache_readonly - def _isnan(self): + def _isnan(self) -> np.ndarray: """ Return if each value is NaN. """ @@ -2412,7 +2408,7 @@ def isna(self) -> np.ndarray: Returns ------- - numpy.ndarray + numpy.ndarray[bool] A boolean array of whether my values are NA. See Also @@ -2470,7 +2466,7 @@ def notna(self) -> np.ndarray: Returns ------- - numpy.ndarray + numpy.ndarray[bool] Boolean array to indicate which entries are not NA. 
See Also @@ -4405,9 +4401,9 @@ def _get_engine_target(self) -> np.ndarray: # ndarray]", expected "ndarray") return self._values # type: ignore[return-value] - @doc(IndexOpsMixin.memory_usage) + @doc(IndexOpsMixin._memory_usage) def memory_usage(self, deep: bool = False) -> int: - result = super().memory_usage(deep=deep) + result = self._memory_usage(deep=deep) # include our engine hashtable result += self._engine.sizeof(deep=deep) @@ -4482,7 +4478,7 @@ def _validate_fill_value(self, value): TypeError If the value cannot be inserted into an array of this dtype. """ - if not can_hold_element(self.dtype, value): + if not can_hold_element(self._values, value): raise TypeError return value @@ -5238,7 +5234,7 @@ def get_indexer_non_unique(self, target): return ensure_platform_int(indexer), missing @final - def get_indexer_for(self, target, **kwargs): + def get_indexer_for(self, target, **kwargs) -> np.ndarray: """ Guaranteed return of an indexer even when non-unique. @@ -5306,7 +5302,7 @@ def _index_as_unique(self) -> bool: _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" @final - def _maybe_promote(self, other: Index): + def _maybe_promote(self, other: Index) -> Tuple[Index, Index]: """ When dealing with an object-dtype Index and a non-object Index, see if we can upcast the object-dtype one to improve performance. @@ -5481,7 +5477,7 @@ def _transform_index(self, func, level=None) -> Index: items = [func(x) for x in self] return Index(items, name=self.name, tupleize_cols=False) - def isin(self, values, level=None): + def isin(self, values, level=None) -> np.ndarray: """ Return a boolean array where the index values are in `values`. @@ -5499,7 +5495,7 @@ def isin(self, values, level=None): Returns ------- - is_contained : ndarray + is_contained : ndarray[bool] NumPy array of boolean values. 
See Also @@ -5849,7 +5845,7 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): return start_slice, end_slice - def delete(self, loc): + def delete(self, loc) -> Index: """ Make new Index with passed location(-s) deleted. @@ -5881,7 +5877,7 @@ def delete(self, loc): res_values = np.delete(self._data, loc) return type(self)._simple_new(res_values, name=self.name) - def insert(self, loc: int, item): + def insert(self, loc: int, item) -> Index: """ Make new Index inserting new item at location. @@ -5916,7 +5912,7 @@ def insert(self, loc: int, item): idx = np.concatenate((arr[:loc], item, arr[loc:])) return Index(idx, name=self.name) - def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT: + def drop(self, labels, errors: str_t = "raise") -> Index: """ Make new Index with passed list of labels deleted. @@ -5929,6 +5925,7 @@ def drop(self: _IndexT, labels, errors: str_t = "raise") -> _IndexT: Returns ------- dropped : Index + Will be same type as self, except for RangeIndex. Raises ------ diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index e194148f0fc24..bac00b2399121 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -7,6 +7,7 @@ Any, List, Optional, + Sequence, Tuple, TypeVar, Union, @@ -536,7 +537,7 @@ def shift(self: _T, periods: int = 1, freq=None) -> _T: # -------------------------------------------------------------------- # List-like Methods - def _get_delete_freq(self, loc: int): + def _get_delete_freq(self, loc: Union[int, slice, Sequence[int]]): """ Find the `freq` for self.delete(loc). 
""" diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fedb955ce83b9..9751e12c373cd 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -557,7 +557,7 @@ def from_tuples( arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): - tuples = tuples._values + tuples = np.asarray(tuples._values) arrays = list(lib.tuples_to_object_array(tuples).T) elif isinstance(tuples, list): @@ -2689,11 +2689,16 @@ def _get_indexer( target, method=method, limit=limit, tolerance=tolerance ) + # TODO: explicitly raise here? we only have one test that + # gets here, and it is checking that we raise with method="nearest" + if method == "pad" or method == "backfill": if tolerance is not None: raise NotImplementedError( "tolerance not implemented yet for MultiIndex" ) + # TODO: get_indexer_with_fill docstring says values must be _sorted_ + # but that doesn't appear to be enforced indexer = self._engine.get_indexer_with_fill( target=target._values, values=self._values, method=method, limit=limit ) @@ -2705,6 +2710,8 @@ def _get_indexer( else: indexer = self._engine.get_indexer(target._values) + # Note: we only get here (in extant tests at least) with + # target.nlevels == self.nlevels return ensure_platform_int(indexer) def get_slice_bound( diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index f37faa4ab844b..7f2361eeb4d05 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -46,6 +46,7 @@ from pandas.core.indexes.numeric import ( Float64Index, Int64Index, + NumericIndex, ) from pandas.core.ops.common import unpack_zerodim_and_defer @@ -55,7 +56,7 @@ _empty_range = range(0) -class RangeIndex(Int64Index): +class RangeIndex(NumericIndex): """ Immutable Index implementing a monotonic integer range. 
@@ -97,6 +98,7 @@ class RangeIndex(Int64Index): _typ = "rangeindex" _engine_type = libindex.Int64Engine + _can_hold_na = False _range: range # -------------------------------------------------------------------- @@ -381,6 +383,10 @@ def __contains__(self, key: Any) -> bool: return False return key in self._range + @property + def inferred_type(self) -> str: + return "integer" + # -------------------------------------------------------------------- # Indexing Methods @@ -402,7 +408,7 @@ def _get_indexer( method: Optional[str] = None, limit: Optional[int] = None, tolerance=None, - ): + ) -> np.ndarray: if com.any_not_none(method, tolerance, limit): return super()._get_indexer( target, method=method, tolerance=tolerance, limit=limit @@ -436,10 +442,11 @@ def repeat(self, repeats, axis=None) -> Int64Index: return self._int64index.repeat(repeats, axis=axis) def delete(self, loc) -> Int64Index: - return self._int64index.delete(loc) + # error: Incompatible return value type (got "Index", expected "Int64Index") + return self._int64index.delete(loc) # type: ignore[return-value] def take( - self, indices, axis=0, allow_fill=True, fill_value=None, **kwargs + self, indices, axis: int = 0, allow_fill: bool = True, fill_value=None, **kwargs ) -> Int64Index: with rewrite_exception("Int64Index", type(self).__name__): return self._int64index.take( @@ -471,7 +478,13 @@ def _view(self: RangeIndex) -> RangeIndex: return result @doc(Int64Index.copy) - def copy(self, name=None, deep=False, dtype: Optional[Dtype] = None, names=None): + def copy( + self, + name: Hashable = None, + deep: bool = False, + dtype: Optional[Dtype] = None, + names=None, + ): name = self._validate_names(name=name, names=names, deep=deep)[0] new_index = self._rename(name=name) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ae0f853db628e..4c8a6a200b196 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -2009,7 +2009,7 @@ def _align_series(self, indexer, ser: Series, 
multiindex_indexer: bool = False): Indexer used to get the locations that will be set to `ser`. ser : pd.Series Values to assign to the locations specified by `indexer`. - multiindex_indexer : boolean, optional + multiindex_indexer : bool, optional Defaults to False. Should be set to True if `indexer` was from a `pd.MultiIndex`, to avoid unnecessary broadcasting. @@ -2248,7 +2248,7 @@ def _convert_key(self, key, is_setter: bool = False): """ Require integer args. (and convert to label arguments) """ - for a, i in zip(self.obj.axes, key): + for i in key: if not is_integer(i): raise ValueError("iAt based indexing can only have integer indexers") return key diff --git a/pandas/core/internals/__init__.py b/pandas/core/internals/__init__.py index f0018928255e6..af1350f088b7a 100644 --- a/pandas/core/internals/__init__.py +++ b/pandas/core/internals/__init__.py @@ -9,12 +9,10 @@ ) from pandas.core.internals.blocks import ( # io.pytables, io.packers Block, - DatetimeBlock, DatetimeTZBlock, ExtensionBlock, NumericBlock, ObjectBlock, - TimeDeltaBlock, ) from pandas.core.internals.concat import concatenate_managers from pandas.core.internals.managers import ( @@ -28,11 +26,9 @@ "Block", "CategoricalBlock", "NumericBlock", - "DatetimeBlock", "DatetimeTZBlock", "ExtensionBlock", "ObjectBlock", - "TimeDeltaBlock", "make_block", "DataManager", "ArrayManager", diff --git a/pandas/core/internals/array_manager.py b/pandas/core/internals/array_manager.py index 367fe04678cd8..294d1fd078b08 100644 --- a/pandas/core/internals/array_manager.py +++ b/pandas/core/internals/array_manager.py @@ -471,7 +471,7 @@ def apply_with_block(self: T, f, align_keys=None, swap_axis=True, **kwargs) -> T # error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no # attribute "tz" if hasattr(arr, "tz") and arr.tz is None: # type: ignore[union-attr] - # DatetimeArray needs to be converted to ndarray for DatetimeBlock + # DatetimeArray needs to be converted to ndarray for DatetimeLikeBlock # 
error: Item "ExtensionArray" of "Union[Any, ExtensionArray]" has no # attribute "_data" diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index a77ea61d9e6de..d1d0db913f854 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -5,7 +5,6 @@ from typing import ( TYPE_CHECKING, Any, - Callable, List, Optional, Tuple, @@ -18,8 +17,6 @@ import numpy as np from pandas._libs import ( - Interval, - Period, Timestamp, algos as libalgos, internals as libinternals, @@ -53,7 +50,6 @@ is_dtype_equal, is_extension_array_dtype, is_list_like, - is_object_dtype, is_sparse, pandas_dtype, ) @@ -102,6 +98,7 @@ PeriodArray, TimedeltaArray, ) +from pandas.core.arrays._mixins import NDArrayBackedExtensionArray from pandas.core.base import PandasObject import pandas.core.common as com import pandas.core.computation.expressions as expressions @@ -122,7 +119,6 @@ Float64Index, Index, ) - from pandas.core.arrays._mixins import NDArrayBackedExtensionArray # comparison is faster than is_object_dtype _dtype_obj = np.dtype("object") @@ -210,12 +206,6 @@ def is_bool(self) -> bool: def external_values(self): return external_values(self.values) - def internal_values(self): - """ - The array that Series._values returns (internal values). - """ - return self.values - @property def array_values(self) -> ExtensionArray: """ @@ -280,6 +270,11 @@ def make_block_same_class( """ Wrap given values in a block of same type as self. """ if placement is None: placement = self._mgr_locs + + if values.dtype.kind == "m": + # TODO: remove this once fastparquet has stopped relying on it + values = ensure_wrapped_if_datetimelike(values) + # We assume maybe_coerce_values has already been called return type(self)(values, placement=placement, ndim=self.ndim) @@ -371,7 +366,6 @@ def set_inplace(self, locs, values): """ self.values[locs] = values - @final def delete(self, loc) -> None: """ Delete given loc(-s) from block in-place. 
@@ -596,8 +590,6 @@ def astype(self, dtype, copy: bool = False, errors: str = "raise"): Block """ values = self.values - if values.dtype.kind in ["m", "M"]: - values = self.array_values new_values = astype_array_safe(values, dtype, copy=copy, errors=errors) @@ -625,9 +617,11 @@ def convert( """ return [self.copy()] if copy else [self] + @final def _can_hold_element(self, element: Any) -> bool: """ require the same dtype as ourselves """ - raise NotImplementedError("Implemented on subclasses") + element = extract_array(element, extract_numpy=True) + return can_hold_element(self.values, element) @final def should_store(self, value: ArrayLike) -> bool: @@ -1545,7 +1539,7 @@ def setitem(self, indexer, value): be a compatible shape. """ if not self._can_hold_element(value): - # This is only relevant for DatetimeTZBlock, ObjectValuesExtensionBlock, + # This is only relevant for DatetimeTZBlock, PeriodDtype, IntervalDtype, # which has a non-trivial `_can_hold_element`. # https://github.com/pandas-dev/pandas/issues/24020 # Need a dedicated setitem until GH#24020 (type promotion in setitem @@ -1597,10 +1591,6 @@ def take_nd( return self.make_block_same_class(new_values, new_mgr_locs) - def _can_hold_element(self, element: Any) -> bool: - # TODO: We may need to think about pushing this onto the array. - return True - def _slice(self, slicer): """ Return a slice of my values. @@ -1746,70 +1736,35 @@ def _unstack(self, unstacker, fill_value, new_placement): return blocks, mask -class HybridMixin: - """ - Mixin for Blocks backed (maybe indirectly) by ExtensionArrays. 
- """ - - array_values: Callable - - def _can_hold_element(self, element: Any) -> bool: - values = self.array_values - - try: - # error: "Callable[..., Any]" has no attribute "_validate_setitem_value" - values._validate_setitem_value(element) # type: ignore[attr-defined] - return True - except (ValueError, TypeError): - return False - - -class ObjectValuesExtensionBlock(HybridMixin, ExtensionBlock): - """ - Block providing backwards-compatibility for `.values`. - - Used by PeriodArray and IntervalArray to ensure that - Series[T].values is an ndarray of objects. - """ - - pass - - class NumericBlock(Block): __slots__ = () is_numeric = True - def _can_hold_element(self, element: Any) -> bool: - element = extract_array(element, extract_numpy=True) - if isinstance(element, (IntegerArray, FloatingArray)): - if element._mask.any(): - return False - return can_hold_element(self.dtype, element) - -class NDArrayBackedExtensionBlock(HybridMixin, Block): +class NDArrayBackedExtensionBlock(Block): """ Block backed by an NDArrayBackedExtensionArray """ values: NDArrayBackedExtensionArray + @property + def array_values(self) -> NDArrayBackedExtensionArray: + return self.values + @property def is_view(self) -> bool: """ return a boolean if I am possibly a view """ # check the ndarray values of the DatetimeIndex values return self.values._ndarray.base is not None - def internal_values(self): - # Override to return DatetimeArray and TimedeltaArray - return self.values - def get_values(self, dtype: Optional[DtypeObj] = None) -> np.ndarray: """ return object dtype as boxed values, such as Timestamps/Timedelta """ values = self.values - if is_object_dtype(dtype): + if dtype == _dtype_obj: + # DTA/TDA constructor and astype can handle 2D values = values.astype(object) # TODO(EA2D): reshape not needed with 2D EAs return np.asarray(values).reshape(self.shape) @@ -1859,7 +1814,7 @@ def diff(self, n: int, axis: int = 0) -> List[Block]: Returns ------- - A list with a new TimeDeltaBlock. 
+ A list with a new Block. Notes ----- @@ -1893,24 +1848,30 @@ def fillna( new_values = values.fillna(value=value, limit=limit) return [self.make_block_same_class(values=new_values)] + def delete(self, loc) -> None: + """ + Delete given loc(-s) from block in-place. + """ + # This will be unnecessary if/when __array_function__ is implemented + self.values = self.values.delete(loc, axis=0) + self.mgr_locs = self._mgr_locs.delete(loc) + try: + self._cache.clear() + except AttributeError: + # _cache not yet initialized + pass -class DatetimeLikeBlockMixin(NDArrayBackedExtensionBlock): - """Mixin class for DatetimeBlock, DatetimeTZBlock, and TimedeltaBlock.""" - values: Union[DatetimeArray, TimedeltaArray] +class DatetimeLikeBlock(NDArrayBackedExtensionBlock): + """Mixin class for DatetimeLikeBlock, DatetimeTZBlock.""" + __slots__ = () is_numeric = False - @cache_readonly - def array_values(self): - return self.values - - -class DatetimeBlock(DatetimeLikeBlockMixin): - __slots__ = () + values: Union[DatetimeArray, TimedeltaArray] -class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin): +class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlock): """ implement a datetime64 block with a tz attribute """ values: DatetimeArray @@ -1919,12 +1880,12 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin): is_extension = True is_numeric = False - internal_values = Block.internal_values - _can_hold_element = DatetimeBlock._can_hold_element - diff = DatetimeBlock.diff - where = DatetimeBlock.where - putmask = DatetimeLikeBlockMixin.putmask - fillna = DatetimeLikeBlockMixin.fillna + diff = NDArrayBackedExtensionBlock.diff + where = NDArrayBackedExtensionBlock.where + putmask = NDArrayBackedExtensionBlock.putmask + fillna = NDArrayBackedExtensionBlock.fillna + + get_values = NDArrayBackedExtensionBlock.get_values # error: Incompatible types in assignment (expression has type # "Callable[[NDArrayBackedExtensionBlock], bool]", base class "ExtensionBlock" @@ -1932,10 
+1893,6 @@ class DatetimeTZBlock(ExtensionBlock, DatetimeLikeBlockMixin): is_view = NDArrayBackedExtensionBlock.is_view # type: ignore[assignment] -class TimeDeltaBlock(DatetimeLikeBlockMixin): - __slots__ = () - - class ObjectBlock(Block): __slots__ = () is_object = True @@ -1983,9 +1940,6 @@ def convert( res_values = ensure_block_shape(res_values, self.ndim) return [self.make_block(res_values)] - def _can_hold_element(self, element: Any) -> bool: - return True - class CategoricalBlock(ExtensionBlock): # this Block type is kept for backwards-compatibility @@ -2052,16 +2006,12 @@ def get_block_type(values, dtype: Optional[Dtype] = None): cls = CategoricalBlock elif vtype is Timestamp: cls = DatetimeTZBlock - elif vtype is Interval or vtype is Period: - cls = ObjectValuesExtensionBlock elif isinstance(dtype, ExtensionDtype): # Note: need to be sure PandasArray is unwrapped before we get here cls = ExtensionBlock - elif kind == "M": - cls = DatetimeBlock - elif kind == "m": - cls = TimeDeltaBlock + elif kind in ["M", "m"]: + cls = DatetimeLikeBlock elif kind in ["f", "c", "i", "u", "b"]: cls = NumericBlock else: @@ -2173,7 +2123,9 @@ def ensure_block_shape(values: ArrayLike, ndim: int = 1) -> ArrayLike: # TODO(EA2D): https://github.com/pandas-dev/pandas/issues/23023 # block.shape is incorrect for "2D" ExtensionArrays # We can't, and don't need to, reshape. 
- values = np.asarray(values).reshape(1, -1) + values = cast(Union[np.ndarray, DatetimeArray, TimedeltaArray], values) + values = values.reshape(1, -1) + return values diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index 02d582c439ea2..19c9b27db9f70 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -43,6 +43,7 @@ DatetimeArray, ExtensionArray, ) +from pandas.core.construction import ensure_wrapped_if_datetimelike from pandas.core.internals.array_manager import ArrayManager from pandas.core.internals.blocks import ( ensure_block_shape, @@ -142,8 +143,9 @@ def concatenate_managers( else: # TODO(EA2D): special-casing not needed with 2D EAs values = concat_compat(vals) - if not isinstance(values, ExtensionArray): - values = values.reshape(1, len(values)) + values = ensure_block_shape(values, ndim=2) + + values = ensure_wrapped_if_datetimelike(values) if blk.values.dtype == values.dtype: # Fast-path @@ -505,12 +507,16 @@ def _is_uniform_join_units(join_units: List[JoinUnit]) -> bool: _concatenate_join_units (which uses `concat_compat`). """ - # TODO: require dtype match in addition to same type? e.g. DatetimeTZBlock - # cannot necessarily join return ( # all blocks need to have the same type all(type(ju.block) is type(join_units[0].block) for ju in join_units) # noqa and + # e.g. DatetimeLikeBlock can be dt64 or td64, but these are not uniform + all( + is_dtype_equal(ju.block.dtype, join_units[0].block.dtype) + for ju in join_units + ) + and # no blocks that would get missing values (can lead to type upcasts) # unless we're an extension dtype. 
all(not ju.is_na or ju.block.is_extension for ju in join_units) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 9959174373034..5b4b710838ef8 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -101,9 +101,11 @@ def arrays_to_mgr( arr_names, index, columns, + *, dtype: Optional[DtypeObj] = None, verify_integrity: bool = True, typ: Optional[str] = None, + consolidate: bool = True, ) -> Manager: """ Segregate Series based on type and coerce into matrices. @@ -131,7 +133,9 @@ def arrays_to_mgr( axes = [columns, index] if typ == "block": - return create_block_manager_from_arrays(arrays, arr_names, axes) + return create_block_manager_from_arrays( + arrays, arr_names, axes, consolidate=consolidate + ) elif typ == "array": if len(columns) != len(arrays): assert len(arrays) == 0 @@ -181,7 +185,7 @@ def rec_array_to_mgr( if columns is None: columns = arr_columns - mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype, typ=typ) + mgr = arrays_to_mgr(arrays, arr_columns, index, columns, dtype=dtype, typ=typ) if copy: mgr = mgr.copy() @@ -376,7 +380,13 @@ def maybe_squeeze_dt64tz(dta: ArrayLike) -> ArrayLike: def dict_to_mgr( - data: Dict, index, columns, dtype: Optional[DtypeObj], typ: str + data: Dict, + index, + columns, + *, + dtype: Optional[DtypeObj] = None, + typ: str = "block", + copy: bool = True, ) -> Manager: """ Segregate Series based on type and coerce into matrices. 
@@ -414,6 +424,8 @@ def dict_to_mgr( val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() + arrays = list(arrays) + else: keys = list(data.keys()) columns = data_names = Index(keys) @@ -424,7 +436,21 @@ def dict_to_mgr( arrays = [ arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays ] - return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, typ=typ) + + if copy: + # arrays_to_mgr (via form_blocks) won't make copies for EAs + # dtype attr check to exclude EADtype-castable strs + arrays = [ + x + if not hasattr(x, "dtype") or not isinstance(x.dtype, ExtensionDtype) + else x.copy() + for x in arrays + ] + # TODO: can we get rid of the dt64tz special case above? + + return arrays_to_mgr( + arrays, data_names, index, columns, dtype=dtype, typ=typ, consolidate=copy + ) def nested_data_to_arrays( diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 69338abcd7d58..b688f1b4fea5f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -35,7 +35,6 @@ from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( - DT64NS_DTYPE, ensure_int64, is_dtype_equal, is_extension_array_dtype, @@ -53,7 +52,10 @@ import pandas.core.algorithms as algos from pandas.core.arrays.sparse import SparseDtype -from pandas.core.construction import extract_array +from pandas.core.construction import ( + ensure_wrapped_if_datetimelike, + extract_array, +) from pandas.core.indexers import maybe_convert_indices from pandas.core.indexes.api import ( Float64Index, @@ -70,7 +72,6 @@ CategoricalBlock, DatetimeTZBlock, ExtensionBlock, - ObjectValuesExtensionBlock, ensure_block_shape, extend_blocks, get_block_type, @@ -991,6 +992,8 @@ def fast_xs(self, loc: int) -> ArrayLike: # Any]]" result = np.empty(n, dtype=dtype) # type: ignore[arg-type] + result = ensure_wrapped_if_datetimelike(result) + for blk in 
self.blocks: # Such assignment may incorrectly coerce NaT to None # result[blk.mgr_locs] = blk._slice((slice(None), loc)) @@ -1081,17 +1084,11 @@ def value_getitem(placement): else: if value.ndim == 2: value = value.T - - if value.ndim == self.ndim - 1: - value = ensure_block_shape(value, ndim=2) - - def value_getitem(placement): - return value - else: + value = ensure_block_shape(value, ndim=2) - def value_getitem(placement): - return value[placement.indexer] + def value_getitem(placement): + return value[placement.indexer] if value.shape[1:] != self.shape[1:]: raise AssertionError( @@ -1641,7 +1638,7 @@ def external_values(self): def internal_values(self): """The array that Series._values returns""" - return self._block.internal_values() + return self._block.values def array_values(self): """The array that Series.array returns""" @@ -1693,7 +1690,7 @@ def set_values(self, values: ArrayLike): def create_block_manager_from_blocks( - blocks: List[Block], axes: List[Index] + blocks: List[Block], axes: List[Index], consolidate: bool = True ) -> BlockManager: try: mgr = BlockManager(blocks, axes) @@ -1703,7 +1700,8 @@ def create_block_manager_from_blocks( tot_items = sum(arr.shape[0] for arr in arrays) raise construction_error(tot_items, arrays[0].shape[1:], axes, err) - mgr._consolidate_inplace() + if consolidate: + mgr._consolidate_inplace() return mgr @@ -1713,7 +1711,10 @@ def _extract_array(obj): def create_block_manager_from_arrays( - arrays, names: Index, axes: List[Index] + arrays, + names: Index, + axes: List[Index], + consolidate: bool = True, ) -> BlockManager: assert isinstance(names, Index) assert isinstance(axes, list) @@ -1722,12 +1723,13 @@ def create_block_manager_from_arrays( arrays = [_extract_array(x) for x in arrays] try: - blocks = _form_blocks(arrays, names, axes) + blocks = _form_blocks(arrays, names, axes, consolidate) mgr = BlockManager(blocks, axes) - mgr._consolidate_inplace() - return mgr except ValueError as e: raise 
construction_error(len(arrays), arrays[0].shape, axes, e) + if consolidate: + mgr._consolidate_inplace() + return mgr def construction_error( @@ -1760,7 +1762,7 @@ def construction_error( def _form_blocks( - arrays: List[ArrayLike], names: Index, axes: List[Index] + arrays: List[ArrayLike], names: Index, axes: List[Index], consolidate: bool ) -> List[Block]: # put "leftover" items in float bucket, where else? # generalize? @@ -1786,16 +1788,16 @@ def _form_blocks( blocks: List[Block] = [] if len(items_dict["NumericBlock"]): - numeric_blocks = _multi_blockify(items_dict["NumericBlock"]) + numeric_blocks = _multi_blockify( + items_dict["NumericBlock"], consolidate=consolidate + ) blocks.extend(numeric_blocks) - if len(items_dict["TimeDeltaBlock"]): - timedelta_blocks = _multi_blockify(items_dict["TimeDeltaBlock"]) - blocks.extend(timedelta_blocks) - - if len(items_dict["DatetimeBlock"]): - datetime_blocks = _simple_blockify(items_dict["DatetimeBlock"], DT64NS_DTYPE) - blocks.extend(datetime_blocks) + if len(items_dict["DatetimeLikeBlock"]): + dtlike_blocks = _multi_blockify( + items_dict["DatetimeLikeBlock"], consolidate=consolidate + ) + blocks.extend(dtlike_blocks) if len(items_dict["DatetimeTZBlock"]): dttz_blocks = [ @@ -1805,7 +1807,9 @@ def _form_blocks( blocks.extend(dttz_blocks) if len(items_dict["ObjectBlock"]) > 0: - object_blocks = _simple_blockify(items_dict["ObjectBlock"], np.object_) + object_blocks = _simple_blockify( + items_dict["ObjectBlock"], np.object_, consolidate=consolidate + ) blocks.extend(object_blocks) if len(items_dict["CategoricalBlock"]) > 0: @@ -1823,14 +1827,6 @@ def _form_blocks( blocks.extend(external_blocks) - if len(items_dict["ObjectValuesExtensionBlock"]): - external_blocks = [ - new_block(array, klass=ObjectValuesExtensionBlock, placement=i, ndim=2) - for i, array in items_dict["ObjectValuesExtensionBlock"] - ] - - blocks.extend(external_blocks) - if len(extra_locs): shape = (len(extra_locs),) + tuple(len(x) for x in axes[1:]) 
@@ -1844,11 +1840,14 @@ def _form_blocks( return blocks -def _simple_blockify(tuples, dtype) -> List[Block]: +def _simple_blockify(tuples, dtype, consolidate: bool) -> List[Block]: """ return a single array of a block that has a single dtype; if dtype is not None, coerce to this dtype """ + if not consolidate: + return _tuples_to_blocks_no_consolidate(tuples, dtype=dtype) + values, placement = _stack_arrays(tuples, dtype) # TODO: CHECK DTYPE? @@ -1859,8 +1858,12 @@ def _simple_blockify(tuples, dtype) -> List[Block]: return [block] -def _multi_blockify(tuples, dtype: Optional[Dtype] = None): +def _multi_blockify(tuples, dtype: Optional[DtypeObj] = None, consolidate: bool = True): """ return an array of blocks that potentially have different dtypes """ + + if not consolidate: + return _tuples_to_blocks_no_consolidate(tuples, dtype=dtype) + # group by dtype grouper = itertools.groupby(tuples, lambda x: x[1].dtype) @@ -1880,6 +1883,18 @@ def _multi_blockify(tuples, dtype: Optional[Dtype] = None): return new_blocks +def _tuples_to_blocks_no_consolidate(tuples, dtype: Optional[DtypeObj]) -> List[Block]: + # tuples produced within _form_blocks are of the form (placement, whatever, array) + if dtype is not None: + return [ + new_block( + np.atleast_2d(x[1].astype(dtype, copy=False)), placement=x[0], ndim=2 + ) + for x in tuples + ] + return [new_block(np.atleast_2d(x[1]), placement=x[0], ndim=2) for x in tuples] + + def _stack_arrays(tuples, dtype: np.dtype): placement, arrays = zip(*tuples) diff --git a/pandas/core/internals/ops.py b/pandas/core/internals/ops.py index df5cd66060659..be5224fe32ae1 100644 --- a/pandas/core/internals/ops.py +++ b/pandas/core/internals/ops.py @@ -26,7 +26,7 @@ def _iter_block_pairs( # At this point we have already checked the parent DataFrames for # assert rframe._indexed_same(lframe) - for n, blk in enumerate(left.blocks): + for blk in left.blocks: locs = blk.mgr_locs blk_vals = blk.values @@ -40,7 +40,7 @@ def _iter_block_pairs( # assert 
len(rblks) == 1, rblks # assert rblks[0].shape[0] == 1, rblks[0].shape - for k, rblk in enumerate(rblks): + for rblk in rblks: right_ea = rblk.values.ndim == 1 lvals, rvals = _get_same_shape_values(blk, rblk, left_ea, right_ea) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index feaecec382704..53dce412f084f 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -789,7 +789,7 @@ def _interp_limit(invalid, fw_limit, bw_limit): Parameters ---------- - invalid : boolean ndarray + invalid : np.ndarray[bool] fw_limit : int or None forward limit to index bw_limit : int or None diff --git a/pandas/core/ops/array_ops.py b/pandas/core/ops/array_ops.py index 1d7c16de0c05d..5c26377f44c2b 100644 --- a/pandas/core/ops/array_ops.py +++ b/pandas/core/ops/array_ops.py @@ -158,12 +158,14 @@ def _na_arithmetic_op(left, right, op, is_cmp: bool = False): try: result = expressions.evaluate(op, left, right) except TypeError: - if is_cmp: - # numexpr failed on comparison op, e.g. ndarray[float] > datetime - # In this case we do not fall back to the masked op, as that - # will handle complex numbers incorrectly, see GH#32047 + if is_object_dtype(left) or is_object_dtype(right) and not is_cmp: + # For object dtype, fallback to a masked operation (only operating + # on the non-missing values) + # Don't do this for comparisons, as that will handle complex numbers + # incorrectly, see GH#32047 + result = _masked_arith_op(left, right, op) + else: raise - result = _masked_arith_op(left, right, op) if is_cmp and (is_scalar(result) or result is NotImplemented): # numpy returned a scalar instead of operating element-wise diff --git a/pandas/core/resample.py b/pandas/core/resample.py index abfd6932d7b21..70c9d2bc1e4e5 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1272,7 +1272,7 @@ def _upsample(self, method, limit=None, fill_value=None): """ Parameters ---------- - method : string {'backfill', 'bfill', 'pad', 'ffill'} + method : {'backfill', 
'bfill', 'pad', 'ffill'} Method for upsampling. limit : int, default None Maximum size gap to fill when reindexing. diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 0c0b37791f883..fa0e5c422501a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -458,7 +458,7 @@ def _convert_by(by): elif ( is_scalar(by) or isinstance(by, (np.ndarray, Index, ABCSeries, Grouper)) - or hasattr(by, "__call__") + or callable(by) ): by = [by] else: diff --git a/pandas/core/series.py b/pandas/core/series.py index 641a57a554a9b..4ade9992e9e3e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -162,7 +162,7 @@ "axes_single_arg": "{0 or 'index'}", "axis": """axis : {0 or 'index'} Parameter needed for compatibility with DataFrame.""", - "inplace": """inplace : boolean, default False + "inplace": """inplace : bool, default False If True, performs operation inplace and returns None.""", "unique": "np.ndarray", "duplicated": "Series", @@ -3999,7 +3999,7 @@ def _gotitem(self, key, ndim, subset=None) -> Series: Parameters ---------- key : string / list of selections - ndim : 1,2 + ndim : {1, 2} Requested ndim of result. subset : object, default None Subset to act on. @@ -4609,7 +4609,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series: periods=periods, freq=freq, axis=axis, fill_value=fill_value ) - def memory_usage(self, index=True, deep=False): + def memory_usage(self, index: bool = True, deep: bool = False) -> int: """ Return the memory usage of the Series. 
@@ -4658,7 +4658,7 @@ def memory_usage(self, index=True, deep=False): >>> s.memory_usage(deep=True) 244 """ - v = super().memory_usage(deep=deep) + v = self._memory_usage(deep=deep) if index: v += self.index.memory_usage(deep=deep) return v diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 6024c083fcc6b..816c1d9195778 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -278,7 +278,7 @@ def lexsort_indexer( ---------- keys : sequence of arrays Sequence of ndarrays to be sorted by the indexer - orders : boolean or list of booleans, optional + orders : bool or list of booleans, optional Determines the sorting order for each element in keys. If a list, it must be the same length as keys. This determines whether the corresponding element in keys should be sorted in ascending diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 1eda06dbbb1c4..14c77ec2fdf8f 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -3023,7 +3023,7 @@ def _str_extract_noexpand(arr, pat, flags=0): """ from pandas import ( DataFrame, - array, + array as pd_array, ) regex = re.compile(pat, flags=flags) @@ -3034,7 +3034,7 @@ def _str_extract_noexpand(arr, pat, flags=0): result = np.array([groups_or_na(val)[0] for val in arr], dtype=object) name = _get_single_group_name(regex) # not dispatching, so we have to reconstruct here. 
- result = array(result, dtype=result_dtype) + result = pd_array(result, dtype=result_dtype) else: if isinstance(arr, ABCIndex): raise ValueError("only one regex group is supported with Index") diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 5f33d00530361..7619623bb9eda 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -171,7 +171,7 @@ def _maybe_cache( arg : listlike, tuple, 1-d array, Series format : string Strftime format to parse time - cache : boolean + cache : bool True attempts to create a cache of converted values convert_listlike : function Conversion function to apply on dates @@ -313,9 +313,9 @@ def _convert_listlike_datetimes( error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : bool, default False inferring format behavior from to_datetime - dayfirst : boolean + dayfirst : bool dayfirst parsing behavior from to_datetime - yearfirst : boolean + yearfirst : bool yearfirst parsing behavior from to_datetime exact : bool, default True exact format matching behavior from to_datetime @@ -576,7 +576,7 @@ def _adjust_to_origin(arg, origin, unit): date to be adjusted origin : 'julian' or Timestamp origin offset for the arg - unit : string + unit : str passed unit from to_datetime, must be 'D' Returns diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b482934dd25d2..b90722857938e 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -239,7 +239,7 @@ def _gotitem(self, key, ndim, subset=None): Parameters ---------- key : str / list of selections - ndim : 1,2 + ndim : {1, 2} requested ndim of result subset : object, default None subset to act on diff --git a/pandas/io/clipboards.py b/pandas/io/clipboards.py index 54cb6b9f91137..00a99eb8a4480 100644 --- a/pandas/io/clipboards.py +++ b/pandas/io/clipboards.py @@ -86,7 +86,7 @@ def to_clipboard(obj, excel=True, sep=None, **kwargs): # pragma: no cover 
Parameters ---------- obj : the object to write to the clipboard - excel : boolean, defaults to True + excel : bool, defaults to True if True, use the provided separator, writing in a csv format for allowing easy pasting into excel. if False, write a string representation of the object diff --git a/pandas/io/common.py b/pandas/io/common.py index b87e8fcae1064..eab13cb7cd598 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -565,9 +565,9 @@ def get_handle( Passing compression options as keys in dict is now supported for compression modes 'gzip' and 'bz2' as well as 'zip'. - memory_map : boolean, default False + memory_map : bool, default False See parsers._parser_params for more information. - is_text : boolean, default True + is_text : bool, default True Whether the type of the content passed to the file/buffer is string or bytes. This is not the same as `"b" not in mode`. If a string content is passed to a binary file/buffer, a wrapper is inserted. diff --git a/pandas/io/excel/_odfreader.py b/pandas/io/excel/_odfreader.py index 0278b22995089..1324485f49bdb 100644 --- a/pandas/io/excel/_odfreader.py +++ b/pandas/io/excel/_odfreader.py @@ -23,7 +23,7 @@ class ODFReader(BaseExcelReader): Parameters ---------- - filepath_or_buffer : string, path to be parsed or + filepath_or_buffer : str, path to be parsed or an open readable stream. 
storage_options : dict, optional passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``) @@ -101,12 +101,12 @@ def get_sheet_data(self, sheet, convert_float: bool) -> List[List[Scalar]]: table: List[List[Scalar]] = [] - for i, sheet_row in enumerate(sheet_rows): + for sheet_row in sheet_rows: sheet_cells = [x for x in sheet_row.childNodes if x.qname in cell_names] empty_cells = 0 table_row: List[Scalar] = [] - for j, sheet_cell in enumerate(sheet_cells): + for sheet_cell in sheet_cells: if sheet_cell.qname == table_cell_name: value = self._get_cell_value(sheet_cell, convert_float) else: diff --git a/pandas/io/excel/_openpyxl.py b/pandas/io/excel/_openpyxl.py index 72950db72e067..20d9a998505cd 100644 --- a/pandas/io/excel/_openpyxl.py +++ b/pandas/io/excel/_openpyxl.py @@ -488,7 +488,7 @@ def __init__( Parameters ---------- - filepath_or_buffer : string, path object or Workbook + filepath_or_buffer : str, path object or Workbook Object to be parsed. storage_options : dict, optional passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``) diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 5eb88a694218a..eea0f1c03b998 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -15,7 +15,7 @@ def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): Parameters ---------- - filepath_or_buffer : string, path object or Workbook + filepath_or_buffer : str, path object or Workbook Object to be parsed. 
storage_options : dict, optional passed to fsspec for appropriate URLs (see ``_get_filepath_or_buffer``) diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 3999f91a7b141..d032c54395c6d 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -56,7 +56,7 @@ def to_feather( # validate that we have only a default index # raise on anything else as we don't serialize the index - if not isinstance(df.index, Int64Index): + if not isinstance(df.index, (Int64Index, RangeIndex)): typ = type(df.index) raise ValueError( f"feather does not support serializing {typ} " diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py index 1ec2f7bfdd4be..648df0ff2b6d9 100644 --- a/pandas/io/formats/excel.py +++ b/pandas/io/formats/excel.py @@ -431,22 +431,22 @@ class ExcelFormatter: ---------- df : DataFrame or Styler na_rep: na representation - float_format : string, default None - Format string for floating point numbers + float_format : str, default None + Format string for floating point numbers cols : sequence, optional Columns to write - header : boolean or sequence of str, default True + header : bool or sequence of str, default True Write out column names. If a list of string is given it is assumed to be aliases for the column names - index : boolean, default True + index : bool, default True output row names (index) - index_label : string or sequence, default None - Column label for index column(s) if desired. If None is given, and - `header` and `index` are True, then the index names are used. A - sequence should be given if the DataFrame uses MultiIndex. - merge_cells : boolean, default False - Format MultiIndex and Hierarchical Rows as merged cells. - inf_rep : string, default `'inf'` + index_label : str or sequence, default None + Column label for index column(s) if desired. If None is given, and + `header` and `index` are True, then the index names are used. 
A + sequence should be given if the DataFrame uses MultiIndex. + merge_cells : bool, default False + Format MultiIndex and Hierarchical Rows as merged cells. + inf_rep : str, default `'inf'` representation for np.inf values (which aren't representable in Excel) A `'-'` sign will be added in front of -inf. style_converter : callable, optional @@ -796,7 +796,7 @@ def write( """ writer : path-like, file-like, or ExcelWriter object File path or existing ExcelWriter - sheet_name : string, default 'Sheet1' + sheet_name : str, default 'Sheet1' Name of sheet which will contain DataFrame startrow : upper left cell row to dump data frame diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index acb17aee50b76..cbc407c2624f2 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -303,7 +303,7 @@ def format_object_summary( must be iterable and support __getitem__ formatter : callable string formatter for an element - is_justify : boolean + is_justify : bool should justify the display name : name, optional defaults to the class name of the obj diff --git a/pandas/io/formats/xml.py b/pandas/io/formats/xml.py index 5c7255d5e6ee4..c9dc87ec0588b 100644 --- a/pandas/io/formats/xml.py +++ b/pandas/io/formats/xml.py @@ -307,7 +307,7 @@ def build_tree(self) -> bytes: f"{self.prefix_uri}{self.root_name}", attrib=self.other_namespaces() ) - for k, d in self.frame_dicts.items(): + for d in self.frame_dicts.values(): self.d = d self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") @@ -477,7 +477,7 @@ def build_tree(self) -> bytes: self.root = Element(f"{self.prefix_uri}{self.root_name}", nsmap=self.namespaces) - for k, d in self.frame_dicts.items(): + for d in self.frame_dicts.values(): self.d = d self.elem_row = SubElement(self.root, f"{self.prefix_uri}{self.row_name}") diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 4824dab764259..ea47dca4f079e 100644 --- 
a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -296,7 +296,7 @@ def parse_table_schema(json, precise_float): ---------- json : A JSON table schema - precise_float : boolean + precise_float : bool Flag controlling precision when decoding string to double values, as dictated by ``read_json`` diff --git a/pandas/io/parsers/readers.py b/pandas/io/parsers/readers.py index 6adf1b20b769f..796d44dc7877a 100644 --- a/pandas/io/parsers/readers.py +++ b/pandas/io/parsers/readers.py @@ -403,7 +403,7 @@ def validate_integer(name, val, min_val=0): Parameters ---------- - name : string + name : str Parameter name (used for error reporting) val : int or float The value to check diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 8658bb654b787..8a3e3ea556bea 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -980,7 +980,7 @@ def select_as_multiple( columns : the columns I want back start : integer (defaults to None), row number to start selection stop : integer (defaults to None), row number to stop selection - iterator : boolean, return an iterator, default False + iterator : bool, return an iterator, default False chunksize : nrows to include in iteration, return an iterator auto_close : bool, default False Should automatically close the store when finished. 
@@ -1145,7 +1145,7 @@ def remove(self, key: str, where=None, start=None, stop=None): Parameters ---------- - key : string + key : str Node to remove or delete rows from where : list of Term (or convertible) objects, optional start : integer (defaults to None), row number to start selection @@ -3540,7 +3540,7 @@ def validate_min_itemsize(self, min_itemsize): return q = self.queryables() - for k, v in min_itemsize.items(): + for k in min_itemsize: # ok, apply generally if k == "values": diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 05cc742b45d83..cd9667bb1ce4b 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -137,10 +137,10 @@ class SAS7BDATReader(ReaderBase, abc.Iterator): contents. index : column identifier, defaults to None Column to use as index. - convert_dates : boolean, defaults to True + convert_dates : bool, defaults to True Attempt to convert dates to Pandas datetime values. Note that some rarely used SAS date formats may be unsupported. - blank_missing : boolean, defaults to True + blank_missing : bool, defaults to True Convert empty strings to missing values (SAS uses blanks to indicate missing character variables). chunksize : int, defaults to None diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index c71de542bbf77..f7e1c56cbb196 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -63,23 +63,23 @@ _base_params_doc = """\ Parameters ---------- -filepath_or_buffer : string or file-like object +filepath_or_buffer : str or file-like object Path to SAS file or object implementing binary read method.""" _params2_doc = """\ index : identifier of index column Identifier of column that should be used as index of the DataFrame. -encoding : string +encoding : str Encoding for text data. 
chunksize : int Read file `chunksize` lines at a time, returns iterator.""" _format_params_doc = """\ -format : string +format : str File format, only `xport` is currently supported.""" _iterator_doc = """\ -iterator : boolean, default False +iterator : bool, default False Return XportReader object for reading file incrementally.""" diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 200565b837dea..4b5baa0a18c90 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -666,7 +666,7 @@ def to_sql( - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. - index : boolean, default True + index : bool, default True Write DataFrame index as a column. index_label : str or sequence, optional Column label for index column(s). If None is given (default) and @@ -1341,11 +1341,11 @@ def read_table( Parameters ---------- - table_name : string + table_name : str Name of SQL table in database. index_col : string, optional, default: None Column to set as index. - coerce_float : boolean, default True + coerce_float : bool, default True Attempts to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point. This can result in loss of precision. @@ -1436,11 +1436,11 @@ def read_query( Parameters ---------- - sql : string + sql : str SQL query to be executed. index_col : string, optional, default: None Column name to use as index for the returned DataFrame object. - coerce_float : boolean, default True + coerce_float : bool, default True Attempt to convert values of non-string, non-numeric objects (like decimal.Decimal) to floating point, useful for SQL result sets. params : list, tuple or dict, optional, default: None @@ -1530,7 +1530,7 @@ def to_sql( - fail: If table exists, do nothing. - replace: If table exists, drop it, recreate it, and insert data. - append: If table exists, insert data. Create if does not exist. 
- index : boolean, default True + index : bool, default True Write DataFrame index as a column. index_label : string or sequence, default None Column label for index column(s). If None is given (default) and @@ -2028,7 +2028,7 @@ def to_sql( fail: If table exists, do nothing. replace: If table exists, drop it, recreate it, and insert data. append: If table exists, insert data. Create if it does not exist. - index : boolean, default True + index : bool, default True Write DataFrame index as a column index_label : string or sequence, default None Column label for index column(s). If None is given (default) and @@ -2135,7 +2135,7 @@ def get_schema( Parameters ---------- frame : DataFrame - name : string + name : str name of SQL table keys : string or sequence, default: None columns to use a primary key diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index 1ec4efe7b4795..6a81e3ae43b5d 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -84,7 +84,7 @@ def _validate_color_args(self): if isinstance(self.color, dict): valid_keys = ["boxes", "whiskers", "medians", "caps"] - for key, values in self.color.items(): + for key in self.color: if key not in valid_keys: raise ValueError( f"color dict contains invalid key '{key}'. 
" diff --git a/pandas/plotting/_matplotlib/compat.py b/pandas/plotting/_matplotlib/compat.py index 964596d9b6319..729d2bf1f019a 100644 --- a/pandas/plotting/_matplotlib/compat.py +++ b/pandas/plotting/_matplotlib/compat.py @@ -22,3 +22,4 @@ def inner(): mpl_ge_3_1_0 = _mpl_version("3.1.0", operator.ge) mpl_ge_3_2_0 = _mpl_version("3.2.0", operator.ge) mpl_ge_3_3_0 = _mpl_version("3.3.0", operator.ge) +mpl_ge_3_4_0 = _mpl_version("3.4.0", operator.ge) diff --git a/pandas/plotting/_matplotlib/converter.py b/pandas/plotting/_matplotlib/converter.py index 677c3e791c72b..7c6a718b34e89 100644 --- a/pandas/plotting/_matplotlib/converter.py +++ b/pandas/plotting/_matplotlib/converter.py @@ -506,7 +506,7 @@ def period_break(dates: PeriodIndex, period: str) -> np.ndarray: ---------- dates : PeriodIndex Array of intervals to monitor. - period : string + period : str Name of the period to monitor. """ current = getattr(dates, period) diff --git a/pandas/plotting/_matplotlib/tools.py b/pandas/plotting/_matplotlib/tools.py index 500d570835493..03d73d1d36953 100644 --- a/pandas/plotting/_matplotlib/tools.py +++ b/pandas/plotting/_matplotlib/tools.py @@ -392,6 +392,11 @@ def handle_shared_axes( row_num = lambda x: x.rowNum col_num = lambda x: x.colNum + if compat.mpl_ge_3_4_0(): + is_first_col = lambda x: x.get_subplotspec().is_first_col() + else: + is_first_col = lambda x: x.is_first_col() + if nrows > 1: try: # first find out the ax layout, @@ -423,7 +428,7 @@ def handle_shared_axes( # only the first column should get y labels -> set all other to # off as we only have labels in the first column and we always # have a subplot there, we can skip the layout test - if ax.is_first_col(): + if is_first_col(ax): continue if sharey or _has_externally_shared_axis(ax, "y"): _remove_labels_from_axis(ax.yaxis) diff --git a/pandas/tests/arithmetic/test_numeric.py b/pandas/tests/arithmetic/test_numeric.py index 1e2622d6a8fcd..ef86a8e6a1cb0 100644 --- a/pandas/tests/arithmetic/test_numeric.py 
+++ b/pandas/tests/arithmetic/test_numeric.py @@ -538,7 +538,6 @@ def test_df_div_zero_series_does_not_commute(self): def test_df_mod_zero_df(self, using_array_manager): # GH#3590, modulo as ints df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) - # this is technically wrong, as the integer portion is coerced to float first = Series([0, 0, 0, 0]) if not using_array_manager: @@ -551,6 +550,15 @@ def test_df_mod_zero_df(self, using_array_manager): result = df % df tm.assert_frame_equal(result, expected) + # GH#38939 If we dont pass copy=False, df is consolidated and + # result["first"] is float64 instead of int64 + df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}, copy=False) + first = Series([0, 0, 0, 0], dtype="int64") + second = Series([np.nan, np.nan, np.nan, 0]) + expected = pd.DataFrame({"first": first, "second": second}) + result = df % df + tm.assert_frame_equal(result, expected) + def test_df_mod_zero_array(self): # GH#3590, modulo as ints df = pd.DataFrame({"first": [3, 4, 5, 8], "second": [0, 0, 0, 3]}) diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 1a7e2d1d820f7..62d368264752b 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1372,9 +1372,9 @@ def array_likes(request): data = memoryview(arr) elif name == "array": # stdlib array - from array import array as array_stdlib + import array - data = array_stdlib("i", arr) + data = array.array("i", arr) elif name == "dask": import dask.array diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 406aec9d4c16e..616f46624bfd7 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -281,7 +281,10 @@ def test_is_string_dtype(): assert com.is_string_dtype(object) assert com.is_string_dtype(np.array(["a", "b"])) assert com.is_string_dtype(pd.StringDtype()) - assert com.is_string_dtype(pd.array(["a", "b"], 
dtype="string")) + + +def test_is_string_dtype_nullable(nullable_string_dtype): + assert com.is_string_dtype(pd.array(["a", "b"], dtype=nullable_string_dtype)) integer_dtypes: List = [] diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 366b24e328642..68dbdd9e0bf35 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -150,7 +150,7 @@ def take(self, indexer, allow_fill=False, fill_value=None): return self._from_sequence(result) def copy(self): - return type(self)(self._data.copy()) + return type(self)(self._data.copy(), dtype=self.dtype) def astype(self, dtype, copy=True): if is_dtype_equal(dtype, self._dtype): diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 437160e78741b..55f9d85574f94 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -261,7 +261,18 @@ def test_dataframe_constructor_with_dtype(): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("frame", [True, False]) +@pytest.mark.parametrize( + "frame", + [ + pytest.param( + True, + marks=pytest.mark.xfail( + reason="pd.concat call inside NDFrame.astype reverts the dtype" + ), + ), + False, + ], +) def test_astype_dispatches(frame): # This is a dtype-specific test that ensures Series[decimal].astype # gets all the way through to ExtensionArray.astype diff --git a/pandas/tests/extension/json/array.py b/pandas/tests/extension/json/array.py index 6c1161294dd17..a63c849d25a9f 100644 --- a/pandas/tests/extension/json/array.py +++ b/pandas/tests/extension/json/array.py @@ -39,6 +39,7 @@ ExtensionDtype, ) from pandas.api.types import is_bool_dtype +from pandas.core.arrays.string_arrow import ArrowStringDtype class JSONDtype(ExtensionDtype): @@ -194,7 +195,7 @@ def astype(self, dtype, copy=True): if copy: return self.copy() return self - elif isinstance(dtype, StringDtype): + 
elif isinstance(dtype, (StringDtype, ArrowStringDtype)): value = self.astype(str) # numpy doesn'y like nested dicts return dtype.construct_array_type()._from_sequence(value, copy=False) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 051871513a14e..e11e74f16030c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -18,6 +18,7 @@ import pandas.util._test_decorators as td +from pandas.core.dtypes.cast import can_hold_element from pandas.core.dtypes.dtypes import ( ExtensionDtype, PandasDtype, @@ -27,7 +28,10 @@ import pandas as pd import pandas._testing as tm from pandas.core.arrays.numpy_ import PandasArray -from pandas.core.internals import managers +from pandas.core.internals import ( + blocks, + managers, +) from pandas.tests.extension import base # TODO(ArrayManager) PandasArray @@ -45,6 +49,12 @@ def _extract_array_patched(obj): return obj +def _can_hold_element_patched(obj, element) -> bool: + if isinstance(element, PandasArray): + element = element.to_numpy() + return can_hold_element(obj, element) + + @pytest.fixture(params=["float", "object"]) def dtype(request): return PandasDtype(np.dtype(request.param)) @@ -70,6 +80,7 @@ def allow_in_pandas(monkeypatch): with monkeypatch.context() as m: m.setattr(PandasArray, "_typ", "extension") m.setattr(managers, "_extract_array", _extract_array_patched) + m.setattr(blocks, "can_hold_element", _can_hold_element_patched) yield diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 0613c727dec98..759277a47f62b 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -285,7 +285,7 @@ def test_combine_le(self, data_repeated): def test_fillna_copy_frame(self, data_missing): arr = data_missing.take([1, 1]) - df = pd.DataFrame({"A": arr}) + df = pd.DataFrame({"A": arr}, copy=False) filled_val = df.iloc[0, 0] result = df.fillna(filled_val) diff --git 
a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index 5093b88413110..e8d0a789e7cbd 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -149,7 +149,7 @@ def test_from_records_dictlike(self): # from the dict blocks = df._to_dict_of_blocks() columns = [] - for dtype, b in blocks.items(): + for b in blocks.values(): columns.extend(b.columns) asdict = {x: y for x, y in df.items()} diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index b4d8a53e4b23f..dd91b32c8eb8c 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -381,15 +381,17 @@ def test_combine_first_with_asymmetric_other(self, val): tm.assert_frame_equal(res, exp) - def test_combine_first_string_dtype_only_na(self): + def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): # GH: 37519 - df = DataFrame({"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string") - df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype="string") + df = DataFrame( + {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype + ) + df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype) df.set_index(["a", "b"], inplace=True) df2.set_index(["a", "b"], inplace=True) result = df.combine_first(df2) expected = DataFrame( - {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string" + {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype ).set_index(["a", "b"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index bc1d4605e985a..dbb5cb357de47 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -336,7 +336,7 @@ def test_quantile_box(self): ) tm.assert_frame_equal(res, exp) - # 
DatetimeBlock may be consolidated and contain NaT in different loc + # DatetimeLikeBlock may be consolidated and contain NaT in different loc df = DataFrame( { "A": [ diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index aed784a6e4c3c..3b2668aea001c 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -773,10 +773,7 @@ def test_to_csv_dups_cols(self): [df_float, df_int, df_bool, df_object, df_dt], axis=1, ignore_index=True ) - cols = [] - for i in range(5): - cols.extend([0, 1, 2]) - df.columns = cols + df.columns = [0, 1, 2] * 5 with tm.ensure_clean() as filename: df.to_csv(filename) diff --git a/pandas/tests/frame/methods/test_to_dict_of_blocks.py b/pandas/tests/frame/methods/test_to_dict_of_blocks.py index ca222180322bf..c81bed9d93cc4 100644 --- a/pandas/tests/frame/methods/test_to_dict_of_blocks.py +++ b/pandas/tests/frame/methods/test_to_dict_of_blocks.py @@ -20,7 +20,7 @@ def test_copy_blocks(self, float_frame): # use the default copy=True, change a column blocks = df._to_dict_of_blocks(copy=True) - for dtype, _df in blocks.items(): + for _df in blocks.values(): if column in _df: _df.loc[:, column] = _df[column] + 1 @@ -34,7 +34,7 @@ def test_no_copy_blocks(self, float_frame): # use the copy=False, change a column blocks = df._to_dict_of_blocks(copy=False) - for dtype, _df in blocks.items(): + for _df in blocks.values(): if column in _df: _df.loc[:, column] = _df[column] + 1 diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index b76a44b3c86be..ca68885fdc470 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -89,7 +89,12 @@ def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series): if frame_or_series is DataFrame: arr = arr.reshape(1, 1) - msg = "Could not convert object to NumPy timedelta" + msg = "|".join( + [ + "Could not convert object to NumPy 
timedelta", + "Invalid type for timedelta scalar: ", + ] + ) with pytest.raises(ValueError, match=msg): frame_or_series(arr, dtype="m8[ns]") @@ -1231,14 +1236,14 @@ def __len__(self, n): def test_constructor_stdlib_array(self): # GH 4297 # support Array - from array import array as stdlib_array + import array - result = DataFrame({"A": stdlib_array("i", range(10))}) + result = DataFrame({"A": array.array("i", range(10))}) expected = DataFrame({"A": list(range(10))}) tm.assert_frame_equal(result, expected, check_dtype=False) expected = DataFrame([list(range(10)), list(range(10))]) - result = DataFrame([stdlib_array("i", range(10)), stdlib_array("i", range(10))]) + result = DataFrame([array.array("i", range(10)), array.array("i", range(10))]) tm.assert_frame_equal(result, expected, check_dtype=False) def test_constructor_range(self): @@ -1649,10 +1654,10 @@ def test_constructor_empty_with_string_dtype(self): df = DataFrame(index=[0, 1], columns=[0, 1], dtype="U5") tm.assert_frame_equal(df, expected) - def test_constructor_empty_with_string_extension(self): + def test_constructor_empty_with_string_extension(self, nullable_string_dtype): # GH 34915 - expected = DataFrame(index=[], columns=["c1"], dtype="string") - df = DataFrame(columns=["c1"], dtype="string") + expected = DataFrame(index=[], columns=["c1"], dtype=nullable_string_dtype) + df = DataFrame(columns=["c1"], dtype=nullable_string_dtype) tm.assert_frame_equal(df, expected) def test_constructor_single_value(self): @@ -1997,7 +2002,7 @@ def test_constructor_ndarray_copy(self, float_frame): def test_constructor_series_copy(self, float_frame): series = float_frame._series - df = DataFrame({"A": series["A"]}) + df = DataFrame({"A": series["A"]}, copy=True) df["A"][:] = 5 assert not (series["A"] == 5).all() @@ -2311,6 +2316,86 @@ def test_constructor_list_str_na(self, string_dtype): expected = DataFrame({"A": ["1.0", "2.0", None]}, dtype=object) tm.assert_frame_equal(result, expected) + 
@pytest.mark.parametrize("copy", [False, True]) + @td.skip_array_manager_not_yet_implemented + def test_dict_nocopy(self, copy, any_nullable_numeric_dtype, any_numpy_dtype): + a = np.array([1, 2], dtype=any_numpy_dtype) + b = np.array([3, 4], dtype=any_numpy_dtype) + if b.dtype.kind in ["S", "U"]: + # These get cast, making the checks below more cumbersome + return + + c = pd.array([1, 2], dtype=any_nullable_numeric_dtype) + df = DataFrame({"a": a, "b": b, "c": c}, copy=copy) + + def get_base(obj): + if isinstance(obj, np.ndarray): + return obj.base + elif isinstance(obj.dtype, np.dtype): + # i.e. DatetimeArray, TimedeltaArray + return obj._ndarray.base + else: + raise TypeError + + def check_views(): + # written to work for either BlockManager or ArrayManager + assert sum(x is c for x in df._mgr.arrays) == 1 + assert ( + sum( + get_base(x) is a + for x in df._mgr.arrays + if isinstance(x.dtype, np.dtype) + ) + == 1 + ) + assert ( + sum( + get_base(x) is b + for x in df._mgr.arrays + if isinstance(x.dtype, np.dtype) + ) + == 1 + ) + + if not copy: + # constructor preserves views + check_views() + + df.iloc[0, 0] = 0 + df.iloc[0, 1] = 0 + if not copy: + # Check that the underlying data behind df["c"] is still `c` + # after setting with iloc. Since we don't know which entry in + # df._mgr.arrays corresponds to df["c"], we just check that exactly + # one of these arrays is `c`. 
GH#38939 + assert sum(x is c for x in df._mgr.arrays) == 1 + # TODO: we can call check_views if we stop consolidating + # in setitem_with_indexer + + # FIXME: until GH#35417, iloc.setitem into EA values does not preserve + # view, so we have to check in the other direction + # df.iloc[0, 2] = 0 + # if not copy: + # check_views() + c[0] = 0 + + if copy: + if a.dtype.kind == "M": + assert a[0] == a.dtype.type(1, "ns") + assert b[0] == b.dtype.type(3, "ns") + else: + assert a[0] == a.dtype.type(1) + assert b[0] == b.dtype.type(3) + # FIXME: enable after GH#35417 + # assert c[0] == 1 + assert df.iloc[0, 2] == 1 + else: + # TODO: we can call check_views if we stop consolidating + # in setitem_with_indexer + # FIXME: enable after GH#35417 + # assert b[0] == 0 + assert df.iloc[0, 2] == 0 + class TestDataFrameConstructorWithDatetimeTZ: @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 616405c01cc2a..1304e861f948e 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -43,7 +43,7 @@ def assert_stat_op_calc( Parameters ---------- - opname : string + opname : str Name of the operator to test on frame alternative : function Function that opname is tested against; i.e. "frame.opname()" should @@ -146,7 +146,7 @@ def assert_stat_op_api(opname, float_frame, float_string_frame, has_numeric_only Parameters ---------- - opname : string + opname : str Name of the operator to test on frame float_frame : DataFrame DataFrame with columns of type float @@ -172,7 +172,7 @@ def assert_bool_op_calc(opname, alternative, frame, has_skipna=True): Parameters ---------- - opname : string + opname : str Name of the operator to test on frame alternative : function Function that opname is tested against; i.e. 
"frame.opname()" should @@ -237,7 +237,7 @@ def assert_bool_op_api( Parameters ---------- - opname : string + opname : str Name of the operator to test on frame float_frame : DataFrame DataFrame with columns of type float diff --git a/pandas/tests/groupby/test_libgroupby.py b/pandas/tests/groupby/test_libgroupby.py index d776c34f5b5ec..7a9cadb6c8232 100644 --- a/pandas/tests/groupby/test_libgroupby.py +++ b/pandas/tests/groupby/test_libgroupby.py @@ -4,8 +4,7 @@ from pandas._libs.groupby import ( group_cumprod_float64, group_cumsum, - group_var_float32, - group_var_float64, + group_var, ) from pandas.core.dtypes.common import ensure_platform_int @@ -102,7 +101,7 @@ def test_group_var_constant(self): class TestGroupVarFloat64(GroupVarTestMixin): __test__ = True - algo = staticmethod(group_var_float64) + algo = staticmethod(group_var) dtype = np.float64 rtol = 1e-5 @@ -124,7 +123,7 @@ def test_group_var_large_inputs(self): class TestGroupVarFloat32(GroupVarTestMixin): __test__ = True - algo = staticmethod(group_var_float32) + algo = staticmethod(group_var) dtype = np.float32 rtol = 1e-2 diff --git a/pandas/tests/groupby/test_rank.py b/pandas/tests/groupby/test_rank.py index 00641effac08d..2e666c27386b4 100644 --- a/pandas/tests/groupby/test_rank.py +++ b/pandas/tests/groupby/test_rank.py @@ -542,3 +542,28 @@ def test_rank_min_int(): ) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("use_nan", [True, False]) +def test_rank_pct_equal_values_on_group_transition(use_nan): + # GH#40518 + fill_value = np.nan if use_nan else 3 + df = DataFrame( + [ + [-1, 1], + [-1, 2], + [1, fill_value], + [-1, fill_value], + ], + columns=["group", "val"], + ) + result = df.groupby(["group"])["val"].rank( + method="dense", + pct=True, + ) + if use_nan: + expected = Series([0.5, 1, np.nan, np.nan], name="val") + else: + expected = Series([1 / 3, 2 / 3, 1, 1], name="val") + + tm.assert_series_equal(result, expected) diff --git 
a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 490a68233367a..0e0849fdb8dcf 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -244,9 +244,9 @@ def test_get_indexer_non_unique(self): for indexer in [idx2, list("abf"), Index(list("abf"))]: msg = "Reindexing only valid with uniquely valued Index objects" with pytest.raises(InvalidIndexError, match=msg): - idx1.get_indexer(idx2) + idx1.get_indexer(indexer) - r1, _ = idx1.get_indexer_non_unique(idx2) + r1, _ = idx1.get_indexer_non_unique(indexer) expected = np.array([0, 1, 2, -1], dtype=np.intp) tm.assert_almost_equal(r1, expected) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e5a24e9b938e2..ab2b2db7eec53 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -516,7 +516,7 @@ def test_hasnans_isnans(self, index_flat): return elif isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): + elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)): return else: values[1] = np.nan @@ -555,7 +555,7 @@ def test_fillna(self, index): if isinstance(index, DatetimeIndexOpsMixin): values[1] = iNaT - elif isinstance(index, (Int64Index, UInt64Index)): + elif isinstance(index, (Int64Index, UInt64Index, RangeIndex)): return else: values[1] = np.nan diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py index d29d4647f4753..3da6414332cb8 100644 --- a/pandas/tests/indexes/datetimes/test_indexing.py +++ b/pandas/tests/indexes/datetimes/test_indexing.py @@ -173,7 +173,7 @@ def test_where_other(self): i = date_range("20130101", periods=3, tz="US/Eastern") for arr in [np.nan, pd.NaT]: - result = i.where(notna(i), other=np.nan) + result = i.where(notna(i), other=arr) expected = i tm.assert_index_equal(result, expected) diff --git 
a/pandas/tests/indexes/test_numpy_compat.py b/pandas/tests/indexes/test_numpy_compat.py index 1ee7c5547ecf9..59c30c3abac03 100644 --- a/pandas/tests/indexes/test_numpy_compat.py +++ b/pandas/tests/indexes/test_numpy_compat.py @@ -12,6 +12,7 @@ Index, Int64Index, PeriodIndex, + RangeIndex, TimedeltaIndex, UInt64Index, ) @@ -55,7 +56,7 @@ def test_numpy_ufuncs_basic(index, func): with tm.external_error_raised((TypeError, AttributeError)): with np.errstate(all="ignore"): func(index) - elif isinstance(index, (Float64Index, Int64Index, UInt64Index)): + elif isinstance(index, (Float64Index, Int64Index, UInt64Index, RangeIndex)): # coerces to float (e.g. np.sin) with np.errstate(all="ignore"): result = func(index) @@ -104,7 +105,7 @@ def test_numpy_ufuncs_other(index, func, request): with tm.external_error_raised(TypeError): func(index) - elif isinstance(index, (Float64Index, Int64Index, UInt64Index)): + elif isinstance(index, (Float64Index, Int64Index, UInt64Index, RangeIndex)): # Results in bool array result = func(index) assert isinstance(result, np.ndarray) diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 85accac5a8235..734cf13289c1f 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -1063,6 +1063,7 @@ def test_loc_setitem_empty_append_raises(self): [ "cannot copy sequence with size 2 to array axis with dimension 0", r"could not broadcast input array from shape \(2,\) into shape \(0,\)", + "Must have equal len keys and value when setting with an iterable", ] ) with pytest.raises(ValueError, match=msg): @@ -1163,6 +1164,37 @@ def test_loc_getitem_listlike_all_retains_sparse(self): result = df.loc[[0, 1]] tm.assert_frame_equal(result, df) + @td.skip_if_no_scipy + def test_loc_getitem_sparse_frame(self): + # GH34687 + from scipy.sparse import eye + + df = DataFrame.sparse.from_spmatrix(eye(5)) + result = df.loc[range(2)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0], [0.0, 1.0, 0.0, 
0.0, 0.0]], + dtype=SparseDtype("float64", 0.0), + ) + tm.assert_frame_equal(result, expected) + + result = df.loc[range(2)].loc[range(1)] + expected = DataFrame( + [[1.0, 0.0, 0.0, 0.0, 0.0]], dtype=SparseDtype("float64", 0.0) + ) + tm.assert_frame_equal(result, expected) + + def test_loc_getitem_sparse_series(self): + # GH34687 + s = Series([1.0, 0.0, 0.0, 0.0, 0.0], dtype=SparseDtype("float64", 0.0)) + + result = s.loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + + result = s.loc[range(3)].loc[range(2)] + expected = Series([1.0, 0.0], dtype=SparseDtype("float64", 0.0)) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("key_type", [iter, np.array, Series, Index]) def test_loc_getitem_iterable(self, float_frame, key_type): idx = key_type(["A", "B", "C"]) diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py index b0d41a89931e9..b8680cc4e611e 100644 --- a/pandas/tests/indexing/test_partial.py +++ b/pandas/tests/indexing/test_partial.py @@ -171,7 +171,8 @@ def test_partial_setting_mixed_dtype(self): tm.assert_frame_equal(df, DataFrame(columns=["A", "B"], index=[0])) # columns will align - df = DataFrame(columns=["A", "B"]) + # TODO: it isn't great that this behavior depends on consolidation + df = DataFrame(columns=["A", "B"])._consolidate() df.loc[0] = Series(1, index=["B"]) exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") diff --git a/pandas/tests/internals/test_api.py b/pandas/tests/internals/test_api.py index 60fbd2da70e79..0062d5aa34319 100644 --- a/pandas/tests/internals/test_api.py +++ b/pandas/tests/internals/test_api.py @@ -27,11 +27,9 @@ def test_namespace(): expected = [ "Block", "NumericBlock", - "DatetimeBlock", "DatetimeTZBlock", "ExtensionBlock", "ObjectBlock", - "TimeDeltaBlock", "make_block", "DataManager", "ArrayManager", diff --git a/pandas/tests/internals/test_internals.py 
b/pandas/tests/internals/test_internals.py index c242623520b75..c67ef9177ca96 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -30,6 +30,7 @@ from pandas.core.arrays import ( DatetimeArray, SparseArray, + TimedeltaArray, ) from pandas.core.internals import ( BlockManager, @@ -300,6 +301,23 @@ def test_delete(self): with pytest.raises(IndexError, match=None): newb.delete(3) + def test_delete_datetimelike(self): + # dont use np.delete on values, as that will coerce from DTA/TDA to ndarray + arr = np.arange(20, dtype="i8").reshape(5, 4).view("m8[ns]") + df = DataFrame(arr) + blk = df._mgr.blocks[0] + assert isinstance(blk.values, TimedeltaArray) + + blk.delete(1) + assert isinstance(blk.values, TimedeltaArray) + + df = DataFrame(arr.view("M8[ns]")) + blk = df._mgr.blocks[0] + assert isinstance(blk.values, DatetimeArray) + + blk.delete([1, 3]) + assert isinstance(blk.values, DatetimeArray) + def test_split(self): # GH#37799 values = np.random.randn(3, 4) diff --git a/pandas/tests/io/formats/style/test_style.py b/pandas/tests/io/formats/style/test_style.py index 302019b702829..3422eb9dc64b7 100644 --- a/pandas/tests/io/formats/style/test_style.py +++ b/pandas/tests/io/formats/style/test_style.py @@ -174,10 +174,13 @@ def test_clear(self): tt = DataFrame({"A": [None, "tt"]}) css = DataFrame({"A": [None, "cls-a"]}) s = self.df.style.highlight_max().set_tooltips(tt).set_td_classes(css) + s = s.hide_index().hide_columns("A") # _todo, tooltips and cell_context items added to.. assert len(s._todo) > 0 assert s.tooltips assert len(s.cell_context) > 0 + assert s.hidden_index is True + assert len(s.hidden_columns) > 0 s = s._compute() # ctx item affected when a render takes place. 
_todo is maintained @@ -190,6 +193,8 @@ def test_clear(self): assert len(s._todo) == 0 assert not s.tooltips assert len(s.cell_context) == 0 + assert s.hidden_index is False + assert len(s.hidden_columns) == 0 def test_render(self): df = DataFrame({"A": [0, 1]}) diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index bc9dc8e966e33..e20d78effa931 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -95,9 +95,11 @@ def test_iterator_loop(self): # github #13654 for j in 0, 1: for k in self.test_ix[j]: - for chunksize in 3, 5, 10, 11: + for chunksize in (3, 5, 10, 11): fname = os.path.join(self.dirpath, f"test{k}.sas7bdat") - with pd.read_sas(fname, chunksize=10, encoding="utf-8") as rdr: + with pd.read_sas( + fname, chunksize=chunksize, encoding="utf-8" + ) as rdr: y = 0 for x in rdr: y += x.shape[0] diff --git a/pandas/tests/plotting/frame/test_frame.py b/pandas/tests/plotting/frame/test_frame.py index bed60be169e57..e23abc1eee167 100644 --- a/pandas/tests/plotting/frame/test_frame.py +++ b/pandas/tests/plotting/frame/test_frame.py @@ -746,7 +746,9 @@ def test_plot_scatter_with_categorical_data(self, x, y): _check_plot_works(df.plot.scatter, x=x, y=y) - def test_plot_scatter_with_c(self): + def test_plot_scatter_with_c(self, request): + from pandas.plotting._matplotlib.compat import mpl_ge_3_4_0 + df = DataFrame( np.random.randn(6, 4), index=list(string.ascii_letters[:6]), @@ -758,9 +760,10 @@ def test_plot_scatter_with_c(self): # default to Greys assert ax.collections[0].cmap.name == "Greys" - # n.b. 
there appears to be no public method - # to get the colorbar label - assert ax.collections[0].colorbar._label == "z" + if mpl_ge_3_4_0(): + assert ax.collections[0].colorbar.ax.get_ylabel() == "z" + else: + assert ax.collections[0].colorbar._label == "z" cm = "cubehelix" ax = df.plot.scatter(x="x", y="y", c="z", colormap=cm) diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 6e71b56e8182b..6d269a27e2656 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -417,7 +417,7 @@ def test_finder_daily(self): xpl1 = xpl2 = [Period("1999-1-1", freq="B").ordinal] * len(day_lst) rs1 = [] rs2 = [] - for i, n in enumerate(day_lst): + for n in day_lst: rng = bdate_range("1999-1-1", periods=n) ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() @@ -439,7 +439,7 @@ def test_finder_quarterly(self): xpl1 = xpl2 = [Period("1988Q1").ordinal] * len(yrs) rs1 = [] rs2 = [] - for i, n in enumerate(yrs): + for n in yrs: rng = period_range("1987Q2", periods=int(n * 4), freq="Q") ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() @@ -461,7 +461,7 @@ def test_finder_monthly(self): xpl1 = xpl2 = [Period("Jan 1988").ordinal] * len(yrs) rs1 = [] rs2 = [] - for i, n in enumerate(yrs): + for n in yrs: rng = period_range("1987Q2", periods=int(n * 12), freq="M") ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() @@ -491,7 +491,7 @@ def test_finder_annual(self): xp = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170] xp = [Period(x, freq="A").ordinal for x in xp] rs = [] - for i, nyears in enumerate([5, 10, 19, 49, 99, 199, 599, 1001]): + for nyears in [5, 10, 19, 49, 99, 199, 599, 1001]: rng = period_range("1987", periods=nyears, freq="A") ser = Series(np.random.randn(len(rng)), rng) _, ax = self.plt.subplots() diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 
ea8b8fc7aa6a2..71e6aa38d60e5 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1342,7 +1342,7 @@ def test_resample_nunique(): assert expected.name == "ID" for t in [r, g]: - result = r.ID.nunique() + result = t.ID.nunique() tm.assert_series_equal(result, expected) result = df.ID.resample("D").nunique() diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 7dcd4dc979eb2..3cc81ef851306 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -646,7 +646,7 @@ def _ex(p): return p.start_time + Timedelta(days=1, nanoseconds=-1) return Timestamp((p + p.freq).start_time.value - 1) - for i, fcode in enumerate(from_lst): + for fcode in from_lst: p = Period("1982", freq=fcode) result = p.to_timestamp().to_period(fcode) assert result == p diff --git a/pandas/tests/scalar/test_nat.py b/pandas/tests/scalar/test_nat.py index 96aea4da9fac5..08c5ea706111a 100644 --- a/pandas/tests/scalar/test_nat.py +++ b/pandas/tests/scalar/test_nat.py @@ -590,6 +590,47 @@ def test_nat_comparisons_invalid(other_and_type, symbol_and_op): op(other, NaT) +@pytest.mark.parametrize( + "other", + [ + np.array(["foo"] * 2, dtype=object), + np.array([2, 3], dtype="int64"), + np.array([2.0, 3.5], dtype="float64"), + ], + ids=["str", "int", "float"], +) +def test_nat_comparisons_invalid_ndarray(other): + # GH#40722 + expected = np.array([False, False]) + result = NaT == other + tm.assert_numpy_array_equal(result, expected) + result = other == NaT + tm.assert_numpy_array_equal(result, expected) + + expected = np.array([True, True]) + result = NaT != other + tm.assert_numpy_array_equal(result, expected) + result = other != NaT + tm.assert_numpy_array_equal(result, expected) + + for symbol, op in [ + ("<=", operator.le), + ("<", operator.lt), + (">=", operator.ge), + (">", operator.gt), + ]: + msg = f"'{symbol}' not supported between" + + with 
pytest.raises(TypeError, match=msg): + op(NaT, other) + + if other.dtype == np.dtype("object"): + # uses the reverse operator, so symbol changes + msg = None + with pytest.raises(TypeError, match=msg): + op(other, NaT) + + def test_compare_date(): # GH#39151 comparing NaT with date object is deprecated # See also: tests.scalar.timestamps.test_comparisons::test_compare_date diff --git a/pandas/tests/series/methods/test_dropna.py b/pandas/tests/series/methods/test_dropna.py index 1c7c52d228cfa..5bff7306fac33 100644 --- a/pandas/tests/series/methods/test_dropna.py +++ b/pandas/tests/series/methods/test_dropna.py @@ -70,7 +70,7 @@ def test_dropna_period_dtype(self): tm.assert_series_equal(result, expected) def test_datetime64_tz_dropna(self): - # DatetimeBlock + # DatetimeLikeBlock ser = Series( [ Timestamp("2011-01-01 10:00"), @@ -85,7 +85,7 @@ def test_datetime64_tz_dropna(self): ) tm.assert_series_equal(result, expected) - # DatetimeBlockTZ + # DatetimeTZBlock idx = DatetimeIndex( ["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo" ) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index cf6b357d0a418..51864df915f8c 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -334,7 +334,7 @@ def test_datetime64_fillna(self): @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"]) def test_datetime64_tz_fillna(self, tz): - # DatetimeBlock + # DatetimeLikeBlock ser = Series( [ Timestamp("2011-01-01 10:00"), @@ -414,7 +414,7 @@ def test_datetime64_tz_fillna(self, tz): tm.assert_series_equal(expected, result) tm.assert_series_equal(isna(ser), null_loc) - # DatetimeBlockTZ + # DatetimeTZBlock idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz) ser = Series(idx) assert ser.dtype == f"datetime64[ns, {tz}]" diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 
ab484e7ae9d8a..75474a29169a7 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1351,7 +1351,12 @@ def test_constructor_dtype_timedelta64(self): td.astype("int32") # this is an invalid casting - msg = "Could not convert object to NumPy timedelta" + msg = "|".join( + [ + "Could not convert object to NumPy timedelta", + "Could not convert 'foo' to NumPy timedelta", + ] + ) with pytest.raises(ValueError, match=msg): Series([timedelta(days=1), "foo"], dtype="m8[ns]") diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 65aa189a3e965..30d6436c7e250 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -725,9 +725,9 @@ def test_precision_float_conversion(strrep): (["1", "2", "3.5"], Series([1, 2, 3.5])), ], ) -def test_to_numeric_from_nullable_string(values, expected): +def test_to_numeric_from_nullable_string(values, nullable_string_dtype, expected): # https://github.com/pandas-dev/pandas/issues/37262 - s = Series(values, dtype="string") + s = Series(values, dtype=nullable_string_dtype) result = to_numeric(s) tm.assert_series_equal(result, expected) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index c5b875b8f027e..05b8885b7d9b7 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -145,6 +145,12 @@ def infer_freq(index, warn: bool = True) -> Optional[str]: If the index is not datetime-like. ValueError If there are fewer than three values. 
+ + Examples + -------- + >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30) + >>> pd.infer_freq(idx) + 'D' """ import pandas as pd @@ -558,7 +564,7 @@ def _maybe_coerce_freq(code) -> str: Parameters ---------- - source : string or DateOffset + source : str or DateOffset Frequency converting from Returns diff --git a/requirements-dev.txt b/requirements-dev.txt index 1817d79f96139..349b176253acb 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,6 +9,7 @@ cython>=0.29.21 black==20.8b1 cpplint flake8 +flake8-bugbear>=21.3.2 flake8-comprehensions>=3.1.0 isort>=5.2.1 mypy==0.812 diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index c84a92324f976..3c21821e794a9 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -2,7 +2,7 @@ Check that test suite file doesn't use the pandas namespace inconsistently. We check for cases of ``Series`` and ``pd.Series`` appearing in the same file -(likewise for some other common classes). +(likewise for other pandas objects). This is meant to be run as a pre-commit hook - to run it manually, you can do: @@ -15,43 +15,50 @@ though note that you may need to manually fixup some imports and that you will also need the additional dependency `tokenize-rt` (which is left out from the pre-commit hook so that it uses the same virtualenv as the other local ones). + +The general structure is similar to that of some plugins from +https://github.com/asottile/pyupgrade . 
""" import argparse import ast +import sys from typing import ( MutableMapping, + NamedTuple, Optional, Sequence, Set, - Tuple, ) -ERROR_MESSAGE = "Found both `pd.{name}` and `{name}` in {path}" -EXCLUDE = { - "eval", # built-in, different from `pd.eval` - "np", # pd.np is deprecated but still tested -} -Offset = Tuple[int, int] +ERROR_MESSAGE = ( + "{path}:{lineno}:{col_offset}: " + "Found both '{prefix}.{name}' and '{name}' in {path}" +) + + +class OffsetWithNamespace(NamedTuple): + lineno: int + col_offset: int + namespace: str class Visitor(ast.NodeVisitor): def __init__(self) -> None: - self.pandas_namespace: MutableMapping[Offset, str] = {} - self.no_namespace: Set[str] = set() + self.pandas_namespace: MutableMapping[OffsetWithNamespace, str] = {} + self.imported_from_pandas: Set[str] = set() def visit_Attribute(self, node: ast.Attribute) -> None: - if ( - isinstance(node.value, ast.Name) - and node.value.id == "pd" - and node.attr not in EXCLUDE - ): - self.pandas_namespace[(node.lineno, node.col_offset)] = node.attr + if isinstance(node.value, ast.Name) and node.value.id in {"pandas", "pd"}: + offset_with_namespace = OffsetWithNamespace( + node.lineno, node.col_offset, node.value.id + ) + self.pandas_namespace[offset_with_namespace] = node.attr self.generic_visit(node) - def visit_Name(self, node: ast.Name) -> None: - if node.id not in EXCLUDE: - self.no_namespace.add(node.id) + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + if node.module is not None and "pandas" in node.module: + self.imported_from_pandas.update(name.name for name in node.names) self.generic_visit(node) @@ -64,9 +71,11 @@ def replace_inconsistent_pandas_namespace(visitor: Visitor, content: str) -> str tokens = src_to_tokens(content) for n, i in reversed_enumerate(tokens): + offset_with_namespace = OffsetWithNamespace(i.offset[0], i.offset[1], i.src) if ( - i.offset in visitor.pandas_namespace - and visitor.pandas_namespace[i.offset] in visitor.no_namespace + 
offset_with_namespace in visitor.pandas_namespace + and visitor.pandas_namespace[offset_with_namespace] + in visitor.imported_from_pandas ): # Replace `pd` tokens[n] = i._replace(src="") @@ -85,16 +94,28 @@ def check_for_inconsistent_pandas_namespace( visitor = Visitor() visitor.visit(tree) - inconsistencies = visitor.no_namespace.intersection( + inconsistencies = visitor.imported_from_pandas.intersection( visitor.pandas_namespace.values() ) + if not inconsistencies: # No inconsistent namespace usage, nothing to replace. - return content + return None if not replace: - msg = ERROR_MESSAGE.format(name=inconsistencies.pop(), path=path) - raise RuntimeError(msg) + inconsistency = inconsistencies.pop() + lineno, col_offset, prefix = next( + key for key, val in visitor.pandas_namespace.items() if val == inconsistency + ) + msg = ERROR_MESSAGE.format( + lineno=lineno, + col_offset=col_offset, + prefix=prefix, + name=inconsistency, + path=path, + ) + sys.stdout.write(msg) + sys.exit(1) return replace_inconsistent_pandas_namespace(visitor, content) diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 9562a30ba0ffd..eb995158d8cb4 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -4,35 +4,58 @@ check_for_inconsistent_pandas_namespace, ) -BAD_FILE_0 = "cat_0 = Categorical()\ncat_1 = pd.Categorical()" -BAD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = Categorical()" -GOOD_FILE_0 = "cat_0 = Categorical()\ncat_1 = Categorical()" +BAD_FILE_0 = ( + "from pandas import Categorical\n" + "cat_0 = Categorical()\n" + "cat_1 = pd.Categorical()" +) +BAD_FILE_1 = ( + "from pandas import Categorical\n" + "cat_0 = pd.Categorical()\n" + "cat_1 = Categorical()" +) +BAD_FILE_2 = ( + "from pandas import Categorical\n" + "cat_0 = pandas.Categorical()\n" + "cat_1 = Categorical()" +) +GOOD_FILE_0 = ( + "from pandas import Categorical\ncat_0 = 
Categorical()\ncat_1 = Categorical()" +) GOOD_FILE_1 = "cat_0 = pd.Categorical()\ncat_1 = pd.Categorical()" +GOOD_FILE_2 = "from array import array\nimport pandas as pd\narr = pd.array([])" PATH = "t.py" -@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) -def test_inconsistent_usage(content): - msg = r"Found both `pd\.Categorical` and `Categorical` in t\.py" - with pytest.raises(RuntimeError, match=msg): +@pytest.mark.parametrize( + "content, expected", + [ + (BAD_FILE_0, "t.py:3:8: Found both 'pd.Categorical' and 'Categorical' in t.py"), + (BAD_FILE_1, "t.py:2:8: Found both 'pd.Categorical' and 'Categorical' in t.py"), + ( + BAD_FILE_2, + "t.py:2:8: Found both 'pandas.Categorical' and 'Categorical' in t.py", + ), + ], +) +def test_inconsistent_usage(content, expected, capsys): + with pytest.raises(SystemExit): check_for_inconsistent_pandas_namespace(content, PATH, replace=False) + result, _ = capsys.readouterr() + assert result == expected -@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) -def test_consistent_usage(content): +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1, GOOD_FILE_2]) +@pytest.mark.parametrize("replace", [True, False]) +def test_consistent_usage(content, replace): # should not raise - check_for_inconsistent_pandas_namespace(content, PATH, replace=False) + check_for_inconsistent_pandas_namespace(content, PATH, replace=replace) -@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1, BAD_FILE_2]) def test_inconsistent_usage_with_replace(content): result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True) - expected = "cat_0 = Categorical()\ncat_1 = Categorical()" - assert result == expected - - -@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) -def test_consistent_usage_with_replace(content): - result = check_for_inconsistent_pandas_namespace(content, PATH, replace=True) - expected = content + expected = ( 
+ "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = Categorical()" + ) assert result == expected diff --git a/scripts/tests/test_use_pd_array_in_core.py b/scripts/tests/test_use_pd_array_in_core.py new file mode 100644 index 0000000000000..9c66199a82846 --- /dev/null +++ b/scripts/tests/test_use_pd_array_in_core.py @@ -0,0 +1,26 @@ +import pytest + +from scripts.use_pd_array_in_core import use_pd_array + +BAD_FILE_0 = "import pandas as pd\npd.array" +BAD_FILE_1 = "\nfrom pandas import array" +GOOD_FILE_0 = "from pandas import array as pd_array" +GOOD_FILE_1 = "from pandas.core.construction import array as pd_array" +PATH = "t.py" + + +@pytest.mark.parametrize("content", [BAD_FILE_0, BAD_FILE_1]) +def test_inconsistent_usage(content, capsys): + result_msg = ( + "t.py:2:0: Don't use pd.array in core, import array as pd_array instead\n" + ) + with pytest.raises(SystemExit): + use_pd_array(content, PATH) + expected_msg, _ = capsys.readouterr() + assert result_msg == expected_msg + + +@pytest.mark.parametrize("content", [GOOD_FILE_0, GOOD_FILE_1]) +def test_consistent_usage(content): + # should not raise + use_pd_array(content, PATH) diff --git a/scripts/use_pd_array_in_core.py b/scripts/use_pd_array_in_core.py new file mode 100644 index 0000000000000..531084683bdb1 --- /dev/null +++ b/scripts/use_pd_array_in_core.py @@ -0,0 +1,77 @@ +""" +Check that pandas/core imports pandas.array as pd_array. + +This makes it easier to grep for usage of pandas array. 
+ +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run use-pd_array-in-core --all-files + +""" + +import argparse +import ast +import sys +from typing import ( + Optional, + Sequence, +) + +ERROR_MESSAGE = ( + "{path}:{lineno}:{col_offset}: " + "Don't use pd.array in core, import array as pd_array instead\n" +) + + +class Visitor(ast.NodeVisitor): + def __init__(self, path: str) -> None: + self.path = path + + def visit_ImportFrom(self, node: ast.ImportFrom) -> None: + # If array has been imported from somewhere in pandas, + # check it's aliased as pd_array. + if ( + node.module is not None + and node.module.startswith("pandas") + and any(i.name == "array" and i.asname != "pd_array" for i in node.names) + ): + msg = ERROR_MESSAGE.format( + path=self.path, lineno=node.lineno, col_offset=node.col_offset + ) + sys.stdout.write(msg) + sys.exit(1) + super().generic_visit(node) + + def visit_Attribute(self, node: ast.Attribute) -> None: + if ( + isinstance(node.value, ast.Name) + and node.value.id == "pd" + and node.attr == "array" + ): + msg = ERROR_MESSAGE.format( + path=self.path, lineno=node.lineno, col_offset=node.col_offset + ) + sys.stdout.write(msg) + sys.exit(1) + super().generic_visit(node) + + +def use_pd_array(content: str, path: str) -> None: + tree = ast.parse(content) + visitor = Visitor(path) + visitor.visit(tree) + + +def main(argv: Optional[Sequence[str]] = None) -> None: + parser = argparse.ArgumentParser() + parser.add_argument("paths", nargs="*") + args = parser.parse_args(argv) + + for path in args.paths: + with open(path, encoding="utf-8") as fd: + content = fd.read() + use_pd_array(content, path) + + +if __name__ == "__main__": + main() diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index c6b998e3dbddf..98de5b2b1eb84 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -223,7 +223,7 @@ def pandas_validate(func_name: str): ) if doc.see_also: - 
for rel_name, rel_desc in doc.see_also.items(): + for rel_name in doc.see_also: if rel_name.startswith("pandas."): result["errors"].append( pandas_error( diff --git a/setup.cfg b/setup.cfg index a0b6a0cdfc260..2aaafa0391531 100644 --- a/setup.cfg +++ b/setup.cfg @@ -76,7 +76,16 @@ ignore = W504, # line break after binary operator E402, # module level import not at top of file E731, # do not assign a lambda expression, use a def - S001 # found modulo formatter (incorrect picks up mod operations) + S001, # found modulo formatter (incorrect picks up mod operations) + B005, # controversial + B006, # controversial + B007, # controversial + B008, # controversial + B009, # setattr is used to side-step mypy + B010, # getattr is used to side-step mypy + B011, # tests use assert False + B015, # tests use comparisons but not their returned value + B301 # false positives exclude = doc/sphinxext/*.py, doc/build/*.py, @@ -131,6 +140,7 @@ omit = pandas/_typing.py pandas/_version.py plugins = Cython.Coverage +source = pandas [coverage:report] ignore_errors = False diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md index 7cf78958370ac..547a5f30e0516 100644 --- a/web/pandas/community/ecosystem.md +++ b/web/pandas/community/ecosystem.md @@ -360,6 +360,12 @@ Cyberpandas provides an extension type for storing arrays of IP Addresses. These arrays can be stored inside pandas' Series and DataFrame. +### [Pandas-Genomics](https://pandas-genomics.readthedocs.io/en/latest/) + +Pandas-Genomics provides an extension type and extension array for working + with genomics data. It also includes `genomics` accessors for many useful properties + and methods related to QC and analysis of genomics data. + ### [Pint-Pandas](https://github.com/hgrecco/pint-pandas) Pint-Pandas provides an extension type for storing numeric arrays with units. @@ -373,10 +379,11 @@ A directory of projects providing `extension accessors `. 
This is for users to discover new accessors and for library authors to coordinate on the namespace. - | Library | Accessor | Classes | - | --------------------------------------------------------------|----------|-----------------------| - | [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) | `ip` | `Series` | - | [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | - | [pandas_path](https://github.com/drivendataorg/pandas-path/) | `path` | `Index`, `Series` | - | [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | - | [composeml](https://github.com/FeatureLabs/compose) | `slice` | `DataFrame` | + | Library | Accessor | Classes | + | ---------------------------------------------------------------------|------------|-----------------------| + | [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) | `ip` | `Series` | + | [pdvega](https://altair-viz.github.io/pdvega/) | `vgplot` | `Series`, `DataFrame` | + | [pandas-genomics](https://pandas-genomics.readthedocs.io/en/latest/) | `genomics` | `Series`, `DataFrame` | + | [pandas_path](https://github.com/drivendataorg/pandas-path/) | `path` | `Index`, `Series` | + | [pint-pandas](https://github.com/hgrecco/pint-pandas) | `pint` | `Series`, `DataFrame` | + | [composeml](https://github.com/FeatureLabs/compose) | `slice` | `DataFrame` | From 968e99f0f64b5d1db4b96da20a5180e03d5b11e1 Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Fri, 2 Apr 2021 22:29:18 +0200 Subject: [PATCH 25/26] pre-commit fix --- pandas/io/formats/style.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 33d3bc672a90b..e758468dd6d65 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -182,7 +182,7 @@ def __init__( escape: bool = False, ): # validate ordered args - if isinstance(data, 
pd.Series): + if isinstance(data, Series): data = data.to_frame() if not isinstance(data, DataFrame): raise TypeError("``data`` must be a Series or DataFrame") @@ -2464,7 +2464,7 @@ def css(rgba) -> str: if data.ndim == 1: return [css(rgba) for rgba in rgbas] else: - return pd.DataFrame( + return DataFrame( [[css(rgba) for rgba in row] for row in rgbas], index=data.index, columns=data.columns, From 946d11fd04f94716bcf3a743a79ed9511acb537d Mon Sep 17 00:00:00 2001 From: "JHM Darbyshire (iMac)" Date: Tue, 6 Apr 2021 21:52:18 +0200 Subject: [PATCH 26/26] fix typing to new standard --- pandas/io/formats/style.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index 798b54006bf58..267606461f003 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -2356,7 +2356,7 @@ def pred(part) -> bool: def _validate_apply_axis_arg( - arg: Union[FrameOrSeries, Sequence, np.ndarray], + arg: FrameOrSeries | Sequence | np.ndarray, arg_name: str, dtype: Any | None, data: FrameOrSeries, @@ -2416,7 +2416,7 @@ def _background_gradient( text_color_threshold: float = 0.408, vmin: float | None = None, vmax: float | None = None, - gmap: Union[Sequence, np.ndarray, FrameOrSeries] | None = None, + gmap: Sequence | np.ndarray | FrameOrSeries | None = None, ): """ Color background in a range according to the data or a gradient map