From f755cd8046c72e6fbb8559d2a974cde75feeeb32 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 13 May 2021 22:00:23 -0500 Subject: [PATCH 1/4] DOC: freeze old whatsnew notes #6856 --- doc/source/whatsnew/v0.5.0.rst | 6 -- doc/source/whatsnew/v0.6.0.rst | 6 -- doc/source/whatsnew/v0.7.0.rst | 111 +++++++++++++++++++++++++++------ doc/source/whatsnew/v0.7.3.rst | 90 +++++++++++++++++++------- 4 files changed, 160 insertions(+), 53 deletions(-) diff --git a/doc/source/whatsnew/v0.5.0.rst b/doc/source/whatsnew/v0.5.0.rst index 7447a10fa1d6b..8757d9c887785 100644 --- a/doc/source/whatsnew/v0.5.0.rst +++ b/doc/source/whatsnew/v0.5.0.rst @@ -6,12 +6,6 @@ Version 0.5.0 (October 24, 2011) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - - New features ~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.6.0.rst b/doc/source/whatsnew/v0.6.0.rst index 253ca4d4188e5..19e2e85c09a87 100644 --- a/doc/source/whatsnew/v0.6.0.rst +++ b/doc/source/whatsnew/v0.6.0.rst @@ -5,12 +5,6 @@ Version 0.6.0 (November 25, 2011) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - - New features ~~~~~~~~~~~~ - :ref:`Added <reshaping.melt>` ``melt`` function to ``pandas.core.reshape`` diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst index 2fe686d8858a2..733bffa9fdc46 100644 --- a/doc/source/whatsnew/v0.7.0.rst +++ b/doc/source/whatsnew/v0.7.0.rst @@ -31,10 +31,22 @@ New features - Handle differently-indexed output values in ``DataFrame.apply`` (:issue:`498`) -.. ipython:: python +.. 
code-block:: ipython - df = pd.DataFrame(np.random.randn(10, 4)) - df.apply(lambda x: x.describe()) + In [1]: df = pd.DataFrame(np.random.randn(10, 4)) + In [2]: df.apply(lambda x: x.describe()) + Out[2]: + 0 1 2 3 + count 10.000000 10.000000 10.000000 10.000000 + mean 0.190912 -0.395125 -0.731920 -0.403130 + std 0.730951 0.813266 1.112016 0.961912 + min -0.861849 -2.104569 -1.776904 -1.469388 + 25% -0.411391 -0.698728 -1.501401 -1.076610 + 50% 0.380863 -0.228039 -1.191943 -1.004091 + 75% 0.658444 0.057974 -0.034326 0.461706 + max 1.212112 0.577046 1.643563 1.071804 + + [8 rows x 4 columns] - :ref:`Add` ``reorder_levels`` method to Series and DataFrame (:issue:`534`) @@ -116,13 +128,31 @@ One of the potentially riskiest API changes in 0.7.0, but also one of the most important, was a complete review of how **integer indexes** are handled with regard to label-based indexing. Here is an example: -.. ipython:: python +.. code-block:: ipython - s = pd.Series(np.random.randn(10), index=range(0, 20, 2)) - s - s[0] - s[2] - s[4] + In [3]: s = pd.Series(np.random.randn(10), index=range(0, 20, 2)) + In [4]: s + Out[4]: + 0 -1.294524 + 2 0.413738 + 4 0.276662 + 6 -0.472035 + 8 -0.013960 + 10 -0.362543 + 12 -0.006154 + 14 -0.923061 + 16 0.895717 + 18 0.805244 + Length: 10, dtype: float64 + + In [5]: s[0] + Out[5]: -1.2945235902555294 + + In [6]: s[2] + Out[6]: 0.41373810535784006 + + In [7]: s[4] + Out[7]: 0.2766617129497566 This is all exactly identical to the behavior before. However, if you ask for a key **not** contained in the Series, in versions 0.6.1 and prior, Series would @@ -235,22 +265,65 @@ slice to a Series when getting and setting values via ``[]`` (i.e. the ``__getitem__`` and ``__setitem__`` methods). The behavior will be the same as passing similar input to ``ix`` **except in the case of integer indexing**: -.. ipython:: python +.. 
code-block:: ipython - s = pd.Series(np.random.randn(6), index=list('acegkm')) - s - s[['m', 'a', 'c', 'e']] - s['b':'l'] - s['c':'k'] + In [8]: s = pd.Series(np.random.randn(6), index=list('acegkm')) + + In [9]: s + Out[9]: + a -1.206412 + c 2.565646 + e 1.431256 + g 1.340309 + k -1.170299 + m -0.226169 + Length: 6, dtype: float64 + + In [10]: s[['m', 'a', 'c', 'e']] + Out[10]: + m -0.226169 + a -1.206412 + c 2.565646 + e 1.431256 + Length: 4, dtype: float64 + + In [11]: s['b':'l'] + Out[11]: + c 2.565646 + e 1.431256 + g 1.340309 + k -1.170299 + Length: 4, dtype: float64 + + In [12]: s['c':'k'] + Out[12]: + c 2.565646 + e 1.431256 + g 1.340309 + k -1.170299 + Length: 4, dtype: float64 In the case of integer indexes, the behavior will be exactly as before (shadowing ``ndarray``): -.. ipython:: python +.. code-block:: python - s = pd.Series(np.random.randn(6), index=range(0, 12, 2)) - s[[4, 0, 2]] - s[1:5] + In [13]: s = pd.Series(np.random.randn(6), index=range(0, 12, 2)) + + In [14]: s[[4, 0, 2]] + Out[14]: + 4 0.132003 + 0 0.410835 + 2 0.813850 + Length: 3, dtype: float64 + + In [15]: s[1:5] + Out[15]: + 2 0.813850 + 4 0.132003 + 6 -0.827317 + 8 -0.076467 + Length: 4, dtype: float64 If you wish to do indexing with sequences and slicing on an integer index with label semantics, use ``ix``. diff --git a/doc/source/whatsnew/v0.7.3.rst b/doc/source/whatsnew/v0.7.3.rst index 4ca31baf560bb..3e5ea49f87ff2 100644 --- a/doc/source/whatsnew/v0.7.3.rst +++ b/doc/source/whatsnew/v0.7.3.rst @@ -51,21 +51,37 @@ NA boolean comparison API change Reverted some changes to how NA values (represented typically as ``NaN`` or ``None``) are handled in non-numeric Series: -.. ipython:: python +.. 
code-block:: ipython - series = pd.Series(["Steve", np.nan, "Joe"]) - series == "Steve" - series != "Steve" + In [1]: series = pd.Series(["Steve", np.nan, "Joe"]) + + In [2]: series == "Steve" + Out[2]: + 0 True + 1 False + 2 False + Length: 3, dtype: bool + + In [3]: series != "Steve" + Out[3]: + 0 False + 1 True + 2 True + Length: 3, dtype: bool In comparisons, NA / NaN will always come through as ``False`` except with ``!=`` which is ``True``. *Be very careful* with boolean arithmetic, especially negation, in the presence of NA data. You may wish to add an explicit NA filter into boolean array operations if you are worried about this: -.. ipython:: python +.. code-block:: ipython + + In [4]: mask = series == "Steve" - mask = series == "Steve" - series[mask & series.notnull()] + In [5]: series[mask & series.notnull()] + Out[5]: + 0 Steve + Length: 1, dtype: object While propagating NA in comparisons may seem like the right behavior to some users (and you could argue on purely technical grounds that this is the right @@ -80,21 +96,51 @@ Other API changes When calling ``apply`` on a grouped Series, the return value will also be a Series, to be more consistent with the ``groupby`` behavior with DataFrame: -.. ipython:: python - :okwarning: - - df = pd.DataFrame( - { - "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], - "B": ["one", "one", "two", "three", "two", "two", "one", "three"], - "C": np.random.randn(8), - "D": np.random.randn(8), - } - ) - df - grouped = df.groupby("A")["C"] - grouped.describe() - grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values +.. 
code-block:: ipython + + In [6]: df = pd.DataFrame( + ...: { + ...: "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + ...: "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + ...: "C": np.random.randn(8), + ...: "D": np.random.randn(8), + ...: } + ...: ) + ...: + + In [7]: df + Out[7]: + A B C D + 0 foo one 0.469112 -0.861849 + 1 bar one -0.282863 -2.104569 + 2 foo two -1.509059 -0.494929 + 3 bar three -1.135632 1.071804 + 4 foo two 1.212112 0.721555 + 5 bar two -0.173215 -0.706771 + 6 foo one 0.119209 -1.039575 + 7 foo three -1.044236 0.271860 + + [8 rows x 4 columns] + + In [8]: grouped = df.groupby("A")["C"] + + In [9]: grouped.describe() + Out[9]: + count mean std min 25% 50% 75% max + A + bar 3.0 -0.530570 0.526860 -1.135632 -0.709248 -0.282863 -0.228039 -0.173215 + foo 5.0 -0.150572 1.113308 -1.509059 -1.044236 0.119209 0.469112 1.212112 + + [2 rows x 8 columns] + + In [10]: grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values + Out[10]: + A + bar 1 -0.282863 + 5 -0.173215 + foo 0 0.469112 + 4 1.212112 + Name: C, Length: 4, dtype: float64 .. _whatsnew_0.7.3.contributors: From 9db9315d1605d64046e827dd5a1d43fbc1b8fec4 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Thu, 13 May 2021 22:03:46 -0500 Subject: [PATCH 2/4] fix typo --- doc/source/whatsnew/v0.7.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst index 733bffa9fdc46..c77b7a5309527 100644 --- a/doc/source/whatsnew/v0.7.0.rst +++ b/doc/source/whatsnew/v0.7.0.rst @@ -306,7 +306,7 @@ passing similar input to ``ix`` **except in the case of integer indexing**: In the case of integer indexes, the behavior will be exactly as before (shadowing ``ndarray``): -.. code-block:: python +.. 
code-block:: ipython In [13]: s = pd.Series(np.random.randn(6), index=range(0, 12, 2)) From 939b3e83284d2862c50d924da629df1d8483ba0a Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Fri, 14 May 2021 08:40:49 -0500 Subject: [PATCH 3/4] fix extra space --- doc/source/whatsnew/v0.7.0.rst | 16 ++++++++-------- doc/source/whatsnew/v0.7.3.rst | 12 ++++++------ 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/source/whatsnew/v0.7.0.rst b/doc/source/whatsnew/v0.7.0.rst index c77b7a5309527..52747f2992dc4 100644 --- a/doc/source/whatsnew/v0.7.0.rst +++ b/doc/source/whatsnew/v0.7.0.rst @@ -35,7 +35,7 @@ New features In [1]: df = pd.DataFrame(np.random.randn(10, 4)) In [2]: df.apply(lambda x: x.describe()) - Out[2]: + Out[2]: 0 1 2 3 count 10.000000 10.000000 10.000000 10.000000 mean 0.190912 -0.395125 -0.731920 -0.403130 @@ -132,7 +132,7 @@ regard to label-based indexing. Here is an example: In [3]: s = pd.Series(np.random.randn(10), index=range(0, 20, 2)) In [4]: s - Out[4]: + Out[4]: 0 -1.294524 2 0.413738 4 0.276662 @@ -270,7 +270,7 @@ passing similar input to ``ix`` **except in the case of integer indexing**: In [8]: s = pd.Series(np.random.randn(6), index=list('acegkm')) In [9]: s - Out[9]: + Out[9]: a -1.206412 c 2.565646 e 1.431256 @@ -280,7 +280,7 @@ passing similar input to ``ix`` **except in the case of integer indexing**: Length: 6, dtype: float64 In [10]: s[['m', 'a', 'c', 'e']] - Out[10]: + Out[10]: m -0.226169 a -1.206412 c 2.565646 @@ -288,7 +288,7 @@ passing similar input to ``ix`` **except in the case of integer indexing**: Length: 4, dtype: float64 In [11]: s['b':'l'] - Out[11]: + Out[11]: c 2.565646 e 1.431256 g 1.340309 @@ -296,7 +296,7 @@ passing similar input to ``ix`` **except in the case of integer indexing**: Length: 4, dtype: float64 In [12]: s['c':'k'] - Out[12]: + Out[12]: c 2.565646 e 1.431256 g 1.340309 @@ -311,14 +311,14 @@ In the case of integer indexes, the behavior will be exactly as before In [13]: s = 
pd.Series(np.random.randn(6), index=range(0, 12, 2)) In [14]: s[[4, 0, 2]] - Out[14]: + Out[14]: 4 0.132003 0 0.410835 2 0.813850 Length: 3, dtype: float64 In [15]: s[1:5] - Out[15]: + Out[15]: 2 0.813850 4 0.132003 6 -0.827317 diff --git a/doc/source/whatsnew/v0.7.3.rst b/doc/source/whatsnew/v0.7.3.rst index 3e5ea49f87ff2..961a50be4c95a 100644 --- a/doc/source/whatsnew/v0.7.3.rst +++ b/doc/source/whatsnew/v0.7.3.rst @@ -56,14 +56,14 @@ Reverted some changes to how NA values (represented typically as ``NaN`` or In [1]: series = pd.Series(["Steve", np.nan, "Joe"]) In [2]: series == "Steve" - Out[2]: + Out[2]: 0 True 1 False 2 False Length: 3, dtype: bool In [3]: series != "Steve" - Out[3]: + Out[3]: 0 False 1 True 2 True @@ -79,7 +79,7 @@ filter into boolean array operations if you are worried about this: In [4]: mask = series == "Steve" In [5]: series[mask & series.notnull()] - Out[5]: + Out[5]: 0 Steve Length: 1, dtype: object @@ -109,7 +109,7 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: ...: In [7]: df - Out[7]: + Out[7]: A B C D 0 foo one 0.469112 -0.861849 1 bar one -0.282863 -2.104569 @@ -125,7 +125,7 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: In [8]: grouped = df.groupby("A")["C"] In [9]: grouped.describe() - Out[9]: + Out[9]: count mean std min 25% 50% 75% max A bar 3.0 -0.530570 0.526860 -1.135632 -0.709248 -0.282863 -0.228039 -0.173215 @@ -134,7 +134,7 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: [2 rows x 8 columns] In [10]: grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values - Out[10]: + Out[10]: A bar 1 -0.282863 5 -0.173215 From 05f2271511ee59b40bb7ad73ee19bc1fa6779287 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Fri, 14 May 2021 09:15:01 -0500 Subject: [PATCH 4/4] fix extra space --- doc/source/whatsnew/v0.7.3.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.7.3.rst 
b/doc/source/whatsnew/v0.7.3.rst index 961a50be4c95a..5da6bef0c4f03 100644 --- a/doc/source/whatsnew/v0.7.3.rst +++ b/doc/source/whatsnew/v0.7.3.rst @@ -106,7 +106,7 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: ...: "D": np.random.randn(8), ...: } ...: ) - ...: + ...: In [7]: df Out[7]: @@ -127,7 +127,7 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: In [9]: grouped.describe() Out[9]: count mean std min 25% 50% 75% max - A + A bar 3.0 -0.530570 0.526860 -1.135632 -0.709248 -0.282863 -0.228039 -0.173215 foo 5.0 -0.150572 1.113308 -1.509059 -1.044236 0.119209 0.469112 1.212112 @@ -135,7 +135,7 @@ Series, to be more consistent with the ``groupby`` behavior with DataFrame: In [10]: grouped.apply(lambda x: x.sort_values()[-2:]) # top 2 values Out[10]: - A + A bar 1 -0.282863 5 -0.173215 foo 0 0.469112