From 903026b1153a3d861125b5f3d75f3b2cb91955e9 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 4 Dec 2018 15:15:29 +0100 Subject: [PATCH 1/4] Fix PEP-8 issues in indexing-, missing_data-, options- and release.rst Signed-off-by: Fabian Haase --- doc/source/indexing.rst | 131 +++++++++++++++++++----------------- doc/source/missing_data.rst | 24 +++---- doc/source/options.rst | 51 +++++++------- doc/source/release.rst | 17 +++-- setup.cfg | 4 -- 5 files changed, 115 insertions(+), 112 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index dc0c6dd027b3c..7ef3316c1c677 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -6,10 +6,11 @@ :suppress: import numpy as np + import pandas as pd + np.random.seed(123456) np.set_printoptions(precision=4, suppress=True) - import pandas as pd - pd.options.display.max_rows=15 + pd.options.display.max_rows = 15 *************************** Indexing and Selecting Data @@ -143,9 +144,10 @@ indexing functionality: .. ipython:: python dates = pd.date_range('1/1/2000', periods=8) - df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) df - panel = pd.Panel({'one' : df, 'two' : df - df.mean()}) + panel = pd.Panel({'one': df, 'two': df - df.mean()}) panel .. note:: @@ -183,14 +185,14 @@ columns. .. ipython:: python df[['A', 'B']] - df.loc[:,['B', 'A']] = df[['A', 'B']] + df.loc[:, ['B', 'A']] = df[['A', 'B']] df[['A', 'B']] The correct way to swap column values is by using raw values: .. ipython:: python - df.loc[:,['B', 'A']] = df[['A', 'B']].to_numpy() + df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy() df[['A', 'B']] @@ -208,7 +210,7 @@ as an attribute: .. ipython:: python - sa = pd.Series([1,2,3],index=list('abc')) + sa = pd.Series([1, 2, 3], index=list('abc')) dfa = df.copy() .. ipython:: python @@ -248,7 +250,7 @@ You can also assign a ``dict`` to a row of a ``DataFrame``: .. ipython:: python x = pd.DataFrame({'x': [1, 2, 3], 'y': [3, 4, 5]}) - x.iloc[1] = dict(x=9, y=99) + x.iloc[1] = {'x': 9, 'y': 99} x You can use attribute access to modify an existing element of a Series or column of a DataFrame, but be careful; @@ -257,10 +259,10 @@ new column. In 0.21.0 and later, this will raise a ``UserWarning``: .. code-block:: ipython - In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) - In[2]: df.two = [4, 5, 6] + In [1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In [2]: df.two = [4, 5, 6] UserWarning: Pandas doesn't allow Series to be assigned into nonexistent columns - see https://pandas.pydata.org/pandas-docs/stable/indexing.html#attribute_access - In[3]: df + In [3]: df Out[3]: one 0 1.0 @@ -317,7 +319,9 @@ Selection By Label .. ipython:: python - dfl = pd.DataFrame(np.random.randn(5,4), columns=list('ABCD'), index=pd.date_range('20130101',periods=5)) + dfl = pd.DataFrame(np.random.randn(5, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=5)) dfl .. code-block:: ipython @@ -354,7 +358,7 @@ The ``.loc`` attribute is the primary access method. The following are valid inp .. ipython:: python - s1 = pd.Series(np.random.randn(6),index=list('abcdef')) + s1 = pd.Series(np.random.randn(6), index=list('abcdef')) s1 s1.loc['c':] s1.loc['b'] @@ -370,7 +374,7 @@ With a DataFrame: .. ipython:: python - df1 = pd.DataFrame(np.random.randn(6,4), + df1 = pd.DataFrame(np.random.randn(6, 4), index=list('abcdef'), columns=list('ABCD')) df1 @@ -413,7 +417,7 @@ are returned: .. ipython:: python - s = pd.Series(list('abcde'), index=[0,3,2,5,4]) + s = pd.Series(list('abcde'), index=[0, 3, 2, 5, 4]) s.loc[3:5] If at least one of the two is absent, but the index is sorted, and can be @@ -453,7 +457,7 @@ The ``.iloc`` attribute is the primary access method. The following are valid in .. ipython:: python - s1 = pd.Series(np.random.randn(5), index=list(range(0,10,2))) + s1 = pd.Series(np.random.randn(5), index=list(range(0, 10, 2))) s1 s1.iloc[:3] s1.iloc[3] @@ -469,9 +473,9 @@ With a DataFrame: .. ipython:: python - df1 = pd.DataFrame(np.random.randn(6,4), - index=list(range(0,12,2)), - columns=list(range(0,8,2))) + df1 = pd.DataFrame(np.random.randn(6, 4), + index=list(range(0, 12, 2)), + columns=list(range(0, 8, 2))) df1 Select via integer slicing: @@ -525,7 +529,7 @@ an empty axis (e.g. an empty DataFrame being returned). .. ipython:: python - dfl = pd.DataFrame(np.random.randn(5,2), columns=list('AB')) + dfl = pd.DataFrame(np.random.randn(5, 2), columns=list('AB')) dfl dfl.iloc[:, 2:3] dfl.iloc[:, 1:3] @@ -827,7 +831,7 @@ In the ``Series`` case this is effectively an appending operation. .. ipython:: python - se = pd.Series([1,2,3]) + se = pd.Series([1, 2, 3]) se se[5] = 5. se @@ -836,10 +840,10 @@ A ``DataFrame`` can be enlarged on either axis via ``.loc``. .. ipython:: python - dfi = pd.DataFrame(np.arange(6).reshape(3,2), - columns=['A','B']) + dfi = pd.DataFrame(np.arange(6).reshape(3, 2), + columns=['A', 'B']) dfi - dfi.loc[:,'C'] = dfi.loc[:,'A'] + dfi.loc[:, 'C'] = dfi.loc[:, 'A'] dfi This is like an ``append`` operation on the ``DataFrame``. @@ -879,7 +883,7 @@ You can also set using these same indexers. .. ipython:: python - df.at[dates[-1]+1, 0] = 7 + df.at[dates[-1] + 1, 0] = 7 df Boolean indexing @@ -917,9 +921,9 @@ more complex criteria: .. ipython:: python - df2 = pd.DataFrame({'a' : ['one', 'one', 'two', 'three', 'two', 'one', 'six'], - 'b' : ['x', 'y', 'y', 'x', 'y', 'x', 'x'], - 'c' : np.random.randn(7)}) + df2 = pd.DataFrame({'a': ['one', 'one', 'two', 'three', 'two', 'one', 'six'], + 'b': ['x', 'y', 'y', 'x', 'y', 'x', 'x'], + 'c': np.random.randn(7)}) # only want 'two' or 'three' criterion = df2['a'].map(lambda x: x.startswith('t')) @@ -937,7 +941,7 @@ and :ref:`Advanced Indexing ` you may select along more than one axis .. ipython:: python - df2.loc[criterion & (df2['b'] == 'x'),'b':'c'] + df2.loc[criterion & (df2['b'] == 'x'), 'b':'c'] .. _indexing.basics.indexing_isin: @@ -1041,7 +1045,8 @@ The code below is equivalent to ``df.where(df < 0)``. :suppress: dates = pd.date_range('1/1/2000', periods=8) - df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) .. ipython:: python @@ -1074,7 +1079,7 @@ without creating a copy: .. ipython:: python df_orig = df.copy() - df_orig.where(df > 0, -df, inplace=True); + df_orig.where(df > 0, -df, inplace=True) df_orig .. note:: @@ -1095,7 +1100,7 @@ partial setting via ``.loc`` (but on the contents rather than the axis labels). .. ipython:: python df2 = df.copy() - df2[ df2[1:4] > 0] = 3 + df2[df2[1:4] > 0] = 3 df2 Where can also accept ``axis`` and ``level`` parameters to align the input when @@ -1104,14 +1109,14 @@ performing the ``where``. .. ipython:: python df2 = df.copy() - df2.where(df2>0,df2['A'],axis='index') + df2.where(df2 > 0, df2['A'], axis='index') This is equivalent to (but faster than) the following. .. ipython:: python df2 = df.copy() - df.apply(lambda x, y: x.where(x>0,y), y=df['A']) + df.apply(lambda x, y: x.where(x > 0, y), y=df['A']) .. versionadded:: 0.18.1 @@ -1175,8 +1180,8 @@ If instead you don't want to or cannot name your index, you can use the name .. ipython:: python :suppress: - old_index = index - del index + old_index = index # noqa: F821 + del index # noqa: F821 .. ipython:: python @@ -1200,7 +1205,7 @@ If instead you don't want to or cannot name your index, you can use the name df = pd.DataFrame({'a': np.random.randint(5, size=5)}) df.index.name = 'a' - df.query('a > 2') # uses the column 'a', not the index + df.query('a > 2') # uses the column 'a', not the index You can still use the index in a query expression by using the special identifier 'index': @@ -1307,7 +1312,7 @@ The ``in`` and ``not in`` operators try: old_d = d - del d + del d # noqa: F821 except NameError: pass @@ -1334,7 +1339,8 @@ You can combine this with other expressions for very succinct queries: .. ipython:: python - # rows where cols a and b have overlapping values and col c's values are less than col d's + # rows where cols a and b have overlapping values + # and col c's values are less than col d's df.query('a in b and c < d') # pure Python @@ -1442,7 +1448,8 @@ floating point values generated using ``numpy.random.randn()``. .. ipython:: python :suppress: - df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D']) + df = pd.DataFrame(np.random.randn(8, 4), + index=dates, columns=['A', 'B', 'C', 'D']) df2 = df.copy() @@ -1509,7 +1516,7 @@ default value. .. ipython:: python - s = pd.Series([1,2,3], index=['a','b','c']) + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) s.get('a') # equivalent to s['a'] s.get('x', default=-1) @@ -1522,8 +1529,8 @@ NumPy array. For instance: .. ipython:: python - dflookup = pd.DataFrame(np.random.rand(20,4), columns = ['A','B','C','D']) - dflookup.lookup(list(range(0,10,2)), ['B','C','A','B','D']) + dflookup = pd.DataFrame(np.random.rand(20, 4), columns = ['A', 'B', 'C', 'D']) + dflookup.lookup(list(range(0, 10, 2)), ['B', 'C', 'A', 'B', 'D']) .. _indexing.class: @@ -1650,7 +1657,9 @@ Missing values idx1 idx1.fillna(2) - idx2 = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), pd.NaT, pd.Timestamp('2011-01-03')]) + idx2 = pd.DatetimeIndex([pd.Timestamp('2011-01-01'), + pd.NaT, + pd.Timestamp('2011-01-03')]) idx2 idx2.fillna(pd.Timestamp('2011-01-02')) @@ -1673,10 +1682,10 @@ To create a new, re-indexed DataFrame: .. ipython:: python :suppress: - data = pd.DataFrame({'a' : ['bar', 'bar', 'foo', 'foo'], - 'b' : ['one', 'two', 'one', 'two'], - 'c' : ['z', 'y', 'x', 'w'], - 'd' : [1., 2., 3, 4]}) + data = pd.DataFrame({'a': ['bar', 'bar', 'foo', 'foo'], + 'b': ['one', 'two', 'one', 'two'], + 'c': ['z', 'y', 'x', 'w'], + 'd': [1., 2., 3, 4]}) .. ipython:: python @@ -1755,8 +1764,8 @@ When setting values in a pandas object, care must be taken to avoid what is call list('efgh'), list('ijkl'), list('mnop')], - columns=pd.MultiIndex.from_product([['one','two'], - ['first','second']])) + columns=pd.MultiIndex.from_product([['one', 'two'], + ['first', 'second']])) dfmi Compare these two access methods: @@ -1767,7 +1776,7 @@ Compare these two access methods: .. ipython:: python - dfmi.loc[:,('one','second')] + dfmi.loc[:, ('one', 'second')] These both yield the same results, so which should you use? It is instructive to understand the order of operations on these and why method 2 (``.loc``) is much preferred over method 1 (chained ``[]``). @@ -1794,17 +1803,17 @@ interpreter executes this code: .. code-block:: python - dfmi.loc[:, ('one', 'second')] = value + dfmi.loc[:, ('one', 'second')] = value # noqa: F821 # becomes - dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) + dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) # noqa: F821 But this code is handled differently: .. code-block:: python - dfmi['one']['second'] = value + dfmi['one']['second'] = value # noqa: F821 # becomes - dfmi.__getitem__('one').__setitem__('second', value) + dfmi.__getitem__('one').__setitem__('second', value) # noqa: F821 See that ``__getitem__`` in there? Outside of simple cases, it's very hard to predict whether it will return a view or a copy (it depends on the memory layout @@ -1829,7 +1838,7 @@ that you've done this: def do_something(df): foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! # ... many lines here ... - foo['quux'] = value # We don't know whether this will modify df or not! + foo['quux'] = value # We don't know whether this will modify df or not! # noqa: E501, F821 return foo Yikes! @@ -1859,9 +1868,9 @@ chained indexing expression, you can set the :ref:`option ` .. ipython:: python :okwarning: - dfb = pd.DataFrame({'a' : ['one', 'one', 'two', - 'three', 'two', 'one', 'six'], - 'c' : np.arange(7)}) + dfb = pd.DataFrame({'a': ['one', 'one', 'two', + 'three', 'two', 'one', 'six'], + 'c': np.arange(7)}) # This will show the SettingWithCopyWarning # but the frame values will be set @@ -1889,8 +1898,8 @@ This is the correct access method: .. ipython:: python - dfc = pd.DataFrame({'A':['aaa','bbb','ccc'],'B':[1,2,3]}) - dfc.loc[0,'A'] = 11 + dfc = pd.DataFrame({'A': ['aaa', 'bbb', 'ccc'], 'B': [1, 2, 3]}) + dfc.loc[0, 'A'] = 11 dfc This *can* work at times, but it is not guaranteed to, and therefore should be avoided: diff --git a/doc/source/missing_data.rst b/doc/source/missing_data.rst index 7b6d338ee5b6a..85a376e52ef7a 100644 --- a/doc/source/missing_data.rst +++ b/doc/source/missing_data.rst @@ -5,10 +5,7 @@ import numpy as np import pandas as pd - pd.options.display.max_rows=15 - import matplotlib - # matplotlib.style.use('default') - import matplotlib.pyplot as plt + pd.options.display.max_rows = 15 .. _missing_data: @@ -91,7 +88,7 @@ Series and DataFrame objects: .. ipython:: python - None == None + None == None # noqa: E711 np.nan == np.nan So as compared to above, a scalar equality comparison versus a ``None/np.nan`` doesn't provide useful information. @@ -112,7 +109,7 @@ pandas objects provide compatibility between ``NaT`` and ``NaN``. df2 = df.copy() df2['timestamp'] = pd.Timestamp('20120101') df2 - df2.loc[['a','c','h'],['one','timestamp']] = np.nan + df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan df2 df2.get_dtype_counts() @@ -197,7 +194,7 @@ The sum of an empty or all-NA Series or column of a DataFrame is 0. .. ipython:: python pd.Series([np.nan]).sum() - + pd.Series([]).sum() The product of an empty or all-NA Series or column of a DataFrame is 1. @@ -205,7 +202,7 @@ The product of an empty or all-NA Series or column of a DataFrame is 1. .. ipython:: python pd.Series([np.nan]).prod() - + pd.Series([]).prod() @@ -297,10 +294,10 @@ use case of this is to fill a DataFrame with the mean of that column. .. ipython:: python - dff = pd.DataFrame(np.random.randn(10,3), columns=list('ABC')) - dff.iloc[3:5,0] = np.nan - dff.iloc[4:6,1] = np.nan - dff.iloc[5:8,2] = np.nan + dff = pd.DataFrame(np.random.randn(10, 3), columns=list('ABC')) + dff.iloc[3:5, 0] = np.nan + dff.iloc[4:6, 1] = np.nan + dff.iloc[5:8, 2] = np.nan dff dff.fillna(dff.mean()) @@ -483,7 +480,8 @@ filled since the last valid observation: .. ipython:: python - ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, np.nan, 13, np.nan, np.nan]) + ser = pd.Series([np.nan, np.nan, 5, np.nan, np.nan, + np.nan, 13, np.nan, np.nan]) # fill all consecutive values in a forward direction ser.interpolate() diff --git a/doc/source/options.rst b/doc/source/options.rst index dc4d0da32008c..cffeb211489be 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -5,7 +5,6 @@ .. ipython:: python :suppress: - import pandas as pd import numpy as np np.random.seed(123456) @@ -45,9 +44,9 @@ and so passing in a substring will work - as long as it is unambiguous: .. ipython:: python pd.get_option("display.max_rows") - pd.set_option("display.max_rows",101) + pd.set_option("display.max_rows", 101) pd.get_option("display.max_rows") - pd.set_option("max_r",102) + pd.set_option("max_r", 102) pd.get_option("display.max_rows") @@ -100,7 +99,7 @@ All options also have a default value, and you can use ``reset_option`` to do ju .. ipython:: python pd.get_option("display.max_rows") - pd.set_option("display.max_rows",999) + pd.set_option("display.max_rows", 999) pd.get_option("display.max_rows") pd.reset_option("display.max_rows") pd.get_option("display.max_rows") @@ -120,9 +119,9 @@ are restored automatically when you exit the `with` block: .. ipython:: python - with pd.option_context("display.max_rows",10,"display.max_columns", 5): - print(pd.get_option("display.max_rows")) - print(pd.get_option("display.max_columns")) + with pd.option_context("display.max_rows", 10, "display.max_columns", 5): + print(pd.get_option("display.max_rows")) + print(pd.get_option("display.max_columns")) print(pd.get_option("display.max_rows")) print(pd.get_option("display.max_columns")) @@ -157,7 +156,7 @@ lines are replaced by an ellipsis. .. ipython:: python - df = pd.DataFrame(np.random.randn(7,2)) + df = pd.DataFrame(np.random.randn(7, 2)) pd.set_option('max_rows', 7) df pd.set_option('max_rows', 5) @@ -169,7 +168,7 @@ dataframes to stretch across pages, wrapped over the full column vs row-wise. .. ipython:: python - df = pd.DataFrame(np.random.randn(5,10)) + df = pd.DataFrame(np.random.randn(5, 10)) pd.set_option('expand_frame_repr', True) df pd.set_option('expand_frame_repr', False) @@ -181,7 +180,7 @@ dataframes to stretch across pages, wrapped over the full column vs row-wise. .. ipython:: python - df = pd.DataFrame(np.random.randn(10,10)) + df = pd.DataFrame(np.random.randn(10, 10)) pd.set_option('max_rows', 5) pd.set_option('large_repr', 'truncate') df @@ -197,7 +196,7 @@ of this length or longer will be truncated with an ellipsis. df = pd.DataFrame(np.array([['foo', 'bar', 'bim', 'uncomfortably long string'], ['horse', 'cow', 'banana', 'apple']])) - pd.set_option('max_colwidth',40) + pd.set_option('max_colwidth', 40) df pd.set_option('max_colwidth', 6) df @@ -208,7 +207,7 @@ will be given. .. ipython:: python - df = pd.DataFrame(np.random.randn(10,10)) + df = pd.DataFrame(np.random.randn(10, 10)) pd.set_option('max_info_columns', 11) df.info() pd.set_option('max_info_columns', 5) @@ -222,7 +221,7 @@ can specify the option ``df.info(null_counts=True)`` to override on showing a pa .. ipython:: python - df = pd.DataFrame(np.random.choice([0,1,np.nan], size=(10,10))) + df = pd.DataFrame(np.random.choice([0, 1, np.nan], size=(10, 10))) df pd.set_option('max_info_rows', 11) df.info() @@ -235,10 +234,10 @@ This is only a suggestion. .. ipython:: python - df = pd.DataFrame(np.random.randn(5,5)) - pd.set_option('precision',7) + df = pd.DataFrame(np.random.randn(5, 5)) + pd.set_option('precision', 7) df - pd.set_option('precision',4) + pd.set_option('precision', 4) df ``display.chop_threshold`` sets at what level pandas rounds to zero when @@ -247,7 +246,7 @@ precision at which the number is stored. .. ipython:: python - df = pd.DataFrame(np.random.randn(6,6)) + df = pd.DataFrame(np.random.randn(6, 6)) pd.set_option('chop_threshold', 0) df pd.set_option('chop_threshold', .5) @@ -259,7 +258,9 @@ The options are 'right', and 'left'. .. ipython:: python - df = pd.DataFrame(np.array([np.random.randn(6), np.random.randint(1,9,6)*.1, np.zeros(6)]).T, + df = pd.DataFrame(np.array([np.random.randn(6), + np.random.randint(1, 9, 6) * .1, + np.zeros(6)]).T, columns=['A', 'B', 'C'], dtype='float') pd.set_option('colheader_justify', 'right') df @@ -461,14 +462,14 @@ For instance: pd.set_eng_float_format(accuracy=3, use_eng_prefix=True) s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e']) - s/1.e3 - s/1.e6 + s / 1.e3 + s / 1.e6 .. ipython:: python :suppress: :okwarning: - pd.reset_option('^display\.') + pd.reset_option(r'^display.') To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`. @@ -490,7 +491,7 @@ If a DataFrame or Series contains these characters, the default output mode may .. ipython:: python df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']}) - df; + df .. image:: _static/option_unicode01.png @@ -501,7 +502,7 @@ times than the standard ``len`` function. .. ipython:: python pd.set_option('display.unicode.east_asian_width', True) - df; + df .. image:: _static/option_unicode02.png @@ -513,7 +514,7 @@ By default, an "Ambiguous" character's width, such as "¡" (inverted exclamation .. ipython:: python df = pd.DataFrame({'a': ['xxx', u'¡¡'], 'b': ['yyy', u'¡¡']}) - df; + df .. image:: _static/option_unicode03.png @@ -525,7 +526,7 @@ However, setting this option incorrectly for your terminal will cause these char .. ipython:: python pd.set_option('display.unicode.ambiguous_as_wide', True) - df; + df .. image:: _static/option_unicode04.png diff --git a/doc/source/release.rst b/doc/source/release.rst index af6fc23e12b78..7842fdbe30886 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -12,8 +12,7 @@ import matplotlib.pyplot as plt plt.close('all') - pd.options.display.max_rows=15 - import pandas.util.testing as tm + pd.options.display.max_rows = 15 ************* Release Notes @@ -2859,7 +2858,7 @@ API Changes In [5]: arr / arr2 Out[5]: array([0, 0, 1, 4]) - In [6]: pd.Series(arr) / pd.Series(arr2) # no future import required + In [6]: pd.Series(arr) / pd.Series(arr2) # no future import required Out[6]: 0 0.200000 1 0.666667 @@ -3670,12 +3669,12 @@ Improvements to existing features .. ipython:: python - p = pd.Panel(np.random.randn(3,4,4),items=['ItemA','ItemB','ItemC'], - major_axis=pd.date_range('20010102',periods=4), - minor_axis=['A','B','C','D']) + p = pd.Panel(np.random.randn(3, 4, 4), items=['ItemA', 'ItemB', 'ItemC'], + major_axis=pd.date_range('20010102', periods=4), + minor_axis=['A', 'B', 'C', 'D']) p p.reindex(items=['ItemA']).squeeze() - p.reindex(items=['ItemA'],minor=['B']).squeeze() + p.reindex(items=['ItemA'], minor=['B']).squeeze() - Improvement to Yahoo API access in ``pd.io.data.Options`` (:issue:`2758`) - added option `display.max_seq_items` to control the number of elements printed per sequence pprinting it. (:issue:`2979`) @@ -3689,10 +3688,10 @@ Improvements to existing features .. ipython:: python idx = pd.date_range("2001-10-1", periods=5, freq='M') - ts = pd.Series(np.random.rand(len(idx)),index=idx) + ts = pd.Series(np.random.rand(len(idx)), index=idx) ts['2001'] - df = pd.DataFrame(dict(A = ts)) + df = pd.DataFrame({'A': ts}) df['2001'] - added option `display.mpl_style` providing a sleeker visual style for plots. Based on https://gist.github.com/huyng/816622 (:issue:`3075`). diff --git a/setup.cfg b/setup.cfg index 25f713822f127..99daadb16d7ca 100644 --- a/setup.cfg +++ b/setup.cfg @@ -74,11 +74,7 @@ exclude = doc/source/enhancingperf.rst doc/source/extending.rst doc/source/groupby.rst - doc/source/indexing.rst doc/source/merging.rst - doc/source/missing_data.rst - doc/source/options.rst - doc/source/release.rst doc/source/reshaping.rst doc/source/visualization.rst From 89cf4911b11ad2d8c0ccd48db72f0dd45d51038e Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 01:17:53 +0100 Subject: [PATCH 2/4] Review of @datapythonista Signed-off-by: Fabian Haase --- doc/source/indexing.rst | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 772635bd24a1b..3a50f066e1603 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1167,25 +1167,12 @@ with the name ``a``. If instead you don't want to or cannot name your index, you can use the name ``index`` in your query expression: -.. ipython:: python - :suppress: - - old_index = index # noqa: F821 - del index # noqa: F821 - .. ipython:: python df = pd.DataFrame(np.random.randint(n, size=(n, 2)), columns=list('bc')) df df.query('index < b < c') -.. ipython:: python - :suppress: - - index = old_index - del old_index - - .. note:: If the name of your index overlaps with a column name, the column name is @@ -1297,15 +1284,6 @@ The ``in`` and ``not in`` operators ``not in`` comparison operators, providing a succinct syntax for calling the ``isin`` method of a ``Series`` or ``DataFrame``. -.. ipython:: python - :suppress: - - try: - old_d = d - del d # noqa: F821 - except NameError: - pass - .. ipython:: python # get all rows where columns "a" and "b" have overlapping values @@ -1507,7 +1485,7 @@ default value. .. ipython:: python s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) - s.get('a') # equivalent to s['a'] + s.get('a') # equivalent to s['a'] s.get('x', default=-1) The :meth:`~pandas.DataFrame.lookup` Method @@ -1828,7 +1806,8 @@ that you've done this: def do_something(df): foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! # ... many lines here ... - foo['quux'] = value # We don't know whether this will modify df or not! # noqa: E501, F821 + # We don't know whether this will modify df or not! + foo['quux'] = value # noqa: F821 return foo Yikes! From 7817f3eb82824a608a1ccbeb1ce025645cf454a8 Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 01:55:18 +0100 Subject: [PATCH 3/4] Review of @datapythonista Signed-off-by: Fabian Haase --- doc/source/indexing.rst | 23 +++++++++-------------- doc/source/options.rst | 2 +- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 3a50f066e1603..7937387f1f07f 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1384,15 +1384,6 @@ Of course, expressions can be arbitrarily complex too: shorter == longer -.. ipython:: python - :suppress: - - try: - d = old_d - del old_d - except NameError: - pass - Performance of :meth:`~pandas.DataFrame.query` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1769,19 +1760,23 @@ But it turns out that assigning to the product of chained indexing has inherently unpredictable results. To see this, think about how the Python interpreter executes this code: +.. ipython:: python + :suppress: + value = None + .. code-block:: python - dfmi.loc[:, ('one', 'second')] = value # noqa: F821 + dfmi.loc[:, ('one', 'second')] = value # becomes - dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) # noqa: F821 + dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) But this code is handled differently: .. code-block:: python - dfmi['one']['second'] = value # noqa: F821 + dfmi['one']['second'] = value # becomes - dfmi.__getitem__('one').__setitem__('second', value) # noqa: F821 + dfmi.__getitem__('one').__setitem__('second', value) See that ``__getitem__`` in there? Outside of simple cases, it's very hard to predict whether it will return a view or a copy (it depends on the memory layout @@ -1807,7 +1802,7 @@ that you've done this: foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! # ... many lines here ... # We don't know whether this will modify df or not! - foo['quux'] = value # noqa: F821 + foo['quux'] = value return foo Yikes! diff --git a/doc/source/options.rst b/doc/source/options.rst index 59a009f810e58..e91be3e6ae730 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -463,7 +463,7 @@ For instance: :suppress: :okwarning: - pd.reset_option(r'^display.') + pd.reset_option("^display") To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`. From eca5f2dcc589995592cb79078cd21c759ea231fc Mon Sep 17 00:00:00 2001 From: Fabian Haase Date: Tue, 11 Dec 2018 01:55:18 +0100 Subject: [PATCH 4/4] Review of @datapythonista Signed-off-by: Fabian Haase --- doc/source/indexing.rst | 24 ++++++++++-------------- doc/source/options.rst | 2 +- doc/source/release.rst | 5 ----- 3 files changed, 11 insertions(+), 20 deletions(-) diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst index 3a50f066e1603..add1a4e587240 100644 --- a/doc/source/indexing.rst +++ b/doc/source/indexing.rst @@ -1384,15 +1384,6 @@ Of course, expressions can be arbitrarily complex too: shorter == longer -.. ipython:: python - :suppress: - - try: - d = old_d - del old_d - except NameError: - pass - Performance of :meth:`~pandas.DataFrame.query` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1769,19 +1760,24 @@ But it turns out that assigning to the product of chained indexing has inherently unpredictable results. To see this, think about how the Python interpreter executes this code: +.. ipython:: python + :suppress: + + value = None + .. code-block:: python - dfmi.loc[:, ('one', 'second')] = value # noqa: F821 + dfmi.loc[:, ('one', 'second')] = value # becomes - dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) # noqa: F821 + dfmi.loc.__setitem__((slice(None), ('one', 'second')), value) But this code is handled differently: .. code-block:: python - dfmi['one']['second'] = value # noqa: F821 + dfmi['one']['second'] = value # becomes - dfmi.__getitem__('one').__setitem__('second', value) # noqa: F821 + dfmi.__getitem__('one').__setitem__('second', value) See that ``__getitem__`` in there? Outside of simple cases, it's very hard to predict whether it will return a view or a copy (it depends on the memory layout @@ -1807,7 +1803,7 @@ that you've done this: foo = df[['bar', 'baz']] # Is foo a view? A copy? Nobody knows! # ... many lines here ... # We don't know whether this will modify df or not! - foo['quux'] = value # noqa: F821 + foo['quux'] = value return foo Yikes! diff --git a/doc/source/options.rst b/doc/source/options.rst index 59a009f810e58..e91be3e6ae730 100644 --- a/doc/source/options.rst +++ b/doc/source/options.rst @@ -463,7 +463,7 @@ For instance: :suppress: :okwarning: - pd.reset_option(r'^display.') + pd.reset_option("^display") To round floats on a case-by-case basis, you can also use :meth:`~pandas.Series.round` and :meth:`~pandas.DataFrame.round`. diff --git a/doc/source/release.rst b/doc/source/release.rst index 33e0d9be6d73a..abbba9d6ff8ec 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -2,11 +2,6 @@ {{ header }} -.. ipython:: python - :suppress: - - import pandas.util.testing as tm - ************* Release Notes *************