From 6f4a546350212b8c48cf208727969750ceb5737c Mon Sep 17 00:00:00 2001 From: thoo Date: Tue, 11 Dec 2018 12:41:53 -0500 Subject: [PATCH 1/8] Fix flake8 issues on v19, v20 and v21.0.rst --- doc/source/whatsnew/v0.19.0.rst | 188 ++++++++++---------- doc/source/whatsnew/v0.20.0.rst | 293 ++++++++++++++++---------------- doc/source/whatsnew/v0.21.0.rst | 27 +-- setup.cfg | 3 - 4 files changed, 253 insertions(+), 258 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 1e4e7a6c80fa4..c9cf65d8018f8 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -5,12 +5,6 @@ v0.19.0 (October 2, 2016) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - - This is a major release from 0.18.1 and includes number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -82,42 +76,41 @@ This also illustrates using the ``by`` parameter to group data before merging. .. 
ipython:: python - trades = pd.DataFrame({ - 'time': pd.to_datetime(['20160525 13:30:00.023', - '20160525 13:30:00.038', - '20160525 13:30:00.048', - '20160525 13:30:00.048', - '20160525 13:30:00.048']), - 'ticker': ['MSFT', 'MSFT', - 'GOOG', 'GOOG', 'AAPL'], - 'price': [51.95, 51.95, - 720.77, 720.92, 98.00], - 'quantity': [75, 155, - 100, 100, 100]}, - columns=['time', 'ticker', 'price', 'quantity']) - - quotes = pd.DataFrame({ - 'time': pd.to_datetime(['20160525 13:30:00.023', - '20160525 13:30:00.023', - '20160525 13:30:00.030', - '20160525 13:30:00.041', - '20160525 13:30:00.048', - '20160525 13:30:00.049', - '20160525 13:30:00.072', - '20160525 13:30:00.075']), - 'ticker': ['GOOG', 'MSFT', 'MSFT', - 'MSFT', 'GOOG', 'AAPL', 'GOOG', - 'MSFT'], - 'bid': [720.50, 51.95, 51.97, 51.99, - 720.50, 97.99, 720.50, 52.01], - 'ask': [720.93, 51.96, 51.98, 52.00, - 720.93, 98.01, 720.88, 52.03]}, - columns=['time', 'ticker', 'bid', 'ask']) + trades = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', + 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100]}, + columns=['time', 'ticker', 'price', 'quantity']) + + quotes = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.023', + '20160525 13:30:00.030', + '20160525 13:30:00.041', + '20160525 13:30:00.048', + '20160525 13:30:00.049', + '20160525 13:30:00.072', + '20160525 13:30:00.075']), + 'ticker': ['GOOG', 'MSFT', 'MSFT', 'MSFT', + 'GOOG', 'AAPL', 'GOOG', 'MSFT'], + 'bid': [720.50, 51.95, 51.97, 51.99, + 720.50, 97.99, 720.50, 52.01], + 'ask': [720.93, 51.96, 51.98, 52.00, + 720.93, 98.01, 720.88, 52.03]}, + columns=['time', 'ticker', 'bid', 'ask']) .. 
ipython:: python - trades - quotes + trades + quotes An asof merge joins on the ``on``, typically a datetimelike field, which is ordered, and in this case we are using a grouper in the ``by`` field. This is like a left-outer join, except @@ -125,9 +118,9 @@ that forward filling happens automatically taking the most recent non-NaN value. .. ipython:: python - pd.merge_asof(trades, quotes, - on='time', - by='ticker') + pd.merge_asof(trades, quotes, + on='time', + by='ticker') This returns a merged DataFrame with the entries in the same order as the original left passed DataFrame (``trades`` in this case), with the fields of the ``quotes`` merged. @@ -142,16 +135,17 @@ See the full documentation :ref:`here `. .. ipython:: python - dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, - index=pd.date_range('20130101 09:00:00', periods=5, freq='s')) - dft + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.date_range('20130101 09:00:00', + periods=5, freq='s')) + dft This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. .. ipython:: python - dft.rolling(2).sum() - dft.rolling(2, min_periods=1).sum() + dft.rolling(2).sum() + dft.rolling(2, min_periods=1).sum() Specifying an offset allows a more intuitive specification of the rolling frequency. @@ -164,13 +158,13 @@ Using a non-regular, but still monotonic index, rolling with an integer window d .. 
ipython:: python - dft = DataFrame({'B': [0, 1, 2, np.nan, 4]}, - index = pd.Index([pd.Timestamp('20130101 09:00:00'), - pd.Timestamp('20130101 09:00:02'), - pd.Timestamp('20130101 09:00:03'), - pd.Timestamp('20130101 09:00:05'), - pd.Timestamp('20130101 09:00:06')], - name='foo')) + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.Index([pd.Timestamp('20130101 09:00:00'), + pd.Timestamp('20130101 09:00:02'), + pd.Timestamp('20130101 09:00:03'), + pd.Timestamp('20130101 09:00:05'), + pd.Timestamp('20130101 09:00:06')], + name='foo')) dft dft.rolling(2).sum() @@ -291,7 +285,7 @@ Categorical Concatenation **Previous behavior**: - .. code-block:: ipython +.. code-block:: ipython In [1]: pd.concat([s1, s2]) ValueError: incompatible categories in categorical concat @@ -319,7 +313,7 @@ These provide date offsets anchored (by default) to the 15th and end of month, a .. ipython:: python - Timestamp('2016-01-01') + SemiMonthEnd() + pd.Timestamp('2016-01-01') + SemiMonthEnd() pd.date_range('2015-01-01', freq='SM', periods=4) @@ -327,7 +321,7 @@ These provide date offsets anchored (by default) to the 15th and end of month, a .. ipython:: python - Timestamp('2016-01-01') + SemiMonthBegin() + pd.Timestamp('2016-01-01') + SemiMonthBegin() pd.date_range('2015-01-01', freq='SMS', periods=4) @@ -366,18 +360,18 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci .. ipython:: python - midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4], - [1, 2, np.nan, np.nan]]) - midx - midx.dropna() - midx.dropna(how='all') + midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4], + [1, 2, np.nan, np.nan]]) + midx + midx.dropna() + midx.dropna(how='all') ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here ` (:issue:`10008`, :issue:`13156`) .. 
ipython:: python idx = pd.Index(["a1a2", "b1", "c1"]) - idx.str.extractall("[ab](?P<digit>\d)") + idx.str.extractall(r"[ab](?P<digit>\d)") ``Index.astype()`` now accepts an optional boolean argument ``copy``, which allows optional copying if the requirements on dtype are satisfied (:issue:`13209`) @@ -453,7 +447,7 @@ The following are now part of this API: import pprint from pandas.api import types - funcs = [ f for f in dir(types) if not f.startswith('_') ] + funcs = [f for f in dir(types) if not f.startswith('_')] pprint.pprint(funcs) .. note:: @@ -478,15 +472,15 @@ Other enhancements .. ipython:: python - df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), - 'a': np.arange(5)}, - index=pd.MultiIndex.from_arrays([ - [1,2,3,4,5], - pd.date_range('2015-01-01', freq='W', periods=5)], - names=['v','d'])) - df - df.resample('M', on='date').sum() - df.resample('M', level='d').sum() + df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), + 'a': np.arange(5)}, + index=pd.MultiIndex.from_arrays([[1, 2, 3, 4, 5], + pd.date_range('2015-01-01', freq='W', periods=5)], + names=['v', 'd']) + ) + df + df.resample('M', on='date').sum() + df.resample('M', level='d').sum() - The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) @@ -547,7 +541,7 @@ API changes .. ipython:: python - s = pd.Series([1,2,3]) + s = pd.Series([1, 2, 3]) **Previous behavior**: @@ -953,7 +947,7 @@ of integers (:issue:`13988`). 
In [6]: pi = pd.PeriodIndex(['2011-01', '2011-02'], freq='M') In [7]: pi.values - array([492, 493]) + Out[7]: array([492, 493]) **New behavior**: @@ -1006,7 +1000,8 @@ DatetimeIndex objects resulting in a TimedeltaIndex: .. code-block:: ipython - In [1]: pd.DatetimeIndex(['2016-01-01', '2016-01-02']) - pd.DatetimeIndex(['2016-01-02', '2016-01-03']) + In [1]: (pd.DatetimeIndex(['2016-01-01', '2016-01-02']) + ...: - pd.DatetimeIndex(['2016-01-02', '2016-01-03'])) FutureWarning: using '-' to provide set differences with datetimelike Indexes is deprecated, use .difference() Out[1]: DatetimeIndex(['2016-01-01'], dtype='datetime64[ns]', freq=None) @@ -1014,7 +1009,8 @@ DatetimeIndex objects resulting in a TimedeltaIndex: .. ipython:: python - pd.DatetimeIndex(['2016-01-01', '2016-01-02']) - pd.DatetimeIndex(['2016-01-02', '2016-01-03']) + (pd.DatetimeIndex(['2016-01-01', '2016-01-02']) + - pd.DatetimeIndex(['2016-01-02', '2016-01-03'])) .. _whatsnew_0190.api.difference: @@ -1060,21 +1056,23 @@ Previously, most ``Index`` classes returned ``np.ndarray``, and ``DatetimeIndex` .. code-block:: ipython - In [1]: pd.Index([1, 2, 3]).unique() - Out[1]: array([1, 2, 3]) + In [1]: pd.Index([1, 2, 3]).unique() + Out[1]: array([1, 2, 3]) - In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='Asia/Tokyo').unique() - Out[2]: - DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00', - '2011-01-03 00:00:00+09:00'], - dtype='datetime64[ns, Asia/Tokyo]', freq=None) + In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', + ...: '2011-01-03'], tz='Asia/Tokyo').unique() + Out[2]: + DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00', + '2011-01-03 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq=None) **New behavior**: .. 
ipython:: python - pd.Index([1, 2, 3]).unique() - pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='Asia/Tokyo').unique() + pd.Index([1, 2, 3]).unique() + pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='Asia/Tokyo').unique() .. _whatsnew_0190.api.multiindex: @@ -1236,25 +1234,25 @@ Operators now preserve dtypes - Sparse data structure now can preserve ``dtype`` after arithmetic ops (:issue:`13848`) - .. ipython:: python +.. ipython:: python - s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) - s.dtype + s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) + s.dtype - s + 1 + s + 1 - Sparse data structure now support ``astype`` to convert internal ``dtype`` (:issue:`13900`) - .. ipython:: python +.. ipython:: python - s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) - s - s.astype(np.int64) + s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) + s + s.astype(np.int64) ``astype`` fails if data contains values which cannot be converted to specified ``dtype``. Note that the limitation is applied to ``fill_value`` which default is ``np.nan``. - .. code-block:: ipython +.. code-block:: ipython In [7]: pd.SparseSeries([1., np.nan, 2., np.nan], fill_value=np.nan).astype(np.int64) Out[7]: diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index d5a2422e456ee..852cc6a9c9922 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -71,7 +71,7 @@ Here is a sample .. ipython:: python df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C'], - index=pd.date_range('1/1/2000', periods=10)) + index=pd.date_range('1/1/2000', periods=10)) df.iloc[3:7] = np.nan df @@ -95,7 +95,7 @@ per unique function. Those functions applied to a particular column will be ``Na .. 
ipython:: python - df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) + df.agg({'A': ['sum', 'min'], 'B': ['min', 'max']}) The API also supports a ``.transform()`` function for broadcasting results. @@ -136,7 +136,7 @@ fixed-width text files and :func:`read_excel` for parsing Excel files, now accep data = "a b\n1 2\n3 4" pd.read_fwf(StringIO(data)).dtypes - pd.read_fwf(StringIO(data), dtype={'a':'float64', 'b':'object'}).dtypes + pd.read_fwf(StringIO(data), dtype={'a': 'float64', 'b': 'object'}).dtypes .. _whatsnew_0120.enhancements.datetime_origin: @@ -194,13 +194,12 @@ Previously, only ``gzip`` compression was supported. By default, compression of URLs and paths are now inferred using their file extensions. Additionally, support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). -.. code-block:: python +.. ipython:: python - url = 'https://github.com/{repo}/raw/{branch}/{path}'.format( - repo = 'pandas-dev/pandas', - branch = 'master', - path = 'pandas/tests/io/parser/data/salaries.csv.bz2', - ) + url = (r'https://github.com/{repo}/raw/{branch}/{path}' + .format(repo='pandas-dev/pandas', + branch='master', + path='pandas/tests/io/parser/data/salaries.csv.bz2')) df = pd.read_table(url, compression='infer') # default, infer compression df = pd.read_table(url, compression='bz2') # explicitly specify compression df.head(2) @@ -217,10 +216,9 @@ See :ref:`the docs here. ` .. ipython:: python - df = pd.DataFrame({ - 'A': np.random.randn(1000), - 'B': 'foo', - 'C': pd.date_range('20130101', periods=1000, freq='s')}) + df = pd.DataFrame({'A': np.random.randn(1000), + 'B': 'foo', + 'C': pd.date_range('20130101', periods=1000, freq='s')}) Using an explicit compression type @@ -319,8 +317,8 @@ the data. 
df = pd.DataFrame( {'A': [1, 2, 3], 'B': ['a', 'b', 'c'], - 'C': pd.date_range('2016-01-01', freq='d', periods=3), - }, index=pd.Index(range(3), name='idx')) + 'C': pd.date_range('2016-01-01', freq='d', periods=3)}, + index=pd.Index(range(3), name='idx')) df df.to_json(orient='table') @@ -384,9 +382,9 @@ For example, after running the following, ``styled.xlsx`` renders as below: axis=1) df.iloc[0, 2] = np.nan df - styled = df.style.\ - applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black').\ - highlight_max() + styled = (df.style + .applymap(lambda val: 'color: %s' % 'red' if val < 0 else 'black') + .highlight_max()) styled.to_excel('styled.xlsx', engine='openpyxl') .. image:: ../_static/style-excel.png @@ -449,8 +447,8 @@ An ``IntervalIndex`` can also be used in ``Series`` and ``DataFrame`` as the ind .. ipython:: python df = pd.DataFrame({'A': range(4), - 'B': pd.cut([0, 3, 1, 1], bins=c.categories)} - ).set_index('B') + 'B': pd.cut([0, 3, 1, 1], bins=c.categories) + }).set_index('B') df Selecting via a specific interval: @@ -551,7 +549,7 @@ then write them out again after applying the procedure below. .. code-block:: ipython - In [2]: s = pd.TimeSeries([1,2,3], index=pd.date_range('20130101', periods=3)) + In [2]: s = pd.TimeSeries([1, 2, 3], index=pd.date_range('20130101', periods=3)) In [3]: s Out[3]: @@ -585,9 +583,9 @@ Map on Index types now return other Index types .. ipython:: python - idx = Index([1, 2]) + idx = pd.Index([1, 2]) idx - mi = MultiIndex.from_tuples([(1, 2), (2, 4)]) + mi = pd.MultiIndex.from_tuples([(1, 2), (2, 4)]) mi Previous Behavior: @@ -622,7 +620,8 @@ New Behavior: .. ipython:: python - s = Series(date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H').tz_localize('Asia/Tokyo')) + s = pd.Series(pd.date_range('2011-01-02T00:00', '2011-01-02T02:00', freq='H') + .tz_localize('Asia/Tokyo')) s Previous Behavior: @@ -684,65 +683,65 @@ data-types would yield different return types. These are now made consistent. 
(: - Datetime tz-aware - Previous behaviour: +Previous behaviour: .. code-block:: ipython - # Series - In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() - Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) + # Series + In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) - In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')])) - Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) + Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') - # Index - In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() - Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) + # Index + In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) - In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]) - Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]) + Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') - New Behavior: +New Behavior: - .. ipython:: python +.. 
ipython:: python - # Series, returns an array of Timestamp tz-aware - pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() - pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')])) + # Series, returns an array of Timestamp tz-aware + pd.Series([pd.Timestamp(r'20160101', tz=r'US/Eastern'), + pd.Timestamp(r'20160101', tz=r'US/Eastern')]).unique() + pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) - # Index, returns a DatetimeIndex - pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() - pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')])) + # Index, returns a DatetimeIndex + pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) - Categoricals - Previous behaviour: +Previous behaviour: .. code-block:: ipython - In [1]: pd.Series(list('baabc'), dtype='category').unique() - Out[1]: - [b, a, c] - Categories (3, object): [b, a, c] + In [1]: pd.Series(list('baabc'), dtype='category').unique() + Out[1]: + [b, a, c] + Categories (3, object): [b, a, c] - In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) - Out[2]: array(['b', 'a', 'c'], dtype=object) + In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) + Out[2]: array(['b', 'a', 'c'], dtype=object) - New Behavior: +New Behavior: - .. ipython:: python +.. ipython:: python - # returns a Categorical - pd.Series(list('baabc'), dtype='category').unique() - pd.unique(pd.Series(list('baabc'), dtype='category')) + # returns a Categorical + pd.Series(list('baabc'), dtype='category').unique() + pd.unique(pd.Series(list('baabc'), dtype='category')) .. 
_whatsnew_0200.api_breaking.s3: @@ -762,33 +761,33 @@ Partial String Indexing Changes .. ipython:: python - df = DataFrame({'a': [1, 2, 3]}, DatetimeIndex(['2011-12-31 23:59:59', - '2012-01-01 00:00:00', - '2012-01-01 00:00:01'])) + df = pd.DataFrame({'a': [1, 2, 3]}, pd.DatetimeIndex(['2011-12-31 23:59:59', + '2012-01-01 00:00:00', + '2012-01-01 00:00:01'])) Previous Behavior: -.. code-block:: ipython + .. code-block:: ipython - In [4]: df['2011-12-31 23:59:59'] - Out[4]: - a - 2011-12-31 23:59:59 1 + In [4]: df['2011-12-31 23:59:59'] + Out[4]: + a + 2011-12-31 23:59:59 1 - In [5]: df['a']['2011-12-31 23:59:59'] - Out[5]: - 2011-12-31 23:59:59 1 - Name: a, dtype: int64 + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: + 2011-12-31 23:59:59 1 + Name: a, dtype: int64 New Behavior: -.. code-block:: ipython + .. code-block:: ipython - In [4]: df['2011-12-31 23:59:59'] - KeyError: '2011-12-31 23:59:59' + In [4]: df['2011-12-31 23:59:59'] + KeyError: '2011-12-31 23:59:59' - In [5]: df['a']['2011-12-31 23:59:59'] - Out[5]: 1 + In [5]: df['a']['2011-12-31 23:59:59'] + Out[5]: 1 .. _whatsnew_0200.api_breaking.concat_dtypes: @@ -808,7 +807,7 @@ Now the smallest acceptable dtype will be used (:issue:`13247`) Previous Behavior: -.. code-block:: ipython + .. code-block:: ipython In [7]: pd.concat([df1, df2]).dtypes Out[7]: @@ -839,7 +838,7 @@ In previous versions, showing ``.memory_usage()`` on a pandas structure that has Previous Behavior: -.. code-block:: ipython + .. code-block:: ipython In [8]: index = Index(['foo', 'bar', 'baz']) @@ -854,7 +853,7 @@ Previous Behavior: New Behavior: -.. code-block:: ipython + .. code-block:: ipython In [8]: index = Index(['foo', 'bar', 'baz']) @@ -879,39 +878,39 @@ This is *unchanged* from prior versions, but shown for illustration purposes: .. 
ipython:: python - df = DataFrame(np.arange(6), columns=['value'], index=MultiIndex.from_product([list('BA'), range(3)])) - df + df = pd.DataFrame(np.arange(6), columns=['value'], + index=pd.MultiIndex.from_product([list('BA'), range(3)])) + df .. ipython:: python - df.index.is_lexsorted() - df.index.is_monotonic + df.index.is_lexsorted() + df.index.is_monotonic Sorting works as expected .. ipython:: python - df.sort_index() + df.sort_index() .. ipython:: python - df.sort_index().index.is_lexsorted() - df.sort_index().index.is_monotonic + df.sort_index().index.is_lexsorted() + df.sort_index().index.is_monotonic However, this example, which has a non-monotonic 2nd level, doesn't behave as desired. .. ipython:: python - df = pd.DataFrame( - {'value': [1, 2, 3, 4]}, - index=pd.MultiIndex([['a', 'b'], ['bb', 'aa']], - [[0, 0, 1, 1], [0, 1, 0, 1]])) + df = pd.DataFrame({'value': [1, 2, 3, 4]}, + index=pd.MultiIndex([['a', 'b'], ['bb', 'aa']], + [[0, 0, 1, 1], [0, 1, 0, 1]])) df Previous Behavior: -.. code-block:: python + .. code-block:: python In [11]: df.sort_index() Out[11]: @@ -946,7 +945,7 @@ This format is consistent with ``groupby.agg()`` when applying multiple function Previous Behavior: -.. code-block:: ipython + .. code-block:: ipython In [1]: df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) @@ -1011,7 +1010,7 @@ See the section on :ref:`Windowed Binary Operations ` for Previous Behavior: -.. code-block:: ipython + .. code-block:: ipython In [2]: df.rolling(12).corr() Out[2]: @@ -1034,7 +1033,7 @@ Retrieving a correlation matrix for a cross-section df.rolling(12).corr().loc['2016-04-07'] -.. _whatsnew_0200.api_breaking.hdfstore_where: + .. _whatsnew_0200.api_breaking.hdfstore_where: HDFStore where string comparison ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1051,7 +1050,7 @@ usually resulting in an invalid comparison, returning an empty result frame. The Previous Behavior: -.. code-block:: ipython + .. 
code-block:: ipython In [4]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') File "<string>", line 1 @@ -1061,7 +1060,7 @@ Previous Behavior: New Behavior: -.. code-block:: ipython + .. code-block:: ipython In [18]: ts = pd.Timestamp('2014-01-01') @@ -1086,50 +1085,50 @@ joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` method. - ``Index.intersection`` - .. ipython:: python +.. ipython:: python - left = pd.Index([2, 1, 0]) - left - right = pd.Index([1, 2, 3]) - right + left = pd.Index([2, 1, 0]) + left + right = pd.Index([1, 2, 3]) + right - Previous Behavior: +Previous Behavior: .. code-block:: ipython - In [4]: left.intersection(right) - Out[4]: Int64Index([1, 2], dtype='int64') + In [4]: left.intersection(right) + Out[4]: Int64Index([1, 2], dtype='int64') - New Behavior: +New Behavior: - .. ipython:: python +.. ipython:: python - left.intersection(right) + left.intersection(right) - ``DataFrame.join`` and ``pd.merge`` - .. ipython:: python +.. ipython:: python - left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) - left - right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) - right + left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + left + right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) + right Previous Behavior: .. code-block:: ipython - In [4]: left.join(right, how='inner') - Out[4]: - a b - 1 10 100 - 2 20 200 + In [4]: left.join(right, how='inner') + Out[4]: + a b + 1 10 100 + 2 20 200 - New Behavior: +New Behavior: - .. ipython:: python +.. ipython:: python - left.join(right, how='inner') + left.join(right, how='inner') .. _whatsnew_0200.api_breaking.pivot_table: @@ -1141,14 +1140,14 @@ is fixed that allowed this to return a ``Series`` under certain circumstance. (: .. ipython:: python - df = DataFrame({'col1': [3, 4, 5], - 'col2': ['C', 'D', 'E'], - 'col3': [1, 3, 9]}) + df = pd.DataFrame({'col1': [3, 4, 5], + 'col2': ['C', 'D', 'E'], + 'col3': [1, 3, 9]}) df Previous Behavior: -.. 
code-block:: ipython + .. code-block:: ipython In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) Out[2]: @@ -1264,7 +1263,7 @@ will be removed from the ``*.common`` locations in a future release. (:issue:`15 The following are now part of this API: -.. code-block:: python + .. code-block:: python ['DtypeWarning', 'EmptyDataError', @@ -1338,25 +1337,25 @@ Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some example Previous Behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. -.. code-block:: ipython + .. code-block:: ipython - In [3]: df.ix[[0, 2], 'A'] - Out[3]: - a 1 - c 3 - Name: A, dtype: int64 + In [3]: df.ix[[0, 2], 'A'] + Out[3]: + a 1 + c 3 + Name: A, dtype: int64 Using ``.loc``. Here we will select the appropriate indexes from the index, then use *label* indexing. .. ipython:: python - df.loc[df.index[[0, 2]], 'A'] + df.loc[df.index[[0, 2]], 'A'] Using ``.iloc``. Here we will get the location of the 'A' column, then use *positional* indexing to select things. .. ipython:: python - df.iloc[[0, 2], df.columns.get_loc('A')] + df.iloc[[0, 2], df.columns.get_loc('A')] .. _whatsnew_0200.api_breaking.deprecate_panel: @@ -1408,10 +1407,10 @@ This is an illustrative example: .. ipython:: python - df = pd.DataFrame({'A': [1, 1, 1, 2, 2], - 'B': range(5), - 'C': range(5)}) - df + df = pd.DataFrame({'A': [1, 1, 1, 2, 2], + 'B': range(5), + 'C': range(5)}) + df Here is a typical useful syntax for computing different aggregations for different columns. This is a natural, and useful syntax. We aggregate from the dict-to-list by taking the specified @@ -1424,7 +1423,7 @@ columns and applying the list of functions. This returns a ``MultiIndex`` for th Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This is a combination aggregation & renaming: -.. code-block:: ipython + .. 
code-block:: ipython In [6]: df.groupby('A').B.agg({'foo': 'count'}) FutureWarning: using a dict on a Series for aggregation @@ -1445,7 +1444,7 @@ You can accomplish the same operation, more idiomatically by: Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``: -.. code-block:: python + .. code-block:: python In [23]: (df.groupby('A') .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) @@ -1468,7 +1467,7 @@ You can accomplish nearly the same by: (df.groupby('A') .agg({'B': 'sum', 'C': 'min'}) .rename(columns={'B': 'foo', 'C': 'bar'}) - ) + ) @@ -1485,16 +1484,16 @@ Users can import these from ``pandas.plotting`` as well. Previous script: -.. code-block:: python + .. code-block:: python pd.tools.plotting.scatter_matrix(df) pd.scatter_matrix(df) Should be changed to: -.. code-block:: python + .. code-block:: python - pd.plotting.scatter_matrix(df) + pd.plotting.scatter_matrix(df) diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 73bdedb3d3194..b99164aa797ed 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -97,14 +97,14 @@ attribute on the ``DataFrame``: .. code-block:: ipython - In[1]: df = pd.DataFrame({'one': [1., 2., 3.]}) - In[2]: df.two = [4, 5, 6] + In [1]: df = pd.DataFrame({'one': [1., 2., 3.]}) + In [2]: df.two = [4, 5, 6] This does not raise any obvious exceptions, but also does not create a new column: .. code-block:: ipython - In[3]: df + In [3]: df Out[3]: one 0 1.0 @@ -126,7 +126,7 @@ For example: .. 
ipython:: python - df = pd.DataFrame(np.arange(8).reshape(2,4), + df = pd.DataFrame(np.arange(8).reshape(2, 4), columns=['A', 'B', 'C', 'D']) df df.drop(['B', 'C'], axis=1) @@ -244,8 +244,9 @@ First we set the data: import numpy as np n = 1000 df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), - 'Product': np.random.choice(['Product_1', 'Product_2', 'Product_3'], n), - 'Revenue': (np.random.random(n)*50+10).round(2), + 'Product': np.random.choice(['Product_1', + 'Product_2', 'Product_3'], n), + 'Revenue': (np.random.random(n) * 50 + 10).round(2), 'Quantity': np.random.randint(1, 10, size=n)}) df.head(2) @@ -254,7 +255,7 @@ Now, to find prices per store/product, we can simply do: .. ipython:: python (df.groupby(['Store', 'Product']) - .pipe(lambda grp: grp.Revenue.sum()/grp.Quantity.sum()) + .pipe(lambda grp: grp.Revenue.sum() / grp.Quantity.sum()) .unstack().round(2)) See the :ref:`documentation ` for more. @@ -393,7 +394,7 @@ Calling ``sum`` or ``prod`` on an empty or all-``NaN`` ``Series``, or columns of .. ipython:: python - s = Series([np.nan]) + s = pd.Series([np.nan]) Previously WITHOUT ``bottleneck`` installed: @@ -531,7 +532,7 @@ Furthermore this will now correctly box the results of iteration for :func:`Data .. ipython:: python - d = {'a':[1], 'b':['b']} + d = {'a': [1], 'b': ['b']} df = pd.DataFrame(d) Previously: @@ -589,7 +590,7 @@ Previously Behavior: .. ipython:: python - s = pd.Series([1,2,3], index=['a', 'b', 'c']) + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) s .. code-block:: ipython @@ -736,7 +737,7 @@ Previously assignments, ``.where()`` and ``.fillna()`` with a ``bool`` assignmen .. ipython:: python - s = Series([1, 2, 3]) + s = pd.Series([1, 2, 3]) .. code-block:: python @@ -819,7 +820,7 @@ Previous Behavior .. ipython:: python - s = Series(['20130101 00:00:00'] * 3) + s = pd.Series(['20130101 00:00:00'] * 3) .. 
code-block:: ipython @@ -966,7 +967,7 @@ The :meth:`Series.select` and :meth:`DataFrame.select` methods are deprecated in .. ipython:: python - df = DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) + df = pd.DataFrame({'A': [1, 2, 3]}, index=['foo', 'bar', 'baz']) .. code-block:: ipython diff --git a/setup.cfg b/setup.cfg index 30b4d13bd0a66..e89a33748dc1a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -68,9 +68,6 @@ exclude = doc/source/whatsnew/v0.17.1.rst doc/source/whatsnew/v0.18.0.rst doc/source/whatsnew/v0.18.1.rst - doc/source/whatsnew/v0.19.0.rst - doc/source/whatsnew/v0.20.0.rst - doc/source/whatsnew/v0.21.0.rst doc/source/whatsnew/v0.23.1.rst doc/source/whatsnew/v0.23.2.rst doc/source/basics.rst From 78372e4e61fca89ad0a444e836d0e938a63c5679 Mon Sep 17 00:00:00 2001 From: thoo Date: Tue, 11 Dec 2018 13:49:31 -0500 Subject: [PATCH 2/8] Fix at L293 --- doc/source/whatsnew/v0.19.0.rst | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index c9cf65d8018f8..78ab4e76076b8 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -283,18 +283,18 @@ Categorical Concatenation s1 = pd.Series(['a', 'b'], dtype='category') s2 = pd.Series(['b', 'c'], dtype='category') - **Previous behavior**: +**Previous behavior**: .. code-block:: ipython - In [1]: pd.concat([s1, s2]) - ValueError: incompatible categories in categorical concat + In [1]: pd.concat([s1, s2]) + ValueError: incompatible categories in categorical concat - **New behavior**: +**New behavior**: - .. ipython:: python +.. ipython:: python - pd.concat([s1, s2]) + pd.concat([s1, s2]) .. 
_whatsnew_0190.enhancements.semi_month_offsets: From 1131f7130ddf81c4d391d630c5c4294650f3278d Mon Sep 17 00:00:00 2001 From: thoo Date: Tue, 11 Dec 2018 14:20:00 -0500 Subject: [PATCH 3/8] Fix code-block --- doc/source/whatsnew/v0.20.0.rst | 62 +++++++++++++++------------------ 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 852cc6a9c9922..fad4edf7d80e3 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -5,12 +5,6 @@ v0.20.1 (May 5, 2017) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - - This is a major release from 0.19.2 and includes a number of API changes, deprecations, new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all users upgrade to this version. @@ -685,24 +679,24 @@ data-types would yield different return types. These are now made consistent. (: Previous behaviour: - .. code-block:: ipython +.. 
code-block:: ipython # Series In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')])) + ...: pd.Timestamp('20160101', tz='US/Eastern')])) Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') # Index In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]) + ...: pd.Timestamp('20160101', tz='US/Eastern')]) Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') New Behavior: @@ -725,7 +719,7 @@ New Behavior: Previous behaviour: - .. code-block:: ipython +.. code-block:: ipython In [1]: pd.Series(list('baabc'), dtype='category').unique() Out[1]: @@ -766,7 +760,7 @@ Partial String Indexing Changes '2012-01-01 00:00:01'])) Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [4]: df['2011-12-31 23:59:59'] Out[4]: @@ -781,7 +775,7 @@ Previous Behavior: New Behavior: - .. code-block:: ipython +.. code-block:: ipython In [4]: df['2011-12-31 23:59:59'] KeyError: '2011-12-31 23:59:59' @@ -807,7 +801,7 @@ Now the smallest acceptable dtype will be used (:issue:`13247`) Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [7]: pd.concat([df1, df2]).dtypes Out[7]: @@ -838,9 +832,9 @@ In previous versions, showing ``.memory_usage()`` on a pandas structure that has Previous Behavior: - .. code-block:: ipython +.. 
code-block:: ipython - In [8]: index = Index(['foo', 'bar', 'baz']) + In [8]: index = pd.Index(['foo', 'bar', 'baz']) In [9]: index.memory_usage(deep=True) Out[9]: 180 @@ -853,9 +847,9 @@ Previous Behavior: New Behavior: - .. code-block:: ipython +.. code-block:: ipython - In [8]: index = Index(['foo', 'bar', 'baz']) + In [8]: index = pd.Index(['foo', 'bar', 'baz']) In [9]: index.memory_usage(deep=True) Out[9]: 180 @@ -910,7 +904,7 @@ doesn't behave as desired. Previous Behavior: - .. code-block:: python +.. code-block:: python In [11]: df.sort_index() Out[11]: @@ -945,7 +939,7 @@ This format is consistent with ``groupby.agg()`` when applying multiple function Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [1]: df = pd.DataFrame({'A': [1, 1, 2, 2], 'B': [1, 2, 3, 4]}) @@ -1010,7 +1004,7 @@ See the section on :ref:`Windowed Binary Operations ` for Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [2]: df.rolling(12).corr() Out[2]: @@ -1050,7 +1044,7 @@ usually resulting in an invalid comparison, returning an empty result frame. The Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [4]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') File "", line 1 @@ -1060,7 +1054,7 @@ Previous Behavior: New Behavior: - .. code-block:: ipython +.. code-block:: ipython In [18]: ts = pd.Timestamp('2014-01-01') @@ -1116,7 +1110,7 @@ New Behavior: Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [4]: left.join(right, how='inner') Out[4]: @@ -1147,7 +1141,7 @@ is fixed that allowed this to return a ``Series`` under certain circumstance. (: Previous Behavior: - .. code-block:: ipython +.. code-block:: ipython In [2]: df.pivot_table('col1', index=['col3', 'col2'], aggfunc=np.sum) Out[2]: @@ -1263,7 +1257,7 @@ will be removed from the ``*.common`` locations in a future release. (:issue:`15 The following are now part of this API: - .. code-block:: python +.. 
code-block:: python ['DtypeWarning', 'EmptyDataError', @@ -1337,7 +1331,7 @@ Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some example Previous Behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. - .. code-block:: ipython +.. code-block:: ipython In [3]: df.ix[[0, 2], 'A'] Out[3]: @@ -1423,7 +1417,7 @@ columns and applying the list of functions. This returns a ``MultiIndex`` for th Here's an example of the first deprecation, passing a dict to a grouped ``Series``. This is a combination aggregation & renaming: - .. code-block:: ipython +.. code-block:: ipython In [6]: df.groupby('A').B.agg({'foo': 'count'}) FutureWarning: using a dict on a Series for aggregation @@ -1444,11 +1438,11 @@ You can accomplish the same operation, more idiomatically by: Here's an example of the second deprecation, passing a dict-of-dict to a grouped ``DataFrame``: - .. code-block:: python +.. code-block:: python In [23]: (df.groupby('A') - .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) - ) + ...: .agg({'B': {'foo': 'sum'}, 'C': {'bar': 'min'}}) + ...: ) FutureWarning: using a dict with renaming is deprecated and will be removed in a future version @@ -1484,14 +1478,14 @@ Users can import these from ``pandas.plotting`` as well. Previous script: - .. code-block:: python +.. code-block:: python pd.tools.plotting.scatter_matrix(df) pd.scatter_matrix(df) Should be changed to: - .. code-block:: python +.. 
code-block:: python pd.plotting.scatter_matrix(df) From c529863dedbdb14abf819f59d2ae81a9c7f659cf Mon Sep 17 00:00:00 2001 From: thoo Date: Tue, 11 Dec 2018 14:25:29 -0500 Subject: [PATCH 4/8] Fix code-block indent --- doc/source/whatsnew/v0.20.0.rst | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index fad4edf7d80e3..1bf4154e5a71c 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -1079,29 +1079,29 @@ joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` method. - ``Index.intersection`` -.. ipython:: python + .. ipython:: python left = pd.Index([2, 1, 0]) left right = pd.Index([1, 2, 3]) right -Previous Behavior: + Previous Behavior: .. code-block:: ipython In [4]: left.intersection(right) Out[4]: Int64Index([1, 2], dtype='int64') -New Behavior: + New Behavior: -.. ipython:: python + .. ipython:: python left.intersection(right) - ``DataFrame.join`` and ``pd.merge`` -.. ipython:: python + .. ipython:: python left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) left @@ -1110,7 +1110,7 @@ New Behavior: Previous Behavior: -.. code-block:: ipython + .. code-block:: ipython In [4]: left.join(right, how='inner') Out[4]: @@ -1118,9 +1118,9 @@ New Behavior: 1 10 100 2 20 200 -New Behavior: + New Behavior: -.. ipython:: python + .. 
ipython:: python left.join(right, how='inner') From b67a4b5825a030c28c1a1556e5259f634c26bb70 Mon Sep 17 00:00:00 2001 From: thoo Date: Tue, 11 Dec 2018 15:33:01 -0500 Subject: [PATCH 5/8] Fix code-block --- doc/source/whatsnew/v0.20.0.rst | 116 ++++++++++++++++---------------- doc/source/whatsnew/v0.21.0.rst | 4 +- 2 files changed, 61 insertions(+), 59 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 1bf4154e5a71c..dc6dba1e5a6ae 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -677,65 +677,65 @@ data-types would yield different return types. These are now made consistent. (: - Datetime tz-aware -Previous behaviour: + Previous behaviour: -.. code-block:: ipython + .. code-block:: ipython - # Series - In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() - Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) + # Series + In [5]: pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() + Out[5]: array([Timestamp('2016-01-01 00:00:00-0500', tz='US/Eastern')], dtype=object) - In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - ...: pd.Timestamp('20160101', tz='US/Eastern')])) - Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + In [6]: pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')])) + Out[6]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') - # Index - In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() - Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) + # Index + In [7]: pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]).unique() + 
Out[7]: DatetimeIndex(['2016-01-01 00:00:00-05:00'], dtype='datetime64[ns, US/Eastern]', freq=None) - In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), - ...: pd.Timestamp('20160101', tz='US/Eastern')]) - Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') + In [8]: pd.unique([pd.Timestamp('20160101', tz='US/Eastern'), + ...: pd.Timestamp('20160101', tz='US/Eastern')]) + Out[8]: array(['2016-01-01T05:00:00.000000000'], dtype='datetime64[ns]') -New Behavior: + New Behavior: -.. ipython:: python + .. ipython:: python - # Series, returns an array of Timestamp tz-aware - pd.Series([pd.Timestamp(r'20160101', tz=r'US/Eastern'), - pd.Timestamp(r'20160101', tz=r'US/Eastern')]).unique() - pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')])) + # Series, returns an array of Timestamp tz-aware + pd.Series([pd.Timestamp(r'20160101', tz=r'US/Eastern'), + pd.Timestamp(r'20160101', tz=r'US/Eastern')]).unique() + pd.unique(pd.Series([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) - # Index, returns a DatetimeIndex - pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')]).unique() - pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), - pd.Timestamp('20160101', tz='US/Eastern')])) + # Index, returns a DatetimeIndex + pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')]).unique() + pd.unique(pd.Index([pd.Timestamp('20160101', tz='US/Eastern'), + pd.Timestamp('20160101', tz='US/Eastern')])) - Categoricals -Previous behaviour: + Previous behaviour: -.. code-block:: ipython + .. 
code-block:: ipython - In [1]: pd.Series(list('baabc'), dtype='category').unique() - Out[1]: - [b, a, c] - Categories (3, object): [b, a, c] + In [1]: pd.Series(list('baabc'), dtype='category').unique() + Out[1]: + [b, a, c] + Categories (3, object): [b, a, c] - In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) - Out[2]: array(['b', 'a', 'c'], dtype=object) + In [2]: pd.unique(pd.Series(list('baabc'), dtype='category')) + Out[2]: array(['b', 'a', 'c'], dtype=object) -New Behavior: + New Behavior: -.. ipython:: python + .. ipython:: python - # returns a Categorical - pd.Series(list('baabc'), dtype='category').unique() - pd.unique(pd.Series(list('baabc'), dtype='category')) + # returns a Categorical + pd.Series(list('baabc'), dtype='category').unique() + pd.unique(pd.Series(list('baabc'), dtype='category')) .. _whatsnew_0200.api_breaking.s3: @@ -1081,48 +1081,48 @@ joins, :meth:`DataFrame.join` and :func:`merge`, and the ``.align`` method. .. ipython:: python - left = pd.Index([2, 1, 0]) - left - right = pd.Index([1, 2, 3]) - right + left = pd.Index([2, 1, 0]) + left + right = pd.Index([1, 2, 3]) + right Previous Behavior: .. code-block:: ipython - In [4]: left.intersection(right) - Out[4]: Int64Index([1, 2], dtype='int64') + In [4]: left.intersection(right) + Out[4]: Int64Index([1, 2], dtype='int64') New Behavior: .. ipython:: python - left.intersection(right) + left.intersection(right) - ``DataFrame.join`` and ``pd.merge`` .. ipython:: python - left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) - left - right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) - right + left = pd.DataFrame({'a': [20, 10, 0]}, index=[2, 1, 0]) + left + right = pd.DataFrame({'b': [100, 200, 300]}, index=[1, 2, 3]) + right Previous Behavior: .. code-block:: ipython - In [4]: left.join(right, how='inner') - Out[4]: - a b - 1 10 100 - 2 20 200 + In [4]: left.join(right, how='inner') + Out[4]: + a b + 1 10 100 + 2 20 200 New Behavior: .. 
ipython:: python - left.join(right, how='inner') + left.join(right, how='inner') .. _whatsnew_0200.api_breaking.pivot_table: diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index b99164aa797ed..8b0cf0a1134f6 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -245,7 +245,9 @@ First we set the data: n = 1000 df = pd.DataFrame({'Store': np.random.choice(['Store_1', 'Store_2'], n), 'Product': np.random.choice(['Product_1', - 'Product_2', 'Product_3'], n), + 'Product_2', + 'Product_3' + ], n), 'Revenue': (np.random.random(n) * 50 + 10).round(2), 'Quantity': np.random.randint(1, 10, size=n)}) df.head(2) From 221d35f1827cf55e2332ecc7fd24f721b4949126 Mon Sep 17 00:00:00 2001 From: thoo Date: Tue, 11 Dec 2018 20:14:40 -0500 Subject: [PATCH 6/8] Add spaces to align code-lines --- doc/source/whatsnew/v0.21.0.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 8b0cf0a1134f6..1e479dd180b89 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -854,11 +854,11 @@ Previous Behavior: .. code-block:: ipython - In [2]: pd.interval_range(start=0, end=4, periods=6) - Out[2]: - IntervalIndex([(0, 1], (1, 2], (2, 3]] - closed='right', - dtype='interval[int64]') + In [2]: pd.interval_range(start=0, end=4, periods=6) + Out[2]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') In [3]: pd.period_range(start='2017Q1', end='2017Q4', periods=6, freq='Q') Out[3]: PeriodIndex(['2017Q1', '2017Q2', '2017Q3', '2017Q4', '2018Q1', '2018Q2'], dtype='period[Q-DEC]', freq='Q-DEC') @@ -881,11 +881,11 @@ Previous Behavior: .. 
code-block:: ipython - In [4]: pd.interval_range(start=0, end=4) - Out[4]: - IntervalIndex([(0, 1], (1, 2], (2, 3]] - closed='right', - dtype='interval[int64]') + In [4]: pd.interval_range(start=0, end=4) + Out[4]: + IntervalIndex([(0, 1], (1, 2], (2, 3]] + closed='right', + dtype='interval[int64]') New Behavior: From 02f58449682d3227bff766148b1773d916744ef7 Mon Sep 17 00:00:00 2001 From: thoo Date: Wed, 12 Dec 2018 07:54:18 -0500 Subject: [PATCH 7/8] fix raw string --- doc/source/whatsnew/v0.20.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index dc6dba1e5a6ae..235e0b078c3c8 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -190,7 +190,7 @@ support for bz2 compression in the python 2 C-engine improved (:issue:`14874`). .. ipython:: python - url = (r'https://github.com/{repo}/raw/{branch}/{path}' + url = ('https://github.com/{repo}/raw/{branch}/{path}' .format(repo='pandas-dev/pandas', branch='master', path='pandas/tests/io/parser/data/salaries.csv.bz2')) From fc214e329b4b2d0203ee18bb053f6caeb40f2a00 Mon Sep 17 00:00:00 2001 From: thoo Date: Wed, 12 Dec 2018 10:20:31 -0500 Subject: [PATCH 8/8] code block alignments --- doc/source/whatsnew/v0.19.0.rst | 191 ++++++++++++++++---------------- doc/source/whatsnew/v0.20.0.rst | 44 ++++---- doc/source/whatsnew/v0.21.0.rst | 22 ++-- 3 files changed, 129 insertions(+), 128 deletions(-) diff --git a/doc/source/whatsnew/v0.19.0.rst b/doc/source/whatsnew/v0.19.0.rst index 78ab4e76076b8..6f4e8e36cdc04 100644 --- a/doc/source/whatsnew/v0.19.0.rst +++ b/doc/source/whatsnew/v0.19.0.rst @@ -76,41 +76,41 @@ This also illustrates using the ``by`` parameter to group data before merging. .. 
ipython:: python - trades = pd.DataFrame({ - 'time': pd.to_datetime(['20160525 13:30:00.023', - '20160525 13:30:00.038', - '20160525 13:30:00.048', - '20160525 13:30:00.048', - '20160525 13:30:00.048']), - 'ticker': ['MSFT', 'MSFT', - 'GOOG', 'GOOG', 'AAPL'], - 'price': [51.95, 51.95, - 720.77, 720.92, 98.00], - 'quantity': [75, 155, - 100, 100, 100]}, - columns=['time', 'ticker', 'price', 'quantity']) - - quotes = pd.DataFrame({ - 'time': pd.to_datetime(['20160525 13:30:00.023', - '20160525 13:30:00.023', - '20160525 13:30:00.030', - '20160525 13:30:00.041', - '20160525 13:30:00.048', - '20160525 13:30:00.049', - '20160525 13:30:00.072', - '20160525 13:30:00.075']), - 'ticker': ['GOOG', 'MSFT', 'MSFT', 'MSFT', - 'GOOG', 'AAPL', 'GOOG', 'MSFT'], - 'bid': [720.50, 51.95, 51.97, 51.99, - 720.50, 97.99, 720.50, 52.01], - 'ask': [720.93, 51.96, 51.98, 52.00, - 720.93, 98.01, 720.88, 52.03]}, - columns=['time', 'ticker', 'bid', 'ask']) + trades = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.038', + '20160525 13:30:00.048', + '20160525 13:30:00.048', + '20160525 13:30:00.048']), + 'ticker': ['MSFT', 'MSFT', + 'GOOG', 'GOOG', 'AAPL'], + 'price': [51.95, 51.95, + 720.77, 720.92, 98.00], + 'quantity': [75, 155, + 100, 100, 100]}, + columns=['time', 'ticker', 'price', 'quantity']) + + quotes = pd.DataFrame({ + 'time': pd.to_datetime(['20160525 13:30:00.023', + '20160525 13:30:00.023', + '20160525 13:30:00.030', + '20160525 13:30:00.041', + '20160525 13:30:00.048', + '20160525 13:30:00.049', + '20160525 13:30:00.072', + '20160525 13:30:00.075']), + 'ticker': ['GOOG', 'MSFT', 'MSFT', 'MSFT', + 'GOOG', 'AAPL', 'GOOG', 'MSFT'], + 'bid': [720.50, 51.95, 51.97, 51.99, + 720.50, 97.99, 720.50, 52.01], + 'ask': [720.93, 51.96, 51.98, 52.00, + 720.93, 98.01, 720.88, 52.03]}, + columns=['time', 'ticker', 'bid', 'ask']) .. 
ipython:: python - trades - quotes + trades + quotes An asof merge joins on the ``on``, typically a datetimelike field, which is ordered, and in this case we are using a grouper in the ``by`` field. This is like a left-outer join, except @@ -118,9 +118,9 @@ that forward filling happens automatically taking the most recent non-NaN value. .. ipython:: python - pd.merge_asof(trades, quotes, - on='time', - by='ticker') + pd.merge_asof(trades, quotes, + on='time', + by='ticker') This returns a merged DataFrame with the entries in the same order as the original left passed DataFrame (``trades`` in this case), with the fields of the ``quotes`` merged. @@ -135,17 +135,17 @@ See the full documentation :ref:`here `. .. ipython:: python - dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, - index=pd.date_range('20130101 09:00:00', - periods=5, freq='s')) - dft + dft = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}, + index=pd.date_range('20130101 09:00:00', + periods=5, freq='s')) + dft This is a regular frequency index. Using an integer window parameter works to roll along the window frequency. .. ipython:: python - dft.rolling(2).sum() - dft.rolling(2, min_periods=1).sum() + dft.rolling(2).sum() + dft.rolling(2, min_periods=1).sum() Specifying an offset allows a more intuitive specification of the rolling frequency. @@ -271,10 +271,10 @@ Categorical Concatenation .. ipython:: python - from pandas.api.types import union_categoricals - a = pd.Categorical(["b", "c"]) - b = pd.Categorical(["a", "b"]) - union_categoricals([a, b]) + from pandas.api.types import union_categoricals + a = pd.Categorical(["b", "c"]) + b = pd.Categorical(["a", "b"]) + union_categoricals([a, b]) - ``concat`` and ``append`` now can concat ``category`` dtypes with different ``categories`` as ``object`` dtype (:issue:`13524`) @@ -287,14 +287,14 @@ Categorical Concatenation .. 
code-block:: ipython - In [1]: pd.concat([s1, s2]) - ValueError: incompatible categories in categorical concat + In [1]: pd.concat([s1, s2]) + ValueError: incompatible categories in categorical concat **New behavior**: .. ipython:: python - pd.concat([s1, s2]) + pd.concat([s1, s2]) .. _whatsnew_0190.enhancements.semi_month_offsets: @@ -307,31 +307,31 @@ These provide date offsets anchored (by default) to the 15th and end of month, a .. ipython:: python - from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin + from pandas.tseries.offsets import SemiMonthEnd, SemiMonthBegin **SemiMonthEnd**: .. ipython:: python - pd.Timestamp('2016-01-01') + SemiMonthEnd() + pd.Timestamp('2016-01-01') + SemiMonthEnd() - pd.date_range('2015-01-01', freq='SM', periods=4) + pd.date_range('2015-01-01', freq='SM', periods=4) **SemiMonthBegin**: .. ipython:: python - pd.Timestamp('2016-01-01') + SemiMonthBegin() + pd.Timestamp('2016-01-01') + SemiMonthBegin() - pd.date_range('2015-01-01', freq='SMS', periods=4) + pd.date_range('2015-01-01', freq='SMS', periods=4) Using the anchoring suffix, you can also specify the day of month to use instead of the 15th. .. ipython:: python - pd.date_range('2015-01-01', freq='SMS-16', periods=4) + pd.date_range('2015-01-01', freq='SMS-16', periods=4) - pd.date_range('2015-01-01', freq='SM-14', periods=4) + pd.date_range('2015-01-01', freq='SM-14', periods=4) .. _whatsnew_0190.enhancements.index: @@ -360,11 +360,11 @@ For ``MultiIndex``, values are dropped if any level is missing by default. Speci .. 
ipython:: python - midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4], - [1, 2, np.nan, np.nan]]) - midx - midx.dropna() - midx.dropna(how='all') + midx = pd.MultiIndex.from_arrays([[1, 2, np.nan, 4], + [1, 2, np.nan, np.nan]]) + midx + midx.dropna() + midx.dropna(how='all') ``Index`` now supports ``.str.extractall()`` which returns a ``DataFrame``, see the :ref:`docs here ` (:issue:`10008`, :issue:`13156`) @@ -464,23 +464,24 @@ Other enhancements .. ipython:: python - pd.Timestamp(2012, 1, 1) + pd.Timestamp(2012, 1, 1) - pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) + pd.Timestamp(year=2012, month=1, day=1, hour=8, minute=30) - The ``.resample()`` function now accepts a ``on=`` or ``level=`` parameter for resampling on a datetimelike column or ``MultiIndex`` level (:issue:`13500`) .. ipython:: python - df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), - 'a': np.arange(5)}, + df = pd.DataFrame({'date': pd.date_range('2015-01-01', freq='W', periods=5), + 'a': np.arange(5)}, index=pd.MultiIndex.from_arrays([[1, 2, 3, 4, 5], - pd.date_range('2015-01-01', freq='W', periods=5)], - names=['v', 'd']) - ) - df - df.resample('M', on='date').sum() - df.resample('M', level='d').sum() + pd.date_range('2015-01-01', + freq='W', + periods=5) + ], names=['v', 'd'])) + df + df.resample('M', on='date').sum() + df.resample('M', level='d').sum() - The ``.get_credentials()`` method of ``GbqConnector`` can now first try to fetch `the application default credentials `__. See the docs for more details (:issue:`13577`). - The ``.tz_localize()`` method of ``DatetimeIndex`` and ``Timestamp`` has gained the ``errors`` keyword, so you can potentially coerce nonexistent timestamps to ``NaT``. The default behavior remains to raising a ``NonExistentTimeError`` (:issue:`13057`) @@ -975,15 +976,15 @@ Previous behavior: .. 
code-block:: ipython - In [1]: pd.Index(['a', 'b']) + pd.Index(['a', 'c']) - FutureWarning: using '+' to provide set union with Indexes is deprecated, use '|' or .union() - Out[1]: Index(['a', 'b', 'c'], dtype='object') + In [1]: pd.Index(['a', 'b']) + pd.Index(['a', 'c']) + FutureWarning: using '+' to provide set union with Indexes is deprecated, use '|' or .union() + Out[1]: Index(['a', 'b', 'c'], dtype='object') **New behavior**: the same operation will now perform element-wise addition: .. ipython:: python - pd.Index(['a', 'b']) + pd.Index(['a', 'c']) + pd.Index(['a', 'b']) + pd.Index(['a', 'c']) Note that numeric Index objects already performed element-wise operations. For example, the behavior of adding two integer Indexes is unchanged. @@ -991,7 +992,7 @@ The base ``Index`` is now made consistent with this behavior. .. ipython:: python - pd.Index([1, 2, 3]) + pd.Index([2, 3, 4]) + pd.Index([1, 2, 3]) + pd.Index([2, 3, 4]) Further, because of this change, it is now possible to subtract two DatetimeIndex objects resulting in a TimedeltaIndex: @@ -1056,23 +1057,23 @@ Previously, most ``Index`` classes returned ``np.ndarray``, and ``DatetimeIndex` .. code-block:: ipython - In [1]: pd.Index([1, 2, 3]).unique() - Out[1]: array([1, 2, 3]) + In [1]: pd.Index([1, 2, 3]).unique() + Out[1]: array([1, 2, 3]) - In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', - ...: '2011-01-03'], tz='Asia/Tokyo').unique() - Out[2]: - DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00', - '2011-01-03 00:00:00+09:00'], - dtype='datetime64[ns, Asia/Tokyo]', freq=None) + In [2]: pd.DatetimeIndex(['2011-01-01', '2011-01-02', + ...: '2011-01-03'], tz='Asia/Tokyo').unique() + Out[2]: + DatetimeIndex(['2011-01-01 00:00:00+09:00', '2011-01-02 00:00:00+09:00', + '2011-01-03 00:00:00+09:00'], + dtype='datetime64[ns, Asia/Tokyo]', freq=None) **New behavior**: .. 
ipython:: python - pd.Index([1, 2, 3]).unique() - pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], - tz='Asia/Tokyo').unique() + pd.Index([1, 2, 3]).unique() + pd.DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], + tz='Asia/Tokyo').unique() .. _whatsnew_0190.api.multiindex: @@ -1236,27 +1237,27 @@ Operators now preserve dtypes .. ipython:: python - s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) - s.dtype + s = pd.SparseSeries([0, 2, 0, 1], fill_value=0, dtype=np.int64) + s.dtype - s + 1 + s + 1 - Sparse data structure now support ``astype`` to convert internal ``dtype`` (:issue:`13900`) .. ipython:: python - s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) - s - s.astype(np.int64) + s = pd.SparseSeries([1., 0., 2., 0.], fill_value=0) + s + s.astype(np.int64) ``astype`` fails if data contains values which cannot be converted to specified ``dtype``. Note that the limitation is applied to ``fill_value`` which default is ``np.nan``. .. code-block:: ipython - In [7]: pd.SparseSeries([1., np.nan, 2., np.nan], fill_value=np.nan).astype(np.int64) - Out[7]: - ValueError: unable to coerce current fill_value nan to int64 dtype + In [7]: pd.SparseSeries([1., np.nan, 2., np.nan], fill_value=np.nan).astype(np.int64) + Out[7]: + ValueError: unable to coerce current fill_value nan to int64 dtype Other sparse fixes """""""""""""""""" diff --git a/doc/source/whatsnew/v0.20.0.rst b/doc/source/whatsnew/v0.20.0.rst index 235e0b078c3c8..2002c4bb9bc51 100644 --- a/doc/source/whatsnew/v0.20.0.rst +++ b/doc/source/whatsnew/v0.20.0.rst @@ -273,29 +273,29 @@ In previous versions, ``.groupby(..., sort=False)`` would fail with a ``ValueErr .. 
ipython:: python - chromosomes = np.r_[np.arange(1, 23).astype(str), ['X', 'Y']] - df = pd.DataFrame({ - 'A': np.random.randint(100), - 'B': np.random.randint(100), - 'C': np.random.randint(100), - 'chromosomes': pd.Categorical(np.random.choice(chromosomes, 100), - categories=chromosomes, - ordered=True)}) - df + chromosomes = np.r_[np.arange(1, 23).astype(str), ['X', 'Y']] + df = pd.DataFrame({ + 'A': np.random.randint(100), + 'B': np.random.randint(100), + 'C': np.random.randint(100), + 'chromosomes': pd.Categorical(np.random.choice(chromosomes, 100), + categories=chromosomes, + ordered=True)}) + df **Previous Behavior**: .. code-block:: ipython - In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() - --------------------------------------------------------------------------- - ValueError: items in new_categories are not the same as in old categories + In [3]: df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + --------------------------------------------------------------------------- + ValueError: items in new_categories are not the same as in old categories **New Behavior**: .. ipython:: python - df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() + df[df.chromosomes != '1'].groupby('chromosomes', sort=False).sum() .. _whatsnew_0200.enhancements.table_schema: @@ -650,17 +650,17 @@ Previous behaviour: .. code-block:: ipython - In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') + In [1]: idx = pd.date_range("2015-01-01", periods=5, freq='10H') - In [2]: idx.hour - Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) + In [2]: idx.hour + Out[2]: array([ 0, 10, 20, 6, 16], dtype=int32) New Behavior: .. ipython:: python - idx = pd.date_range("2015-01-01", periods=5, freq='10H') - idx.hour + idx = pd.date_range("2015-01-01", periods=5, freq='10H') + idx.hour This has the advantage that specific ``Index`` methods are still available on the result. 
On the other hand, this might have backward incompatibilities: e.g. @@ -1323,11 +1323,11 @@ Using ``.ix`` will now show a ``DeprecationWarning`` with a link to some example .. ipython:: python - df = pd.DataFrame({'A': [1, 2, 3], - 'B': [4, 5, 6]}, - index=list('abc')) + df = pd.DataFrame({'A': [1, 2, 3], + 'B': [4, 5, 6]}, + index=list('abc')) - df + df Previous Behavior, where you wish to get the 0th and the 2nd elements from the index in the 'A' column. diff --git a/doc/source/whatsnew/v0.21.0.rst b/doc/source/whatsnew/v0.21.0.rst index 1e479dd180b89..47cd17efe3f75 100644 --- a/doc/source/whatsnew/v0.21.0.rst +++ b/doc/source/whatsnew/v0.21.0.rst @@ -481,7 +481,7 @@ The idiomatic way to achieve selecting potentially not-found elements is via ``. .. ipython:: python - s.reindex([1, 2, 3]) + s.reindex([1, 2, 3]) Selection with all keys found is unchanged. @@ -592,13 +592,13 @@ Previously Behavior: .. ipython:: python - s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) - s + s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) + s .. code-block:: ipython - In [39]: s.loc[pd.Index([True, False, True])] - KeyError: "None of [Index([True, False, True], dtype='object')] are in the [index]" + In [39]: s.loc[pd.Index([True, False, True])] + KeyError: "None of [Index([True, False, True], dtype='object')] are in the [index]" Current Behavior @@ -699,10 +699,10 @@ Previously, if you attempted the following expression, you would get a not very .. code-block:: ipython - In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) - ... - IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) - and integer or boolean arrays are valid indices + In [3]: pd.eval("a = 1 + 2", target=arr, inplace=True) + ... + IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) + and integer or boolean arrays are valid indices This is a very long way of saying numpy arrays don't support string-item indexing. 
With this change, the error message is now this: @@ -717,8 +717,8 @@ It also used to be possible to evaluate expressions inplace, even if there was n .. code-block:: ipython - In [4]: pd.eval("1 + 2", target=arr, inplace=True) - Out[4]: 3 + In [4]: pd.eval("1 + 2", target=arr, inplace=True) + Out[4]: 3 However, this input does not make much sense because the output is not being assigned to the target. Now, a ``ValueError`` will be raised when such an input is passed in: