diff --git a/doc/source/whatsnew/v0.16.0.rst b/doc/source/whatsnew/v0.16.0.rst index 8d2d589c44e1b..7ae17febe168d 100644 --- a/doc/source/whatsnew/v0.16.0.rst +++ b/doc/source/whatsnew/v0.16.0.rst @@ -5,11 +5,6 @@ v0.16.0 (March 22, 2015) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - This is a major release from 0.15.2 and includes a small number of API changes, several new features, enhancements, and performance improvements along with a large number of bug fixes. We recommend that all @@ -58,7 +53,7 @@ and the entire DataFrame (with all original and new columns) is returned. .. ipython :: python - iris = read_csv('data/iris.data') + iris = pd.read_csv('data/iris.data') iris.head() iris.assign(sepal_ratio=iris['SepalWidth'] / iris['SepalLength']).head() @@ -77,9 +72,10 @@ calculate the ratio, and plot .. ipython:: python + iris = pd.read_csv('data/iris.data') (iris.query('SepalLength > 5') - .assign(SepalRatio = lambda x: x.SepalWidth / x.SepalLength, - PetalRatio = lambda x: x.PetalWidth / x.PetalLength) + .assign(SepalRatio=lambda x: x.SepalWidth / x.SepalLength, + PetalRatio=lambda x: x.PetalWidth / x.PetalLength) .plot(kind='scatter', x='SepalRatio', y='PetalRatio')) .. image:: ../_static/whatsnew_assign.png @@ -97,15 +93,14 @@ Added :meth:`SparseSeries.to_coo` and :meth:`SparseSeries.from_coo` methods (:is .. 
ipython:: python - from numpy import nan - s = Series([3.0, nan, 1.0, 3.0, nan, nan]) - s.index = MultiIndex.from_tuples([(1, 2, 'a', 0), - (1, 2, 'a', 1), - (1, 1, 'b', 0), - (1, 1, 'b', 1), - (2, 1, 'b', 0), - (2, 1, 'b', 1)], - names=['A', 'B', 'C', 'D']) + s = pd.Series([3.0, np.nan, 1.0, 3.0, np.nan, np.nan]) + s.index = pd.MultiIndex.from_tuples([(1, 2, 'a', 0), + (1, 2, 'a', 1), + (1, 1, 'b', 0), + (1, 1, 'b', 1), + (2, 1, 'b', 0), + (2, 1, 'b', 1)], + names=['A', 'B', 'C', 'D']) s @@ -129,11 +124,11 @@ from a ``scipy.sparse.coo_matrix``: from scipy import sparse A = sparse.coo_matrix(([3.0, 1.0, 2.0], ([1, 0, 0], [0, 2, 3])), - shape=(3, 4)) + shape=(3, 4)) A A.todense() - ss = SparseSeries.from_coo(A) + ss = pd.SparseSeries.from_coo(A) ss .. _whatsnew_0160.enhancements.string: @@ -153,7 +148,7 @@ String Methods Enhancements .. ipython:: python - s = Series(['abcd', '3456', 'EFGH']) + s = pd.Series(['abcd', '3456', 'EFGH']) s.str.isalpha() s.str.find('ab') @@ -161,14 +156,14 @@ String Methods Enhancements .. ipython:: python - s = Series(['12', '300', '25']) + s = pd.Series(['12', '300', '25']) s.str.pad(5, fillchar='_') - Added :meth:`Series.str.slice_replace`, which previously raised ``NotImplementedError`` (:issue:`8888`) .. ipython:: python - s = Series(['ABCD', 'EFGH', 'IJK']) + s = pd.Series(['ABCD', 'EFGH', 'IJK']) s.str.slice_replace(1, 3, 'X') # replaced with empty char s.str.slice_replace(0, 1) @@ -192,7 +187,7 @@ Other enhancements .. code-block:: python # Returns the 1st and 4th sheet, as a dictionary of DataFrames. - pd.read_excel('path_to_file.xls',sheetname=['Sheet1',3]) + pd.read_excel('path_to_file.xls', sheetname=['Sheet1', 3]) - Allow Stata files to be read incrementally with an iterator; support for long strings in Stata files. See the docs :ref:`here` (:issue:`9493`:). @@ -273,11 +268,11 @@ The behavior of a small sub-set of edge cases for using ``.loc`` have changed (: .. 
ipython:: python - df = DataFrame(np.random.randn(5,4), - columns=list('ABCD'), - index=date_range('20130101',periods=5)) + df = pd.DataFrame(np.random.randn(5, 4), + columns=list('ABCD'), + index=pd.date_range('20130101', periods=5)) df - s = Series(range(5),[-2,-1,1,2,3]) + s = pd.Series(range(5), [-2, -1, 1, 2, 3]) s Previous Behavior @@ -347,7 +342,7 @@ Previous Behavior .. code-block:: ipython - In [3]: s = Series([0,1,2], dtype='category') + In [3]: s = pd.Series([0, 1, 2], dtype='category') In [4]: s Out[4]: @@ -374,7 +369,7 @@ New Behavior .. ipython:: python - s = Series([0,1,2], dtype='category') + s = pd.Series([0, 1, 2], dtype='category') s s.cat.ordered s = s.cat.as_ordered() @@ -382,7 +377,7 @@ New Behavior s.cat.ordered # you can set in the constructor of the Categorical - s = Series(Categorical([0,1,2],ordered=True)) + s = pd.Series(pd.Categorical([0, 1, 2], ordered=True)) s s.cat.ordered @@ -390,7 +385,7 @@ For ease of creation of series of categorical data, we have added the ability to .. code-block:: python - In [54]: s = Series(["a","b","c","a"]).astype('category',ordered=True) + In [54]: s = pd.Series(["a", "b", "c", "a"]).astype('category', ordered=True) In [55]: s Out[55]: @@ -401,7 +396,8 @@ For ease of creation of series of categorical data, we have added the ability to dtype: category Categories (3, object): [a < b < c] - In [56]: s = Series(["a","b","c","a"]).astype('category',categories=list('abcdef'),ordered=False) + In [56]: s = (pd.Series(["a", "b", "c", "a"]) + ....: .astype('category', categories=list('abcdef'), ordered=False)) In [57]: s Out[57]: @@ -449,7 +445,7 @@ Other API Changes .. code-block:: ipython - In [2]: pd.Series([0,1,2,3], list('abcd')) | pd.Series([4,4,4,4], list('abcd')) + In [2]: pd.Series([0, 1, 2, 3], list('abcd')) | pd.Series([4, 4, 4, 4], list('abcd')) Out[2]: a True b True @@ -462,7 +458,7 @@ Other API Changes .. 
code-block:: ipython - In [2]: pd.Series([0,1,2,3], list('abcd')) | pd.Series([4,4,4,4], list('abcd')) + In [2]: pd.Series([0, 1, 2, 3], list('abcd')) | pd.Series([4, 4, 4, 4], list('abcd')) Out[2]: a 4 b 5 @@ -680,7 +676,8 @@ Bug Fixes .. ipython:: python - df1 = DataFrame({'x': Series(['a','b','c']), 'y': Series(['d','e','f'])}) + df1 = pd.DataFrame({'x': pd.Series(['a', 'b', 'c']), + 'y': pd.Series(['d', 'e', 'f'])}) df2 = df1[['x']] df2['y'] = ['g', 'h', 'i'] diff --git a/doc/source/whatsnew/v0.16.1.rst b/doc/source/whatsnew/v0.16.1.rst index 5d98d3715a933..cfd7218e11157 100644 --- a/doc/source/whatsnew/v0.16.1.rst +++ b/doc/source/whatsnew/v0.16.1.rst @@ -5,11 +5,6 @@ v0.16.1 (May 11, 2015) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - This is a minor bug-fix release from 0.16.0 and includes a a large number of bug fixes along several new features, enhancements, and performance improvements. @@ -51,10 +46,10 @@ setting the index of a ``DataFrame/Series`` with a ``category`` dtype would conv .. code-block:: ipython - In [1]: df = DataFrame({'A' : np.arange(6), - ...: 'B' : Series(list('aabbca')).astype('category', - ...: categories=list('cab')) - ...: }) + In [1]: df = pd.DataFrame({'A': np.arange(6), + ...: 'B': pd.Series(list('aabbca')) + ...: .astype('category', categories=list('cab')) + ...: }) ...: In [2]: df @@ -146,7 +141,7 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index. .. code-block:: ipython - In [12]: df2.reindex(['a','e']) + In [12]: df2.reindex(['a', 'e']) Out[12]: A B @@ -155,10 +150,10 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index. 
a 5.0 e NaN - In [13]: df2.reindex(['a','e']).index - Out[13]: Index(['a', 'a', 'a', 'e'], dtype='object', name='B') + In [13]: df2.reindex(['a', 'e']).index + Out[13]: Index(['a', 'a', 'a', 'e'], dtype='object', name='B') - In [14]: df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))) + In [14]: df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))) Out[14]: A B @@ -167,8 +162,11 @@ values NOT in the categories, similarly to how you can reindex ANY pandas index. a 5.0 e NaN - In [15]: df2.reindex(pd.Categorical(['a','e'],categories=list('abcde'))).index - Out[15]: CategoricalIndex(['a', 'a', 'a', 'e'], categories=['a', 'b', 'c', 'd', 'e'], ordered=False, name='B', dtype='category') + In [15]: df2.reindex(pd.Categorical(['a', 'e'], categories=list('abcde'))).index + Out[15]: CategoricalIndex(['a', 'a', 'a', 'e'], + categories=['a', 'b', 'c', 'd', 'e'], + ordered=False, name='B', + dtype='category') See the :ref:`documentation ` for more. (:issue:`7629`, :issue:`10038`, :issue:`10039`) @@ -230,7 +228,7 @@ enhancements make string operations easier and more consistent with standard pyt .. ipython:: python - idx = Index([' jack', 'jill ', ' jesse ', 'frank']) + idx = pd.Index([' jack', 'jill ', ' jesse ', 'frank']) idx.str.strip() One special case for the `.str` accessor on ``Index`` is that if a string method returns ``bool``, the ``.str`` accessor @@ -239,8 +237,8 @@ enhancements make string operations easier and more consistent with standard pyt .. ipython:: python - idx = Index(['a1', 'a2', 'b1', 'b2']) - s = Series(range(4), index=idx) + idx = pd.Index(['a1', 'a2', 'b1', 'b2']) + s = pd.Series(range(4), index=idx) s idx.str.startswith('a') s[s.index.str.startswith('a')] @@ -258,7 +256,7 @@ enhancements make string operations easier and more consistent with standard pyt .. 
ipython:: python - s = Series(['a,b', 'a,c', 'b,c']) + s = pd.Series(['a,b', 'a,c', 'b,c']) # return Series s.str.split(',') @@ -266,7 +264,7 @@ enhancements make string operations easier and more consistent with standard pyt # return DataFrame s.str.split(',', expand=True) - idx = Index(['a,b', 'a,c', 'b,c']) + idx = pd.Index(['a,b', 'a,c', 'b,c']) # return Index idx.str.split(',') @@ -287,10 +285,9 @@ Other Enhancements .. ipython:: python - from pandas.tseries.offsets import BusinessHour - Timestamp('2014-08-01 09:00') + BusinessHour() - Timestamp('2014-08-01 07:00') + BusinessHour() - Timestamp('2014-08-01 16:30') + BusinessHour() + pd.Timestamp('2014-08-01 09:00') + pd.tseries.offsets.BusinessHour() + pd.Timestamp('2014-08-01 07:00') + pd.tseries.offsets.BusinessHour() + pd.Timestamp('2014-08-01 16:30') + pd.tseries.offsets.BusinessHour() - ``DataFrame.diff`` now takes an ``axis`` parameter that determines the direction of differencing (:issue:`9727`) @@ -302,7 +299,7 @@ Other Enhancements .. ipython:: python - df = DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) + df = pd.DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C']) df.drop(['A', 'X'], axis=1, errors='ignore') - Add support for separating years and quarters using dashes, for @@ -362,19 +359,19 @@ Previous Behavior .. 
code-block:: ipython - In [2]: pd.Index(range(4),name='foo') + In [2]: pd.Index(range(4), name='foo') Out[2]: Int64Index([0, 1, 2, 3], dtype='int64') - In [3]: pd.Index(range(104),name='foo') + In [3]: pd.Index(range(104), name='foo') Out[3]: Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, ...], dtype='int64') - In [4]: pd.date_range('20130101',periods=4,name='foo',tz='US/Eastern') + In [4]: pd.date_range('20130101', periods=4, name='foo', tz='US/Eastern') Out[4]: [2013-01-01 00:00:00-05:00, ..., 2013-01-04 00:00:00-05:00] Length: 4, Freq: D, Timezone: US/Eastern - In [5]: pd.date_range('20130101',periods=104,name='foo',tz='US/Eastern') + In [5]: pd.date_range('20130101', periods=104, name='foo', tz='US/Eastern') Out[5]: [2013-01-01 00:00:00-05:00, ..., 2013-04-14 00:00:00-04:00] @@ -388,12 +385,15 @@ New Behavior pd.Index(range(4), name='foo') pd.Index(range(30), name='foo') pd.Index(range(104), name='foo') - pd.CategoricalIndex(['a','bb','ccc','dddd'], ordered=True, name='foobar') - pd.CategoricalIndex(['a','bb','ccc','dddd']*10, ordered=True, name='foobar') - pd.CategoricalIndex(['a','bb','ccc','dddd']*100, ordered=True, name='foobar') - pd.date_range('20130101',periods=4, name='foo', tz='US/Eastern') - pd.date_range('20130101',periods=25, freq='D') - pd.date_range('20130101',periods=104, name='foo', tz='US/Eastern') + pd.CategoricalIndex(['a', 'bb', 'ccc', 'dddd'], + ordered=True, name='foobar') + pd.CategoricalIndex(['a', 'bb', 'ccc', 'dddd'] * 10, + ordered=True, name='foobar') + pd.CategoricalIndex(['a', 'bb', 'ccc', 'dddd'] * 100, + ordered=True, name='foobar') + pd.date_range('20130101', periods=4, 
name='foo', tz='US/Eastern') + pd.date_range('20130101', periods=25, freq='D') + pd.date_range('20130101', periods=104, name='foo', tz='US/Eastern') .. _whatsnew_0161.performance: diff --git a/doc/source/whatsnew/v0.16.2.rst b/doc/source/whatsnew/v0.16.2.rst index 932f70d3e0e19..ca0ad8d3ae7f9 100644 --- a/doc/source/whatsnew/v0.16.2.rst +++ b/doc/source/whatsnew/v0.16.2.rst @@ -5,11 +5,6 @@ v0.16.2 (June 12, 2015) {{ header }} -.. ipython:: python - :suppress: - - from pandas import * # noqa F401, F403 - This is a minor bug-fix release from 0.16.1 and includes a a large number of bug fixes along some new features (:meth:`~DataFrame.pipe` method), enhancements, and performance improvements. @@ -46,16 +41,16 @@ The goal is to avoid confusing nested function calls like # df is a DataFrame # f, g, and h are functions that take and return DataFrames - f(g(h(df), arg1=1), arg2=2, arg3=3) + f(g(h(df), arg1=1), arg2=2, arg3=3) # noqa: F821 The logic flows from inside out, and function names are separated from their keyword arguments. This can be rewritten as .. code-block:: python - (df.pipe(h) - .pipe(g, arg1=1) - .pipe(f, arg2=2, arg3=3) + (df.pipe(h) # noqa: F821 + .pipe(g, arg1=1) # noqa: F821 + .pipe(f, arg2=2, arg3=3) # noqa: F821 ) Now both the code and the logic flow from top to bottom. Keyword arguments are next to @@ -73,11 +68,11 @@ of ``(function, keyword)`` indicating where the DataFrame should flow. For examp # sm.ols takes (formula, data) (bb.query('h > 0') - .assign(ln_h = lambda df: np.log(df.h)) + .assign(ln_h=lambda df: np.log(df.h)) .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)') .fit() .summary() - ) + ) The pipe method is inspired by unix pipes, which stream text through processes. 
More recently dplyr_ and magrittr_ have introduced the diff --git a/setup.cfg b/setup.cfg index e68c14177c39a..7096d52ccd586 100644 --- a/setup.cfg +++ b/setup.cfg @@ -50,9 +50,6 @@ exclude = doc/source/whatsnew/v0.15.0.rst doc/source/whatsnew/v0.15.1.rst doc/source/whatsnew/v0.15.2.rst - doc/source/whatsnew/v0.16.0.rst - doc/source/whatsnew/v0.16.1.rst - doc/source/whatsnew/v0.16.2.rst doc/source/whatsnew/v0.17.0.rst doc/source/whatsnew/v0.17.1.rst doc/source/whatsnew/v0.18.0.rst