diff --git a/doc/source/dsintro.rst b/doc/source/dsintro.rst index 7870bdbeb97d3..c1d034d0d8e58 100644 --- a/doc/source/dsintro.rst +++ b/doc/source/dsintro.rst @@ -482,19 +482,23 @@ column-wise: .. ipython:: python index = date_range('1/1/2000', periods=8) - df = DataFrame(randn(8, 3), index=index, - columns=['A', 'B', 'C']) + df = DataFrame(randn(8, 3), index=index, columns=list('ABC')) df type(df['A']) df - df['A'] -Technical purity aside, this case is so common in practice that supporting the -special case is preferable to the alternative of forcing the user to transpose -and do column-based alignment like so: +.. warning:: -.. ipython:: python + .. code-block:: python + + df - df['A'] + + is now deprecated and will be removed in a future release. The preferred way + to replicate this behavior is + + .. code-block:: python - (df.T - df['A']).T + df.sub(df['A'], axis=0) For explicit control over the matching and broadcasting behavior, see the section on :ref:`flexible binary operations `. diff --git a/doc/source/timeseries.rst b/doc/source/timeseries.rst index 3f6a4b7c59067..7f572c8c8e191 100644 --- a/doc/source/timeseries.rst +++ b/doc/source/timeseries.rst @@ -930,89 +930,103 @@ They can be both positive and negative. .. ipython:: python - from datetime import datetime, timedelta - s = Series(date_range('2012-1-1', periods=3, freq='D')) - td = Series([ timedelta(days=i) for i in range(3) ]) - df = DataFrame(dict(A = s, B = td)) - df - df['C'] = df['A'] + df['B'] - df - df.dtypes - - s - s.max() - s - datetime(2011,1,1,3,5) - s + timedelta(minutes=5) + from datetime import datetime, timedelta + s = Series(date_range('2012-1-1', periods=3, freq='D')) + td = Series([ timedelta(days=i) for i in range(3) ]) + df = DataFrame(dict(A = s, B = td)) + df + df['C'] = df['A'] + df['B'] + df + df.dtypes + + s - s.max() + s - datetime(2011,1,1,3,5) + s + timedelta(minutes=5) Getting scalar results from a ``timedelta64[ns]`` series +.. 
ipython:: python + :suppress: + + from distutils.version import LooseVersion + .. ipython:: python y = s - s[0] y - y.apply(lambda x: x.item().total_seconds()) - y.apply(lambda x: x.item().days) - -.. note:: - These operations are different in numpy 1.6.2 and in numpy >= 1.7. The ``timedelta64[ns]`` scalar - type in 1.6.2 is much like a ``datetime.timedelta``, while in 1.7 it is a nanosecond based integer. - A future version of pandas will make this transparent. + if LooseVersion(np.__version__) <= '1.6.2': + y.apply(lambda x: x.item().total_seconds()) + y.apply(lambda x: x.item().days) + else: + y.apply(lambda x: x / np.timedelta64(1, 's')) + y.apply(lambda x: x / np.timedelta64(1, 'D')) + +.. note:: - These are the equivalent operation to above in numpy >= 1.7 + As you can see from the conditional statement above, these operations are + different in numpy 1.6.2 and in numpy >= 1.7. The ``timedelta64[ns]`` scalar + type in 1.6.2 is much like a ``datetime.timedelta``, while in 1.7 it is a + nanosecond based integer. A future version of pandas will make this + transparent. - ``y.apply(lambda x: x.item()/np.timedelta64(1,'s'))`` +.. note:: - ``y.apply(lambda x: x.item()/np.timedelta64(1,'D'))`` + In numpy >= 1.7 dividing a ``timedelta64`` array by another ``timedelta64`` + array will yield an array with dtype ``np.float64``. Series of timedeltas with ``NaT`` values are supported .. ipython:: python - y = s - s.shift() - y + y = s - s.shift() + y + The can be set to ``NaT`` using ``np.nan`` analagously to datetimes .. ipython:: python - y[1] = np.nan - y + y[1] = np.nan + y Operands can also appear in a reversed order (a singluar object operated with a Series) .. ipython:: python - s.max() - s - datetime(2011,1,1,3,5) - s - timedelta(minutes=5) + s + s.max() - s + datetime(2011,1,1,3,5) - s + timedelta(minutes=5) + s Some timedelta numeric like operations are supported. .. 
ipython:: python - td - timedelta(minutes=5,seconds=5,microseconds=5) + td - timedelta(minutes=5, seconds=5, microseconds=5) ``min, max`` and the corresponding ``idxmin, idxmax`` operations are support on frames .. ipython:: python - df = DataFrame(dict(A = s - Timestamp('20120101')-timedelta(minutes=5,seconds=5), - B = s - Series(date_range('2012-1-2', periods=3, freq='D')))) - df + A = s - Timestamp('20120101') - timedelta(minutes=5, seconds=5) + B = s - Series(date_range('2012-1-2', periods=3, freq='D')) + df = DataFrame(dict(A=A, B=B)) + df - df.min() - df.min(axis=1) + df.min() + df.min(axis=1) - df.idxmin() - df.idxmax() + df.idxmin() + df.idxmax() -``min, max`` operations are support on series, these return a single element ``timedelta64[ns]`` Series (this avoids -having to deal with numpy timedelta64 issues). ``idxmin, idxmax`` are supported as well. +``min, max`` operations are supported on series, these return a single element +``timedelta64[ns]`` Series (this avoids having to deal with numpy timedelta64 +issues). ``idxmin, idxmax`` are supported as well. .. ipython:: python - df.min().max() - df.min(axis=1).min() + df.min().max() + df.min(axis=1).min() - df.min().idxmax() - df.min(axis=1).idxmin() + df.min().idxmax() + df.min(axis=1).idxmin() diff --git a/doc/source/v0.10.1.txt b/doc/source/v0.10.1.txt index 3c22e9552c3a2..dafa4300af0e3 100644 --- a/doc/source/v0.10.1.txt +++ b/doc/source/v0.10.1.txt @@ -69,7 +69,7 @@ Retrieving unique values in an indexable or data column. 
import warnings with warnings.catch_warnings(): - warnings.simplefilter('ignore', category=DeprecationWarning) + warnings.simplefilter('ignore', category=UserWarning) store.unique('df','index') store.unique('df','string') diff --git a/doc/source/visualization.rst b/doc/source/visualization.rst index 63b5920bb0146..f0790396a5c39 100644 --- a/doc/source/visualization.rst +++ b/doc/source/visualization.rst @@ -5,14 +5,14 @@ :suppress: import numpy as np + from numpy.random import randn, rand, randint np.random.seed(123456) - from pandas import * + from pandas import DataFrame, Series, date_range, options import pandas.util.testing as tm - randn = np.random.randn np.set_printoptions(precision=4, suppress=True) import matplotlib.pyplot as plt plt.close('all') - options.display.mpl_style='default' + options.display.mpl_style = 'default' ************************ Plotting with matplotlib @@ -60,8 +60,7 @@ On DataFrame, ``plot`` is a convenience to plot all of the columns with labels: .. ipython:: python - df = DataFrame(randn(1000, 4), index=ts.index, - columns=['A', 'B', 'C', 'D']) + df = DataFrame(randn(1000, 4), index=ts.index, columns=list('ABCD')) df = df.cumsum() @savefig frame_plot_basic.png width=6in @@ -101,7 +100,7 @@ You can plot one column versus another using the `x` and `y` keywords in plt.figure() - df3 = DataFrame(np.random.randn(1000, 2), columns=['B', 'C']).cumsum() + df3 = DataFrame(randn(1000, 2), columns=['B', 'C']).cumsum() df3['A'] = Series(range(len(df))) @savefig df_plot_xy.png width=6in @@ -169,7 +168,7 @@ Here is the default behavior, notice how the x-axis tick labelling is performed: df.A.plot() -Using the ``x_compat`` parameter, you can suppress this bevahior: +Using the ``x_compat`` parameter, you can suppress this behavior: .. ipython:: python @@ -200,6 +199,15 @@ Targeting different subplots You can pass an ``ax`` argument to ``Series.plot`` to plot on a particular axis: +.. 
ipython:: python + :suppress: + + ts = Series(randn(1000), index=date_range('1/1/2000', periods=1000)) + ts = ts.cumsum() + + df = DataFrame(randn(1000, 4), index=ts.index, columns=list('ABCD')) + df = df.cumsum() + .. ipython:: python fig, axes = plt.subplots(nrows=2, ncols=2) @@ -210,6 +218,7 @@ You can pass an ``ax`` argument to ``Series.plot`` to plot on a particular axis: @savefig series_plot_multi.png width=6in df['D'].plot(ax=axes[1,1]); axes[1,1].set_title('D') + .. _visualization.other: Other plotting features @@ -239,7 +248,7 @@ bar plot: .. ipython:: python - df2 = DataFrame(np.random.rand(10, 4), columns=['a', 'b', 'c', 'd']) + df2 = DataFrame(rand(10, 4), columns=['a', 'b', 'c', 'd']) @savefig bar_plot_multi_ex.png width=5in df2.plot(kind='bar'); @@ -298,10 +307,10 @@ New since 0.10.0, the ``by`` keyword can be specified to plot grouped histograms .. ipython:: python - data = Series(np.random.randn(1000)) + data = Series(randn(1000)) @savefig grouped_hist.png width=6in - data.hist(by=np.random.randint(0, 4, 1000)) + data.hist(by=randint(0, 4, 1000)) .. _visualization.box: @@ -317,7 +326,7 @@ a uniform random variable on [0,1). .. ipython:: python - df = DataFrame(np.random.rand(10,5)) + df = DataFrame(rand(10,5)) plt.figure(); @savefig box_plot_ex.png width=6in @@ -328,7 +337,7 @@ groupings. For instance, .. ipython:: python - df = DataFrame(np.random.rand(10,2), columns=['Col1', 'Col2'] ) + df = DataFrame(rand(10,2), columns=['Col1', 'Col2'] ) df['X'] = Series(['A','A','A','A','A','B','B','B','B','B']) plt.figure(); @@ -341,7 +350,7 @@ columns: .. ipython:: python - df = DataFrame(np.random.rand(10,3), columns=['Col1', 'Col2', 'Col3']) + df = DataFrame(rand(10,3), columns=['Col1', 'Col2', 'Col3']) df['X'] = Series(['A','A','A','A','A','B','B','B','B','B']) df['Y'] = Series(['A','B','A','B','A','B','A','B','A','B']) @@ -361,7 +370,7 @@ Scatter plot matrix .. 
ipython:: python from pandas.tools.plotting import scatter_matrix - df = DataFrame(np.random.randn(1000, 4), columns=['a', 'b', 'c', 'd']) + df = DataFrame(randn(1000, 4), columns=['a', 'b', 'c', 'd']) @savefig scatter_matrix_kde.png width=6in scatter_matrix(df, alpha=0.2, figsize=(6, 6), diagonal='kde') @@ -378,7 +387,7 @@ setting `kind='kde'`: .. ipython:: python - ser = Series(np.random.randn(1000)) + ser = Series(randn(1000)) @savefig kde_plot.png width=6in ser.plot(kind='kde') @@ -444,7 +453,7 @@ implies that the underlying data are not random. plt.figure() - data = Series(0.1 * np.random.random(1000) + + data = Series(0.1 * rand(1000) + 0.9 * np.sin(np.linspace(-99 * np.pi, 99 * np.pi, num=1000))) @savefig lag_plot.png width=6in @@ -467,7 +476,7 @@ confidence band. plt.figure() - data = Series(0.7 * np.random.random(1000) + + data = Series(0.7 * rand(1000) + 0.3 * np.sin(np.linspace(-9 * np.pi, 9 * np.pi, num=1000))) @savefig autocorrelation_plot.png width=6in @@ -488,7 +497,7 @@ are what constitutes the bootstrap plot. from pandas.tools.plotting import bootstrap_plot - data = Series(np.random.random(1000)) + data = Series(rand(1000)) @savefig bootstrap_plot.png width=6in bootstrap_plot(data, size=50, samples=500, color='grey')