diff --git a/doc/source/categorical.rst b/doc/source/categorical.rst index 090998570a358..86d0c61398be1 100644 --- a/doc/source/categorical.rst +++ b/doc/source/categorical.rst @@ -129,8 +129,7 @@ To get back to the original Series or `numpy` array, use ``Series.astype(origina s s2 = s.astype('category') s2 - s3 = s2.astype('string') - s3 + s2.astype(str) np.asarray(s2) If you have already `codes` and `categories`, you can use the :func:`~pandas.Categorical.from_codes` diff --git a/doc/source/cookbook.rst b/doc/source/cookbook.rst index a9d0ab5476b66..d2df72b284a12 100644 --- a/doc/source/cookbook.rst +++ b/doc/source/cookbook.rst @@ -7,6 +7,7 @@ import pandas as pd import numpy as np + from pandas.compat import StringIO import random import os @@ -985,9 +986,6 @@ Skip row between header and data .. ipython:: python - from io import StringIO - import pandas as pd - data = """;;;; ;;;; ;;;; @@ -1014,7 +1012,7 @@ Option 1: pass rows explicitly to skiprows .. ipython:: python - pd.read_csv(StringIO(data.decode('UTF-8')), sep=';', skiprows=[11,12], + pd.read_csv(StringIO(data), sep=';', skiprows=[11,12], index_col=0, parse_dates=True, header=10) Option 2: read column names and then data @@ -1022,15 +1020,12 @@ Option 2: read column names and then data .. ipython:: python - pd.read_csv(StringIO(data.decode('UTF-8')), sep=';', - header=10, parse_dates=True, nrows=10).columns - columns = pd.read_csv(StringIO(data.decode('UTF-8')), sep=';', - header=10, parse_dates=True, nrows=10).columns - pd.read_csv(StringIO(data.decode('UTF-8')), sep=';', + pd.read_csv(StringIO(data), sep=';', header=10, nrows=10).columns + columns = pd.read_csv(StringIO(data), sep=';', header=10, nrows=10).columns + pd.read_csv(StringIO(data), sep=';', index_col=0, header=12, parse_dates=True, names=columns) - .. _cookbook.sql: SQL diff --git a/doc/source/gotchas.rst b/doc/source/gotchas.rst index 8a1e06fa6d86c..a1c12044adc34 100644 --- a/doc/source/gotchas.rst +++ b/doc/source/gotchas.rst @@ -249,7 +249,7 @@ normal Python ``list``. Monotonicity of an index can be tested with the ``is_mon .. ipython:: python - df = pd.DataFrame(index=[2,3,3,4,5], columns=['data'], data=range(5)) + df = pd.DataFrame(index=[2,3,3,4,5], columns=['data'], data=list(range(5))) df.index.is_monotonic_increasing # no rows 0 or 1, but still returns rows 2, 3 (both of them), and 4: @@ -263,7 +263,7 @@ On the other hand, if the index is not monotonic, then both slice bounds must be .. ipython:: python - df = pd.DataFrame(index=[2,3,1,4,3,5], columns=['data'], data=range(6)) + df = pd.DataFrame(index=[2,3,1,4,3,5], columns=['data'], data=list(range(6))) df.index.is_monotonic_increasing # OK because 2 and 4 are in the index diff --git a/doc/source/io.rst b/doc/source/io.rst index 9dfe241062952..8ddf4186eba25 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -502,7 +502,7 @@ worth trying. .. ipython:: python :okwarning: - df = pd.DataFrame({'col_1':range(500000) + ['a', 'b'] + range(500000)}) + df = pd.DataFrame({'col_1': list(range(500000)) + ['a', 'b'] + list(range(500000))}) df.to_csv('foo') mixed_df = pd.read_csv('foo') mixed_df['col_1'].apply(type).value_counts() diff --git a/doc/source/reshaping.rst b/doc/source/reshaping.rst index 3a2c48834991f..f90c2960fa30c 100644 --- a/doc/source/reshaping.rst +++ b/doc/source/reshaping.rst @@ -650,10 +650,16 @@ handling of NaN: because of an ordering bug. See also `Here `__ -.. ipython:: python +.. code-block:: ipython + + In [2]: pd.factorize(x, sort=True) + Out[2]: + (array([ 2, 2, -1, 3, 0, 1]), + Index([3.14, inf, u'A', u'B'], dtype='object')) + + In [3]: np.unique(x, return_inverse=True)[::-1] + Out[3]: (array([3, 3, 0, 4, 1, 2]), array([nan, 3.14, inf, 'A', 'B'], dtype=object)) - pd.factorize(x, sort=True) - np.unique(x, return_inverse=True)[::-1] .. note:: If you just want to handle one column as a categorical variable (like R's factor),