
Commit f9ff20b

Merge branch 'master' of https://github.com/pandas-dev/pandas into bool_ops3
2 parents f19a596 + 52559f5

106 files changed: +3215 / -2341 lines


README.md (+4, -1)

@@ -216,13 +216,16 @@ Further, general questions and discussions can also take place on the [pydata ma
 ## Discussion and Development
 Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.

-## Contributing to pandas
+## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
+
 All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.

 A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)**

 If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out.

+You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
+
 Or maybe through using pandas you have an idea of your own or are looking for something in the documentation and thinking ‘this can be improved’...you can do something about it!

 Feel free to ask questions on the [mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata) or on [Gitter](https://gitter.im/pydata/pandas).

asv_bench/benchmarks/groupby.py (+19, -7)

@@ -11,6 +11,13 @@
 from .pandas_vb_common import setup  # noqa


+method_blacklist = {
+    'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
+               'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
+               'var', 'mad', 'describe', 'std'}
+}
+
+
 class ApplyDictReturn(object):
     goal_time = 0.2

@@ -153,6 +160,7 @@ def time_frame_nth_any(self, df):
     def time_frame_nth(self, df):
         df.groupby(0).nth(0)

+
     def time_series_nth_any(self, df):
         df[1].groupby(df[0]).nth(0, dropna='any')

@@ -369,23 +377,27 @@ class GroupByMethods(object):
     goal_time = 0.2

     param_names = ['dtype', 'method']
-    params = [['int', 'float'],
-              ['all', 'any', 'count', 'cumcount', 'cummax', 'cummin',
-               'cumprod', 'cumsum', 'describe', 'first', 'head', 'last', 'mad',
-               'max', 'min', 'median', 'mean', 'nunique', 'pct_change', 'prod',
-               'rank', 'sem', 'shift', 'size', 'skew', 'std', 'sum', 'tail',
-               'unique', 'value_counts', 'var']]
+    params = [['int', 'float', 'object'],
+              ['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
+               'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
+               'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
+               'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
+               'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]

     def setup(self, dtype, method):
+        if method in method_blacklist.get(dtype, {}):
+            raise NotImplementedError  # skip benchmark
         ngroups = 1000
         size = ngroups * 2
         rng = np.arange(ngroups)
         values = rng.take(np.random.randint(0, ngroups, size=size))
         if dtype == 'int':
             key = np.random.randint(0, size, size=size)
-        else:
+        elif dtype == 'float':
             key = np.concatenate([np.random.random(ngroups) * 0.1,
                                   np.random.random(ngroups) * 10.0])
+        elif dtype == 'object':
+            key = ['foo'] * size

         df = DataFrame({'values': values, 'key': key})
         self.df_groupby_method = getattr(df.groupby('key')['values'], method)
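
The ``method_blacklist`` lookup works together with airspeed velocity's convention that a ``NotImplementedError`` raised in ``setup`` skips that parameter combination rather than failing it, so unsupported object-dtype methods are simply excluded from the run. A minimal standalone sketch of the same lookup pattern (illustrative only, independent of asv):

    # Sketch of the skip pattern used above; asv interprets NotImplementedError
    # raised in setup() as "skip this parameter combination".
    method_blacklist = {
        'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
                   'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change',
                   'min', 'var', 'mad', 'describe', 'std'},
    }

    def should_skip(dtype, method):
        # .get() falls back to an empty set for dtypes without an entry,
        # so the membership test never raises.
        return method in method_blacklist.get(dtype, set())

    assert should_skip('object', 'mean')
    assert not should_skip('int', 'mean')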

ci/lint.sh (+9)

@@ -111,6 +111,15 @@ if [ "$LINT" ]; then
         RET=1
     fi

+    # Check for the following code in the extension array base tests
+    # tm.assert_frame_equal
+    # tm.assert_series_equal
+    grep -r -E --include '*.py' --exclude base.py 'tm.assert_(series|frame)_equal' pandas/tests/extension/base
+
+    if [ $? = "0" ]; then
+        RET=1
+    fi
+
     echo "Check for invalid testing DONE"

     # Check for imports from pandas.core.common instead
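
Note the inverted logic: ``grep`` exits with status 0 when it finds a match, so a hit on ``tm.assert_(series|frame)_equal`` under ``pandas/tests/extension/base`` marks the lint run as failed. The intent, presumably, is that the extension base tests route assertions through overridable class attributes instead of calling ``tm`` directly; a hypothetical Python sketch of that indirection (class names are illustrative, not the exact pandas test classes):

    import pandas.util.testing as tm

    class BaseExtensionTests(object):
        # Assertions exposed as class attributes so third-party ExtensionArray
        # test suites can override how equality is checked.
        assert_series_equal = staticmethod(tm.assert_series_equal)
        assert_frame_equal = staticmethod(tm.assert_frame_equal)

    class MyArrayTests(BaseExtensionTests):
        def check_roundtrip(self, result, expected):
            # Preferred style: go through the class attribute, not tm directly.
            self.assert_series_equal(result, expected)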

ci/requirements-2.7.sh (+1, -1)

@@ -4,4 +4,4 @@ source activate pandas

 echo "install 27"

-conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 fastparquet
+conda install -n pandas -c conda-forge feather-format pyarrow=0.4.1 jemalloc=4.5.0.post fastparquet

ci/requirements-3.6_DOC.run (+1, -1)

@@ -1,7 +1,7 @@
 ipython
 ipykernel
 ipywidgets
-sphinx=1.5*
+sphinx
 nbconvert
 nbformat
 notebook

ci/requirements_dev.txt (+1, -1)

@@ -7,4 +7,4 @@ pytest>=3.1
 python-dateutil>=2.5.0
 pytz
 setuptools>=3.3
-sphinx=1.5*
+sphinx

doc/source/api.rst (+12, -8)

@@ -6,19 +6,18 @@ API Reference
 *************

 This page gives an overview of all public pandas objects, functions and
-methods. In general, all classes and functions exposed in the top-level
-``pandas.*`` namespace are regarded as public.
+methods. All classes and functions exposed in ``pandas.*`` namespace are public.

-Further some of the subpackages are public, including ``pandas.errors``,
-``pandas.plotting``, and ``pandas.testing``. Certain functions in the
-``pandas.io`` and ``pandas.tseries`` submodules are public as well (those
-mentioned in the documentation). Further, the ``pandas.api.types`` subpackage
-holds some public functions related to data types in pandas.
+Some subpackages are public which include ``pandas.errors``,
+``pandas.plotting``, and ``pandas.testing``. Public functions in
+``pandas.io`` and ``pandas.tseries`` submodules are mentioned in
+the documentation. ``pandas.api.types`` subpackage holds some
+public functions related to data types in pandas.


 .. warning::

-   The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are considered to be PRIVATE. Stability of functionality in those modules in not guaranteed.
+   The ``pandas.core``, ``pandas.compat``, and ``pandas.util`` top-level modules are PRIVATE. Stable functionality in such modules is not guaranteed.


 .. _api.functions:

@@ -2180,8 +2179,12 @@ Computations / Descriptive Stats
 .. autosummary::
    :toctree: generated/

+   GroupBy.all
+   GroupBy.any
+   GroupBy.bfill
    GroupBy.count
    GroupBy.cumcount
+   GroupBy.ffill
    GroupBy.first
    GroupBy.head
    GroupBy.last

@@ -2193,6 +2196,7 @@ Computations / Descriptive Stats
    GroupBy.nth
    GroupBy.ohlc
    GroupBy.prod
+   GroupBy.rank
    GroupBy.size
    GroupBy.sem
    GroupBy.std
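
All of the entries added to the table are existing ``GroupBy`` methods; a quick illustration with made-up data (assuming a pandas version that includes them):

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({'key': ['a', 'a', 'b', 'b'],
                       'val': [1.0, np.nan, 3.0, 4.0]})
    grouped = df.groupby('key')['val']

    grouped.any()    # per-group reduction: any truthy value?
    grouped.all()    # per-group reduction: all values truthy?
    grouped.ffill()  # forward-fill missing values within each group
    grouped.bfill()  # backward-fill missing values within each group
    grouped.rank()   # rank values within each group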

doc/source/basics.rst (+3, -3)

@@ -746,7 +746,7 @@ What if the function you wish to apply takes its data as, say, the second argume
 In this case, provide ``pipe`` with a tuple of ``(callable, data_keyword)``.
 ``.pipe`` will route the ``DataFrame`` to the argument specified in the tuple.

-For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.poisson, 'data')`` to ``pipe``:
+For example, we can fit a regression using statsmodels. Their API expects a formula first and a ``DataFrame`` as the second argument, ``data``. We pass in the function, keyword pair ``(sm.ols, 'data')`` to ``pipe``:

 .. ipython:: python

@@ -756,7 +756,7 @@ For example, we can fit a regression using statsmodels. Their API expects a form

    (bb.query('h > 0')
     .assign(ln_h = lambda df: np.log(df.h))
-    .pipe((sm.poisson, 'data'), 'hr ~ ln_h + year + g + C(lg)')
+    .pipe((sm.ols, 'data'), 'hr ~ ln_h + year + g + C(lg)')
     .fit()
     .summary()
    )

@@ -2312,4 +2312,4 @@ All NumPy dtypes are subclasses of ``numpy.generic``:
 .. note::

    Pandas also defines the types ``category``, and ``datetime64[ns, tz]``, which are not integrated into the normal
-   NumPy hierarchy and wont show up with the above function.
+   NumPy hierarchy and won't show up with the above function.

doc/source/categorical.rst (+84, -14)

@@ -46,9 +46,14 @@ The categorical data type is useful in the following cases:

 See also the :ref:`API docs on categoricals<api.categorical>`.

+.. _categorical.objectcreation:
+
 Object Creation
 ---------------

+Series Creation
+~~~~~~~~~~~~~~~
+
 Categorical ``Series`` or columns in a ``DataFrame`` can be created in several ways:

 By specifying ``dtype="category"`` when constructing a ``Series``:

@@ -77,7 +82,7 @@ discrete bins. See the :ref:`example on tiling <reshaping.tile.cut>` in the docs
    df['group'] = pd.cut(df.value, range(0, 105, 10), right=False, labels=labels)
    df.head(10)

-By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to a `DataFrame`.
+By passing a :class:`pandas.Categorical` object to a ``Series`` or assigning it to a ``DataFrame``.

 .. ipython:: python

@@ -89,6 +94,55 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to
    df["B"] = raw_cat
    df

+Categorical data has a specific ``category`` :ref:`dtype <basics.dtypes>`:
+
+.. ipython:: python
+
+   df.dtypes
+
+DataFrame Creation
+~~~~~~~~~~~~~~~~~~
+
+Similar to the previous section where a single column was converted to categorical, all columns in a
+``DataFrame`` can be batch converted to categorical either during or after construction.
+
+This can be done during construction by specifying ``dtype="category"`` in the ``DataFrame`` constructor:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')}, dtype="category")
+   df.dtypes
+
+Note that the categories present in each column differ; the conversion is done column by column, so
+only labels present in a given column are categories:
+
+.. ipython:: python
+
+   df['A']
+   df['B']
+
+
+.. versionadded:: 0.23.0
+
+Analogously, all columns in an existing ``DataFrame`` can be batch converted using :meth:`DataFrame.astype`:
+
+.. ipython:: python
+
+   df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')})
+   df_cat = df.astype('category')
+   df_cat.dtypes
+
+This conversion is likewise done column by column:
+
+.. ipython:: python
+
+   df_cat['A']
+   df_cat['B']
+
+
+Controlling Behavior
+~~~~~~~~~~~~~~~~~~~~
+
 In the examples above where we passed ``dtype='category'``, we used the default
 behavior:

@@ -108,21 +162,36 @@ of :class:`~pandas.api.types.CategoricalDtype`.
    s_cat = s.astype(cat_type)
    s_cat

-Categorical data has a specific ``category`` :ref:`dtype <basics.dtypes>`:
+Similarly, a ``CategoricalDtype`` can be used with a ``DataFrame`` to ensure that categories
+are consistent among all columns.

 .. ipython:: python

-   df.dtypes
+   df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')})
+   cat_type = CategoricalDtype(categories=list('abcd'),
+                               ordered=True)
+   df_cat = df.astype(cat_type)
+   df_cat['A']
+   df_cat['B']

 .. note::

-   In contrast to R's `factor` function, categorical data is not converting input values to
-   strings and categories will end up the same data type as the original values.
+   To perform table-wise conversion, where all labels in the entire ``DataFrame`` are used as
+   categories for each column, the ``categories`` parameter can be determined programmatically by
+   ``categories = pd.unique(df.values.ravel())``.

-.. note::
+If you already have ``codes`` and ``categories``, you can use the
+:func:`~pandas.Categorical.from_codes` constructor to save the factorize step
+during normal constructor mode:

-   In contrast to R's `factor` function, there is currently no way to assign/change labels at
-   creation time. Use `categories` to change the categories after creation time.
+.. ipython:: python
+
+   splitter = np.random.choice([0,1], 5, p=[0.5,0.5])
+   s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"]))
+
+
+Regaining Original Data
+~~~~~~~~~~~~~~~~~~~~~~~

 To get back to the original ``Series`` or NumPy array, use
 ``Series.astype(original_dtype)`` or ``np.asarray(categorical)``:

@@ -136,14 +205,15 @@ To get back to the original ``Series`` or NumPy array, use
    s2.astype(str)
    np.asarray(s2)

-If you already have `codes` and `categories`, you can use the
-:func:`~pandas.Categorical.from_codes` constructor to save the factorize step
-during normal constructor mode:
+.. note::

-.. ipython:: python
+   In contrast to R's `factor` function, categorical data is not converting input values to
+   strings; categories will end up the same data type as the original values.

-   splitter = np.random.choice([0,1], 5, p=[0.5,0.5])
-   s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"]))
+.. note::
+
+   In contrast to R's `factor` function, there is currently no way to assign/change labels at
+   creation time. Use `categories` to change the categories after creation time.

 .. _categorical.categoricaldtype:
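
The table-wise conversion mentioned in the new note can be spelled out in a few lines; a sketch under the same assumptions as the surrounding docs (variable names are illustrative):

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    df = pd.DataFrame({'A': list('abca'), 'B': list('bccd')})

    # Use every label appearing anywhere in the frame as the shared categories.
    categories = pd.unique(df.values.ravel())
    shared_dtype = CategoricalDtype(categories=categories)

    df_cat = df.astype(shared_dtype)
    df_cat['A'].cat.categories  # identical categories for every column
    df_cat['B'].cat.categories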

doc/source/dsintro.rst (+14, -23)

@@ -364,6 +364,19 @@ and returns a DataFrame. It operates like the ``DataFrame`` constructor except
 for the ``orient`` parameter which is ``'columns'`` by default, but which can be
 set to ``'index'`` in order to use the dict keys as row labels.

+
+.. ipython:: python
+
+   pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]))
+
+If you pass ``orient='index'``, the keys will be the row labels. In this
+case, you can also pass the desired column names:
+
+.. ipython:: python
+
+   pd.DataFrame.from_dict(dict([('A', [1, 2, 3]), ('B', [4, 5, 6])]),
+                          orient='index', columns=['one', 'two', 'three'])
+
 .. _basics.dataframe.from_records:

 **DataFrame.from_records**

@@ -378,28 +391,6 @@ dtype. For example:
    data
    pd.DataFrame.from_records(data, index='C')

-.. _basics.dataframe.from_items:
-
-**DataFrame.from_items**
-
-``DataFrame.from_items`` works analogously to the form of the ``dict``
-constructor that takes a sequence of ``(key, value)`` pairs, where the keys are
-column (or row, in the case of ``orient='index'``) names, and the value are the
-column values (or row values). This can be useful for constructing a DataFrame
-with the columns in a particular order without having to pass an explicit list
-of columns:
-
-.. ipython:: python
-
-   pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])])
-
-If you pass ``orient='index'``, the keys will be the row labels. But in this
-case you must also pass the desired column names:
-
-.. ipython:: python
-
-   pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])],
-                           orient='index', columns=['one', 'two', 'three'])

 Column selection, addition, deletion
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

@@ -539,7 +530,7 @@ To write code compatible with all versions of Python, split the assignment in tw
 you'll need to take care when passing ``assign`` expressions that

 * Updating an existing column
-* Refering to the newly updated column in the same ``assign``
+* Referring to the newly updated column in the same ``assign``

 For example, we'll update column "A" and then refer to it when creating "B".
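
The removed ``DataFrame.from_items`` examples have a natural replacement in the ``from_dict`` examples added above; a hedged sketch (assuming a pandas version where ``from_dict`` accepts ``columns``, as the new docs show) of keeping columns in a particular order with an ordered mapping:

    from collections import OrderedDict
    import pandas as pd

    # An ordered mapping reproduces the "columns in a particular order"
    # behaviour that the removed from_items examples demonstrated.
    data = OrderedDict([('A', [1, 2, 3]), ('B', [4, 5, 6])])

    pd.DataFrame.from_dict(data)
    pd.DataFrame.from_dict(data, orient='index', columns=['one', 'two', 'three'])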
