HannahFerch
diff --git a/‎.coveragerc
-29 b/‎.coveragerc
-29
diff --git a/‎Makefile
+1-1 b/‎Makefile
+1-1
diff --git a/‎asv_bench/benchmarks/strings.py
+27-4 b/‎asv_bench/benchmarks/strings.py
+27-4
diff --git a/‎ci/appveyor-27.yaml
+1-1 b/‎ci/appveyor-27.yaml
+1-1
diff --git a/‎ci/appveyor-36.yaml
+1-1 b/‎ci/appveyor-36.yaml
+1-1
diff --git a/‎ci/circle-27-compat.yaml
+1-1 b/‎ci/circle-27-compat.yaml
+1-1
diff --git a/‎ci/circle-36-locale.yaml
+1-1 b/‎ci/circle-36-locale.yaml
+1-1
diff --git a/‎ci/circle-36-locale_slow.yaml
+1-1 b/‎ci/circle-36-locale_slow.yaml
+1-1
diff --git a/‎ci/lint.sh
+6-1 b/‎ci/lint.sh
+6-1
diff --git a/‎ci/requirements-optional-conda.txt
+1-1 b/‎ci/requirements-optional-conda.txt
+1-1
diff --git a/‎ci/requirements-optional-pip.txt
+2-2 b/‎ci/requirements-optional-pip.txt
+2-2
diff --git a/‎ci/script_single.sh
+2 b/‎ci/script_single.sh
+2
diff --git a/‎ci/travis-35-osx.yaml
+1-1 b/‎ci/travis-35-osx.yaml
+1-1
diff --git a/‎ci/travis-36-doc.yaml
+1-1 b/‎ci/travis-36-doc.yaml
+1-1
diff --git a/‎ci/travis-36-slow.yaml
+1-1 b/‎ci/travis-36-slow.yaml
+1-1
diff --git a/‎ci/travis-36.yaml
+1-1 b/‎ci/travis-36.yaml
+1-1
diff --git a/‎doc/source/advanced.rst
+27-26 b/‎doc/source/advanced.rst
+27-26
diff --git a/‎doc/source/api.rst
+1 b/‎doc/source/api.rst
+1
@@ -13,7 +13,7 @@ build: clean_pyc
 	python setup.py build_ext --inplace
 
 lint-diff:
-	git diff master --name-only -- "*.py" | grep "pandas" | xargs flake8
+	git diff master --name-only -- "*.py" | grep -E "pandas|scripts" | xargs flake8
 
 develop: build
 	-python setup.py develop
 
@@ -1,7 +1,7 @@
 import warnings
 
 import numpy as np
-from pandas import Series
+from pandas import Series, DataFrame
 import pandas.util.testing as tm
 
 
@@ -12,9 +12,6 @@ class Methods(object):
     def setup(self):
         self.s = Series(tm.makeStringIndex(10**5))
 
-    def time_cat(self):
-        self.s.str.cat(sep=',')
-
     def time_center(self):
         self.s.str.center(100)
 
@@ -87,6 +84,32 @@ def time_repeat(self, repeats):
         self.s.str.repeat(self.repeat)
 
 
+class Cat(object):
+
+    goal_time = 0.2
+    params = ([0, 3], [None, ','], [None, '-'], [0.0, 0.001, 0.15])
+    param_names = ['other_cols', 'sep', 'na_rep', 'na_frac']
+
+    def setup(self, other_cols, sep, na_rep, na_frac):
+        N = 10 ** 5
+        mask_gen = lambda: np.random.choice([True, False], N,
+                                            p=[1 - na_frac, na_frac])
+        self.s = Series(tm.makeStringIndex(N)).where(mask_gen())
+        if other_cols == 0:
+            # str.cat self-concatenates only for others=None
+            self.others = None
+        else:
+            self.others = DataFrame({i: tm.makeStringIndex(N).where(mask_gen())
+                                     for i in range(other_cols)})
+
+    def time_cat(self, other_cols, sep, na_rep, na_frac):
+        # before the concatenation (one caller + other_cols columns), the total
+        # expected fraction of rows containing any NaN is:
+        # reduce(lambda t, _: t + (1 - t) * na_frac, range(other_cols + 1), 0)
+        # for other_cols=3 and na_frac=0.15, this works out to ~48%
+        self.s.str.cat(others=self.others, sep=sep, na_rep=na_rep)
+
+
 class Contains(object):
 
     goal_time = 0.2
 
@@ -13,7 +13,7 @@ dependencies:
   - matplotlib
   - numexpr
   - numpy=1.12*
-  - openpyxl
+  - openpyxl=2.5.5
   - pytables
   - python=2.7.*
   - pytz
 
@@ -10,7 +10,7 @@ dependencies:
   - matplotlib
   - numexpr
   - numpy=1.14*
-  - openpyxl
+  - openpyxl=2.5.5
   - pyarrow
   - pytables
   - python-dateutil
 
@@ -8,7 +8,7 @@ dependencies:
   - jinja2=2.8
   - numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr
   - numpy=1.9.3
-  - openpyxl
+  - openpyxl=2.5.5
   - psycopg2
   - pytables=3.2.2
   - python-dateutil=2.5.0
 
@@ -13,7 +13,7 @@ dependencies:
   - nomkl
   - numexpr
   - numpy
-  - openpyxl
+  - openpyxl=2.5.5
   - psycopg2
   - pymysql
   - pytables
 
@@ -14,7 +14,7 @@ dependencies:
   - nomkl
   - numexpr
   - numpy
-  - openpyxl
+  - openpyxl=2.5.5
   - psycopg2
   - pymysql
   - pytables
 
@@ -24,6 +24,11 @@ if [ "$LINT" ]; then
     if [ $? -ne "0" ]; then
         RET=1
     fi
+
+    flake8 scripts/tests --filename=*.py
+    if [ $? -ne "0" ]; then
+        RET=1
+    fi
     echo "Linting *.py DONE"
 
     echo "Linting setup.py"
@@ -175,7 +180,7 @@ if [ "$LINT" ]; then
         RET=1
     fi
     echo "Check for old-style classes DONE"
-    
+
     echo "Check for backticks incorrectly rendering because of missing spaces"
     grep -R --include="*.rst" -E "[a-zA-Z0-9]\`\`?[a-zA-Z0-9]" doc/source/
 
 
@@ -12,7 +12,7 @@ lxml
 matplotlib
 nbsphinx
 numexpr
-openpyxl
+openpyxl=2.5.5
 pyarrow
 pymysql
 pytables
 
@@ -14,7 +14,7 @@ lxml
 matplotlib
 nbsphinx
 numexpr
-openpyxl
+openpyxl==2.5.5
 pyarrow
 pymysql
 tables
@@ -28,4 +28,4 @@ statsmodels
 xarray
 xlrd
 xlsxwriter
-xlwt
+xlwt
@@ -28,6 +28,8 @@ elif [ "$COVERAGE" ]; then
     echo pytest -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
     pytest      -s -m "single" -r xXs --strict --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
 
+    echo pytest -s -r xXs --strict scripts
+    pytest      -s -r xXs --strict scripts
 else
     echo pytest -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas
     pytest      -m "single" -r xXs --junitxml=/tmp/single.xml --strict $TEST_ARGS pandas # TODO: doctest
 
@@ -12,7 +12,7 @@ dependencies:
   - nomkl
   - numexpr
   - numpy=1.10.4
-  - openpyxl
+  - openpyxl=2.5.5
   - pytables
   - python=3.5*
   - pytz
 
@@ -22,7 +22,7 @@ dependencies:
   - notebook
   - numexpr
   - numpy=1.13*
-  - openpyxl
+  - openpyxl=2.5.5
   - pandoc
   - pyqt
   - pytables
 
@@ -10,7 +10,7 @@ dependencies:
   - matplotlib
   - numexpr
   - numpy
-  - openpyxl
+  - openpyxl=2.5.5
   - patsy
   - psycopg2
   - pymysql
 
@@ -18,7 +18,7 @@ dependencies:
   - nomkl
   - numexpr
   - numpy
-  - openpyxl
+  - openpyxl=2.5.5
   - psycopg2
   - pyarrow
   - pymysql
 
@@ -15,7 +15,7 @@
 MultiIndex / Advanced Indexing
 ******************************
 
-This section covers indexing with a ``MultiIndex`` and more advanced indexing features.
+This section covers indexing with a ``MultiIndex`` and :ref:`more advanced indexing features <indexing.index_types>`.
 
 See the :ref:`Indexing and Selecting Data <indexing>` for general indexing documentation.
 
@@ -51,13 +51,13 @@ See the :ref:`cookbook<cookbook.multi_index>` for some advanced strategies.
 Creating a MultiIndex (hierarchical index) object
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The ``MultiIndex`` object is the hierarchical analogue of the standard
-``Index`` object which typically stores the axis labels in pandas objects. You
+The :class:`MultiIndex` object is the hierarchical analogue of the standard
+:class:`Index` object which typically stores the axis labels in pandas objects. You
 can think of ``MultiIndex`` as an array of tuples where each tuple is unique. A
 ``MultiIndex`` can be created from a list of arrays (using
-``MultiIndex.from_arrays``), an array of tuples (using
-``MultiIndex.from_tuples``), or a crossed set of iterables (using
-``MultiIndex.from_product``).  The ``Index`` constructor will attempt to return
+:meth:`MultiIndex.from_arrays`), an array of tuples (using
+:meth:`MultiIndex.from_tuples`), or a crossed set of iterables (using
+:meth:`MultiIndex.from_product`).  The ``Index`` constructor will attempt to return
 a ``MultiIndex`` when it is passed a list of tuples.  The following examples
 demonstrate different ways to initialize MultiIndexes.
 
@@ -76,15 +76,15 @@ demonstrate different ways to initialize MultiIndexes.
    s
 
 When you want every pairing of the elements in two iterables, it can be easier
-to use the ``MultiIndex.from_product`` function:
+to use the :meth:`MultiIndex.from_product` method:
 
 .. ipython:: python
 
    iterables = [['bar', 'baz', 'foo', 'qux'], ['one', 'two']]
    pd.MultiIndex.from_product(iterables, names=['first', 'second'])
 
 As a convenience, you can pass a list of arrays directly into Series or
-DataFrame to construct a MultiIndex automatically:
+DataFrame to construct a ``MultiIndex`` automatically:
 
 .. ipython:: python
 
@@ -140,7 +140,7 @@ may wish to generate your own ``MultiIndex`` when preparing the data set.
 Reconstructing the level labels
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-The method ``get_level_values`` will return a vector of the labels for each
+The method :meth:`~MultiIndex.get_level_values` will return a vector of the labels for each
 location at a particular level:
 
 .. ipython:: python
@@ -183,7 +183,7 @@ For example:
 
 This is done to avoid a recomputation of the levels in order to make slicing
 highly performant. If you want to see only the used levels, you can use the
-:func:`MultiIndex.get_level_values` method.
+:meth:`~MultiIndex.get_level_values` method.
 
 .. ipython:: python
 
@@ -193,7 +193,7 @@ highly performant. If you want to see only the used levels, you can use the
    df[['foo','qux']].columns.get_level_values(0)
 
 To reconstruct the ``MultiIndex`` with only the used levels, the
-``remove_unused_levels`` method may be used.
+:meth:`~MultiIndex.remove_unused_levels` method may be used.
 
 .. versionadded:: 0.20.0
 
@@ -400,8 +400,8 @@ You can use a right-hand-side of an alignable object as well.
 Cross-section
 ~~~~~~~~~~~~~
 
-The ``xs`` method of ``DataFrame`` additionally takes a level argument to make
-selecting data at a particular level of a MultiIndex easier.
+The :meth:`~DataFrame.xs` method of ``DataFrame`` additionally takes a level argument to make
+selecting data at a particular level of a ``MultiIndex`` easier.
 
 .. ipython:: python
 
@@ -519,7 +519,7 @@ to be sorted. As with any index, you can use ``sort_index``.
 
 .. _advanced.sortlevel_byname:
 
-You may also pass a level name to ``sort_index`` if the MultiIndex levels
+You may also pass a level name to ``sort_index`` if the ``MultiIndex`` levels
 are named.
 
 .. ipython:: python
@@ -566,7 +566,8 @@ Furthermore, if you try to index something that is not fully lexsorted, this can
     In [5]: dfm.loc[(0,'y'):(1, 'z')]
     UnsortedIndexError: 'Key length (2) was greater than MultiIndex lexsort depth (1)'
 
-The ``is_lexsorted()`` method on an ``Index`` show if the index is sorted, and the ``lexsort_depth`` property returns the sort depth:
+The :meth:`~MultiIndex.is_lexsorted` method on a ``MultiIndex`` shows if the
+index is sorted, and the ``lexsort_depth`` property returns the sort depth:
 
 .. ipython:: python
 
@@ -591,8 +592,8 @@ Take Methods
 
 .. _advanced.take:
 
-Similar to NumPy ndarrays, pandas Index, Series, and DataFrame also provides
-the ``take`` method that retrieves elements along a given axis at the given
+Similar to NumPy ndarrays, pandas ``Index``, ``Series``, and ``DataFrame`` also provide
+the :meth:`~DataFrame.take` method that retrieves elements along a given axis at the given
 indices. The given indices must be either a list or an ndarray of integer
 index positions. ``take`` will also accept negative integers as relative positions to the end of the object.
 
@@ -668,8 +669,8 @@ In the following sub-sections we will highlight some other index types.
 CategoricalIndex
 ~~~~~~~~~~~~~~~~
 
-``CategoricalIndex`` is a type of index that is useful for supporting
-indexing with duplicates. This is a container around a ``Categorical``
+:class:`CategoricalIndex` is a type of index that is useful for supporting
+indexing with duplicates. This is a container around a :class:`Categorical`
 and allows efficient indexing and storage of an index with a large number of duplicated elements.
 
 .. ipython:: python
@@ -758,19 +759,19 @@ Int64Index and RangeIndex
 
    Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here <whatsnew_0180.float_indexers>`.
 
-``Int64Index`` is a fundamental basic index in pandas.
-This is an Immutable array implementing an ordered, sliceable set.
+:class:`Int64Index` is a fundamental basic index in pandas.
+This is an immutable array implementing an ordered, sliceable set.
 Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects.
 
-``RangeIndex`` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
+:class:`RangeIndex` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
 ``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__.
 
 .. _indexing.float64index:
 
 Float64Index
 ~~~~~~~~~~~~
 
-By default a ``Float64Index`` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
+By default a :class:`Float64Index` will be automatically created when passing floating, or mixed-integer-floating values in index creation.
 This enables a pure label-based slicing paradigm that makes ``[],ix,loc`` for scalar indexing and slicing work exactly the
 same.
 
@@ -875,9 +876,9 @@ IntervalIndex
 
 .. versionadded:: 0.20.0
 
-:class:`IntervalIndex` together with its own dtype, ``interval`` as well as the
-:class:`Interval` scalar type,  allow first-class support in pandas for interval
-notation.
+:class:`IntervalIndex` together with its own dtype, :class:`~pandas.api.types.IntervalDtype`
+as well as the :class:`Interval` scalar type, allow first-class support in pandas
+for interval notation.
 
 The ``IntervalIndex`` allows some unique indexing and is also used as a
 return type for the categories in :func:`cut` and :func:`qcut`.
 
@@ -2559,6 +2559,7 @@ objects.
 .. autosummary::
    :toctree: generated/
 
+   api.extensions.register_extension_dtype
    api.extensions.register_dataframe_accessor
    api.extensions.register_series_accessor
    api.extensions.register_index_accessor