pandas-dev
diff --git a/‎asv_bench/benchmarks/index_object.py
Lines changed: 20 additions & 0 deletions b/‎asv_bench/benchmarks/index_object.py
Lines changed: 20 additions & 0 deletions
diff --git a/‎asv_bench/benchmarks/sparse.py
Lines changed: 2 additions & 2 deletions b/‎asv_bench/benchmarks/sparse.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎asv_bench/benchmarks/timeseries.py
Lines changed: 1 addition & 1 deletion b/‎asv_bench/benchmarks/timeseries.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎asv_bench/benchmarks/timestamp.py
Lines changed: 29 additions & 0 deletions b/‎asv_bench/benchmarks/timestamp.py
Lines changed: 29 additions & 0 deletions
diff --git a/‎ci/requirements-3.6_NUMPY_DEV.build
Lines changed: 0 additions & 1 deletion b/‎ci/requirements-3.6_NUMPY_DEV.build
Lines changed: 0 additions & 1 deletion
diff --git a/‎ci/requirements-3.6_NUMPY_DEV.build.sh
Lines changed: 3 additions & 0 deletions b/‎ci/requirements-3.6_NUMPY_DEV.build.sh
Lines changed: 3 additions & 0 deletions
diff --git a/‎ci/requirements-3.6_WIN.run
Lines changed: 0 additions & 1 deletion b/‎ci/requirements-3.6_WIN.run
Lines changed: 0 additions & 1 deletion
diff --git a/‎doc/README.rst
Lines changed: 5 additions & 3 deletions b/‎doc/README.rst
Lines changed: 5 additions & 3 deletions
diff --git a/‎doc/source/10min.rst
Lines changed: 2 additions & 12 deletions b/‎doc/source/10min.rst
Lines changed: 2 additions & 12 deletions
diff --git a/‎doc/source/advanced.rst
Lines changed: 3 additions & 1 deletion b/‎doc/source/advanced.rst
Lines changed: 3 additions & 1 deletion
diff --git a/‎doc/source/api.rst
Lines changed: 18 additions & 1 deletion b/‎doc/source/api.rst
Lines changed: 18 additions & 1 deletion
diff --git a/‎doc/source/categorical.rst
Lines changed: 101 additions & 8 deletions b/‎doc/source/categorical.rst
Lines changed: 101 additions & 8 deletions
diff --git a/‎doc/source/computation.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/computation.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/cookbook.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/cookbook.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/dsintro.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/dsintro.rst
Lines changed: 1 addition & 1 deletion
diff --git a/‎doc/source/gotchas.rst
Lines changed: 1 addition & 1 deletion b/‎doc/source/gotchas.rst
Lines changed: 1 addition & 1 deletion
@@ -199,3 +199,23 @@ def time_datetime_level_values_full(self):
 
     def time_datetime_level_values_sliced(self):
         self.mi[:10].values
+
+
+class Range(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.idx_inc = RangeIndex(start=0, stop=10**7, step=3)
+        self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
+
+    def time_max(self):
+        self.idx_inc.max()
+
+    def time_max_trivial(self):
+        self.idx_dec.max()
+
+    def time_min(self):
+        self.idx_dec.min()
+
+    def time_min_trivial(self):
+        self.idx_inc.min()
@@ -1,4 +1,4 @@
-from itertools import repeat
+import itertools
 
 from .pandas_vb_common import *
 import scipy.sparse
@@ -33,7 +33,7 @@ def time_sparse_from_scipy(self):
         SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005))
 
     def time_sparse_from_dict(self):
-        SparseDataFrame(dict(zip(range(1000), repeat([0]))))
+        SparseDataFrame(dict(zip(range(1000), itertools.repeat([0]))))
 
 
 class sparse_series_from_coo(object):
 
@@ -56,7 +56,7 @@ def setup(self):
         self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
         self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
 
-        self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
+        self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000)
         self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
 
     def time_add_timedelta(self):
 
@@ -1,5 +1,7 @@
 from .pandas_vb_common import *
 from pandas import to_timedelta, Timestamp
+import pytz
+import datetime
 
 
 class TimestampProperties(object):
@@ -58,3 +60,30 @@ def time_is_leap_year(self):
 
     def time_microsecond(self):
         self.ts.microsecond
+
+
+class TimestampOps(object):
+    goal_time = 0.2
+
+    def setup(self):
+        self.ts = Timestamp('2017-08-25 08:16:14')
+        self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
+
+        dt = datetime.datetime(2016, 3, 27, 1)
+        self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
+        self.ts2 = Timestamp(dt)
+
+    def time_replace_tz(self):
+        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))
+
+    def time_replace_across_dst(self):
+        self.ts2.replace(tzinfo=self.tzinfo)
+
+    def time_replace_None(self):
+        self.ts_tz.replace(tzinfo=None)
+
+    def time_to_pydatetime(self):
+        self.ts.to_pydatetime()
+
+    def time_to_pydatetime_tz(self):
+        self.ts_tz.to_pydatetime()
@@ -1,3 +1,2 @@
 python=3.6*
 pytz
-cython
@@ -14,4 +14,7 @@ pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
 # install dateutil from master
 pip install -U git+git://github.com/dateutil/dateutil.git
 
+# cython via pip
+pip install cython
+
 true
@@ -8,7 +8,6 @@ xlrd
 xlwt
 scipy
 feather-format
-pyarrow
 numexpr
 pytables
 matplotlib
 
@@ -3,9 +3,11 @@
 Contributing to the documentation
 =================================
 
-If you're not the developer type, contributing to the documentation is still
-of huge value. You don't even have to be an expert on
-*pandas* to do so! Something as simple as rewriting small passages for clarity
+Whether you are someone who loves writing, teaching, or development,
+contributing to the documentation is of huge value. If you don't see yourself
+as a developer type, please don't stress and know that we want you to
+contribute. You don't even have to be an expert on *pandas* to do so!
+Something as simple as rewriting small passages for clarity
 as you reference the docs is a simple but effective way to contribute. The
 next person to read that passage will be in your debt!
 
 
@@ -11,7 +11,7 @@
    np.random.seed(123456)
    np.set_printoptions(precision=4, suppress=True)
    import matplotlib
-   matplotlib.style.use('ggplot')
+   # matplotlib.style.use('default')
    pd.options.display.max_rows = 15
 
    #### portions of this were borrowed from the
@@ -95,17 +95,7 @@ will be completed:
    df2.append             df2.combine_first
    df2.apply              df2.compound
    df2.applymap           df2.consolidate
-   df2.as_blocks          df2.convert_objects
-   df2.asfreq             df2.copy
-   df2.as_matrix          df2.corr
-   df2.astype             df2.corrwith
-   df2.at                 df2.count
-   df2.at_time            df2.cov
-   df2.axes               df2.cummax
-   df2.B                  df2.cummin
-   df2.between_time       df2.cumprod
-   df2.bfill              df2.cumsum
-   df2.blocks             df2.D
+   df2.D
 
 As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically
 tab completed. ``E`` is there as well; the rest of the attributes have been
 
@@ -638,9 +638,11 @@ and allows efficient indexing and storage of an index with a large number of dup
 
 .. ipython:: python
 
+   from pandas.api.types import CategoricalDtype
+
    df = pd.DataFrame({'A': np.arange(6),
                       'B': list('aabbca')})
-   df['B'] = df['B'].astype('category', categories=list('cab'))
+   df['B'] = df['B'].astype(CategoricalDtype(list('cab')))
    df
    df.dtypes
    df.B.cat.categories
 
@@ -646,7 +646,10 @@ strings and apply several methods to it. These can be accessed like
 Categorical
 ~~~~~~~~~~~
 
-If the Series is of dtype ``category``, ``Series.cat`` can be used to change the the categorical
+.. autoclass:: api.types.CategoricalDtype
+   :members: categories, ordered
+
+If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical
 data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the
 following usable methods and properties:
 
@@ -1416,6 +1419,20 @@ Selecting
    Index.slice_indexer
    Index.slice_locs
 
+.. _api.numericindex:
+
+Numeric Index
+-------------
+
+.. autosummary::
+   :toctree: generated/
+   :template: autosummary/class_without_autosummary.rst
+
+   RangeIndex
+   Int64Index
+   UInt64Index
+   Float64Index
+
 .. _api.categoricalindex:
 
 CategoricalIndex
 
@@ -89,12 +89,22 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to
     df["B"] = raw_cat
     df
 
-You can also specify differently ordered categories or make the resulting data ordered, by passing these arguments to ``astype()``:
+Wherever we passed the keyword ``dtype='category'`` above, we used the default behavior:
+
+1. Categories are inferred from the data.
+2. Categories are unordered.
+
+To control those behaviors, instead of passing ``'category'``, use an instance
+of :class:`~pandas.api.types.CategoricalDtype`.
 
 .. ipython:: python
 
-    s = pd.Series(["a","b","c","a"])
-    s_cat = s.astype("category", categories=["b","c","d"], ordered=False)
+    from pandas.api.types import CategoricalDtype
+
+    s = pd.Series(["a", "b", "c", "a"])
+    cat_type = CategoricalDtype(categories=["b", "c", "d"],
+                                ordered=True)
+    s_cat = s.astype(cat_type)
     s_cat
 
 Categorical data has a specific ``category`` :ref:`dtype <basics.dtypes>`:
@@ -133,6 +143,75 @@ constructor to save the factorize step during normal constructor mode:
     splitter = np.random.choice([0,1], 5, p=[0.5,0.5])
     s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"]))
 
+.. _categorical.categoricaldtype:
+
+CategoricalDtype
+----------------
+
+.. versionchanged:: 0.21.0
+
+A categorical's type is fully described by
+
+1. ``categories``: a sequence of unique values and no missing values
+2. ``ordered``: a boolean
+
+This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`.
+The ``categories`` argument is optional, which implies that the actual categories
+should be inferred from whatever is present in the data when the
+:class:`pandas.Categorical` is created. The categories are assumed to be unordered
+by default.
+
+.. ipython:: python
+
+   from pandas.api.types import CategoricalDtype
+
+   CategoricalDtype(['a', 'b', 'c'])
+   CategoricalDtype(['a', 'b', 'c'], ordered=True)
+   CategoricalDtype()
+
+A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas
+expects a `dtype`, for example in :func:`pandas.read_csv`,
+:func:`pandas.DataFrame.astype`, or the Series constructor.
+
+.. note::
+
+    As a convenience, you can use the string ``'category'`` in place of a
+    :class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of
+    the categories being unordered, and equal to the set of values present in
+    the array. In other words, ``dtype='category'`` is equivalent to
+    ``dtype=CategoricalDtype()``.
+
+Equality Semantics
+~~~~~~~~~~~~~~~~~~
+
+Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal
+whenever they have the same categories and orderedness. When comparing two
+unordered categoricals, the order of the ``categories`` is not considered.
+
+.. ipython:: python
+
+   c1 = CategoricalDtype(['a', 'b', 'c'], ordered=False)
+
+   # Equal, since order is not considered when ordered=False
+   c1 == CategoricalDtype(['b', 'c', 'a'], ordered=False)
+
+   # Unequal, since the second CategoricalDtype is ordered
+   c1 == CategoricalDtype(['a',  'b', 'c'], ordered=True)
+
+All instances of ``CategoricalDtype`` compare equal to the string ``'category'``.
+
+.. ipython:: python
+
+   c1 == 'category'
+
+.. warning::
+
+   Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``,
+   and since all instances of ``CategoricalDtype`` compare equal to ``'category'``,
+   all instances of ``CategoricalDtype`` compare equal to a
+   ``CategoricalDtype(None, False)``, regardless of ``categories`` or
+   ``ordered``.
+
 Description
 -----------
 
@@ -146,6 +225,8 @@ Using ``.describe()`` on categorical data will produce similar output to a `Seri
     df.describe()
     df["cat"].describe()
 
+.. _categorical.cat:
+
 Working with categories
 -----------------------
 
@@ -182,7 +263,7 @@ It's also possible to pass in the categories in a specific order:
 
     .. ipython:: python
 
-         s = pd.Series(list('babc')).astype('category', categories=list('abcd'))
+         s = pd.Series(list('babc')).astype(CategoricalDtype(list('abcd')))
          s
 
          # categories
@@ -204,6 +285,10 @@ by using the :func:`Categorical.rename_categories` method:
     s.cat.categories = ["Group %s" % g for g in s.cat.categories]
     s
     s.cat.rename_categories([1,2,3])
+    s
+    # You can also pass a dict-like object to map the renaming
+    s.cat.rename_categories({1: 'x', 2: 'y', 3: 'z'})
+    s
 
 .. note::
 
@@ -295,7 +380,9 @@ meaning and certain operations are possible. If the categorical is unordered, ``
 
     s = pd.Series(pd.Categorical(["a","b","c","a"], ordered=False))
     s.sort_values(inplace=True)
-    s = pd.Series(["a","b","c","a"]).astype('category', ordered=True)
+    s = pd.Series(["a","b","c","a"]).astype(
+        CategoricalDtype(ordered=True)
+    )
     s.sort_values(inplace=True)
     s
     s.min(), s.max()
@@ -395,9 +482,15 @@ categories or a categorical with any list-like object, will raise a TypeError.
 
 .. ipython:: python
 
-    cat = pd.Series([1,2,3]).astype("category", categories=[3,2,1], ordered=True)
-    cat_base = pd.Series([2,2,2]).astype("category", categories=[3,2,1], ordered=True)
-    cat_base2 = pd.Series([2,2,2]).astype("category", ordered=True)
+    cat = pd.Series([1,2,3]).astype(
+        CategoricalDtype([3, 2, 1], ordered=True)
+    )
+    cat_base = pd.Series([2,2,2]).astype(
+        CategoricalDtype([3, 2, 1], ordered=True)
+    )
+    cat_base2 = pd.Series([2,2,2]).astype(
+        CategoricalDtype(ordered=True)
+    )
 
     cat
     cat_base
 
@@ -8,7 +8,7 @@
    np.set_printoptions(precision=4, suppress=True)
    import pandas as pd
    import matplotlib
-   matplotlib.style.use('ggplot')
+   # matplotlib.style.use('default')
    import matplotlib.pyplot as plt
    plt.close('all')
    pd.options.display.max_rows=15
 
@@ -20,7 +20,7 @@
    pd.options.display.max_rows=15
 
    import matplotlib
-   matplotlib.style.use('ggplot')
+   # matplotlib.style.use('default')
 
    np.set_printoptions(precision=4, suppress=True)
 
 
@@ -10,7 +10,7 @@
    pd.options.display.max_rows = 15
 
    import matplotlib
-   matplotlib.style.use('ggplot')
+   # matplotlib.style.use('default')
    import matplotlib.pyplot as plt
    plt.close('all')
 
 
@@ -14,7 +14,7 @@ Frequently Asked Questions (FAQ)
    import pandas as pd
    pd.options.display.max_rows = 15
    import matplotlib
-   matplotlib.style.use('ggplot')
+   # matplotlib.style.use('default')
    import matplotlib.pyplot as plt
    plt.close('all')
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,2 @@`
`1`	`1`	`python=3.6*`
`2`	`2`	`pytz`
`3`		`-cython`