Skip to content

Commit 2babd92

Browse files
authored
Merge branch 'master' into add-sparsearray-all
2 parents 5d04485 + f9d88cd commit 2babd92

File tree

151 files changed

+5093
-3261
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

151 files changed

+5093
-3261
lines changed

asv_bench/benchmarks/index_object.py

+20
Original file line numberDiff line numberDiff line change
@@ -199,3 +199,23 @@ def time_datetime_level_values_full(self):
199199

200200
def time_datetime_level_values_sliced(self):
201201
self.mi[:10].values
202+
203+
204+
class Range(object):
205+
goal_time = 0.2
206+
207+
def setup(self):
208+
self.idx_inc = RangeIndex(start=0, stop=10**7, step=3)
209+
self.idx_dec = RangeIndex(start=10**7, stop=-1, step=-3)
210+
211+
def time_max(self):
212+
self.idx_inc.max()
213+
214+
def time_max_trivial(self):
215+
self.idx_dec.max()
216+
217+
def time_min(self):
218+
self.idx_dec.min()
219+
220+
def time_min_trivial(self):
221+
self.idx_inc.min()

asv_bench/benchmarks/sparse.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from itertools import repeat
1+
import itertools
22

33
from .pandas_vb_common import *
44
import scipy.sparse
@@ -33,7 +33,7 @@ def time_sparse_from_scipy(self):
3333
SparseDataFrame(scipy.sparse.rand(1000, 1000, 0.005))
3434

3535
def time_sparse_from_dict(self):
36-
SparseDataFrame(dict(zip(range(1000), repeat([0]))))
36+
SparseDataFrame(dict(zip(range(1000), itertools.repeat([0]))))
3737

3838

3939
class sparse_series_from_coo(object):

asv_bench/benchmarks/timeseries.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def setup(self):
5656
self.no_freq = self.rng7[:50000].append(self.rng7[50002:])
5757
self.d_freq = self.rng7[:50000].append(self.rng7[50000:])
5858

59-
self.rng8 = date_range(start='1/1/1700', freq='B', periods=100000)
59+
self.rng8 = date_range(start='1/1/1700', freq='B', periods=75000)
6060
self.b_freq = self.rng8[:50000].append(self.rng8[50000:])
6161

6262
def time_add_timedelta(self):

asv_bench/benchmarks/timestamp.py

+29
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
from .pandas_vb_common import *
22
from pandas import to_timedelta, Timestamp
3+
import pytz
4+
import datetime
35

46

57
class TimestampProperties(object):
@@ -58,3 +60,30 @@ def time_is_leap_year(self):
5860

5961
def time_microsecond(self):
6062
self.ts.microsecond
63+
64+
65+
class TimestampOps(object):
66+
goal_time = 0.2
67+
68+
def setup(self):
69+
self.ts = Timestamp('2017-08-25 08:16:14')
70+
self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
71+
72+
dt = datetime.datetime(2016, 3, 27, 1)
73+
self.tzinfo = pytz.timezone('CET').localize(dt, is_dst=False).tzinfo
74+
self.ts2 = Timestamp(dt)
75+
76+
def time_replace_tz(self):
77+
self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))
78+
79+
def time_replace_across_dst(self):
80+
self.ts2.replace(tzinfo=self.tzinfo)
81+
82+
def time_replace_None(self):
83+
self.ts_tz.replace(tzinfo=None)
84+
85+
def time_to_pydatetime(self):
86+
self.ts.to_pydatetime()
87+
88+
def time_to_pydatetime_tz(self):
89+
self.ts_tz.to_pydatetime()

ci/requirements-3.6_NUMPY_DEV.build

-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
python=3.6*
22
pytz
3-
cython

ci/requirements-3.6_NUMPY_DEV.build.sh

+3
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,7 @@ pip install --pre --upgrade --timeout=60 -f $PRE_WHEELS numpy scipy
1414
# install dateutil from master
1515
pip install -U git+git://github.com/dateutil/dateutil.git
1616

17+
# cython via pip
18+
pip install cython
19+
1720
true

ci/requirements-3.6_WIN.run

-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ xlrd
88
xlwt
99
scipy
1010
feather-format
11-
pyarrow
1211
numexpr
1312
pytables
1413
matplotlib

doc/README.rst

+5-3
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,11 @@
33
Contributing to the documentation
44
=================================
55

6-
If you're not the developer type, contributing to the documentation is still
7-
of huge value. You don't even have to be an expert on
8-
*pandas* to do so! Something as simple as rewriting small passages for clarity
6+
Whether you are someone who loves writing, teaching, or development,
7+
contributing to the documentation is of huge value. If you don't see yourself
8+
as a developer type, please don't stress and know that we want you to
9+
contribute. You don't even have to be an expert on *pandas* to do so!
10+
Something as simple as rewriting small passages for clarity
911
as you reference the docs is a simple but effective way to contribute. The
1012
next person to read that passage will be in your debt!
1113

doc/source/10min.rst

+2-12
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
np.random.seed(123456)
1212
np.set_printoptions(precision=4, suppress=True)
1313
import matplotlib
14-
matplotlib.style.use('ggplot')
14+
# matplotlib.style.use('default')
1515
pd.options.display.max_rows = 15
1616
1717
#### portions of this were borrowed from the
@@ -95,17 +95,7 @@ will be completed:
9595
df2.append df2.combine_first
9696
df2.apply df2.compound
9797
df2.applymap df2.consolidate
98-
df2.as_blocks df2.convert_objects
99-
df2.asfreq df2.copy
100-
df2.as_matrix df2.corr
101-
df2.astype df2.corrwith
102-
df2.at df2.count
103-
df2.at_time df2.cov
104-
df2.axes df2.cummax
105-
df2.B df2.cummin
106-
df2.between_time df2.cumprod
107-
df2.bfill df2.cumsum
108-
df2.blocks df2.D
98+
df2.D
10999

110100
As you can see, the columns ``A``, ``B``, ``C``, and ``D`` are automatically
111101
tab completed. ``E`` is there as well; the rest of the attributes have been

doc/source/advanced.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -638,9 +638,11 @@ and allows efficient indexing and storage of an index with a large number of dup
638638

639639
.. ipython:: python
640640
641+
from pandas.api.types import CategoricalDtype
642+
641643
df = pd.DataFrame({'A': np.arange(6),
642644
'B': list('aabbca')})
643-
df['B'] = df['B'].astype('category', categories=list('cab'))
645+
df['B'] = df['B'].astype(CategoricalDtype(list('cab')))
644646
df
645647
df.dtypes
646648
df.B.cat.categories

doc/source/api.rst

+18-1
Original file line numberDiff line numberDiff line change
@@ -646,7 +646,10 @@ strings and apply several methods to it. These can be accessed like
646646
Categorical
647647
~~~~~~~~~~~
648648

649-
If the Series is of dtype ``category``, ``Series.cat`` can be used to change the the categorical
649+
.. autoclass:: api.types.CategoricalDtype
650+
:members: categories, ordered
651+
652+
If the Series is of dtype ``CategoricalDtype``, ``Series.cat`` can be used to change the categorical
650653
data. This accessor is similar to the ``Series.dt`` or ``Series.str`` and has the
651654
following usable methods and properties:
652655

@@ -1416,6 +1419,20 @@ Selecting
14161419
Index.slice_indexer
14171420
Index.slice_locs
14181421

1422+
.. _api.numericindex:
1423+
1424+
Numeric Index
1425+
-------------
1426+
1427+
.. autosummary::
1428+
:toctree: generated/
1429+
:template: autosummary/class_without_autosummary.rst
1430+
1431+
RangeIndex
1432+
Int64Index
1433+
UInt64Index
1434+
Float64Index
1435+
14191436
.. _api.categoricalindex:
14201437

14211438
CategoricalIndex

doc/source/categorical.rst

+101-8
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,22 @@ By passing a :class:`pandas.Categorical` object to a `Series` or assigning it to
8989
df["B"] = raw_cat
9090
df
9191
92-
You can also specify differently ordered categories or make the resulting data ordered, by passing these arguments to ``astype()``:
92+
Anywhere above we passed a keyword ``dtype='category'``, we used the default behavior:
93+
94+
1. categories are inferred from the data
95+
2. categories are unordered.
96+
97+
To control those behaviors, instead of passing ``'category'``, use an instance
98+
of :class:`~pandas.api.types.CategoricalDtype`.
9399

94100
.. ipython:: python
95101
96-
s = pd.Series(["a","b","c","a"])
97-
s_cat = s.astype("category", categories=["b","c","d"], ordered=False)
102+
from pandas.api.types import CategoricalDtype
103+
104+
s = pd.Series(["a", "b", "c", "a"])
105+
cat_type = CategoricalDtype(categories=["b", "c", "d"],
106+
ordered=True)
107+
s_cat = s.astype(cat_type)
98108
s_cat
99109
100110
Categorical data has a specific ``category`` :ref:`dtype <basics.dtypes>`:
@@ -133,6 +143,75 @@ constructor to save the factorize step during normal constructor mode:
133143
splitter = np.random.choice([0,1], 5, p=[0.5,0.5])
134144
s = pd.Series(pd.Categorical.from_codes(splitter, categories=["train", "test"]))
135145
146+
.. _categorical.categoricaldtype:
147+
148+
CategoricalDtype
149+
----------------
150+
151+
.. versionchanged:: 0.21.0
152+
153+
A categorical's type is fully described by:
154+
155+
1. ``categories``: a sequence of unique values and no missing values
156+
2. ``ordered``: a boolean
157+
158+
This information can be stored in a :class:`~pandas.api.types.CategoricalDtype`.
159+
The ``categories`` argument is optional, which implies that the actual categories
160+
should be inferred from whatever is present in the data when the
161+
:class:`pandas.Categorical` is created. The categories are assumed to be unordered
162+
by default.
163+
164+
.. ipython:: python
165+
166+
from pandas.api.types import CategoricalDtype
167+
168+
CategoricalDtype(['a', 'b', 'c'])
169+
CategoricalDtype(['a', 'b', 'c'], ordered=True)
170+
CategoricalDtype()
171+
172+
A :class:`~pandas.api.types.CategoricalDtype` can be used in any place pandas
173+
expects a `dtype`. For example :func:`pandas.read_csv`,
174+
:func:`pandas.DataFrame.astype`, or in the Series constructor.
175+
176+
.. note::
177+
178+
As a convenience, you can use the string ``'category'`` in place of a
179+
:class:`~pandas.api.types.CategoricalDtype` when you want the default behavior of
180+
the categories being unordered, and equal to the set of values present in the
181+
array. In other words, ``dtype='category'`` is equivalent to
182+
``dtype=CategoricalDtype()``.
183+
184+
Equality Semantics
185+
~~~~~~~~~~~~~~~~~~
186+
187+
Two instances of :class:`~pandas.api.types.CategoricalDtype` compare equal
188+
whenever they have the same categories and orderedness. When comparing two
189+
unordered categoricals, the order of the ``categories`` is not considered.
190+
191+
.. ipython:: python
192+
193+
c1 = CategoricalDtype(['a', 'b', 'c'], ordered=False)
194+
195+
# Equal, since order is not considered when ordered=False
196+
c1 == CategoricalDtype(['b', 'c', 'a'], ordered=False)
197+
198+
# Unequal, since the second CategoricalDtype is ordered
199+
c1 == CategoricalDtype(['a', 'b', 'c'], ordered=True)
200+
201+
All instances of ``CategoricalDtype`` compare equal to the string ``'category'``.
202+
203+
.. ipython:: python
204+
205+
c1 == 'category'
206+
207+
.. warning::
208+
209+
Since ``dtype='category'`` is essentially ``CategoricalDtype(None, False)``,
210+
and since all instances of ``CategoricalDtype`` compare equal to ``'category'``,
211+
all instances of ``CategoricalDtype`` compare equal to a
212+
``CategoricalDtype(None, False)``, regardless of ``categories`` or
213+
``ordered``.
214+
136215
Description
137216
-----------
138217

@@ -146,6 +225,8 @@ Using ``.describe()`` on categorical data will produce similar output to a `Seri
146225
df.describe()
147226
df["cat"].describe()
148227
228+
.. _categorical.cat:
229+
149230
Working with categories
150231
-----------------------
151232

@@ -182,7 +263,7 @@ It's also possible to pass in the categories in a specific order:
182263

183264
.. ipython:: python
184265
185-
s = pd.Series(list('babc')).astype('category', categories=list('abcd'))
266+
s = pd.Series(list('babc')).astype(CategoricalDtype(list('abcd')))
186267
s
187268
188269
# categories
@@ -204,6 +285,10 @@ by using the :func:`Categorical.rename_categories` method:
204285
s.cat.categories = ["Group %s" % g for g in s.cat.categories]
205286
s
206287
s.cat.rename_categories([1,2,3])
288+
s
289+
# You can also pass a dict-like object to map the renaming
290+
s.cat.rename_categories({1: 'x', 2: 'y', 3: 'z'})
291+
s
207292
208293
.. note::
209294

@@ -295,7 +380,9 @@ meaning and certain operations are possible. If the categorical is unordered, ``
295380
296381
s = pd.Series(pd.Categorical(["a","b","c","a"], ordered=False))
297382
s.sort_values(inplace=True)
298-
s = pd.Series(["a","b","c","a"]).astype('category', ordered=True)
383+
s = pd.Series(["a","b","c","a"]).astype(
384+
CategoricalDtype(ordered=True)
385+
)
299386
s.sort_values(inplace=True)
300387
s
301388
s.min(), s.max()
@@ -395,9 +482,15 @@ categories or a categorical with any list-like object, will raise a TypeError.
395482

396483
.. ipython:: python
397484
398-
cat = pd.Series([1,2,3]).astype("category", categories=[3,2,1], ordered=True)
399-
cat_base = pd.Series([2,2,2]).astype("category", categories=[3,2,1], ordered=True)
400-
cat_base2 = pd.Series([2,2,2]).astype("category", ordered=True)
485+
cat = pd.Series([1,2,3]).astype(
486+
CategoricalDtype([3, 2, 1], ordered=True)
487+
)
488+
cat_base = pd.Series([2,2,2]).astype(
489+
CategoricalDtype([3, 2, 1], ordered=True)
490+
)
491+
cat_base2 = pd.Series([2,2,2]).astype(
492+
CategoricalDtype(ordered=True)
493+
)
401494
402495
cat
403496
cat_base

doc/source/computation.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
np.set_printoptions(precision=4, suppress=True)
99
import pandas as pd
1010
import matplotlib
11-
matplotlib.style.use('ggplot')
11+
# matplotlib.style.use('default')
1212
import matplotlib.pyplot as plt
1313
plt.close('all')
1414
pd.options.display.max_rows=15

doc/source/cookbook.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
pd.options.display.max_rows=15
2121
2222
import matplotlib
23-
matplotlib.style.use('ggplot')
23+
# matplotlib.style.use('default')
2424
2525
np.set_printoptions(precision=4, suppress=True)
2626

doc/source/dsintro.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
pd.options.display.max_rows = 15
1111
1212
import matplotlib
13-
matplotlib.style.use('ggplot')
13+
# matplotlib.style.use('default')
1414
import matplotlib.pyplot as plt
1515
plt.close('all')
1616

doc/source/gotchas.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Frequently Asked Questions (FAQ)
1414
import pandas as pd
1515
pd.options.display.max_rows = 15
1616
import matplotlib
17-
matplotlib.style.use('ggplot')
17+
# matplotlib.style.use('default')
1818
import matplotlib.pyplot as plt
1919
plt.close('all')
2020

0 commit comments

Comments
 (0)