
Commit 38bf4e0

merge master
2 parents 0fd42eb + d7eb306

138 files changed: +2232, -1852 lines

.gitignore

+3

@@ -66,6 +66,9 @@ coverage_html_report
 # hypothesis test database
 .hypothesis/
 __pycache__
+# pytest-monkeytype
+monkeytype.sqlite3
+

 # OS generated files #
 ######################

.travis.yml

-1

@@ -1,4 +1,3 @@
-sudo: false
 language: python
 python: 3.5

Makefile

+2

@@ -1,5 +1,7 @@
 .PHONY : develop build clean clean_pyc doc lint-diff black

+all: develop
+
 clean:
 	-python setup.py clean

(new file)

+75

@@ -0,0 +1,75 @@
+import pandas as pd
+
+
+class IndexCache:
+    number = 1
+    repeat = (3, 100, 20)
+
+    params = [
+        [
+            "DatetimeIndex",
+            "Float64Index",
+            "IntervalIndex",
+            "Int64Index",
+            "MultiIndex",
+            "PeriodIndex",
+            "RangeIndex",
+            "TimedeltaIndex",
+            "UInt64Index",
+        ]
+    ]
+    param_names = ["index_type"]
+
+    def setup(self, index_type):
+        N = 10 ** 5
+        if index_type == "MultiIndex":
+            self.idx = pd.MultiIndex.from_product(
+                [pd.date_range("1/1/2000", freq="T", periods=N // 2), ["a", "b"]]
+            )
+        elif index_type == "DatetimeIndex":
+            self.idx = pd.date_range("1/1/2000", freq="T", periods=N)
+        elif index_type == "Int64Index":
+            self.idx = pd.Index(range(N))
+        elif index_type == "PeriodIndex":
+            self.idx = pd.period_range("1/1/2000", freq="T", periods=N)
+        elif index_type == "RangeIndex":
+            self.idx = pd.RangeIndex(start=0, stop=N)
+        elif index_type == "IntervalIndex":
+            self.idx = pd.IntervalIndex.from_arrays(range(N), range(1, N + 1))
+        elif index_type == "TimedeltaIndex":
+            self.idx = pd.TimedeltaIndex(range(N))
+        elif index_type == "Float64Index":
+            self.idx = pd.Float64Index(range(N))
+        elif index_type == "UInt64Index":
+            self.idx = pd.UInt64Index(range(N))
+        else:
+            raise ValueError
+        assert len(self.idx) == N
+        self.idx._cache = {}
+
+    def time_values(self, index_type):
+        self.idx._values
+
+    def time_shape(self, index_type):
+        self.idx.shape
+
+    def time_is_monotonic(self, index_type):
+        self.idx.is_monotonic
+
+    def time_is_monotonic_decreasing(self, index_type):
+        self.idx.is_monotonic_decreasing
+
+    def time_is_monotonic_increasing(self, index_type):
+        self.idx.is_monotonic_increasing
+
+    def time_is_unique(self, index_type):
+        self.idx.is_unique
+
+    def time_engine(self, index_type):
+        self.idx._engine
+
+    def time_inferred_type(self, index_type):
+        self.idx.inferred_type
+
+    def time_is_all_dates(self, index_type):
+        self.idx.is_all_dates
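Editorial note, not part of the commit: the benchmark's setup() empties self.idx._cache so every timed accessor starts cold instead of being served from pandas' internal per-index cache. A minimal sketch of that effect, assuming a pandas of this era where the private, version-dependent Index._cache dict backs cached attributes:

    import pandas as pd

    idx = pd.date_range("1/1/2000", freq="T", periods=10 ** 5)

    idx._cache = {}   # start cold, as the benchmark's setup() does
    idx.is_unique     # first access computes the value (and may store it in _cache)
    idx.is_unique     # later accesses can be served from the cache
    idx._cache = {}   # clearing again forces recomputation on the next access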

asv_bench/benchmarks/io/parsers.py

+2-2

@@ -10,7 +10,7 @@
     pass


-class DoesStringLookLikeDatetime(object):
+class DoesStringLookLikeDatetime:

     params = (["2Q2005", "0.0", "10000"],)
     param_names = ["value"]

@@ -23,7 +23,7 @@ def time_check_datetimes(self, value):
         _does_string_look_like_datetime(obj)


-class ConcatDateCols(object):
+class ConcatDateCols:

     params = ([1234567890, "AAAA"], [1, 2])
     param_names = ["value", "dim"]

asv_bench/benchmarks/reshape.py

+13

@@ -240,4 +240,17 @@ def time_qcut_datetime(self, bins):
         pd.qcut(self.datetime_series, bins)


+class Explode:
+    param_names = ["n_rows", "max_list_length"]
+    params = [[100, 1000, 10000], [3, 5, 10]]
+
+    def setup(self, n_rows, max_list_length):
+
+        data = [np.arange(np.random.randint(max_list_length)) for _ in range(n_rows)]
+        self.series = pd.Series(data)
+
+    def time_explode(self, n_rows, max_list_length):
+        self.series.explode()
+
+
 from .pandas_vb_common import setup  # noqa: F401
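For reference (illustration, not part of the commit): Series.explode, the operation this benchmark times and which the reference docs below gain entries for, expands each list-like element into its own row, repeating the index label:

    import pandas as pd

    s = pd.Series([[1, 2, 3], [], [4, 5]])
    s.explode()
    # Expected on pandas 0.25+: one row per list element, empty lists become NaN,
    # and the original index labels repeat (0, 0, 0, 1, 2, 2), dtype object.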

asv_bench/benchmarks/series_methods.py

+3-3

@@ -219,7 +219,7 @@ def time_series_datetimeindex_repr(self):
         getattr(self.s, "a", None)


-class All(object):
+class All:

     params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
     param_names = ["N", "case"]

@@ -232,7 +232,7 @@ def time_all(self, N, case):
         self.s.all()


-class Any(object):
+class Any:

     params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
     param_names = ["N", "case"]

@@ -245,7 +245,7 @@ def time_any(self, N, case):
         self.s.any()


-class NanOps(object):
+class NanOps:

     params = [
         [

asv_bench/benchmarks/timeseries.py

+1-1

@@ -293,7 +293,7 @@ def time_format_YYYYMMDD(self):
         to_datetime(self.stringsD, format="%Y%m%d")


-class ToDatetimeCacheSmallCount(object):
+class ToDatetimeCacheSmallCount:

     params = ([True, False], [50, 500, 5000, 100000])
     param_names = ["cache", "count"]

ci/code_checks.sh

+1-1

@@ -156,7 +156,7 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     MSG='Check for python2 new-style classes and for empty parentheses' ; echo $MSG
-    invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\((object)?\):" pandas scripts
+    invgrep -R --include="*.py" --include="*.pyx" -E "class\s\S*\((object)?\):" pandas asv_bench/benchmarks scripts
     RET=$(($RET + $?)) ; echo $MSG "DONE"

     MSG='Check for backticks incorrectly rendering because of missing spaces' ; echo $MSG
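As an illustration (not part of the commit), the pattern that check greps for flags Python 2-style class headers; the same expression behaves like this under Python's re module:

    import re

    # "class Foo(object):" and "class Foo():" are redundant forms in Python 3.
    pattern = re.compile(r"class\s\S*\((object)?\):")

    for line in ["class All(object):", "class Any():", "class NanOps:"]:
        print(line, "->", bool(pattern.search(line)))
    # class All(object): -> True
    # class Any(): -> True
    # class NanOps: -> False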

ci/deps/travis-36-cov.yaml

+1-1

@@ -39,7 +39,7 @@ dependencies:
   - xlsxwriter
   - xlwt
   # universal
-  - pytest>=4.0.2
+  - pytest
   - pytest-xdist
   - pytest-cov
   - pytest-mock

doc/source/development/contributing.rst

+4-4

@@ -288,7 +288,7 @@ complex changes to the documentation as well.
 Some other important things to know about the docs:

 * The *pandas* documentation consists of two parts: the docstrings in the code
-  itself and the docs in this folder ``pandas/doc/``.
+  itself and the docs in this folder ``doc/``.

   The docstrings provide a clear explanation of the usage of the individual
   functions, while the documentation in this folder consists of tutorial-like

@@ -404,11 +404,11 @@ Building the documentation
 ~~~~~~~~~~~~~~~~~~~~~~~~~~

 So how do you build the docs? Navigate to your local
-``pandas/doc/`` directory in the console and run::
+``doc/`` directory in the console and run::

     python make.py html

-Then you can find the HTML output in the folder ``pandas/doc/build/html/``.
+Then you can find the HTML output in the folder ``doc/build/html/``.

 The first time you build the docs, it will take quite a while because it has to run
 all the code examples and build all the generated docstring pages. In subsequent

@@ -448,7 +448,7 @@ You can also specify to use multiple cores to speed up the documentation build::
 Open the following file in a web browser to see the full documentation you
 just built::

-    pandas/docs/build/html/index.html
+    doc/build/html/index.html

 And you'll have the satisfaction of seeing your new and improved documentation!

doc/source/getting_started/basics.rst

-4

@@ -1422,8 +1422,6 @@ The :meth:`~DataFrame.rename` method also provides an ``inplace`` named
 parameter that is by default ``False`` and copies the underlying data. Pass
 ``inplace=True`` to rename the data in place.

-.. versionadded:: 0.18.0
-
 Finally, :meth:`~Series.rename` also accepts a scalar or list-like
 for altering the ``Series.name`` attribute.

@@ -2063,8 +2061,6 @@ Convert a subset of columns to a specified type using :meth:`~DataFrame.astype`.
    dft
    dft.dtypes

-.. versionadded:: 0.19.0
-
 Convert certain columns to a specific dtype by passing a dict to :meth:`~DataFrame.astype`.

 .. ipython:: python
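A quick illustration of the dict-based astype call the retained text describes (made-up frame, not taken from the commit):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [10.0, 20.0, 30.0]})
    df.astype({"a": "float64", "b": "int32"}).dtypes
    # a    float64
    # b      int32
    # dtype: object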

doc/source/getting_started/dsintro.rst

-2

@@ -251,8 +251,6 @@ Series can also have a ``name`` attribute:
 The Series ``name`` will be assigned automatically in many cases, in particular
 when taking 1D slices of DataFrame as you will see below.

-.. versionadded:: 0.18.0
-
 You can rename a Series with the :meth:`pandas.Series.rename` method.

 .. ipython:: python

doc/source/reference/frame.rst

+1

@@ -239,6 +239,7 @@ Reshaping, sorting, transposing
    DataFrame.unstack
    DataFrame.swapaxes
    DataFrame.melt
+   DataFrame.explode
    DataFrame.squeeze
    DataFrame.to_xarray
    DataFrame.T

doc/source/reference/series.rst

+1-1

@@ -245,6 +245,7 @@ Reshaping, sorting
    Series.sort_index
    Series.swaplevel
    Series.unstack
+   Series.explode
    Series.searchsorted
    Series.ravel
    Series.repeat

@@ -590,4 +591,3 @@ Sparse

    SparseSeries.to_coo
    SparseSeries.from_coo
-

doc/source/user_guide/advanced.rst

+3-18

@@ -810,15 +810,10 @@ values **not** in the categories, similarly to how you can reindex **any** pandas
 Int64Index and RangeIndex
 ~~~~~~~~~~~~~~~~~~~~~~~~~

-.. warning::
-
-   Indexing on an integer-based Index with floats has been clarified in 0.18.0, for a summary of the changes, see :ref:`here <whatsnew_0180.float_indexers>`.
-
-:class:`Int64Index` is a fundamental basic index in pandas.
-This is an immutable array implementing an ordered, sliceable set.
-Prior to 0.18.0, the ``Int64Index`` would provide the default index for all ``NDFrame`` objects.
+:class:`Int64Index` is a fundamental basic index in pandas. This is an immutable array
+implementing an ordered, sliceable set.

-:class:`RangeIndex` is a sub-class of ``Int64Index`` added in version 0.18.0, now providing the default index for all ``NDFrame`` objects.
+:class:`RangeIndex` is a sub-class of ``Int64Index`` that provides the default index for all ``NDFrame`` objects.
 ``RangeIndex`` is an optimized version of ``Int64Index`` that can represent a monotonic ordered set. These are analogous to Python `range types <https://docs.python.org/3/library/stdtypes.html#typesseq-range>`__.

 .. _indexing.float64index:

@@ -880,16 +875,6 @@ In non-float indexes, slicing using floats will raise a ``TypeError``.
     In [1]: pd.Series(range(5))[3.5:4.5]
     TypeError: the slice start [3.5] is not a proper indexer for this index type (Int64Index)

-.. warning::
-
-   Using a scalar float indexer for ``.iloc`` has been removed in 0.18.0, so the following will raise a ``TypeError``:
-
-   .. code-block:: ipython
-
-      In [3]: pd.Series(range(5)).iloc[3.0]
-      TypeError: cannot do positional indexing on <class 'pandas.indexes.range.RangeIndex'> with these indexers [3.0] of <type 'float'>
-
-
 Here is a typical use-case for using this type of indexing. Imagine that you have a somewhat
 irregular timedelta-like indexing scheme, but the data is recorded as floats. This could, for
 example, be millisecond offsets.
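To illustrate the retained wording about the default index (example not part of the commit): objects built without an explicit index get a RangeIndex, while explicit integer labels produce an Int64Index on pandas of this era:

    import pandas as pd

    pd.Series([10, 20, 30]).index
    # RangeIndex(start=0, stop=3, step=1)

    pd.Series([10, 20, 30], index=[2, 4, 6]).index
    # Int64Index([2, 4, 6], dtype='int64')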

doc/source/user_guide/categorical.rst

-2

@@ -834,8 +834,6 @@ See also the section on :ref:`merge dtypes<merging.dtypes>` for notes about preserving
 Unioning
 ~~~~~~~~

-.. versionadded:: 0.19.0
-
 If you want to combine categoricals that do not necessarily have the same
 categories, the :func:`~pandas.api.types.union_categoricals` function will
 combine a list-like of categoricals. The new categories will be the union of
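For reference, a minimal use of union_categoricals (illustrative values, not from the commit):

    import pandas as pd
    from pandas.api.types import union_categoricals

    a = pd.Categorical(["b", "c"])
    b = pd.Categorical(["a", "b"])
    union_categoricals([a, b])
    # ['b', 'c', 'a', 'b']
    # Categories (3, object): ['b', 'c', 'a']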

doc/source/user_guide/computation.rst

+4-7

@@ -408,9 +408,7 @@ For some windowing functions, additional parameters must be specified:
 Time-aware rolling
 ~~~~~~~~~~~~~~~~~~

-.. versionadded:: 0.19.0
-
-New in version 0.19.0 are the ability to pass an offset (or convertible) to a ``.rolling()`` method and have it produce
+It is possible to pass an offset (or convertible) to a ``.rolling()`` method and have it produce
 variable sized windows based on the passed time window. For each time point, this includes all preceding values occurring
 within the indicated time delta.

@@ -893,10 +891,9 @@ Therefore, there is an assumption that :math:`x_0` is not an ordinary value
 but rather an exponentially weighted moment of the infinite series up to that
 point.

-One must have :math:`0 < \alpha \leq 1`, and while since version 0.18.0
-it has been possible to pass :math:`\alpha` directly, it's often easier
-to think about either the **span**, **center of mass (com)** or **half-life**
-of an EW moment:
+One must have :math:`0 < \alpha \leq 1`, and while it is possible to pass
+:math:`\alpha` directly, it's often easier to think about either the
+**span**, **center of mass (com)** or **half-life** of an EW moment:

 .. math::

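A short sketch of the offset-based rolling described in the rewritten text above (illustrative data, not from the commit):

    import pandas as pd

    df = pd.DataFrame(
        {"B": [0, 1, 2, 3, 4]},
        index=pd.date_range("20130101 09:00:00", periods=5, freq="s"),
    )
    # Fixed window of 2 observations vs. variable window covering "2s" of data:
    df.rolling(2).sum()
    df.rolling("2s").sum()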
doc/source/user_guide/enhancingperf.rst

+1-7

@@ -601,8 +601,6 @@ This allows for *formulaic evaluation*. The assignment target can be a
 new column name or an existing column name, and it must be a valid Python
 identifier.

-.. versionadded:: 0.18.0
-
 The ``inplace`` keyword determines whether this assignment will performed
 on the original ``DataFrame`` or return a copy with the new column.

@@ -630,8 +628,6 @@ new or modified columns is returned and the original frame is unchanged.
    df.eval('e = a - c', inplace=False)
    df

-.. versionadded:: 0.18.0
-
 As a convenience, multiple assignments can be performed by using a
 multi-line string.

@@ -652,9 +648,7 @@ The equivalent in standard Python would be
    df['a'] = 1
    df

-.. versionadded:: 0.18.0
-
-The ``query`` method gained the ``inplace`` keyword which determines
+The ``query`` method has a ``inplace`` keyword which determines
 whether the query modifies the original frame.

 .. ipython:: python
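A small sketch of the ``inplace`` keyword on ``query`` that the rewritten sentence describes (illustrative frame, not from the commit):

    import pandas as pd

    df = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})

    out = df.query("a > 1", inplace=False)  # default: returns a new frame
    df.query("a > 1", inplace=True)         # modifies df itself and returns None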

doc/source/user_guide/groupby.rst

+3-6

@@ -827,13 +827,10 @@ and that the transformed data contains no NAs.

 .. _groupby.transform.window_resample:

-New syntax to window and resample operations
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-.. versionadded:: 0.18.1
+Window and resample operations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

-Working with the resample, expanding or rolling operations on the groupby
-level used to require the application of helper functions. However,
-now it is possible to use ``resample()``, ``expanding()`` and
+It is possible to use ``resample()``, ``expanding()`` and
 ``rolling()`` as methods on groupbys.

 The example below will apply the ``rolling()`` method on the samples of
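For reference, the groupby-level window methods referred to in that section look like this (illustrative data, not taken from the commit):

    import pandas as pd

    df = pd.DataFrame({"A": [1, 1, 2, 2], "B": range(4)})

    # rolling(), expanding() and resample() can be called directly on the groupby:
    df.groupby("A").rolling(2).B.mean()
    df.groupby("A").expanding().sum()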
