Skip to content

Commit a6b3750

Browse files
committed
Merge remote-tracking branch 'upstream/master' into multiindex_at_fix
2 parents 186d6f2 + a6890c6 commit a6b3750

File tree

319 files changed

+8900
-5660
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

319 files changed

+8900
-5660
lines changed

asv_bench/benchmarks/arithmetic.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def time_series_op_with_fill_value_no_nas(self):
6767
self.ser.add(self.ser, fill_value=4)
6868

6969

70-
class MixedFrameWithSeriesAxis0:
70+
class MixedFrameWithSeriesAxis:
7171
params = [
7272
[
7373
"eq",
@@ -78,7 +78,7 @@ class MixedFrameWithSeriesAxis0:
7878
"gt",
7979
"add",
8080
"sub",
81-
"div",
81+
"truediv",
8282
"floordiv",
8383
"mul",
8484
"pow",
@@ -87,15 +87,19 @@ class MixedFrameWithSeriesAxis0:
8787
param_names = ["opname"]
8888

8989
def setup(self, opname):
90-
arr = np.arange(10 ** 6).reshape(100, -1)
90+
arr = np.arange(10 ** 6).reshape(1000, -1)
9191
df = DataFrame(arr)
9292
df["C"] = 1.0
9393
self.df = df
9494
self.ser = df[0]
95+
self.row = df.iloc[0]
9596

9697
def time_frame_op_with_series_axis0(self, opname):
9798
getattr(self.df, opname)(self.ser, axis=0)
9899

100+
def time_frame_op_with_series_axis1(self, opname):
101+
getattr(operator, opname)(self.df, self.ser)
102+
99103

100104
class Ops:
101105

asv_bench/benchmarks/groupby.py

+58
Original file line numberDiff line numberDiff line change
@@ -660,4 +660,62 @@ def function(values):
660660
self.grouper.transform(function, engine="cython")
661661

662662

663+
class AggEngine:
664+
def setup(self):
665+
N = 10 ** 3
666+
data = DataFrame(
667+
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
668+
columns=[0, 1],
669+
)
670+
self.grouper = data.groupby(0)
671+
672+
def time_series_numba(self):
673+
def function(values, index):
674+
total = 0
675+
for i, value in enumerate(values):
676+
if i % 2:
677+
total += value + 5
678+
else:
679+
total += value * 2
680+
return total
681+
682+
self.grouper[1].agg(function, engine="numba")
683+
684+
def time_series_cython(self):
685+
def function(values):
686+
total = 0
687+
for i, value in enumerate(values):
688+
if i % 2:
689+
total += value + 5
690+
else:
691+
total += value * 2
692+
return total
693+
694+
self.grouper[1].agg(function, engine="cython")
695+
696+
def time_dataframe_numba(self):
697+
def function(values, index):
698+
total = 0
699+
for i, value in enumerate(values):
700+
if i % 2:
701+
total += value + 5
702+
else:
703+
total += value * 2
704+
return total
705+
706+
self.grouper.agg(function, engine="numba")
707+
708+
def time_dataframe_cython(self):
709+
def function(values):
710+
total = 0
711+
for i, value in enumerate(values):
712+
if i % 2:
713+
total += value + 5
714+
else:
715+
total += value * 2
716+
return total
717+
718+
self.grouper.agg(function, engine="cython")
719+
720+
663721
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/io/parsers.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
try:
44
from pandas._libs.tslibs.parsing import (
5-
_concat_date_cols,
5+
concat_date_cols,
66
_does_string_look_like_datetime,
77
)
88
except ImportError:
@@ -39,4 +39,4 @@ def setup(self, value, dim):
3939
)
4040

4141
def time_check_concat(self, value, dim):
42-
_concat_date_cols(self.object)
42+
concat_date_cols(self.object)

asv_bench/benchmarks/rolling.py

+12-13
Original file line numberDiff line numberDiff line change
@@ -150,19 +150,18 @@ def time_quantile(self, constructor, window, dtype, percentile, interpolation):
150150
self.roll.quantile(percentile, interpolation=interpolation)
151151

152152

153-
class PeakMemFixed:
154-
def setup(self):
155-
N = 10
156-
arr = 100 * np.random.random(N)
157-
self.roll = pd.Series(arr).rolling(10)
158-
159-
def peakmem_fixed(self):
160-
# GH 25926
161-
# This is to detect memory leaks in rolling operations.
162-
# To save time this is only ran on one method.
163-
# 6000 iterations is enough for most types of leaks to be detected
164-
for x in range(6000):
165-
self.roll.max()
153+
class PeakMemFixedWindowMinMax:
154+
155+
params = ["min", "max"]
156+
157+
def setup(self, operation):
158+
N = int(1e6)
159+
arr = np.random.random(N)
160+
self.roll = pd.Series(arr).rolling(2)
161+
162+
def peakmem_fixed(self, operation):
163+
for x in range(5):
164+
getattr(self.roll, operation)()
166165

167166

168167
class ForwardWindowMethods:

asv_bench/benchmarks/stat_ops.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ class FrameOps:
1111
param_names = ["op", "dtype", "axis"]
1212

1313
def setup(self, op, dtype, axis):
14-
if op == "mad" and dtype == "Int64" and axis == 1:
15-
# GH-33036
14+
if op == "mad" and dtype == "Int64":
15+
# GH-33036, GH#33600
1616
raise NotImplementedError
1717
values = np.random.randn(100000, 4)
1818
if dtype == "Int64":

ci/deps/azure-36-minimum_versions.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
name: pandas-dev
22
channels:
3-
- defaults
43
- conda-forge
54
dependencies:
65
- python=3.6.1
@@ -19,12 +18,12 @@ dependencies:
1918
- jinja2=2.8
2019
- numba=0.46.0
2120
- numexpr=2.6.2
22-
- numpy=1.13.3
21+
- numpy=1.15.4
2322
- openpyxl=2.5.7
2423
- pytables=3.4.3
2524
- python-dateutil=2.7.3
2625
- pytz=2017.2
27-
- scipy=0.19.0
26+
- scipy=1.2
2827
- xlrd=1.1.0
2928
- xlsxwriter=0.9.8
3029
- xlwt=1.2.0

ci/deps/azure-37-numpydev.yaml

+1-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,7 @@ dependencies:
1414
- pytz
1515
- pip
1616
- pip:
17-
- cython==0.29.16
18-
# GH#33507 cython 3.0a1 is causing TypeErrors 2020-04-13
17+
- cython>=0.29.16
1918
- "git+git://github.com/dateutil/dateutil.git"
2019
- "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
2120
- "--pre"

ci/deps/azure-macos-36.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ dependencies:
1919
- matplotlib=2.2.3
2020
- nomkl
2121
- numexpr
22-
- numpy=1.14
22+
- numpy=1.15.4
2323
- openpyxl
2424
- pyarrow>=0.13.0
2525
- pytables

ci/setup_env.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ conda list pandas
128128
echo "[Build extensions]"
129129
python setup.py build_ext -q -i -j2
130130

131-
# XXX: Some of our environments end up with old versions of pip (10.x)
131+
# TODO: Some of our environments end up with old versions of pip (10.x)
132132
# Adding a new enough version of pip to the requirements explodes the
133133
# solve time. Just using pip to update itself.
134134
# - py35_macos

conda.recipe/meta.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,12 @@ requirements:
2020
- cython
2121
- numpy
2222
- setuptools >=3.3
23-
- python-dateutil >=2.5.0
23+
- python-dateutil >=2.7.3
2424
- pytz
2525
run:
2626
- python {{ python }}
2727
- {{ pin_compatible('numpy') }}
28-
- python-dateutil >=2.5.0
28+
- python-dateutil >=2.7.3
2929
- pytz
3030

3131
test:

doc/source/development/contributing.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -581,7 +581,7 @@ do not make sudden changes to the code that could have the potential to break
581581
a lot of user code as a result, that is, we need it to be as *backwards compatible*
582582
as possible to avoid mass breakages.
583583

584-
Additional standards are outlined on the `pandas code style guide <code_style>`_
584+
Additional standards are outlined on the :ref:`pandas code style guide <code_style>`
585585

586586
Optional dependencies
587587
---------------------

doc/source/getting_started/install.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ Dependencies
220220
Package Minimum supported version
221221
================================================================ ==========================
222222
`setuptools <https://setuptools.readthedocs.io/en/latest/>`__ 24.2.0
223-
`NumPy <https://www.numpy.org>`__ 1.13.3
223+
`NumPy <https://www.numpy.org>`__ 1.15.4
224224
`python-dateutil <https://dateutil.readthedocs.io/en/stable/>`__ 2.7.3
225225
`pytz <https://pypi.org/project/pytz/>`__ 2017.2
226226
================================================================ ==========================

doc/source/reference/general_utility_functions.rst

+3
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,12 @@ Exceptions and warnings
3535
.. autosummary::
3636
:toctree: api/
3737

38+
errors.AccessorRegistrationWarning
3839
errors.DtypeWarning
3940
errors.EmptyDataError
4041
errors.OutOfBoundsDatetime
42+
errors.MergeError
43+
errors.NumbaUtilError
4144
errors.ParserError
4245
errors.ParserWarning
4346
errors.PerformanceWarning

doc/source/reference/groupby.rst

+4-2
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,10 @@ Function application
3636

3737
GroupBy.apply
3838
GroupBy.agg
39-
GroupBy.aggregate
40-
GroupBy.transform
39+
SeriesGroupBy.aggregate
40+
DataFrameGroupBy.aggregate
41+
SeriesGroupBy.transform
42+
DataFrameGroupBy.transform
4143
GroupBy.pipe
4244

4345
Computations / descriptive stats

doc/source/user_guide/basics.rst

+58
Original file line numberDiff line numberDiff line change
@@ -1781,6 +1781,31 @@ used to sort a pandas object by its index levels.
17811781
# Series
17821782
unsorted_df['three'].sort_index()
17831783
1784+
.. _basics.sort_index_key:
1785+
1786+
.. versionadded:: 1.1.0
1787+
1788+
Sorting by index also supports a ``key`` parameter that takes a callable
1789+
function to apply to the index being sorted. For `MultiIndex` objects,
1790+
the key is applied per-level to the levels specified by `level`.
1791+
1792+
.. ipython:: python
1793+
1794+
s1 = pd.DataFrame({
1795+
"a": ['B', 'a', 'C'],
1796+
"b": [1, 2, 3],
1797+
"c": [2, 3, 4]
1798+
}).set_index(list("ab"))
1799+
s1
1800+
1801+
.. ipython:: python
1802+
1803+
s1.sort_index(level="a")
1804+
s1.sort_index(level="a", key=lambda idx: idx.str.lower())
1805+
1806+
For information on key sorting by value, see :ref:`value sorting
1807+
<basics.sort_value_key>`.
1808+
17841809
.. _basics.sort_values:
17851810

17861811
By values
@@ -1813,6 +1838,39 @@ argument:
18131838
s.sort_values()
18141839
s.sort_values(na_position='first')
18151840
1841+
.. _basics.sort_value_key:
1842+
1843+
.. versionadded:: 1.1.0
1844+
1845+
Sorting also supports a ``key`` parameter that takes a callable function
1846+
to apply to the values being sorted.
1847+
1848+
.. ipython:: python
1849+
1850+
s1 = pd.Series(['B', 'a', 'C'])
1851+
1852+
.. ipython:: python
1853+
1854+
s1.sort_values()
1855+
s1.sort_values(key=lambda x: x.str.lower())
1856+
1857+
`key` will be given the :class:`Series` of values and should return a ``Series``
1858+
or array of the same shape with the transformed values. For `DataFrame` objects,
1859+
the key is applied per column, so the key should still expect a Series and return
1860+
a Series, e.g.
1861+
1862+
.. ipython:: python
1863+
1864+
df = pd.DataFrame({"a": ['B', 'a', 'C'], "b": [1, 2, 3]})
1865+
1866+
.. ipython:: python
1867+
1868+
df.sort_values(by='a')
1869+
df.sort_values(by='a', key=lambda col: col.str.lower())
1870+
1871+
The name or type of each column can be used to apply different functions to
1872+
different columns.
1873+
18161874
.. _basics.sort_indexes_and_values:
18171875

18181876
By indexes and values

doc/source/user_guide/computation.rst

+12-8
Original file line numberDiff line numberDiff line change
@@ -318,8 +318,8 @@ We provide a number of common statistical functions:
318318
:meth:`~Rolling.kurt`, Sample kurtosis (4th moment)
319319
:meth:`~Rolling.quantile`, Sample quantile (value at %)
320320
:meth:`~Rolling.apply`, Generic apply
321-
:meth:`~Rolling.cov`, Unbiased covariance (binary)
322-
:meth:`~Rolling.corr`, Correlation (binary)
321+
:meth:`~Rolling.cov`, Sample covariance (binary)
322+
:meth:`~Rolling.corr`, Sample correlation (binary)
323323

324324
.. _computation.window_variance.caveats:
325325

@@ -341,6 +341,8 @@ We provide a number of common statistical functions:
341341
sample variance under the circumstances would result in a biased estimator
342342
of the variable we are trying to determine.
343343

344+
The same caveats apply to using any supported statistical sample methods.
345+
344346
.. _stats.rolling_apply:
345347

346348
Rolling apply
@@ -380,8 +382,8 @@ and their default values are set to ``False``, ``True`` and ``False`` respective
380382
.. note::
381383

382384
In terms of performance, **the first time a function is run using the Numba engine will be slow**
383-
as Numba will have some function compilation overhead. However, ``rolling`` objects will cache
384-
the function and subsequent calls will be fast. In general, the Numba engine is performant with
385+
as Numba will have some function compilation overhead. However, the compiled functions are cached,
386+
and subsequent calls will be fast. In general, the Numba engine is performant with
385387
a larger amount of data points (e.g. 1+ million).
386388

387389
.. code-block:: ipython
@@ -870,12 +872,12 @@ Method summary
870872
:meth:`~Expanding.max`, Maximum
871873
:meth:`~Expanding.std`, Sample standard deviation
872874
:meth:`~Expanding.var`, Sample variance
873-
:meth:`~Expanding.skew`, Unbiased skewness (3rd moment)
874-
:meth:`~Expanding.kurt`, Unbiased kurtosis (4th moment)
875+
:meth:`~Expanding.skew`, Sample skewness (3rd moment)
876+
:meth:`~Expanding.kurt`, Sample kurtosis (4th moment)
875877
:meth:`~Expanding.quantile`, Sample quantile (value at %)
876878
:meth:`~Expanding.apply`, Generic apply
877-
:meth:`~Expanding.cov`, Unbiased covariance (binary)
878-
:meth:`~Expanding.corr`, Correlation (binary)
879+
:meth:`~Expanding.cov`, Sample covariance (binary)
880+
:meth:`~Expanding.corr`, Sample correlation (binary)
879881

880882
.. note::
881883

@@ -884,6 +886,8 @@ Method summary
884886
windows. See :ref:`this section <computation.window_variance.caveats>` for more
885887
information.
886888

889+
The same caveats apply to using any supported statistical sample methods.
890+
887891
.. currentmodule:: pandas
888892

889893
Aside from not having a ``window`` parameter, these functions have the same

0 commit comments

Comments
 (0)