Skip to content

Commit 406dd00

Browse files
authored
Merge branch 'master' into bug-fix-23020
2 parents a469624 + a784aee commit 406dd00

File tree

170 files changed

+4848
-3511
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

170 files changed

+4848
-3511
lines changed

.pep8speaks.yml

-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ pycodestyle:
1515
- E402, # module level import not at top of file
1616
- E722, # do not use bare except
1717
- E731, # do not assign a lambda expression, use a def
18-
- E741, # ambiguous variable name 'l'
1918
- C406, # Unnecessary list literal - rewrite as a dict literal.
2019
- C408, # Unnecessary dict call - rewrite as a literal.
2120
- C409 # Unnecessary list passed to tuple() - rewrite as a tuple literal.

asv_bench/benchmarks/indexing.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,17 @@
22

33
import numpy as np
44
import pandas.util.testing as tm
5-
from pandas import (Series, DataFrame, Panel, MultiIndex, Int64Index,
6-
Float64Index, IntervalIndex, CategoricalIndex,
5+
from pandas import (Series, DataFrame, Panel, MultiIndex,
6+
Int64Index, UInt64Index, Float64Index,
7+
IntervalIndex, CategoricalIndex,
78
IndexSlice, concat, date_range)
89

910

1011
class NumericSeriesIndexing(object):
1112

1213
goal_time = 0.2
1314
params = [
14-
(Int64Index, Float64Index),
15+
(Int64Index, UInt64Index, Float64Index),
1516
('unique_monotonic_inc', 'nonunique_monotonic_inc'),
1617
]
1718
param_names = ['index_dtype', 'index_structure']
+54
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import numpy as np
2+
3+
from pandas._libs.index import (Int64Engine, UInt64Engine, Float64Engine,
4+
ObjectEngine)
5+
6+
7+
class NumericEngineIndexing(object):
8+
9+
goal_time = 0.2
10+
params = [[Int64Engine, UInt64Engine, Float64Engine],
11+
[np.int64, np.uint64, np.float64],
12+
['monotonic_incr', 'monotonic_decr', 'non_monotonic'],
13+
]
14+
param_names = ['engine', 'dtype', 'index_type']
15+
16+
def setup(self, engine, dtype, index_type):
17+
N = 10**5
18+
values = list([1] * N + [2] * N + [3] * N)
19+
arr = {
20+
'monotonic_incr': np.array(values, dtype=dtype),
21+
'monotonic_decr': np.array(list(reversed(values)),
22+
dtype=dtype),
23+
'non_monotonic': np.array([1, 2, 3] * N, dtype=dtype),
24+
}[index_type]
25+
26+
self.data = engine(lambda: arr, len(arr))
27+
# the call below populates the mapping etc. up front, so it is not timed.
28+
self.data.get_loc(2)
29+
30+
def time_get_loc(self, engine, dtype, index_type):
31+
self.data.get_loc(2)
32+
33+
34+
class ObjectEngineIndexing(object):
35+
36+
goal_time = 0.2
37+
params = [('monotonic_incr', 'monotonic_decr', 'non_monotonic')]
38+
param_names = ['index_type']
39+
40+
def setup(self, index_type):
41+
N = 10**5
42+
values = list('a' * N + 'b' * N + 'c' * N)
43+
arr = {
44+
'monotonic_incr': np.array(values, dtype=object),
45+
'monotonic_decr': np.array(list(reversed(values)), dtype=object),
46+
'non_monotonic': np.array(list('abc') * N, dtype=object),
47+
}[index_type]
48+
49+
self.data = ObjectEngine(lambda: arr, len(arr))
50+
# the call below populates the mapping etc. up front, so it is not timed.
51+
self.data.get_loc('b')
52+
53+
def time_get_loc(self, index_type):
54+
self.data.get_loc('b')

ci/azure-macos-35.yaml

+2-2
Original file line numberDiff line numberDiff line change
@@ -8,10 +8,10 @@ dependencies:
88
- html5lib
99
- jinja2
1010
- lxml
11-
- matplotlib
11+
- matplotlib=2.2.0
1212
- nomkl
1313
- numexpr
14-
- numpy=1.10.4
14+
- numpy=1.12.0
1515
- openpyxl=2.5.5
1616
- pytables
1717
- python=3.5*

ci/azure-windows-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ dependencies:
1010
- html5lib
1111
- jinja2=2.8
1212
- lxml
13-
- matplotlib
13+
- matplotlib=2.0.1
1414
- numexpr
1515
- numpy=1.12*
1616
- openpyxl=2.5.5

ci/azure-windows-36.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,14 @@ channels:
55
dependencies:
66
- blosc
77
- bottleneck
8+
- boost-cpp<1.67
89
- fastparquet
910
- feather-format
1011
- matplotlib
1112
- numexpr
1213
- numpy=1.14*
1314
- openpyxl=2.5.5
15+
- parquet-cpp
1416
- pyarrow
1517
- pytables
1618
- python-dateutil

ci/azure/macos.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
strategy:
1010
maxParallel: 11
1111
matrix:
12-
py35_np_110:
12+
py35_np_120:
1313
ENV_FILE: ci/azure-macos-35.yaml
1414
CONDA_PY: "35"
1515
CONDA_ENV: pandas

ci/azure/windows-py27.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
strategy:
1010
maxParallel: 11
1111
matrix:
12-
py36_np14:
12+
py36_np121:
1313
ENV_FILE: ci/azure-windows-27.yaml
1414
CONDA_PY: "27"
1515
CONDA_ENV: pandas

ci/circle-27-compat.yaml

+5-5
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,18 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- bottleneck=1.0.0
6+
- bottleneck=1.2.0
77
- cython=0.28.2
88
- jinja2=2.8
9-
- numexpr=2.4.4 # we test that we correctly don't use an unsupported numexpr
10-
- numpy=1.9.3
9+
- numexpr=2.6.1
10+
- numpy=1.12.0
1111
- openpyxl=2.5.5
1212
- psycopg2
13-
- pytables=3.2.2
13+
- pytables=3.4.2
1414
- python-dateutil=2.5.0
1515
- python=2.7*
1616
- pytz=2013b
17-
- scipy=0.14.0
17+
- scipy=0.18.1
1818
- sqlalchemy=0.7.8
1919
- xlrd=0.9.2
2020
- xlsxwriter=0.5.2

ci/code_checks.sh

+11-6
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
echo "inside $0"
1818
[[ $LINT ]] || { echo "NOT Linting. To lint use: LINT=true $0 $1"; exit 0; }
19-
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unkown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; }
19+
[[ -z "$1" || "$1" == "lint" || "$1" == "patterns" || "$1" == "doctests" ]] || { echo "Unknown command $1. Usage: $0 [lint|patterns|doctests]"; exit 9999; }
2020

2121
source activate pandas
2222
RET=0
@@ -56,6 +56,11 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
5656
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
5757
RET=$(($RET + $?)) ; echo $MSG "DONE"
5858

59+
# Imports - Check formatting using isort; see setup.cfg for settings
60+
MSG='Check import format using isort ' ; echo $MSG
61+
isort --recursive --check-only pandas
62+
RET=$(($RET + $?)) ; echo $MSG "DONE"
63+
5964
fi
6065

6166
### PATTERNS ###
@@ -117,22 +122,22 @@ fi
117122
if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
118123

119124
MSG='Doctests frame.py' ; echo $MSG
120-
pytest --doctest-modules -v pandas/core/frame.py \
121-
-k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_dict -to_stata"
125+
pytest -q --doctest-modules pandas/core/frame.py \
126+
-k"-axes -combine -itertuples -join -nlargest -nsmallest -nunique -pivot_table -quantile -query -reindex -reindex_axis -replace -round -set_index -stack -to_stata"
122127
RET=$(($RET + $?)) ; echo $MSG "DONE"
123128

124129
MSG='Doctests series.py' ; echo $MSG
125-
pytest --doctest-modules -v pandas/core/series.py \
130+
pytest -q --doctest-modules pandas/core/series.py \
126131
-k"-nonzero -reindex -searchsorted -to_dict"
127132
RET=$(($RET + $?)) ; echo $MSG "DONE"
128133

129134
MSG='Doctests generic.py' ; echo $MSG
130-
pytest --doctest-modules -v pandas/core/generic.py \
135+
pytest -q --doctest-modules pandas/core/generic.py \
131136
-k"-_set_axis_name -_xs -describe -droplevel -groupby -interpolate -pct_change -pipe -reindex -reindex_axis -resample -to_json -transpose -values -xs"
132137
RET=$(($RET + $?)) ; echo $MSG "DONE"
133138

134139
MSG='Doctests top-level reshaping functions' ; echo $MSG
135-
pytest --doctest-modules -v \
140+
pytest -q --doctest-modules \
136141
pandas/core/reshape/concat.py \
137142
pandas/core/reshape/pivot.py \
138143
pandas/core/reshape/reshape.py \

ci/environment-dev.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies:
88
- flake8
99
- flake8-comprehensions
1010
- hypothesis>=3.58.0
11+
- isort
1112
- moto
1213
- pytest>=3.6
1314
- python-dateutil>=2.5.0

ci/requirements-optional-conda.txt

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
beautifulsoup4>=4.2.1
22
blosc
3-
bottleneck
3+
bottleneck>=1.2.0
44
fastparquet
55
feather-format
66
gcsfs
@@ -9,17 +9,17 @@ ipython>=5.6.0
99
ipykernel
1010
jinja2
1111
lxml
12-
matplotlib
12+
matplotlib>=2.0.0
1313
nbsphinx
14-
numexpr
14+
numexpr>=2.6.1
1515
openpyxl=2.5.5
1616
pyarrow
1717
pymysql
18-
pytables
18+
pytables>=3.4.2
1919
pytest-cov
2020
pytest-xdist
2121
s3fs
22-
scipy
22+
scipy>=0.18.1
2323
seaborn
2424
sqlalchemy
2525
statsmodels

ci/requirements-optional-pip.txt

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Do not modify directly
33
beautifulsoup4>=4.2.1
44
blosc
5-
bottleneck
5+
bottleneck>=1.2.0
66
fastparquet
77
feather-format
88
gcsfs
@@ -11,17 +11,17 @@ ipython>=5.6.0
1111
ipykernel
1212
jinja2
1313
lxml
14-
matplotlib
14+
matplotlib>=2.0.0
1515
nbsphinx
16-
numexpr
16+
numexpr>=2.6.1
1717
openpyxl==2.5.5
1818
pyarrow
1919
pymysql
2020
tables
2121
pytest-cov
2222
pytest-xdist
2323
s3fs
24-
scipy
24+
scipy>=0.18.1
2525
seaborn
2626
sqlalchemy
2727
statsmodels

ci/requirements_dev.txt

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ NumPy
55
flake8
66
flake8-comprehensions
77
hypothesis>=3.58.0
8+
isort
89
moto
910
pytest>=3.6
1011
python-dateutil>=2.5.0

ci/travis-27-locale.yaml

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,11 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- bottleneck=1.0.0
6+
- bottleneck=1.2.0
77
- cython=0.28.2
88
- lxml
9-
- matplotlib=1.4.3
10-
- numpy=1.9.3
9+
- matplotlib=2.0.0
10+
- numpy=1.12.0
1111
- openpyxl=2.4.0
1212
- python-dateutil
1313
- python-blosc

ci/travis-27.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ dependencies:
1414
- jemalloc=4.5.0.post
1515
- jinja2=2.8
1616
- lxml
17-
- matplotlib
17+
- matplotlib=2.2.2
1818
- mock
1919
- nomkl
2020
- numexpr

ci/travis-36.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ dependencies:
1414
- geopandas
1515
- html5lib
1616
- ipython
17+
- isort
1718
- jinja2
1819
- lxml
1920
- matplotlib

ci/travis-37-numpydev.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,5 @@ dependencies:
1313
- "git+git://github.com/dateutil/dateutil.git"
1414
- "-f https://7933911d6844c6c53a7d-47bd50c35cd79bd838daf386af554a83.ssl.cf2.rackcdn.com"
1515
- "--pre"
16-
- "numpy"
16+
- "numpy<=1.16.0.dev0+20181015190246"
1717
- "scipy"

doc/source/cookbook.rst

+33-29
Original file line numberDiff line numberDiff line change
@@ -1228,36 +1228,40 @@ Correlation
12281228

12291229
The `method` argument within `DataFrame.corr` can accept a callable in addition to the named correlation types. Here we compute the `distance correlation <https://en.wikipedia.org/wiki/Distance_correlation>`__ matrix for a `DataFrame` object.
12301230

1231-
.. ipython:: python
1232-
1233-
def distcorr(x, y):
1234-
n = len(x)
1235-
a = np.zeros(shape=(n, n))
1236-
b = np.zeros(shape=(n, n))
1237-
1238-
for i in range(n):
1239-
for j in range(i + 1, n):
1240-
a[i, j] = abs(x[i] - x[j])
1241-
b[i, j] = abs(y[i] - y[j])
1242-
1243-
a += a.T
1244-
b += b.T
1245-
1246-
a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
1247-
b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
1248-
1249-
A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
1250-
B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
1251-
1252-
cov_ab = np.sqrt(np.nansum(A * B)) / n
1253-
std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
1254-
std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
1255-
1256-
return cov_ab / std_a / std_b
1257-
1258-
df = pd.DataFrame(np.random.normal(size=(100, 3)))
1231+
.. code-block:: python
12591232
1260-
df.corr(method=distcorr)
1233+
>>> def distcorr(x, y):
1234+
... n = len(x)
1235+
... a = np.zeros(shape=(n, n))
1236+
... b = np.zeros(shape=(n, n))
1237+
...
1238+
... for i in range(n):
1239+
... for j in range(i + 1, n):
1240+
... a[i, j] = abs(x[i] - x[j])
1241+
... b[i, j] = abs(y[i] - y[j])
1242+
...
1243+
... a += a.T
1244+
... b += b.T
1245+
...
1246+
... a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
1247+
... b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
1248+
...
1249+
... A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
1250+
... B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
1251+
...
1252+
... cov_ab = np.sqrt(np.nansum(A * B)) / n
1253+
... std_a = np.sqrt(np.sqrt(np.nansum(A**2)) / n)
1254+
... std_b = np.sqrt(np.sqrt(np.nansum(B**2)) / n)
1255+
...
1256+
... return cov_ab / std_a / std_b
1257+
...
1258+
>>> df = pd.DataFrame(np.random.normal(size=(100, 3)))
1259+
...
1260+
>>> df.corr(method=distcorr)
1261+
0 1 2
1262+
0 1.000000 0.171368 0.145302
1263+
1 0.171368 1.000000 0.189919
1264+
2 0.145302 0.189919 1.000000
12611265
12621266
Timedeltas
12631267
----------

0 commit comments

Comments
 (0)