
Commit 4d4acc5

harisbal authored and committed
Merge remote-tracking branch 'upstream/master' into multi-index-join
2 parents: 2d61a12 + 5551bcf

File tree: 167 files changed (+4143, -2090 lines)


.pep8speaks.yml  (-1)

@@ -8,5 +8,4 @@ pycodestyle:
     ignore: # Errors and warnings to ignore
     - E402, # module level import not at top of file
     - E731, # do not assign a lambda expression, use a def
-    - E741, # do not use variables named 'l', 'O', or 'I'
     - W503 # line break before binary operator

.travis.yml  (+7 -12)

@@ -53,18 +53,20 @@ matrix:
     - dist: trusty
       env:
         - JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true
-    # In allow_failures
-    - dist: trusty
-      env:
-        - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
-    # In allow_failures
+
     - dist: trusty
       env:
         - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
       addons:
         apt:
           packages:
             - xsel
+
+    # In allow_failures
+    - dist: trusty
+      env:
+        - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
+
     # In allow_failures
     - dist: trusty
       env:

@@ -73,13 +75,6 @@ matrix:
     - dist: trusty
       env:
         - JOB="3.6, slow" ENV_FILE="ci/travis-36-slow.yaml" SLOW=true
-    - dist: trusty
-      env:
-        - JOB="3.7, NumPy dev" ENV_FILE="ci/travis-37-numpydev.yaml" TEST_ARGS="--skip-slow --skip-network -W error" PANDAS_TESTING_MODE="deprecate"
-      addons:
-        apt:
-          packages:
-            - xsel
     - dist: trusty
       env:
         - JOB="3.6, doc" ENV_FILE="ci/travis-36-doc.yaml" DOC=true

README.md  (+3 -3)

@@ -56,8 +56,8 @@
   <tr>
     <td></td>
     <td>
-      <a href="https://ci.appveyor.com/project/pandas-dev/pandas">
-        <img src="https://ci.appveyor.com/api/projects/status/86vn83mxgnl4xf1s/branch/master?svg=true" alt="appveyor build status" />
+      <a href="https://dev.azure.com/pandas-dev/pandas/_build/latest?definitionId=1&branch=master">
+        <img src="https://dev.azure.com/pandas-dev/pandas/_apis/build/status/pandas-dev.pandas?branch=master" alt="Azure Pipelines build status" />
       </a>
     </td>
   </tr>

@@ -97,7 +97,7 @@ easy and intuitive. It aims to be the fundamental high-level building block for
 doing practical, **real world** data analysis in Python. Additionally, it has
 the broader goal of becoming **the most powerful and flexible open source data
 analysis / manipulation tool available in any language**. It is already well on
-its way toward this goal.
+its way towards this goal.
 
 ## Main Features
 Here are just a few of the things that pandas does well:

asv_bench/benchmarks/algorithms.py  (+1 -1)

@@ -9,7 +9,7 @@
     try:
         hashing = import_module(imp)
         break
-    except:
+    except (ImportError, TypeError, ValueError):
         pass
 
 from .pandas_vb_common import setup  # noqa
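
The same cleanup, replacing a bare except: with the specific exceptions the guarded code can raise, recurs below in join_merge.py, pandas_vb_common.py and stat_ops.py. A minimal standalone sketch of the pattern, modeled on the compatibility import in pandas_vb_common.py (the script itself is illustrative, not part of the commit):

from importlib import import_module

lib = None
for imp in ['pandas._libs.lib', 'pandas.lib']:
    try:
        lib = import_module(imp)
        break
    except (ImportError, TypeError, ValueError):
        # Catch only what a failed fallback import is expected to raise;
        # a bare except: would also swallow KeyboardInterrupt and SystemExit.
        pass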

asv_bench/benchmarks/frame_methods.py  (+10 -3)

@@ -505,14 +505,21 @@ class NSort(object):
     param_names = ['keep']
 
     def setup(self, keep):
-        self.df = DataFrame(np.random.randn(1000, 3), columns=list('ABC'))
+        self.df = DataFrame(np.random.randn(100000, 3),
+                            columns=list('ABC'))
 
-    def time_nlargest(self, keep):
+    def time_nlargest_one_column(self, keep):
         self.df.nlargest(100, 'A', keep=keep)
 
-    def time_nsmallest(self, keep):
+    def time_nlargest_two_columns(self, keep):
+        self.df.nlargest(100, ['A', 'B'], keep=keep)
+
+    def time_nsmallest_one_column(self, keep):
         self.df.nsmallest(100, 'A', keep=keep)
 
+    def time_nsmallest_two_columns(self, keep):
+        self.df.nsmallest(100, ['A', 'B'], keep=keep)
+
 
 class Describe(object):
 
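
The added two-column benchmarks exercise the list form of DataFrame.nlargest and DataFrame.nsmallest. A short usage sketch (the toy frame is hypothetical and only illustrates the call signature the benchmarks time):

import pandas as pd

df = pd.DataFrame({'A': [1, 3, 3, 2], 'B': [10, 5, 7, 1]})

df.nlargest(2, 'A')            # two rows with the largest values in column 'A'
df.nlargest(2, ['A', 'B'])     # ties in 'A' are broken by column 'B'
df.nsmallest(2, ['A', 'B'])    # same tie-breaking rule, smallest rows first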

asv_bench/benchmarks/indexing.py  (+49 -34)

@@ -2,104 +2,119 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
-                    IntervalIndex, CategoricalIndex,
-                    IndexSlice, concat, date_range)
-from .pandas_vb_common import setup, Panel  # noqa
+from pandas import (Series, DataFrame, MultiIndex, Panel,
+                    Int64Index, Float64Index, IntervalIndex,
+                    CategoricalIndex, IndexSlice, concat, date_range)
+from .pandas_vb_common import setup  # noqa
 
 
 class NumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = [Int64Index, Float64Index]
-    param = ['index']
+    params = [
+        (Int64Index, Float64Index),
+        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+    ]
+    param_names = ['index_dtype', 'index_structure']
 
-    def setup(self, index):
+    def setup(self, index, index_structure):
         N = 10**6
-        idx = index(range(N))
-        self.data = Series(np.random.rand(N), index=idx)
+        indices = {
+            'unique_monotonic_inc': index(range(N)),
+            'nonunique_monotonic_inc': index(
+                list(range(55)) + [54] + list(range(55, N - 1))),
+        }
+        self.data = Series(np.random.rand(N), index=indices[index_structure])
         self.array = np.arange(10000)
         self.array_list = self.array.tolist()
 
-    def time_getitem_scalar(self, index):
+    def time_getitem_scalar(self, index, index_structure):
         self.data[800000]
 
-    def time_getitem_slice(self, index):
+    def time_getitem_slice(self, index, index_structure):
         self.data[:800000]
 
-    def time_getitem_list_like(self, index):
+    def time_getitem_list_like(self, index, index_structure):
         self.data[[800000]]
 
-    def time_getitem_array(self, index):
+    def time_getitem_array(self, index, index_structure):
         self.data[self.array]
 
-    def time_getitem_lists(self, index):
+    def time_getitem_lists(self, index, index_structure):
         self.data[self.array_list]
 
-    def time_iloc_array(self, index):
+    def time_iloc_array(self, index, index_structure):
         self.data.iloc[self.array]
 
-    def time_iloc_list_like(self, index):
+    def time_iloc_list_like(self, index, index_structure):
         self.data.iloc[[800000]]
 
-    def time_iloc_scalar(self, index):
+    def time_iloc_scalar(self, index, index_structure):
         self.data.iloc[800000]
 
-    def time_iloc_slice(self, index):
+    def time_iloc_slice(self, index, index_structure):
         self.data.iloc[:800000]
 
-    def time_ix_array(self, index):
+    def time_ix_array(self, index, index_structure):
         self.data.ix[self.array]
 
-    def time_ix_list_like(self, index):
+    def time_ix_list_like(self, index, index_structure):
         self.data.ix[[800000]]
 
-    def time_ix_scalar(self, index):
+    def time_ix_scalar(self, index, index_structure):
         self.data.ix[800000]
 
-    def time_ix_slice(self, index):
+    def time_ix_slice(self, index, index_structure):
         self.data.ix[:800000]
 
-    def time_loc_array(self, index):
+    def time_loc_array(self, index, index_structure):
         self.data.loc[self.array]
 
-    def time_loc_list_like(self, index):
+    def time_loc_list_like(self, index, index_structure):
         self.data.loc[[800000]]
 
-    def time_loc_scalar(self, index):
+    def time_loc_scalar(self, index, index_structure):
         self.data.loc[800000]
 
-    def time_loc_slice(self, index):
+    def time_loc_slice(self, index, index_structure):
         self.data.loc[:800000]
 
 
 class NonNumericSeriesIndexing(object):
 
     goal_time = 0.2
-    params = ['string', 'datetime']
-    param_names = ['index']
+    params = [
+        ('string', 'datetime'),
+        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
+    ]
+    param_names = ['index_dtype', 'index_structure']
 
-    def setup(self, index):
-        N = 10**5
+    def setup(self, index, index_structure):
+        N = 10**6
         indexes = {'string': tm.makeStringIndex(N),
                    'datetime': date_range('1900', periods=N, freq='s')}
         index = indexes[index]
+        if index_structure == 'nonunique_monotonic_inc':
+            index = index.insert(item=index[2], loc=2)[:-1]
         self.s = Series(np.random.rand(N), index=index)
         self.lbl = index[80000]
 
-    def time_getitem_label_slice(self, index):
+    def time_getitem_label_slice(self, index, index_structure):
         self.s[:self.lbl]
 
-    def time_getitem_pos_slice(self, index):
+    def time_getitem_pos_slice(self, index, index_structure):
         self.s[:80000]
 
-    def time_get_value(self, index):
+    def time_get_value(self, index, index_structure):
         with warnings.catch_warnings(record=True):
             self.s.get_value(self.lbl)
 
-    def time_getitem_scalar(self, index):
+    def time_getitem_scalar(self, index, index_structure):
         self.s[self.lbl]
 
+    def time_getitem_list_like(self, index, index_structure):
+        self.s[[self.lbl]]
+
 
 class DataFrameStringIndexing(object):
 
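
The rewritten indexing benchmarks follow asv's multi-parameter convention: params holds one sequence of values per axis, param_names labels those axes, and setup plus every time_* method receive one positional argument per axis, so asv times the full cross product of combinations. A condensed, self-contained sketch of that convention (the class is hypothetical; it assumes a pandas version where Int64Index and Float64Index are importable from the top-level namespace, as in the diff above):

import numpy as np
from pandas import Series, Int64Index, Float64Index


class ExampleSeriesIndexing(object):
    # asv runs every combination: 2 index dtypes x 2 index structures.
    params = [
        (Int64Index, Float64Index),
        ('unique_monotonic_inc', 'nonunique_monotonic_inc'),
    ]
    param_names = ['index_dtype', 'index_structure']

    def setup(self, index_dtype, index_structure):
        N = 10**5
        indices = {
            'unique_monotonic_inc': index_dtype(range(N)),
            'nonunique_monotonic_inc': index_dtype(
                list(range(55)) + [54] + list(range(55, N - 1))),
        }
        self.data = Series(np.random.rand(N), index=indices[index_structure])

    def time_getitem_scalar(self, index_dtype, index_structure):
        # Timed separately for each of the four parameter combinations.
        self.data[80000]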

asv_bench/benchmarks/io/csv.py  (+2 -4)

@@ -1,11 +1,9 @@
 import random
-import timeit
 import string
 
 import numpy as np
 import pandas.util.testing as tm
 from pandas import DataFrame, Categorical, date_range, read_csv
-from pandas.compat import PY2
 from pandas.compat import cStringIO as StringIO
 
 from ..pandas_vb_common import setup, BaseIO  # noqa

@@ -181,8 +179,8 @@ def time_read_csv(self, sep, decimal, float_precision):
                  names=list('abc'), float_precision=float_precision)
 
     def time_read_csv_python_engine(self, sep, decimal, float_precision):
-        read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
-                 float_precision=None, names=list('abc'))
+        read_csv(self.data(self.StringIO_input), sep=sep, header=None,
+                 engine='python', float_precision=None, names=list('abc'))
 
 
 class ReadCSVCategorical(BaseIO):

asv_bench/benchmarks/join_merge.py  (+5 -4)

@@ -3,14 +3,15 @@
 
 import numpy as np
 import pandas.util.testing as tm
-from pandas import (DataFrame, Series, MultiIndex, date_range, concat, merge,
-                    merge_asof)
+from pandas import (DataFrame, Series, Panel, MultiIndex,
+                    date_range, concat, merge, merge_asof)
+
 try:
     from pandas import merge_ordered
 except ImportError:
     from pandas import ordered_merge as merge_ordered
 
-from .pandas_vb_common import Panel, setup  # noqa
+from .pandas_vb_common import setup  # noqa
 
 
 class Append(object):

@@ -29,7 +30,7 @@ def setup(self):
         try:
             with warnings.catch_warnings(record=True):
                 self.mdf1.consolidate(inplace=True)
-        except:
+        except (AttributeError, TypeError):
             pass
         self.mdf2 = self.mdf1.copy()
         self.mdf2.index = self.df2.index

asv_bench/benchmarks/pandas_vb_common.py  (+2 -3)

@@ -2,14 +2,13 @@
 from importlib import import_module
 
 import numpy as np
-from pandas import Panel
 
 # Compatibility import for lib
 for imp in ['pandas._libs.lib', 'pandas.lib']:
     try:
         lib = import_module(imp)
         break
-    except:
+    except (ImportError, TypeError, ValueError):
         pass
 
 numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,

@@ -34,7 +33,7 @@ def remove(self, f):
         """Remove created files"""
         try:
             os.remove(f)
-        except:
+        except OSError:
             # On Windows, attempting to remove a file that is in use
             # causes an exception to be raised
             pass

asv_bench/benchmarks/panel_ctor.py  (+2 -2)

@@ -1,9 +1,9 @@
 import warnings
 from datetime import datetime, timedelta
 
-from pandas import DataFrame, DatetimeIndex, date_range
+from pandas import DataFrame, Panel, DatetimeIndex, date_range
 
-from .pandas_vb_common import Panel, setup  # noqa
+from .pandas_vb_common import setup  # noqa
 
 
 class DifferentIndexes(object):

asv_bench/benchmarks/panel_methods.py  (+2 -1)

@@ -1,8 +1,9 @@
 import warnings
 
 import numpy as np
+from pandas import Panel
 
-from .pandas_vb_common import Panel, setup  # noqa
+from .pandas_vb_common import setup  # noqa
 
 
 class PanelMethods(object):

asv_bench/benchmarks/stat_ops.py  (+2 -2)

@@ -18,7 +18,7 @@ def setup(self, op, dtype, axis, use_bottleneck):
         df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
         try:
             pd.options.compute.use_bottleneck = use_bottleneck
-        except:
+        except TypeError:
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.df_func = getattr(df, op)

@@ -56,7 +56,7 @@ def setup(self, op, dtype, use_bottleneck):
         s = pd.Series(np.random.randn(100000)).astype(dtype)
         try:
             pd.options.compute.use_bottleneck = use_bottleneck
-        except:
+        except TypeError:
             from pandas.core import nanops
             nanops._USE_BOTTLENECK = use_bottleneck
         self.s_func = getattr(s, op)

asv_bench/benchmarks/timeseries.py  (-1)

@@ -1,4 +1,3 @@
-import warnings
 from datetime import timedelta
 
 import numpy as np

azure-pipelines.yml  (+2 -2)

@@ -18,8 +18,8 @@ jobs:
 - template: ci/azure/windows.yml
   parameters:
     name: Windows
-    vmImage: vs2017-win2017
+    vmImage: vs2017-win2016
 - template: ci/azure/windows-py27.yml
   parameters:
     name: WindowsPy27
-    vmImage: vs2017-win2017
+    vmImage: vs2017-win2016

ci/azure/macos.yml  (+4)

@@ -37,3 +37,7 @@ jobs:
   - script: |
       export PATH=$HOME/miniconda3/bin:$PATH
       source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
+  - task: PublishTestResults@2
+    inputs:
+      testResultsFiles: '/tmp/*.xml'
+      testRunTitle: 'MacOS-35'
