Skip to content

Commit ace5b6b

Browse files
committed
Merge remote-tracking branch 'upstream/master' into illegalnumbers-pandas-devGH-21980
2 parents cacf82f + 92dcf5f commit ace5b6b

File tree

362 files changed

+18732
-15588
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

362 files changed

+18732
-15588
lines changed

.circleci/config.yml

+147
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,147 @@
1+
version: 2
2+
jobs:
3+
4+
# --------------------------------------------------------------------------
5+
# 0. py27_compat
6+
# --------------------------------------------------------------------------
7+
py27_compat:
8+
docker:
9+
- image: continuumio/miniconda:latest
10+
# databases configuration
11+
- image: circleci/postgres:9.6.5-alpine-ram
12+
environment:
13+
POSTGRES_USER: postgres
14+
POSTGRES_DB: pandas_nosetest
15+
- image: circleci/mysql:8-ram
16+
environment:
17+
MYSQL_USER: "root"
18+
MYSQL_HOST: "localhost"
19+
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
20+
MYSQL_DATABASE: "pandas_nosetest"
21+
environment:
22+
JOB: "2.7_COMPAT"
23+
ENV_FILE: "ci/circle-27-compat.yaml"
24+
LOCALE_OVERRIDE: "it_IT.UTF-8"
25+
MINICONDA_DIR: /home/ubuntu/miniconda3
26+
steps:
27+
- checkout
28+
- run:
29+
name: build
30+
command: |
31+
./ci/install_circle.sh
32+
./ci/show_circle.sh
33+
- run:
34+
name: test
35+
command: ./ci/run_circle.sh --skip-slow --skip-network
36+
37+
# --------------------------------------------------------------------------
38+
# 1. py36_locale
39+
# --------------------------------------------------------------------------
40+
py36_locale:
41+
docker:
42+
- image: continuumio/miniconda:latest
43+
# databases configuration
44+
- image: circleci/postgres:9.6.5-alpine-ram
45+
environment:
46+
POSTGRES_USER: postgres
47+
POSTGRES_DB: pandas_nosetest
48+
- image: circleci/mysql:8-ram
49+
environment:
50+
MYSQL_USER: "root"
51+
MYSQL_HOST: "localhost"
52+
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
53+
MYSQL_DATABASE: "pandas_nosetest"
54+
55+
environment:
56+
JOB: "3.6_LOCALE"
57+
ENV_FILE: "ci/circle-36-locale.yaml"
58+
LOCALE_OVERRIDE: "zh_CN.UTF-8"
59+
MINICONDA_DIR: /home/ubuntu/miniconda3
60+
steps:
61+
- checkout
62+
- run:
63+
name: build
64+
command: |
65+
./ci/install_circle.sh
66+
./ci/show_circle.sh
67+
- run:
68+
name: test
69+
command: ./ci/run_circle.sh --skip-slow --skip-network
70+
71+
# --------------------------------------------------------------------------
72+
# 2. py36_locale_slow
73+
# --------------------------------------------------------------------------
74+
py36_locale_slow:
75+
docker:
76+
- image: continuumio/miniconda:latest
77+
# databases configuration
78+
- image: circleci/postgres:9.6.5-alpine-ram
79+
environment:
80+
POSTGRES_USER: postgres
81+
POSTGRES_DB: pandas_nosetest
82+
- image: circleci/mysql:8-ram
83+
environment:
84+
MYSQL_USER: "root"
85+
MYSQL_HOST: "localhost"
86+
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
87+
MYSQL_DATABASE: "pandas_nosetest"
88+
89+
environment:
90+
JOB: "3.6_LOCALE_SLOW"
91+
ENV_FILE: "ci/circle-36-locale_slow.yaml"
92+
LOCALE_OVERRIDE: "zh_CN.UTF-8"
93+
MINICONDA_DIR: /home/ubuntu/miniconda3
94+
steps:
95+
- checkout
96+
- run:
97+
name: build
98+
command: |
99+
./ci/install_circle.sh
100+
./ci/show_circle.sh
101+
- run:
102+
name: test
103+
command: ./ci/run_circle.sh --only-slow --skip-network
104+
105+
# --------------------------------------------------------------------------
106+
# 3. py35_ascii
107+
# --------------------------------------------------------------------------
108+
py35_ascii:
109+
docker:
110+
- image: continuumio/miniconda:latest
111+
# databases configuration
112+
- image: circleci/postgres:9.6.5-alpine-ram
113+
environment:
114+
POSTGRES_USER: postgres
115+
POSTGRES_DB: pandas_nosetest
116+
- image: circleci/mysql:8-ram
117+
environment:
118+
MYSQL_USER: "root"
119+
MYSQL_HOST: "localhost"
120+
MYSQL_ALLOW_EMPTY_PASSWORD: "true"
121+
MYSQL_DATABASE: "pandas_nosetest"
122+
123+
environment:
124+
JOB: "3.5_ASCII"
125+
ENV_FILE: "ci/circle-35-ascii.yaml"
126+
LOCALE_OVERRIDE: "C"
127+
MINICONDA_DIR: /home/ubuntu/miniconda3
128+
steps:
129+
- checkout
130+
- run:
131+
name: build
132+
command: |
133+
./ci/install_circle.sh
134+
./ci/show_circle.sh
135+
- run:
136+
name: test
137+
command: ./ci/run_circle.sh --skip-slow --skip-network
138+
139+
140+
workflows:
141+
version: 2
142+
build_and_test:
143+
jobs:
144+
- py27_compat
145+
- py36_locale
146+
- py36_locale_slow
147+
- py35_ascii

.travis.yml

+2 -1
Original file line number | Diff line number | Diff line change
@@ -56,7 +56,7 @@ matrix:
5656
- python-gtk2
5757
- dist: trusty
5858
env:
59-
- JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
59+
- JOB="3.6, coverage" ENV_FILE="ci/travis-36.yaml" TEST_ARGS="--skip-slow --skip-network" PANDAS_TESTING_MODE="deprecate" COVERAGE=true DOCTEST=true
6060
# In allow_failures
6161
- dist: trusty
6262
env:
@@ -119,6 +119,7 @@ script:
119119
- ci/script_single.sh
120120
- ci/script_multi.sh
121121
- ci/lint.sh
122+
- ci/doctests.sh
122123
- echo "checking imports"
123124
- source activate pandas && python ci/check_imports.py
124125
- echo "script done"

README.md

+1 -1
Original file line number | Diff line number | Diff line change
@@ -89,7 +89,7 @@
8989

9090

9191

92-
## What is it
92+
## What is it?
9393

9494
**pandas** is a Python package providing fast, flexible, and expressive data
9595
structures designed to make working with "relational" or "labeled" data both

appveyor.yml

+2
Original file line number | Diff line number | Diff line change
@@ -20,12 +20,14 @@ environment:
2020
matrix:
2121

2222
- CONDA_ROOT: "C:\\Miniconda3_64"
23+
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017
2324
PYTHON_VERSION: "3.6"
2425
PYTHON_ARCH: "64"
2526
CONDA_PY: "36"
2627
CONDA_NPY: "113"
2728

2829
- CONDA_ROOT: "C:\\Miniconda3_64"
30+
APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
2931
PYTHON_VERSION: "2.7"
3032
PYTHON_ARCH: "64"
3133
CONDA_PY: "27"

asv_bench/benchmarks/io/csv.py

+32 -20
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,14 @@ def time_frame_date_formatting(self):
5454
self.data.to_csv(self.fname, date_format='%Y%m%d')
5555

5656

57-
class ReadCSVDInferDatetimeFormat(object):
57+
class StringIORewind(object):
58+
59+
def data(self, stringio_object):
60+
stringio_object.seek(0)
61+
return stringio_object
62+
63+
64+
class ReadCSVDInferDatetimeFormat(StringIORewind):
5865

5966
goal_time = 0.2
6067
params = ([True, False], ['custom', 'iso8601', 'ymd'])
@@ -66,10 +73,12 @@ def setup(self, infer_datetime_format, format):
6673
'iso8601': '%Y-%m-%d %H:%M:%S',
6774
'ymd': '%Y%m%d'}
6875
dt_format = formats[format]
69-
self.data = StringIO('\n'.join(rng.strftime(dt_format).tolist()))
76+
self.StringIO_input = StringIO('\n'.join(
77+
rng.strftime(dt_format).tolist()))
7078

7179
def time_read_csv(self, infer_datetime_format, format):
72-
read_csv(self.data, header=None, names=['foo'], parse_dates=['foo'],
80+
read_csv(self.data(self.StringIO_input),
81+
header=None, names=['foo'], parse_dates=['foo'],
7382
infer_datetime_format=infer_datetime_format)
7483

7584

@@ -95,7 +104,7 @@ def time_skipprows(self, skiprows):
95104
read_csv(self.fname, skiprows=skiprows)
96105

97106

98-
class ReadUint64Integers(object):
107+
class ReadUint64Integers(StringIORewind):
99108

100109
goal_time = 0.2
101110

@@ -108,13 +117,13 @@ def setup(self):
108117
self.data2 = StringIO('\n'.join(arr.astype(str).tolist()))
109118

110119
def time_read_uint64(self):
111-
read_csv(self.data1, header=None, names=['foo'])
120+
read_csv(self.data(self.data1), header=None, names=['foo'])
112121

113122
def time_read_uint64_neg_values(self):
114-
read_csv(self.data2, header=None, names=['foo'])
123+
read_csv(self.data(self.data2), header=None, names=['foo'])
115124

116125
def time_read_uint64_na_values(self):
117-
read_csv(self.data1, header=None, names=['foo'],
126+
read_csv(self.data(self.data1), header=None, names=['foo'],
118127
na_values=self.na_values)
119128

120129

@@ -140,19 +149,20 @@ def time_thousands(self, sep, thousands):
140149
read_csv(self.fname, sep=sep, thousands=thousands)
141150

142151

143-
class ReadCSVComment(object):
152+
class ReadCSVComment(StringIORewind):
144153

145154
goal_time = 0.2
146155

147156
def setup(self):
148157
data = ['A,B,C'] + (['1,2,3 # comment'] * 100000)
149-
self.s_data = StringIO('\n'.join(data))
158+
self.StringIO_input = StringIO('\n'.join(data))
150159

151160
def time_comment(self):
152-
read_csv(self.s_data, comment='#', header=None, names=list('abc'))
161+
read_csv(self.data(self.StringIO_input), comment='#',
162+
header=None, names=list('abc'))
153163

154164

155-
class ReadCSVFloatPrecision(object):
165+
class ReadCSVFloatPrecision(StringIORewind):
156166

157167
goal_time = 0.2
158168
params = ([',', ';'], ['.', '_'], [None, 'high', 'round_trip'])
@@ -164,14 +174,14 @@ def setup(self, sep, decimal, float_precision):
164174
rows = sep.join(['0{}'.format(decimal) + '{}'] * 3) + '\n'
165175
data = rows * 5
166176
data = data.format(*floats) * 200 # 1000 x 3 strings csv
167-
self.s_data = StringIO(data)
177+
self.StringIO_input = StringIO(data)
168178

169179
def time_read_csv(self, sep, decimal, float_precision):
170-
read_csv(self.s_data, sep=sep, header=None, names=list('abc'),
171-
float_precision=float_precision)
180+
read_csv(self.data(self.StringIO_input), sep=sep, header=None,
181+
names=list('abc'), float_precision=float_precision)
172182

173183
def time_read_csv_python_engine(self, sep, decimal, float_precision):
174-
read_csv(self.s_data, sep=sep, header=None, engine='python',
184+
read_csv(self.data(self.StringIO_input), sep=sep, header=None, engine='python',
175185
float_precision=None, names=list('abc'))
176186

177187

@@ -193,7 +203,7 @@ def time_convert_direct(self):
193203
read_csv(self.fname, dtype='category')
194204

195205

196-
class ReadCSVParseDates(object):
206+
class ReadCSVParseDates(StringIORewind):
197207

198208
goal_time = 0.2
199209

@@ -206,12 +216,14 @@ def setup(self):
206216
"""
207217
two_cols = ['KORD,19990127'] * 5
208218
data = data.format(*two_cols)
209-
self.s_data = StringIO(data)
219+
self.StringIO_input = StringIO(data)
210220

211221
def time_multiple_date(self):
212-
read_csv(self.s_data, sep=',', header=None,
213-
names=list(string.digits[:9]), parse_dates=[[1, 2], [1, 3]])
222+
read_csv(self.data(self.StringIO_input), sep=',', header=None,
223+
names=list(string.digits[:9]),
224+
parse_dates=[[1, 2], [1, 3]])
214225

215226
def time_baseline(self):
216-
read_csv(self.s_data, sep=',', header=None, parse_dates=[1],
227+
read_csv(self.data(self.StringIO_input), sep=',', header=None,
228+
parse_dates=[1],
217229
names=list(string.digits[:9]))

asv_bench/benchmarks/reshape.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -141,7 +141,7 @@ class GetDummies(object):
141141

142142
def setup(self):
143143
categories = list(string.ascii_letters[:12])
144-
s = pd.Series(np.random.choice(categories, size=1_000_000),
144+
s = pd.Series(np.random.choice(categories, size=1000000),
145145
dtype=pd.api.types.CategoricalDtype(categories))
146146
self.s = s
147147

asv_bench/benchmarks/series_methods.py

+58
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,64 @@ def time_isin(self, dtypes):
3838
self.s.isin(self.values)
3939

4040

41+
class IsInFloat64(object):
42+
43+
def setup(self):
44+
self.small = Series([1, 2], dtype=np.float64)
45+
self.many_different_values = np.arange(10**6, dtype=np.float64)
46+
self.few_different_values = np.zeros(10**7, dtype=np.float64)
47+
self.only_nans_values = np.full(10**7, np.nan, dtype=np.float64)
48+
49+
def time_isin_many_different(self):
50+
# runtime is dominated by creation of the lookup-table
51+
self.small.isin(self.many_different_values)
52+
53+
def time_isin_few_different(self):
54+
# runtime is dominated by creation of the lookup-table
55+
self.small.isin(self.few_different_values)
56+
57+
def time_isin_nan_values(self):
58+
# runtime is dominated by creation of the lookup-table
59+
self.small.isin(self.few_different_values)
60+
61+
62+
class IsInForObjects(object):
63+
64+
def setup(self):
65+
self.s_nans = Series(np.full(10**4, np.nan)).astype(np.object)
66+
self.vals_nans = np.full(10**4, np.nan).astype(np.object)
67+
self.s_short = Series(np.arange(2)).astype(np.object)
68+
self.s_long = Series(np.arange(10**5)).astype(np.object)
69+
self.vals_short = np.arange(2).astype(np.object)
70+
self.vals_long = np.arange(10**5).astype(np.object)
71+
# because of nans floats are special:
72+
self.s_long_floats = Series(np.arange(10**5,
73+
dtype=np.float)).astype(np.object)
74+
self.vals_long_floats = np.arange(10**5,
75+
dtype=np.float).astype(np.object)
76+
77+
def time_isin_nans(self):
78+
# if nan-objects are different objects,
79+
# this has the potential to trigger O(n^2) running time
80+
self.s_nans.isin(self.vals_nans)
81+
82+
def time_isin_short_series_long_values(self):
83+
# running time dominated by the preprocessing
84+
self.s_short.isin(self.vals_long)
85+
86+
def time_isin_long_series_short_values(self):
87+
# running time dominated by look-up
88+
self.s_long.isin(self.vals_short)
89+
90+
def time_isin_long_series_long_values(self):
91+
# no dominating part
92+
self.s_long.isin(self.vals_long)
93+
94+
def time_isin_long_series_long_values_floats(self):
95+
# no dominating part
96+
self.s_long_floats.isin(self.vals_long_floats)
97+
98+
4199
class NSort(object):
42100

43101
goal_time = 0.2

0 commit comments

Comments
 (0)