Skip to content

Commit dd3759d

Browse files
committed
Merge tag 'v0.19.1' into debian
Version 0.19.1 * tag 'v0.19.1': (43 commits) RLS: v0.19.1 DOC: update whatsnew/release notes for 0.19.1 (pandas-dev#14573) [Backport pandas-dev#14545] BUG/API: Index.append with mixed object/Categorical indices (pandas-dev#14545) DOC: rst fixes [Backport pandas-dev#14567] DEPR: add deprecation warning for com.array_equivalent (pandas-dev#14567) [Backport pandas-dev#14551] PERF: casting loc to labels dtype before searchsorted (pandas-dev#14551) [Backport pandas-dev#14536] BUG: DataFrame.quantile with NaNs (GH14357) (pandas-dev#14536) [Backport pandas-dev#14520] BUG: don't close user-provided file handles in C parser (GH14418) (pandas-dev#14520) [Backport pandas-dev#14392] BUG: Dataframe constructor when given dict with None value (pandas-dev#14392) [Backport pandas-dev#14514] BUG: Don't parse inline quotes in skipped lines (pandas-dev#14514) [Backport pandas-dev#14543] BUG: tseries ceil doc fix (pandas-dev#14543) [Backport pandas-dev#14541] DOC: Simplify the gbq integration testing procedure for contributors (pandas-dev#14541) [Backport pandas-dev#14527] BUG/ERR: raise correct error when sql driver is not installed (pandas-dev#14527) [Backport pandas-dev#14501] BUG: fix DatetimeIndex._maybe_cast_slice_bound for empty index (GH14354) (pandas-dev#14501) [Backport pandas-dev#14442] DOC: Expand on reference docs for read_json() (pandas-dev#14442) BLD: fix 3.4 build for cython to 0.24.1 [Backport pandas-dev#14492] BUG: Accept unicode quotechars again in pd.read_csv [Backport pandas-dev#14496] BLD: Support Cython 0.25 [Backport pandas-dev#14498] COMPAT/TST: fix test for range testing of negative integers to neg powers [Backport pandas-dev#14476] PERF: performance regression in Series.asof (pandas-dev#14476) ...
2 parents e4b4db3 + 27b7839 commit dd3759d

File tree

125 files changed

+1607
-455
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

125 files changed

+1607
-455
lines changed

asv_bench/benchmarks/algorithms.py

+26
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,14 @@ def setup(self):
1515
self.int = pd.Int64Index(np.arange(N).repeat(5))
1616
self.float = pd.Float64Index(np.random.randn(N).repeat(5))
1717

18+
# Convenience naming.
19+
self.checked_add = pd.core.nanops._checked_add_with_arr
20+
21+
self.arr = np.arange(1000000)
22+
self.arrpos = np.arange(1000000)
23+
self.arrneg = np.arange(-1000000, 0)
24+
self.arrmixed = np.array([1, -1]).repeat(500000)
25+
1826
def time_int_factorize(self):
1927
self.int.factorize()
2028

@@ -29,3 +37,21 @@ def time_int_duplicated(self):
2937

3038
def time_float_duplicated(self):
3139
self.float.duplicated()
40+
41+
def time_add_overflow_pos_scalar(self):
42+
self.checked_add(self.arr, 1)
43+
44+
def time_add_overflow_neg_scalar(self):
45+
self.checked_add(self.arr, -1)
46+
47+
def time_add_overflow_zero_scalar(self):
48+
self.checked_add(self.arr, 0)
49+
50+
def time_add_overflow_pos_arr(self):
51+
self.checked_add(self.arr, self.arrpos)
52+
53+
def time_add_overflow_neg_arr(self):
54+
self.checked_add(self.arr, self.arrneg)
55+
56+
def time_add_overflow_mixed_arr(self):
57+
self.checked_add(self.arr, self.arrmixed)

asv_bench/benchmarks/attrs_caching.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,4 @@ def setup(self):
2020
self.cur_index = self.df.index
2121

2222
def time_setattr_dataframe_index(self):
23-
self.df.index = self.cur_index
23+
self.df.index = self.cur_index

asv_bench/benchmarks/ctors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,4 @@ def setup(self):
4949
self.s = Series(([Timestamp('20110101'), Timestamp('20120101'), Timestamp('20130101')] * 1000))
5050

5151
def time_index_from_series_ctor(self):
52-
Index(self.s)
52+
Index(self.s)

asv_bench/benchmarks/frame_ctor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1703,4 +1703,4 @@ def setup(self):
17031703
self.dict_list = [dict(zip(self.columns, row)) for row in self.frame.values]
17041704

17051705
def time_series_ctor_from_dict(self):
1706-
Series(self.some_dict)
1706+
Series(self.some_dict)

asv_bench/benchmarks/groupby.py

+26
Original file line numberDiff line numberDiff line change
@@ -548,6 +548,32 @@ def time_groupby_sum(self):
548548
self.df.groupby(['a'])['b'].sum()
549549

550550

551+
class groupby_period(object):
552+
# GH 14338
553+
goal_time = 0.2
554+
555+
def make_grouper(self, N):
556+
return pd.period_range('1900-01-01', freq='D', periods=N)
557+
558+
def setup(self):
559+
N = 10000
560+
self.grouper = self.make_grouper(N)
561+
self.df = pd.DataFrame(np.random.randn(N, 2))
562+
563+
def time_groupby_sum(self):
564+
self.df.groupby(self.grouper).sum()
565+
566+
567+
class groupby_datetime(groupby_period):
568+
def make_grouper(self, N):
569+
return pd.date_range('1900-01-01', freq='D', periods=N)
570+
571+
572+
class groupby_datetimetz(groupby_period):
573+
def make_grouper(self, N):
574+
return pd.date_range('1900-01-01', freq='D', periods=N,
575+
tz='US/Central')
576+
551577
#----------------------------------------------------------------------
552578
# Series.value_counts
553579

asv_bench/benchmarks/hdfstore_bench.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -348,4 +348,4 @@ def remove(self, f):
348348
try:
349349
os.remove(self.f)
350350
except:
351-
pass
351+
pass

asv_bench/benchmarks/index_object.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -344,4 +344,4 @@ def setup(self):
344344
self.mi = MultiIndex.from_product([self.level1, self.level2])
345345

346346
def time_multiindex_with_datetime_level_sliced(self):
347-
self.mi[:10].values
347+
self.mi[:10].values

asv_bench/benchmarks/io_sql.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -212,4 +212,4 @@ def setup(self):
212212
self.df = DataFrame({'float1': randn(10000), 'float2': randn(10000), 'string1': (['foo'] * 10000), 'bool1': ([True] * 10000), 'int1': np.random.randint(0, 100000, size=10000), }, index=self.index)
213213

214214
def time_sql_write_sqlalchemy(self):
215-
self.df.to_sql('test1', self.engine, if_exists='replace')
215+
self.df.to_sql('test1', self.engine, if_exists='replace')

asv_bench/benchmarks/packers.py

+25
Original file line numberDiff line numberDiff line change
@@ -547,6 +547,31 @@ def remove(self, f):
547547
pass
548548

549549

550+
class packers_write_json_lines(object):
551+
goal_time = 0.2
552+
553+
def setup(self):
554+
self.f = '__test__.msg'
555+
self.N = 100000
556+
self.C = 5
557+
self.index = date_range('20000101', periods=self.N, freq='H')
558+
self.df = DataFrame(dict([('float{0}'.format(i), randn(self.N)) for i in range(self.C)]), index=self.index)
559+
self.remove(self.f)
560+
self.df.index = np.arange(self.N)
561+
562+
def time_packers_write_json_lines(self):
563+
self.df.to_json(self.f, orient="records", lines=True)
564+
565+
def teardown(self):
566+
self.remove(self.f)
567+
568+
def remove(self, f):
569+
try:
570+
os.remove(self.f)
571+
except:
572+
pass
573+
574+
550575
class packers_write_json_T(object):
551576
goal_time = 0.2
552577

asv_bench/benchmarks/panel_ctor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,4 +61,4 @@ def setup(self):
6161
self.data_frames[x] = self.df
6262

6363
def time_panel_from_dict_two_different_indexes(self):
64-
Panel.from_dict(self.data_frames)
64+
Panel.from_dict(self.data_frames)

asv_bench/benchmarks/panel_methods.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,4 +53,4 @@ def setup(self):
5353
self.panel = Panel(np.random.randn(100, len(self.index), 1000))
5454

5555
def time_panel_shift_minor(self):
56-
self.panel.shift(1, axis='minor')
56+
self.panel.shift(1, axis='minor')

asv_bench/benchmarks/replace.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,4 @@ def setup(self):
4545
self.ts = Series(np.random.randn(self.N), index=self.rng)
4646

4747
def time_replace_replacena(self):
48-
self.ts.replace(np.nan, 0.0, inplace=True)
48+
self.ts.replace(np.nan, 0.0, inplace=True)

asv_bench/benchmarks/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,4 +73,4 @@ def setup(self):
7373
break
7474

7575
def time_unstack_sparse_keyspace(self):
76-
self.idf.unstack()
76+
self.idf.unstack()

asv_bench/benchmarks/stat_ops.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -258,4 +258,4 @@ def time_rolling_skew(self):
258258
rolling_skew(self.arr, self.win)
259259

260260
def time_rolling_kurt(self):
261-
rolling_kurt(self.arr, self.win)
261+
rolling_kurt(self.arr, self.win)

asv_bench/benchmarks/strings.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -390,4 +390,4 @@ def time_strings_upper(self):
390390
self.many.str.upper()
391391

392392
def make_series(self, letters, strlen, size):
393-
return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))])
393+
return Series([str(x) for x in np.fromiter(IT.cycle(letters), count=(size * strlen), dtype='|S1').view('|S{}'.format(strlen))])

asv_bench/benchmarks/timedelta.py

+12-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .pandas_vb_common import *
2-
from pandas import to_timedelta
2+
from pandas import to_timedelta, Timestamp
33

44

55
class timedelta_convert_int(object):
@@ -47,3 +47,14 @@ def time_timedelta_convert_coerce(self):
4747

4848
def time_timedelta_convert_ignore(self):
4949
to_timedelta(self.arr, errors='ignore')
50+
51+
52+
class timedelta_add_overflow(object):
53+
goal_time = 0.2
54+
55+
def setup(self):
56+
self.td = to_timedelta(np.arange(1000000))
57+
self.ts = Timestamp('2000')
58+
59+
def test_add_td_ts(self):
60+
self.td + self.ts

asv_bench/benchmarks/timeseries.py

+51-30
Original file line numberDiff line numberDiff line change
@@ -284,56 +284,77 @@ class timeseries_asof(object):
284284
goal_time = 0.2
285285

286286
def setup(self):
287-
self.N = 100000
288-
self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')
289-
if hasattr(Series, 'convert'):
290-
Series.resample = Series.convert
291-
self.ts = Series(np.random.randn(self.N), index=self.rng)
292287
self.N = 10000
293288
self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s')
294-
self.ts = Series(np.random.randn(self.N), index=self.rng)
295289
self.dates = date_range(start='1/1/1990', periods=(self.N * 10), freq='5s')
290+
self.ts = Series(np.random.randn(self.N), index=self.rng)
291+
self.ts2 = self.ts.copy()
292+
self.ts2[250:5000] = np.nan
293+
self.ts3 = self.ts.copy()
294+
self.ts3[-5000:] = np.nan
296295

297-
def time_timeseries_asof(self):
296+
# test speed of pre-computing NAs.
297+
def time_asof_list(self):
298298
self.ts.asof(self.dates)
299299

300+
# should be roughly the same as above.
301+
def time_asof_nan_list(self):
302+
self.ts2.asof(self.dates)
300303

301-
class timeseries_asof_nan(object):
302-
goal_time = 0.2
304+
# test speed of the code path for a scalar index
305+
# without *while* loop
306+
def time_asof_single(self):
307+
self.ts.asof(self.dates[0])
303308

304-
def setup(self):
305-
self.N = 100000
306-
self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')
307-
if hasattr(Series, 'convert'):
308-
Series.resample = Series.convert
309-
self.ts = Series(np.random.randn(self.N), index=self.rng)
310-
self.N = 10000
311-
self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s')
312-
self.ts = Series(np.random.randn(self.N), index=self.rng)
313-
self.dates = date_range(start='1/1/1990', periods=(self.N * 10), freq='5s')
314-
self.ts[250:5000] = np.nan
309+
# test speed of the code path for a scalar index
310+
# before the start. should be the same as above.
311+
def time_asof_single_early(self):
312+
self.ts.asof(self.dates[0] - dt.timedelta(10))
315313

316-
def time_timeseries_asof_nan(self):
317-
self.ts.asof(self.dates)
314+
# test the speed of the code path for a scalar index
315+
# with a long *while* loop. should still be much
316+
# faster than pre-computing all the NAs.
317+
def time_asof_nan_single(self):
318+
self.ts3.asof(self.dates[-1])
318319

319320

320-
class timeseries_asof_single(object):
321+
class timeseries_dataframe_asof(object):
321322
goal_time = 0.2
322323

323324
def setup(self):
324-
self.N = 100000
325-
self.rng = date_range(start='1/1/2000', periods=self.N, freq='T')
326-
if hasattr(Series, 'convert'):
327-
Series.resample = Series.convert
328-
self.ts = Series(np.random.randn(self.N), index=self.rng)
329325
self.N = 10000
326+
self.M = 100
330327
self.rng = date_range(start='1/1/1990', periods=self.N, freq='53s')
331-
self.ts = Series(np.random.randn(self.N), index=self.rng)
332328
self.dates = date_range(start='1/1/1990', periods=(self.N * 10), freq='5s')
329+
self.ts = DataFrame(np.random.randn(self.N, self.M), index=self.rng)
330+
self.ts2 = self.ts.copy()
331+
self.ts2.iloc[250:5000] = np.nan
332+
self.ts3 = self.ts.copy()
333+
self.ts3.iloc[-5000:] = np.nan
334+
335+
# test speed of pre-computing NAs.
336+
def time_asof_list(self):
337+
self.ts.asof(self.dates)
333338

334-
def time_timeseries_asof_single(self):
339+
# should be roughly the same as above.
340+
def time_asof_nan_list(self):
341+
self.ts2.asof(self.dates)
342+
343+
# test speed of the code path for a scalar index
344+
# with pre-computing all NAs.
345+
def time_asof_single(self):
335346
self.ts.asof(self.dates[0])
336347

348+
# should be roughly the same as above.
349+
def time_asof_nan_single(self):
350+
self.ts3.asof(self.dates[-1])
351+
352+
# test speed of the code path for a scalar index
353+
# before the start. should be without the cost of
354+
# pre-computing all the NAs.
355+
def time_asof_single_early(self):
356+
self.ts.asof(self.dates[0] - dt.timedelta(10))
357+
337358

338359
class timeseries_custom_bday_apply(object):
339360
goal_time = 0.2

ci/requirements-3.4.build

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
numpy=1.8.1
2-
cython
2+
cython=0.24.1
33
libgfortran=1.0

ci/travis_encrypt_gbq.sh

+5-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
11
#!/bin/bash
22

33
GBQ_JSON_FILE=$1
4-
GBQ_PROJECT_ID=$2
54

6-
if [[ $# -ne 2 ]]; then
5+
if [[ $# -ne 1 ]]; then
76
echo -e "Too few arguments.\nUsage: ./travis_encrypt_gbq.sh "\
8-
"<gbq-json-credentials-file> <gbq-project-id>"
7+
"<gbq-json-credentials-file>"
98
exit 1
109
fi
1110

@@ -23,9 +22,9 @@ echo "Encrypting $GBQ_JSON_FILE..."
2322
read -d "\n" TRAVIS_KEY TRAVIS_IV <<<$(travis encrypt-file $GBQ_JSON_FILE \
2423
travis_gbq.json.enc -f | grep -o "\w*_iv\|\w*_key");
2524

26-
echo "Adding your secure key and project id to travis_gbq_config.txt ..."
27-
echo -e "TRAVIS_IV_ENV=$TRAVIS_IV\nTRAVIS_KEY_ENV=$TRAVIS_KEY\n"\
28-
"GBQ_PROJECT_ID='$GBQ_PROJECT_ID'" > travis_gbq_config.txt
25+
echo "Adding your secure key to travis_gbq_config.txt ..."
26+
echo -e "TRAVIS_IV_ENV=$TRAVIS_IV\nTRAVIS_KEY_ENV=$TRAVIS_KEY"\
27+
> travis_gbq_config.txt
2928

3029
echo "Done. Removing file $GBQ_JSON_FILE"
3130
rm $GBQ_JSON_FILE

ci/travis_gbq_config.txt

-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,2 @@
11
TRAVIS_IV_ENV=encrypted_1d9d7b1f171b_iv
22
TRAVIS_KEY_ENV=encrypted_1d9d7b1f171b_key
3-
GBQ_PROJECT_ID='pandas-travis'

ci/travis_process_gbq_encryption.sh

+4-2
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
source ci/travis_gbq_config.txt
44

5-
if [[ -n ${!TRAVIS_IV_ENV} ]]; then
5+
if [[ -n ${SERVICE_ACCOUNT_KEY} ]]; then
6+
echo "${SERVICE_ACCOUNT_KEY}" > ci/travis_gbq.json;
7+
elif [[ -n ${!TRAVIS_IV_ENV} ]]; then
68
openssl aes-256-cbc -K ${!TRAVIS_KEY_ENV} -iv ${!TRAVIS_IV_ENV} \
79
-in ci/travis_gbq.json.enc -out ci/travis_gbq.json -d;
8-
export GBQ_PROJECT_ID=$GBQ_PROJECT_ID;
10+
export GBQ_PROJECT_ID='pandas-travis';
911
echo 'Successfully decrypted gbq credentials'
1012
fi
1113

doc/README.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -155,9 +155,9 @@ Where to start?
155155
---------------
156156

157157
There are a number of issues listed under `Docs
158-
<https://github.com/pydata/pandas/issues?labels=Docs&sort=updated&state=open>`_
158+
<https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open>`_
159159
and `Good as first PR
160-
<https://github.com/pydata/pandas/issues?labels=Good+as+first+PR&sort=updated&state=open>`_
160+
<https://github.com/pandas-dev/pandas/issues?labels=Good+as+first+PR&sort=updated&state=open>`_
161161
where you could start out.
162162

163163
Or maybe you have an idea of your own, by using pandas, looking for something

doc/_templates/autosummary/accessor_attribute.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
.. currentmodule:: {{ module.split('.')[0] }}
55

6-
.. autoaccessorattribute:: {{ [module.split('.')[1], objname]|join('.') }}
6+
.. autoaccessorattribute:: {{ [module.split('.')[1], objname]|join('.') }}

doc/_templates/autosummary/accessor_method.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33

44
.. currentmodule:: {{ module.split('.')[0] }}
55

6-
.. autoaccessormethod:: {{ [module.split('.')[1], objname]|join('.') }}
6+
.. autoaccessormethod:: {{ [module.split('.')[1], objname]|join('.') }}

0 commit comments

Comments
 (0)