Skip to content

Commit 483706d

Browse files
committed
Merge tag 'v0.20.2' into releases
Version 0.20.2 * tag 'v0.20.2': (68 commits) RLS: v0.20.2 DOC: Update release.rst DOC: Whatsnew fixups (pandas-dev#16596) ERRR: Raise error in usecols when column doesn't exist but length matches (pandas-dev#16460) BUG: convert numpy strings in index names in HDF pandas-dev#13492 (pandas-dev#16444) PERF: vectorize _interp_limit (pandas-dev#16592) DOC: whatsnew 0.20.2 edits (pandas-dev#16587) API: Make is_strictly_monotonic_* private (pandas-dev#16576) BUG: reimplement MultiIndex.remove_unused_levels (pandas-dev#16565) Strictly monotonic (pandas-dev#16555) ENH: add .ngroup() method to groupby objects (pandas-dev#14026) (pandas-dev#14026) fix linting BUG: Incorrect handling of rolling.cov with offset window (pandas-dev#16244) BUG: select_as_multiple doesn't respect start/stop kwargs GH16209 (pandas-dev#16317) return empty MultiIndex for symmetrical difference on equal MultiIndexes (pandas-dev#16486) BUG: Bug in .resample() and .groupby() when aggregating on integers (pandas-dev#16549) BUG: Fixed tput output on windows (pandas-dev#16496) Strictly monotonic (pandas-dev#16555) BUG: fixed wrong order of ordered labels in pd.cut() BUG: Fixed to_html ignoring index_names parameter ...
2 parents b3f6bc7 + 2814061 commit 483706d

File tree

116 files changed

+2456
-574
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

116 files changed

+2456
-574
lines changed

.travis.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ after_success:
123123

124124
after_script:
125125
- echo "after_script start"
126-
- source activate pandas && python -c "import pandas; pandas.show_versions();"
126+
- source activate pandas && pushd /tmp && python -c "import pandas; pandas.show_versions();" && popd
127127
- if [ -e /tmp/single.xml ]; then
128128
ci/print_skipped.py /tmp/single.xml;
129129
fi

asv_bench/benchmarks/groupby.py

+9
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,11 @@ def setup(self):
368368
self.dates = (np.datetime64('now') + self.offsets)
369369
self.df = DataFrame({'key1': np.random.randint(0, 500, size=self.n), 'key2': np.random.randint(0, 100, size=self.n), 'value1': np.random.randn(self.n), 'value2': np.random.randn(self.n), 'value3': np.random.randn(self.n), 'dates': self.dates, })
370370

371+
N = 1000000
372+
self.draws = pd.Series(np.random.randn(N))
373+
labels = pd.Series(['foo', 'bar', 'baz', 'qux'] * (N // 4))
374+
self.cats = labels.astype('category')
375+
371376
def time_groupby_multi_size(self):
372377
self.df.groupby(['key1', 'key2']).size()
373378

@@ -377,6 +382,10 @@ def time_groupby_dt_size(self):
377382
def time_groupby_dt_timegrouper_size(self):
378383
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
379384

385+
def time_groupby_size(self):
386+
self.draws.groupby(self.cats).size()
387+
388+
380389

381390
#----------------------------------------------------------------------
382391
# groupby with a variable value for ngroups

asv_bench/benchmarks/indexing.py

+40-4
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ def time_getitem_list_like(self):
1919
def time_getitem_array(self):
2020
self.s[np.arange(10000)]
2121

22+
def time_getitem_lists(self):
23+
self.s[np.arange(10000).tolist()]
24+
2225
def time_iloc_array(self):
2326
self.s.iloc[np.arange(10000)]
2427

@@ -190,10 +193,22 @@ def setup(self):
190193
np.arange(1000)], names=['one', 'two'])
191194

192195
import string
193-
self.mistring = MultiIndex.from_product(
194-
[np.arange(1000),
195-
np.arange(20), list(string.ascii_letters)],
196+
197+
self.mi_large = MultiIndex.from_product(
198+
[np.arange(1000), np.arange(20), list(string.ascii_letters)],
196199
names=['one', 'two', 'three'])
200+
self.mi_med = MultiIndex.from_product(
201+
[np.arange(1000), np.arange(10), list('A')],
202+
names=['one', 'two', 'three'])
203+
self.mi_small = MultiIndex.from_product(
204+
[np.arange(100), list('A'), list('A')],
205+
names=['one', 'two', 'three'])
206+
207+
rng = np.random.RandomState(4)
208+
size = 1 << 16
209+
self.mi_unused_levels = pd.MultiIndex.from_arrays([
210+
rng.randint(0, 1 << 13, size),
211+
rng.randint(0, 1 << 10, size)])[rng.rand(size) < 0.1]
197212

198213
def time_series_xs_mi_ix(self):
199214
self.s.ix[999]
@@ -215,12 +230,33 @@ def time_multiindex_get_indexer(self):
215230
(0, 16), (0, 17), (0, 18),
216231
(0, 19)], dtype=object))
217232

233+
def time_multiindex_large_get_loc(self):
234+
self.mi_large.get_loc((999, 19, 'Z'))
235+
236+
def time_multiindex_large_get_loc_warm(self):
237+
for _ in range(1000):
238+
self.mi_large.get_loc((999, 19, 'Z'))
239+
240+
def time_multiindex_med_get_loc(self):
241+
self.mi_med.get_loc((999, 9, 'A'))
242+
243+
def time_multiindex_med_get_loc_warm(self):
244+
for _ in range(1000):
245+
self.mi_med.get_loc((999, 9, 'A'))
246+
218247
def time_multiindex_string_get_loc(self):
219-
self.mistring.get_loc((999, 19, 'Z'))
248+
self.mi_small.get_loc((99, 'A', 'A'))
249+
250+
def time_multiindex_small_get_loc_warm(self):
251+
for _ in range(1000):
252+
self.mi_small.get_loc((99, 'A', 'A'))
220253

221254
def time_is_monotonic(self):
222255
self.miint.is_monotonic
223256

257+
def time_remove_unused_levels(self):
258+
self.mi_unused_levels.remove_unused_levels()
259+
224260

225261
class IntervalIndexing(object):
226262
goal_time = 0.2

asv_bench/benchmarks/series_methods.py

+11
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ def setup(self):
111111
def time_series_dropna_int64(self):
112112
self.s.dropna()
113113

114+
114115
class series_dropna_datetime(object):
115116
goal_time = 0.2
116117

@@ -120,3 +121,13 @@ def setup(self):
120121

121122
def time_series_dropna_datetime(self):
122123
self.s.dropna()
124+
125+
126+
class series_clip(object):
127+
goal_time = 0.2
128+
129+
def setup(self):
130+
self.s = pd.Series(np.random.randn(50))
131+
132+
def time_series_dropna_datetime(self):
133+
self.s.clip(0, 1)

ci/install_travis.sh

+16-11
Original file line numberDiff line numberDiff line change
@@ -119,15 +119,7 @@ if [ "$COVERAGE" ]; then
119119
fi
120120

121121
echo
122-
if [ "$BUILD_TEST" ]; then
123-
124-
# build & install testing
125-
echo ["Starting installation test."]
126-
bash ci/install_release_build.sh
127-
conda uninstall -y cython
128-
time pip install dist/*tar.gz || exit 1
129-
130-
else
122+
if [ -z "$BUILD_TEST" ]; then
131123

132124
# build but don't install
133125
echo "[build em]"
@@ -163,9 +155,22 @@ fi
163155
# w/o removing anything else
164156
echo
165157
echo "[removing installed pandas]"
166-
conda remove pandas --force
158+
conda remove pandas -y --force
167159

168-
if [ -z "$BUILD_TEST" ]; then
160+
if [ "$BUILD_TEST" ]; then
161+
162+
# remove any installation
163+
pip uninstall -y pandas
164+
conda list pandas
165+
pip list --format columns |grep pandas
166+
167+
# build & install testing
168+
echo ["building release"]
169+
bash scripts/build_dist_for_release.sh
170+
conda uninstall -y cython
171+
time pip install dist/*tar.gz || exit 1
172+
173+
else
169174

170175
# install our pandas
171176
echo

ci/requirements-3.5_OSX.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,4 +4,4 @@ source activate pandas
44

55
echo "install 35_OSX"
66

7-
conda install -n pandas -c conda-forge feather-format
7+
conda install -n pandas -c conda-forge feather-format==0.3.1

ci/script_multi.sh

+13-7
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,26 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496
1919
echo PYTHONHASHSEED=$PYTHONHASHSEED
2020

2121
if [ "$BUILD_TEST" ]; then
22-
echo "build-test"
22+
echo "[build-test]"
23+
24+
echo "[env]"
25+
pip list --format columns |grep pandas
26+
27+
echo "[running]"
2328
cd /tmp
24-
pwd
25-
conda list pandas
26-
echo "running"
27-
python -c "import pandas; pandas.test(['-n 2'])"
29+
unset PYTHONPATH
30+
python -c 'import pandas; pandas.test(["-n 2", "--skip-slow", "--skip-network", "-r xX", "-m not single"])'
31+
2832
elif [ "$DOC" ]; then
2933
echo "We are not running pytest as this is a doc-build"
34+
3035
elif [ "$COVERAGE" ]; then
3136
echo pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
3237
pytest -s -n 2 -m "not single" --cov=pandas --cov-report xml:/tmp/cov-multiple.xml --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
38+
3339
else
34-
echo pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
35-
pytest -n 2 -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
40+
echo pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas
41+
pytest -n 2 -r xX -m "not single" --junitxml=/tmp/multiple.xml $TEST_ARGS pandas # TODO: doctest
3642
fi
3743

3844
RET="$?"

ci/script_single.sh

+2-2
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@ elif [ "$COVERAGE" ]; then
2020
echo pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
2121
pytest -s -m "single" --cov=pandas --cov-report xml:/tmp/cov-single.xml --junitxml=/tmp/single.xml $TEST_ARGS pandas
2222
else
23-
echo pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas
24-
pytest -m "single" --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
23+
echo pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas
24+
pytest -m "single" -r xX --junitxml=/tmp/single.xml $TEST_ARGS pandas # TODO: doctest
2525
fi
2626

2727
RET="$?"

doc/make.py

+24-11
Original file line numberDiff line numberDiff line change
@@ -34,47 +34,60 @@
3434
SPHINX_BUILD = 'sphinxbuild'
3535

3636

37-
def upload_dev(user='pandas'):
37+
def _process_user(user):
38+
if user is None or user is False:
39+
user = ''
40+
else:
41+
user = user + '@'
42+
return user
43+
44+
45+
def upload_dev(user=None):
3846
'push a copy to the pydata dev directory'
39-
if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org'
47+
user = _process_user(user)
48+
if os.system('cd build/html; rsync -avz . {0}pandas.pydata.org'
4049
':/usr/share/nginx/pandas/pandas-docs/dev/ -essh'.format(user)):
4150
raise SystemExit('Upload to Pydata Dev failed')
4251

4352

44-
def upload_dev_pdf(user='pandas'):
53+
def upload_dev_pdf(user=None):
4554
'push a copy to the pydata dev directory'
46-
if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org'
55+
user = _process_user(user)
56+
if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
4757
':/usr/share/nginx/pandas/pandas-docs/dev/'.format(user)):
4858
raise SystemExit('PDF upload to Pydata Dev failed')
4959

5060

51-
def upload_stable(user='pandas'):
61+
def upload_stable(user=None):
5262
'push a copy to the pydata stable directory'
53-
if os.system('cd build/html; rsync -avz . {0}@pandas.pydata.org'
63+
user = _process_user(user)
64+
if os.system('cd build/html; rsync -avz . {0}pandas.pydata.org'
5465
':/usr/share/nginx/pandas/pandas-docs/stable/ -essh'.format(user)):
5566
raise SystemExit('Upload to stable failed')
5667

5768

58-
def upload_stable_pdf(user='pandas'):
69+
def upload_stable_pdf(user=None):
5970
'push a copy to the pydata dev directory'
60-
if os.system('cd build/latex; scp pandas.pdf {0}@pandas.pydata.org'
71+
user = _process_user(user)
72+
if os.system('cd build/latex; scp pandas.pdf {0}pandas.pydata.org'
6173
':/usr/share/nginx/pandas/pandas-docs/stable/'.format(user)):
6274
raise SystemExit('PDF upload to stable failed')
6375

6476

65-
def upload_prev(ver, doc_root='./', user='pandas'):
77+
def upload_prev(ver, doc_root='./', user=None):
6678
'push a copy of older release to appropriate version directory'
79+
user = _process_user(user)
6780
local_dir = doc_root + 'build/html'
6881
remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver
69-
cmd = 'cd %s; rsync -avz . %s@pandas.pydata.org:%s -essh'
82+
cmd = 'cd %s; rsync -avz . %spandas.pydata.org:%s -essh'
7083
cmd = cmd % (local_dir, user, remote_dir)
7184
print(cmd)
7285
if os.system(cmd):
7386
raise SystemExit(
7487
'Upload to %s from %s failed' % (remote_dir, local_dir))
7588

7689
local_dir = doc_root + 'build/latex'
77-
pdf_cmd = 'cd %s; scp pandas.pdf %s@pandas.pydata.org:%s'
90+
pdf_cmd = 'cd %s; scp pandas.pdf %spandas.pydata.org:%s'
7891
pdf_cmd = pdf_cmd % (local_dir, user, remote_dir)
7992
if os.system(pdf_cmd):
8093
raise SystemExit('Upload PDF to %s from %s failed' % (ver, doc_root))

doc/source/advanced.rst

+10
Original file line numberDiff line numberDiff line change
@@ -948,6 +948,16 @@ On the other hand, if the index is not monotonic, then both slice bounds must be
948948
In [11]: df.loc[2:3, :]
949949
KeyError: 'Cannot get right slice bound for non-unique label: 3'
950950
951+
:meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` only check that
952+
an index is weakly monotonic. To check for strict monotonicity, you can combine one of those with
953+
:meth:`Index.is_unique`.
954+
955+
.. ipython:: python
956+
957+
weakly_monotonic = pd.Index(['a', 'b', 'c', 'c'])
958+
weakly_monotonic
959+
weakly_monotonic.is_monotonic_increasing
960+
weakly_monotonic.is_monotonic_increasing & weakly_monotonic.is_unique
951961
952962
Endpoints are inclusive
953963
~~~~~~~~~~~~~~~~~~~~~~~

doc/source/api.rst

+2
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,7 @@ Serialization / IO / Conversion
724724
Series.to_dense
725725
Series.to_string
726726
Series.to_clipboard
727+
Series.to_latex
727728

728729
Sparse
729730
~~~~~~
@@ -1704,6 +1705,7 @@ Computations / Descriptive Stats
17041705
GroupBy.mean
17051706
GroupBy.median
17061707
GroupBy.min
1708+
GroupBy.ngroup
17071709
GroupBy.nth
17081710
GroupBy.ohlc
17091711
GroupBy.prod

doc/source/categorical.rst

+8
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,14 @@ the original values:
453453
454454
np.asarray(cat) > base
455455
456+
When you compare two unordered categoricals with the same categories, the order is not considered:
457+
458+
.. ipython:: python
459+
460+
c1 = pd.Categorical(['a', 'b'], categories=['a', 'b'], ordered=False)
461+
c2 = pd.Categorical(['a', 'b'], categories=['b', 'a'], ordered=False)
462+
c1 == c2
463+
456464
Operations
457465
----------
458466

0 commit comments

Comments
 (0)