Skip to content

Commit 1114a86

Browse files
committed
Merge remote-tracking branch 'upstream/master' into categorical-bool-fixed
2 parents 9c79c72 + 2b13605 commit 1114a86

File tree

430 files changed

+25835
-11913
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

430 files changed

+25835
-11913
lines changed

.travis.yml

+5
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ matrix:
3535
language: generic
3636
env:
3737
- JOB="3.5, OSX" ENV_FILE="ci/travis-35-osx.yaml" TEST_ARGS="--skip-slow --skip-network"
38+
39+
- dist: trusty
40+
env:
41+
- JOB="3.7" ENV_FILE="ci/travis-37.yaml" TEST_ARGS="--skip-slow --skip-network"
42+
3843
- dist: trusty
3944
env:
4045
- JOB="2.7, locale, slow, old NumPy" ENV_FILE="ci/travis-27-locale.yaml" LOCALE_OVERRIDE="zh_CN.UTF-8" SLOW=true

MANIFEST.in

+23-11
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,39 @@ include LICENSE
33
include RELEASE.md
44
include README.md
55
include setup.py
6-
include pyproject.toml
76

87
graft doc
98
prune doc/build
109

10+
graft LICENSES
11+
1112
graft pandas
1213

13-
global-exclude *.so
14-
global-exclude *.pyd
14+
global-exclude *.bz2
15+
global-exclude *.csv
16+
global-exclude *.dta
17+
global-exclude *.gz
18+
global-exclude *.h5
19+
global-exclude *.html
20+
global-exclude *.json
21+
global-exclude *.msgpack
22+
global-exclude *.pickle
23+
global-exclude *.png
1524
global-exclude *.pyc
25+
global-exclude *.pyd
26+
global-exclude *.sas7bdat
27+
global-exclude *.so
28+
global-exclude *.xls
29+
global-exclude *.xlsm
30+
global-exclude *.xlsx
31+
global-exclude *.xpt
32+
global-exclude *.xz
33+
global-exclude *.zip
1634
global-exclude *~
17-
global-exclude \#*
18-
global-exclude .git*
1935
global-exclude .DS_Store
20-
global-exclude *.png
36+
global-exclude .git*
37+
global-exclude \#*
2138

22-
# include examples/data/*
23-
# recursive-include examples *.py
24-
# recursive-include doc/source *
25-
# recursive-include doc/sphinxext *
26-
# recursive-include LICENSES *
2739
include versioneer.py
2840
include pandas/_version.py
2941
include pandas/io/formats/templates/*.tpl

Makefile

+1
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,4 @@ doc:
2323
cd doc; \
2424
python make.py clean; \
2525
python make.py html
26+
python make.py spellcheck

README.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
<tr>
1111
<td>Latest Release</td>
1212
<td>
13-
<a href="https://pypi.python.org/pypi/pandas/">
13+
<a href="https://pypi.org/project/pandas/">
1414
<img src="https://img.shields.io/pypi/v/pandas.svg" alt="latest release" />
1515
</a>
1616
</td>
@@ -25,7 +25,7 @@
2525
<tr>
2626
<td>Package Status</td>
2727
<td>
28-
<a href="https://pypi.python.org/pypi/pandas/">
28+
<a href="https://pypi.org/project/pandas/">
2929
<img src="https://img.shields.io/pypi/status/pandas.svg" alt="status" /></td>
3030
</a>
3131
</tr>
@@ -158,7 +158,7 @@ The source code is currently hosted on GitHub at:
158158
https://github.com/pandas-dev/pandas
159159

160160
Binary installers for the latest released version are available at the [Python
161-
package index](https://pypi.python.org/pypi/pandas) and on conda.
161+
package index](https://pypi.org/project/pandas) and on conda.
162162

163163
```sh
164164
# conda
@@ -171,7 +171,7 @@ pip install pandas
171171
```
172172

173173
## Dependencies
174-
- [NumPy](http://www.numpy.org): 1.9.0 or higher
174+
- [NumPy](https://www.numpy.org): 1.9.0 or higher
175175
- [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher
176176
- [pytz](https://pythonhosted.org/pytz): 2011k or higher
177177

@@ -233,7 +233,7 @@ All contributions, bug reports, bug fixes, documentation improvements, enhanceme
233233

234234
A detailed overview on how to contribute can be found in the **[contributing guide.](https://pandas.pydata.org/pandas-docs/stable/contributing.html)**
235235

236-
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [Difficulty Novice](https://github.com/pandas-dev/pandas/issues?q=is%3Aopen+is%3Aissue+label%3A%22Difficulty+Novice%22) where you could start out.
236+
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub “issues” tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
237237

238238
You can also triage issues which may include reproducing bug reports, or asking for vital information such as version numbers or reproduction instructions. If you would like to start triaging issues, one easy way to get started is to [subscribe to pandas on CodeTriage](https://www.codetriage.com/pandas-dev/pandas).
239239

asv_bench/benchmarks/categoricals.py

+76
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def setup(self):
5151

5252
self.values_some_nan = list(np.tile(self.categories + [np.nan], N))
5353
self.values_all_nan = [np.nan] * len(self.values)
54+
self.values_all_int8 = np.ones(N, 'int8')
5455

5556
def time_regular(self):
5657
pd.Categorical(self.values, self.categories)
@@ -70,6 +71,9 @@ def time_with_nan(self):
7071
def time_all_nan(self):
7172
pd.Categorical(self.values_all_nan)
7273

74+
def time_from_codes_all_int8(self):
75+
pd.Categorical.from_codes(self.values_all_int8, self.categories)
76+
7377

7478
class ValueCounts(object):
7579

@@ -169,3 +173,75 @@ def setup(self, dtype):
169173

170174
def time_isin_categorical(self, dtype):
171175
self.series.isin(self.sample)
176+
177+
178+
class IsMonotonic(object):
179+
180+
def setup(self):
181+
N = 1000
182+
self.c = pd.CategoricalIndex(list('a' * N + 'b' * N + 'c' * N))
183+
self.s = pd.Series(self.c)
184+
185+
def time_categorical_index_is_monotonic_increasing(self):
186+
self.c.is_monotonic_increasing
187+
188+
def time_categorical_index_is_monotonic_decreasing(self):
189+
self.c.is_monotonic_decreasing
190+
191+
def time_categorical_series_is_monotonic_increasing(self):
192+
self.s.is_monotonic_increasing
193+
194+
def time_categorical_series_is_monotonic_decreasing(self):
195+
self.s.is_monotonic_decreasing
196+
197+
198+
class Contains(object):
199+
200+
goal_time = 0.2
201+
202+
def setup(self):
203+
N = 10**5
204+
self.ci = tm.makeCategoricalIndex(N)
205+
self.c = self.ci.values
206+
self.key = self.ci.categories[0]
207+
208+
def time_categorical_index_contains(self):
209+
self.key in self.ci
210+
211+
def time_categorical_contains(self):
212+
self.key in self.c
213+
214+
215+
class CategoricalSlicing(object):
216+
217+
goal_time = 0.2
218+
params = ['monotonic_incr', 'monotonic_decr', 'non_monotonic']
219+
param_names = ['index']
220+
221+
def setup(self, index):
222+
N = 10**6
223+
values = list('a' * N + 'b' * N + 'c' * N)
224+
indices = {
225+
'monotonic_incr': pd.Categorical(values),
226+
'monotonic_decr': pd.Categorical(reversed(values)),
227+
'non_monotonic': pd.Categorical(list('abc' * N))}
228+
self.data = indices[index]
229+
230+
self.scalar = 10000
231+
self.list = list(range(10000))
232+
self.cat_scalar = 'b'
233+
234+
def time_getitem_scalar(self, index):
235+
self.data[self.scalar]
236+
237+
def time_getitem_slice(self, index):
238+
self.data[:self.scalar]
239+
240+
def time_getitem_list_like(self, index):
241+
self.data[[self.scalar]]
242+
243+
def time_getitem_list(self, index):
244+
self.data[self.list]
245+
246+
def time_getitem_bool_array(self, index):
247+
self.data[self.data == self.cat_scalar]

asv_bench/benchmarks/frame_methods.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -501,7 +501,7 @@ def time_info(self):
501501
class NSort(object):
502502

503503
goal_time = 0.2
504-
params = ['first', 'last']
504+
params = ['first', 'last', 'all']
505505
param_names = ['keep']
506506

507507
def setup(self, keep):
@@ -512,3 +512,21 @@ def time_nlargest(self, keep):
512512

513513
def time_nsmallest(self, keep):
514514
self.df.nsmallest(100, 'A', keep=keep)
515+
516+
517+
class Describe(object):
518+
519+
goal_time = 0.2
520+
521+
def setup(self):
522+
self.df = DataFrame({
523+
'a': np.random.randint(0, 100, int(1e6)),
524+
'b': np.random.randint(0, 100, int(1e6)),
525+
'c': np.random.randint(0, 100, int(1e6))
526+
})
527+
528+
def time_series_describe(self):
529+
self.df['a'].describe()
530+
531+
def time_dataframe_describe(self):
532+
self.df.describe()

asv_bench/benchmarks/groupby.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
import numpy as np
77
from pandas import (DataFrame, Series, MultiIndex, date_range, period_range,
8-
TimeGrouper, Categorical)
8+
TimeGrouper, Categorical, Timestamp)
99
import pandas.util.testing as tm
1010

1111
from .pandas_vb_common import setup # noqa
@@ -385,6 +385,25 @@ def time_dtype_as_field(self, dtype, method, application):
385385
self.as_field_method()
386386

387387

388+
class RankWithTies(object):
389+
# GH 21237
390+
goal_time = 0.2
391+
param_names = ['dtype', 'tie_method']
392+
params = [['float64', 'float32', 'int64', 'datetime64'],
393+
['first', 'average', 'dense', 'min', 'max']]
394+
395+
def setup(self, dtype, tie_method):
396+
N = 10**4
397+
if dtype == 'datetime64':
398+
data = np.array([Timestamp("2011/01/01")] * N, dtype=dtype)
399+
else:
400+
data = np.array([1] * N, dtype=dtype)
401+
self.df = DataFrame({'values': data, 'key': ['foo'] * N})
402+
403+
def time_rank_ties(self, dtype, tie_method):
404+
self.df.groupby('key').rank(method=tie_method)
405+
406+
388407
class Float32(object):
389408
# GH 13335
390409
goal_time = 0.2

asv_bench/benchmarks/indexing.py

+45-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
import numpy as np
44
import pandas.util.testing as tm
55
from pandas import (Series, DataFrame, MultiIndex, Int64Index, Float64Index,
6-
IntervalIndex, IndexSlice, concat, date_range)
6+
IntervalIndex, CategoricalIndex,
7+
IndexSlice, concat, date_range)
78
from .pandas_vb_common import setup, Panel # noqa
89

910

@@ -230,6 +231,49 @@ def time_loc_list(self, monotonic):
230231
monotonic.loc[80000:]
231232

232233

234+
class CategoricalIndexIndexing(object):
235+
236+
goal_time = 0.2
237+
params = ['monotonic_incr', 'monotonic_decr', 'non_monotonic']
238+
param_names = ['index']
239+
240+
def setup(self, index):
241+
N = 10**5
242+
values = list('a' * N + 'b' * N + 'c' * N)
243+
indices = {
244+
'monotonic_incr': CategoricalIndex(values),
245+
'monotonic_decr': CategoricalIndex(reversed(values)),
246+
'non_monotonic': CategoricalIndex(list('abc' * N))}
247+
self.data = indices[index]
248+
249+
self.int_scalar = 10000
250+
self.int_list = list(range(10000))
251+
252+
self.cat_scalar = 'b'
253+
self.cat_list = ['a', 'c']
254+
255+
def time_getitem_scalar(self, index):
256+
self.data[self.int_scalar]
257+
258+
def time_getitem_slice(self, index):
259+
self.data[:self.int_scalar]
260+
261+
def time_getitem_list_like(self, index):
262+
self.data[[self.int_scalar]]
263+
264+
def time_getitem_list(self, index):
265+
self.data[self.int_list]
266+
267+
def time_getitem_bool_array(self, index):
268+
self.data[self.data == self.cat_scalar]
269+
270+
def time_get_loc_scalar(self, index):
271+
self.data.get_loc(self.cat_scalar)
272+
273+
def time_get_indexer_list(self, index):
274+
self.data.get_indexer(self.cat_list)
275+
276+
233277
class PanelIndexing(object):
234278

235279
goal_time = 0.2

asv_bench/benchmarks/pandas_vb_common.py

+1-4
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,7 @@
22
from importlib import import_module
33

44
import numpy as np
5-
try:
6-
from pandas import Panel
7-
except ImportError:
8-
from pandas import WidePanel as Panel # noqa
5+
from pandas import Panel
96

107
# Compatibility import for lib
118
for imp in ['pandas._libs.lib', 'pandas.lib']:

asv_bench/benchmarks/period.py

+5
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,11 @@ def setup(self):
6464
def time_setitem_period_column(self):
6565
self.df['col'] = self.rng
6666

67+
def time_set_index(self):
68+
# GH#21582 limited by comparisons of Period objects
69+
self.df['col2'] = self.rng
70+
self.df.set_index('col2', append=True)
71+
6772

6873
class Algorithms(object):
6974

asv_bench/benchmarks/series_methods.py

+14-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def time_isin(self, dtypes):
4141
class NSort(object):
4242

4343
goal_time = 0.2
44-
params = ['last', 'first']
44+
params = ['first', 'last', 'all']
4545
param_names = ['keep']
4646

4747
def setup(self, keep):
@@ -121,3 +121,16 @@ def setup(self):
121121

122122
def time_dir_strings(self):
123123
dir(self.s)
124+
125+
126+
class SeriesGetattr(object):
127+
# https://github.com/pandas-dev/pandas/issues/19764
128+
goal_time = 0.2
129+
130+
def setup(self):
131+
self.s = Series(1,
132+
index=date_range("2012-01-01", freq='s',
133+
periods=int(1e6)))
134+
135+
def time_series_datetimeindex_repr(self):
136+
getattr(self.s, 'a', None)

0 commit comments

Comments
 (0)