Skip to content

Commit 4a6f6ad

Browse files
authored
Merge branch 'master' into parquet-categorical
2 parents a7c414d + b6cb1b3 commit 4a6f6ad

File tree

426 files changed

+12108
-15156
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

426 files changed

+12108
-15156
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ dist
5757
# wheel files
5858
*.whl
5959
**/wheelhouse/*
60+
pip-wheel-metadata
6061
# coverage
6162
.coverage
6263
coverage.xml

.pre-commit-config.yaml

+17-16
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,18 @@
11
repos:
2-
- repo: https://github.com/python/black
3-
rev: stable
4-
hooks:
5-
- id: black
6-
language_version: python3.7
7-
- repo: https://gitlab.com/pycqa/flake8
8-
rev: 3.7.7
9-
hooks:
10-
- id: flake8
11-
language: python_venv
12-
additional_dependencies: [flake8-comprehensions]
13-
- repo: https://github.com/pre-commit/mirrors-isort
14-
rev: v4.3.20
15-
hooks:
16-
- id: isort
17-
language: python_venv
2+
- repo: https://github.com/python/black
3+
rev: stable
4+
hooks:
5+
- id: black
6+
language_version: python3.7
7+
- repo: https://gitlab.com/pycqa/flake8
8+
rev: 3.7.7
9+
hooks:
10+
- id: flake8
11+
language: python_venv
12+
additional_dependencies: [flake8-comprehensions]
13+
- repo: https://github.com/pre-commit/mirrors-isort
14+
rev: v4.3.20
15+
hooks:
16+
- id: isort
17+
language: python_venv
18+
exclude: ^pandas/__init__\.py$|^pandas/core/api\.py$

MANIFEST.in

+5
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ include LICENSE
33
include RELEASE.md
44
include README.md
55
include setup.py
6+
include pyproject.toml
67

78
graft doc
89
prune doc/build
@@ -14,6 +15,7 @@ graft pandas
1415
global-exclude *.bz2
1516
global-exclude *.csv
1617
global-exclude *.dta
18+
global-exclude *.feather
1719
global-exclude *.gz
1820
global-exclude *.h5
1921
global-exclude *.html
@@ -23,7 +25,10 @@ global-exclude *.pickle
2325
global-exclude *.png
2426
global-exclude *.pyc
2527
global-exclude *.pyd
28+
global-exclude *.ods
29+
global-exclude *.odt
2630
global-exclude *.sas7bdat
31+
global-exclude *.sav
2732
global-exclude *.so
2833
global-exclude *.xls
2934
global-exclude *.xlsm

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ black:
1818
black . --exclude '(asv_bench/env|\.egg|\.git|\.hg|\.mypy_cache|\.nox|\.tox|\.venv|_build|buck-out|build|dist|setup.py)'
1919

2020
develop: build
21-
python setup.py develop
21+
python -m pip install --no-build-isolation -e .
2222

2323
doc:
2424
-rm -rf doc/build doc/source/generated

README.md

+7-6
Original file line numberDiff line numberDiff line change
@@ -188,16 +188,17 @@ python setup.py install
188188

189189
or for installing in [development mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs):
190190

191+
191192
```sh
192-
python setup.py develop
193+
python -m pip install --no-build-isolation -e .
193194
```
194195

195-
Alternatively, you can use `pip` if you want all the dependencies pulled
196-
in automatically (the `-e` option is for installing it in [development
197-
mode](https://pip.pypa.io/en/latest/reference/pip_install.html#editable-installs)):
196+
If you have `make`, you can also use `make develop` to run the same command.
197+
198+
or alternatively
198199

199200
```sh
200-
pip install -e .
201+
python setup.py develop
201202
```
202203

203204
See the full instructions for [installing from source](https://pandas.pydata.org/pandas-docs/stable/install.html#installing-from-source).
@@ -224,7 +225,7 @@ Most development discussion is taking place on github in this repo. Further, the
224225

225226
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
226227

227-
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228+
A detailed overview on how to contribute can be found in the **[contributing guide](https://dev.pandas.io/docs/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
228229

229230
If you are simply looking to start working with the pandas codebase, navigate to the [GitHub "issues" tab](https://github.com/pandas-dev/pandas/issues) and start looking through interesting issues. There are a number of issues listed under [Docs](https://github.com/pandas-dev/pandas/issues?labels=Docs&sort=updated&state=open) and [good first issue](https://github.com/pandas-dev/pandas/issues?labels=good+first+issue&sort=updated&state=open) where you could start out.
230231

asv_bench/asv.conf.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,13 @@
5050
"xlsxwriter": [],
5151
"xlrd": [],
5252
"xlwt": [],
53+
"odfpy": [],
5354
"pytest": [],
5455
// If using Windows with python 2.7 and want to build using the
5556
// mingw toolchain (rather than MSVC), uncomment the following line.
5657
// "libpython": [],
5758
},
58-
59+
"conda_channels": ["defaults", "conda-forge"],
5960
// Combinations of libraries/python versions can be excluded/included
6061
// from the set to test. Each entry is a dictionary containing additional
6162
// key-value pairs to include/exclude.

asv_bench/benchmarks/attrs_caching.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
23
from pandas import DataFrame
34

45
try:
@@ -32,4 +33,4 @@ def time_cache_readonly(self):
3233
self.obj.prop
3334

3435

35-
from .pandas_vb_common import setup # noqa: F401
36+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/binary_ops.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
23
from pandas import DataFrame, Series, date_range
34
from pandas.core.algorithms import checked_add_with_arr
45

@@ -155,4 +156,4 @@ def time_add_overflow_both_arg_nan(self):
155156
)
156157

157158

158-
from .pandas_vb_common import setup # noqa: F401
159+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/categoricals.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1+
import warnings
2+
13
import numpy as np
4+
25
import pandas as pd
36
import pandas.util.testing as tm
4-
import warnings
57

68
try:
79
from pandas.api.types import union_categoricals
@@ -280,4 +282,4 @@ def time_sort_values(self):
280282
self.index.sort_values(ascending=False)
281283

282284

283-
from .pandas_vb_common import setup # noqa: F401
285+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/ctors.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
2+
3+
from pandas import DatetimeIndex, Index, MultiIndex, Series, Timestamp
24
import pandas.util.testing as tm
3-
from pandas import Series, Index, DatetimeIndex, Timestamp, MultiIndex
45

56

67
def no_change(arr):
@@ -113,4 +114,4 @@ def time_multiindex_from_iterables(self):
113114
MultiIndex.from_product(self.iterables)
114115

115116

116-
from .pandas_vb_common import setup # noqa: F401
117+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/dtypes.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,14 @@
1+
import numpy as np
2+
13
from pandas.api.types import pandas_dtype
24

3-
import numpy as np
45
from .pandas_vb_common import (
5-
numeric_dtypes,
66
datetime_dtypes,
7-
string_dtypes,
87
extension_dtypes,
8+
numeric_dtypes,
9+
string_dtypes,
910
)
1011

11-
1212
_numpy_dtypes = [
1313
np.dtype(dtype) for dtype in (numeric_dtypes + datetime_dtypes + string_dtypes)
1414
]
@@ -40,4 +40,4 @@ def time_pandas_dtype_invalid(self, dtype):
4040
pass
4141

4242

43-
from .pandas_vb_common import setup # noqa: F401
43+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/eval.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import numpy as np
2+
23
import pandas as pd
34

45
try:
@@ -62,4 +63,4 @@ def time_query_with_boolean_selection(self):
6263
self.df.query("(a >= @self.min_val) & (a <= @self.max_val)")
6364

6465

65-
from .pandas_vb_common import setup # noqa: F401
66+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/frame_ctor.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
2+
3+
from pandas import DataFrame, MultiIndex, Series, Timestamp, date_range
24
import pandas.util.testing as tm
3-
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
45

56
try:
67
from pandas.tseries.offsets import Nano, Hour
@@ -104,4 +105,4 @@ def time_frame_from_lists(self):
104105
self.df = DataFrame(self.data)
105106

106107

107-
from .pandas_vb_common import setup # noqa: F401
108+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/frame_methods.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import warnings
21
import string
2+
import warnings
33

44
import numpy as np
55

@@ -609,4 +609,15 @@ def time_dataframe_describe(self):
609609
self.df.describe()
610610

611611

612-
from .pandas_vb_common import setup # noqa: F401
612+
class SelectDtypes:
613+
params = [100, 1000]
614+
param_names = ["n"]
615+
616+
def setup(self, n):
617+
self.df = DataFrame(np.random.randn(10, n))
618+
619+
def time_select_dtypes(self, n):
620+
self.df.select_dtypes(include="int")
621+
622+
623+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/gil.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import numpy as np
2-
import pandas.util.testing as tm
3-
from pandas import DataFrame, Series, read_csv, factorize, date_range
2+
3+
from pandas import DataFrame, Series, date_range, factorize, read_csv
44
from pandas.core.algorithms import take_1d
5+
import pandas.util.testing as tm
56

67
try:
78
from pandas import (
@@ -36,7 +37,7 @@ def wrapper(fname):
3637
return wrapper
3738

3839

39-
from .pandas_vb_common import BaseIO
40+
from .pandas_vb_common import BaseIO # noqa: E402 isort:skip
4041

4142

4243
class ParallelGroupbyMethods:
@@ -301,4 +302,4 @@ def time_loop(self, threads):
301302
self.loop()
302303

303304

304-
from .pandas_vb_common import setup # noqa: F401
305+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/groupby.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
)
1616
import pandas.util.testing as tm
1717

18-
1918
method_blacklist = {
2019
"object": {
2120
"median",
@@ -626,4 +625,4 @@ def time_first(self):
626625
self.df_nans.groupby("key").transform("first")
627626

628627

629-
from .pandas_vb_common import setup # noqa: F401
628+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/index_object.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,17 @@
11
import gc
2+
23
import numpy as np
3-
import pandas.util.testing as tm
4+
45
from pandas import (
5-
Series,
6-
date_range,
76
DatetimeIndex,
8-
Index,
9-
RangeIndex,
107
Float64Index,
8+
Index,
119
IntervalIndex,
10+
RangeIndex,
11+
Series,
12+
date_range,
1213
)
14+
import pandas.util.testing as tm
1315

1416

1517
class SetOperations:
@@ -243,4 +245,4 @@ def peakmem_gc_instances(self, N):
243245
gc.enable()
244246

245247

246-
from .pandas_vb_common import setup # noqa: F401
248+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/indexing.py

+9-8
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,23 @@
11
import warnings
22

33
import numpy as np
4-
import pandas.util.testing as tm
4+
55
from pandas import (
6-
Series,
6+
CategoricalIndex,
77
DataFrame,
8-
MultiIndex,
9-
Int64Index,
10-
UInt64Index,
118
Float64Index,
12-
IntervalIndex,
13-
CategoricalIndex,
149
IndexSlice,
10+
Int64Index,
11+
IntervalIndex,
12+
MultiIndex,
13+
Series,
14+
UInt64Index,
1515
concat,
1616
date_range,
1717
option_context,
1818
period_range,
1919
)
20+
import pandas.util.testing as tm
2021

2122

2223
class NumericSeriesIndexing:
@@ -371,4 +372,4 @@ def time_chained_indexing(self, mode):
371372
df2["C"] = 1.0
372373

373374

374-
from .pandas_vb_common import setup # noqa: F401
375+
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/inference.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
import numpy as np
2-
import pandas.util.testing as tm
2+
33
from pandas import DataFrame, Series, to_numeric
4+
import pandas.util.testing as tm
45

5-
from .pandas_vb_common import numeric_dtypes, lib
6+
from .pandas_vb_common import lib, numeric_dtypes
67

78

89
class NumericInferOps:
@@ -120,4 +121,4 @@ def time_convert(self, data):
120121
lib.maybe_convert_numeric(data, set(), coerce_numeric=False)
121122

122123

123-
from .pandas_vb_common import setup # noqa: F401
124+
from .pandas_vb_common import setup # noqa: F401 isort:skip

0 commit comments

Comments
 (0)