Skip to content

Commit 5e8269d

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into boilerplate-4
2 parents 9ced7d4 + 7cf2d0f commit 5e8269d

File tree

571 files changed

+15894
-9975
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

571 files changed

+15894
-9975
lines changed

.devcontainer.json

+3-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
"python.linting.pylintEnabled": false,
1818
"python.linting.mypyEnabled": true,
1919
"python.testing.pytestEnabled": true,
20-
"python.testing.cwd": "pandas/tests"
20+
"python.testing.pytestArgs": [
21+
"pandas"
22+
]
2123
},
2224

2325
// Add the IDs of extensions you want installed when the container is created in the array below.

.github/workflows/ci.yml

+3-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@ on:
44
push:
55
branches: master
66
pull_request:
7-
branches: master
7+
branches:
8+
- master
9+
- 1.1.x
810

911
env:
1012
ENV_FILE: environment.yml

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ repos:
33
rev: 19.10b0
44
hooks:
55
- id: black
6-
language_version: python3.7
6+
language_version: python3
77
- repo: https://gitlab.com/pycqa/flake8
88
rev: 3.7.7
99
hooks:

.travis.yml

+2-8
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,14 @@ matrix:
5858
services:
5959
- mysql
6060
- postgresql
61-
62-
- env:
63-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
64-
services:
65-
- mysql
66-
- postgresql
6761
allow_failures:
6862
- arch: arm64
6963
env:
7064
- JOB="3.7, arm64" PYTEST_WORKERS=8 ENV_FILE="ci/deps/travis-37-arm64.yaml" PATTERN="(not slow and not network and not clipboard)"
7165
- dist: bionic
72-
python: 3.9-dev
7366
env:
74-
- JOB="3.9-dev" PATTERN="(not slow and not network)"
67+
- JOB="3.9-dev" PATTERN="(not slow and not network and not clipboard)"
68+
7569

7670
before_install:
7771
- echo "before_install"

LICENSES/XARRAY_LICENSE

+4
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
Copyright 2014-2019, xarray Developers
2+
3+
--------------------------------------------------------------------------------
4+
15
Apache License
26
Version 2.0, January 2004
37
http://www.apache.org/licenses/

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
## What is it?
2222

23-
**pandas** is a Python package providing fast, flexible, and expressive data
23+
**pandas** is a Python package that provides fast, flexible, and expressive data
2424
structures designed to make working with "relational" or "labeled" data both
2525
easy and intuitive. It aims to be the fundamental high-level building block for
2626
doing practical, **real world** data analysis in Python. Additionally, it has
@@ -154,11 +154,11 @@ For usage questions, the best place to go to is [StackOverflow](https://stackove
154154
Further, general questions and discussions can also take place on the [pydata mailing list](https://groups.google.com/forum/?fromgroups#!forum/pydata).
155155

156156
## Discussion and Development
157-
Most development discussion is taking place on github in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development related questions.
157+
Most development discussions take place on GitHub in this repo. Further, the [pandas-dev mailing list](https://mail.python.org/mailman/listinfo/pandas-dev) can also be used for specialized discussions or design issues, and a [Gitter channel](https://gitter.im/pydata/pandas) is available for quick development-related questions.
158158

159159
## Contributing to pandas [![Open Source Helpers](https://www.codetriage.com/pandas-dev/pandas/badges/users.svg)](https://www.codetriage.com/pandas-dev/pandas)
160160

161-
All contributions, bug reports, bug fixes, documentation improvements, enhancements and ideas are welcome.
161+
All contributions, bug reports, bug fixes, documentation improvements, enhancements, and ideas are welcome.
162162

163163
A detailed overview on how to contribute can be found in the **[contributing guide](https://pandas.pydata.org/docs/dev/development/contributing.html)**. There is also an [overview](.github/CONTRIBUTING.md) on GitHub.
164164

asv_bench/asv.conf.json

+1
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@
5353
"xlwt": [],
5454
"odfpy": [],
5555
"pytest": [],
56+
"jinja2": [],
5657
// If using Windows with python 2.7 and want to build using the
5758
// mingw toolchain (rather than MSVC), uncomment the following line.
5859
// "libpython": [],

asv_bench/benchmarks/categoricals.py

+4
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ def setup(self):
3434
self.values_all_int8 = np.ones(N, "int8")
3535
self.categorical = pd.Categorical(self.values, self.categories)
3636
self.series = pd.Series(self.categorical)
37+
self.intervals = pd.interval_range(0, 1, periods=N // 10)
3738

3839
def time_regular(self):
3940
pd.Categorical(self.values, self.categories)
@@ -44,6 +45,9 @@ def time_fastpath(self):
4445
def time_datetimes(self):
4546
pd.Categorical(self.datetimes)
4647

48+
def time_interval(self):
    """Time building a Categorical from interval values.

    Uses ``self.intervals`` (created in ``setup``) both as the values and
    as the explicit categories, so the benchmark actually exercises the
    interval code path rather than re-timing the datetime data.
    """
    pd.Categorical(self.intervals, categories=self.intervals)
50+
4751
def time_datetimes_with_nat(self):
4852
pd.Categorical(self.datetimes_with_nat)
4953

asv_bench/benchmarks/frame_ctor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from .pandas_vb_common import tm
77

88
try:
9-
from pandas.tseries.offsets import Nano, Hour
9+
from pandas.tseries.offsets import Hour, Nano
1010
except ImportError:
1111
# For compatibility with older versions
1212
from pandas.core.datetools import * # noqa

asv_bench/benchmarks/gil.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,14 @@
77

88
try:
99
from pandas import (
10-
rolling_median,
10+
rolling_kurt,
11+
rolling_max,
1112
rolling_mean,
13+
rolling_median,
1214
rolling_min,
13-
rolling_max,
14-
rolling_var,
1515
rolling_skew,
16-
rolling_kurt,
1716
rolling_std,
17+
rolling_var,
1818
)
1919

2020
have_rolling_methods = True

asv_bench/benchmarks/groupby.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from .pandas_vb_common import tm
1818

19-
method_blacklist = {
19+
method_blocklist = {
2020
"object": {
2121
"median",
2222
"prod",
@@ -403,7 +403,7 @@ class GroupByMethods:
403403
]
404404

405405
def setup(self, dtype, method, application):
406-
if method in method_blacklist.get(dtype, {}):
406+
if method in method_blocklist.get(dtype, {}):
407407
raise NotImplementedError # skip benchmark
408408
ngroups = 1000
409409
size = ngroups * 2

asv_bench/benchmarks/io/json.py

+30
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
import numpy as np
24

35
from pandas import DataFrame, concat, date_range, read_json, timedelta_range
@@ -82,6 +84,7 @@ def setup(self, orient, frame):
8284
timedeltas = timedelta_range(start=1, periods=N, freq="s")
8385
datetimes = date_range(start=1, periods=N, freq="s")
8486
ints = np.random.randint(100000000, size=N)
87+
longints = sys.maxsize * np.random.randint(100000000, size=N)
8588
floats = np.random.randn(N)
8689
strings = tm.makeStringIndex(N)
8790
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
@@ -120,6 +123,18 @@ def setup(self, orient, frame):
120123
index=index,
121124
)
122125

126+
self.df_longint_float_str = DataFrame(
127+
{
128+
"longint_1": longints,
129+
"longint_2": longints,
130+
"float_1": floats,
131+
"float_2": floats,
132+
"str_1": strings,
133+
"str_2": strings,
134+
},
135+
index=index,
136+
)
137+
123138
def time_to_json(self, orient, frame):
124139
getattr(self, frame).to_json(self.fname, orient=orient)
125140

@@ -172,6 +187,7 @@ def setup(self):
172187
timedeltas = timedelta_range(start=1, periods=N, freq="s")
173188
datetimes = date_range(start=1, periods=N, freq="s")
174189
ints = np.random.randint(100000000, size=N)
190+
longints = sys.maxsize * np.random.randint(100000000, size=N)
175191
floats = np.random.randn(N)
176192
strings = tm.makeStringIndex(N)
177193
self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N))
@@ -209,6 +225,17 @@ def setup(self):
209225
},
210226
index=index,
211227
)
228+
self.df_longint_float_str = DataFrame(
229+
{
230+
"longint_1": longints,
231+
"longint_2": longints,
232+
"float_1": floats,
233+
"float_2": floats,
234+
"str_1": strings,
235+
"str_2": strings,
236+
},
237+
index=index,
238+
)
212239

213240
def time_floats_with_int_idex_lines(self):
214241
self.df.to_json(self.fname, orient="records", lines=True)
@@ -225,6 +252,9 @@ def time_float_int_lines(self):
225252
def time_float_int_str_lines(self):
226253
self.df_int_float_str.to_json(self.fname, orient="records", lines=True)
227254

255+
def time_float_longint_str_lines(self):
256+
self.df_longint_float_str.to_json(self.fname, orient="records", lines=True)
257+
228258

229259
class ToJSONMem:
230260
def setup_cache(self):

asv_bench/benchmarks/io/parsers.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22

33
try:
44
from pandas._libs.tslibs.parsing import (
5-
concat_date_cols,
65
_does_string_look_like_datetime,
6+
concat_date_cols,
77
)
88
except ImportError:
99
# Avoid whole benchmark suite import failure on asv (currently 0.4)

asv_bench/benchmarks/io/style.py

+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import numpy as np
2+
3+
from pandas import DataFrame
4+
5+
6+
class RenderApply:
    """Benchmarks for applying a row-wise style and rendering the result.

    Parametrized over the number of columns and rows of the styled frame.
    """

    params = [[12, 24, 36], [12, 120]]
    param_names = ["cols", "rows"]

    def setup(self, cols, rows):
        """Build a random ``rows`` x ``cols`` frame and attach the style."""
        values = np.random.randn(rows, cols)
        column_labels = [f"float_{n}" for n in range(1, cols + 1)]
        row_labels = [f"row_{n}" for n in range(1, rows + 1)]
        self.df = DataFrame(values, columns=column_labels, index=row_labels)
        self._style_apply()

    def time_render(self, cols, rows):
        """Time rendering of the styled frame prepared in ``setup``."""
        self.st.render()

    def peakmem_apply(self, cols, rows):
        """Track peak memory of registering the style function."""
        self._style_apply()

    def peakmem_render(self, cols, rows):
        """Track peak memory of rendering the styled frame."""
        self.st.render()

    def _style_apply(self):
        """Store on ``self.st`` a style that highlights the row named ``row_1``."""

        def _highlight_first_row(row):
            # One CSS entry per cell; only the row labelled "row_1" is coloured.
            css = "background-color: lightcyan" if row.name == "row_1" else ""
            return [css for _ in row]

        self.st = self.df.style.apply(_highlight_first_row, axis=1)

asv_bench/benchmarks/pandas_vb_common.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
np.uint8,
3434
]
3535
datetime_dtypes = [np.datetime64, np.timedelta64]
36-
string_dtypes = [np.object]
36+
string_dtypes = [object]
3737
try:
3838
extension_dtypes = [
3939
pd.Int8Dtype,

asv_bench/benchmarks/rolling.py

+7
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,18 @@ class EWMMethods:
9191
def setup(self, constructor, window, dtype, method):
9292
N = 10 ** 5
9393
arr = (100 * np.random.random(N)).astype(dtype)
94+
times = pd.date_range("1900", periods=N, freq="23s")
9495
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
96+
self.ewm_times = getattr(pd, constructor)(arr).ewm(
97+
halflife="1 Day", times=times
98+
)
9599

96100
def time_ewm(self, constructor, window, dtype, method):
97101
getattr(self.ewm, method)()
98102

103+
def time_ewm_times(self, constructor, window, dtype, method):
    """Time ``mean`` on the EWM object that was built with ``times``.

    ``setup`` creates ``self.ewm_times`` (``halflife="1 Day"`` plus a
    ``times`` index) specifically for this benchmark; the plain
    ``self.ewm`` object is already covered by ``time_ewm``.
    The parameters are the benchmark parametrization and are not used here.
    """
    self.ewm_times.mean()
105+
99106

100107
class VariableWindowMethods(Methods):
101108
params = (

asv_bench/benchmarks/series_methods.py

+8-10
Original file line numberDiff line numberDiff line change
@@ -58,17 +58,15 @@ def time_isin_nan_values(self):
5858

5959
class IsInForObjects:
6060
def setup(self):
61-
self.s_nans = Series(np.full(10 ** 4, np.nan)).astype(np.object)
62-
self.vals_nans = np.full(10 ** 4, np.nan).astype(np.object)
63-
self.s_short = Series(np.arange(2)).astype(np.object)
64-
self.s_long = Series(np.arange(10 ** 5)).astype(np.object)
65-
self.vals_short = np.arange(2).astype(np.object)
66-
self.vals_long = np.arange(10 ** 5).astype(np.object)
61+
self.s_nans = Series(np.full(10 ** 4, np.nan)).astype(object)
62+
self.vals_nans = np.full(10 ** 4, np.nan).astype(object)
63+
self.s_short = Series(np.arange(2)).astype(object)
64+
self.s_long = Series(np.arange(10 ** 5)).astype(object)
65+
self.vals_short = np.arange(2).astype(object)
66+
self.vals_long = np.arange(10 ** 5).astype(object)
6767
# because of nans floats are special:
68-
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(
69-
np.object
70-
)
71-
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(np.object)
68+
self.s_long_floats = Series(np.arange(10 ** 5, dtype=np.float)).astype(object)
69+
self.vals_long_floats = np.arange(10 ** 5, dtype=np.float).astype(object)
7270

7371
def time_isin_nans(self):
7472
# if nan-objects are different objects,

asv_bench/benchmarks/sparse.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def time_series_to_frame(self):
3232

3333
class SparseArrayConstructor:
3434

35-
params = ([0.1, 0.01], [0, np.nan], [np.int64, np.float64, np.object])
35+
params = ([0.1, 0.01], [0, np.nan], [np.int64, np.float64, object])
3636
param_names = ["dense_proportion", "fill_value", "dtype"]
3737

3838
def setup(self, dense_proportion, fill_value, dtype):

0 commit comments

Comments
 (0)