Skip to content

Commit 06aa378

Browse files
committed
Merge branch 'master' of https://github.com/pandas-dev/pandas into 19356
Conflicts: doc/source/whatsnew/v1.2.0.rst, pandas/core/algorithms.py, pandas/tests/series/methods/test_isin.py
2 parents c0b1ab2 + 78d1498 commit 06aa378

File tree

344 files changed

+10018
-5874
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

344 files changed

+10018
-5874
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
*.log
1313
*.swp
1414
*.pdb
15+
*.zip
1516
.project
1617
.pydevproject
1718
.settings

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ repos:
2626
name: isort (cython)
2727
types: [cython]
2828
- repo: https://github.com/asottile/pyupgrade
29-
rev: v2.7.3
29+
rev: v2.7.4
3030
hooks:
3131
- id: pyupgrade
3232
args: [--py37-plus]

Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,5 +43,5 @@ RUN conda env update -n base -f "$pandas_home/environment.yml"
4343

4444
# Build C extensions and pandas
4545
RUN cd "$pandas_home" \
46-
&& python setup.py build_ext --inplace -j 4 \
46+
&& python setup.py build_ext -j 4 \
4747
&& python -m pip install -e .

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ clean_pyc:
99
-find . -name '*.py[co]' -exec rm {} \;
1010

1111
build: clean_pyc
12-
python setup.py build_ext --inplace
12+
python setup.py build_ext
1313

1414
lint-diff:
1515
git diff upstream/master --name-only -- "*.py" | xargs flake8

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ Here are just a few of the things that pandas does well:
6060
and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
6161
- [**Time series**][timeseries]-specific functionality: date range
6262
generation and frequency conversion, moving window statistics,
63-
date shifting and lagging.
63+
date shifting and lagging
6464

6565

6666
[missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data

asv_bench/benchmarks/algorithms.py

+12
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pandas._libs import lib
66

77
import pandas as pd
8+
from pandas.core.algorithms import make_duplicates_of_left_unique_in_right
89

910
from .pandas_vb_common import tm
1011

@@ -174,4 +175,15 @@ def time_argsort(self, N):
174175
self.array.argsort()
175176

176177

178+
class RemoveDuplicates:
179+
def setup(self):
180+
N = 10 ** 5
181+
na = np.arange(int(N / 2))
182+
self.left = np.concatenate([na[: int(N / 4)], na[: int(N / 4)]])
183+
self.right = np.concatenate([na, na])
184+
185+
def time_make_duplicates_of_left_unique_in_right(self):
186+
make_duplicates_of_left_unique_in_right(self.left, self.right)
187+
188+
177189
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/categoricals.py

+43
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import string
2+
import sys
13
import warnings
24

35
import numpy as np
@@ -67,6 +69,47 @@ def time_existing_series(self):
6769
pd.Categorical(self.series)
6870

6971

72+
class AsType:
73+
def setup(self):
74+
N = 10 ** 5
75+
76+
random_pick = np.random.default_rng().choice
77+
78+
categories = {
79+
"str": list(string.ascii_letters),
80+
"int": np.random.randint(2 ** 16, size=154),
81+
"float": sys.maxsize * np.random.random((38,)),
82+
"timestamp": [
83+
pd.Timestamp(x, unit="s") for x in np.random.randint(2 ** 18, size=578)
84+
],
85+
}
86+
87+
self.df = pd.DataFrame(
88+
{col: random_pick(cats, N) for col, cats in categories.items()}
89+
)
90+
91+
for col in ("int", "float", "timestamp"):
92+
self.df[col + "_as_str"] = self.df[col].astype(str)
93+
94+
for col in self.df.columns:
95+
self.df[col] = self.df[col].astype("category")
96+
97+
def astype_str(self):
98+
[self.df[col].astype("str") for col in "int float timestamp".split()]
99+
100+
def astype_int(self):
101+
[self.df[col].astype("int") for col in "int_as_str timestamp".split()]
102+
103+
def astype_float(self):
104+
[
105+
self.df[col].astype("float")
106+
for col in "float_as_str int int_as_str timestamp".split()
107+
]
108+
109+
def astype_datetime(self):
110+
self.df["float"].astype(pd.DatetimeTZDtype(tz="US/Pacific"))
111+
112+
70113
class Concat:
71114
def setup(self):
72115
N = 10 ** 5

asv_bench/benchmarks/groupby.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -486,7 +486,7 @@ def setup(self):
486486
tmp2 = (np.random.random(10000) * 10.0).astype(np.float32)
487487
tmp = np.concatenate((tmp1, tmp2))
488488
arr = np.repeat(tmp, 10)
489-
self.df = DataFrame(dict(a=arr, b=arr))
489+
self.df = DataFrame({"a": arr, "b": arr})
490490

491491
def time_sum(self):
492492
self.df.groupby(["a"])["b"].sum()

asv_bench/benchmarks/join_merge.py

+6
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ def time_join_dataframe_index_single_key_small(self, sort):
132132
def time_join_dataframe_index_shuffle_key_bigger_sort(self, sort):
133133
self.df_shuf.join(self.df_key2, on="key2", sort=sort)
134134

135+
def time_join_dataframes_cross(self, sort):
136+
self.df.loc[:2000].join(self.df_key1, how="cross", sort=sort)
137+
135138

136139
class JoinIndex:
137140
def setup(self):
@@ -205,6 +208,9 @@ def time_merge_dataframe_integer_2key(self, sort):
205208
def time_merge_dataframe_integer_key(self, sort):
206209
merge(self.df, self.df2, on="key1", sort=sort)
207210

211+
def time_merge_dataframes_cross(self, sort):
212+
merge(self.left.loc[:2000], self.right.loc[:2000], how="cross", sort=sort)
213+
208214

209215
class I8Merge:
210216

asv_bench/benchmarks/reshape.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,10 @@ def setup(self):
103103
nidvars = 20
104104
N = 5000
105105
self.letters = list("ABCD")
106-
yrvars = [l + str(num) for l, num in product(self.letters, range(1, nyrs + 1))]
106+
yrvars = [
107+
letter + str(num)
108+
for letter, num in product(self.letters, range(1, nyrs + 1))
109+
]
107110
columns = [str(i) for i in range(nidvars)] + yrvars
108111
self.df = DataFrame(np.random.randn(N, nidvars + len(yrvars)), columns=columns)
109112
self.df["id"] = self.df.index

asv_bench/benchmarks/rolling.py

+13
Original file line numberDiff line numberDiff line change
@@ -225,4 +225,17 @@ def time_rolling_offset(self, method):
225225
getattr(self.groupby_roll_offset, method)()
226226

227227

228+
class GroupbyEWM:
229+
230+
params = ["cython", "numba"]
231+
param_names = ["engine"]
232+
233+
def setup(self, engine):
234+
df = pd.DataFrame({"A": range(50), "B": range(50)})
235+
self.gb_ewm = df.groupby("A").ewm(com=1.0)
236+
237+
def time_groupby_mean(self, engine):
238+
self.gb_ewm.mean(engine=engine)
239+
240+
228241
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/series_methods.py

+72-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import NaT, Series, date_range
5+
from pandas import Categorical, NaT, Series, date_range
66

77
from .pandas_vb_common import tm
88

@@ -36,6 +36,28 @@ def time_isin(self, dtypes):
3636
self.s.isin(self.values)
3737

3838

39+
class IsInDatetime64:
40+
def setup(self):
41+
dti = date_range(
42+
start=datetime(2015, 10, 26), end=datetime(2016, 1, 1), freq="50s"
43+
)
44+
self.ser = Series(dti)
45+
self.subset = self.ser._values[::3]
46+
self.cat_subset = Categorical(self.subset)
47+
48+
def time_isin(self):
49+
self.ser.isin(self.subset)
50+
51+
def time_isin_cat_values(self):
52+
self.ser.isin(self.cat_subset)
53+
54+
def time_isin_mismatched_dtype(self):
55+
self.ser.isin([1, 2])
56+
57+
def time_isin_empty(self):
58+
self.ser.isin([])
59+
60+
3961
class IsInFloat64:
4062
def setup(self):
4163
self.small = Series([1, 2], dtype=np.float64)
@@ -90,6 +112,55 @@ def time_isin_long_series_long_values_floats(self):
90112
self.s_long_floats.isin(self.vals_long_floats)
91113

92114

115+
class IsInLongSeriesLookUpDominates:
116+
params = [
117+
["int64", "int32", "float64", "float32", "object"],
118+
[5, 1000],
119+
["random_hits", "random_misses", "monotone_hits", "monotone_misses"],
120+
]
121+
param_names = ["dtype", "MaxNumber", "series_type"]
122+
123+
def setup(self, dtype, MaxNumber, series_type):
124+
N = 10 ** 7
125+
if series_type == "random_hits":
126+
np.random.seed(42)
127+
array = np.random.randint(0, MaxNumber, N)
128+
if series_type == "random_misses":
129+
np.random.seed(42)
130+
array = np.random.randint(0, MaxNumber, N) + MaxNumber
131+
if series_type == "monotone_hits":
132+
array = np.repeat(np.arange(MaxNumber), N // MaxNumber)
133+
if series_type == "monotone_misses":
134+
array = np.arange(N) + MaxNumber
135+
self.series = Series(array).astype(dtype)
136+
self.values = np.arange(MaxNumber).astype(dtype)
137+
138+
def time_isin(self, dtypes, MaxNumber, series_type):
139+
self.series.isin(self.values)
140+
141+
142+
class IsInLongSeriesValuesDominate:
143+
params = [
144+
["int64", "int32", "float64", "float32", "object"],
145+
["random", "monotone"],
146+
]
147+
param_names = ["dtype", "series_type"]
148+
149+
def setup(self, dtype, series_type):
150+
N = 10 ** 7
151+
if series_type == "random":
152+
np.random.seed(42)
153+
vals = np.random.randint(0, 10 * N, N)
154+
if series_type == "monotone":
155+
vals = np.arange(N)
156+
self.values = vals.astype(dtype)
157+
M = 10 ** 6 + 1
158+
self.series = Series(np.arange(M)).astype(dtype)
159+
160+
def time_isin(self, dtypes, series_type):
161+
self.series.isin(self.values)
162+
163+
93164
class NSort:
94165

95166
params = ["first", "last", "all"]

azure-pipelines.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040
. ~/virtualenvs/pandas-dev/bin/activate && \
4141
python -m pip install --no-deps -U pip wheel setuptools && \
4242
pip install cython numpy python-dateutil pytz pytest pytest-xdist hypothesis pytest-azurepipelines && \
43-
python setup.py build_ext -q -i -j2 && \
43+
python setup.py build_ext -q -j2 && \
4444
python -m pip install --no-build-isolation -e . && \
4545
pytest -m 'not slow and not network and not clipboard' pandas --junitxml=test-data.xml"
4646
displayName: 'Run 32-bit manylinux2014 Docker Build / Tests'

ci/azure/windows.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ jobs:
3434
- bash: |
3535
source activate pandas-dev
3636
conda list
37-
python setup.py build_ext -q -i -j 4
37+
python setup.py build_ext -q -j 4
3838
python -m pip install --no-build-isolation -e .
3939
displayName: 'Build'
4040

ci/deps/azure-39.yaml

+5
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,8 @@ dependencies:
1515
- numpy
1616
- python-dateutil
1717
- pytz
18+
19+
# optional dependencies
20+
- pytables
21+
- scipy
22+
- pyarrow=1.0

ci/deps/travis-37-locale.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ dependencies:
3434
- pyarrow>=0.17
3535
- pytables>=3.5.1
3636
- scipy
37-
- xarray=0.12.0
37+
- xarray=0.12.3
3838
- xlrd
3939
- xlsxwriter
4040
- xlwt

ci/run_tests.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile -s
2525
if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then
2626
# GH#37455 windows py38 build appears to be running out of memory
2727
# skip collection of window tests
28-
PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/"
28+
PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/ --ignore=pandas/tests/plotting/"
2929
fi
3030

3131
echo $PYTEST_CMD

ci/setup_env.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ conda list pandas
131131
# Make sure any error below is reported as such
132132

133133
echo "[Build extensions]"
134-
python setup.py build_ext -q -i -j2
134+
python setup.py build_ext -q -j2
135135

136136
echo "[Updating pip]"
137137
python -m pip install --no-deps -U pip wheel setuptools

0 commit comments

Comments
 (0)