Skip to content

Commit c5a7f6e

Browse files
committed
Merge remote-tracking branch 'upstream/master' into bug/categorical-indexing-1row-df
* upstream/master: (333 commits) CI: troubleshoot Web_and_Docs failing (pandas-dev#30534) WARN: Ignore NumbaPerformanceWarning in test suite (pandas-dev#30525) DEPR: camelCase in offsets, get_offset (pandas-dev#30340) PERF: implement scalar ops blockwise (pandas-dev#29853) DEPR: Remove Series.compress (pandas-dev#30514) ENH: Add numba engine for rolling apply (pandas-dev#30151) [ENH] Add to_markdown method (pandas-dev#30350) DEPR: Deprecate pandas.np module (pandas-dev#30386) ENH: Add ignore_index for df.drop_duplicates (pandas-dev#30405) BUG: The setting xrot=0 in DataFrame.hist() doesn't work with by and subplots pandas-dev#30288 (pandas-dev#30491) CI: Fix GBQ Tests (pandas-dev#30478) Bug groupby quantile listlike q and int columns (pandas-dev#30485) ENH: Add ignore_index for df.sort_values and series.sort_values (pandas-dev#30402) TYP: Typing hints in pandas/io/formats/{css,csvs}.py (pandas-dev#30398) BUG: raise on non-hashable Index name, closes pandas-dev#29069 (pandas-dev#30335) Replace "foo!r" to "repr(foo)" syntax pandas-dev#29886 (pandas-dev#30502) BUG: preserve EA dtype in transpose (pandas-dev#30091) BLD: add check to prevent tempita name error, closes pandas-dev#28836 (pandas-dev#30498) REF/TST: method-specific files for test_append (pandas-dev#30503) marked unused parameters (pandas-dev#30504) ...
2 parents 5512119 + e817fff commit c5a7f6e

File tree

595 files changed

+18013
-22982
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

595 files changed

+18013
-22982
lines changed

.binstar.yml

-28
This file was deleted.

.github/workflows/ci.yml

+3-3
Original file line number | Diff line number | Diff line change
@@ -15,12 +15,12 @@ jobs:
1515
runs-on: ubuntu-latest
1616
steps:
1717

18+
- name: Setting conda path
19+
run: echo "::add-path::${HOME}/miniconda3/bin"
20+
1821
- name: Checkout
1922
uses: actions/checkout@v1
2023

21-
- name: Setting conda path
22-
run: echo "::set-env name=PATH::${HOME}/miniconda3/bin:${PATH}"
23-
2424
- name: Looking for unwanted patterns
2525
run: ci/code_checks.sh patterns
2626
if: true

.travis.yml

+19-16
Original file line number | Diff line number | Diff line change
@@ -30,31 +30,34 @@ matrix:
3030
- python: 3.5
3131

3232
include:
33-
- dist: trusty
34-
env:
33+
- env:
3534
- JOB="3.8" ENV_FILE="ci/deps/travis-38.yaml" PATTERN="(not slow and not network)"
3635

37-
- dist: trusty
38-
env:
36+
- env:
3937
- JOB="3.7" ENV_FILE="ci/deps/travis-37.yaml" PATTERN="(not slow and not network)"
4038

41-
- dist: trusty
42-
env:
43-
- JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8"
39+
- env:
40+
- JOB="3.6, locale" ENV_FILE="ci/deps/travis-36-locale.yaml" PATTERN="((not slow and not network) or (single and db))" LOCALE_OVERRIDE="zh_CN.UTF-8" SQL="1"
41+
services:
42+
- mysql
43+
- postgresql
4444

45-
- dist: trusty
46-
env:
47-
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true
45+
- env:
46+
- JOB="3.6, coverage" ENV_FILE="ci/deps/travis-36-cov.yaml" PATTERN="((not slow and not network) or (single and db))" PANDAS_TESTING_MODE="deprecate" COVERAGE=true SQL="1"
47+
services:
48+
- mysql
49+
- postgresql
4850

4951
# In allow_failures
50-
- dist: trusty
51-
env:
52-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
52+
- env:
53+
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
54+
services:
55+
- mysql
56+
- postgresql
5357

5458
allow_failures:
55-
- dist: trusty
56-
env:
57-
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow"
59+
- env:
60+
- JOB="3.6, slow" ENV_FILE="ci/deps/travis-36-slow.yaml" PATTERN="slow" SQL="1"
5861

5962
before_install:
6063
- echo "before_install"

LICENSES/MSGPACK_LICENSE

-13
This file was deleted.

LICENSES/MSGPACK_NUMPY_LICENSE

-33
This file was deleted.

MANIFEST.in

-1
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,6 @@ global-exclude *.gz
2020
global-exclude *.h5
2121
global-exclude *.html
2222
global-exclude *.json
23-
global-exclude *.msgpack
2423
global-exclude *.pickle
2524
global-exclude *.png
2625
global-exclude *.pyc

README.md

+1-1
Original file line number | Diff line number | Diff line change
@@ -124,7 +124,7 @@ Here are just a few of the things that pandas does well:
124124
and saving/loading data from the ultrafast [**HDF5 format**][hdfstore]
125125
- [**Time series**][timeseries]-specific functionality: date range
126126
generation and frequency conversion, moving window statistics,
127-
moving window linear regressions, date shifting and lagging, etc.
127+
date shifting and lagging.
128128

129129

130130
[missing-data]: https://pandas.pydata.org/pandas-docs/stable/missing_data.html#working-with-missing-data

asv_bench/benchmarks/array.py

+23
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class BooleanArray:
7+
def setup(self):
8+
self.values_bool = np.array([True, False, True, False])
9+
self.values_float = np.array([1.0, 0.0, 1.0, 0.0])
10+
self.values_integer = np.array([1, 0, 1, 0])
11+
self.values_integer_like = [1, 0, 1, 0]
12+
13+
def time_from_bool_array(self):
14+
pd.array(self.values_bool, dtype="boolean")
15+
16+
def time_from_integer_array(self):
17+
pd.array(self.values_integer, dtype="boolean")
18+
19+
def time_from_integer_like(self):
20+
pd.array(self.values_integer_like, dtype="boolean")
21+
22+
def time_from_float_array(self):
23+
pd.array(self.values_float, dtype="boolean")

asv_bench/benchmarks/binary_ops.py

+32
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
1+
import operator
2+
13
import numpy as np
24

35
from pandas import DataFrame, Series, date_range
@@ -9,6 +11,36 @@
911
import pandas.computation.expressions as expr
1012

1113

14+
class IntFrameWithScalar:
15+
params = [
16+
[np.float64, np.int64],
17+
[2, 3.0, np.int32(4), np.float64(5)],
18+
[
19+
operator.add,
20+
operator.sub,
21+
operator.mul,
22+
operator.truediv,
23+
operator.floordiv,
24+
operator.pow,
25+
operator.mod,
26+
operator.eq,
27+
operator.ne,
28+
operator.gt,
29+
operator.ge,
30+
operator.lt,
31+
operator.le,
32+
],
33+
]
34+
param_names = ["dtype", "scalar", "op"]
35+
36+
def setup(self, dtype, scalar, op):
37+
arr = np.random.randn(20000, 100)
38+
self.df = DataFrame(arr.astype(dtype))
39+
40+
def time_frame_op_with_scalar(self, dtype, scalar, op):
41+
op(self.df, scalar)
42+
43+
1244
class Ops:
1345

1446
params = [[True, False], ["default", 1]]

asv_bench/benchmarks/boolean.py

+32
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
3+
import pandas as pd
4+
5+
6+
class TimeLogicalOps:
7+
def setup(self):
8+
N = 10_000
9+
left, right, lmask, rmask = np.random.randint(0, 2, size=(4, N)).astype("bool")
10+
self.left = pd.arrays.BooleanArray(left, lmask)
11+
self.right = pd.arrays.BooleanArray(right, rmask)
12+
13+
def time_or_scalar(self):
14+
self.left | True
15+
self.left | False
16+
17+
def time_or_array(self):
18+
self.left | self.right
19+
20+
def time_and_scalar(self):
21+
self.left & True
22+
self.left & False
23+
24+
def time_and_array(self):
25+
self.left & self.right
26+
27+
def time_xor_scalar(self):
28+
self.left ^ True
29+
self.left ^ False
30+
31+
def time_xor_array(self):
32+
self.left ^ self.right

asv_bench/benchmarks/dtypes.py

+22
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from .pandas_vb_common import (
66
datetime_dtypes,
77
extension_dtypes,
8+
lib,
89
numeric_dtypes,
910
string_dtypes,
1011
)
@@ -40,4 +41,25 @@ def time_pandas_dtype_invalid(self, dtype):
4041
pass
4142

4243

44+
class InferDtypes:
45+
param_names = ["dtype"]
46+
data_dict = {
47+
"np-object": np.array([1] * 100000, dtype="O"),
48+
"py-object": [1] * 100000,
49+
"np-null": np.array([1] * 50000 + [np.nan] * 50000),
50+
"py-null": [1] * 50000 + [None] * 50000,
51+
"np-int": np.array([1] * 100000, dtype=int),
52+
"np-floating": np.array([1.0] * 100000, dtype=float),
53+
"empty": [],
54+
"bytes": [b"a"] * 100000,
55+
}
56+
params = list(data_dict.keys())
57+
58+
def time_infer_skipna(self, dtype):
59+
lib.infer_dtype(self.data_dict[dtype], skipna=True)
60+
61+
def time_infer(self, dtype):
62+
lib.infer_dtype(self.data_dict[dtype], skipna=False)
63+
64+
4365
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/frame_ctor.py

+12
Original file line number | Diff line number | Diff line change
@@ -105,4 +105,16 @@ def time_frame_from_lists(self):
105105
self.df = DataFrame(self.data)
106106

107107

108+
class FromRange:
109+
110+
goal_time = 0.2
111+
112+
def setup(self):
113+
N = 1_000_000
114+
self.data = range(N)
115+
116+
def time_frame_from_range(self):
117+
self.df = DataFrame(self.data)
118+
119+
108120
from .pandas_vb_common import setup # noqa: F401 isort:skip

asv_bench/benchmarks/frame_methods.py

+6-8
Original file line number | Diff line number | Diff line change
@@ -321,10 +321,9 @@ class Dropna:
321321

322322
def setup(self, how, axis):
323323
self.df = DataFrame(np.random.randn(10000, 1000))
324-
with warnings.catch_warnings(record=True):
325-
self.df.ix[50:1000, 20:50] = np.nan
326-
self.df.ix[2000:3000] = np.nan
327-
self.df.ix[:, 60:70] = np.nan
324+
self.df.iloc[50:1000, 20:50] = np.nan
325+
self.df.iloc[2000:3000] = np.nan
326+
self.df.iloc[:, 60:70] = np.nan
328327
self.df_mixed = self.df.copy()
329328
self.df_mixed["foo"] = "bar"
330329

@@ -342,10 +341,9 @@ class Count:
342341

343342
def setup(self, axis):
344343
self.df = DataFrame(np.random.randn(10000, 1000))
345-
with warnings.catch_warnings(record=True):
346-
self.df.ix[50:1000, 20:50] = np.nan
347-
self.df.ix[2000:3000] = np.nan
348-
self.df.ix[:, 60:70] = np.nan
344+
self.df.iloc[50:1000, 20:50] = np.nan
345+
self.df.iloc[2000:3000] = np.nan
346+
self.df.iloc[:, 60:70] = np.nan
349347
self.df_mixed = self.df.copy()
350348
self.df_mixed["foo"] = "bar"
351349

asv_bench/benchmarks/index_object.py

+13
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
Float64Index,
88
Index,
99
IntervalIndex,
10+
MultiIndex,
1011
RangeIndex,
1112
Series,
1213
date_range,
@@ -111,6 +112,18 @@ def time_get_loc_dec(self):
111112
self.idx_dec.get_loc(100000)
112113

113114

115+
class IndexEquals:
116+
def setup(self):
117+
idx_large_fast = RangeIndex(100000)
118+
idx_small_slow = date_range(start="1/1/2012", periods=1)
119+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
120+
121+
self.idx_non_object = RangeIndex(1)
122+
123+
def time_non_object_equals_multiindex(self):
124+
self.idx_non_object.equals(self.mi_large_slow)
125+
126+
114127
class IndexAppend:
115128
def setup(self):
116129

0 commit comments

Comments
 (0)