Skip to content

Commit d01b62e

Browse files
committed
ENH: gb.is_monotonic_increasing pandas-dev#17015 rebase to master
2 parents f24e476 + 4091f64 commit d01b62e

File tree

279 files changed

+11550
-10412
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

279 files changed

+11550
-10412
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,4 @@ doc/build/html/index.html
106106
doc/tmp.sv
107107
doc/source/styled.xlsx
108108
doc/source/templates/
109+
env/

asv_bench/benchmarks/algorithms.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
except:
1212
pass
1313

14+
from .pandas_vb_common import setup # noqa
15+
1416

1517
class Factorize(object):
1618

@@ -21,7 +23,6 @@ class Factorize(object):
2123

2224
def setup(self, sort):
2325
N = 10**5
24-
np.random.seed(1234)
2526
self.int_idx = pd.Int64Index(np.arange(N).repeat(5))
2627
self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5))
2728
self.string_idx = tm.makeStringIndex(N)
@@ -45,7 +46,6 @@ class Duplicated(object):
4546

4647
def setup(self, keep):
4748
N = 10**5
48-
np.random.seed(1234)
4949
self.int_idx = pd.Int64Index(np.arange(N).repeat(5))
5050
self.float_idx = pd.Float64Index(np.random.randn(N).repeat(5))
5151
self.string_idx = tm.makeStringIndex(N)
@@ -79,7 +79,6 @@ class Match(object):
7979
goal_time = 0.2
8080

8181
def setup(self):
82-
np.random.seed(1234)
8382
self.uniques = tm.makeStringIndex(1000).values
8483
self.all = self.uniques.repeat(10)
8584

@@ -92,7 +91,6 @@ class Hashing(object):
9291
goal_time = 0.2
9392

9493
def setup_cache(self):
95-
np.random.seed(1234)
9694
N = 10**5
9795

9896
df = pd.DataFrame(

asv_bench/benchmarks/attrs_caching.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,18 @@
11
import numpy as np
22
from pandas import DataFrame
3-
43
try:
54
from pandas.util import cache_readonly
65
except ImportError:
76
from pandas.util.decorators import cache_readonly
87

8+
from .pandas_vb_common import setup # noqa
9+
910

1011
class DataFrameAttributes(object):
1112

1213
goal_time = 0.2
1314

1415
def setup(self):
15-
np.random.seed(1234)
1616
self.df = DataFrame(np.random.randn(10, 6))
1717
self.cur_index = self.df.index
1818

asv_bench/benchmarks/binary_ops.py

+7-10
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
except ImportError:
77
import pandas.computation.expressions as expr
88

9+
from .pandas_vb_common import setup # noqa
10+
911

1012
class Ops(object):
1113

@@ -15,7 +17,6 @@ class Ops(object):
1517
param_names = ['use_numexpr', 'threads']
1618

1719
def setup(self, use_numexpr, threads):
18-
np.random.seed(1234)
1920
self.df = DataFrame(np.random.randn(20000, 100))
2021
self.df2 = DataFrame(np.random.randn(20000, 100))
2122

@@ -47,7 +48,6 @@ class Ops2(object):
4748

4849
def setup(self):
4950
N = 10**3
50-
np.random.seed(1234)
5151
self.df = DataFrame(np.random.randn(N, N))
5252
self.df2 = DataFrame(np.random.randn(N, N))
5353

@@ -89,14 +89,12 @@ class Timeseries(object):
8989
param_names = ['tz']
9090

9191
def setup(self, tz):
92-
self.N = 10**6
93-
self.halfway = ((self.N // 2) - 1)
94-
self.s = Series(date_range('20010101', periods=self.N, freq='T',
95-
tz=tz))
96-
self.ts = self.s[self.halfway]
92+
N = 10**6
93+
halfway = (N // 2) - 1
94+
self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz))
95+
self.ts = self.s[halfway]
9796

98-
self.s2 = Series(date_range('20010101', periods=self.N, freq='s',
99-
tz=tz))
97+
self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz))
10098

10199
def time_series_timestamp_compare(self, tz):
102100
self.s <= self.ts
@@ -131,7 +129,6 @@ class AddOverflowArray(object):
131129
goal_time = 0.2
132130

133131
def setup(self):
134-
np.random.seed(1234)
135132
N = 10**6
136133
self.arr = np.arange(N)
137134
self.arr_rev = np.arange(-N, 0)

asv_bench/benchmarks/categoricals.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
except ImportError:
1010
pass
1111

12+
from .pandas_vb_common import setup # noqa
13+
1214

1315
class Concat(object):
1416

@@ -76,7 +78,6 @@ class ValueCounts(object):
7678

7779
def setup(self, dropna):
7880
n = 5 * 10**5
79-
np.random.seed(2718281)
8081
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
8182
self.ts = pd.Series(arr).astype('category')
8283

@@ -101,7 +102,6 @@ class SetCategories(object):
101102

102103
def setup(self):
103104
n = 5 * 10**5
104-
np.random.seed(2718281)
105105
arr = ['s%04d' % i for i in np.random.randint(0, n // 10, size=n)]
106106
self.ts = pd.Series(arr).astype('category')
107107

@@ -116,7 +116,6 @@ class Rank(object):
116116
def setup(self):
117117
N = 10**5
118118
ncats = 100
119-
np.random.seed(1234)
120119

121120
self.s_str = pd.Series(tm.makeCategoricalIndex(N, ncats)).astype(str)
122121
self.s_str_cat = self.s_str.astype('category')

asv_bench/benchmarks/ctors.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import numpy as np
22
from pandas import DataFrame, Series, Index, DatetimeIndex, Timestamp
33

4+
from .pandas_vb_common import setup # noqa
5+
46

57
class Constructors(object):
68

79
goal_time = 0.2
810

911
def setup(self):
1012
N = 10**2
11-
np.random.seed(1234)
1213
self.arr = np.random.randn(N, N)
1314
self.arr_str = np.array(['foo', 'bar', 'baz'], dtype=object)
1415

asv_bench/benchmarks/eval.py

+12-12
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
except ImportError:
66
import pandas.computation.expressions as expr
77

8+
from .pandas_vb_common import setup # noqa
9+
810

911
class Eval(object):
1012

@@ -14,7 +16,6 @@ class Eval(object):
1416
param_names = ['engine', 'threads']
1517

1618
def setup(self, engine, threads):
17-
np.random.seed(1234)
1819
self.df = pd.DataFrame(np.random.randn(20000, 100))
1920
self.df2 = pd.DataFrame(np.random.randn(20000, 100))
2021
self.df3 = pd.DataFrame(np.random.randn(20000, 100))
@@ -45,17 +46,16 @@ class Query(object):
4546
goal_time = 0.2
4647

4748
def setup(self):
48-
np.random.seed(1234)
49-
self.N = 10**6
50-
self.halfway = (self.N // 2) - 1
51-
self.index = pd.date_range('20010101', periods=self.N, freq='T')
52-
self.s = pd.Series(self.index)
53-
self.ts = self.s.iloc[self.halfway]
54-
self.df = pd.DataFrame({'a': np.random.randn(self.N), 'dates': self.s},
55-
index=self.index)
56-
self.data = np.random.randn(self.N)
57-
self.min_val = self.data.min()
58-
self.max_val = self.data.max()
49+
N = 10**6
50+
halfway = (N // 2) - 1
51+
index = pd.date_range('20010101', periods=N, freq='T')
52+
s = pd.Series(index)
53+
self.ts = s.iloc[halfway]
54+
self.df = pd.DataFrame({'a': np.random.randn(N), 'dates': s},
55+
index=index)
56+
data = np.random.randn(N)
57+
self.min_val = data.min()
58+
self.max_val = data.max()
5959

6060
def time_query_datetime_index(self):
6161
self.df.query('index < @self.ts')

asv_bench/benchmarks/frame_ctor.py

+40-83
Original file line numberDiff line numberDiff line change
@@ -2,125 +2,82 @@
22
import pandas.util.testing as tm
33
from pandas import DataFrame, Series, MultiIndex, Timestamp, date_range
44
try:
5-
from pandas.tseries import offsets
6-
except:
7-
from pandas.core.datetools import *
5+
from pandas.tseries.offsets import Nano, Hour
6+
except ImportError:
7+
# For compatibility with older versions
8+
from pandas.core.datetools import * # noqa
89

10+
from .pandas_vb_common import setup # noqa
911

10-
# ----------------------------------------------------------------------
11-
# Creation from nested dict
1212

1313
class FromDicts(object):
1414

1515
goal_time = 0.2
1616

1717
def setup(self):
18-
np.random.seed(1234)
1918
N, K = 5000, 50
20-
self.index = tm.makeStringIndex(N)
21-
self.columns = tm.makeStringIndex(K)
22-
self.frame = DataFrame(np.random.randn(N, K),
23-
index=self.index,
24-
columns=self.columns)
25-
self.data = self.frame.to_dict()
19+
index = tm.makeStringIndex(N)
20+
columns = tm.makeStringIndex(K)
21+
frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)
22+
self.data = frame.to_dict()
2623
self.some_dict = list(self.data.values())[0]
27-
self.dict_list = self.frame.to_dict(orient='records')
24+
self.dict_list = frame.to_dict(orient='records')
2825
self.data2 = {i: {j: float(j) for j in range(100)}
2926
for i in range(2000)}
3027

31-
def time_frame_ctor_list_of_dict(self):
28+
def time_list_of_dict(self):
3229
DataFrame(self.dict_list)
3330

34-
def time_frame_ctor_nested_dict(self):
31+
def time_nested_dict(self):
3532
DataFrame(self.data)
3633

37-
def time_series_ctor_from_dict(self):
34+
def time_dict(self):
3835
Series(self.some_dict)
3936

40-
def time_frame_ctor_nested_dict_int64(self):
37+
def time_nested_dict_int64(self):
4138
# nested dict, integer indexes, regression described in #621
4239
DataFrame(self.data2)
4340

4441

45-
# from a mi-series
46-
4742
class FromSeries(object):
43+
4844
goal_time = 0.2
4945

5046
def setup(self):
51-
self.mi = MultiIndex.from_product([range(100), range(100)])
52-
self.s = Series(np.random.randn(10000), index=self.mi)
47+
mi = MultiIndex.from_product([range(100), range(100)])
48+
self.s = Series(np.random.randn(10000), index=mi)
5349

54-
def time_frame_from_mi_series(self):
50+
def time_mi_series(self):
5551
DataFrame(self.s)
5652

57-
# ----------------------------------------------------------------------
58-
# From dict with DatetimeIndex with all offsets
59-
60-
# dynamically generate benchmarks for every offset
61-
#
62-
# get_period_count & get_index_for_offset are there because blindly taking each
63-
# offset times 1000 can easily go out of Timestamp bounds and raise errors.
64-
65-
66-
def get_period_count(start_date, off):
67-
ten_offsets_in_days = ((start_date + (off * 10)) - start_date).days
68-
if (ten_offsets_in_days == 0):
69-
return 1000
70-
else:
71-
periods = 9 * (Timestamp.max - start_date).days // ten_offsets_in_days
72-
return min(periods, 1000)
73-
7453

75-
def get_index_for_offset(off):
76-
start_date = Timestamp('1/1/1900')
77-
return date_range(start_date,
78-
periods=get_period_count(start_date, off),
79-
freq=off)
54+
class FromDictwithTimestamp(object):
8055

56+
goal_time = 0.2
57+
params = [Nano(1), Hour(1)]
58+
param_names = ['offset']
8159

82-
all_offsets = offsets.__all__
83-
# extra cases
84-
for off in ['FY5253', 'FY5253Quarter']:
85-
all_offsets.pop(all_offsets.index(off))
86-
all_offsets.extend([off + '_1', off + '_2'])
60+
def setup(self, offset):
61+
N = 10**3
62+
np.random.seed(1234)
63+
idx = date_range(Timestamp('1/1/1900'), freq=offset, periods=N)
64+
df = DataFrame(np.random.randn(N, 10), index=idx)
65+
self.d = df.to_dict()
8766

67+
def time_dict_with_timestamp_offsets(self, offset):
68+
DataFrame(self.d)
8869

89-
class FromDictwithTimestampOffsets(object):
9070

91-
params = [all_offsets, [1, 2]]
92-
param_names = ['offset', 'n_steps']
71+
class FromRecords(object):
9372

94-
offset_kwargs = {'WeekOfMonth': {'weekday': 1, 'week': 1},
95-
'LastWeekOfMonth': {'weekday': 1, 'week': 1},
96-
'FY5253': {'startingMonth': 1, 'weekday': 1},
97-
'FY5253Quarter': {'qtr_with_extra_week': 1,
98-
'startingMonth': 1,
99-
'weekday': 1}}
73+
goal_time = 0.2
74+
params = [None, 1000]
75+
param_names = ['nrows']
10076

101-
offset_extra_cases = {'FY5253': {'variation': ['nearest', 'last']},
102-
'FY5253Quarter': {'variation': ['nearest', 'last']}}
77+
def setup(self, nrows):
78+
N = 100000
79+
self.gen = ((x, (x * 20), (x * 100)) for x in range(N))
10380

104-
def setup(self, offset, n_steps):
105-
np.random.seed(1234)
106-
extra = False
107-
if offset.endswith("_", None, -1):
108-
extra = int(offset[-1])
109-
offset = offset[:-2]
110-
111-
kwargs = {}
112-
if offset in self.offset_kwargs:
113-
kwargs = self.offset_kwargs[offset]
114-
115-
if extra:
116-
extras = self.offset_extra_cases[offset]
117-
for extra_arg in extras:
118-
kwargs[extra_arg] = extras[extra_arg][extra - 1]
119-
120-
offset = getattr(offsets, offset)
121-
self.idx = get_index_for_offset(offset(n_steps, **kwargs))
122-
self.df = DataFrame(np.random.randn(len(self.idx), 10), index=self.idx)
123-
self.d = self.df.to_dict()
124-
125-
def time_frame_ctor(self, offset, n_steps):
126-
DataFrame(self.d)
81+
def time_frame_from_records_generator(self, nrows):
82+
# issue-6700
83+
self.df = DataFrame.from_records(self.gen, nrows=nrows)

0 commit comments

Comments
 (0)