Blacken the code base #27076

Merged
3 commits, merged Jul 4, 2019
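For context before the hunks: this PR runs the Black formatter over the benchmark suite, so the changes shown below are purely cosmetic rewrites (double quotes, spaces around the power operator, multi-line dict and call literals with trailing commas) with no behavior change. As a hedged illustration only, not code taken from the PR itself, the recurring before/after pattern looks like this:

import numpy as np

N = 10 ** 5  # the power-operator spacing Black applied in these files

# hand-wrapped, single-quoted style used before this PR
data_before = {'int': np.arange(N),
               'float': np.random.randn(N)}

# Black's output: double quotes, one key per line, trailing comma
data_after = {
    "int": np.arange(N),
    "float": np.random.randn(N),
}

assert list(data_before) == list(data_after)  # same keys either way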
123 changes: 69 additions & 54 deletions asv_bench/benchmarks/algorithms.py
@@ -5,7 +5,7 @@
import pandas as pd
from pandas.util import testing as tm

for imp in ['pandas.util', 'pandas.tools.hashing']:
for imp in ["pandas.util", "pandas.tools.hashing"]:
try:
hashing = import_module(imp)
break
@@ -15,15 +15,17 @@

class Factorize:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]

def time_factorize(self, sort, dtype):
@@ -32,15 +34,17 @@ def time_factorize(self, sort, dtype):

class FactorizeUnique:

params = [[True, False], ['int', 'uint', 'float', 'string']]
param_names = ['sort', 'dtype']
params = [[True, False], ["int", "uint", "float", "string"]]
param_names = ["sort", "dtype"]

def setup(self, sort, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.arange(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.arange(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
assert self.idx.is_unique

@@ -50,15 +54,17 @@ def time_factorize(self, sort, dtype):

class Duplicated:

params = [['first', 'last', False], ['int', 'uint', 'float', 'string']]
param_names = ['keep', 'dtype']
params = [["first", "last", False], ["int", "uint", "float", "string"]]
param_names = ["keep", "dtype"]

def setup(self, keep, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N).repeat(5)),
'uint': pd.UInt64Index(np.arange(N).repeat(5)),
'float': pd.Float64Index(np.random.randn(N).repeat(5)),
'string': tm.makeStringIndex(N).repeat(5)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N).repeat(5)),
"uint": pd.UInt64Index(np.arange(N).repeat(5)),
"float": pd.Float64Index(np.random.randn(N).repeat(5)),
"string": tm.makeStringIndex(N).repeat(5),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
@@ -69,15 +75,17 @@ def time_duplicated(self, keep, dtype):

class DuplicatedUniqueIndex:

params = ['int', 'uint', 'float', 'string']
param_names = ['dtype']
params = ["int", "uint", "float", "string"]
param_names = ["dtype"]

def setup(self, dtype):
N = 10**5
data = {'int': pd.Int64Index(np.arange(N)),
'uint': pd.UInt64Index(np.arange(N)),
'float': pd.Float64Index(np.random.randn(N)),
'string': tm.makeStringIndex(N)}
N = 10 ** 5
data = {
"int": pd.Int64Index(np.arange(N)),
"uint": pd.UInt64Index(np.arange(N)),
"float": pd.Float64Index(np.random.randn(N)),
"string": tm.makeStringIndex(N),
}
self.idx = data[dtype]
# cache is_unique
self.idx.is_unique
@@ -87,67 +95,74 @@ def time_duplicated_unique(self, dtype):


class Hashing:

def setup_cache(self):
N = 10**5
N = 10 ** 5

df = pd.DataFrame(
{'strings': pd.Series(tm.makeStringIndex(10000).take(
np.random.randint(0, 10000, size=N))),
'floats': np.random.randn(N),
'ints': np.arange(N),
'dates': pd.date_range('20110101', freq='s', periods=N),
'timedeltas': pd.timedelta_range('1 day', freq='s', periods=N)})
df['categories'] = df['strings'].astype('category')
{
"strings": pd.Series(
tm.makeStringIndex(10000).take(np.random.randint(0, 10000, size=N))
),
"floats": np.random.randn(N),
"ints": np.arange(N),
"dates": pd.date_range("20110101", freq="s", periods=N),
"timedeltas": pd.timedelta_range("1 day", freq="s", periods=N),
}
)
df["categories"] = df["strings"].astype("category")
df.iloc[10:20] = np.nan
return df

def time_frame(self, df):
hashing.hash_pandas_object(df)

def time_series_int(self, df):
hashing.hash_pandas_object(df['ints'])
hashing.hash_pandas_object(df["ints"])

def time_series_string(self, df):
hashing.hash_pandas_object(df['strings'])
hashing.hash_pandas_object(df["strings"])

def time_series_float(self, df):
hashing.hash_pandas_object(df['floats'])
hashing.hash_pandas_object(df["floats"])

def time_series_categorical(self, df):
hashing.hash_pandas_object(df['categories'])
hashing.hash_pandas_object(df["categories"])

def time_series_timedeltas(self, df):
hashing.hash_pandas_object(df['timedeltas'])
hashing.hash_pandas_object(df["timedeltas"])

def time_series_dates(self, df):
hashing.hash_pandas_object(df['dates'])
hashing.hash_pandas_object(df["dates"])


class Quantile:
params = [[0, 0.5, 1],
['linear', 'nearest', 'lower', 'higher', 'midpoint'],
['float', 'int', 'uint']]
param_names = ['quantile', 'interpolation', 'dtype']
params = [
[0, 0.5, 1],
["linear", "nearest", "lower", "higher", "midpoint"],
["float", "int", "uint"],
]
param_names = ["quantile", "interpolation", "dtype"]

def setup(self, quantile, interpolation, dtype):
N = 10**5
data = {'int': np.arange(N),
'uint': np.arange(N).astype(np.uint64),
'float': np.random.randn(N)}
N = 10 ** 5
data = {
"int": np.arange(N),
"uint": np.arange(N).astype(np.uint64),
"float": np.random.randn(N),
}
self.idx = pd.Series(data[dtype].repeat(5))

def time_quantile(self, quantile, interpolation, dtype):
self.idx.quantile(quantile, interpolation=interpolation)


class SortIntegerArray:
params = [10**3, 10**5]
params = [10 ** 3, 10 ** 5]

def setup(self, N):
data = np.arange(N, dtype=float)
data[40] = np.nan
self.array = pd.array(data, dtype='Int64')
self.array = pd.array(data, dtype="Int64")

def time_argsort(self, N):
self.array.argsort()
6 changes: 2 additions & 4 deletions asv_bench/benchmarks/attrs_caching.py
@@ -1,13 +1,13 @@
import numpy as np
from pandas import DataFrame

try:
from pandas.util import cache_readonly
except ImportError:
from pandas.util.decorators import cache_readonly


class DataFrameAttributes:

def setup(self):
self.df = DataFrame(np.random.randn(10, 6))
self.cur_index = self.df.index
@@ -20,14 +20,12 @@ def time_set_index(self):


class CacheReadonly:

def setup(self):

class Foo:

@cache_readonly
def prop(self):
return 5

self.obj = Foo()

def time_cache_readonly(self):
51 changes: 27 additions & 24 deletions asv_bench/benchmarks/binary_ops.py
@@ -1,6 +1,7 @@
import numpy as np
from pandas import DataFrame, Series, date_range
from pandas.core.algorithms import checked_add_with_arr

try:
import pandas.core.computation.expressions as expr
except ImportError:
@@ -9,14 +10,14 @@

class Ops:

params = [[True, False], ['default', 1]]
param_names = ['use_numexpr', 'threads']
params = [[True, False], ["default", 1]]
param_names = ["use_numexpr", "threads"]

def setup(self, use_numexpr, threads):
self.df = DataFrame(np.random.randn(20000, 100))
self.df2 = DataFrame(np.random.randn(20000, 100))

if threads != 'default':
if threads != "default":
expr.set_numexpr_threads(threads)
if not use_numexpr:
expr.set_use_numexpr(False)
@@ -39,18 +40,21 @@ def teardown(self, use_numexpr, threads):


class Ops2:

def setup(self):
N = 10**3
N = 10 ** 3
self.df = DataFrame(np.random.randn(N, N))
self.df2 = DataFrame(np.random.randn(N, N))

self.df_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df2_int = DataFrame(np.random.randint(np.iinfo(np.int16).min,
np.iinfo(np.int16).max,
size=(N, N)))
self.df_int = DataFrame(
np.random.randint(
np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N)
)
)
self.df2_int = DataFrame(
np.random.randint(
np.iinfo(np.int16).min, np.iinfo(np.int16).max, size=(N, N)
)
)

self.s = Series(np.random.randn(N))

@@ -90,16 +94,16 @@ def time_frame_series_dot(self):

class Timeseries:

params = [None, 'US/Eastern']
param_names = ['tz']
params = [None, "US/Eastern"]
param_names = ["tz"]

def setup(self, tz):
N = 10**6
N = 10 ** 6
halfway = (N // 2) - 1
self.s = Series(date_range('20010101', periods=N, freq='T', tz=tz))
self.s = Series(date_range("20010101", periods=N, freq="T", tz=tz))
self.ts = self.s[halfway]

self.s2 = Series(date_range('20010101', periods=N, freq='s', tz=tz))
self.s2 = Series(date_range("20010101", periods=N, freq="s", tz=tz))

def time_series_timestamp_compare(self, tz):
self.s <= self.ts
@@ -117,20 +121,19 @@ def time_timestamp_ops_diff_with_shift(self, tz):
class AddOverflowScalar:

params = [1, -1, 0]
param_names = ['scalar']
param_names = ["scalar"]

def setup(self, scalar):
N = 10**6
N = 10 ** 6
self.arr = np.arange(N)

def time_add_overflow_scalar(self, scalar):
checked_add_with_arr(self.arr, scalar)


class AddOverflowArray:

def setup(self):
N = 10**6
N = 10 ** 6
self.arr = np.arange(N)
self.arr_rev = np.arange(-N, 0)
self.arr_mixed = np.array([1, -1]).repeat(N / 2)
@@ -144,12 +147,12 @@ def time_add_overflow_arr_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1)

def time_add_overflow_b_mask_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed,
b_mask=self.arr_nan_1)
checked_add_with_arr(self.arr, self.arr_mixed, b_mask=self.arr_nan_1)

def time_add_overflow_both_arg_nan(self):
checked_add_with_arr(self.arr, self.arr_mixed, arr_mask=self.arr_nan_1,
b_mask=self.arr_nan_2)
checked_add_with_arr(
self.arr, self.arr_mixed, arr_mask=self.arr_nan_1, b_mask=self.arr_nan_2
)


from .pandas_vb_common import setup # noqa: F401