diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index b43fce73ee128..e63e66f441afe 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -408,7 +408,7 @@ def time_multi_size(self): self.df.groupby(["key1", "key2"]).size() def time_category_size(self): - self.draws.groupby(self.cats).size() + self.draws.groupby(self.cats, observed=True).size() class Shift: @@ -767,7 +767,10 @@ def time_str_func(self, dtype, method): class Categories: - def setup(self): + params = [True, False] + param_names = ["observed"] + + def setup(self, observed): N = 10**5 arr = np.random.random(N) data = {"a": Categorical(np.random.randint(10000, size=N)), "b": arr} @@ -785,23 +788,23 @@ def setup(self): } self.df_extra_cat = DataFrame(data) - def time_groupby_sort(self): - self.df.groupby("a")["b"].count() + def time_groupby_sort(self, observed): + self.df.groupby("a", observed=observed)["b"].count() - def time_groupby_nosort(self): - self.df.groupby("a", sort=False)["b"].count() + def time_groupby_nosort(self, observed): + self.df.groupby("a", observed=observed, sort=False)["b"].count() - def time_groupby_ordered_sort(self): - self.df_ordered.groupby("a")["b"].count() + def time_groupby_ordered_sort(self, observed): + self.df_ordered.groupby("a", observed=observed)["b"].count() - def time_groupby_ordered_nosort(self): - self.df_ordered.groupby("a", sort=False)["b"].count() + def time_groupby_ordered_nosort(self, observed): + self.df_ordered.groupby("a", observed=observed, sort=False)["b"].count() - def time_groupby_extra_cat_sort(self): - self.df_extra_cat.groupby("a")["b"].count() + def time_groupby_extra_cat_sort(self, observed): + self.df_extra_cat.groupby("a", observed=observed)["b"].count() - def time_groupby_extra_cat_nosort(self): - self.df_extra_cat.groupby("a", sort=False)["b"].count() + def time_groupby_extra_cat_nosort(self, observed): + self.df_extra_cat.groupby("a", observed=observed, sort=False)["b"].count() class Datelike: diff --git a/asv_bench/benchmarks/io/csv.py b/asv_bench/benchmarks/io/csv.py index 0d189bcd30b7c..c5e3e80571e30 100644 --- a/asv_bench/benchmarks/io/csv.py +++ b/asv_bench/benchmarks/io/csv.py @@ -226,12 +226,13 @@ def data(self, stringio_object): class ReadCSVDInferDatetimeFormat(StringIORewind): - params = ([True, False], ["custom", "iso8601", "ymd"]) - param_names = ["infer_datetime_format", "format"] + params = [None, "custom", "iso8601", "ymd"] + param_names = ["format"] - def setup(self, infer_datetime_format, format): + def setup(self, format): rng = date_range("1/1/2000", periods=1000) formats = { + None: None, "custom": "%m/%d/%Y %H:%M:%S.%f", "iso8601": "%Y-%m-%d %H:%M:%S", "ymd": "%Y%m%d", @@ -239,13 +240,12 @@ def setup(self, infer_datetime_format, format): dt_format = formats[format] self.StringIO_input = StringIO("\n".join(rng.strftime(dt_format).tolist())) - def time_read_csv(self, infer_datetime_format, format): + def time_read_csv(self, format): read_csv( self.data(self.StringIO_input), header=None, names=["foo"], parse_dates=["foo"], - infer_datetime_format=infer_datetime_format, ) @@ -262,7 +262,6 @@ def time_read_csv(self): header=None, names=["foo"], parse_dates=["foo"], - infer_datetime_format=False, ) @@ -279,7 +278,6 @@ def time_read_csv(self, bad_date_value): header=None, names=["foo", "bar"], parse_dates=["foo"], - infer_datetime_format=False, ) diff --git a/asv_bench/benchmarks/replace.py b/asv_bench/benchmarks/replace.py index 36b5b54e4440b..a9276b7dc32ce 100644 --- a/asv_bench/benchmarks/replace.py +++ b/asv_bench/benchmarks/replace.py @@ -42,7 +42,7 @@ class ReplaceList: param_names = ["inplace"] def setup(self, inplace): - self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(4 * 10**7)) + self.df = pd.DataFrame({"A": 0, "B": 0}, index=range(10**7)) def time_replace_list(self, inplace): self.df.replace([np.inf, -np.inf], np.nan, inplace=inplace)