From 7c6def85971745acf1468afacce2cfbb209d9379 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 25 Jul 2019 15:03:44 -0700 Subject: [PATCH 1/4] Broke out lines tests --- asv_bench/benchmarks/io/json.py | 60 ++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 0ce42856fb14a..9af24fa25194d 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -126,19 +126,69 @@ def time_float_int(self, orient): def time_float_int_str(self, orient): self.df_int_float_str.to_json(self.fname, orient=orient) - def time_floats_with_int_idex_lines(self, orient): + +class ToJSON(BaseIO): + + fname = "__test__.json" + + def setup(self): + N = 10 ** 5 + ncols = 5 + index = date_range("20000101", periods=N, freq="H") + timedeltas = timedelta_range(start=1, periods=N, freq="s") + datetimes = date_range(start=1, periods=N, freq="s") + ints = np.random.randint(100000000, size=N) + floats = np.random.randn(N) + strings = tm.makeStringIndex(N) + self.df = DataFrame(np.random.randn(N, ncols), index=np.arange(N)) + self.df_date_idx = DataFrame(np.random.randn(N, ncols), index=index) + self.df_td_int_ts = DataFrame( + { + "td_1": timedeltas, + "td_2": timedeltas, + "int_1": ints, + "int_2": ints, + "ts_1": datetimes, + "ts_2": datetimes, + }, + index=index, + ) + self.df_int_floats = DataFrame( + { + "int_1": ints, + "int_2": ints, + "int_3": ints, + "float_1": floats, + "float_2": floats, + "float_3": floats, + }, + index=index, + ) + self.df_int_float_str = DataFrame( + { + "int_1": ints, + "int_2": ints, + "float_1": floats, + "float_2": floats, + "str_1": strings, + "str_2": strings, + }, + index=index, + ) + + def time_floats_with_int_idex_lines(self): self.df.to_json(self.fname, orient="records", lines=True) - def time_floats_with_dt_index_lines(self, orient): + def time_floats_with_dt_index_lines(self): self.df_date_idx.to_json(self.fname, orient="records", lines=True) - def time_delta_int_tstamp_lines(self, orient): + def time_delta_int_tstamp_lines(self): self.df_td_int_ts.to_json(self.fname, orient="records", lines=True) - def time_float_int_lines(self, orient): + def time_float_int_lines(self): self.df_int_floats.to_json(self.fname, orient="records", lines=True) - def time_float_int_str_lines(self, orient): + def time_float_int_str_lines(self): self.df_int_float_str.to_json(self.fname, orient="records", lines=True) From de13a9a0186e0e1c0c9f9675d63f520aa1bffe35 Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 25 Jul 2019 15:15:04 -0700 Subject: [PATCH 2/4] Improved and expanded benchmarks --- asv_bench/benchmarks/io/json.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 9af24fa25194d..1fb613bc9862b 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -63,10 +63,12 @@ def peakmem_read_json_lines_concat(self, index): class ToJSON(BaseIO): fname = "__test__.json" - params = ["split", "columns", "index"] - param_names = ["orient"] + params = [["split", "columns", "index", "values", "records"], + ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", + "df_int_float_str"]] + param_names = ["orient", "frame"] - def setup(self, lines_orient): + def setup(self, orient, frame): N = 10 ** 5 ncols = 5 index = date_range("20000101", periods=N, freq="H") @@ -111,20 +113,21 @@ def setup(self, lines_orient): index=index, ) - def time_floats_with_int_index(self, orient): - self.df.to_json(self.fname, orient=orient) + def time_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) - def time_floats_with_dt_index(self, orient): - self.df_date_idx.to_json(self.fname, orient=orient) + def mem_to_json(self, orient, frame): + getattr(self, frame).to_json(self.fname, orient=orient) - def time_delta_int_tstamp(self, orient): - self.df_td_int_ts.to_json(self.fname, orient=orient) - - def time_float_int(self, orient): - self.df_int_floats.to_json(self.fname, orient=orient) + def time_to_json_wide(self, orient, frame): + base_df = getattr(self, frame).copy() + df = pd.concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df.to_json(self.fname, orient=orient) - def time_float_int_str(self, orient): - self.df_int_float_str.to_json(self.fname, orient=orient) + def mem_to_json_wide(self, orient, frame): + base_df = getattr(self, frame).copy() + df = pd.concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df.to_json(self.fname, orient=orient) class ToJSON(BaseIO): From d2d4ccd54fc582b3d595adb14098ded9cd0529ee Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 25 Jul 2019 15:34:48 -0700 Subject: [PATCH 3/4] blackify --- asv_bench/benchmarks/io/json.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 1fb613bc9862b..0944c5dd79c4d 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -63,9 +63,10 @@ def peakmem_read_json_lines_concat(self, index): class ToJSON(BaseIO): fname = "__test__.json" - params = [["split", "columns", "index", "values", "records"], - ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", - "df_int_float_str"]] + params = [ + ["split", "columns", "index", "values", "records"], + ["df", "df_date_idx", "df_td_int_ts", "df_int_floats", "df_int_float_str"], + ] param_names = ["orient", "frame"] def setup(self, orient, frame): From 96a2e4c4766cf27d9082c1cf74d80512db15421d Mon Sep 17 00:00:00 2001 From: Will Ayd Date: Thu, 25 Jul 2019 17:12:50 -0700 Subject: [PATCH 4/4] issue fixup --- asv_bench/benchmarks/io/json.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/asv_bench/benchmarks/io/json.py b/asv_bench/benchmarks/io/json.py index 0944c5dd79c4d..fc07f2a484102 100644 --- a/asv_bench/benchmarks/io/json.py +++ b/asv_bench/benchmarks/io/json.py @@ -122,16 +122,16 @@ def mem_to_json(self, orient, frame): def time_to_json_wide(self, orient, frame): base_df = getattr(self, frame).copy() - df = pd.concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) df.to_json(self.fname, orient=orient) def mem_to_json_wide(self, orient, frame): base_df = getattr(self, frame).copy() - df = pd.concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) + df = concat([base_df.iloc[:100]] * 1000, ignore_index=True, axis=1) df.to_json(self.fname, orient=orient) -class ToJSON(BaseIO): +class ToJSONLines(BaseIO): fname = "__test__.json"