From 80471f8f062f3f45c6e1599062d11da00b89dc04 Mon Sep 17 00:00:00 2001 From: Bjoern Ludwig Date: Sat, 23 Apr 2022 23:45:18 +0200 Subject: [PATCH 1/4] refactor pandas df.append to pd.concat w/ asserts --- .../python/plotly/plotly/express/_core.py | 2 ++ .../figure_factory/_county_choropleth.py | 20 ++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 54c6bb58411..a758c13112c 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1634,7 +1634,9 @@ def aggfunc_continuous(x): df_tree["parent"] = df_tree["parent"].str.rstrip("/") if cols: df_tree[cols] = dfg[cols] + df_all_trees_old = df_all_trees.append(df_tree, ignore_index=True) df_all_trees = df_all_trees.append(df_tree, ignore_index=True) + assert df_all_trees.equals(df_all_trees_old) # we want to make sure than (?) is the first color of the sequence if args["color"] and discrete_color: diff --git a/packages/python/plotly/plotly/figure_factory/_county_choropleth.py b/packages/python/plotly/plotly/figure_factory/_county_choropleth.py index 5494e9c973e..b53ae2453f4 100644 --- a/packages/python/plotly/plotly/figure_factory/_county_choropleth.py +++ b/packages/python/plotly/plotly/figure_factory/_county_choropleth.py @@ -86,7 +86,9 @@ def _create_us_counties_df(st_to_state_name_dict, state_to_st_dict): columns=["State", "ST", "geometry", "FIPS", "STATEFP", "NAME"], index=[max(gdf.index) + 1], ) - gdf = gdf.append(singlerow, sort=True) + gdf_old = gdf.append(singlerow, sort=True) + gdf = pd.concat([gdf, singlerow], sort=True) + assert gdf.equals(gdf_old) f = 51515 singlerow = pd.DataFrame( @@ -103,7 +105,9 @@ def _create_us_counties_df(st_to_state_name_dict, state_to_st_dict): columns=["State", "ST", "geometry", "FIPS", "STATEFP", "NAME"], index=[max(gdf.index) + 1], ) - gdf = gdf.append(singlerow, sort=True) + gdf_old = gdf.append(singlerow, sort=True) + gdf = pd.concat([gdf, singlerow], sort=True) + assert gdf.equals(gdf_old) f = 2270 singlerow = pd.DataFrame( @@ -120,19 +124,25 @@ def _create_us_counties_df(st_to_state_name_dict, state_to_st_dict): columns=["State", "ST", "geometry", "FIPS", "STATEFP", "NAME"], index=[max(gdf.index) + 1], ) - gdf = gdf.append(singlerow, sort=True) + gdf_old = gdf.append(singlerow, sort=True) + gdf = pd.concat([gdf, singlerow], sort=True) + assert gdf.equals(gdf_old) row_2198 = gdf[gdf["FIPS"] == 2198] row_2198.index = [max(gdf.index) + 1] row_2198.loc[row_2198.index[0], "FIPS"] = 2201 row_2198.loc[row_2198.index[0], "STATEFP"] = "02" - gdf = gdf.append(row_2198, sort=True) + gdf_old = gdf.append(row_2198, sort=True) + gdf = pd.concat([gdf, row_2198], sort=True) + assert gdf.equals(gdf_old) row_2105 = gdf[gdf["FIPS"] == 2105] row_2105.index = [max(gdf.index) + 1] row_2105.loc[row_2105.index[0], "FIPS"] = 2232 row_2105.loc[row_2105.index[0], "STATEFP"] = "02" - gdf = gdf.append(row_2105, sort=True) + gdf_old = gdf.append(row_2105, sort=True) + gdf = pd.concat([gdf, row_2105], sort=True) + assert gdf.equals(gdf_old) gdf = gdf.rename(columns={"NAME": "COUNTY_NAME"}) gdf_reduced = gdf[["FIPS", "STATEFP", "COUNTY_NAME", "geometry"]] From 46609982c995108e84dab03ebd41e91170729555 Mon Sep 17 00:00:00 2001 From: Bjoern Ludwig Date: Sun, 24 Apr 2022 00:21:26 +0200 Subject: [PATCH 2/4] remove asserts from pd.concat refactoring --- packages/python/plotly/plotly/express/_core.py | 2 -- .../plotly/plotly/figure_factory/_county_choropleth.py | 10 ---------- 2 files changed, 12 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index a758c13112c..54c6bb58411 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1634,9 +1634,7 @@ def aggfunc_continuous(x): df_tree["parent"] = df_tree["parent"].str.rstrip("/") if cols: df_tree[cols] = dfg[cols] - df_all_trees_old = df_all_trees.append(df_tree, ignore_index=True) df_all_trees = df_all_trees.append(df_tree, ignore_index=True) - assert df_all_trees.equals(df_all_trees_old) # we want to make sure than (?) is the first color of the sequence if args["color"] and discrete_color: diff --git a/packages/python/plotly/plotly/figure_factory/_county_choropleth.py b/packages/python/plotly/plotly/figure_factory/_county_choropleth.py index b53ae2453f4..65c5e5ffede 100644 --- a/packages/python/plotly/plotly/figure_factory/_county_choropleth.py +++ b/packages/python/plotly/plotly/figure_factory/_county_choropleth.py @@ -86,9 +86,7 @@ def _create_us_counties_df(st_to_state_name_dict, state_to_st_dict): columns=["State", "ST", "geometry", "FIPS", "STATEFP", "NAME"], index=[max(gdf.index) + 1], ) - gdf_old = gdf.append(singlerow, sort=True) gdf = pd.concat([gdf, singlerow], sort=True) - assert gdf.equals(gdf_old) f = 51515 singlerow = pd.DataFrame( @@ -105,9 +103,7 @@ def _create_us_counties_df(st_to_state_name_dict, state_to_st_dict): columns=["State", "ST", "geometry", "FIPS", "STATEFP", "NAME"], index=[max(gdf.index) + 1], ) - gdf_old = gdf.append(singlerow, sort=True) gdf = pd.concat([gdf, singlerow], sort=True) - assert gdf.equals(gdf_old) f = 2270 singlerow = pd.DataFrame( @@ -124,25 +120,19 @@ def _create_us_counties_df(st_to_state_name_dict, state_to_st_dict): columns=["State", "ST", "geometry", "FIPS", "STATEFP", "NAME"], index=[max(gdf.index) + 1], ) - gdf_old = gdf.append(singlerow, sort=True) gdf = pd.concat([gdf, singlerow], sort=True) - assert gdf.equals(gdf_old) row_2198 = gdf[gdf["FIPS"] == 2198] row_2198.index = [max(gdf.index) + 1] row_2198.loc[row_2198.index[0], "FIPS"] = 2201 row_2198.loc[row_2198.index[0], "STATEFP"] = "02" - gdf_old = gdf.append(row_2198, sort=True) gdf = pd.concat([gdf, row_2198], sort=True) - assert gdf.equals(gdf_old) row_2105 = gdf[gdf["FIPS"] == 2105] row_2105.index = [max(gdf.index) + 1] row_2105.loc[row_2105.index[0], "FIPS"] = 2232 row_2105.loc[row_2105.index[0], "STATEFP"] = "02" - gdf_old = gdf.append(row_2105, sort=True) gdf = pd.concat([gdf, row_2105], sort=True) - assert gdf.equals(gdf_old) gdf = gdf.rename(columns={"NAME": "COUNTY_NAME"}) gdf_reduced = gdf[["FIPS", "STATEFP", "COUNTY_NAME", "geometry"]] From 0f155ecaa4363c27c6638d4ff1ffab76f5431cf6 Mon Sep 17 00:00:00 2001 From: Bjoern Ludwig Date: Sun, 24 Apr 2022 00:28:51 +0200 Subject: [PATCH 3/4] refactor pandas df.append to pd.concat in _core --- packages/python/plotly/plotly/express/_core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 54c6bb58411..440845d6345 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1634,7 +1634,9 @@ def aggfunc_continuous(x): df_tree["parent"] = df_tree["parent"].str.rstrip("/") if cols: df_tree[cols] = dfg[cols] - df_all_trees = df_all_trees.append(df_tree, ignore_index=True) + df_all_trees_old = df_all_trees.append(df_tree, ignore_index=True) + df_all_trees = pd.concat([df_all_trees, df_tree], ignore_index=True) + assert df_all_trees.equals(df_all_trees_old) # we want to make sure than (?) is the first color of the sequence if args["color"] and discrete_color: From 5da2df722e228c231b40516d82fdc8adb285c624 Mon Sep 17 00:00:00 2001 From: Bjoern Ludwig Date: Sun, 24 Apr 2022 02:04:40 +0200 Subject: [PATCH 4/4] remove assert from pd.concat refactoring in _core --- packages/python/plotly/plotly/express/_core.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py index 440845d6345..2c99f3b4e93 100644 --- a/packages/python/plotly/plotly/express/_core.py +++ b/packages/python/plotly/plotly/express/_core.py @@ -1634,9 +1634,7 @@ def aggfunc_continuous(x): df_tree["parent"] = df_tree["parent"].str.rstrip("/") if cols: df_tree[cols] = dfg[cols] - df_all_trees_old = df_all_trees.append(df_tree, ignore_index=True) df_all_trees = pd.concat([df_all_trees, df_tree], ignore_index=True) - assert df_all_trees.equals(df_all_trees_old) # we want to make sure than (?) is the first color of the sequence if args["color"] and discrete_color: