Merge remote-tracking branch 'upstream/master' into avoid-iter-row

MarcoGorelli · MarcoGorelli · commit 7f2ce4408cdd · 2024-11-22T17:15:01.000Z
diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -652,9 +652,6 @@ def set_cartesian_axis_opts(args, axis, letter, orders):
 
 
 def configure_cartesian_marginal_axes(args, fig, orders):
-    if "histogram" in [args["marginal_x"], args["marginal_y"]]:
-        fig.layout["barmode"] = "overlay"
-
     nrows = len(fig._grid_ref)
     ncols = len(fig._grid_ref[0])
 
@@ -1489,25 +1486,21 @@ def build_dataframe(args, constructor):
     # PySpark to pandas.
     is_pd_like = False
 
-    # Flag that indicates if data_frame requires to be converted to arrow via the
-    # dataframe interchange protocol.
-    # True if Ibis, DuckDB, Vaex or implements __dataframe__
+    # Flag that indicates if data_frame needs to be converted to PyArrow.
+    # True for Ibis, DuckDB, Vaex, or any object that implements __dataframe__.
     needs_interchanging = False
 
     # If data_frame is provided, we parse it into a narwhals DataFrame, while accounting
     # for compatibility with pandas specific paths (e.g. Index/MultiIndex case).
     if df_provided:
-
         # data_frame is pandas-like DataFrame (pandas, modin.pandas, cudf)
         if nw.dependencies.is_pandas_like_dataframe(args["data_frame"]):
-
             columns = args["data_frame"].columns  # This can be multi index
             args["data_frame"] = nw.from_native(args["data_frame"], eager_only=True)
             is_pd_like = True
 
         # data_frame is pandas-like Series (pandas, modin.pandas, cudf)
         elif nw.dependencies.is_pandas_like_series(args["data_frame"]):
-
             args["data_frame"] = nw.from_native(
                 args["data_frame"], series_only=True
             ).to_frame()
@@ -1993,7 +1986,6 @@ def process_dataframe_hierarchy(args):
 
     if args["color"]:
         if discrete_color:
-
             discrete_aggs.append(args["color"])
             agg_f[args["color"]] = nw.col(args["color"]).max()
             agg_f[f'{args["color"]}{n_unique_token}'] = (
@@ -2048,7 +2040,6 @@ def post_agg(dframe: nw.LazyFrame, continuous_aggs, discrete_aggs) -> nw.LazyFra
         ).drop([f"{col}{n_unique_token}" for col in discrete_aggs])
 
     for i, level in enumerate(path):
-
         dfg = (
             df.group_by(path[i:], drop_null_keys=True)
             .agg(**agg_f)
@@ -2425,7 +2416,6 @@ def get_groups_and_orders(args, grouper):
     # figure out orders and what the single group name would be if there were one
     single_group_name = []
     unique_cache = dict()
-    grp_to_idx = dict()
 
     for i, col in enumerate(grouper):
         if col == one_group:
@@ -2443,27 +2433,28 @@ def get_groups_and_orders(args, grouper):
             else:
                 orders[col] = list(OrderedDict.fromkeys(list(orders[col]) + uniques))
 
-    grp_to_idx = {k: i for i, k in enumerate(orders)}
-
     if len(single_group_name) == len(grouper):
         # we have a single group, so we can skip all group-by operations!
         groups = {tuple(single_group_name): df}
     else:
-        required_grouper = list(orders.keys())
+        required_grouper = [group for group in orders if group in grouper]
         grouped = dict(df.group_by(required_grouper, drop_null_keys=True).__iter__())
-        sorted_group_names = list(grouped.keys())
 
-        for i, col in reversed(list(enumerate(required_grouper))):
-            sorted_group_names = sorted(
-                sorted_group_names,
-                key=lambda g: orders[col].index(g[i]) if g[i] in orders[col] else -1,
-            )
+        sorted_group_names = sorted(
+            grouped.keys(),
+            key=lambda values: [
+                orders[group].index(value) if value in orders[group] else -1
+                for group, value in zip(required_grouper, values)
+            ],
+        )
 
         # calculate the full group_names by inserting "" in the tuple index for one_group groups
         full_sorted_group_names = [
             tuple(
                 [
-                    "" if col == one_group else sub_group_names[grp_to_idx[col]]
+                    ""
+                    if col == one_group
+                    else sub_group_names[required_grouper.index(col)]
                     for col in grouper
                 ]
             )
@@ -2490,6 +2481,10 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None):
         constructor = go.Bar
         args = process_dataframe_timeline(args)
 
+    # If we have marginal histograms, set barmode to "overlay"
+    if "histogram" in [args.get("marginal_x"), args.get("marginal_y")]:
+        layout_patch["barmode"] = "overlay"
+
     trace_specs, grouped_mappings, sizeref, show_colorbar = infer_config(
         args, constructor, trace_patch, layout_patch
     )
@@ -2561,7 +2556,12 @@ def make_figure(args, constructor, trace_patch=None, layout_patch=None):
                     legendgroup=trace_name,
                     showlegend=(trace_name != "" and trace_name not in trace_names),
                 )
-            if trace_spec.constructor in [go.Bar, go.Violin, go.Box, go.Histogram]:
+
+            # Set 'alignmentgroup'/'offsetgroup' only if barmode is "group" or unset
+            barmode = layout_patch.get("barmode")
+            if trace_spec.constructor in [go.Bar, go.Box, go.Violin, go.Histogram] and (
+                barmode == "group" or barmode is None
+            ):
                 trace.update(alignmentgroup=True, offsetgroup=trace_name)
             trace_names.add(trace_name)
 
diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px.py
@@ -289,6 +289,27 @@ def test_orthogonal_orderings(backend, days, times):
     assert_orderings(backend, days, days, times, times)
 
 
+def test_category_order_with_category_as_x(backend):
+    # https://github.com/plotly/plotly.py/issues/4875
+    tips = nw.from_native(px.data.tips(return_type=backend))
+    fig = px.bar(
+        tips,
+        x="day",
+        y="total_bill",
+        color="smoker",
+        barmode="group",
+        facet_col="sex",
+        category_orders={
+            "day": ["Thur", "Fri", "Sat", "Sun"],
+            "smoker": ["Yes", "No"],
+            "sex": ["Male", "Female"],
+        },
+    )
+    assert fig["layout"]["xaxis"]["categoryarray"] == ("Thur", "Fri", "Sat", "Sun")
+    for trace in fig["data"]:
+        assert set(trace["x"]) == {"Thur", "Fri", "Sat", "Sun"}
+
+
 def test_permissive_defaults():
     msg = "'PxDefaults' object has no attribute 'should_not_work'"
     with pytest.raises(AttributeError, match=msg):
diff --git a/packages/python/plotly/plotly/tests/test_optional/test_utils/test_utils.py b/packages/python/plotly/plotly/tests/test_optional/test_utils/test_utils.py
@@ -95,7 +95,7 @@ def np_inf():
     columns=["col 1"], data=[1, 2, 3, dt(2014, 1, 5), pd.NaT, np_nan(), np_inf()]
 )
 
-rng = pd.date_range("1/1/2011", periods=2, freq="H")
+rng = pd.date_range("1/1/2011", periods=2, freq="h")
 ts = pd.Series([1.5, 2.5], index=rng)
 
 

Original file line number	Diff line number	Diff line change
`@@ -95,7 +95,7 @@ def np_inf():`
`95`	`95`	`columns=["col 1"], data=[1, 2, 3, dt(2014, 1, 5), pd.NaT, np_nan(), np_inf()]`
`96`	`96`	`)`
`97`	`97`
`98`		`-rng = pd.date_range("1/1/2011", periods=2, freq="H")`
	`98`	`+rng = pd.date_range("1/1/2011", periods=2, freq="h")`
`99`	`99`	`ts = pd.Series([1.5, 2.5], index=rng)`
`100`	`100`
`101`	`101`