From e12c826326974d8539b1cb79a76fee12b604b9bf Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 May 2023 10:43:24 -0700
Subject: [PATCH 1/2] STY: Enable ruff C4 - comprehensions

---
 asv_bench/benchmarks/dtypes.py                         |  2 +-
 pandas/core/computation/expr.py                        |  2 +-
 pandas/core/groupby/groupby.py                         |  4 ++--
 pandas/io/formats/excel.py                             |  2 +-
 pandas/io/formats/html.py                              |  2 +-
 pandas/tests/computation/test_eval.py                  |  4 ++--
 pandas/tests/dtypes/test_missing.py                    |  2 +-
 pandas/tests/extension/date/array.py                   |  4 ++--
 pandas/tests/frame/methods/test_astype.py              |  4 ++--
 pandas/tests/groupby/test_grouping.py                  |  2 +-
 pandas/tests/groupby/test_raises.py                    |  6 +++---
 pandas/tests/groupby/transform/test_transform.py       | 10 +++++-----
 pandas/tests/indexing/multiindex/test_indexing_slow.py |  2 +-
 pandas/tests/indexing/test_partial.py                  | 10 +++++-----
 pandas/tests/io/formats/style/test_html.py             |  2 +-
 pandas/tests/io/formats/style/test_matplotlib.py       |  2 +-
 pandas/tests/io/formats/test_to_html.py                |  2 +-
 pandas/tests/io/parser/test_parse_dates.py             |  4 +---
 pandas/tests/libs/test_hashtable.py                    |  2 +-
 pandas/tests/series/test_constructors.py               |  6 +++---
 pandas/tests/util/test_assert_almost_equal.py          |  2 +-
 pyproject.toml                                         |  7 ++++---
 22 files changed, 41 insertions(+), 42 deletions(-)

diff --git a/asv_bench/benchmarks/dtypes.py b/asv_bench/benchmarks/dtypes.py
index 52c87455b12b3..c33043c0eddc1 100644
--- a/asv_bench/benchmarks/dtypes.py
+++ b/asv_bench/benchmarks/dtypes.py
@@ -24,7 +24,7 @@
 
 
 class Dtypes:
-    params = _dtypes + list(map(lambda dt: dt.name, _dtypes))
+    params = _dtypes + [dt.name for dt in _dtypes]
     param_names = ["dtype"]
 
     def time_pandas_dtype(self, dtype):
diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py
index 8436126232cf9..f8c8e6d87ff13 100644
--- a/pandas/core/computation/expr.py
+++ b/pandas/core/computation/expr.py
@@ -192,7 +192,7 @@ def _filter_nodes(superclass, all_nodes=_all_nodes):
     return frozenset(node_names)
 
 
-_all_node_names = frozenset(map(lambda x: x.__name__, _all_nodes))
+_all_node_names = frozenset(x.__name__ for x in _all_nodes)
 _mod_nodes = _filter_nodes(ast.mod)
 _stmt_nodes = _filter_nodes(ast.stmt)
 _expr_nodes = _filter_nodes(ast.expr)
diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
index 56d6aa92022f9..072627b275a02 100644
--- a/pandas/core/groupby/groupby.py
+++ b/pandas/core/groupby/groupby.py
@@ -4115,9 +4115,9 @@ def _reindex_output(
         # reindex `output`, and then reset the in-axis grouper columns.
 
         # Select in-axis groupers
-        in_axis_grps = list(
+        in_axis_grps = [
             (i, ping.name) for (i, ping) in enumerate(groupings) if ping.in_axis
-        )
+        ]
         if len(in_axis_grps) > 0:
             g_nums, g_names = zip(*in_axis_grps)
             output = output.drop(labels=list(g_names), axis=1)
diff --git a/pandas/io/formats/excel.py b/pandas/io/formats/excel.py
index da35716a5b239..457bbced87d55 100644
--- a/pandas/io/formats/excel.py
+++ b/pandas/io/formats/excel.py
@@ -726,7 +726,7 @@ def _format_header(self) -> Iterable[ExcelCell]:
             row = [x if x is not None else "" for x in self.df.index.names] + [
                 ""
             ] * len(self.columns)
-            if reduce(lambda x, y: x and y, map(lambda x: x != "", row)):
+            if reduce(lambda x, y: x and y, (x != "" for x in row)):
                 gen2 = (
                     ExcelCell(self.rowcounter, colindex, val, self.header_style)
                     for colindex, val in enumerate(row)
diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py
index 6fbbbd01cf773..0ab02a81d4880 100644
--- a/pandas/io/formats/html.py
+++ b/pandas/io/formats/html.py
@@ -621,7 +621,7 @@ def write_style(self) -> None:
                 )
         else:
             element_props.append(("thead th", "text-align", "right"))
-        template_mid = "\n\n".join(map(lambda t: template_select % t, element_props))
+        template_mid = "\n\n".join(template_select % t for t in element_props)
         template = dedent("\n".join((template_first, template_mid, template_last)))
         self.write(template)
 
diff --git a/pandas/tests/computation/test_eval.py b/pandas/tests/computation/test_eval.py
index 9374b232f3cd2..93ae2bfdd01e4 100644
--- a/pandas/tests/computation/test_eval.py
+++ b/pandas/tests/computation/test_eval.py
@@ -218,7 +218,7 @@ def test_compound_invert_op(self, op, lhs, rhs, request, engine, parser):
         else:
             # compound
             if is_scalar(lhs) and is_scalar(rhs):
-                lhs, rhs = map(lambda x: np.array([x]), (lhs, rhs))
+                lhs, rhs = (np.array([x]) for x in (lhs, rhs))
             expected = _eval_single_bin(lhs, op, rhs, engine)
             if is_scalar(expected):
                 expected = not expected
@@ -746,7 +746,7 @@ def test_binop_typecasting(self, engine, parser, op, dt, left_right):
 def should_warn(*args):
     not_mono = not any(map(operator.attrgetter("is_monotonic_increasing"), args))
     only_one_dt = reduce(
-        operator.xor, map(lambda x: issubclass(x.dtype.type, np.datetime64), args)
+        operator.xor, (issubclass(x.dtype.type, np.datetime64) for x in args)
     )
     return not_mono and only_one_dt
 
diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py
index 58bb551e036a9..0158e7589b214 100644
--- a/pandas/tests/dtypes/test_missing.py
+++ b/pandas/tests/dtypes/test_missing.py
@@ -643,7 +643,7 @@ def test_array_equivalent_nested_mixed_list(strict_nan):
         np.array(["c", "d"], dtype=object),
     ]
     left = np.array([subarr, None], dtype=object)
-    right = np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object)
+    right = np.array([[[None, "b"], ["c", "d"]], None], dtype=object)
     assert array_equivalent(left, right, strict_nan=strict_nan)
     assert not array_equivalent(left, right[::-1], strict_nan=strict_nan)
 
diff --git a/pandas/tests/extension/date/array.py b/pandas/tests/extension/date/array.py
index 08d7e0de82ba8..20373e323e2de 100644
--- a/pandas/tests/extension/date/array.py
+++ b/pandas/tests/extension/date/array.py
@@ -83,7 +83,7 @@ def __init__(
             self._day = np.zeros(ldates, dtype=np.uint8)  # 255 (1, 12)
             # populate them
             for i, (y, m, d) in enumerate(
-                map(lambda date: (date.year, date.month, date.day), dates)
+                (date.year, date.month, date.day) for date in dates
             ):
                 self._year[i] = y
                 self._month[i] = m
@@ -94,7 +94,7 @@ def __init__(
             if ldates != 3:
                 raise ValueError("only triples are valid")
             # check if all elements have the same type
-            if any(map(lambda x: not isinstance(x, np.ndarray), dates)):
+            if any(not isinstance(x, np.ndarray) for x in dates):
                 raise TypeError("invalid type")
             ly, lm, ld = (len(cast(np.ndarray, d)) for d in dates)
             if not ly == lm == ld:
diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py
index 08546f03cee69..9fdb600b6efc4 100644
--- a/pandas/tests/frame/methods/test_astype.py
+++ b/pandas/tests/frame/methods/test_astype.py
@@ -152,9 +152,9 @@ def test_astype_str(self):
 
         expected = DataFrame(
             {
-                "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))),
+                "a": list(map(str, (Timestamp(x)._date_repr for x in a._values))),
                 "b": list(map(str, map(Timestamp, b._values))),
-                "c": list(map(lambda x: Timedelta(x)._repr_base(), c._values)),
+                "c": [Timedelta(x)._repr_base() for x in c._values],
                 "d": list(map(str, d._values)),
                 "e": list(map(str, e._values)),
             }
diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index 2d39120e1c1ff..a73b1893c6b0a 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -992,7 +992,7 @@ def test_multi_iter_frame(self, three_group):
         # calling `dict` on a DataFrameGroupBy leads to a TypeError,
         # we need to use a dictionary comprehension here
         # pylint: disable-next=unnecessary-comprehension
-        groups = {key: gp for key, gp in grouped}
+        groups = dict(grouped)
         assert len(groups) == 2
 
         # axis = 1
diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py
index fb967430376de..6fb903b02b62f 100644
--- a/pandas/tests/groupby/test_raises.py
+++ b/pandas/tests/groupby/test_raises.py
@@ -229,7 +229,7 @@ def test_groupby_raises_string_np(
         ),
     }[groupby_func_np]
 
-    _call_and_check(klass, msg, how, gb, groupby_func_np, tuple())
+    _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
@@ -333,7 +333,7 @@ def test_groupby_raises_datetime_np(
         np.mean: (None, ""),
     }[groupby_func_np]
 
-    _call_and_check(klass, msg, how, gb, groupby_func_np, tuple())
+    _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
 @pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"])
@@ -526,7 +526,7 @@ def test_groupby_raises_category_np(
         ),
     }[groupby_func_np]
 
-    _call_and_check(klass, msg, how, gb, groupby_func_np, tuple())
+    _call_and_check(klass, msg, how, gb, groupby_func_np, ())
 
 
 @pytest.mark.parametrize("how", ["method", "agg", "transform"])
diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py
index 4adab3a1d6fff..04e6f5d2fdcaa 100644
--- a/pandas/tests/groupby/transform/test_transform.py
+++ b/pandas/tests/groupby/transform/test_transform.py
@@ -362,11 +362,11 @@ def test_dispatch_transform(tsframe):
 
 def test_transform_fillna_null():
     df = DataFrame(
-        dict(
-            price=[10, 10, 20, 20, 30, 30],
-            color=[10, 10, 20, 20, 30, 30],
-            cost=(100, 200, 300, 400, 500, 600),
-        )
+        {
+            "price": [10, 10, 20, 20, 30, 30],
+            "color": [10, 10, 20, 20, 30, 30],
+            "cost": (100, 200, 300, 400, 500, 600),
+        }
     )
     with pytest.raises(ValueError, match="Must specify a fill 'value' or 'method'"):
         df.groupby(["price"]).transform("fillna")
diff --git a/pandas/tests/indexing/multiindex/test_indexing_slow.py b/pandas/tests/indexing/multiindex/test_indexing_slow.py
index c59dd2724a94f..36b7dcfe4db12 100644
--- a/pandas/tests/indexing/multiindex/test_indexing_slow.py
+++ b/pandas/tests/indexing/multiindex/test_indexing_slow.py
@@ -34,7 +34,7 @@
     np.random.choice(list("ZYXWVUTSRQP"), m),
 ]
 keys = list(map(tuple, zip(*keys)))
-keys += list(map(lambda t: t[:-1], vals[:: n // m]))
+keys += [t[:-1] for t in vals[:: n // m]]
 
 
 # covers both unique index and non-unique index
diff --git a/pandas/tests/indexing/test_partial.py b/pandas/tests/indexing/test_partial.py
index a2fcd18ba5bfe..5d1d4ba6f638a 100644
--- a/pandas/tests/indexing/test_partial.py
+++ b/pandas/tests/indexing/test_partial.py
@@ -283,30 +283,30 @@ def test_partial_setting_frame(self, using_array_manager):
             df.iat[4, 2] = 5.0
 
         # row setting where it exists
-        expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]}))
+        expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
         df = df_orig.copy()
         df.iloc[1] = df.iloc[2]
         tm.assert_frame_equal(df, expected)
 
-        expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]}))
+        expected = DataFrame({"A": [0, 4, 4], "B": [1, 5, 5]})
         df = df_orig.copy()
         df.loc[1] = df.loc[2]
         tm.assert_frame_equal(df, expected)
 
         # like 2578, partial setting with dtype preservation
-        expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]}))
+        expected = DataFrame({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})
         df = df_orig.copy()
         df.loc[3] = df.loc[2]
         tm.assert_frame_equal(df, expected)
 
         # single dtype frame, overwrite
-        expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]}))
+        expected = DataFrame({"A": [0, 2, 4], "B": [0, 2, 4]})
         df = df_orig.copy()
         df.loc[:, "B"] = df.loc[:, "A"]
         tm.assert_frame_equal(df, expected)
 
         # mixed dtype frame, overwrite
-        expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])}))
+        expected = DataFrame({"A": [0, 2, 4], "B": Series([0.0, 2.0, 4.0])})
         df = df_orig.copy()
         df["B"] = df["B"].astype(np.float64)
         # as of 2.0, df.loc[:, "B"] = ... attempts (and here succeeds) at
diff --git a/pandas/tests/io/formats/style/test_html.py b/pandas/tests/io/formats/style/test_html.py
index 67f7e12fcc3c2..c44f42727faeb 100644
--- a/pandas/tests/io/formats/style/test_html.py
+++ b/pandas/tests/io/formats/style/test_html.py
@@ -978,7 +978,7 @@ def html_lines(foot_prefix: str):
 
 def test_to_html_na_rep_non_scalar_data(datapath):
     # GH47103
-    df = DataFrame([dict(a=1, b=[1, 2, 3], c=np.nan)])
+    df = DataFrame([{"a": 1, "b": [1, 2, 3], "c": np.nan}])
     result = df.style.format(na_rep="-").to_html(table_uuid="test")
     expected = """\
 <style type="text/css">
diff --git a/pandas/tests/io/formats/style/test_matplotlib.py b/pandas/tests/io/formats/style/test_matplotlib.py
index 591fa7f72050e..1485bd64e4b57 100644
--- a/pandas/tests/io/formats/style/test_matplotlib.py
+++ b/pandas/tests/io/formats/style/test_matplotlib.py
@@ -307,7 +307,7 @@ def test_pass_colormap_instance(df, plot_method):
     # https://github.com/pandas-dev/pandas/issues/49374
     cmap = mpl.colors.ListedColormap([[1, 1, 1], [0, 0, 0]])
     df["c"] = df.A + df.B
-    kwargs = dict(x="A", y="B", c="c", colormap=cmap)
+    kwargs = {"x": "A", "y": "B", "c": "c", "colormap": cmap}
     if plot_method == "hexbin":
         kwargs["C"] = kwargs.pop("c")
     getattr(df.plot, plot_method)(**kwargs)
diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py
index a66858d3f983e..9c128756339ab 100644
--- a/pandas/tests/io/formats/test_to_html.py
+++ b/pandas/tests/io/formats/test_to_html.py
@@ -884,7 +884,7 @@ def test_to_html_na_rep_and_float_format(na_rep, datapath):
 
 def test_to_html_na_rep_non_scalar_data(datapath):
     # GH47103
-    df = DataFrame([dict(a=1, b=[1, 2, 3])])
+    df = DataFrame([{"a": 1, "b": [1, 2, 3]}])
     result = df.to_html(na_rep="-")
     expected = expected_html(datapath, "gh47103_expected_output")
     assert result == expected
diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py
index 55efb9254ee34..81de4f13de81d 100644
--- a/pandas/tests/io/parser/test_parse_dates.py
+++ b/pandas/tests/io/parser/test_parse_dates.py
@@ -746,9 +746,7 @@ def test_date_parser_int_bug(all_parsers):
 def test_nat_parse(all_parsers):
     # see gh-3062
     parser = all_parsers
-    df = DataFrame(
-        dict({"A": np.arange(10, dtype="float64"), "B": Timestamp("20010101")})
-    )
+    df = DataFrame({"A": np.arange(10, dtype="float64"), "B": Timestamp("20010101")})
     df.iloc[3:6, :] = np.nan
 
     with tm.ensure_clean("__nat_parse_.csv") as path:
diff --git a/pandas/tests/libs/test_hashtable.py b/pandas/tests/libs/test_hashtable.py
index 4a5d5e0c85a09..0685897e723a2 100644
--- a/pandas/tests/libs/test_hashtable.py
+++ b/pandas/tests/libs/test_hashtable.py
@@ -28,7 +28,7 @@ def get_allocated_khash_memory():
     snapshot = snapshot.filter_traces(
         (tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain()),)
     )
-    return sum(map(lambda x: x.size, snapshot.traces))
+    return sum(x.size for x in snapshot.traces)
 
 
 @pytest.mark.parametrize(
diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py
index 8cfd0682e1f5d..7238232a46e60 100644
--- a/pandas/tests/series/test_constructors.py
+++ b/pandas/tests/series/test_constructors.py
@@ -369,14 +369,14 @@ def test_constructor_generator(self):
 
     def test_constructor_map(self):
         # GH8909
-        m = map(lambda x: x, range(10))
+        m = (x for x in range(10))
 
         result = Series(m)
         exp = Series(range(10))
         tm.assert_series_equal(result, exp)
 
         # same but with non-default index
-        m = map(lambda x: x, range(10))
+        m = (x for x in range(10))
         result = Series(m, index=range(10, 20))
         exp.index = range(10, 20)
         tm.assert_series_equal(result, exp)
@@ -647,7 +647,7 @@ def test_constructor_default_index(self):
             list(range(3)),
             Categorical(["a", "b", "a"]),
             (i for i in range(3)),
-            map(lambda x: x, range(3)),
+            (x for x in range(3)),
         ],
     )
     def test_constructor_index_mismatch(self, input):
diff --git a/pandas/tests/util/test_assert_almost_equal.py b/pandas/tests/util/test_assert_almost_equal.py
index 5c50e5ad83967..ad54606547909 100644
--- a/pandas/tests/util/test_assert_almost_equal.py
+++ b/pandas/tests/util/test_assert_almost_equal.py
@@ -512,7 +512,7 @@ def test_assert_almost_equal_iterable_values_mismatch():
     # same-length lists
     (
         np.array([subarr, None], dtype=object),
-        np.array([list([[None, "b"], ["c", "d"]]), None], dtype=object),
+        np.array([[[None, "b"], ["c", "d"]], None], dtype=object),
     ),
     # dicts
     (
diff --git a/pyproject.toml b/pyproject.toml
index 7604046471fe8..042d13ea7956a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -195,14 +195,13 @@ line-length = 88
 update-check = false
 target-version = "py310"
 fix = true
-unfixable = ["E711"]
+unfixable = []
 
 select = [
   # pyflakes
   "F",
   # pycodestyle
-  "E",
-  "W",
+  "E", "W",
   # flake8-2020
   "YTT",
   # flake8-bugbear
@@ -221,6 +220,8 @@ select = [
   "ISC",
   # type-checking imports
   "TCH",
+  # comprehensions
+  "C4",
 ]
 
 ignore = [

From ca3e18c02cfd6002e5782c996973291e9c94b43f Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 3 May 2023 12:11:50 -0700
Subject: [PATCH 2/2] Revert one C416 fix (dict() on DataFrameGroupBy raises TypeError)

---
 pandas/tests/groupby/test_grouping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py
index a73b1893c6b0a..41b7dde4bf631 100644
--- a/pandas/tests/groupby/test_grouping.py
+++ b/pandas/tests/groupby/test_grouping.py
@@ -992,7 +992,7 @@ def test_multi_iter_frame(self, three_group):
         # calling `dict` on a DataFrameGroupBy leads to a TypeError,
         # we need to use a dictionary comprehension here
         # pylint: disable-next=unnecessary-comprehension
-        groups = dict(grouped)
+        groups = {key: gp for key, gp in grouped}  # noqa: C416
         assert len(groups) == 2
 
         # axis = 1