STY: Enable ruff ambiguous unicode character (#54330)

mroeschke · web-flow · commit 435762115d96 · 2023-07-31T22:06:49.000-04:00
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -7131,7 +7131,7 @@ def value_counts(
         ascending : bool, default False
             Sort in ascending order.
         dropna : bool, default True
-            Don’t include counts of rows that contain NA values.
+            Don't include counts of rows that contain NA values.
 
             .. versionadded:: 1.3.0
 
@@ -9968,7 +9968,7 @@ def map(
         func : callable
             Python function, returns a single value from a single value.
         na_action : {None, 'ignore'}, default None
-            If ‘ignore’, propagate NaN values, without passing them to func.
+            If 'ignore', propagate NaN values, without passing them to func.
         **kwargs
             Additional keyword arguments to pass as keywords arguments to
             `func`.
@@ -10054,7 +10054,7 @@ def applymap(
         func : callable
             Python function, returns a single value from a single value.
         na_action : {None, 'ignore'}, default None
-            If ‘ignore’, propagate NaN values, without passing them to func.
+            If 'ignore', propagate NaN values, without passing them to func.
         **kwargs
             Additional keyword arguments to pass as keywords arguments to
             `func`.
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -5633,7 +5633,7 @@ def filter(
             Keep labels from axis for which "like in label == True".
         regex : str (regular expression)
             Keep labels from axis for which re.search(regex, label) == True.
-        axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
+        axis : {0 or 'index', 1 or 'columns', None}, default None
             The axis to filter on, expressed either as an index (int)
             or axis name (str). By default this is the info axis, 'columns' for
             DataFrame. For `Series` this parameter is unused and defaults to `None`.
@@ -5922,7 +5922,7 @@ def sample(
 
                 np.random.Generator objects now accepted
 
-        axis : {0 or ‘index’, 1 or ‘columns’, None}, default None
+        axis : {0 or 'index', 1 or 'columns', None}, default None
             Axis to sample. Accepts axis number or name. Default is stat axis
             for given data type. For `Series` this parameter is unused and defaults to `None`.
         ignore_index : bool, default False
diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -2322,7 +2322,7 @@ def value_counts(
         ascending : bool, default False
             Sort in ascending order.
         dropna : bool, default True
-            Don’t include counts of rows that contain NA values.
+            Don't include counts of rows that contain NA values.
 
         Returns
         -------
diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py
@@ -177,7 +177,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin):
     yearfirst : bool, default False
         If True parse dates in `data` with the year first order.
     dtype : numpy.dtype or DatetimeTZDtype or str, default None
-        Note that the only NumPy dtype allowed is ‘datetime64[ns]’.
+        Note that the only NumPy dtype allowed is `datetime64[ns]`.
     copy : bool, default False
         Make a copy of input ndarray.
     name : label, default None
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -2336,7 +2336,7 @@ def _factorize_keys(
     sort : bool, defaults to True
         If True, the encoding is done such that the unique elements in the
         keys are sorted.
-    how : {‘left’, ‘right’, ‘outer’, ‘inner’}, default ‘inner’
+    how : {'left', 'right', 'outer', 'inner'}, default 'inner'
         Type of merge.
 
     Returns
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
@@ -916,7 +916,7 @@ def to_datetime(
     - **DataFrame/dict-like** are converted to :class:`Series` with
       :class:`datetime64` dtype. For each row a datetime is created from assembling
       the various dataframe columns. Column keys can be common abbreviations
-      like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or
+      like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns'] or
       plurals of the same.
 
     The following causes are responsible for :class:`datetime.datetime` objects
diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py
@@ -1268,14 +1268,14 @@ def __init__(
     @property
     def date_format(self) -> str:
         """
-        Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’).
+        Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').
         """
         return self._date_format
 
     @property
     def datetime_format(self) -> str:
         """
-        Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’).
+        Format string for datetimes written to Excel files (e.g. 'YYYY-MM-DD HH:MM:SS').
         """
         return self._datetime_format
 
diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py
@@ -94,7 +94,7 @@ def render(self) -> list[str]:
         self._write_table()
 
         if self.should_show_dimensions:
-            by = chr(215)  # ×
+            by = chr(215)  # ×  # noqa: RUF003
             self.write(
                 f"<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"
             )
diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py
@@ -3489,7 +3489,7 @@ def highlight_quantile(
             Left bound, in [0, q_right), for the target quantile range.
         q_right : float, default 1
             Right bound, in (q_left, 1], for the target quantile range.
-        interpolation : {‘linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’}
+        interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
             Argument passed to ``Series.quantile`` or ``DataFrame.quantile`` for
             quantile estimation.
         inclusive : {'both', 'neither', 'left', 'right'}
diff --git a/pandas/io/sql.py b/pandas/io/sql.py
@@ -441,7 +441,7 @@ def read_sql_query(
         rows to include in each chunk.
     dtype : Type name or dict of columns
         Data type for data or columns. E.g. np.float64 or
-        {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.
+        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
 
         .. versionadded:: 1.3.0
     dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable'
@@ -597,7 +597,7 @@ def read_sql(
         .. versionadded:: 2.0
     dtype : Type name or dict of columns
         Data type for data or columns. E.g. np.float64 or
-        {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}.
+        {'a': np.float64, 'b': np.int32, 'c': 'Int64'}.
         The argument is ignored if a table is passed instead of a query.
 
         .. versionadded:: 2.0.0
@@ -1759,7 +1759,7 @@ def read_query(
             of rows to include in each chunk.
         dtype : Type name or dict of columns
             Data type for data or columns. E.g. np.float64 or
-            {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}
+            {'a': np.float64, 'b': np.int32, 'c': 'Int64'}
 
             .. versionadded:: 1.3.0
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
@@ -3768,7 +3768,7 @@ def _validate_variable_name(self, name: str) -> str:
                     and c != "_"
                 )
                 or 128 <= ord(c) < 192
-                or c in {"×", "÷"}
+                or c in {"×", "÷"}  # noqa: RUF001
             ):
                 name = name.replace(c, "_")
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
@@ -2061,7 +2061,7 @@ def test_str_slice_replace(start, stop, repl, exp):
         ["!|,", "isalnum", False],
         ["aaa", "isalpha", True],
         ["!!!", "isalpha", False],
-        ["٠", "isdecimal", True],
+        ["٠", "isdecimal", True],  # noqa: RUF001
         ["~!", "isdecimal", False],
         ["2", "isdigit", True],
         ["~", "isdigit", False],
diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py
@@ -957,7 +957,10 @@ def test_to_csv_path_is_none(self, float_frame):
             (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"),
             (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"),
             (
-                DataFrame(5 * [[123, "Γειά σου", "Κόσμε"]], columns=["X", "Y", "Z"]),
+                DataFrame(
+                    5 * [[123, "Γειά σου", "Κόσμε"]],  # noqa: RUF001
+                    columns=["X", "Y", "Z"],
+                ),
                 "cp737",
             ),
         ],
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -1402,7 +1402,7 @@ def test_groupby_dtype_inference_empty():
 
 
 def test_groupby_unit64_float_conversion():
-    #  GH: 30859 groupby converts unit64 to floats sometimes
+    # GH: 30859 groupby converts uint64 to floats sometimes
     df = DataFrame({"first": [1], "second": [1], "value": [16148277970000000000]})
     result = df.groupby(["first", "second"])["value"].max()
     expected = Series(
diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py
@@ -251,7 +251,7 @@ def test_read_json_from_to_json_results(self):
                 "recommender_id": {"row_0": 3},
                 "recommender_name_jp": {"row_0": "浦田"},
                 "recommender_name_en": {"row_0": "Urata"},
-                "name_jp": {"row_0": "博多人形（松尾吉将まつお よしまさ）"},
+                "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"},
                 "name_en": {"row_0": "Hakata Dolls Matsuo"},
             }
         )
diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py
@@ -223,12 +223,12 @@ def test_encoding_named_temp_file(all_parsers):
 def test_parse_encoded_special_characters(encoding):
     # GH16218 Verify parsing of data with encoded special characters
     # Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a")
-    data = "a\tb\n：foo\t0\nbar\t1\nbaz\t2"
+    data = "a\tb\n：foo\t0\nbar\t1\nbaz\t2"  # noqa: RUF001
     encoded_data = BytesIO(data.encode(encoding))
     result = read_csv(encoded_data, delimiter="\t", encoding=encoding)
 
     expected = DataFrame(
-        data=[["：foo", 0], ["bar", 1], ["baz", 2]],
+        data=[["：foo", 0], ["bar", 1], ["baz", 2]],  # noqa: RUF001
         columns=["a", "b"],
     )
     tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py
@@ -190,7 +190,8 @@ def test_read_csv_compat():
 
 
 def test_bytes_io_input():
-    result = read_fwf(BytesIO("שלום\nשלום".encode()), widths=[2, 2], encoding="utf8")
+    data = BytesIO("שלום\nשלום".encode())  # noqa: RUF001
+    result = read_fwf(data, widths=[2, 2], encoding="utf8")
     expected = DataFrame([["של", "ום"]], columns=["של", "ום"])
     tm.assert_frame_equal(result, expected)
 
diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py
@@ -62,9 +62,9 @@ def df(request):
     data_type = request.param
 
     if data_type == "delims":
-        return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]})
+        return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]})
     elif data_type == "utf8":
-        return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]})
+        return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]})
     elif data_type == "utf16":
         return DataFrame(
             {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]}
@@ -402,7 +402,7 @@ def test_round_trip_valid_encodings(self, enc, df):
         self.check_round_trip_frame(df, encoding=enc)
 
     @pytest.mark.single_cpu
-    @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."])
+    @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑`...", "abcd..."])
     @pytest.mark.xfail(
         (os.environ.get("DISPLAY") is None and not is_platform_mac())
         or is_ci_environment(),
diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py
@@ -286,7 +286,7 @@ def test_read_dta18(self, datapath):
                 ["Cat", "Bogota", "Bogotá", 1, 1.0, "option b Ünicode", 1.0],
                 ["Dog", "Boston", "Uzunköprü", np.nan, np.nan, np.nan, np.nan],
                 ["Plane", "Rome", "Tromsø", 0, 0.0, "option a", 0.0],
-                ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4],
+                ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4],  # noqa: RUF001
                 ["", "", "", 0, 0.3332999, "option a", 1 / 3.0],
             ],
             columns=[
diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py
@@ -122,7 +122,10 @@ def test_to_csv_path_is_none(self):
             # GH 21241, 21118
             (Series(["abc", "def", "ghi"], name="X"), "ascii"),
             (Series(["123", "你好", "世界"], name="中文"), "gb2312"),
-            (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"),
+            (
+                Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"),  # noqa: RUF001
+                "cp737",
+            ),
         ],
     )
     def test_to_csv_compression(self, s, encoding, compression):
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
@@ -226,8 +226,10 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
     # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER
     # 0x2605: ★ not number
     # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
-    # 0xFF13: ３ Em 3
-    ser = Series(["A", "3", "¼", "★", "፸", "３", "four"], dtype=any_string_dtype)
+    # 0xFF13: ３ Em 3  # noqa: RUF003
+    ser = Series(
+        ["A", "3", "¼", "★", "፸", "３", "four"], dtype=any_string_dtype  # noqa: RUF001
+    )
     expected_dtype = "bool" if any_string_dtype == "object" else "boolean"
     expected = Series(expected, dtype=expected_dtype)
     result = getattr(ser.str, method)()
@@ -246,7 +248,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
     ],
 )
 def test_isnumeric_unicode_missing(method, expected, any_string_dtype):
-    values = ["A", np.nan, "¼", "★", np.nan, "３", "four"]
+    values = ["A", np.nan, "¼", "★", np.nan, "３", "four"]  # noqa: RUF001
     ser = Series(values, dtype=any_string_dtype)
     expected_dtype = "object" if any_string_dtype == "object" else "boolean"
     expected = Series(expected, dtype=expected_dtype)
@@ -564,12 +566,12 @@ def test_decode_errors_kwarg():
     "form, expected",
     [
         ("NFKC", ["ABC", "ABC", "123", np.nan, "アイエ"]),
-        ("NFC", ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"]),
+        ("NFC", ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"]),  # noqa: RUF001
     ],
 )
 def test_normalize(form, expected, any_string_dtype):
     ser = Series(
-        ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"],
+        ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"],  # noqa: RUF001
         index=["a", "b", "c", "d", "e"],
         dtype=any_string_dtype,
     )
@@ -580,7 +582,7 @@ def test_normalize(form, expected, any_string_dtype):
 
 def test_normalize_bad_arg_raises(any_string_dtype):
     ser = Series(
-        ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"],
+        ["ABC", "ＡＢＣ", "１２３", np.nan, "ｱｲｴ"],  # noqa: RUF001
         index=["a", "b", "c", "d", "e"],
         dtype=any_string_dtype,
     )
@@ -589,7 +591,7 @@ def test_normalize_bad_arg_raises(any_string_dtype):
 
 
 def test_normalize_index():
-    idx = Index(["ＡＢＣ", "１２３", "ｱｲｴ"])
+    idx = Index(["ＡＢＣ", "１２３", "ｱｲｴ"])  # noqa: RUF001
     expected = Index(["ABC", "123", "アイエ"])
     result = idx.str.normalize("NFKC")
     tm.assert_index_equal(result, expected)
diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py
@@ -570,7 +570,7 @@ def merge(self, other, inplace: bool = False):
     offset=DateOffset(weekday=MO(3)),
 )
 USPresidentsDay = Holiday(
-    "Washington’s Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))
+    "Washington's Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))
 )
 GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -326,12 +326,6 @@ ignore = [
   "PLR0124",
   # Consider `elif` instead of `else` then `if` to remove indentation level
   "PLR5501",
-  # ambiguous-unicode-character-string
-  "RUF001",
-  # ambiguous-unicode-character-docstring
-  "RUF002",
-  # ambiguous-unicode-character-comment
-  "RUF003",
   # collection-literal-concatenation
   "RUF005",
   # pairwise-over-zipped (>=PY310 only)
diff --git a/web/pandas/about/governance.md b/web/pandas/about/governance.md
@@ -128,7 +128,7 @@ In particular, the Core Team may:
     and merging pull requests.
 -   Make decisions about the Services that are run by The Project and manage
     those Services for the benefit of the Project and Community.
--   Make decisions when regular community discussion doesn’t produce consensus
+-   Make decisions when regular community discussion doesn't produce consensus
     on an issue in a reasonable time frame.
 
 ### Core Team membership
@@ -157,7 +157,7 @@ they will be considered for removal from the Core Team. Before removal,
 inactive Member will be approached by the BDFL to see if they plan on returning
 to active participation. If not they will be removed immediately upon a Core
 Team vote. If they plan on returning to active participation soon, they will be
-given a grace period of one year. If they don’t return to active participation
+given a grace period of one year. If they don't return to active participation
 within that time period they will be removed by vote of the Core Team without
 further grace period. All former Core Team members can be considered for
 membership again at any time in the future, like any other Project Contributor.
diff --git a/web/pandas/community/coc.md b/web/pandas/community/coc.md
@@ -21,7 +21,7 @@ Examples of unacceptable behavior by participants include:
 * Other unethical or unprofessional conduct
 
 Furthermore, we encourage inclusive behavior - for example,
-please don’t say “hey guys!” but “hey everyone!”.
+please don't say “hey guys!” but “hey everyone!”.
 
 Project maintainers have the right and responsibility to remove, edit, or
 reject comments, commits, code, wiki edits, issues, and other contributions

Original file line number	Diff line number	Diff line change
`@@ -94,7 +94,7 @@ def render(self) -> list[str]:`
`94`	`94`	`self._write_table()`
`95`	`95`
`96`	`96`	`if self.should_show_dimensions:`
`97`		`- by = chr(215) # ×`
	`97`	`+ by = chr(215) # × # noqa: RUF003`
`98`	`98`	`self.write(`
`99`	`99`	`f"<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>"`
`100`	`100`	`)`
Original file line number	Diff line number	Diff line change
`@@ -3768,7 +3768,7 @@ def _validate_variable_name(self, name: str) -> str:`
`3768`	`3768`	`and c != "_"`
`3769`	`3769`	`)`
`3770`	`3770`	`or 128 <= ord(c) < 192`
`3771`		`- or c in {"×", "÷"}`
	`3771`	`+ or c in {"×", "÷"} # noqa: RUF001`
`3772`	`3772`	`):`
`3773`	`3773`	`name = name.replace(c, "_")`
`3774`	`3774`
Original file line number	Diff line number	Diff line change
`@@ -251,7 +251,7 @@ def test_read_json_from_to_json_results(self):`
`251`	`251`	`"recommender_id": {"row_0": 3},`
`252`	`252`	`"recommender_name_jp": {"row_0": "浦田"},`
`253`	`253`	`"recommender_name_en": {"row_0": "Urata"},`
`254`		`- "name_jp": {"row_0": "博多人形（松尾吉将まつおよしまさ）"},`
	`254`	`+ "name_jp": {"row_0": "博多人形(松尾吉将まつおよしまさ)"},`
`255`	`255`	`"name_en": {"row_0": "Hakata Dolls Matsuo"},`
`256`	`256`	`}`
`257`	`257`	`)`
Original file line number	Diff line number	Diff line change
`@@ -570,7 +570,7 @@ def merge(self, other, inplace: bool = False):`
`570`	`570`	`offset=DateOffset(weekday=MO(3)),`
`571`	`571`	`)`
`572`	`572`	`USPresidentsDay = Holiday(`
`573`		`- "Washington’s Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))`
	`573`	`+ "Washington's Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3))`
`574`	`574`	`)`
`575`	`575`	`GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)])`
`576`	`576`