diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5207fd5db1c4d..e4755d5dd2bdf 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7131,7 +7131,7 @@ def value_counts( ascending : bool, default False Sort in ascending order. dropna : bool, default True - Don’t include counts of rows that contain NA values. + Don't include counts of rows that contain NA values. .. versionadded:: 1.3.0 @@ -9968,7 +9968,7 @@ def map( func : callable Python function, returns a single value from a single value. na_action : {None, 'ignore'}, default None - If ‘ignore’, propagate NaN values, without passing them to func. + If 'ignore', propagate NaN values, without passing them to func. **kwargs Additional keyword arguments to pass as keywords arguments to `func`. @@ -10054,7 +10054,7 @@ def applymap( func : callable Python function, returns a single value from a single value. na_action : {None, 'ignore'}, default None - If ‘ignore’, propagate NaN values, without passing them to func. + If 'ignore', propagate NaN values, without passing them to func. **kwargs Additional keyword arguments to pass as keywords arguments to `func`. diff --git a/pandas/core/generic.py b/pandas/core/generic.py index f6bea7d89a0b9..aa6578bbcaf66 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5633,7 +5633,7 @@ def filter( Keep labels from axis for which "like in label == True". regex : str (regular expression) Keep labels from axis for which re.search(regex, label) == True. - axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + axis : {0 or 'index', 1 or 'columns', None}, default None The axis to filter on, expressed either as an index (int) or axis name (str). By default this is the info axis, 'columns' for DataFrame. For `Series` this parameter is unused and defaults to `None`. 
@@ -5922,7 +5922,7 @@ def sample( np.random.Generator objects now accepted - axis : {0 or ‘index’, 1 or ‘columns’, None}, default None + axis : {0 or 'index', 1 or 'columns', None}, default None Axis to sample. Accepts axis number or name. Default is stat axis for given data type. For `Series` this parameter is unused and defaults to `None`. ignore_index : bool, default False diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ceec715a40fd1..2ffdaa934e838 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -2322,7 +2322,7 @@ def value_counts( ascending : bool, default False Sort in ascending order. dropna : bool, default True - Don’t include counts of rows that contain NA values. + Don't include counts of rows that contain NA values. Returns ------- diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index eb85acbc4b819..c07ca760cbc8e 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -177,7 +177,7 @@ class DatetimeIndex(DatetimeTimedeltaMixin): yearfirst : bool, default False If True parse dates in `data` with the year first order. dtype : numpy.dtype or DatetimeTZDtype or str, default None - Note that the only NumPy dtype allowed is ‘datetime64[ns]’. + Note that the only NumPy dtype allowed is `datetime64[ns]`. copy : bool, default False Make a copy of input ndarray. name : label, default None diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index e8fd3398c4db8..da253da1428bf 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -2336,7 +2336,7 @@ def _factorize_keys( sort : bool, defaults to True If True, the encoding is done such that the unique elements in the keys are sorted. - how : {‘left’, ‘right’, ‘outer’, ‘inner’}, default ‘inner’ + how : {'left', 'right', 'outer', 'inner'}, default 'inner' Type of merge. 
Returns diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 95faea468fb5d..0360903424d54 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -916,7 +916,7 @@ def to_datetime( - **DataFrame/dict-like** are converted to :class:`Series` with :class:`datetime64` dtype. For each row a datetime is created from assembling the various dataframe columns. Column keys can be common abbreviations - like [‘year’, ‘month’, ‘day’, ‘minute’, ‘second’, ‘ms’, ‘us’, ‘ns’]) or + like ['year', 'month', 'day', 'minute', 'second', 'ms', 'us', 'ns']) or plurals of the same. The following causes are responsible for :class:`datetime.datetime` objects diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index c310b2614fa5f..7c67f85ed3d1e 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -1268,14 +1268,14 @@ def __init__( @property def date_format(self) -> str: """ - Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’). + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). """ return self._date_format @property def datetime_format(self) -> str: """ - Format string for dates written into Excel files (e.g. ‘YYYY-MM-DD’). + Format string for dates written into Excel files (e.g. 'YYYY-MM-DD'). """ return self._datetime_format diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 151bde4e1c4c2..ce59985b8f352 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -94,7 +94,7 @@ def render(self) -> list[str]: self._write_table() if self.should_show_dimensions: - by = chr(215) # × + by = chr(215) # × # noqa: RUF003 self.write( f"

<p>{len(self.frame)} rows {by} {len(self.frame.columns)} columns</p>

" ) diff --git a/pandas/io/formats/style.py b/pandas/io/formats/style.py index a45ea881d8dad..f77778ee45ae3 100644 --- a/pandas/io/formats/style.py +++ b/pandas/io/formats/style.py @@ -3489,7 +3489,7 @@ def highlight_quantile( Left bound, in [0, q_right), for the target quantile range. q_right : float, default 1 Right bound, in (q_left, 1], for the target quantile range. - interpolation : {‘linear’, ‘lower’, ‘higher’, ‘midpoint’, ‘nearest’} + interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'} Argument passed to ``Series.quantile`` or ``DataFrame.quantile`` for quantile estimation. inclusive : {'both', 'neither', 'left', 'right'} diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 467e8d2c3ff58..2cf9d144eb91c 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -441,7 +441,7 @@ def read_sql_query( rows to include in each chunk. dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or - {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}. + {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. .. versionadded:: 1.3.0 dtype_backend : {'numpy_nullable', 'pyarrow'}, default 'numpy_nullable' @@ -597,7 +597,7 @@ def read_sql( .. versionadded:: 2.0 dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or - {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’}. + {'a': np.float64, 'b': np.int32, 'c': 'Int64'}. The argument is ignored if a table is passed instead of a query. .. versionadded:: 2.0.0 @@ -1759,7 +1759,7 @@ def read_query( of rows to include in each chunk. dtype : Type name or dict of columns Data type for data or columns. E.g. np.float64 or - {‘a’: np.float64, ‘b’: np.int32, ‘c’: ‘Int64’} + {'a': np.float64, 'b': np.int32, 'c': 'Int64'} .. 
versionadded:: 1.3.0 diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 2181b33b315ae..054d73a8aba42 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -3768,7 +3768,7 @@ def _validate_variable_name(self, name: str) -> str: and c != "_" ) or 128 <= ord(c) < 192 - or c in {"×", "÷"} + or c in {"×", "÷"} # noqa: RUF001 ): name = name.replace(c, "_") diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index bdcd8f1ef0d50..224d383da633a 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -2119,7 +2119,7 @@ def test_str_slice_replace(start, stop, repl, exp): ["!|,", "isalnum", False], ["aaa", "isalpha", True], ["!!!", "isalpha", False], - ["٠", "isdecimal", True], + ["٠", "isdecimal", True], # noqa: RUF001 ["~!", "isdecimal", False], ["2", "isdigit", True], ["~", "isdigit", False], diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index f7d132a1c0bf0..475c33d86e6e7 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -957,7 +957,10 @@ def test_to_csv_path_is_none(self, float_frame): (DataFrame([["abc", "def", "ghi"]], columns=["X", "Y", "Z"]), "ascii"), (DataFrame(5 * [[123, "你好", "世界"]], columns=["X", "Y", "Z"]), "gb2312"), ( - DataFrame(5 * [[123, "Γειά σου", "Κόσμε"]], columns=["X", "Y", "Z"]), + DataFrame( + 5 * [[123, "Γειά σου", "Κόσμε"]], # noqa: RUF001 + columns=["X", "Y", "Z"], + ), "cp737", ), ], diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c8de1cd6785b6..6ffc975da4dd5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1402,7 +1402,7 @@ def test_groupby_dtype_inference_empty(): def test_groupby_unit64_float_conversion(): - #  GH: 30859 groupby converts unit64 to floats sometimes + # GH: 30859 groupby converts unit64 to floats sometimes df = DataFrame({"first": [1], 
"second": [1], "value": [16148277970000000000]}) result = df.groupby(["first", "second"])["value"].max() expected = Series( diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 25b0e4a9f1de9..fb032decc9fb9 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -251,7 +251,7 @@ def test_read_json_from_to_json_results(self): "recommender_id": {"row_0": 3}, "recommender_name_jp": {"row_0": "浦田"}, "recommender_name_en": {"row_0": "Urata"}, - "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"}, + "name_jp": {"row_0": "博多人形(松尾吉将まつお よしまさ)"}, "name_en": {"row_0": "Hakata Dolls Matsuo"}, } ) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 31c7994f39058..f6dbb24f36f18 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -223,12 +223,12 @@ def test_encoding_named_temp_file(all_parsers): def test_parse_encoded_special_characters(encoding): # GH16218 Verify parsing of data with encoded special characters # Data contains a Unicode 'FULLWIDTH COLON' (U+FF1A) at position (0,"a") - data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" + data = "a\tb\n:foo\t0\nbar\t1\nbaz\t2" # noqa: RUF001 encoded_data = BytesIO(data.encode(encoding)) result = read_csv(encoded_data, delimiter="\t", encoding=encoding) expected = DataFrame( - data=[[":foo", 0], ["bar", 1], ["baz", 2]], + data=[[":foo", 0], ["bar", 1], ["baz", 2]], # noqa: RUF001 columns=["a", "b"], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/parser/test_read_fwf.py b/pandas/tests/io/parser/test_read_fwf.py index 47794c09bf541..c19f8d2792a35 100644 --- a/pandas/tests/io/parser/test_read_fwf.py +++ b/pandas/tests/io/parser/test_read_fwf.py @@ -190,7 +190,8 @@ def test_read_csv_compat(): def test_bytes_io_input(): - result = read_fwf(BytesIO("שלום\nשלום".encode()), widths=[2, 2], encoding="utf8") + data = 
BytesIO("שלום\nשלום".encode()) # noqa: RUF001 + result = read_fwf(data, widths=[2, 2], encoding="utf8") expected = DataFrame([["של", "ום"]], columns=["של", "ום"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/test_clipboard.py b/pandas/tests/io/test_clipboard.py index ff81d0125144e..70144d65b99af 100644 --- a/pandas/tests/io/test_clipboard.py +++ b/pandas/tests/io/test_clipboard.py @@ -62,9 +62,9 @@ def df(request): data_type = request.param if data_type == "delims": - return DataFrame({"a": ['"a,\t"b|c', "d\tef´"], "b": ["hi'j", "k''lm"]}) + return DataFrame({"a": ['"a,\t"b|c', "d\tef`"], "b": ["hi'j", "k''lm"]}) elif data_type == "utf8": - return DataFrame({"a": ["µasd", "Ωœ∑´"], "b": ["øπ∆˚¬", "œ∑´®"]}) + return DataFrame({"a": ["µasd", "Ωœ∑`"], "b": ["øπ∆˚¬", "œ∑`®"]}) elif data_type == "utf16": return DataFrame( {"a": ["\U0001f44d\U0001f44d", "\U0001f44d\U0001f44d"], "b": ["abc", "def"]} @@ -402,7 +402,7 @@ def test_round_trip_valid_encodings(self, enc, df): self.check_round_trip_frame(df, encoding=enc) @pytest.mark.single_cpu - @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑´...", "abcd..."]) + @pytest.mark.parametrize("data", ["\U0001f44d...", "Ωœ∑`...", "abcd..."]) @pytest.mark.xfail( (os.environ.get("DISPLAY") is None and not is_platform_mac()) or is_ci_environment(), diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index c4035ea867962..580373ba793f8 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -286,7 +286,7 @@ def test_read_dta18(self, datapath): ["Cat", "Bogota", "Bogotá", 1, 1.0, "option b Ünicode", 1.0], ["Dog", "Boston", "Uzunköprü", np.nan, np.nan, np.nan, np.nan], ["Plane", "Rome", "Tromsø", 0, 0.0, "option a", 0.0], - ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4], + ["Potato", "Tokyo", "Elâzığ", -4, 4.0, 4, 4], # noqa: RUF001 ["", "", "", 0, 0.3332999, "option a", 1 / 3.0], ], columns=[ diff --git a/pandas/tests/series/methods/test_to_csv.py 
b/pandas/tests/series/methods/test_to_csv.py index 070ab872a4e5b..76ca05a60eb7a 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -122,7 +122,10 @@ def test_to_csv_path_is_none(self): # GH 21241, 21118 (Series(["abc", "def", "ghi"], name="X"), "ascii"), (Series(["123", "你好", "世界"], name="中文"), "gb2312"), - (Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), "cp737"), + ( + Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), # noqa: RUF001 + "cp737", + ), ], ) def test_to_csv_compression(self, s, encoding, compression): diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py index a5c4f8f7c8a4f..1e573bdfe8fb5 100644 --- a/pandas/tests/strings/test_strings.py +++ b/pandas/tests/strings/test_strings.py @@ -226,8 +226,10 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): # 0x00bc: ¼ VULGAR FRACTION ONE QUARTER # 0x2605: ★ not number # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY - # 0xFF13: 3 Em 3 - ser = Series(["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype) + # 0xFF13: 3 Em 3 # noqa: RUF003 + ser = Series( + ["A", "3", "¼", "★", "፸", "3", "four"], dtype=any_string_dtype # noqa: RUF001 + ) expected_dtype = "bool" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) result = getattr(ser.str, method)() @@ -246,7 +248,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype): ], ) def test_isnumeric_unicode_missing(method, expected, any_string_dtype): - values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] + values = ["A", np.nan, "¼", "★", np.nan, "3", "four"] # noqa: RUF001 ser = Series(values, dtype=any_string_dtype) expected_dtype = "object" if any_string_dtype == "object" else "boolean" expected = Series(expected, dtype=expected_dtype) @@ -564,12 +566,12 @@ def test_decode_errors_kwarg(): "form, expected", [ ("NFKC", ["ABC", "ABC", "123", np.nan, "アイエ"]), - ("NFC", ["ABC", "ABC", "123", np.nan, 
"アイエ"]), + ("NFC", ["ABC", "ABC", "123", np.nan, "アイエ"]), # noqa: RUF001 ], ) def test_normalize(form, expected, any_string_dtype): ser = Series( - ["ABC", "ABC", "123", np.nan, "アイエ"], + ["ABC", "ABC", "123", np.nan, "アイエ"], # noqa: RUF001 index=["a", "b", "c", "d", "e"], dtype=any_string_dtype, ) @@ -580,7 +582,7 @@ def test_normalize(form, expected, any_string_dtype): def test_normalize_bad_arg_raises(any_string_dtype): ser = Series( - ["ABC", "ABC", "123", np.nan, "アイエ"], + ["ABC", "ABC", "123", np.nan, "アイエ"], # noqa: RUF001 index=["a", "b", "c", "d", "e"], dtype=any_string_dtype, ) @@ -589,7 +591,7 @@ def test_normalize_bad_arg_raises(any_string_dtype): def test_normalize_index(): - idx = Index(["ABC", "123", "アイエ"]) + idx = Index(["ABC", "123", "アイエ"]) # noqa: RUF001 expected = Index(["ABC", "123", "アイエ"]) result = idx.str.normalize("NFKC") tm.assert_index_equal(result, expected) diff --git a/pandas/tseries/holiday.py b/pandas/tseries/holiday.py index 70190b16767cf..44c21bc284121 100644 --- a/pandas/tseries/holiday.py +++ b/pandas/tseries/holiday.py @@ -570,7 +570,7 @@ def merge(self, other, inplace: bool = False): offset=DateOffset(weekday=MO(3)), ) USPresidentsDay = Holiday( - "Washington’s Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3)) + "Washington's Birthday", month=2, day=1, offset=DateOffset(weekday=MO(3)) ) GoodFriday = Holiday("Good Friday", month=1, day=1, offset=[Easter(), Day(-2)]) diff --git a/pyproject.toml b/pyproject.toml index bc6789cd7a4f3..49eb8f6a7678d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -326,12 +326,6 @@ ignore = [ "PLR0124", # Consider `elif` instead of `else` then `if` to remove indentation level "PLR5501", - # ambiguous-unicode-character-string - "RUF001", - # ambiguous-unicode-character-docstring - "RUF002", - # ambiguous-unicode-character-comment - "RUF003", # collection-literal-concatenation "RUF005", # pairwise-over-zipped (>=PY310 only) diff --git a/web/pandas/about/governance.md 
b/web/pandas/about/governance.md index 46480acc69c31..d8777d1d0c15d 100644 --- a/web/pandas/about/governance.md +++ b/web/pandas/about/governance.md @@ -128,7 +128,7 @@ In particular, the Core Team may: and merging pull requests. - Make decisions about the Services that are run by The Project and manage those Services for the benefit of the Project and Community. -- Make decisions when regular community discussion doesn’t produce consensus +- Make decisions when regular community discussion doesn't produce consensus on an issue in a reasonable time frame. ### Core Team membership @@ -157,7 +157,7 @@ they will be considered for removal from the Core Team. Before removal, inactive Member will be approached by the BDFL to see if they plan on returning to active participation. If not they will be removed immediately upon a Core Team vote. If they plan on returning to active participation soon, they will be -given a grace period of one year. If they don’t return to active participation +given a grace period of one year. If they don't return to active participation within that time period they will be removed by vote of the Core Team without further grace period. All former Core Team members can be considered for membership again at any time in the future, like any other Project Contributor. diff --git a/web/pandas/community/coc.md b/web/pandas/community/coc.md index f6d0c3543840e..22cd77859c557 100644 --- a/web/pandas/community/coc.md +++ b/web/pandas/community/coc.md @@ -21,7 +21,7 @@ Examples of unacceptable behavior by participants include: * Other unethical or unprofessional conduct Furthermore, we encourage inclusive behavior - for example, -please don’t say “hey guys!” but “hey everyone!”. +please don't say “hey guys!” but “hey everyone!”. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions