From 27a31888c23d4fd44cec30d4aa420526adf76147 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Tue, 3 Dec 2024 19:27:54 +0100 Subject: [PATCH 01/12] implement na_action='raise' to map_array() --- pandas/core/algorithms.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index aafd802b827a5..6b68ac224d951 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1627,7 +1627,7 @@ def union_with_duplicates( def map_array( arr: ArrayLike, mapper, - na_action: Literal["ignore"] | None = None, + na_action: Literal["ignore", "raise"] | None = None, ) -> np.ndarray | ExtensionArray | Index: """ Map values using an input mapping or function. @@ -1636,9 +1636,10 @@ def map_array( ---------- mapper : function, dict, or Series Mapping correspondence. - na_action : {None, 'ignore'}, default None + na_action : {None, 'ignore', 'raise'}, default None If 'ignore', propagate NA values, without passing them to the - mapping correspondence. + mapping correspondence. If 'raise', an error is raised when the + mapping correspondence does not cover all elements in the array. Returns ------- @@ -1647,7 +1648,7 @@ def map_array( If the function returns a tuple with more than one element a MultiIndex will be returned. """ - if na_action not in (None, "ignore"): + if na_action not in (None, "ignore", "raise"): msg = f"na_action must either be 'ignore' or None, {na_action} was passed" raise ValueError(msg) @@ -1686,6 +1687,11 @@ def map_array( # Since values were input this means we came from either # a dict or a series and mapper should be an index indexer = mapper.index.get_indexer(arr) + + if na_action == "raise" and (indexer == -1).any(): + raise ValueError("Provided mapping is not sufficient to cover" + "all values in the input array!") + new_values = take_nd(mapper._values, indexer) return new_values From 0931d083619767bd612cba8d7431ef7d9b054c34 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Thu, 6 Mar 2025 20:43:08 +0100 Subject: [PATCH 02/12] Remove whitespace --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 6b68ac224d951..698610f730b2f 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1691,7 +1691,7 @@ def map_array( if na_action == "raise" and (indexer == -1).any(): raise ValueError("Provided mapping is not sufficient to cover" "all values in the input array!") - + new_values = take_nd(mapper._values, indexer) return new_values From bc31e1902fb18b5c7695b675f256307f6b05d338 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Thu, 6 Mar 2025 20:48:25 +0100 Subject: [PATCH 03/12] Update quotes --- pandas/tests/frame/test_query_eval.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 375b9b00a4988..c37a0cc3df775 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -168,7 +168,7 @@ def test_query_duplicate_column_name(self, engine, parser): } ).rename(columns={"B": "A"}) - res = df.query('C == 1', engine=engine, parser=parser) + res = df.query("C == 1", engine=engine, parser=parser) expect = DataFrame( [[1, 1, 1]], @@ -1406,7 +1406,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self): def test_expr_with_column_name_with_backtick(self): # GH 59285 df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)}) - result = df.query("`a``b` < 2") # noqa + result = df.query("`a``b` < 2") # Note: Formatting checks may wrongly consider the above ``inline code``. expected = df[df["a`b"] < 2] tm.assert_frame_equal(result, expected) From 071a2422e59359b3fd88db85520566227ee39cd6 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Thu, 6 Mar 2025 21:03:57 +0100 Subject: [PATCH 04/12] Apply ruff --- pandas/core/algorithms.py | 6 +- pandas/tests/frame/test_query_eval.py | 20 ++---- ...check_for_inconsistent_pandas_namespace.py | 3 +- scripts/check_test_naming.py | 1 + scripts/generate_pip_deps_from_conda.py | 1 + scripts/pandas_errors_documented.py | 1 + scripts/sort_whatsnew_note.py | 1 + scripts/tests/test_check_test_naming.py | 5 +- .../test_inconsistent_namespace_check.py | 8 +-- scripts/tests/test_validate_docstrings.py | 20 +++--- scripts/validate_docstrings.py | 69 ++++++++++--------- scripts/validate_exception_location.py | 1 + scripts/validate_min_versions_in_sync.py | 3 +- scripts/validate_rst_title_capitalization.py | 4 +- scripts/validate_unwanted_patterns.py | 44 +++++------- 15 files changed, 86 insertions(+), 101 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 698610f730b2f..4dfbde176f82b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1689,8 +1689,10 @@ def map_array( indexer = mapper.index.get_indexer(arr) if na_action == "raise" and (indexer == -1).any(): - raise ValueError("Provided mapping is not sufficient to cover" - "all values in the input array!") + raise ValueError( + "Provided mapping is not sufficient to cover" + "all values in the input array!" + ) new_values = take_nd(mapper._values, indexer) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index c37a0cc3df775..9b4be66f61914 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,21 +160,13 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame( - { - "A": range(3), - "B": range(3), - "C": range(3) - } - ).rename(columns={"B": "A"}) + df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( + columns={"B": "A"} + ) res = df.query("C == 1", engine=engine, parser=parser) - expect = DataFrame( - [[1, 1, 1]], - columns=["A", "A", "C"], - index=[1] - ) + expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) tm.assert_frame_equal(res, expect) @@ -1140,9 +1132,7 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings( - self, parser, engine, op, func - ): + def test_query_lex_compare_strings(self, parser, engine, op, func): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index ec0a4a408c800..39e5fd2955e0a 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -30,8 +30,7 @@ from typing import NamedTuple ERROR_MESSAGE = ( - "{path}:{lineno}:{col_offset}: " - "Found both '{prefix}.{name}' and '{name}' in {path}" + "{path}:{lineno}:{col_offset}: Found both '{prefix}.{name}' and '{name}' in {path}" ) diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py index f9190643b3246..629687a866508 100644 --- a/scripts/check_test_naming.py +++ b/scripts/check_test_naming.py @@ -8,6 +8,7 @@ NOTE: if this finds a false positive, you can add the comment `# not a test` to the class or function definition. Though hopefully that shouldn't be necessary. """ + from __future__ import annotations import argparse diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index a57876902ad36..4b416a2b32319 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -12,6 +12,7 @@ generated with this script: $ python scripts/generate_pip_deps_from_conda.py --compare """ + import argparse import pathlib import re diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py index b68da137717de..a4716b70e5f26 100644 --- a/scripts/pandas_errors_documented.py +++ b/scripts/pandas_errors_documented.py @@ -6,6 +6,7 @@ pre-commit run pandas-errors-documented --all-files """ + from __future__ import annotations import argparse diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py index 428ffca83ea26..3e23d88ef92d3 100644 --- a/scripts/sort_whatsnew_note.py +++ b/scripts/sort_whatsnew_note.py @@ -23,6 +23,7 @@ pre-commit run sort-whatsnew-items --all-files """ + from __future__ import annotations import argparse diff --git a/scripts/tests/test_check_test_naming.py b/scripts/tests/test_check_test_naming.py index dbd803ce4dd31..02c31ddef2ba2 100644 --- a/scripts/tests/test_check_test_naming.py +++ b/scripts/tests/test_check_test_naming.py @@ -24,10 +24,7 @@ 0, ), ( - "class Foo: # not a test\n" - " pass\n" - "def test_foo():\n" - " Class.foo()\n", + "class Foo: # not a test\n pass\ndef test_foo():\n Class.foo()\n", "", 0, ), diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 64f66e6168efe..73893a3c86dac 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -5,14 +5,10 @@ ) BAD_FILE_0 = ( - "from pandas import Categorical\n" - "cat_0 = Categorical()\n" - "cat_1 = pd.Categorical()" + "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = pd.Categorical()" ) BAD_FILE_1 = ( - "from pandas import Categorical\n" - "cat_0 = pd.Categorical()\n" - "cat_1 = Categorical()" + "from pandas import Categorical\ncat_0 = pd.Categorical()\ncat_1 = Categorical()" ) BAD_FILE_2 = ( "from pandas import Categorical\n" diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 3bffd1f1987aa..381baa1f666f1 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -34,8 +34,7 @@ def redundant_import(self, paramx=None, paramy=None) -> None: -------- >>> import numpy as np >>> import pandas as pd - >>> df = pd.DataFrame(np.ones((3, 3)), - ... columns=('a', 'b', 'c')) + >>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c")) >>> df.all(axis=1) 0 True 1 True @@ -50,14 +49,14 @@ def unused_import(self) -> None: Examples -------- >>> import pandas as pdf - >>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c')) + >>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c")) """ def missing_whitespace_around_arithmetic_operator(self) -> None: """ Examples -------- - >>> 2+5 + >>> 2 + 5 7 """ @@ -66,14 +65,14 @@ def indentation_is_not_a_multiple_of_four(self) -> None: Examples -------- >>> if 2 + 5: - ... pass + ... pass """ def missing_whitespace_after_comma(self) -> None: """ Examples -------- - >>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c')) + >>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c")) """ def write_array_like_with_hyphen_not_underscore(self) -> None: @@ -227,13 +226,13 @@ def test_validate_all_ignore_errors(self, monkeypatch): "errors": [ ("ER01", "err desc"), ("ER02", "err desc"), - ("ER03", "err desc") + ("ER03", "err desc"), ], "warnings": [], "examples_errors": "", "deprecated": True, "file": "file1", - "file_line": "file_line1" + "file_line": "file_line1", }, ) monkeypatch.setattr( @@ -272,14 +271,13 @@ def test_validate_all_ignore_errors(self, monkeypatch): None: {"ER03"}, "pandas.DataFrame.align": {"ER01"}, # ignoring an error that is not requested should be of no effect - "pandas.Index.all": {"ER03"} - } + "pandas.Index.all": {"ER03"}, + }, ) # two functions * two not global ignored errors - one function ignored error assert exit_status == 2 * 2 - 1 - class TestApiItems: @property def api_doc(self): diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index 944575dcc8659..c878820d75487 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -13,6 +13,7 @@ $ ./validate_docstrings.py $ ./validate_docstrings.py pandas.DataFrame.head """ + from __future__ import annotations import argparse @@ -69,8 +70,10 @@ } ALL_ERRORS = set(NUMPYDOC_ERROR_MSGS).union(set(ERROR_MSGS)) duplicated_errors = set(NUMPYDOC_ERROR_MSGS).intersection(set(ERROR_MSGS)) -assert not duplicated_errors, (f"Errors {duplicated_errors} exist in both pandas " - "and numpydoc, should they be removed from pandas?") +assert not duplicated_errors, ( + f"Errors {duplicated_errors} exist in both pandas " + "and numpydoc, should they be removed from pandas?" +) def pandas_error(code, **kwargs): @@ -257,7 +260,7 @@ def pandas_validate(func_name: str): pandas_error( "SA05", reference_name=rel_name, - right_reference=rel_name[len("pandas."):], + right_reference=rel_name[len("pandas.") :], ) for rel_name in doc.see_also if rel_name.startswith("pandas.") @@ -365,17 +368,18 @@ def print_validate_all_results( for func_name, res in result.items(): error_messages = dict(res["errors"]) actual_failures = set(error_messages) - expected_failures = (ignore_errors.get(func_name, set()) - | ignore_errors.get(None, set())) + expected_failures = ignore_errors.get(func_name, set()) | ignore_errors.get( + None, set() + ) for err_code in actual_failures - expected_failures: sys.stdout.write( - f'{prefix}{res["file"]}:{res["file_line"]}:' - f'{err_code}:{func_name}:{error_messages[err_code]}\n' + f"{prefix}{res['file']}:{res['file_line']}:" + f"{err_code}:{func_name}:{error_messages[err_code]}\n" ) exit_status += 1 for err_code in ignore_errors.get(func_name, set()) - actual_failures: sys.stdout.write( - f'{prefix}{res["file"]}:{res["file_line"]}:' + f"{prefix}{res['file']}:{res['file_line']}:" f"{err_code}:{func_name}:" "EXPECTED TO FAIL, BUT NOT FAILING\n" ) @@ -384,8 +388,9 @@ def print_validate_all_results( return exit_status -def print_validate_one_results(func_name: str, - ignore_errors: dict[str, set[str]]) -> int: +def print_validate_one_results( + func_name: str, ignore_errors: dict[str, set[str]] +) -> int: def header(title, width=80, char="#") -> str: full_line = char * width side_len = (width - len(title) - 2) // 2 @@ -396,15 +401,18 @@ def header(title, width=80, char="#") -> str: result = pandas_validate(func_name) - result["errors"] = [(code, message) for code, message in result["errors"] - if code not in ignore_errors.get(None, set())] + result["errors"] = [ + (code, message) + for code, message in result["errors"] + if code not in ignore_errors.get(None, set()) + ] sys.stderr.write(header(f"Docstring ({func_name})")) sys.stderr.write(f"{result['docstring']}\n") sys.stderr.write(header("Validation")) if result["errors"]: - sys.stderr.write(f'{len(result["errors"])} Errors found for `{func_name}`:\n') + sys.stderr.write(f"{len(result['errors'])} Errors found for `{func_name}`:\n") for err_code, err_desc in result["errors"]: sys.stderr.write(f"\t{err_code}\t{err_desc}\n") else: @@ -431,14 +439,16 @@ def _format_ignore_errors(raw_ignore_errors): raise ValueError( f"Object `{obj_name}` is present in more than one " "--ignore_errors argument. Please use it once and specify " - "the errors separated by commas.") + "the errors separated by commas." + ) ignore_errors[obj_name] = set(error_codes.split(",")) unknown_errors = ignore_errors[obj_name] - ALL_ERRORS if unknown_errors: raise ValueError( f"Object `{obj_name}` is ignoring errors {unknown_errors} " - f"which are not known. Known errors are: {ALL_ERRORS}") + f"which are not known. Known errors are: {ALL_ERRORS}" + ) # global errors "PR02,ES01" else: @@ -448,27 +458,19 @@ def _format_ignore_errors(raw_ignore_errors): if unknown_errors: raise ValueError( f"Unknown errors {unknown_errors} specified using --ignore_errors " - "Known errors are: {ALL_ERRORS}") + "Known errors are: {ALL_ERRORS}" + ) return ignore_errors -def main( - func_name, - output_format, - prefix, - ignore_deprecated, - ignore_errors -): +def main(func_name, output_format, prefix, ignore_deprecated, ignore_errors): """ Main entry point. Call the validation for one or for all docstrings. """ if func_name is None: return print_validate_all_results( - output_format, - prefix, - ignore_deprecated, - ignore_errors + output_format, prefix, ignore_deprecated, ignore_errors ) else: return print_validate_one_results(func_name, ignore_errors) @@ -524,10 +526,11 @@ def main( args = argparser.parse_args(sys.argv[1:]) sys.exit( - main(args.function, - args.format, - args.prefix, - args.ignore_deprecated, - _format_ignore_errors(args.ignore_errors), - ) + main( + args.function, + args.format, + args.prefix, + args.ignore_deprecated, + _format_ignore_errors(args.ignore_errors), + ) ) diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py index ecba1eb424ad5..8581a0c873f04 100644 --- a/scripts/validate_exception_location.py +++ b/scripts/validate_exception_location.py @@ -18,6 +18,7 @@ As a pre-commit hook: pre-commit run validate-errors-locations --all-files """ + from __future__ import annotations import argparse diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 1001b00450354..7d5fea58b60ea 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -12,6 +12,7 @@ pre-commit run validate-min-versions-in-sync --all-files """ + from __future__ import annotations import pathlib @@ -105,7 +106,7 @@ def get_operator_from(dependency: str) -> str | None: def get_yaml_map_from( - yaml_dic: list[str | dict[str, list[str]]] + yaml_dic: list[str | dict[str, list[str]]], ) -> dict[str, list[str] | None]: yaml_map: dict[str, list[str] | None] = {} for dependency in yaml_dic: diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index 44318cd797163..c4857b56a89e2 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -11,6 +11,7 @@ From the command-line: python scripts/validate_rst_title_capitalization.py """ + from __future__ import annotations import argparse @@ -266,7 +267,8 @@ def main(source_paths: list[str]) -> int: if title != correct_title_capitalization(title): print( f"""{filename}:{line_number}:{err_msg} "{title}" to "{ - correct_title_capitalization(title)}" """ + correct_title_capitalization(title) + }" """ ) number_of_errors += 1 diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index d804e15f6d48f..4e241c7eba659 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -179,17 +179,11 @@ def strings_with_wrong_placed_whitespace( For example: - >>> rule = ( - ... "We want the space at the end of the line, " - ... "not at the beginning" - ... ) + >>> rule = "We want the space at the end of the line, not at the beginning" Instead of: - >>> rule = ( - ... "We want the space at the end of the line," - ... " not at the beginning" - ... ) + >>> rule = "We want the space at the end of the line, not at the beginning" Parameters ---------- @@ -229,17 +223,11 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool: For example, this is bad: - >>> rule = ( - ... "We want the space at the end of the line," - ... " not at the beginning" - ... ) + >>> rule = "We want the space at the end of the line, not at the beginning" And what we want is: - >>> rule = ( - ... "We want the space at the end of the line, " - ... "not at the beginning" - ... ) + >>> rule = "We want the space at the end of the line, not at the beginning" And if the string is ending with a new line character (\n) we do not want any trailing whitespaces after it. @@ -247,17 +235,17 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool: For example, this is bad: >>> rule = ( - ... "We want the space at the begging of " - ... "the line if the previous line is ending with a \n " - ... "not at the end, like always" + ... "We want the space at the begging of " + ... "the line if the previous line is ending with a \n " + ... "not at the end, like always" ... ) And what we do want is: >>> rule = ( - ... "We want the space at the begging of " - ... "the line if the previous line is ending with a \n" - ... " not at the end, like always" + ... "We want the space at the begging of " + ... "the line if the previous line is ending with a \n" + ... " not at the end, like always" ... ) """ if first_line.endswith(r"\n"): @@ -319,10 +307,14 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int, while nodes: in_annotation, node = nodes.pop() if not in_annotation and ( - (isinstance(node, ast.Name) # Case `NoDefault` - and node.id == "NoDefault") - or (isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault` - and node.attr == "NoDefault") + ( + isinstance(node, ast.Name) # Case `NoDefault` + and node.id == "NoDefault" + ) + or ( + isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault` + and node.attr == "NoDefault" + ) ): yield (node.lineno, "NoDefault is used not only for typing") From 5efce7b08d3cb5d9c7621b529b1236dbbf5d33d9 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:05:06 +0100 Subject: [PATCH 05/12] Add tests and better docstring --- pandas/conftest.py | 2 +- pandas/core/algorithms.py | 4 ++- pandas/core/series.py | 9 ++++-- pandas/tests/series/methods/test_map.py | 43 +++++++++++++++++++------ 4 files changed, 43 insertions(+), 15 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f9c10a7758bd2..f537b04b5a80a 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -402,7 +402,7 @@ def nselect_method(request): return request.param -@pytest.fixture(params=[None, "ignore"]) +@pytest.fixture(params=[None, "ignore", "raise"]) def na_action(request): """ Fixture for 'na_action' argument in map. diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4dfbde176f82b..5e60a4aa53c69 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1639,7 +1639,9 @@ def map_array( na_action : {None, 'ignore', 'raise'}, default None If 'ignore', propagate NA values, without passing them to the mapping correspondence. If 'raise', an error is raised when the - mapping correspondence does not cover all elements in the array. + array contains non-NA values which do not exist as keys in the mapping + correspondance (does not apply to function & dict-like mappers with + a '__missing__' attribute). Returns ------- diff --git a/pandas/core/series.py b/pandas/core/series.py index da46f8ede3409..96697fb28c525 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4335,9 +4335,11 @@ def map( ---------- arg : function, collections.abc.Mapping subclass or Series Mapping correspondence. - na_action : {None, 'ignore'}, default None + na_action : {None, 'ignore', 'raise'}, default None If 'ignore', propagate NaN values, without passing them to the - mapping correspondence. + mapping correspondence. With 'raise' a missing value in the mapping + correspondence raises a ``ValueError`` instead of replacing it + with ``NaN``. **kwargs Additional keyword arguments to pass as keywords arguments to `arg`. @@ -4359,7 +4361,8 @@ def map( Notes ----- When ``arg`` is a dictionary, values in Series that are not in the - dictionary (as keys) are converted to ``NaN``. However, if the + dictionary (as keys) are converted to ``NaN``. This conversion + can be anticipated with ``na_action = 'raise'``. However, if the dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e. provides a method for default values), then this default is used rather than ``NaN``. diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 84b60a2afe6eb..31f10dcabb9cc 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -322,6 +322,19 @@ def test_map_dict_na_key(): tm.assert_series_equal(result, expected) +def test_map_missing_key(na_action): + s = Series([1, 2, 42]) + mapping = {1: "a", 2: "b", 3: "c"} + + if na_action == "raise": + with pytest.raises(ValueError): + s.map(mapping, na_action=na_action) + else: + expected = Series(["a", "b", np.nan]) + result = s.map(mapping, na_action=na_action) + tm.assert_series_equal(result, expected) + + def test_map_defaultdict_na_key(na_action): # GH 48813 s = Series([1, 2, np.nan]) @@ -380,7 +393,7 @@ def test_map_categorical_na_ignore(na_action, expected): tm.assert_series_equal(result, expected) -def test_map_dict_subclass_with_missing(): +def test_map_dict_subclass_with_missing(na_action): """ Test Series.map with a dictionary subclass that defines __missing__, i.e. sets a default value (GH #15999). @@ -392,30 +405,40 @@ def __missing__(self, key): s = Series([1, 2, 3]) dictionary = DictWithMissing({3: "three"}) - result = s.map(dictionary) + result = s.map(dictionary, na_action=na_action) # also works with 'raise' expected = Series(["missing", "missing", "three"]) tm.assert_series_equal(result, expected) -def test_map_dict_subclass_without_missing(): +def test_map_dict_subclass_without_missing(na_action): class DictWithoutMissing(dict): pass s = Series([1, 2, 3]) dictionary = DictWithoutMissing({3: "three"}) - result = s.map(dictionary) - expected = Series([np.nan, np.nan, "three"]) - tm.assert_series_equal(result, expected) + if na_action == "raise": + with pytest.raises(ValueError): + _ = s.map(dictionary, na_action=na_action) + else: + result = s.map(dictionary, na_action=na_action) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) -def test_map_abc_mapping(non_dict_mapping_subclass): + +def test_map_abc_mapping(non_dict_mapping_subclass, na_action): # https://github.com/pandas-dev/pandas/issues/29733 # Check collections.abc.Mapping support as mapper for Series.map s = Series([1, 2, 3]) not_a_dictionary = non_dict_mapping_subclass({3: "three"}) - result = s.map(not_a_dictionary) - expected = Series([np.nan, np.nan, "three"]) - tm.assert_series_equal(result, expected) + + if na_action == "raise": + with pytest.raises(ValueError): + _ = s.map(not_a_dictionary, na_action=na_action) + else: + result = s.map(not_a_dictionary, na_action=na_action) + expected = Series([np.nan, np.nan, "three"]) + tm.assert_series_equal(result, expected) def test_map_abc_mapping_with_missing(non_dict_mapping_subclass): From ad9aab45c36adb8630e15709c0cc96e3f46e3429 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:08:26 +0100 Subject: [PATCH 06/12] Revert whitespace changes --- ...check_for_inconsistent_pandas_namespace.py | 3 +- scripts/check_test_naming.py | 1 - scripts/generate_pip_deps_from_conda.py | 1 - scripts/pandas_errors_documented.py | 1 - scripts/sort_whatsnew_note.py | 1 - scripts/tests/test_check_test_naming.py | 5 +- .../test_inconsistent_namespace_check.py | 8 ++- scripts/tests/test_validate_docstrings.py | 20 +++--- scripts/validate_docstrings.py | 69 +++++++++---------- scripts/validate_exception_location.py | 1 - scripts/validate_min_versions_in_sync.py | 3 +- scripts/validate_rst_title_capitalization.py | 4 +- scripts/validate_unwanted_patterns.py | 44 +++++++----- 13 files changed, 84 insertions(+), 77 deletions(-) diff --git a/scripts/check_for_inconsistent_pandas_namespace.py b/scripts/check_for_inconsistent_pandas_namespace.py index 39e5fd2955e0a..ec0a4a408c800 100644 --- a/scripts/check_for_inconsistent_pandas_namespace.py +++ b/scripts/check_for_inconsistent_pandas_namespace.py @@ -30,7 +30,8 @@ from typing import NamedTuple ERROR_MESSAGE = ( - "{path}:{lineno}:{col_offset}: Found both '{prefix}.{name}' and '{name}' in {path}" + "{path}:{lineno}:{col_offset}: " + "Found both '{prefix}.{name}' and '{name}' in {path}" ) diff --git a/scripts/check_test_naming.py b/scripts/check_test_naming.py index 629687a866508..f9190643b3246 100644 --- a/scripts/check_test_naming.py +++ b/scripts/check_test_naming.py @@ -8,7 +8,6 @@ NOTE: if this finds a false positive, you can add the comment `# not a test` to the class or function definition. Though hopefully that shouldn't be necessary. """ - from __future__ import annotations import argparse diff --git a/scripts/generate_pip_deps_from_conda.py b/scripts/generate_pip_deps_from_conda.py index 4b416a2b32319..a57876902ad36 100755 --- a/scripts/generate_pip_deps_from_conda.py +++ b/scripts/generate_pip_deps_from_conda.py @@ -12,7 +12,6 @@ generated with this script: $ python scripts/generate_pip_deps_from_conda.py --compare """ - import argparse import pathlib import re diff --git a/scripts/pandas_errors_documented.py b/scripts/pandas_errors_documented.py index a4716b70e5f26..b68da137717de 100644 --- a/scripts/pandas_errors_documented.py +++ b/scripts/pandas_errors_documented.py @@ -6,7 +6,6 @@ pre-commit run pandas-errors-documented --all-files """ - from __future__ import annotations import argparse diff --git a/scripts/sort_whatsnew_note.py b/scripts/sort_whatsnew_note.py index 3e23d88ef92d3..428ffca83ea26 100644 --- a/scripts/sort_whatsnew_note.py +++ b/scripts/sort_whatsnew_note.py @@ -23,7 +23,6 @@ pre-commit run sort-whatsnew-items --all-files """ - from __future__ import annotations import argparse diff --git a/scripts/tests/test_check_test_naming.py b/scripts/tests/test_check_test_naming.py index 02c31ddef2ba2..dbd803ce4dd31 100644 --- a/scripts/tests/test_check_test_naming.py +++ b/scripts/tests/test_check_test_naming.py @@ -24,7 +24,10 @@ 0, ), ( - "class Foo: # not a test\n pass\ndef test_foo():\n Class.foo()\n", + "class Foo: # not a test\n" + " pass\n" + "def test_foo():\n" + " Class.foo()\n", "", 0, ), diff --git a/scripts/tests/test_inconsistent_namespace_check.py b/scripts/tests/test_inconsistent_namespace_check.py index 73893a3c86dac..64f66e6168efe 100644 --- a/scripts/tests/test_inconsistent_namespace_check.py +++ b/scripts/tests/test_inconsistent_namespace_check.py @@ -5,10 +5,14 @@ ) BAD_FILE_0 = ( - "from pandas import Categorical\ncat_0 = Categorical()\ncat_1 = pd.Categorical()" + "from pandas import Categorical\n" + "cat_0 = Categorical()\n" + "cat_1 = pd.Categorical()" ) BAD_FILE_1 = ( - "from pandas import Categorical\ncat_0 = pd.Categorical()\ncat_1 = Categorical()" + "from pandas import Categorical\n" + "cat_0 = pd.Categorical()\n" + "cat_1 = Categorical()" ) BAD_FILE_2 = ( "from pandas import Categorical\n" diff --git a/scripts/tests/test_validate_docstrings.py b/scripts/tests/test_validate_docstrings.py index 381baa1f666f1..3bffd1f1987aa 100644 --- a/scripts/tests/test_validate_docstrings.py +++ b/scripts/tests/test_validate_docstrings.py @@ -34,7 +34,8 @@ def redundant_import(self, paramx=None, paramy=None) -> None: -------- >>> import numpy as np >>> import pandas as pd - >>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c")) + >>> df = pd.DataFrame(np.ones((3, 3)), + ... columns=('a', 'b', 'c')) >>> df.all(axis=1) 0 True 1 True @@ -49,14 +50,14 @@ def unused_import(self) -> None: Examples -------- >>> import pandas as pdf - >>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c")) + >>> df = pd.DataFrame(np.ones((3, 3)), columns=('a', 'b', 'c')) """ def missing_whitespace_around_arithmetic_operator(self) -> None: """ Examples -------- - >>> 2 + 5 + >>> 2+5 7 """ @@ -65,14 +66,14 @@ def indentation_is_not_a_multiple_of_four(self) -> None: Examples -------- >>> if 2 + 5: - ... pass + ... pass """ def missing_whitespace_after_comma(self) -> None: """ Examples -------- - >>> df = pd.DataFrame(np.ones((3, 3)), columns=("a", "b", "c")) + >>> df = pd.DataFrame(np.ones((3,3)),columns=('a','b', 'c')) """ def write_array_like_with_hyphen_not_underscore(self) -> None: @@ -226,13 +227,13 @@ def test_validate_all_ignore_errors(self, monkeypatch): "errors": [ ("ER01", "err desc"), ("ER02", "err desc"), - ("ER03", "err desc"), + ("ER03", "err desc") ], "warnings": [], "examples_errors": "", "deprecated": True, "file": "file1", - "file_line": "file_line1", + "file_line": "file_line1" }, ) monkeypatch.setattr( @@ -271,13 +272,14 @@ def test_validate_all_ignore_errors(self, monkeypatch): None: {"ER03"}, "pandas.DataFrame.align": {"ER01"}, # ignoring an error that is not requested should be of no effect - "pandas.Index.all": {"ER03"}, - }, + "pandas.Index.all": {"ER03"} + } ) # two functions * two not global ignored errors - one function ignored error assert exit_status == 2 * 2 - 1 + class TestApiItems: @property def api_doc(self): diff --git a/scripts/validate_docstrings.py b/scripts/validate_docstrings.py index c878820d75487..944575dcc8659 100755 --- a/scripts/validate_docstrings.py +++ b/scripts/validate_docstrings.py @@ -13,7 +13,6 @@ $ ./validate_docstrings.py $ ./validate_docstrings.py pandas.DataFrame.head """ - from __future__ import annotations import argparse @@ -70,10 +69,8 @@ } ALL_ERRORS = set(NUMPYDOC_ERROR_MSGS).union(set(ERROR_MSGS)) duplicated_errors = set(NUMPYDOC_ERROR_MSGS).intersection(set(ERROR_MSGS)) -assert not duplicated_errors, ( - f"Errors {duplicated_errors} exist in both pandas " - "and numpydoc, should they be removed from pandas?" -) +assert not duplicated_errors, (f"Errors {duplicated_errors} exist in both pandas " + "and numpydoc, should they be removed from pandas?") def pandas_error(code, **kwargs): @@ -260,7 +257,7 @@ def pandas_validate(func_name: str): pandas_error( "SA05", reference_name=rel_name, - right_reference=rel_name[len("pandas.") :], + right_reference=rel_name[len("pandas."):], ) for rel_name in doc.see_also if rel_name.startswith("pandas.") @@ -368,18 +365,17 @@ def print_validate_all_results( for func_name, res in result.items(): error_messages = dict(res["errors"]) actual_failures = set(error_messages) - expected_failures = ignore_errors.get(func_name, set()) | ignore_errors.get( - None, set() - ) + expected_failures = (ignore_errors.get(func_name, set()) + | ignore_errors.get(None, set())) for err_code in actual_failures - expected_failures: sys.stdout.write( - f"{prefix}{res['file']}:{res['file_line']}:" - f"{err_code}:{func_name}:{error_messages[err_code]}\n" + f'{prefix}{res["file"]}:{res["file_line"]}:' + f'{err_code}:{func_name}:{error_messages[err_code]}\n' ) exit_status += 1 for err_code in ignore_errors.get(func_name, set()) - actual_failures: sys.stdout.write( - f"{prefix}{res['file']}:{res['file_line']}:" + f'{prefix}{res["file"]}:{res["file_line"]}:' f"{err_code}:{func_name}:" "EXPECTED TO FAIL, BUT NOT FAILING\n" ) @@ -388,9 +384,8 @@ def print_validate_all_results( return exit_status -def print_validate_one_results( - func_name: str, ignore_errors: dict[str, set[str]] -) -> int: +def print_validate_one_results(func_name: str, + ignore_errors: dict[str, set[str]]) -> int: def header(title, width=80, char="#") -> str: full_line = char * width side_len = (width - len(title) - 2) // 2 @@ -401,18 +396,15 @@ def header(title, width=80, char="#") -> str: result = pandas_validate(func_name) - result["errors"] = [ - (code, message) - for code, message in result["errors"] - if code not in ignore_errors.get(None, set()) - ] + result["errors"] = [(code, message) for code, message in result["errors"] + if code not in ignore_errors.get(None, set())] sys.stderr.write(header(f"Docstring ({func_name})")) sys.stderr.write(f"{result['docstring']}\n") sys.stderr.write(header("Validation")) if result["errors"]: - sys.stderr.write(f"{len(result['errors'])} Errors found for `{func_name}`:\n") + sys.stderr.write(f'{len(result["errors"])} Errors found for `{func_name}`:\n') for err_code, err_desc in result["errors"]: sys.stderr.write(f"\t{err_code}\t{err_desc}\n") else: @@ -439,16 +431,14 @@ def _format_ignore_errors(raw_ignore_errors): raise ValueError( f"Object `{obj_name}` is present in more than one " "--ignore_errors argument. Please use it once and specify " - "the errors separated by commas." - ) + "the errors separated by commas.") ignore_errors[obj_name] = set(error_codes.split(",")) unknown_errors = ignore_errors[obj_name] - ALL_ERRORS if unknown_errors: raise ValueError( f"Object `{obj_name}` is ignoring errors {unknown_errors} " - f"which are not known. Known errors are: {ALL_ERRORS}" - ) + f"which are not known. Known errors are: {ALL_ERRORS}") # global errors "PR02,ES01" else: @@ -458,19 +448,27 @@ def _format_ignore_errors(raw_ignore_errors): if unknown_errors: raise ValueError( f"Unknown errors {unknown_errors} specified using --ignore_errors " - "Known errors are: {ALL_ERRORS}" - ) + "Known errors are: {ALL_ERRORS}") return ignore_errors -def main(func_name, output_format, prefix, ignore_deprecated, ignore_errors): +def main( + func_name, + output_format, + prefix, + ignore_deprecated, + ignore_errors +): """ Main entry point. Call the validation for one or for all docstrings. """ if func_name is None: return print_validate_all_results( - output_format, prefix, ignore_deprecated, ignore_errors + output_format, + prefix, + ignore_deprecated, + ignore_errors ) else: return print_validate_one_results(func_name, ignore_errors) @@ -526,11 +524,10 @@ def main(func_name, output_format, prefix, ignore_deprecated, ignore_errors): args = argparser.parse_args(sys.argv[1:]) sys.exit( - main( - args.function, - args.format, - args.prefix, - args.ignore_deprecated, - _format_ignore_errors(args.ignore_errors), - ) + main(args.function, + args.format, + args.prefix, + args.ignore_deprecated, + _format_ignore_errors(args.ignore_errors), + ) ) diff --git a/scripts/validate_exception_location.py b/scripts/validate_exception_location.py index 8581a0c873f04..ecba1eb424ad5 100644 --- a/scripts/validate_exception_location.py +++ b/scripts/validate_exception_location.py @@ -18,7 +18,6 @@ As a pre-commit hook: pre-commit run validate-errors-locations --all-files """ - from __future__ import annotations import argparse diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py index 7d5fea58b60ea..1001b00450354 100755 --- a/scripts/validate_min_versions_in_sync.py +++ b/scripts/validate_min_versions_in_sync.py @@ -12,7 +12,6 @@ pre-commit run validate-min-versions-in-sync --all-files """ - from __future__ import annotations import pathlib @@ -106,7 +105,7 @@ def get_operator_from(dependency: str) -> str | None: def get_yaml_map_from( - yaml_dic: list[str | dict[str, list[str]]], + yaml_dic: list[str | dict[str, list[str]]] ) -> dict[str, list[str] | None]: yaml_map: dict[str, list[str] | None] = {} for dependency in yaml_dic: diff --git a/scripts/validate_rst_title_capitalization.py b/scripts/validate_rst_title_capitalization.py index c4857b56a89e2..44318cd797163 100755 --- a/scripts/validate_rst_title_capitalization.py +++ b/scripts/validate_rst_title_capitalization.py @@ -11,7 +11,6 @@ From the command-line: python scripts/validate_rst_title_capitalization.py """ - from __future__ import annotations import argparse @@ -267,8 +266,7 @@ def main(source_paths: list[str]) -> int: if title != correct_title_capitalization(title): print( f"""{filename}:{line_number}:{err_msg} "{title}" to "{ - correct_title_capitalization(title) - }" """ + correct_title_capitalization(title)}" """ ) number_of_errors += 1 diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 4e241c7eba659..d804e15f6d48f 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -179,11 +179,17 @@ def strings_with_wrong_placed_whitespace( For example: - >>> rule = "We want the space at the end of the line, not at the beginning" + >>> rule = ( + ... "We want the space at the end of the line, " + ... "not at the beginning" + ... ) Instead of: - >>> rule = "We want the space at the end of the line, not at the beginning" + >>> rule = ( + ... "We want the space at the end of the line," + ... " not at the beginning" + ... ) Parameters ---------- @@ -223,11 +229,17 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool: For example, this is bad: - >>> rule = "We want the space at the end of the line, not at the beginning" + >>> rule = ( + ... "We want the space at the end of the line," + ... " not at the beginning" + ... ) And what we want is: - >>> rule = "We want the space at the end of the line, not at the beginning" + >>> rule = ( + ... "We want the space at the end of the line, " + ... "not at the beginning" + ... ) And if the string is ending with a new line character (\n) we do not want any trailing whitespaces after it. @@ -235,17 +247,17 @@ def has_wrong_whitespace(first_line: str, second_line: str) -> bool: For example, this is bad: >>> rule = ( - ... "We want the space at the begging of " - ... "the line if the previous line is ending with a \n " - ... "not at the end, like always" + ... "We want the space at the begging of " + ... "the line if the previous line is ending with a \n " + ... "not at the end, like always" ... ) And what we do want is: >>> rule = ( - ... "We want the space at the begging of " - ... "the line if the previous line is ending with a \n" - ... " not at the end, like always" + ... "We want the space at the begging of " + ... "the line if the previous line is ending with a \n" + ... " not at the end, like always" ... ) """ if first_line.endswith(r"\n"): @@ -307,14 +319,10 @@ def nodefault_used_not_only_for_typing(file_obj: IO[str]) -> Iterable[tuple[int, while nodes: in_annotation, node = nodes.pop() if not in_annotation and ( - ( - isinstance(node, ast.Name) # Case `NoDefault` - and node.id == "NoDefault" - ) - or ( - isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault` - and node.attr == "NoDefault" - ) + (isinstance(node, ast.Name) # Case `NoDefault` + and node.id == "NoDefault") + or (isinstance(node, ast.Attribute) # Cases e.g. `lib.NoDefault` + and node.attr == "NoDefault") ): yield (node.lineno, "NoDefault is used not only for typing") From aff9b7b0e06ea4faf59b748765960cf489409098 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:09:11 +0100 Subject: [PATCH 07/12] Revert other changes --- pandas/tests/frame/test_query_eval.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 9b4be66f61914..375b9b00a4988 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -160,13 +160,21 @@ def test_query_empty_string(self): df.query("") def test_query_duplicate_column_name(self, engine, parser): - df = DataFrame({"A": range(3), "B": range(3), "C": range(3)}).rename( - columns={"B": "A"} - ) + df = DataFrame( + { + "A": range(3), + "B": range(3), + "C": range(3) + } + ).rename(columns={"B": "A"}) - res = df.query("C == 1", engine=engine, parser=parser) + res = df.query('C == 1', engine=engine, parser=parser) - expect = DataFrame([[1, 1, 1]], columns=["A", "A", "C"], index=[1]) + expect = DataFrame( + [[1, 1, 1]], + columns=["A", "A", "C"], + index=[1] + ) tm.assert_frame_equal(res, expect) @@ -1132,7 +1140,9 @@ def test_query_with_nested_special_character(self, parser, engine): [">=", operator.ge], ], ) - def test_query_lex_compare_strings(self, parser, engine, op, func): + def test_query_lex_compare_strings( + self, parser, engine, op, func + ): a = Series(np.random.default_rng(2).choice(list("abcde"), 20)) b = Series(np.arange(a.size)) df = DataFrame({"X": a, "Y": b}) @@ -1396,7 +1406,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self): def test_expr_with_column_name_with_backtick(self): # GH 59285 df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)}) - result = df.query("`a``b` < 2") + result = df.query("`a``b` < 2") # noqa # Note: Formatting checks may wrongly consider the above ``inline code``. expected = df[df["a`b"] < 2] tm.assert_frame_equal(result, expected) From 4dc160b3b690a6c1513a76d682994549adedbed3 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:10:21 +0100 Subject: [PATCH 08/12] Adapt typing --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 96697fb28c525..0bbd0fc4e0d09 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4321,7 +4321,7 @@ def unstack( def map( self, arg: Callable | Mapping | Series, - na_action: Literal["ignore"] | None = None, + na_action: Literal["ignore", "raise"] | None = None, **kwargs, ) -> Series: """ From 75b2b9c52ed5e79400b240f263d83dc4cea428ed Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:16:21 +0100 Subject: [PATCH 09/12] Update error message --- pandas/core/algorithms.py | 3 +-- pandas/tests/series/methods/test_map.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 5e60a4aa53c69..2f920aa86531d 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1692,8 +1692,7 @@ def map_array( if na_action == "raise" and (indexer == -1).any(): raise ValueError( - "Provided mapping is not sufficient to cover" - "all values in the input array!" + "At least one value is not covered in the mapping!" ) new_values = take_nd(mapper._values, indexer) diff --git a/pandas/tests/series/methods/test_map.py b/pandas/tests/series/methods/test_map.py index 31f10dcabb9cc..a912517a13e45 100644 --- a/pandas/tests/series/methods/test_map.py +++ b/pandas/tests/series/methods/test_map.py @@ -327,7 +327,7 @@ def test_map_missing_key(na_action): mapping = {1: "a", 2: "b", 3: "c"} if na_action == "raise": - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="not covered"): s.map(mapping, na_action=na_action) else: expected = Series(["a", "b", np.nan]) @@ -418,7 +418,7 @@ class DictWithoutMissing(dict): dictionary = DictWithoutMissing({3: "three"}) if na_action == "raise": - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="not covered"): _ = s.map(dictionary, na_action=na_action) else: result = s.map(dictionary, na_action=na_action) @@ -433,7 +433,7 @@ def test_map_abc_mapping(non_dict_mapping_subclass, na_action): not_a_dictionary = non_dict_mapping_subclass({3: "three"}) if na_action == "raise": - with pytest.raises(ValueError): + with pytest.raises(ValueError, match="not covered"): _ = s.map(not_a_dictionary, na_action=na_action) else: result = s.map(not_a_dictionary, na_action=na_action) From a2b8cff421b2e69aae47db2cdc411b211e1a97dd Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:17:19 +0100 Subject: [PATCH 10/12] fix typo --- pandas/core/algorithms.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 2f920aa86531d..229eb70b26df5 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1640,7 +1640,7 @@ def map_array( If 'ignore', propagate NA values, without passing them to the mapping correspondence. If 'raise', an error is raised when the array contains non-NA values which do not exist as keys in the mapping - correspondance (does not apply to function & dict-like mappers with + correspondence (does not apply to function & dict-like mappers with a '__missing__' attribute). Returns From 0903d150c1b14b9b5880abe8197700aa9e3d9537 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:25:07 +0100 Subject: [PATCH 11/12] Run ruff --- pandas/core/algorithms.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 229eb70b26df5..c14c504eb91fb 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1691,9 +1691,7 @@ def map_array( indexer = mapper.index.get_indexer(arr) if na_action == "raise" and (indexer == -1).any(): - raise ValueError( - "At least one value is not covered in the mapping!" - ) + raise ValueError("At least one value is not covered in the mapping!") new_values = take_nd(mapper._values, indexer) From 16545e370cde411342a3296a11b05353f656db50 Mon Sep 17 00:00:00 2001 From: Marat Kopytjuk Date: Fri, 7 Mar 2025 09:50:26 +0100 Subject: [PATCH 12/12] Update na_action for dataframe map --- pandas/conftest.py | 10 +++++++++- pandas/tests/frame/methods/test_map.py | 6 +++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index f537b04b5a80a..8fe4afcf4fab0 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -405,7 +405,15 @@ def nselect_method(request): @pytest.fixture(params=[None, "ignore", "raise"]) def na_action(request): """ - Fixture for 'na_action' argument in map. + Fixture for 'na_action' argument in Series.map. + """ + return request.param + + +@pytest.fixture(params=[None, "ignore"]) +def na_action_frame(request): + """ + Fixture for 'na_action' argument in DataFrame.map. """ return request.param diff --git a/pandas/tests/frame/methods/test_map.py b/pandas/tests/frame/methods/test_map.py index 9850de14b2092..d3fd1b6216c7a 100644 --- a/pandas/tests/frame/methods/test_map.py +++ b/pandas/tests/frame/methods/test_map.py @@ -33,7 +33,7 @@ def test_map_float_object_conversion(val): assert result == object -def test_map_keeps_dtype(na_action): +def test_map_keeps_dtype(na_action_frame): # GH52219 arr = Series(["a", np.nan, "b"]) sparse_arr = arr.astype(pd.SparseDtype(object)) @@ -42,7 +42,7 @@ def test_map_keeps_dtype(na_action): def func(x): return str.upper(x) if not pd.isna(x) else x - result = df.map(func, na_action=na_action) + result = df.map(func, na_action=na_action_frame) expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object)) expected_arr = expected_sparse.astype(object) @@ -50,7 +50,7 @@ def func(x): tm.assert_frame_equal(result, expected) - result_empty = df.iloc[:0, :].map(func, na_action=na_action) + result_empty = df.iloc[:0, :].map(func, na_action=na_action_frame) expected_empty = expected.iloc[:0, :] tm.assert_frame_equal(result_empty, expected_empty)