From d24344376934b097d61a3c4f309c613c986a5edf Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:14:31 +0100 Subject: [PATCH 1/8] Improve error message and add test --- pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/base_class/test_indexing.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index ac4d2976593a2..9ba01606a254f 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3769,7 +3769,7 @@ def get_loc(self, key): and any(isinstance(x, slice) for x in casted_key) ): raise InvalidIndexError(key) - raise KeyError(key) from err + raise KeyError(f"{key} not found. Non-sorted index: get_loc() requires a sorted index.") from err except TypeError: # If we have a listlike key, _check_indexing_error will raise # InvalidIndexError. Otherwise we fall through and re-raise diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index 2988fa7d1baa1..07b8bcc142343 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -95,6 +95,15 @@ def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): with pytest.raises(KeyError, match="NaT"): idx.get_loc(NaT) + def test_get_loc_non_sorted_index_error_message(self): + # Create an Index with a non-sorted order + idx = Index([3, 1, 4, 1, 5, 9]) + + # Attempt to get the location of a key not present in the non-sorted index + key = 1 + with pytest.raises(KeyError, match="not found. 
Non-sorted index: get_loc() requires a sorted index."): + idx.get_loc(key) + def test_getitem_boolean_ea_indexer(): # GH#45806 From d61eae3ae50cbe676dd384d87f3cee12c5e0cb26 Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:18:09 +0100 Subject: [PATCH 2/8] Remove empty line --- pandas/tests/indexes/base_class/test_indexing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index 07b8bcc142343..4ddb0ab457959 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -104,7 +104,6 @@ def test_get_loc_non_sorted_index_error_message(self): with pytest.raises(KeyError, match="not found. Non-sorted index: get_loc() requires a sorted index."): idx.get_loc(key) - def test_getitem_boolean_ea_indexer(): # GH#45806 ser = pd.Series([True, False, pd.NA], dtype="boolean") From ec4438822c23c4f56b05f6728ad18776b4b4cb95 Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Fri, 8 Dec 2023 11:22:21 +0100 Subject: [PATCH 3/8] update --- pandas/core/indexes/base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9ba01606a254f..1b653e2574855 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3769,7 +3769,9 @@ def get_loc(self, key): and any(isinstance(x, slice) for x in casted_key) ): raise InvalidIndexError(key) - raise KeyError(f"{key} not found. Non-sorted index: get_loc() requires a sorted index.") from err + if not self.is_monotonic_increasing: + raise KeyError(f"{key} not found. Non-sorted index: get_loc() requires a sorted index.") from err + raise KeyError(key) from err except TypeError: # If we have a listlike key, _check_indexing_error will raise # InvalidIndexError. 
Otherwise we fall through and re-raise From 46d19a605e6f0e13bc89a0a8d843ae221ae1666d Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Fri, 8 Dec 2023 13:08:27 +0100 Subject: [PATCH 4/8] Fix line length --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1b653e2574855..df42095558509 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3770,7 +3770,8 @@ def get_loc(self, key): ): raise InvalidIndexError(key) if not self.is_monotonic_increasing: - raise KeyError(f"{key} not found. Non-sorted index: get_loc() requires a sorted index.") from err + raise KeyError(f"{key} not found. Non-sorted index:" \ + "get_loc() requires a sorted index.") from err raise KeyError(key) from err except TypeError: # If we have a listlike key, _check_indexing_error will raise From d65169292308b1ae8da281e7140477d964a22ae1 Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Fri, 8 Dec 2023 17:29:00 +0100 Subject: [PATCH 5/8] Update test_indexing.py --- .../tests/indexes/base_class/test_indexing.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index 4ddb0ab457959..b5479545cd3e2 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -96,13 +96,19 @@ def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): idx.get_loc(NaT) def test_get_loc_non_sorted_index_error_message(self): - # Create an Index with a non-sorted order - idx = Index([3, 1, 4, 1, 5, 9]) + # this calls get_loc on a non-sorted index + df = pd.DataFrame( + np.reshape(range(15), (5, 3)), + index=[7, 1, 5, 9, 3], + columns="A B C".split(), + ) + key = 4 + with pytest.raises( + KeyError, + match="not found. 
Non-sorted index: get_loc() requires a sorted index.", + ): + df.loc[:key] - # Attempt to get the location of a key not present in the non-sorted index - key = 1 - with pytest.raises(KeyError, match="not found. Non-sorted index: get_loc() requires a sorted index."): - idx.get_loc(key) def test_getitem_boolean_ea_indexer(): # GH#45806 From d7cb3500d7af6083fe49c397ffdf231c807bfd8c Mon Sep 17 00:00:00 2001 From: Linus <95619282+linus-md@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:31:44 +0100 Subject: [PATCH 6/8] Add spaces --- pandas/io/excel/_base.py | 55 ++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 27 deletions(-) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 6d66830ab1dfd..2884294377ec9 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -86,7 +86,7 @@ ) _read_excel_doc = ( """ -Read an Excel file into a pandas DataFrame. +Read an Excel file into a ``pandas`` ``DataFrame``. Supports `xls`, `xlsx`, `xlsm`, `xlsb`, `odf`, `ods` and `odt` file extensions read from a local filesystem or URL. Supports an option to read @@ -112,7 +112,7 @@ Strings are used for sheet names. Integers are used in zero-indexed sheet positions (chart sheets do not count as a sheet position). Lists of strings/integers are used to request multiple sheets. - Specify None to get all worksheets. + Specify ``None`` to get all worksheets. Available cases: @@ -121,7 +121,7 @@ * ``"Sheet1"``: Load sheet with name "Sheet1" * ``[0, 1, "Sheet5"]``: Load first, second and sheet named "Sheet5" as a dict of `DataFrame` - * None: All worksheets. + * ``None``: All worksheets. header : int, list of int, default 0 Row (0-indexed) to use for the column labels of the parsed @@ -155,21 +155,21 @@ Returns a subset of the columns according to behavior above. dtype : Type name or dict of column -> type, default None Data type for data or columns. E.g. 
{{'a': np.float64, 'b': np.int32}} - Use `object` to preserve data as stored in Excel and not interpret dtype, - which will necessarily result in `object` dtype. + Use ``object`` to preserve data as stored in Excel and not interpret dtype, + which will necessarily result in ``object`` dtype. If converters are specified, they will be applied INSTEAD of dtype conversion. - If you use `None`, it will infer the dtype of each column based on the data. + If you use ``None``, it will infer the dtype of each column based on the data. engine : str, default None If io is not a buffer or path, this must be set to identify io. Supported engines: "xlrd", "openpyxl", "odf", "pyxlsb", "calamine". Engine compatibility : - - "xlrd" supports old-style Excel files (.xls). - - "openpyxl" supports newer Excel file formats. - - "odf" supports OpenDocument file formats (.odf, .ods, .odt). - - "pyxlsb" supports Binary Excel files. - - "calamine" supports Excel (.xls, .xlsx, .xlsm, .xlsb) + - ``xlrd`` supports old-style Excel files (.xls). + - ``openpyxl`` supports newer Excel file formats. + - ``odf`` supports OpenDocument file formats (.odf, .ods, .odt). + - ``pyxlsb`` supports Binary Excel files. + - ``calamine`` supports Excel (.xls, .xlsx, .xlsm, .xlsb) and OpenDocument (.ods) file formats. .. versionchanged:: 1.2.0 @@ -215,34 +215,34 @@ + """'. keep_default_na : bool, default True Whether or not to include the default NaN values when parsing the data. - Depending on whether `na_values` is passed in, the behavior is as follows: + Depending on whether ``na_values`` is passed in, the behavior is as follows: - * If `keep_default_na` is True, and `na_values` are specified, `na_values` - is appended to the default NaN values used for parsing. - * If `keep_default_na` is True, and `na_values` are not specified, only + * If ``keep_default_na`` is True, and ``na_values`` are specified, + ``na_values`` is appended to the default NaN values used for parsing.
+ * If ``keep_default_na`` is True, and ``na_values`` are not specified, only the default NaN values are used for parsing. - * If `keep_default_na` is False, and `na_values` are specified, only - the NaN values specified `na_values` are used for parsing. - * If `keep_default_na` is False, and `na_values` are not specified, no + * If ``keep_default_na`` is False, and ``na_values`` are specified, only + the NaN values specified in ``na_values`` are used for parsing. + * If ``keep_default_na`` is False, and ``na_values`` are not specified, no strings will be parsed as NaN. - Note that if `na_filter` is passed in as False, the `keep_default_na` and - `na_values` parameters will be ignored. + Note that if ``na_filter`` is passed in as False, the ``keep_default_na`` and + ``na_values`` parameters will be ignored. na_filter : bool, default True Detect missing value markers (empty strings and the value of na_values). In - data without any NAs, passing na_filter=False can improve the performance - of reading a large file. + data without any NAs, passing ``na_filter=False`` can improve the + performance of reading a large file. verbose : bool, default False Indicate number of NA values placed in non-numeric columns. parse_dates : bool, list-like, or dict, default False The behavior is as follows: - * bool. If True -> try parsing the index. - * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 + * ``bool``. If True -> try parsing the index. + * ``list`` of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3 each as a separate date column. - * list of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as + * ``list`` of lists. e.g. If [[1, 3]] -> combine columns 1 and 3 and parse as a single date column. - * dict, e.g. {{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call + * ``dict``, e.g. 
{{'foo' : [1, 3]}} -> parse columns 1, 3 as date and call result 'foo' If a column or index contains an unparsable date, the entire column or @@ -372,7 +372,8 @@ 1 NaN 2 2 #Comment 3 -Comment lines in the excel input file can be skipped using the `comment` kwarg +Comment lines in the excel input file can be skipped using the +``comment`` kwarg. >>> pd.read_excel('tmp.xlsx', index_col=0, comment='#') # doctest: +SKIP Name Value From b7d8da698145627c79e4374d2ce7c38845f5c2a8 Mon Sep 17 00:00:00 2001 From: Linus Sommer <95619282+linus-md@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:36:11 +0100 Subject: [PATCH 7/8] Remove changes in test_indexing.py --- pandas/tests/indexes/base_class/test_indexing.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pandas/tests/indexes/base_class/test_indexing.py b/pandas/tests/indexes/base_class/test_indexing.py index b5479545cd3e2..2988fa7d1baa1 100644 --- a/pandas/tests/indexes/base_class/test_indexing.py +++ b/pandas/tests/indexes/base_class/test_indexing.py @@ -95,20 +95,6 @@ def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self): with pytest.raises(KeyError, match="NaT"): idx.get_loc(NaT) - def test_get_loc_non_sorted_index_error_message(self): - # this calls get_loc on a non-sorted index - df = pd.DataFrame( - np.reshape(range(15), (5, 3)), - index=[7, 1, 5, 9, 3], - columns="A B C".split(), - ) - key = 4 - with pytest.raises( - KeyError, - match="not found. 
Non-sorted index: get_loc() requires a sorted index.", - ): - df.loc[:key] - def test_getitem_boolean_ea_indexer(): # GH#45806 From 108215f2adb0add27b14416d6d11de458e50c122 Mon Sep 17 00:00:00 2001 From: Linus Sommer <95619282+linus-md@users.noreply.github.com> Date: Mon, 11 Dec 2023 11:37:36 +0100 Subject: [PATCH 8/8] Update base.py --- pandas/core/indexes/base.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df42095558509..ac4d2976593a2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3769,9 +3769,6 @@ def get_loc(self, key): and any(isinstance(x, slice) for x in casted_key) ): raise InvalidIndexError(key) - if not self.is_monotonic_increasing: - raise KeyError(f"{key} not found. Non-sorted index:" \ - "get_loc() requires a sorted index.") from err raise KeyError(key) from err except TypeError: # If we have a listlike key, _check_indexing_error will raise