From 7ae8ebb3daa6671a63eb18ea05175c6c04226569 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Fri, 14 Jan 2022 22:07:10 +0530 Subject: [PATCH 01/14] BUG: CategoricalIndex.get_indexer issue with NaNs (#45361) categorical_index_obj.get_indexer(target) yields incorrect results when categorical_index_obj contains NaNs, and target does not. The reason for this is that, if target contains elements which do not match any category in categorical_index_obj, they are replaced by NaNs. In such a situation, if categorical_index_obj also has NaNs, then the corresponding elements in target are mapped to an index which is not -1 e.g.: ci = pd.CategoricalIndex([1, 2, np.nan, 3]) other = pd.Index([2, 3, 4]) ci.get_indexer(other) In the implementation of get_indexer, other becomes [2, 3, NaN] which is mapped to index 2, in ci --- pandas/core/indexes/base.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 95d9ed7adc360..413ac200c8acf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3696,6 +3696,10 @@ def get_indexer( tolerance=None, ) -> npt.NDArray[np.intp]: method = missing.clean_reindex_fill_method(method) + # After _maybe_cast_listlike_indexer, target elements which do not + # belong to some category are changed to NaNs + # Mask to track actual NaN values compared to inserted NaN values + target_nans = np.isnan(target) target = self._maybe_cast_listlike_indexer(target) self._check_indexing_method(method, limit, tolerance) @@ -3720,7 +3724,8 @@ def get_indexer( if self.hasnans and target.hasnans: loc = self.get_loc(np.nan) mask = target.isna() - indexer[mask] = loc + indexer[target_nans] = loc + indexer[mask & ~target_nans] = -1 return indexer if is_categorical_dtype(target.dtype): From 8b3a671ca54e18c30a12090dc0a67160c7fb7e66 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Fri, 14 Jan 2022 23:35:59 +0530 Subject: [PATCH 02/14] BUG: CategoricalIndex.get_indexer issue 
with NaNs (#45361) categorical_index_obj.get_indexer(target) yields incorrect results when categorical_index_obj contains NaNs, and target does not. The reason for this is that, if target contains elements which do not match any category in categorical_index_obj, they are replaced by NaNs. In such a situation, if categorical_index_obj also has NaNs, then the corresponding elements in target are mapped to an index which is not -1 e.g.: ci = pd.CategoricalIndex([1, 2, np.nan, 3]) other = pd.Index([2, 3, 4]) ci.get_indexer(other) In the implementation of get_indexer, other becomes [2, 3, NaN] which is mapped to index 2, in ci Update: np.isnan(target) was breaking the existing codebase. As a solution, I have enclosed this line in a try-except block --- pandas/core/indexes/base.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 413ac200c8acf..a71ab7ae6df6a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3699,7 +3699,10 @@ def get_indexer( # After _maybe_cast_listlike_indexer, target elements which do not # belong to some category are changed to NaNs # Mask to track actual NaN values compared to inserted NaN values - target_nans = np.isnan(target) + try: + target_nans = np.isnan(target) + except TypeError as e: + target_nans = False target = self._maybe_cast_listlike_indexer(target) self._check_indexing_method(method, limit, tolerance) From 6133f48cfb1de0f7ca3c37e8959a4473c584895e Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Fri, 21 Jan 2022 12:32:33 +0530 Subject: [PATCH 03/14] BUG: CategoricalIndex.get_indexer issue with NaNs (#45361) Update: Replaced `np.isnan` with `pandas.core.dtypes.missing.isna` --- pandas/core/indexes/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a71ab7ae6df6a..de8fe07f77859 100644 --- a/pandas/core/indexes/base.py +++ 
b/pandas/core/indexes/base.py @@ -3700,8 +3700,8 @@ def get_indexer( # belong to some category are changed to NaNs # Mask to track actual NaN values compared to inserted NaN values try: - target_nans = np.isnan(target) - except TypeError as e: + target_nans = isna(target) + except NotImplementedError: target_nans = False target = self._maybe_cast_listlike_indexer(target) From 0b75419e0c27393276e04a09176b9ca493164e66 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Sun, 23 Jan 2022 09:31:25 +0530 Subject: [PATCH 04/14] Added a testcase to verify output behaviour --- pandas/tests/indexes/categorical/test_indexing.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 2297f8cf87209..7a5d7a1c751fa 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -298,6 +298,13 @@ def test_get_indexer_same_categories_different_order(self): expected = np.array([1, 1], dtype="intp") tm.assert_numpy_array_equal(result, expected) + def test_get_indexer_nans_in_index_and_target(self): + # GH 45361 + ci = CategoricalIndex([1, 2, np.nan, 3]) + other = [2, 3, 4, np.nan] + res = ci.get_indexer(other) + expected = np.array([1, 3, -1, 2], dtype=np.intp) + tm.assert_numpy_array_equal(res, expected) class TestWhere: def test_where(self, listlike_box): From 7d051544324446a762a3f8f30a2b75b4b98b269e Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Sun, 23 Jan 2022 10:12:17 +0530 Subject: [PATCH 05/14] Made pre-commit changes --- pandas/tests/indexes/categorical/test_indexing.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index 7a5d7a1c751fa..a267f6a5e8249 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -304,7 +304,8 @@ 
def test_get_indexer_nans_in_index_and_target(self): other = [2, 3, 4, np.nan] res = ci.get_indexer(other) expected = np.array([1, 3, -1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(res, expected) + tm.assert_numpy_array_equal(res, expected) + class TestWhere: def test_where(self, listlike_box): From 826bb0f53d20e19992e8b459cd52365cad52624b Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Mon, 24 Jan 2022 06:50:35 +0530 Subject: [PATCH 06/14] Added a test case without NaNs --- pandas/tests/indexes/categorical/test_indexing.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index a267f6a5e8249..fd50260f608ab 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -301,10 +301,14 @@ def test_get_indexer_same_categories_different_order(self): def test_get_indexer_nans_in_index_and_target(self): # GH 45361 ci = CategoricalIndex([1, 2, np.nan, 3]) - other = [2, 3, 4, np.nan] - res = ci.get_indexer(other) - expected = np.array([1, 3, -1, 2], dtype=np.intp) - tm.assert_numpy_array_equal(res, expected) + other1 = [2, 3, 4, np.nan] + other2 = [1, 4, 2, 3] + res1 = ci.get_indexer(other1) + res2 = ci.get_indexer(other2) + expected1 = np.array([1, 3, -1, 2], dtype=np.intp) + expected2 = np.array([0, -1, 1, 3], dtype=np.intp) + tm.assert_numpy_array_equal(res1, expected1) + tm.assert_numpy_array_equal(res2, expected2) class TestWhere: From 361d40ccc08e4692b448f87eceb629f9c3347105 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Mon, 24 Jan 2022 11:35:05 +0530 Subject: [PATCH 07/14] Moved NaN test to avoid unnecessary execution --- pandas/core/indexes/base.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de8fe07f77859..62777f79926f1 100644 --- a/pandas/core/indexes/base.py +++ 
b/pandas/core/indexes/base.py @@ -3696,13 +3696,7 @@ def get_indexer( tolerance=None, ) -> npt.NDArray[np.intp]: method = missing.clean_reindex_fill_method(method) - # After _maybe_cast_listlike_indexer, target elements which do not - # belong to some category are changed to NaNs - # Mask to track actual NaN values compared to inserted NaN values - try: - target_nans = isna(target) - except NotImplementedError: - target_nans = False + orig_target = target target = self._maybe_cast_listlike_indexer(target) self._check_indexing_method(method, limit, tolerance) @@ -3725,6 +3719,13 @@ def get_indexer( indexer = self._engine.get_indexer(target.codes) if self.hasnans and target.hasnans: + # After _maybe_cast_listlike_indexer, target elements which do not + # belong to some category are changed to NaNs + # Mask to track actual NaN values compared to inserted NaN values + try: + target_nans = isna(orig_target) + except NotImplementedError: + target_nans = False loc = self.get_loc(np.nan) mask = target.isna() indexer[target_nans] = loc From b6fcd444b069d15ef6f1b670a62798ba2a8e050a Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Mon, 24 Jan 2022 21:55:15 +0530 Subject: [PATCH 08/14] Re-aligned test cases --- pandas/tests/indexes/categorical/test_indexing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/categorical/test_indexing.py b/pandas/tests/indexes/categorical/test_indexing.py index fd50260f608ab..588486452fc20 100644 --- a/pandas/tests/indexes/categorical/test_indexing.py +++ b/pandas/tests/indexes/categorical/test_indexing.py @@ -302,12 +302,12 @@ def test_get_indexer_nans_in_index_and_target(self): # GH 45361 ci = CategoricalIndex([1, 2, np.nan, 3]) other1 = [2, 3, 4, np.nan] - other2 = [1, 4, 2, 3] res1 = ci.get_indexer(other1) - res2 = ci.get_indexer(other2) expected1 = np.array([1, 3, -1, 2], dtype=np.intp) - expected2 = np.array([0, -1, 1, 3], dtype=np.intp) tm.assert_numpy_array_equal(res1, expected1) + other2 = 
[1, 4, 2, 3] + res2 = ci.get_indexer(other2) + expected2 = np.array([0, -1, 1, 3], dtype=np.intp) tm.assert_numpy_array_equal(res2, expected2) From 6628e539f11a2024218b4dc15e4526f26888c70a Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Tue, 25 Jan 2022 09:16:29 +0530 Subject: [PATCH 09/14] Removed try-except block --- pandas/core/indexes/base.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 62777f79926f1..84f9ea9c408f7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3722,10 +3722,11 @@ def get_indexer( # After _maybe_cast_listlike_indexer, target elements which do not # belong to some category are changed to NaNs # Mask to track actual NaN values compared to inserted NaN values - try: - target_nans = isna(orig_target) - except NotImplementedError: - target_nans = False + # try: + # target_nans = isna(orig_target) + # except NotImplementedError: + # target_nans = False + target_nans = isna(orig_target) loc = self.get_loc(np.nan) mask = target.isna() indexer[target_nans] = loc From d6d7b0fc278e524b5fa4509422e6d88e802fa578 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Wed, 26 Jan 2022 06:50:16 +0530 Subject: [PATCH 10/14] Cleaned up base.py --- pandas/core/indexes/base.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 84f9ea9c408f7..aac3bf3b91cd2 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3722,10 +3722,6 @@ def get_indexer( # After _maybe_cast_listlike_indexer, target elements which do not # belong to some category are changed to NaNs # Mask to track actual NaN values compared to inserted NaN values - # try: - # target_nans = isna(orig_target) - # except NotImplementedError: - # target_nans = False target_nans = isna(orig_target) loc = self.get_loc(np.nan) mask = target.isna() From 53f1ca147defd52ab6f58988cdf08e19c018bb2e Mon Sep 17 
00:00:00 2001 From: Shashank Shet Date: Fri, 28 Jan 2022 09:20:04 +0530 Subject: [PATCH 11/14] Add GH#45361 comment to code --- pandas/core/indexes/base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index aac3bf3b91cd2..1ac61675afc28 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3722,6 +3722,7 @@ def get_indexer( # After _maybe_cast_listlike_indexer, target elements which do not # belong to some category are changed to NaNs # Mask to track actual NaN values compared to inserted NaN values + # GH#45361 target_nans = isna(orig_target) loc = self.get_loc(np.nan) mask = target.isna() From 99c9f4e37cebb479aaba8cbb93423b017e5f8dd5 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Fri, 28 Jan 2022 09:50:12 +0530 Subject: [PATCH 12/14] Added whatsnew entry --- doc/source/whatsnew/v1.5.0.rst | 90 +++++++++++++++++++++++++++++----- 1 file changed, 78 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9a6455d4d012f..941478d96d011 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -20,6 +20,7 @@ Styler ^^^^^^ - New method :meth:`.Styler.to_string` for alternative customisable output methods (:issue:`44502`) + - Various bug fixes, see below. .. _whatsnew_150.enhancements.enhancement2: @@ -30,7 +31,11 @@ enhancement2 Other enhancements ^^^^^^^^^^^^^^^^^^ +- :meth:`MultiIndex.to_frame` now supports the argument ``allow_duplicates`` and raises on duplicate labels if it is missing or False (:issue:`45245`) +- :class:`StringArray` now accepts array-likes containing nan-likes (``None``, ``np.nan``) for the ``values`` parameter in its constructor in addition to strings and :attr:`pandas.NA`. 
(:issue:`40839`) - Improved the rendering of ``categories`` in :class:`CategoricalIndex` (:issue:`45218`) +- :meth:`to_numeric` now preserves float64 arrays when downcasting would generate values not representable in float32 (:issue:`43693`) +- :meth:`.GroupBy.min` and :meth:`.GroupBy.max` now supports `Numba `_ execution with the ``engine`` keyword (:issue:`45428`) - .. --------------------------------------------------------------------------- @@ -67,9 +72,10 @@ If installed, we now require: +-----------------+-----------------+----------+---------+ | Package | Minimum Version | Required | Changed | +=================+=================+==========+=========+ -| | | X | X | +| mypy (dev) | 0.931 | | X | +-----------------+-----------------+----------+---------+ + For `optional libraries `_ the general recommendation is to use the latest version. The following table lists the lowest version per library that is currently being tested throughout the development of pandas. Optional libraries below the lowest tested version may still work, but are not considered supported. @@ -94,15 +100,66 @@ Other API changes Deprecations ~~~~~~~~~~~~ + +.. _whatsnew_150.deprecations.int_slicing_series: + +In a future version, integer slicing on a :class:`Series` with a :class:`Int64Index` or :class:`RangeIndex` will be treated as *label-based*, not positional. This will make the behavior consistent with other :meth:`Series.__getitem__` and :meth:`Series.__setitem__` behaviors (:issue:`45162`). + +For example: + +.. ipython:: python + + ser = pd.Series([1, 2, 3, 4, 5], index=[2, 3, 5, 7, 11]) + +In the old behavior, ``ser[2:4]`` treats the slice as positional: + +*Old behavior*: + +.. code-block:: ipython + + In [3]: ser[2:4] + Out[3]: + 5 3 + 7 4 + dtype: int64 + +In a future version, this will be treated as label-based: + +*Future behavior*: + +.. 
code-block:: ipython + + In [4]: ser.loc[2:4] + Out[4]: + 2 1 + 3 2 + dtype: int64 + +To retain the old behavior, use ``series.iloc[i:j]``. To get the future behavior, +use ``series.loc[i:j]``. + +Slicing on a :class:`DataFrame` will not be affected. + +.. _whatsnew_150.deprecations.other: + +Other Deprecations +^^^^^^^^^^^^^^^^^^ - Deprecated the keyword ``line_terminator`` in :meth:`DataFrame.to_csv` and :meth:`Series.to_csv`, use ``lineterminator`` instead; this is for consistency with :func:`read_csv` and the standard library 'csv' module (:issue:`9568`) +- Deprecated behavior of :meth:`SparseArray.astype`, :meth:`Series.astype`, and :meth:`DataFrame.astype` with :class:`SparseDtype` when passing a non-sparse ``dtype``. In a future version, this will cast to that non-sparse dtype instead of wrapping it in a :class:`SparseDtype` (:issue:`34457`) +- Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`) +- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`) +- Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`) +- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`) - + .. --------------------------------------------------------------------------- .. 
_whatsnew_150.performance: Performance improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- +- Performance improvement in :meth:`.GroupBy.transform` for some user-defined DataFrame -> Series functions (:issue:`45387`) +- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`) - .. --------------------------------------------------------------------------- @@ -113,8 +170,8 @@ Bug fixes Categorical ^^^^^^^^^^^ -- -- +- Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`) +- Bug in :meth:`Categoricalindex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) Datetimelike ^^^^^^^^^^^^ @@ -134,12 +191,15 @@ Timezones Numeric ^^^^^^^ -- +- Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`) - Conversion ^^^^^^^^^^ +- Bug in :meth:`DataFrame.astype` not preserving subclasses (:issue:`40810`) - Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. 
``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) +- Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) +- Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) - Strings @@ -149,14 +209,16 @@ Strings Interval ^^^^^^^^ -- +- Bug in :meth:`IntervalArray.__setitem__` when setting ``np.nan`` into an integer-backed array raising ``ValueError`` instead of ``TypeError`` (:issue:`45484`) - Indexing ^^^^^^^^ +- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) -- Bug when setting an integer too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`) +- Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) +- Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtpye :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as ``np.nan`` (:issue:`44199`) - Missing @@ -171,7 +233,9 @@ MultiIndex I/O ^^^ -- +- Bug in :meth:`DataFrame.to_stata` where no error is raised if the :class:`DataFrame` contains ``-np.inf`` (:issue:`45350`) +- Bug in :meth:`DataFrame.info` where a new line at the end of the output is omitted when called on 
an empty :class:`DataFrame` (:issue:`45494`) +- Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`) - Period @@ -181,17 +245,17 @@ Period Plotting ^^^^^^^^ -- +- Bug in :meth:`DataFrame.plot.barh` that prevented labeling the x-axis and ``xlabel`` updating the y-axis label (:issue:`45144`) - Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Bug in :meth:`DataFrame.resample` ignoring ``closed="right"`` on :class:`TimedeltaIndex` (:issue:`45414`) - Reshaping ^^^^^^^^^ -- +- Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`) - Sparse @@ -206,11 +270,13 @@ ExtensionArray Styler ^^^^^^ -- +- Minor bug when attempting to apply styling functions to an empty DataFrame subset (:issue:`45313`) - Other ^^^^^ +- Bug in :meth:`Series.asof` and :meth:`DataFrame.asof` incorrectly casting bool-dtype results to ``float64`` dtype (:issue:`16063`) +- .. ***DO NOT USE THIS SECTION*** From 61a85d0e7d0acbf10e46a9ead5de942b53bc8df5 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Fri, 28 Jan 2022 09:57:42 +0530 Subject: [PATCH 13/14] Resolved merge conflict --- doc/source/whatsnew/v1.5.0.rst | 54 ++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 941478d96d011..6cde760821156 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -88,6 +88,48 @@ Optional libraries below the lowest tested version may still work, but are not c See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. + +.. 
_whatsnew_150.read_xml_dtypes: + +read_xml now supports ``dtype``, ``converters``, and ``parse_dates`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Similar to other IO methods, :func:`pandas.read_xml` now supports assigning specific dtypes to columns, +apply converter methods, and parse dates (:issue:`43567`). + +.. ipython:: python + + xml_dates = """ + + + square + 00360 + 4.0 + 2020-01-01 + + + circle + 00360 + + 2021-01-01 + + + triangle + 00180 + 3.0 + 2022-01-01 + + """ + + df = pd.read_xml( + xml_dates, + dtype={'sides': 'Int64'}, + converters={'degrees': str}, + parse_dates=['date'] + ) + df + df.dtypes + .. _whatsnew_150.api_breaking.other: Other API changes @@ -192,6 +234,7 @@ Timezones Numeric ^^^^^^^ - Bug in operations with array-likes with ``dtype="boolean"`` and :attr:`NA` incorrectly altering the array in-place (:issue:`45421`) +- Bug in multiplying a :class:`Series` with ``IntegerDtype`` or ``FloatingDtype`` by an arraylike with ``timedelta64[ns]`` dtype incorrectly raising (:issue:`45622`) - Conversion @@ -200,6 +243,7 @@ Conversion - Bug in constructing a :class:`Series` from a float-containing list or a floating-dtype ndarray-like (e.g. 
``dask.Array``) and an integer dtype raising instead of casting like we would with an ``np.ndarray`` (:issue:`40110`) - Bug in :meth:`Float64Index.astype` to unsigned integer dtype incorrectly casting to ``np.int64`` dtype (:issue:`45309`) - Bug in :meth:`Series.astype` and :meth:`DataFrame.astype` from floating dtype to unsigned integer dtype failing to raise in the presence of negative values (:issue:`45151`) +- Bug in :func:`array` with ``FloatingDtype`` and values containing float-castable strings incorrectly raising (:issue:`45424`) - Strings @@ -216,9 +260,15 @@ Indexing ^^^^^^^^ - Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) - Bug in :meth:`DataFrame.iloc` where indexing a single row on a :class:`DataFrame` with a single ExtensionDtype column gave a copy instead of a view on the underlying data (:issue:`45241`) +- Bug in setting a NA value (``None`` or ``np.nan``) into a :class:`Series` with int-based :class:`IntervalDtype` incorrectly casting to object dtype instead of a float-based :class:`IntervalDtype` (:issue:`45568`) - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) - Bug when setting a value too large for a :class:`Series` dtype failing to coerce to a common type (:issue:`26049`, :issue:`32878`) +- Bug in :meth:`loc.__setitem__` treating ``range`` keys as positional instead of label-based (:issue:`45479`) +- Bug in :meth:`Series.__setitem__` when setting ``boolean`` dtype values containing ``NA`` incorrectly raising instead of casting to ``boolean`` dtype (:issue:`45462`) - Bug in :meth:`Series.__setitem__` where setting :attr:`NA` into a numeric-dtpye :class:`Series` would incorrectly upcast to object-dtype rather than treating the value as 
``np.nan`` (:issue:`44199`) +- Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) +- Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) +- Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) - Missing @@ -246,6 +296,9 @@ Period Plotting ^^^^^^^^ - Bug in :meth:`DataFrame.plot.barh` that prevented labeling the x-axis and ``xlabel`` updating the y-axis label (:issue:`45144`) +- Bug in :meth:`DataFrame.plot.box` that prevented labeling the x-axis (:issue:`45463`) +- Bug in :meth:`DataFrame.boxplot` that prevented passing in ``xlabel`` and ``ylabel`` (:issue:`45463`) +- Bug in :meth:`DataFrame.boxplot` that prevented specifying ``vert=False`` (:issue:`36918`) - Groupby/resample/rolling @@ -256,6 +309,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :func:`concat` between a :class:`Series` with integer dtype and another with :class:`CategoricalDtype` with integer categories and containing ``NaN`` values casting to object dtype instead of ``float64`` (:issue:`45359`) +- Bug in :func:`get_dummies` that selected object and categorical dtypes but not string (:issue:`44965`) - Sparse From 0be77e530d8a3fba994dbb0ce87e0ad6f38123f1 Mon Sep 17 00:00:00 2001 From: Shashank Shet Date: Fri, 28 Jan 2022 10:03:20 +0530 Subject: [PATCH 14/14] Moved whatsnew entry to indexing section --- doc/source/whatsnew/v1.5.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 6cde760821156..b316b4ff2d688 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -213,7 +213,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :meth:`CategoricalIndex.union` when the index's categories are integer-dtype and 
the index contains ``NaN`` values incorrectly raising instead of casting to ``float64`` (:issue:`45362`) -- Bug in :meth:`Categoricalindex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) +- Datetimelike ^^^^^^^^^^^^ @@ -269,6 +269,7 @@ Indexing - Bug in :meth:`DataFrame.mask` with ``inplace=True`` and ``ExtensionDtype`` columns incorrectly raising (:issue:`45577`) - Bug in getting a column from a DataFrame with an object-dtype row index with datetime-like values: the resulting Series now preserves the exact object-dtype Index from the parent DataFrame (:issue:`42950`) - Bug in indexing on a :class:`DatetimeIndex` with a ``np.str_`` key incorrectly raising (:issue:`45580`) +- Bug in :meth:`CategoricalIndex.get_indexer` when index contains ``NaN`` values, resulting in elements that are in target but not present in the index to be mapped to the index of the NaN element, instead of -1 (:issue:`45361`) - Missing