From 866d94f610d5fa5b2576dbb7dd6a41f466d6d7e1 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:40:21 -0700
Subject: [PATCH 1/4] PERF: CategoricalDtype.update_dtype

---
 doc/source/whatsnew/v3.0.0.rst | 1 +
 pandas/core/dtypes/dtypes.py   | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index da0d85b7bb529..7b4229b059d76 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -527,6 +527,7 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
 - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`)
+- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`?`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index c0587d36bcb5a..536f98e14930c 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -611,6 +611,8 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
             dtype = cast(CategoricalDtype, dtype)
 
         # update categories/ordered unless they've been explicitly passed as None
+        if dtype.categories is not None and dtype.ordered is not None:
+            return dtype
         new_categories = (
             dtype.categories if dtype.categories is not None else self.categories
         )

From f98f17e6234c2424161a63e242695f8a589eddc0 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 28 Aug 2024 13:48:44 -0700
Subject: [PATCH 2/4] Add whatsnew number and add comment

---
 doc/source/whatsnew/v3.0.0.rst | 2 +-
 pandas/core/dtypes/dtypes.py   | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 7b4229b059d76..865abaeb9d130 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -526,8 +526,8 @@ Performance improvements
 - Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
 - Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)
 - Performance improvement in :func:`merge` if hash-join can be used (:issue:`57970`)
+- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`59647`)
 - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`)
-- Performance improvement in :meth:`CategoricalDtype.update_dtype` when ``dtype`` is a :class:`CategoricalDtype` with non ``None`` categories and ordered (:issue:`?`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 536f98e14930c..339791dde05f4 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -612,6 +612,7 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
 
         # update categories/ordered unless they've been explicitly passed as None
         if dtype.categories is not None and dtype.ordered is not None:
+            # Avoid re-validation in CategoricalDtype constructor
             return dtype
         new_categories = (
             dtype.categories if dtype.categories is not None else self.categories

From 4302ea015ecd6fcb67a188dfd3ab0a4a232024f2 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 28 Aug 2024 14:21:32 -0700
Subject: [PATCH 3/4] Fix unit test

---
 pandas/tests/dtypes/test_dtypes.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 7c7da41124b83..4c65e1de76a7f 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -222,7 +222,9 @@ def test_repr_range_categories(self):
 
     def test_update_dtype(self):
         # GH 27338
-        result = CategoricalDtype(["a"]).update_dtype(Categorical(["b"], ordered=True))
+        result = CategoricalDtype(["a"]).update_dtype(
+            CategoricalDtype(["b"], ordered=True)
+        )
         expected = CategoricalDtype(["b"], ordered=True)
         assert result == expected
 

From a119f6ede4dcdca8c2c6b512eda00dbf6ed91476 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 30 Aug 2024 11:16:13 -0700
Subject: [PATCH 4/4] short circuit only for the dtype

---
 pandas/core/dtypes/dtypes.py       | 6 +++++-
 pandas/tests/dtypes/test_dtypes.py | 4 +---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 339791dde05f4..6e1e3dcb9cb3f 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -611,7 +611,11 @@ def update_dtype(self, dtype: str_type | CategoricalDtype) -> CategoricalDtype:
             dtype = cast(CategoricalDtype, dtype)
 
         # update categories/ordered unless they've been explicitly passed as None
-        if dtype.categories is not None and dtype.ordered is not None:
+        if (
+            isinstance(dtype, CategoricalDtype)
+            and dtype.categories is not None
+            and dtype.ordered is not None
+        ):
             # Avoid re-validation in CategoricalDtype constructor
             return dtype
         new_categories = (
diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py
index 4c65e1de76a7f..7c7da41124b83 100644
--- a/pandas/tests/dtypes/test_dtypes.py
+++ b/pandas/tests/dtypes/test_dtypes.py
@@ -222,9 +222,7 @@ def test_repr_range_categories(self):
 
     def test_update_dtype(self):
         # GH 27338
-        result = CategoricalDtype(["a"]).update_dtype(
-            CategoricalDtype(["b"], ordered=True)
-        )
+        result = CategoricalDtype(["a"]).update_dtype(Categorical(["b"], ordered=True))
         expected = CategoricalDtype(["b"], ordered=True)
         assert result == expected