From b5e5b28c95a98eb7c821de6db7e2b9d4940370e5 Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Tue, 23 Jun 2020 11:42:09 +0200
Subject: [PATCH 1/8] Changed the way we are generating tuple of keys/values to
 increase the performance

---
 pandas/core/series.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index cab8dd133b579..9497b31231f97 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -354,8 +354,11 @@ def _init_dict(self, data, index=None, dtype=None):
         # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
         # raises KeyError), so we iterate the entire dict, and align
         if data:
-            keys, values = zip(*data.items())
-            values = list(values)
+            # GH:34717
+            # Using the below way to generate tuple of keys and values increasing the performance by 50%, instead of zip
+            keys = tuple(data.keys())
+            values = tuple([data[key] for key in keys])
+
         elif index is not None:
             # fastpath for Series(data=None). Just use broadcasting a scalar
             # instead of reindexing.

From c6956af484d25ac1ef5dc9358fb7cd2c8794029b Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Tue, 23 Jun 2020 12:03:00 +0200
Subject: [PATCH 2/8] Changed the way of generating the tuple

---
 pandas/core/series.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 9497b31231f97..8a2dca8665576 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -355,9 +355,12 @@ def _init_dict(self, data, index=None, dtype=None):
         # raises KeyError), so we iterate the entire dict, and align
         if data:
             # GH:34717
-            # Using the below way to generate tuple of keys and values increasing the performance by 50%, instead of zip
+            # Using the below way to generate tuple of keys and values
+            # increasing the performance by 50%, instead of zip
             keys = tuple(data.keys())
-            values = tuple([data[key] for key in keys])
+            values = (
+                *[data[key] for key in keys],
+            )  # Generating tuple of values- faster way
 
         elif index is not None:
             # fastpath for Series(data=None). Just use broadcasting a scalar

From 510bce602f639e2bab8f71b97f45a11aebd09e39 Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Tue, 23 Jun 2020 13:15:27 +0200
Subject: [PATCH 3/8] Fixing the failing type annotation checks and also the
 code comments

---
 pandas/core/series.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 8a2dca8665576..bebe3907dd6d8 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -355,12 +355,12 @@ def _init_dict(self, data, index=None, dtype=None):
         # raises KeyError), so we iterate the entire dict, and align
         if data:
             # GH:34717
-            # Using the below way to generate tuple of keys and values
+            # Using the below way for generating keys and values
             # increasing the performance by 50%, instead of zip
             keys = tuple(data.keys())
-            values = (
-                *[data[key] for key in keys],
-            )  # Generating tuple of values- faster way
+            values = [
+                data[key] for key in keys
+            ]  # Generating list of values- faster way
 
         elif index is not None:
             # fastpath for Series(data=None). Just use broadcasting a scalar

From 4655d7b03f9c87cd32deb5a7d0191b0c1abb0caf Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Tue, 23 Jun 2020 15:38:49 +0200
Subject: [PATCH 4/8] Fixed mypy static type analysis issue

---
 pandas/core/series.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index bebe3907dd6d8..a83a2e3c98431 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -358,17 +358,15 @@ def _init_dict(self, data, index=None, dtype=None):
             # Using the below way for generating keys and values
             # increasing the performance by 50%, instead of zip
             keys = tuple(data.keys())
-            values = [
-                data[key] for key in keys
-            ]  # Generating list of values- faster way
-
+            values = list(data.values())  # Generating list of values- faster way
+                
         elif index is not None:
             # fastpath for Series(data=None). Just use broadcasting a scalar
             # instead of reindexing.
             values = na_value_for_dtype(dtype)
             keys = index
         else:
-            keys, values = [], []
+            keys, values = tuple([]), []
 
         # Input is now list-like, so rely on "standard" construction:
 

From 6f7c242e945e961dbd7bcf7b164b0a4b7c87e45a Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Tue, 23 Jun 2020 16:07:07 +0200
Subject: [PATCH 5/8] Fixed linting issues

---
 pandas/core/series.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index a83a2e3c98431..83c7b5f7e3e94 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -359,7 +359,6 @@ def _init_dict(self, data, index=None, dtype=None):
             # increasing the performance by 50%, instead of zip
             keys = tuple(data.keys())
             values = list(data.values())  # Generating list of values- faster way
-                
         elif index is not None:
             # fastpath for Series(data=None). Just use broadcasting a scalar
             # instead of reindexing.

From e523480e43f4c4eae1b3a9b7c86b8ed14621d2fc Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Fri, 26 Jun 2020 18:33:25 +0200
Subject: [PATCH 6/8] removed unnecessary comments

---
 pandas/core/series.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/series.py b/pandas/core/series.py
index 83c7b5f7e3e94..fd695b35850b9 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -355,8 +355,6 @@ def _init_dict(self, data, index=None, dtype=None):
         # raises KeyError), so we iterate the entire dict, and align
         if data:
             # GH:34717
-            # Using the below way for generating keys and values
-            # increasing the performance by 50%, instead of zip
             keys = tuple(data.keys())
             values = list(data.values())  # Generating list of values- faster way
         elif index is not None:

From 242aacbc8a69d82c70a6572c0805e3cee8d62f18 Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Sun, 13 Sep 2020 11:42:29 +0200
Subject: [PATCH 7/8] Added comments to explain more about the performance
 issue; also added a note for the release v1.2

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 pandas/core/series.py          | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index e577a8f26bd12..ace549050124a 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -207,7 +207,7 @@ Performance improvements
 
 - Performance improvements when creating Series with dtype `str` or :class:`StringDtype` from array with many string elements (:issue:`36304`)
 - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
--
+- Performance improvements when creating `pd.Series.map` from a huge dictionary (:issue:`34717`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index cf1ac61e9e9af..747aabb26ca84 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -362,7 +362,10 @@ def _init_dict(self, data, index=None, dtype=None):
         # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
         # raises KeyError), so we iterate the entire dict, and align
         if data:
-            # GH:34717
+            # GH:34717: unpacking keys and values with zip(*data.items())
+            # was slow for large dicts.
+            # Read the keys and values from the dict views directly instead.
+
             keys = tuple(data.keys())
             values = list(data.values())  # Generating list of values- faster way
         elif index is not None:

From adebd8ce2ef4c2e075e72ba85d5aaa2f72ca499d Mon Sep 17 00:00:00 2001
From: Rohith295 <rangaraju963@gmail.com>
Date: Sun, 13 Sep 2020 17:32:06 +0200
Subject: [PATCH 8/8] Fixed as per review comments

---
 doc/source/whatsnew/v1.2.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index f1223863f5ac4..dbc88d0b371e8 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -207,7 +207,7 @@ Performance improvements
 
 - Performance improvements when creating Series with dtype `str` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`)
 - Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
-- Performance improvements when creating `pd.Series.map` from a huge dictionary (:issue:`34717`)
+- Performance improvement in :meth:`Series.map` when called with a huge dictionary (:issue:`34717`)
 - Performance improvement in :meth:`GroupBy.transform` with the ``numba`` engine (:issue:`36240`)
 
 .. ---------------------------------------------------------------------------