From 4842d2699c35421e14329da68b1b51e02f9f160a Mon Sep 17 00:00:00 2001
From: Winand <winandfx@mail.ru>
Date: Wed, 27 Apr 2016 14:02:51 +0300
Subject: [PATCH] Improved performance for .str.encode/decode

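Keep calling the .encode()/.decode() method directly for the encodings
that CPython special-cases. For every other encoding, look the codec up
once with codecs.getencoder()/codecs.getdecoder() and reuse it for each
element. For the list of optimized encodings, see
https://docs.python.org/3.4/library/codecs.html#standard-encodings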
---
 doc/source/whatsnew/v0.18.1.txt |  2 ++
 pandas/core/strings.py          | 22 ++++++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt
index 4ab82ed3fcdd5..d8f817a0bea68 100644
--- a/doc/source/whatsnew/v0.18.1.txt
+++ b/doc/source/whatsnew/v0.18.1.txt
@@ -412,6 +412,8 @@ Performance Improvements
 
 - Improved performance of ``DataFrame.to_sql`` when checking case sensitivity for tables. Now only checks if table has been created correctly when table name is not lower case. (:issue:`12876`)
 - Improved performance of ``Period`` construction and plotting of ``Period``s. (:issue:`12903`, :issue:`11831`)
+- Improved performance of ``.str.encode()`` and ``.str.decode()`` methods
+
 
 
 
diff --git a/pandas/core/strings.py b/pandas/core/strings.py
index 66e4638a9e4b4..549b43be9abe5 100644
--- a/pandas/core/strings.py
+++ b/pandas/core/strings.py
@@ -13,6 +13,14 @@
 import pandas.lib as lib
 import warnings
 import textwrap
+import codecs
+
+_cpython_optimized_encoders = (
+    "utf-8", "utf8", "latin-1", "latin1", "iso-8859-1", "mbcs", "ascii"
+)
+_cpython_optimized_decoders = _cpython_optimized_encoders + (
+    "utf-16", "utf-32"
+)
 
 _shared_docs = dict()
 
@@ -1182,7 +1190,12 @@ def str_decode(arr, encoding, errors="strict"):
     -------
     decoded : Series/Index of objects
     """
-    f = lambda x: x.decode(encoding, errors)
+    if encoding in _cpython_optimized_decoders:
+        # CPython-optimized encoding: call the method directly
+        f = lambda x: x.decode(encoding, errors)
+    else:
+        decoder = codecs.getdecoder(encoding)
+        f = lambda x: decoder(x, errors)[0]
     return _na_map(f, arr)
 
 
@@ -1200,7 +1213,12 @@ def str_encode(arr, encoding, errors="strict"):
     -------
     encoded : Series/Index of objects
     """
-    f = lambda x: x.encode(encoding, errors)
+    if encoding in _cpython_optimized_encoders:
+        # CPython-optimized encoding: call the method directly
+        f = lambda x: x.encode(encoding, errors)
+    else:
+        encoder = codecs.getencoder(encoding)
+        f = lambda x: encoder(x, errors)[0]
     return _na_map(f, arr)