pandas-dev · jreback · Mar 29, 2019 · Mar 28, 2019 · Mar 28, 2019 · Mar 28, 2019
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -592,8 +592,7 @@ cdef class TextReader:
         if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE:
             raise TypeError('bad "quoting" value')
 
-        if not isinstance(quote_char, (str, compat.text_type,
-                                       bytes)) and quote_char is not None:
+        if not isinstance(quote_char, (str, bytes)) and quote_char is not None:
             dtype = type(quote_char).__name__
             raise TypeError('"quotechar" must be string, '
                             'not {dtype}'.format(dtype=dtype))
@@ -2123,7 +2122,7 @@ cdef raise_parser_error(object base, parser_t *parser):
 
             # PyErr_Fetch only returned the error message in *value,
             # so the Exception class must be extracted from *type.
-            if isinstance(old_exc, compat.string_types):
+            if isinstance(old_exc, str):
                 if type != NULL:
                     exc_type = <object>type
                 else:

diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx
@@ -1,6 +1,5 @@
 import numpy as np
 
-from pandas import compat
 from pandas.core.dtypes.missing import isna, array_equivalent
 from pandas.core.dtypes.common import is_dtype_equal
 
@@ -108,8 +107,7 @@ cpdef assert_almost_equal(a, b,
     if isinstance(a, dict) or isinstance(b, dict):
         return assert_dict_equal(a, b)
 
-    if (isinstance(a, compat.string_types) or
-            isinstance(b, compat.string_types)):
+    if isinstance(a, str) or isinstance(b, str):
         assert a == b, "%r != %r" % (a, b)
         return True
 

diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx
@@ -12,7 +12,6 @@ from cpython.datetime cimport datetime
 import numpy as np
 
 import six
-from six import binary_type, text_type
 
 # Avoid import from outside _libs
 if sys.version_info.major == 2:
@@ -102,7 +101,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None):
 
     Parameters
     ----------
-    arg : compat.string_types
+    arg : str
     freq : str or DateOffset, default None
         Helps with interpreting time string if supplied
     dayfirst : bool, default None
@@ -537,13 +536,13 @@ class _timelex(object):
         if six.PY2:
             # In Python 2, we can't duck type properly because unicode has
             # a 'decode' function, and we'd be double-decoding
-            if isinstance(instream, (binary_type, bytearray)):
+            if isinstance(instream, (bytes, bytearray)):
                 instream = instream.decode()
         else:
             if getattr(instream, 'decode', None) is not None:
                 instream = instream.decode()
 
-        if isinstance(instream, text_type):
+        if isinstance(instream, str):
             self.stream = instream
         elif getattr(instream, 'read', None) is None:
             raise TypeError(

diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -2390,7 +2390,7 @@ class Period(_Period):
 
     Parameters
     ----------
-    value : Period or compat.string_types, default None
+    value : Period or str, default None
         The time period represented (e.g., '4Q2005')
     freq : str, default None
         One of pandas period strings or corresponding objects

diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py
@@ -8,10 +8,6 @@
 * lists: lrange(), lmap(), lzip(), lfilter()
 * iterable method compatibility: iteritems, iterkeys, itervalues
   * Uses the original method if available, otherwise uses items, keys, values.
-* types:
-    * text_type: unicode in Python 2, str in Python 3
-    * binary_type: str in Python 2, bytes in Python 3
-    * string_types: basestring in Python 2, str in Python 3
 * bind_method: binds functions to classes
 * add_metaclass(metaclass) - class decorator that recreates class with with the
   given metaclass instead (and avoids intermediary class creation)
@@ -30,7 +26,6 @@
 import sys
 import platform
 import types
-from unicodedata import east_asian_width
 import struct
 import inspect
 from collections import namedtuple
@@ -203,39 +198,18 @@ class to receive bound method
 # The license for this library can be found in LICENSES/SIX and the code can be
 # found at https://bitbucket.org/gutworth/six
 
-# Definition of East Asian Width
-# http://unicode.org/reports/tr11/
-# Ambiguous width can be changed by option
-_EAW_MAP = {'Na': 1, 'N': 1, 'W': 2, 'F': 2, 'H': 1}
 
 if PY3:
-    string_types = str,
-    text_type = str
-    binary_type = bytes
-
     def to_str(s):
         """
         Convert bytes and non-string into Python 3 str
         """
-        if isinstance(s, binary_type):
+        if isinstance(s, bytes):
             s = bytes_to_str(s)
-        elif not isinstance(s, string_types):
+        elif not isinstance(s, str):
             s = str(s)
         return s
 
-    def strlen(data, encoding=None):
-        # encoding is for compat with PY2
-        return len(data)
-
-    def east_asian_len(data, encoding=None, ambiguous_width=1):
-        """
-        Calculate display width considering unicode East Asian Width
-        """
-        if isinstance(data, text_type):
-            return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
-        else:
-            return len(data)
-
     def set_function_name(f, name, cls):
         """ Bind the name/qualname attributes of the function """
         f.__name__ = name
@@ -245,45 +219,19 @@ def set_function_name(f, name, cls):
         f.__module__ = cls.__module__
         return f
 else:
-    string_types = basestring,
-    text_type = unicode
-    binary_type = str
-
     def to_str(s):
         """
         Convert unicode and non-string into Python 2 str
         """
-        if not isinstance(s, string_types):
+        if not isinstance(s, basestring):
             s = str(s)
         return s
 
-    def strlen(data, encoding=None):
-        try:
-            data = data.decode(encoding)
-        except UnicodeError:
-            pass
-        return len(data)
-
-    def east_asian_len(data, encoding=None, ambiguous_width=1):
-        """
-        Calculate display width considering unicode East Asian Width
-        """
-        if isinstance(data, text_type):
-            try:
-                data = data.decode(encoding)
-            except UnicodeError:
-                pass
-            return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data)
-        else:
-            return len(data)
-
     def set_function_name(f, name, cls):
         """ Bind the name attributes of the function """
         f.__name__ = name
         return f
 
-string_and_binary_types = string_types + (binary_type,)
-
 
 def add_metaclass(metaclass):
     """Class decorator for creating a class with a metaclass."""

diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py
@@ -3,7 +3,6 @@
 import re
 import numpy as np
 from distutils.version import LooseVersion
-from pandas.compat import string_types, string_and_binary_types
 
 
 # numpy versioning
@@ -27,7 +26,7 @@
 
 
 def tz_replacer(s):
-    if isinstance(s, string_types):
+    if isinstance(s, str):
         if s.endswith('Z'):
             s = s[:-1]
         elif _tz_regex.search(s):
@@ -53,8 +52,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs):
     warning, when need to pass '2015-01-01 09:00:00'
     """
     # is_list_like
-    if (hasattr(arr, '__iter__')
-            and not isinstance(arr, string_and_binary_types)):
+    if (hasattr(arr, '__iter__') and not isinstance(arr, (str, bytes))):
         arr = [tz_replacer(s) for s in arr]
     else:
         arr = tz_replacer(arr)

diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py
@@ -6,8 +6,6 @@
 import pickle as pkl
 import sys
 
-from pandas.compat import string_types  # noqa
-
 import pandas  # noqa
 from pandas import Index, compat
 
@@ -41,7 +39,7 @@ def load_reduce(self):
         # try to re-encode the arguments
         if getattr(self, 'encoding', None) is not None:
             args = tuple(arg.encode(self.encoding)
-                         if isinstance(arg, string_types)
+                         if isinstance(arg, str)
                          else arg for arg in args)
             try:
                 stack[-1] = func(*args)

diff --git a/pandas/core/apply.py b/pandas/core/apply.py
@@ -71,8 +71,7 @@ def __init__(self, obj, func, broadcast, raw, reduce, result_type,
         self.result_type = result_type
 
         # curry if needed
-        if ((kwds or args) and
-                not isinstance(func, (np.ufunc, compat.string_types))):
+        if (kwds or args) and not isinstance(func, (np.ufunc, str)):
 
             def f(x):
                 return func(x, *args, **kwds)
@@ -119,7 +118,7 @@ def get_result(self):
             return self.apply_empty_result()
 
         # string dispatch
-        if isinstance(self.f, compat.string_types):
+        if isinstance(self.f, str):
             # Support for `frame.transform('method')`
             # Some methods (shift, etc.) require the axis argument, others
             # don't, so inspect and insert if necessary.

diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py
@@ -8,8 +8,6 @@
     is_datetime64_ns_dtype, is_extension_array_dtype, is_timedelta64_ns_dtype)
 from pandas.core.dtypes.dtypes import ExtensionDtype, registry
 
-from pandas import compat
-
 
 def array(data,         # type: Sequence[object]
           dtype=None,   # type: Optional[Union[str, np.dtype, ExtensionDtype]]
@@ -227,7 +225,7 @@ def array(data,         # type: Sequence[object]
         dtype = data.dtype
 
     # this returns None for not-found dtypes.
-    if isinstance(dtype, compat.string_types):
+    if isinstance(dtype, str):
         dtype = registry.find(dtype) or dtype
 
     if is_extension_array_dtype(dtype):

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -1946,7 +1946,7 @@ def _tidy_repr(self, max_vals=10, footer=True):
             result = '{result}\n{footer}'.format(
                 result=result, footer=self._repr_footer())
 
-        return compat.text_type(result)
+        return str(result)
 
     def _repr_categories(self):
         """
@@ -2010,7 +2010,7 @@ def _get_repr(self, length=True, na_rep='NaN', footer=True):
         formatter = fmt.CategoricalFormatter(self, length=length,
                                              na_rep=na_rep, footer=footer)
         result = formatter.to_string()
-        return compat.text_type(result)
+        return str(result)
 
     def __unicode__(self):
         """

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
@@ -12,7 +12,6 @@
 from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds
 from pandas._libs.tslibs.timestamps import (
     RoundTo, maybe_integer_op_deprecated, round_nsint64)
-import pandas.compat as compat
 from pandas.compat.numpy import function as nv
 from pandas.errors import (
     AbstractMethodError, NullFrequencyError, PerformanceWarning)
@@ -649,7 +648,7 @@ def searchsorted(self, value, side='left', sorter=None):
         indices : array of ints
             Array of insertion points with the same shape as `value`.
         """
-        if isinstance(value, compat.string_types):
+        if isinstance(value, str):
             value = self._scalar_from_string(value)
 
         if not (isinstance(value, (self._scalar_type, type(self)))
@@ -1154,7 +1153,7 @@ def _time_shift(self, periods, freq=None):
             Frequency increment to shift by.
         """
         if freq is not None and freq != self.freq:
-            if isinstance(freq, compat.string_types):
+            if isinstance(freq, str):
                 freq = frequencies.to_offset(freq)
             offset = periods * freq
             result = self + offset

diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
@@ -137,7 +137,7 @@ def wrapper(self, other):
 
         other = lib.item_from_zerodim(other)
 
-        if isinstance(other, (datetime, np.datetime64, compat.string_types)):
+        if isinstance(other, (datetime, np.datetime64, str)):
             if isinstance(other, (datetime, np.datetime64)):
                 # GH#18435 strings get a pass from tzawareness compat
                 self._assert_tzawareness_compat(other)
@@ -2031,7 +2031,7 @@ def validate_tz_from_dtype(dtype, tz):
     ValueError : on tzinfo mismatch
     """
     if dtype is not None:
-        if isinstance(dtype, compat.string_types):
+        if isinstance(dtype, str):
             try:
                 dtype = DatetimeTZDtype.construct_from_string(dtype)
             except TypeError:

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from pandas._libs import lib
-from pandas.compat import set_function_name, string_types
+from pandas.compat import set_function_name
 from pandas.util._decorators import cache_readonly
 
 from pandas.core.dtypes.base import ExtensionDtype
@@ -154,7 +154,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
             dtype = values.dtype
 
     if dtype is not None:
-        if (isinstance(dtype, string_types) and
+        if (isinstance(dtype, str) and
                 (dtype.startswith("Int") or dtype.startswith("UInt"))):
             # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
             # https://github.com/numpy/numpy/pull/7476

diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py
@@ -110,7 +110,7 @@ def __hash__(self):
     def __eq__(self, other):
         # We have to override __eq__ to handle NA values in _metadata.
         # The base class does simple == checks, which fail for NA.
-        if isinstance(other, compat.string_types):
+        if isinstance(other, str):
             try:
                 other = self.construct_from_string(other)
             except TypeError:
@@ -277,7 +277,7 @@ def _parse_subtype(dtype):
     @classmethod
     def is_dtype(cls, dtype):
         dtype = getattr(dtype, 'dtype', dtype)
-        if (isinstance(dtype, compat.string_types) and
+        if (isinstance(dtype, str) and
                 dtype.startswith("Sparse")):
             sub_type, _ = cls._parse_subtype(dtype)
             dtype = np.dtype(sub_type)
@@ -358,7 +358,7 @@ def _subtype_with_str(self):
         >>> dtype._subtype_with_str
         str
         """
-        if isinstance(self.fill_value, compat.string_types):
+        if isinstance(self.fill_value, str):
             return type(self.fill_value)
         return self.subtype
 
@@ -584,7 +584,7 @@ def __init__(self, data, sparse_index=None, index=None, fill_value=None,
             data = data.sp_values
 
         # Handle use-provided dtype
-        if isinstance(dtype, compat.string_types):
+        if isinstance(dtype, str):
             # Two options: dtype='int', regular numpy dtype
             # or dtype='Sparse[int]', a sparse dtype
             try:

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
@@ -36,8 +36,7 @@
 
 
 def _is_convertible_to_td(key):
-    return isinstance(key, (Tick, timedelta,
-                            np.timedelta64, compat.string_types))
+    return isinstance(key, (Tick, timedelta, np.timedelta64, str))
 
 
 def _field_accessor(name, alias, docstring=None):