CLN: Remove PY2/3 checks in cython files (#25876)

mroeschke · jreback · commit b878f5b5e3a8 · 2019-03-26T07:39:50.000-04:00
diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -73,7 +73,6 @@ cdef:
     object oINT64_MIN = <int64_t>INT64_MIN
     object oUINT64_MAX = <uint64_t>UINT64_MAX
 
-    bint PY2 = sys.version_info[0] == 2
     float64_t NaN = <float64_t>np.NaN
 
 
@@ -942,10 +941,9 @@ _TYPE_MAP = {
     'complex64': 'complex',
     'complex128': 'complex',
     'c': 'complex',
-    'string': 'string' if PY2 else 'bytes',
-    'S': 'string' if PY2 else 'bytes',
-    'unicode': 'unicode' if PY2 else 'string',
-    'U': 'unicode' if PY2 else 'string',
+    'string': 'bytes',
+    'S': 'bytes',
+    'U': 'string',
     'bool': 'boolean',
     'b': 'boolean',
     'datetime64[ns]': 'datetime64',
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
@@ -67,8 +67,6 @@ CParserError = ParserError
 
 
 cdef:
-    bint PY3 = (sys.version_info[0] >= 3)
-
     float64_t INF = <float64_t>np.inf
     float64_t NEGINF = -INF
 
@@ -633,12 +631,7 @@ cdef class TextReader:
                     source = gzip.GzipFile(fileobj=source)
             elif self.compression == 'bz2':
                 import bz2
-                if isinstance(source, basestring) or PY3:
-                    source = bz2.BZ2File(source, 'rb')
-                else:
-                    content = source.read()
-                    source.close()
-                    source = compat.StringIO(bz2.decompress(content))
+                source = bz2.BZ2File(source, 'rb')
             elif self.compression == 'zip':
                 import zipfile
                 zip_file = zipfile.ZipFile(source)
@@ -1396,19 +1389,12 @@ def _ensure_encoded(list lst):
         if isinstance(x, unicode):
             x = PyUnicode_AsUTF8String(x)
         elif not isinstance(x, bytes):
-            x = asbytes(x)
+            x = str(x).encode('utf-8')
 
         result.append(x)
     return result
 
 
-cdef asbytes(object o):
-    if PY3:
-        return str(o).encode('utf-8')
-    else:
-        return str(o)
-
-
 # common NA values
 # no longer excluding inf representations
 # '1.#INF','-1.#INF', '1.#INF000000',
@@ -1441,10 +1427,7 @@ cdef enum StringPath:
 cdef inline StringPath _string_path(char *encoding):
     if encoding != NULL and encoding != b"utf-8":
         return ENCODED
-    elif PY3 or encoding != NULL:
-        return UTF8
-    else:
-        return CSTRING
+    return UTF8
 
 
 # ----------------------------------------------------------------------
@@ -2155,10 +2138,7 @@ cdef raise_parser_error(object base, parser_t *parser):
 
     message = '{base}. C error: '.format(base=base)
     if parser.error_msg != NULL:
-        if PY3:
-            message += parser.error_msg.decode('utf-8')
-        else:
-            message += parser.error_msg
+        message += parser.error_msg.decode('utf-8')
     else:
         message += 'no error message set'
 
@@ -2257,12 +2237,7 @@ cdef _apply_converter(object f, parser_t *parser, int64_t col,
 
     coliter_setup(&it, parser, col, line_start)
 
-    if not PY3 and c_encoding == NULL:
-        for i in range(lines):
-            COLITER_NEXT(it, word)
-            val = PyBytes_FromString(word)
-            result[i] = f(val)
-    elif ((PY3 and c_encoding == NULL) or c_encoding == b'utf-8'):
+    if c_encoding == NULL or c_encoding == b'utf-8':
         for i in range(lines):
             COLITER_NEXT(it, word)
             val = PyUnicode_FromString(word)
diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx
@@ -45,9 +45,6 @@ from pandas._libs.tslibs.timestamps cimport create_timestamp_from_ts
 from pandas._libs.tslibs.timestamps import Timestamp
 
 
-cdef bint PY2 = str == bytes
-
-
 cdef inline object create_datetime_from_ts(
         int64_t value, npy_datetimestruct dts,
         object tz, object freq):
@@ -579,8 +576,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise',
                     if len(val) == 0 or val in nat_strings:
                         iresult[i] = NPY_NAT
                         continue
-                    if isinstance(val, unicode) and PY2:
-                        val = val.encode('utf-8')
 
                     try:
                         _string_to_dts(val, &dts, &out_local, &out_tzoffset)
diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx
@@ -31,8 +31,6 @@ from pandas._libs.tslibs.np_datetime cimport (
 from pandas._libs.tslibs.timezones import UTC
 
 
-PY2 = bytes == str
-
 # ---------------------------------------------------------------------
 # Constants
 
@@ -552,10 +550,6 @@ class _Tick(object):
         result = self.delta.__rtruediv__(other)
         return _wrap_timedelta_result(result)
 
-    if PY2:
-        __div__ = __truediv__
-        __rdiv__ = __rtruediv__
-
 
 # ----------------------------------------------------------------------
 # RelativeDelta Arithmetic
diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
@@ -53,7 +53,6 @@ from pandas._libs.tslibs.offsets cimport to_offset
 from pandas._libs.tslibs.offsets import _Tick
 
 cdef:
-    bint PY2 = str == bytes
     enum:
         INT32_MIN = -2147483648
 
@@ -1287,9 +1286,6 @@ cdef object _period_strftime(int64_t value, int freq, object fmt):
 
             result = result.replace(str_extra_fmts[i], repl)
 
-    if PY2:
-        result = result.decode('utf-8', 'ignore')
-
     return result
 
 
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx
@@ -4,7 +4,6 @@ import textwrap
 import warnings
 
 import sys
-cdef bint PY3 = (sys.version_info[0] >= 3)
 
 import cython
 
@@ -312,14 +311,6 @@ cdef inline int64_t cast_from_unit(object ts, object unit) except? -1:
     return <int64_t>(base * m) + <int64_t>(frac * m)
 
 
-cdef inline _decode_if_necessary(object ts):
-    # decode ts if necessary
-    if not isinstance(ts, unicode) and not PY3:
-        ts = str(ts).decode('utf-8')
-
-    return ts
-
-
 cdef inline parse_timedelta_string(object ts):
     """
     Parse a regular format timedelta string. Return an int64_t (in ns)
@@ -342,8 +333,6 @@ cdef inline parse_timedelta_string(object ts):
     if len(ts) == 0 or ts in nat_strings:
         return NPY_NAT
 
-    ts = _decode_if_necessary(ts)
-
     for c in ts:
 
         # skip whitespace / commas
@@ -651,8 +640,6 @@ cdef inline int64_t parse_iso_format_string(object ts) except? -1:
         bint have_dot = 0, have_value = 0, neg = 0
         list number = [], unit = []
 
-    ts = _decode_if_necessary(ts)
-
     err_msg = "Invalid ISO 8601 Duration format - {}".format(ts)
 
     for c in ts:
@@ -1389,10 +1376,6 @@ class Timedelta(_Timedelta):
             return NaT
         return float(other.value) / self.value
 
-    if not PY3:
-        __div__ = __truediv__
-        __rdiv__ = __rtruediv__
-
     def __floordiv__(self, other):
         # numpy does not implement floordiv for timedelta64 dtype, so we cannot
         # just defer