From 6701e71f6fc48eb96e3bbf900ac733042492a74e Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Wed, 27 Mar 2019 21:19:05 -0600 Subject: [PATCH 1/3] CLN: Remove type definitions from pandas.compat --- pandas/_libs/parsers.pyx | 5 +- pandas/_libs/testing.pyx | 4 +- pandas/_libs/tslibs/parsing.pyx | 7 +-- pandas/_libs/tslibs/period.pyx | 2 +- pandas/compat/__init__.py | 58 +------------------ pandas/compat/numpy/__init__.py | 6 +- pandas/compat/pickle_compat.py | 4 +- pandas/core/apply.py | 5 +- pandas/core/arrays/array_.py | 4 +- pandas/core/arrays/categorical.py | 4 +- pandas/core/arrays/datetimelike.py | 5 +- pandas/core/arrays/datetimes.py | 4 +- pandas/core/arrays/integer.py | 4 +- pandas/core/arrays/sparse.py | 8 +-- pandas/core/arrays/timedeltas.py | 3 +- pandas/core/base.py | 6 +- pandas/core/common.py | 4 +- pandas/core/computation/common.py | 4 +- pandas/core/computation/engines.py | 3 +- pandas/core/computation/eval.py | 3 +- pandas/core/computation/expr.py | 6 +- pandas/core/computation/ops.py | 8 +-- pandas/core/computation/pytables.py | 14 ++--- pandas/core/computation/scope.py | 2 +- pandas/core/dtypes/base.py | 4 +- pandas/core/dtypes/cast.py | 16 ++--- pandas/core/dtypes/common.py | 6 +- pandas/core/dtypes/dtypes.py | 34 +++++------ pandas/core/dtypes/inference.py | 18 +++--- pandas/core/frame.py | 23 ++++---- pandas/core/generic.py | 15 +++-- pandas/core/groupby/generic.py | 14 ++--- pandas/core/groupby/grouper.py | 5 +- pandas/core/indexes/base.py | 10 ++-- pandas/core/indexes/datetimes.py | 11 ++-- pandas/core/indexes/multi.py | 6 +- pandas/core/indexes/numeric.py | 3 +- pandas/core/indexes/period.py | 5 +- pandas/core/indexes/timedeltas.py | 6 +- pandas/core/indexing.py | 10 ++-- pandas/core/internals/blocks.py | 9 ++- pandas/core/internals/construction.py | 2 +- pandas/core/missing.py | 3 +- pandas/core/ops.py | 2 +- pandas/core/panel.py | 4 +- pandas/core/resample.py | 7 +-- pandas/core/reshape/concat.py | 2 +- pandas/core/reshape/melt.py | 
2 +- pandas/core/reshape/merge.py | 6 +- pandas/core/reshape/pivot.py | 12 ++-- pandas/core/reshape/reshape.py | 13 ++--- pandas/core/series.py | 6 +- pandas/core/sorting.py | 4 +- pandas/core/strings.py | 22 +++---- pandas/core/window.py | 11 ++-- pandas/io/clipboard/clipboards.py | 4 +- pandas/io/common.py | 16 +++-- pandas/io/excel/_base.py | 10 ++-- pandas/io/excel/_util.py | 4 +- pandas/io/excel/_xlrd.py | 4 +- pandas/io/excel/_xlsxwriter.py | 9 ++- pandas/io/formats/format.py | 34 +++++++---- pandas/io/formats/latex.py | 4 +- pandas/io/formats/printing.py | 12 ++-- pandas/io/html.py | 14 ++--- pandas/io/json/json.py | 6 +- pandas/io/json/normalize.py | 4 +- pandas/io/packers.py | 8 +-- pandas/io/parquet.py | 3 +- pandas/io/parsers.py | 34 +++++------ pandas/io/pytables.py | 14 ++--- pandas/io/sas/sas7bdat.py | 3 +- pandas/io/sas/sas_xport.py | 2 +- pandas/io/sas/sasreader.py | 4 +- pandas/io/sql.py | 19 +++--- pandas/io/stata.py | 14 ++--- pandas/plotting/_converter.py | 19 +++--- pandas/plotting/_core.py | 6 +- pandas/plotting/_style.py | 7 +-- pandas/plotting/_timeseries.py | 3 +- pandas/tests/extension/base/printing.py | 5 +- pandas/tests/frame/common.py | 4 +- pandas/tests/frame/test_dtypes.py | 28 ++++----- pandas/tests/frame/test_duplicates.py | 4 +- pandas/tests/frame/test_to_csv.py | 6 +- pandas/tests/indexes/multi/test_format.py | 4 +- .../tests/indexes/period/test_construction.py | 20 +++---- pandas/tests/indexes/test_common.py | 4 +- pandas/tests/indexing/test_categorical.py | 10 ++-- pandas/tests/io/formats/test_format.py | 7 +-- pandas/tests/io/formats/test_printing.py | 7 +-- pandas/tests/io/formats/test_to_html.py | 4 +- pandas/tests/io/parser/test_common.py | 2 +- pandas/tests/io/test_pytables.py | 6 +- pandas/tests/io/test_sql.py | 11 ++-- pandas/tests/io/test_stata.py | 2 +- pandas/tests/scalar/period/test_period.py | 4 +- pandas/tests/series/test_api.py | 4 +- pandas/tests/series/test_dtypes.py | 9 ++- pandas/tests/test_strings.py | 4 +- 
pandas/tseries/frequencies.py | 3 +- pandas/tseries/offsets.py | 5 +- pandas/util/testing.py | 12 +--- scripts/find_commits_touching_func.py | 10 ++-- scripts/merge-pr.py | 5 +- 105 files changed, 387 insertions(+), 514 deletions(-) diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx index be2a8da58c7e4..7a9db38957e94 100644 --- a/pandas/_libs/parsers.pyx +++ b/pandas/_libs/parsers.pyx @@ -592,8 +592,7 @@ cdef class TextReader: if not QUOTE_MINIMAL <= quoting <= QUOTE_NONE: raise TypeError('bad "quoting" value') - if not isinstance(quote_char, (str, compat.text_type, - bytes)) and quote_char is not None: + if not isinstance(quote_char, (str, bytes)) and quote_char is not None: dtype = type(quote_char).__name__ raise TypeError('"quotechar" must be string, ' 'not {dtype}'.format(dtype=dtype)) @@ -2123,7 +2122,7 @@ cdef raise_parser_error(object base, parser_t *parser): # PyErr_Fetch only returned the error message in *value, # so the Exception class must be extracted from *type. 
- if isinstance(old_exc, compat.string_types): + if isinstance(old_exc, str): if type != NULL: exc_type = type else: diff --git a/pandas/_libs/testing.pyx b/pandas/_libs/testing.pyx index 10f68187938c0..ae0d3ac1a61ca 100644 --- a/pandas/_libs/testing.pyx +++ b/pandas/_libs/testing.pyx @@ -1,6 +1,5 @@ import numpy as np -from pandas import compat from pandas.core.dtypes.missing import isna, array_equivalent from pandas.core.dtypes.common import is_dtype_equal @@ -108,8 +107,7 @@ cpdef assert_almost_equal(a, b, if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) - if (isinstance(a, compat.string_types) or - isinstance(b, compat.string_types)): + if isinstance(a, str) or isinstance(b, str): assert a == b, "%r != %r" % (a, b) return True diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 70b0f9c05f8fe..6e191d572529f 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -12,7 +12,6 @@ from cpython.datetime cimport datetime import numpy as np import six -from six import binary_type, text_type # Avoid import from outside _libs if sys.version_info.major == 2: @@ -102,7 +101,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): Parameters ---------- - arg : compat.string_types + arg : str freq : str or DateOffset, default None Helps with interpreting time string if supplied dayfirst : bool, default None @@ -537,13 +536,13 @@ class _timelex(object): if six.PY2: # In Python 2, we can't duck type properly because unicode has # a 'decode' function, and we'd be double-decoding - if isinstance(instream, (binary_type, bytearray)): + if isinstance(instream, (bytes, bytearray)): instream = instream.decode() else: if getattr(instream, 'decode', None) is not None: instream = instream.decode() - if isinstance(instream, text_type): + if isinstance(instream, str): self.stream = instream elif getattr(instream, 'read', None) is None: raise TypeError( diff --git 
a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index d4cb58c93b1fe..44442be3dc55b 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -2390,7 +2390,7 @@ class Period(_Period): Parameters ---------- - value : Period or compat.string_types, default None + value : Period or str, default None The time period represented (e.g., '4Q2005') freq : str, default None One of pandas period strings or corresponding objects diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index be59f838fce9e..caf70a32e8d19 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -8,10 +8,6 @@ * lists: lrange(), lmap(), lzip(), lfilter() * iterable method compatibility: iteritems, iterkeys, itervalues * Uses the original method if available, otherwise uses items, keys, values. -* types: - * text_type: unicode in Python 2, str in Python 3 - * binary_type: str in Python 2, bytes in Python 3 - * string_types: basestring in Python 2, str in Python 3 * bind_method: binds functions to classes * add_metaclass(metaclass) - class decorator that recreates class with with the given metaclass instead (and avoids intermediary class creation) @@ -30,7 +26,6 @@ import sys import platform import types -from unicodedata import east_asian_width import struct import inspect from collections import namedtuple @@ -203,39 +198,18 @@ class to receive bound method # The license for this library can be found in LICENSES/SIX and the code can be # found at https://bitbucket.org/gutworth/six -# Definition of East Asian Width -# http://unicode.org/reports/tr11/ -# Ambiguous width can be changed by option -_EAW_MAP = {'Na': 1, 'N': 1, 'W': 2, 'F': 2, 'H': 1} if PY3: - string_types = str, - text_type = str - binary_type = bytes - def to_str(s): """ Convert bytes and non-string into Python 3 str """ - if isinstance(s, binary_type): + if isinstance(s, bytes): s = bytes_to_str(s) - elif not isinstance(s, string_types): + elif not isinstance(s, 
str): s = str(s) return s - def strlen(data, encoding=None): - # encoding is for compat with PY2 - return len(data) - - def east_asian_len(data, encoding=None, ambiguous_width=1): - """ - Calculate display width considering unicode East Asian Width - """ - if isinstance(data, text_type): - return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data) - else: - return len(data) - def set_function_name(f, name, cls): """ Bind the name/qualname attributes of the function """ f.__name__ = name @@ -245,45 +219,19 @@ def set_function_name(f, name, cls): f.__module__ = cls.__module__ return f else: - string_types = basestring, - text_type = unicode - binary_type = str - def to_str(s): """ Convert unicode and non-string into Python 2 str """ - if not isinstance(s, string_types): + if not isinstance(s, basestring): s = str(s) return s - def strlen(data, encoding=None): - try: - data = data.decode(encoding) - except UnicodeError: - pass - return len(data) - - def east_asian_len(data, encoding=None, ambiguous_width=1): - """ - Calculate display width considering unicode East Asian Width - """ - if isinstance(data, text_type): - try: - data = data.decode(encoding) - except UnicodeError: - pass - return sum(_EAW_MAP.get(east_asian_width(c), ambiguous_width) for c in data) - else: - return len(data) - def set_function_name(f, name, cls): """ Bind the name attributes of the function """ f.__name__ = name return f -string_and_binary_types = string_types + (binary_type,) - def add_metaclass(metaclass): """Class decorator for creating a class with a metaclass.""" diff --git a/pandas/compat/numpy/__init__.py b/pandas/compat/numpy/__init__.py index 6e9f768d8bd68..0cbf2ad03b083 100644 --- a/pandas/compat/numpy/__init__.py +++ b/pandas/compat/numpy/__init__.py @@ -3,7 +3,6 @@ import re import numpy as np from distutils.version import LooseVersion -from pandas.compat import string_types, string_and_binary_types # numpy versioning @@ -28,7 +27,7 @@ def tz_replacer(s): - if 
isinstance(s, string_types): + if isinstance(s, str): if s.endswith('Z'): s = s[:-1] elif _tz_regex.search(s): @@ -54,8 +53,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs): warning, when need to pass '2015-01-01 09:00:00' """ # is_list_like - if (hasattr(arr, '__iter__') - and not isinstance(arr, string_and_binary_types)): + if (hasattr(arr, '__iter__') and not isinstance(arr, (str, bytes))): arr = [tz_replacer(s) for s in arr] else: arr = tz_replacer(arr) diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 5219dffd9c8e3..4e9cfe92a966a 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -6,8 +6,6 @@ import pickle as pkl import sys -from pandas.compat import string_types # noqa - import pandas # noqa from pandas import Index, compat @@ -41,7 +39,7 @@ def load_reduce(self): # try to re-encode the arguments if getattr(self, 'encoding', None) is not None: args = tuple(arg.encode(self.encoding) - if isinstance(arg, string_types) + if isinstance(arg, str) else arg for arg in args) try: stack[-1] = func(*args) diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 5658094ec36c6..db86a194ba709 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -71,8 +71,7 @@ def __init__(self, obj, func, broadcast, raw, reduce, result_type, self.result_type = result_type # curry if needed - if ((kwds or args) and - not isinstance(func, (np.ufunc, compat.string_types))): + if (kwds or args) and not isinstance(func, (np.ufunc, str)): def f(x): return func(x, *args, **kwds) @@ -119,7 +118,7 @@ def get_result(self): return self.apply_empty_result() # string dispatch - if isinstance(self.f, compat.string_types): + if isinstance(self.f, str): # Support for `frame.transform('method')` # Some methods (shift, etc.) require the axis argument, others # don't, so inspect and insert if necessary. 
diff --git a/pandas/core/arrays/array_.py b/pandas/core/arrays/array_.py index 254ab876af1ac..37e6c3f239162 100644 --- a/pandas/core/arrays/array_.py +++ b/pandas/core/arrays/array_.py @@ -8,8 +8,6 @@ is_datetime64_ns_dtype, is_extension_array_dtype, is_timedelta64_ns_dtype) from pandas.core.dtypes.dtypes import ExtensionDtype, registry -from pandas import compat - def array(data, # type: Sequence[object] dtype=None, # type: Optional[Union[str, np.dtype, ExtensionDtype]] @@ -227,7 +225,7 @@ def array(data, # type: Sequence[object] dtype = data.dtype # this returns None for not-found dtypes. - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): dtype = registry.find(dtype) or dtype if is_extension_array_dtype(dtype): diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 75b64a06fe8e8..6366410e65683 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1953,7 +1953,7 @@ def _tidy_repr(self, max_vals=10, footer=True): result = '{result}\n{footer}'.format( result=result, footer=self._repr_footer()) - return compat.text_type(result) + return str(result) def _repr_categories(self): """ @@ -2017,7 +2017,7 @@ def _get_repr(self, length=True, na_rep='NaN', footer=True): formatter = fmt.CategoricalFormatter(self, length=length, na_rep=na_rep, footer=footer) result = formatter.to_string() - return compat.text_type(result) + return str(result) def __unicode__(self): """ diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 27d7d4f888550..4a6f04103540c 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -12,7 +12,6 @@ from pandas._libs.tslibs.timedeltas import Timedelta, delta_to_nanoseconds from pandas._libs.tslibs.timestamps import ( RoundTo, maybe_integer_op_deprecated, round_nsint64) -import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.errors import ( 
AbstractMethodError, NullFrequencyError, PerformanceWarning) @@ -649,7 +648,7 @@ def searchsorted(self, value, side='left', sorter=None): indices : array of ints Array of insertion points with the same shape as `value`. """ - if isinstance(value, compat.string_types): + if isinstance(value, str): value = self._scalar_from_string(value) if not (isinstance(value, (self._scalar_type, type(self))) @@ -1154,7 +1153,7 @@ def _time_shift(self, periods, freq=None): Frequency increment to shift by. """ if freq is not None and freq != self.freq: - if isinstance(freq, compat.string_types): + if isinstance(freq, str): freq = frequencies.to_offset(freq) offset = periods * freq result = self + offset diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 33e6674389e7c..011d4fdaba529 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -137,7 +137,7 @@ def wrapper(self, other): other = lib.item_from_zerodim(other) - if isinstance(other, (datetime, np.datetime64, compat.string_types)): + if isinstance(other, (datetime, np.datetime64, str)): if isinstance(other, (datetime, np.datetime64)): # GH#18435 strings get a pass from tzawareness compat self._assert_tzawareness_compat(other) @@ -2034,7 +2034,7 @@ def validate_tz_from_dtype(dtype, tz): ValueError : on tzinfo mismatch """ if dtype is not None: - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): try: dtype = DatetimeTZDtype.construct_from_string(dtype) except TypeError: diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py index 92505bd193a49..4519931ff8247 100644 --- a/pandas/core/arrays/integer.py +++ b/pandas/core/arrays/integer.py @@ -5,7 +5,7 @@ import numpy as np from pandas._libs import lib -from pandas.compat import set_function_name, string_types +from pandas.compat import set_function_name from pandas.util._decorators import cache_readonly from pandas.core.dtypes.base import ExtensionDtype @@ -154,7 +154,7 @@ def 
coerce_to_array(values, dtype, mask=None, copy=False): dtype = values.dtype if dtype is not None: - if (isinstance(dtype, string_types) and + if (isinstance(dtype, str) and (dtype.startswith("Int") or dtype.startswith("UInt"))): # Avoid DeprecationWarning from NumPy about np.dtype("Int64") # https://github.com/numpy/numpy/pull/7476 diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 96c89981ff5e9..6c30fe691d0af 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -110,7 +110,7 @@ def __hash__(self): def __eq__(self, other): # We have to override __eq__ to handle NA values in _metadata. # The base class does simple == checks, which fail for NA. - if isinstance(other, compat.string_types): + if isinstance(other, str): try: other = self.construct_from_string(other) except TypeError: @@ -277,7 +277,7 @@ def _parse_subtype(dtype): @classmethod def is_dtype(cls, dtype): dtype = getattr(dtype, 'dtype', dtype) - if (isinstance(dtype, compat.string_types) and + if (isinstance(dtype, str) and dtype.startswith("Sparse")): sub_type, _ = cls._parse_subtype(dtype) dtype = np.dtype(sub_type) @@ -358,7 +358,7 @@ def _subtype_with_str(self): >>> dtype._subtype_with_str str """ - if isinstance(self.fill_value, compat.string_types): + if isinstance(self.fill_value, str): return type(self.fill_value) return self.subtype @@ -584,7 +584,7 @@ def __init__(self, data, sparse_index=None, index=None, fill_value=None, data = data.sp_values # Handle use-provided dtype - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): # Two options: dtype='int', regular numpy dtype # or dtype='Sparse[int]', a sparse dtype try: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 0d677b250982a..d1bf59218edfe 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -36,8 +36,7 @@ def _is_convertible_to_td(key): - return isinstance(key, (Tick, timedelta, - 
np.timedelta64, compat.string_types)) + return isinstance(key, (Tick, timedelta, np.timedelta64, str)) def _field_accessor(name, alias, docstring=None): diff --git a/pandas/core/base.py b/pandas/core/base.py index 5adf01a62352c..27b7694f239b4 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -306,7 +306,7 @@ def _try_aggregate_string_function(self, arg, *args, **kwargs): - raise """ - assert isinstance(arg, compat.string_types) + assert isinstance(arg, str) f = getattr(self, arg, None) if f is not None: @@ -353,7 +353,7 @@ def _aggregate(self, arg, *args, **kwargs): _axis = getattr(self, 'axis', 0) _level = kwargs.pop('_level', None) - if isinstance(arg, compat.string_types): + if isinstance(arg, str): return self._try_aggregate_string_function(arg, *args, **kwargs), None @@ -554,7 +554,7 @@ def is_any_frame(): name=getattr(self, 'name', None)) return result, True - elif is_list_like(arg) and arg not in compat.string_types: + elif is_list_like(arg): # we require a list, but not an 'str' return self._aggregate_multiple_funcs(arg, _level=_level, diff --git a/pandas/core/common.py b/pandas/core/common.py index 77b7b94e7a1f7..6a55c39b51379 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -246,7 +246,7 @@ def asarray_tuplesafe(values, dtype=None): result = np.asarray(values, dtype=dtype) - if issubclass(result.dtype.type, compat.string_types): + if issubclass(result.dtype.type, str): result = np.asarray(values, dtype=object) if result.ndim == 2: @@ -271,7 +271,7 @@ def index_labels_to_array(labels, dtype=None): ------- array """ - if isinstance(labels, (compat.string_types, tuple)): + if isinstance(labels, (str, tuple)): labels = [labels] if not isinstance(labels, (list, np.ndarray)): diff --git a/pandas/core/computation/common.py b/pandas/core/computation/common.py index ef686a1205f44..6a0e7981ad82b 100644 --- a/pandas/core/computation/common.py +++ b/pandas/core/computation/common.py @@ -2,8 +2,6 @@ import numpy as np -from pandas.compat 
import string_types - import pandas as pd # A token value Python's tokenizer probably will never use. @@ -30,7 +28,7 @@ def _result_type_many(*arrays_and_dtypes): def _remove_spaces_column_name(name): """Check if name contains any spaces, if it contains any spaces the spaces will be removed and an underscore suffix is added.""" - if not isinstance(name, string_types) or " " not in name: + if not isinstance(name, str) or " " not in name: return name return name.replace(" ", "_") + "_BACKTICK_QUOTED_STRING" diff --git a/pandas/core/computation/engines.py b/pandas/core/computation/engines.py index cb03d16b06d50..505589db09731 100644 --- a/pandas/core/computation/engines.py +++ b/pandas/core/computation/engines.py @@ -4,7 +4,6 @@ import abc -from pandas import compat from pandas.core.computation.align import _align, _reconstruct_object from pandas.core.computation.ops import ( UndefinedVariableError, _mathops, _reductions) @@ -124,7 +123,7 @@ def _evaluate(self): try: msg = e.message except AttributeError: - msg = compat.text_type(e) + msg = str(e) raise UndefinedVariableError(msg) diff --git a/pandas/core/computation/eval.py b/pandas/core/computation/eval.py index 23c3e0eaace81..8f6c271af4a58 100644 --- a/pandas/core/computation/eval.py +++ b/pandas/core/computation/eval.py @@ -6,7 +6,6 @@ import tokenize import warnings -from pandas.compat import string_types from pandas.util._validators import validate_bool_kwarg from pandas.core.computation.engines import _engines @@ -263,7 +262,7 @@ def eval(expr, parser='pandas', engine=None, truediv=True, inplace = validate_bool_kwarg(inplace, "inplace") - if isinstance(expr, string_types): + if isinstance(expr, str): _check_expression(expr) exprs = [e.strip() for e in expr.splitlines() if e.strip() != ''] else: diff --git a/pandas/core/computation/expr.py b/pandas/core/computation/expr.py index 16795ea8c58e9..779cc0df1a6fe 100644 --- a/pandas/core/computation/expr.py +++ b/pandas/core/computation/expr.py @@ -9,7 +9,7 @@ import 
numpy as np -from pandas.compat import StringIO, lmap, string_types +from pandas.compat import StringIO, lmap import pandas as pd from pandas import compat @@ -188,7 +188,7 @@ def _is_type(t): _is_list = _is_type(list) -_is_str = _is_type(string_types) +_is_str = _is_type(str) # partition all AST nodes @@ -355,7 +355,7 @@ def __init__(self, env, engine, parser, preparser=_preparse): self.assigner = None def visit(self, node, **kwargs): - if isinstance(node, string_types): + if isinstance(node, str): clean = self.preparser(node) try: node = ast.fix_missing_locations(ast.parse(clean)) diff --git a/pandas/core/computation/ops.py b/pandas/core/computation/ops.py index 5c70255982e54..298284069305e 100644 --- a/pandas/core/computation/ops.py +++ b/pandas/core/computation/ops.py @@ -9,7 +9,7 @@ import numpy as np from pandas._libs.tslibs import Timestamp -from pandas.compat import PY3, string_types, text_type +from pandas.compat import PY3 from pandas.core.dtypes.common import is_list_like, is_scalar @@ -50,7 +50,7 @@ def __init__(self, name, is_local): class Term(StringMixin): def __new__(cls, name, env, side=None, encoding=None): - klass = Constant if not isinstance(name, string_types) else cls + klass = Constant if not isinstance(name, str) else cls supr_new = super(Term, klass).__new__ return supr_new(klass) @@ -58,7 +58,7 @@ def __init__(self, name, env, side=None, encoding=None): self._name = name self.env = env self.side = side - tname = text_type(name) + tname = str(name) self.is_local = (tname.startswith(_LOCAL_TAG) or tname in _DEFAULT_GLOBALS) self._value = self._resolve_name() @@ -99,7 +99,7 @@ def update(self, value): key = self.name # if it's a variable name (otherwise a constant) - if isinstance(key, string_types): + if isinstance(key, str): self.env.swapkey(self.local_name, key, new_value=value) self.value = value diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 070d116e248b4..52e255446bd1f 100644 --- 
a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -6,7 +6,7 @@ import numpy as np from pandas._libs.tslibs import Timedelta, Timestamp -from pandas.compat import DeepChainMap, string_types +from pandas.compat import DeepChainMap from pandas.core.dtypes.common import is_list_like @@ -34,7 +34,7 @@ def __init__(self, level, global_dict=None, local_dict=None, class Term(ops.Term): def __new__(cls, name, env, side=None, encoding=None): - klass = Constant if not isinstance(name, string_types) else cls + klass = Constant if not isinstance(name, str) else cls supr_new = StringMixin.__new__ return supr_new(klass) @@ -209,14 +209,14 @@ def stringify(value): v = float(v) return TermValue(v, v, kind) elif kind == 'bool': - if isinstance(v, string_types): + if isinstance(v, str): v = not v.strip().lower() in ['false', 'f', 'no', 'n', 'none', '0', '[]', '{}', ''] else: v = bool(v) return TermValue(v, v, kind) - elif isinstance(v, string_types): + elif isinstance(v, str): # string quoting return TermValue(v, stringify(v), 'string') else: @@ -476,7 +476,7 @@ def _validate_where(w): TypeError : An invalid data type was passed in for w (e.g. dict). 
""" - if not (isinstance(w, (Expr, string_types)) or is_list_like(w)): + if not (isinstance(w, (Expr, str)) or is_list_like(w)): raise TypeError("where must be passed as a string, Expr, " "or list-like of Exprs") @@ -541,7 +541,7 @@ def __init__(self, where, queryables=None, encoding=None, scope_level=0): self.expr = where self.env = Scope(scope_level + 1, local_dict=local_dict) - if queryables is not None and isinstance(self.expr, string_types): + if queryables is not None and isinstance(self.expr, str): self.env.queryables.update(queryables) self._visitor = ExprVisitor(self.env, queryables=queryables, parser='pytables', engine='pytables', @@ -597,7 +597,7 @@ def tostring(self, encoding): def maybe_expression(s): """ loose checking if s is a pytables-acceptable expression """ - if not isinstance(s, string_types): + if not isinstance(s, str): return False ops = ExprVisitor.binary_ops + ExprVisitor.unary_ops + ('=',) diff --git a/pandas/core/computation/scope.py b/pandas/core/computation/scope.py index a2825fa9740de..dd5d66a4e0b42 100644 --- a/pandas/core/computation/scope.py +++ b/pandas/core/computation/scope.py @@ -160,7 +160,7 @@ def resolve(self, key, is_local): Parameters ---------- - key : text_type + key : str A variable name is_local : bool Flag indicating whether the variable is local or not (prefixed with diff --git a/pandas/core/dtypes/base.py b/pandas/core/dtypes/base.py index 8269f8c88ffd3..9c66d6235980b 100644 --- a/pandas/core/dtypes/base.py +++ b/pandas/core/dtypes/base.py @@ -7,8 +7,6 @@ from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries -from pandas import compat - class _DtypeOpsMixin(object): # Not all of pandas' extension dtypes are compatibile with @@ -46,7 +44,7 @@ def __eq__(self, other): ------- bool """ - if isinstance(other, compat.string_types): + if isinstance(other, str): try: other = self.construct_from_string(other) except TypeError: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 
6ac7fdd2434c7..1d09f7434408f 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -6,7 +6,7 @@ from pandas._libs import lib, tslib, tslibs from pandas._libs.tslibs import NaT, OutOfBoundsDatetime, Period, iNaT -from pandas.compat import PY3, string_types, text_type, to_str +from pandas.compat import PY3, to_str from .common import ( _INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8, @@ -73,7 +73,7 @@ def maybe_downcast_to_dtype(result, dtype): def trans(x): return x - if isinstance(dtype, string_types): + if isinstance(dtype, str): if dtype == 'infer': inferred_type = lib.infer_dtype(ensure_object(result.ravel()), skipna=False) @@ -96,7 +96,7 @@ def trans(x): # noqa else: dtype = 'object' - if isinstance(dtype, string_types): + if isinstance(dtype, str): dtype = np.dtype(dtype) try: @@ -328,7 +328,7 @@ def maybe_promote(dtype, fill_value=np.nan): pass elif is_datetime64tz_dtype(dtype): pass - elif issubclass(np.dtype(dtype).type, string_types): + elif issubclass(np.dtype(dtype).type, str): dtype = np.object_ return dtype, fill_value @@ -374,7 +374,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False): dtype = val.dtype val = val.item() - elif isinstance(val, string_types): + elif isinstance(val, str): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry @@ -635,12 +635,12 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): if not isinstance(dtype, np.dtype): dtype = pandas_dtype(dtype) - if issubclass(dtype.type, text_type): + if issubclass(dtype.type, str): # in Py3 that's str, in Py2 that's unicode return lib.astype_unicode(arr.ravel(), skipna=skipna).reshape(arr.shape) - elif issubclass(dtype.type, string_types): + elif issubclass(dtype.type, str): return lib.astype_str(arr.ravel(), skipna=skipna).reshape(arr.shape) @@ -971,7 +971,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): from pandas.core.tools.datetimes import to_datetime 
if dtype is not None: - if isinstance(dtype, string_types): + if isinstance(dtype, str): dtype = np.dtype(dtype) is_datetime64 = is_datetime64_dtype(dtype) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 4be7eb8ddb890..df9e036848df8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -5,7 +5,7 @@ from pandas._libs import algos, lib from pandas._libs.tslibs import conversion -from pandas.compat import PY3, PY36, string_types +from pandas.compat import PY3, PY36 from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, ExtensionDtype, IntervalDtype, @@ -1954,7 +1954,7 @@ def infer_dtype_from_object(dtype): if is_extension_array_dtype(dtype): return dtype.type - elif isinstance(dtype, string_types): + elif isinstance(dtype, str): # TODO(jreback) # should deprecate these @@ -2037,7 +2037,7 @@ def pandas_dtype(dtype): npdtype = np.dtype(dtype) except Exception: # we don't want to force a repr of the non-string - if not isinstance(dtype, string_types): + if not isinstance(dtype, str): raise TypeError("data type not understood") raise TypeError("data type '{}' not understood".format( dtype)) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 34b4c3eac99c1..f0dd70886dc06 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -77,7 +77,7 @@ def find(self, dtype): ------- return the first matching dtype, otherwise return None """ - if not isinstance(dtype, compat.string_types): + if not isinstance(dtype, str): dtype_type = dtype if not isinstance(dtype, type): dtype_type = type(dtype) @@ -305,7 +305,7 @@ def _from_values_or_dtype(cls, values=None, categories=None, ordered=None, if dtype is not None: # The dtype argument takes precedence over values.dtype (if any) - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): if dtype == 'category': dtype = CategoricalDtype(categories, ordered) else: @@ -367,7 +367,7 @@ def __eq__(self, 
other): not required. There is no distinction between False/None. 6) Any other comparison returns False """ - if isinstance(other, compat.string_types): + if isinstance(other, str): return other == self.name elif other is self: return True @@ -532,7 +532,7 @@ def update_dtype(self, dtype): ------- new_dtype : CategoricalDtype """ - if isinstance(dtype, compat.string_types) and dtype == 'category': + if isinstance(dtype, str) and dtype == 'category': # dtype='category' should not change anything return self elif not self.is_dtype(dtype): @@ -623,7 +623,7 @@ def __init__(self, unit="ns", tz=None): unit, tz = unit.unit, unit.tz if unit != 'ns': - if isinstance(unit, compat.string_types) and tz is None: + if isinstance(unit, str) and tz is None: # maybe a string like datetime64[ns, tz], which we support for # now. result = type(self).construct_from_string(unit) @@ -688,7 +688,7 @@ def construct_from_string(cls, string): >>> DatetimeTZDtype.construct_from_string('datetime64[ns, UTC]') datetime64[ns, UTC] """ - if isinstance(string, compat.string_types): + if isinstance(string, str): msg = "Could not construct DatetimeTZDtype from '{}'" try: match = cls._match.match(string) @@ -716,7 +716,7 @@ def __hash__(self): return hash(str(self)) def __eq__(self, other): - if isinstance(other, compat.string_types): + if isinstance(other, str): return other == self.name return (isinstance(other, DatetimeTZDtype) and @@ -772,7 +772,7 @@ def __new__(cls, freq=None): @classmethod def _parse_dtype_strict(cls, freq): - if isinstance(freq, compat.string_types): + if isinstance(freq, str): if freq.startswith('period[') or freq.startswith('Period['): m = cls._match.search(freq) if m is not None: @@ -790,7 +790,7 @@ def construct_from_string(cls, string): Strict construction from a string, raise a TypeError if not possible """ - if (isinstance(string, compat.string_types) and + if (isinstance(string, str) and (string.startswith('period[') or string.startswith('Period[')) or 
isinstance(string, ABCDateOffset)): @@ -803,7 +803,7 @@ def construct_from_string(cls, string): raise TypeError("could not construct PeriodDtype") def __unicode__(self): - return compat.text_type(self.name) + return str(self.name) @property def name(self): @@ -818,7 +818,7 @@ def __hash__(self): return hash(str(self)) def __eq__(self, other): - if isinstance(other, compat.string_types): + if isinstance(other, str): return other == self.name or other == self.name.title() return isinstance(other, PeriodDtype) and self.freq == other.freq @@ -830,7 +830,7 @@ def is_dtype(cls, dtype): can match (via string or type) """ - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): # PeriodDtype can be instantiated from freq string like "U", # but doesn't regard freq str like "U" as dtype. if dtype.startswith('period[') or dtype.startswith('Period['): @@ -885,11 +885,11 @@ def __new__(cls, subtype=None): u = object.__new__(cls) u.subtype = None return u - elif (isinstance(subtype, compat.string_types) and + elif (isinstance(subtype, str) and subtype.lower() == 'interval'): subtype = None else: - if isinstance(subtype, compat.string_types): + if isinstance(subtype, str): m = cls._match.search(subtype) if m is not None: subtype = m.group('subtype') @@ -931,7 +931,7 @@ def construct_from_string(cls, string): attempt to construct this type from a string, raise a TypeError if its not possible """ - if not isinstance(string, compat.string_types): + if not isinstance(string, str): msg = "a string needs to be passed, got type {typ}" raise TypeError(msg.format(typ=type(string))) @@ -958,7 +958,7 @@ def __hash__(self): return hash(str(self)) def __eq__(self, other): - if isinstance(other, compat.string_types): + if isinstance(other, str): return other.lower() in (self.name.lower(), str(self).lower()) elif not isinstance(other, IntervalDtype): return False @@ -976,7 +976,7 @@ def is_dtype(cls, dtype): can match (via string or type) """ - if isinstance(dtype, 
compat.string_types): + if isinstance(dtype, str): if dtype.lower().startswith('interval'): try: if cls.construct_from_string(dtype) is not None: diff --git a/pandas/core/dtypes/inference.py b/pandas/core/dtypes/inference.py index 1a02623fa6072..afd2501ac2da1 100644 --- a/pandas/core/dtypes/inference.py +++ b/pandas/core/dtypes/inference.py @@ -6,8 +6,7 @@ import numpy as np from pandas._libs import lib -from pandas.compat import ( - PY2, Set, re_type, string_and_binary_types, string_types, text_type) +from pandas.compat import PY2, Set, re_type from pandas import compat @@ -88,7 +87,7 @@ def is_string_like(obj): Whether `obj` is a string or not. """ - return isinstance(obj, (text_type, string_types)) + return isinstance(obj, str) def _iterable_not_string(obj): @@ -114,8 +113,7 @@ def _iterable_not_string(obj): False """ - return (isinstance(obj, compat.Iterable) and - not isinstance(obj, string_types)) + return isinstance(obj, compat.Iterable) and not isinstance(obj, str) def is_iterator(obj): @@ -290,15 +288,15 @@ def is_list_like(obj, allow_sets=True): False """ - return (isinstance(obj, compat.Iterable) + return (isinstance(obj, compat.Iterable) and # we do not count strings/unicode/bytes as list-like - and not isinstance(obj, string_and_binary_types) + not isinstance(obj, (str, bytes)) and # exclude zero-dimensional numpy arrays, effectively scalars - and not (isinstance(obj, np.ndarray) and obj.ndim == 0) + not (isinstance(obj, np.ndarray) and obj.ndim == 0) and # exclude sets if allow_sets is False - and not (allow_sets is False and isinstance(obj, Set))) + not (allow_sets is False and isinstance(obj, Set))) def is_array_like(obj): @@ -494,6 +492,6 @@ def is_sequence(obj): try: iter(obj) # Can iterate over it. len(obj) # Has a length associated with it. 
- return not isinstance(obj, string_and_binary_types) + return not isinstance(obj, (str, bytes)) except (TypeError, AttributeError): return False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7ead94ddcff4e..ef4780752341f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -34,8 +34,7 @@ from pandas import compat from pandas.compat import ( - PY36, Iterator, StringIO, lmap, lzip, raise_with_traceback, - string_and_binary_types) + PY36, Iterator, StringIO, lmap, lzip, raise_with_traceback) from pandas.compat.numpy import function as nv from pandas.core.dtypes.cast import ( maybe_upcast, @@ -425,8 +424,8 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, copy=copy) # For data is list-like, or Iterable (will consume into list) - elif (isinstance(data, compat.Iterable) - and not isinstance(data, string_and_binary_types)): + elif (isinstance(data, compat.Iterable) and + not isinstance(data, (str, bytes))): if not isinstance(data, compat.Sequence): data = list(data) if len(data) > 0: @@ -1509,7 +1508,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, result_index = None if index is not None: - if (isinstance(index, compat.string_types) or + if (isinstance(index, str) or not hasattr(index, "__iter__")): i = columns.get_loc(index) exclude.add(index) @@ -1661,11 +1660,11 @@ def to_records(self, index=True, convert_datetime64=None, elif index_names[0] is None: index_names = ['index'] - names = (lmap(compat.text_type, index_names) + - lmap(compat.text_type, self.columns)) + names = (lmap(str, index_names) + + lmap(str, self.columns)) else: arrays = [self[c].get_values() for c in self.columns] - names = lmap(compat.text_type, self.columns) + names = lmap(str, self.columns) index_names = [] index_len = len(index_names) @@ -1712,7 +1711,7 @@ def to_records(self, index=True, convert_datetime64=None, if dtype_mapping is None: formats.append(v.dtype) elif isinstance(dtype_mapping, (type, np.dtype, - 
compat.string_types)): + str)): formats.append(dtype_mapping) else: element = "row" if i < index_len else "column" @@ -3067,7 +3066,7 @@ def query(self, expr, inplace=False, **kwargs): 0 1 10 10 """ inplace = validate_bool_kwarg(inplace, 'inplace') - if not isinstance(expr, compat.string_types): + if not isinstance(expr, str): msg = "expr must be a string to be evaluated, {0} given" raise ValueError(msg.format(type(expr))) kwargs['level'] = kwargs.pop('level', 0) + 1 @@ -4724,7 +4723,7 @@ def f(vals): if subset is None: subset = self.columns elif (not np.iterable(subset) or - isinstance(subset, compat.string_types) or + isinstance(subset, str) or isinstance(subset, tuple) and subset in self.columns): subset = subset, @@ -7450,7 +7449,7 @@ def _count_level(self, level, axis=0, numeric_only=False): # upcasts to object, which induces a ~20x slowdown mask = mask.T - if isinstance(level, compat.string_types): + if isinstance(level, str): level = count_axis._get_level_number(level) level_index = count_axis.levels[level] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9797069566b4b..3c5879ff841d8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -16,8 +16,7 @@ from pandas._libs import Timestamp, iNaT, properties import pandas.compat as compat from pandas.compat import ( - cPickle as pkl, isidentifier, lrange, lzip, set_function_name, - string_types, to_str) + cPickle as pkl, isidentifier, lrange, lzip, set_function_name, to_str) from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import ( @@ -365,7 +364,7 @@ def _get_axis_number(cls, axis): @classmethod def _get_axis_name(cls, axis): axis = cls._AXIS_ALIASES.get(axis, axis) - if isinstance(axis, string_types): + if isinstance(axis, str): if axis in cls._AXIS_NUMBERS: return axis else: @@ -3296,7 +3295,7 @@ def _check_setitem_copy(self, stacklevel=4, t='setting', force=False): pass # a custom message - if 
isinstance(self._is_copy, string_types): + if isinstance(self._is_copy, str): t = self._is_copy elif t == 'referant': @@ -4848,7 +4847,7 @@ def sample(self, n=None, frac=None, replace=False, weights=None, weights = weights.reindex(self.axes[axis]) # Strings acceptable if a dataframe and axis = 0 - if isinstance(weights, string_types): + if isinstance(weights, str): if isinstance(self, pd.DataFrame): if axis == 0: try: @@ -5151,7 +5150,7 @@ def _dir_additions(self): If info_axis is a MultiIndex, it's first level values are used. """ additions = {c for c in self._info_axis.unique(level=0)[:100] - if isinstance(c, string_types) and isidentifier(c)} + if isinstance(c, str) and isidentifier(c)} return super(NDFrame, self)._dir_additions().union(additions) # ---------------------------------------------------------------------- @@ -7001,7 +7000,7 @@ def asof(self, where, subset=None): 2018-02-27 09:03:30 30.0 NaN 2018-02-27 09:04:30 40.0 NaN """ - if isinstance(where, compat.string_types): + if isinstance(where, str): from pandas import to_datetime where = to_datetime(where) @@ -9081,7 +9080,7 @@ def tshift(self, periods=1, freq=None, axis=0): if periods == 0: return self - if isinstance(freq, string_types): + if isinstance(freq, str): freq = to_offset(freq) block_axis = self._get_block_manager_axis(axis) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0604689c6bb2b..535ba0ef37b7f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -532,7 +532,7 @@ def transform(self, func, *args, **kwargs): # optimized transforms func = self._is_cython_func(func) or func - if isinstance(func, compat.string_types): + if isinstance(func, str): if func in base.cython_transforms: # cythonized transform return getattr(self, func)(*args, **kwargs) @@ -576,7 +576,7 @@ def _transform_fast(self, result, obj, func_nm): index=obj.index) def _define_paths(self, func, *args, **kwargs): - if isinstance(func, compat.string_types): + if 
isinstance(func, str): fast_path = lambda group: getattr(group, func)(*args, **kwargs) slow_path = lambda group: group.apply( lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis) @@ -763,7 +763,7 @@ def apply(self, func, *args, **kwargs): @Appender(_shared_docs['aggregate']) def aggregate(self, func_or_funcs, *args, **kwargs): _level = kwargs.pop('_level', None) - if isinstance(func_or_funcs, compat.string_types): + if isinstance(func_or_funcs, str): return getattr(self, func_or_funcs)(*args, **kwargs) if isinstance(func_or_funcs, compat.Iterable): @@ -823,7 +823,7 @@ def _aggregate_multiple_funcs(self, arg, _level): # list of functions / function names columns = [] for f in arg: - if isinstance(f, compat.string_types): + if isinstance(f, str): columns.append(f) else: # protect against callables without names @@ -924,7 +924,7 @@ def transform(self, func, *args, **kwargs): func = self._is_cython_func(func) or func # if string function - if isinstance(func, compat.string_types): + if isinstance(func, str): if func in base.cython_transforms: # cythonized transform return getattr(self, func)(*args, **kwargs) @@ -967,7 +967,7 @@ def _transform_fast(self, func, func_nm): fast version of transform, only applicable to builtin/cythonizable functions """ - if isinstance(func, compat.string_types): + if isinstance(func, str): func = getattr(self, func) ids, _, ngroup = self.grouper.group_info @@ -1006,7 +1006,7 @@ def filter(self, func, dropna=True, *args, **kwargs): # noqa ------- filtered : Series """ - if isinstance(func, compat.string_types): + if isinstance(func, str): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py index e470a32b85cd6..617dc6a3aba08 100644 --- a/pandas/core/groupby/grouper.py +++ b/pandas/core/groupby/grouper.py @@ -7,7 +7,6 @@ import numpy as np -import pandas.compat as compat from pandas.util._decorators import 
cache_readonly from pandas.core.dtypes.common import ( @@ -462,7 +461,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True, raise ValueError('multiple levels only valid with ' 'MultiIndex') - if isinstance(level, compat.string_types): + if isinstance(level, str): if obj.index.name != level: raise ValueError('level name {} is not the name of the ' 'index'.format(level)) @@ -614,7 +613,7 @@ def is_in_obj(gpr): def _is_label_like(val): - return (isinstance(val, (compat.string_types, tuple)) or + return (isinstance(val, (str, tuple)) or (val is not None and is_scalar(val))) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a43e1b3007e2b..e977f652cd61c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1077,12 +1077,10 @@ def _summary(self, name=None): """ if len(self) > 0: head = self[0] - if (hasattr(head, 'format') and - not isinstance(head, compat.string_types)): + if hasattr(head, 'format') and not isinstance(head, str): head = head.format() tail = self[-1] - if (hasattr(tail, 'format') and - not isinstance(tail, compat.string_types)): + if hasattr(tail, 'format') and not isinstance(tail, str): tail = tail.format() index_summary = ', %s to %s' % (pprint_thing(head), pprint_thing(tail)) @@ -4871,8 +4869,8 @@ def slice_locs(self, start=None, end=None, step=None, kind=None): # GH 16785: If start and end happen to be date strings with UTC offsets # attempt to parse and check that the offsets are the same - if (isinstance(start, (compat.string_types, datetime)) - and isinstance(end, (compat.string_types, datetime))): + if (isinstance(start, (str, datetime)) + and isinstance(end, (str, datetime))): try: ts_start = Timestamp(start) ts_end = Timestamp(end) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index cb7f60a12d610..ff6d80dbadded 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -8,7 +8,6 @@ from pandas._libs import ( 
Timestamp, index as libindex, join as libjoin, lib, tslib as libts) from pandas._libs.tslibs import ccalendar, fields, parsing, timezones -import pandas.compat as compat from pandas.util._decorators import Appender, Substitution, cache_readonly from pandas.core.dtypes.common import ( @@ -1078,7 +1077,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): if is_float(label) or isinstance(label, time) or is_integer(label): self._invalid_indexer('slice', label) - if isinstance(label, compat.string_types): + if isinstance(label, str): freq = getattr(self, 'freqstr', getattr(self, 'inferred_freq', None)) _, parsed, reso = parsing.parse_time_string(label, freq) @@ -1134,8 +1133,8 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): # For historical reasons DatetimeIndex by default supports # value-based partial (aka string) slices on non-monotonic arrays, # let's try that. - if ((start is None or isinstance(start, compat.string_types)) and - (end is None or isinstance(end, compat.string_types))): + if ((start is None or isinstance(start, str)) and + (end is None or isinstance(end, str))): mask = True if start is not None: start_casted = self._maybe_cast_slice_bound( @@ -1274,7 +1273,7 @@ def insert(self, loc, item): except (AttributeError, TypeError): # fall back to object index - if isinstance(item, compat.string_types): + if isinstance(item, str): return self.astype(object).insert(loc, item) raise TypeError( "cannot insert DatetimeIndex with incompatible label") @@ -1331,7 +1330,7 @@ def indexer_at_time(self, time, asof=False): if asof: raise NotImplementedError("'asof' argument is not supported") - if isinstance(time, compat.string_types): + if isinstance(time, str): from dateutil.parser import parse time = parse(time).time() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e046ebaed7342..697f766511db3 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1261,7 +1261,7 @@ def 
_hashed_indexing_key(self, key): raise KeyError def f(k, stringify): - if stringify and not isinstance(k, compat.string_types): + if stringify and not isinstance(k, str): k = str(k) return k key = tuple(f(k, stringify) @@ -1341,7 +1341,7 @@ def _try_mi(k): # note that a string that 'looks' like a Timestamp will raise # a KeyError! (GH5725) if (isinstance(key, (datetime.datetime, np.datetime64)) or - (compat.PY3 and isinstance(key, compat.string_types))): + (compat.PY3 and isinstance(key, str))): try: return _try_mi(key) except KeyError: @@ -2066,7 +2066,7 @@ def sortlevel(self, level=0, ascending=True, sort_remaining=True): """ from pandas.core.sorting import indexer_from_factorized - if isinstance(level, (compat.string_types, int)): + if isinstance(level, (str, int)): level = [level] level = [self._get_level_number(lev) for lev in level] sortorder = None diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 379464f4fced6..5d6332f301c96 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -3,7 +3,6 @@ import numpy as np from pandas._libs import index as libindex -import pandas.compat as compat from pandas.util._decorators import Appender, cache_readonly from pandas.core.dtypes.common import ( @@ -44,7 +43,7 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, # is_scalar, generators handled in coerce_to_ndarray data = cls._coerce_to_ndarray(data) - if issubclass(data.dtype.type, compat.string_types): + if issubclass(data.dtype.type, str): cls._string_data_error(data) if copy or not is_dtype_equal(data.dtype, cls._default_dtype): diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index fb3d4f09cfe5e..133cf1c0755cf 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -15,7 +15,6 @@ is_bool_dtype, is_datetime64_any_dtype, is_float, is_float_dtype, is_integer, is_integer_dtype, pandas_dtype) -from pandas import compat from pandas.core import 
common as com from pandas.core.accessor import delegate_names from pandas.core.algorithms import unique1d @@ -538,7 +537,7 @@ def searchsorted(self, value, side='left', sorter=None): other_freq=value.freqstr) raise IncompatibleFrequency(msg) value = value.ordinal - elif isinstance(value, compat.string_types): + elif isinstance(value, str): try: value = Period(value, freq=self.freq).ordinal except DateParseError: @@ -717,7 +716,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): if isinstance(label, datetime): return Period(label, freq=self.freq) - elif isinstance(label, compat.string_types): + elif isinstance(label, str): try: _, parsed, reso = parse_time_string(label, self.freq) bounds = self._parsed_string_to_bounds(reso, parsed) diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 830925535dab1..c3ed26b5b1cca 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -585,7 +585,7 @@ def _maybe_cast_slice_bound(self, label, side, kind): """ assert kind in ['ix', 'loc', 'getitem', None] - if isinstance(label, compat.string_types): + if isinstance(label, str): parsed = Timedelta(label) lbound = parsed.round(parsed.resolution) if side == 'left': @@ -608,7 +608,7 @@ def _get_string_slice(self, key): def _partial_td_slice(self, key): # given a key, try to figure out a location for a partial slice - if not isinstance(key, compat.string_types): + if not isinstance(key, str): return key raise NotImplementedError @@ -679,7 +679,7 @@ def insert(self, loc, item): except (AttributeError, TypeError): # fall back to object index - if isinstance(item, compat.string_types): + if isinstance(item, str): return self.astype(object).insert(loc, item) raise TypeError( "cannot insert TimedeltaIndex with incompatible label") diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index e547375648d60..ae5b1c3cdd097 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -6,7 +6,6 @@ from 
pandas._libs.indexing import _NDFrameIndexerBase from pandas._libs.lib import item_from_zerodim -import pandas.compat as compat from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender @@ -1832,7 +1831,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" if isinstance(labels, MultiIndex): - if (isinstance(key, compat.string_types) and + if (isinstance(key, str) and labels.levels[0].is_all_dates): # Convert key '2016-01-01' to # ('2016-01-01'[, slice(None, None, None)]+) @@ -1843,7 +1842,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels): # (..., slice('2016-01-01', '2016-01-01', None), ...) new_key = [] for i, component in enumerate(key): - if (isinstance(component, compat.string_types) and + if (isinstance(component, str) and labels.levels[i].is_all_dates): new_key.append(slice(component, component, None)) else: @@ -2462,7 +2461,7 @@ def convert_to_index_sliceable(obj, key): if isinstance(key, slice): return idx._convert_slice_indexer(key, kind='getitem') - elif isinstance(key, compat.string_types): + elif isinstance(key, str): # we are an actual column if obj._data.items.contains(key): @@ -2737,8 +2736,7 @@ def _non_reducing_slice(slice_): """ # default to column slice, like DataFrame # ['A', 'B'] -> IndexSlices[:, ['A', 'B']] - kinds = tuple(list(compat.string_types) + [ABCSeries, np.ndarray, Index, - list]) + kinds = (ABCSeries, np.ndarray, Index, list, str) if isinstance(slice_, kinds): slice_ = IndexSlice[:, slice_] diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4c9ff0c7074d8..80e6e3befa0ee 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -10,7 +10,6 @@ from pandas._libs import internals as libinternals, lib, tslib, tslibs from pandas._libs.tslibs import Timedelta, conversion, is_null_datetimelike -import pandas.compat as 
compat from pandas.util._validators import validate_bool_kwarg from pandas.core.dtypes.cast import ( @@ -603,7 +602,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, values = self.values.astype(dtype) else: if issubclass(dtype.type, - (compat.text_type, compat.string_types)): + (str, str)): # use native type formatting for datetime/tz/timedelta if self.is_datelike: @@ -672,7 +671,7 @@ def _try_cast_result(self, result, dtype=None): elif self.is_float and result.dtype == self.dtype: # protect against a bool/object showing up here - if isinstance(dtype, compat.string_types) and dtype == 'infer': + if isinstance(dtype, str) and dtype == 'infer': return result if not isinstance(dtype, type): dtype = dtype.type @@ -2599,7 +2598,7 @@ class ObjectBlock(Block): _can_hold_na = True def __init__(self, values, placement=None, ndim=2): - if issubclass(values.dtype.type, compat.string_types): + if issubclass(values.dtype.type, str): values = np.array(values, dtype=object) super(ObjectBlock, self).__init__(values, ndim=ndim, @@ -2828,7 +2827,7 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) - if isna(value) or not isinstance(value, compat.string_types): + if isna(value) or not isinstance(value, str): def re_replacer(s): try: diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 231cca0c6701b..684c8ff8945a3 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -653,7 +653,7 @@ def sanitize_array(data, index, dtype=None, copy=False, # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. 
- if issubclass(subarr.dtype.type, compat.string_types): + if issubclass(subarr.dtype.type, str): # GH#16605 # If not empty convert the data to dtype # GH#19853: If data is a scalar, subarr has already the result diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 6a6ab78ae3554..2237b8d0359a4 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -7,7 +7,6 @@ import numpy as np from pandas._libs import algos, lib -from pandas.compat import string_types from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( @@ -73,7 +72,7 @@ def clean_fill_method(method, allow_nearest=False): if method in [None, 'asfreq']: return None - if isinstance(method, string_types): + if isinstance(method, str): method = method.lower() if method == 'ffill': method = 'pad' diff --git a/pandas/core/ops.py b/pandas/core/ops.py index a9f23d091a15f..92465a4acd9d7 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -165,7 +165,7 @@ def rmod(left, right): # check if right is a string as % is the string # formatting operation; this is a TypeError # otherwise perform the op - if isinstance(right, compat.string_types): + if isinstance(right, str): raise TypeError("{typ} cannot perform the operation mod".format( typ=type(left).__name__)) diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 41fea08374230..542b1075313bf 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -451,7 +451,7 @@ def to_excel(self, path, na_rep='', engine=None, **kwargs): """ from pandas.io.excel import ExcelWriter - if isinstance(path, compat.string_types): + if isinstance(path, str): writer = ExcelWriter(path, engine=engine) else: writer = path @@ -1466,7 +1466,7 @@ def _prep_ndarray(self, values, copy=True): if not isinstance(values, np.ndarray): values = np.asarray(values) # NumPy strings are a pain, convert to object - if issubclass(values.dtype.type, compat.string_types): + if issubclass(values.dtype.type, str): values = 
np.array(values, dtype=object, copy=True) else: if copy: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index b3b28d7772713..f3aaf9eba2711 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -9,7 +9,6 @@ from pandas._libs.tslibs import NaT, Timestamp from pandas._libs.tslibs.frequencies import is_subperiod, is_superperiod from pandas._libs.tslibs.period import IncompatibleFrequency -import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution @@ -884,7 +883,7 @@ def _maybe_process_deprecations(r, how=None, fill_method=None, limit=None): if how is not None: # .resample(..., how='sum') - if isinstance(how, compat.string_types): + if isinstance(how, str): method = "{0}()".format(how) # .resample(..., how=lambda x: ....) @@ -954,7 +953,7 @@ def _apply(self, f, grouper=None, *args, **kwargs): def func(x): x = self._shallow_copy(x, groupby=self.groupby) - if isinstance(f, compat.string_types): + if isinstance(f, str): return getattr(x, f)(**kwargs) return x.apply(f, *args, **kwargs) @@ -1318,7 +1317,7 @@ def __init__(self, freq='Min', closed=None, label=None, how='mean', self.convention = convention or 'E' self.convention = self.convention.lower() - if isinstance(loffset, compat.string_types): + if isinstance(loffset, str): loffset = to_offset(loffset) self.loffset = loffset diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py index 4ad05f2b52ec5..1c2c97d6680a3 100644 --- a/pandas/core/reshape/concat.py +++ b/pandas/core/reshape/concat.py @@ -238,7 +238,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, keys=None, levels=None, names=None, ignore_index=False, verify_integrity=False, copy=True, sort=False): - if isinstance(objs, (NDFrame, compat.string_types)): + if isinstance(objs, (NDFrame, str)): raise TypeError('first argument must be an iterable of pandas ' 'objects, you 
passed an object of type ' '"{name}"'.format(name=type(objs).__name__)) diff --git a/pandas/core/reshape/melt.py b/pandas/core/reshape/melt.py index 0fa80de812c5f..99224f6fb7c5b 100644 --- a/pandas/core/reshape/melt.py +++ b/pandas/core/reshape/melt.py @@ -82,7 +82,7 @@ def melt(frame, id_vars=None, value_vars=None, var_name=None, else: var_name = [frame.columns.name if frame.columns.name is not None else 'variable'] - if isinstance(var_name, compat.string_types): + if isinstance(var_name, str): var_name = [var_name] N, K = frame.shape diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index cd5c853c6efe4..645b394c0e04f 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -9,7 +9,6 @@ import numpy as np from pandas._libs import hashtable as libhashtable, join as libjoin, lib -import pandas.compat as compat from pandas.compat import lzip from pandas.errors import MergeError from pandas.util._decorators import Appender, Substitution @@ -502,7 +501,7 @@ def __init__(self, left, right, how='inner', on=None, self.indicator = indicator - if isinstance(self.indicator, compat.string_types): + if isinstance(self.indicator, str): self.indicator_name = self.indicator elif isinstance(self.indicator, bool): self.indicator_name = '_merge' if self.indicator else None @@ -1776,8 +1775,7 @@ def _get_join_keys(llab, rlab, shape, sort): def _should_fill(lname, rname): - if (not isinstance(lname, compat.string_types) or - not isinstance(rname, compat.string_types)): + if not isinstance(lname, str) or not isinstance(rname, str): return True return lname == rname diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 4bfb27c6f148c..89d46eea72185 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -8,7 +8,6 @@ from pandas.core.dtypes.common import is_integer_dtype, is_list_like, is_scalar from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries -from pandas import compat import 
pandas.core.common as com from pandas.core.frame import _shared_docs from pandas.core.groupby import Grouper @@ -151,7 +150,7 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', def _add_margins(table, data, values, rows, cols, aggfunc, observed=None, margins_name='All', fill_value=None): - if not isinstance(margins_name, compat.string_types): + if not isinstance(margins_name, str): raise ValueError('margins_name argument must be a string') msg = 'Conflicting name "{name}" in margins'.format(name=margins_name) @@ -195,7 +194,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, row_margin = row_margin.reindex(result.columns, fill_value=fill_value) # populate grand margin for k in margin_keys: - if isinstance(k, compat.string_types): + if isinstance(k, str): row_margin[k] = grand_margin[k] else: row_margin[k] = grand_margin[k[0]] @@ -226,10 +225,10 @@ def _compute_grand_margin(data, values, aggfunc, grand_margin = {} for k, v in data[values].iteritems(): try: - if isinstance(aggfunc, compat.string_types): + if isinstance(aggfunc, str): grand_margin[k] = getattr(v, aggfunc)() elif isinstance(aggfunc, dict): - if isinstance(aggfunc[k], compat.string_types): + if isinstance(aggfunc[k], str): grand_margin[k] = getattr(v, aggfunc[k])() else: grand_margin[k] = aggfunc[k](v) @@ -530,8 +529,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, def _normalize(table, normalize, margins, margins_name='All'): - if not isinstance(normalize, bool) and not isinstance(normalize, - compat.string_types): + if not isinstance(normalize, bool) and not isinstance(normalize, str): axis_subs = {0: 'index', 1: 'columns'} try: normalize = axis_subs[normalize] diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index aa84d3886ec54..4898259dcff9b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -7,7 +7,7 @@ from pandas._libs import algos as _algos, reshape as _reshape from 
pandas._libs.sparse import IntIndex -from pandas.compat import PY2, text_type +from pandas.compat import PY2 from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( @@ -15,7 +15,6 @@ is_integer_dtype, is_list_like, is_object_dtype, needs_i8_conversion) from pandas.core.dtypes.missing import notna -from pandas import compat import pandas.core.algorithms as algos from pandas.core.arrays import SparseArray from pandas.core.arrays.categorical import _factorize_from_iterable @@ -827,7 +826,7 @@ def check_len(item, name): check_len(prefix, 'prefix') check_len(prefix_sep, 'prefix_sep') - if isinstance(prefix, compat.string_types): + if isinstance(prefix, str): prefix = cycle([prefix]) if isinstance(prefix, dict): prefix = [prefix[col] for col in data_to_encode.columns] @@ -836,7 +835,7 @@ def check_len(item, name): prefix = data_to_encode.columns # validate separators - if isinstance(prefix_sep, compat.string_types): + if isinstance(prefix_sep, str): prefix_sep = cycle([prefix_sep]) elif isinstance(prefix_sep, dict): prefix_sep = [prefix_sep[col] for col in data_to_encode.columns] @@ -911,9 +910,9 @@ def get_empty_frame(data): # PY2 embedded unicode, gh-22084 def _make_col_name(prefix, prefix_sep, level): fstr = '{prefix}{prefix_sep}{level}' - if PY2 and (isinstance(prefix, text_type) or - isinstance(prefix_sep, text_type) or - isinstance(level, text_type)): + if PY2 and (isinstance(prefix, str) or + isinstance(prefix_sep, str) or + isinstance(level, str)): fstr = fstr return fstr.format(prefix=prefix, prefix_sep=prefix_sep, diff --git a/pandas/core/series.py b/pandas/core/series.py index 8bcab7129feb2..2068d9b487391 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1433,7 +1433,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, header=True, result = formatter.to_string() # catch contract violations - if not isinstance(result, compat.text_type): + if not isinstance(result, str): raise 
AssertionError("result must be of type unicode, type" " of result is {0!r}" "".format(result.__class__.__name__)) @@ -1623,7 +1623,7 @@ def count(self, level=None): if level is None: return notna(com.values_from_object(self)).sum() - if isinstance(level, compat.string_types): + if isinstance(level, str): level = self.index._get_level_number(level) lev = self.index.levels[level] @@ -3664,7 +3664,7 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): return self.aggregate(func, *args, **kwds) # if we are a string, try to dispatch - if isinstance(func, compat.string_types): + if isinstance(func, str): return self._try_aggregate_string_function(func, *args, **kwds) # handle ufuncs and lambdas diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e1cf5b76ba05b..c2a66ce608f3f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -5,7 +5,7 @@ from pandas._libs import algos, hashtable, lib from pandas._libs.hashtable import unique_label_indices -from pandas.compat import PY3, string_types +from pandas.compat import PY3 from pandas.core.dtypes.cast import infer_dtype_from_array from pandas.core.dtypes.common import ( @@ -454,7 +454,7 @@ def safe_sort(values, labels=None, na_sentinel=-1, assume_unique=False): def sort_mixed(values): # order ints before strings, safe in py3 - str_pos = np.array([isinstance(x, string_types) for x in values], + str_pos = np.array([isinstance(x, str) for x in values], dtype=bool) nums = np.sort(values[~str_pos]) strs = np.sort(values[str_pos]) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 579ca75d3685f..4dd400f9f3ee1 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -625,18 +625,18 @@ def str_repeat(arr, repeats): if is_scalar(repeats): def rep(x): try: - return compat.binary_type.__mul__(x, repeats) + return bytes.__mul__(x, repeats) except TypeError: - return compat.text_type.__mul__(x, repeats) + return str.__mul__(x, repeats) return _na_map(rep, arr) else: def rep(x, 
r): try: - return compat.binary_type.__mul__(x, r) + return bytes.__mul__(x, r) except TypeError: - return compat.text_type.__mul__(x, r) + return str.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) result = libops.vec_binop(com.values_from_object(arr), repeats, rep) @@ -693,7 +693,7 @@ def _groups_or_na_fun(regex): empty_row = [np.nan] * regex.groups def f(x): - if not isinstance(x, compat.string_types): + if not isinstance(x, str): return empty_row m = regex.search(x) if m: @@ -945,13 +945,13 @@ def str_extractall(arr, pat, flags=0): is_mi = arr.index.nlevels > 1 for subject_key, subject in arr.iteritems(): - if isinstance(subject, compat.string_types): + if isinstance(subject, str): if not is_mi: subject_key = (subject_key, ) for match_i, match_tuple in enumerate(regex.findall(subject)): - if isinstance(match_tuple, compat.string_types): + if isinstance(match_tuple, str): match_tuple = (match_tuple,) na_tuple = [np.NaN if group == "" else group for group in match_tuple] @@ -1202,7 +1202,7 @@ def str_find(arr, sub, start=0, end=None, side='left'): Indexes where substring is found. 
""" - if not isinstance(sub, compat.string_types): + if not isinstance(sub, str): msg = 'expected a string object, not {0}' raise TypeError(msg.format(type(sub).__name__)) @@ -1222,7 +1222,7 @@ def str_find(arr, sub, start=0, end=None, side='left'): def str_index(arr, sub, start=0, end=None, side='left'): - if not isinstance(sub, compat.string_types): + if not isinstance(sub, str): msg = 'expected a string object, not {0}' raise TypeError(msg.format(type(sub).__name__)) @@ -1294,7 +1294,7 @@ def str_pad(arr, width, side='left', fillchar=' '): 1 --tiger--- dtype: object """ - if not isinstance(fillchar, compat.string_types): + if not isinstance(fillchar, str): msg = 'fillchar must be a character, not {0}' raise TypeError(msg.format(type(fillchar).__name__)) @@ -2190,7 +2190,7 @@ def cat(self, others=None, sep=None, na_rep=None, join=None): """ from pandas import Index, Series, concat - if isinstance(others, compat.string_types): + if isinstance(others, str): raise ValueError("Did you mean to supply a `sep` keyword?") if sep is None: sep = '' diff --git a/pandas/core/window.py b/pandas/core/window.py index e6572674239e1..3325560a39d81 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -10,7 +10,6 @@ import numpy as np import pandas._libs.window as libwindow -import pandas.compat as compat from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -600,7 +599,7 @@ def validate(self): raise ImportError('Please install scipy to generate window ' 'weight') - if not isinstance(self.win_type, compat.string_types): + if not isinstance(self.win_type, str): raise ValueError('Invalid win_type {0}'.format(self.win_type)) if getattr(sig, self.win_type, None) is None: raise ValueError('Invalid win_type {0}'.format(self.win_type)) @@ -790,7 +789,7 @@ def _apply(self, func, name, window=None, center=None, def f(x, name=name, *args): x = self._shallow_copy(x) - if isinstance(name, compat.string_types): 
+ if isinstance(name, str): return getattr(x, name)(*args, **kwargs) return x.apply(name, *args, **kwargs) @@ -843,7 +842,7 @@ def _apply(self, func, name=None, window=None, center=None, continue # if we have a string function name, wrap it - if isinstance(func, compat.string_types): + if isinstance(func, str): cfunc = getattr(libwindow, func, None) if cfunc is None: raise ValueError("we do not support this function " @@ -1570,7 +1569,7 @@ def validate(self): # we allow rolling on a datetimelike index if ((self.obj.empty or self.is_datetimelike) and - isinstance(self.window, (compat.string_types, ABCDateOffset, + isinstance(self.window, (str, ABCDateOffset, timedelta))): self._validate_monotonic() @@ -2301,7 +2300,7 @@ def _apply(self, func, **kwargs): continue # if we have a string function name, wrap it - if isinstance(func, compat.string_types): + if isinstance(func, str): cfunc = getattr(libwindow, func, None) if cfunc is None: raise ValueError("we do not support this function " diff --git a/pandas/io/clipboard/clipboards.py b/pandas/io/clipboard/clipboards.py index d6d0ba0a560bb..870188cbf5634 100644 --- a/pandas/io/clipboard/clipboards.py +++ b/pandas/io/clipboard/clipboards.py @@ -1,6 +1,6 @@ import subprocess -from pandas.compat import PY2, text_type +from pandas.compat import PY2 from .exceptions import PyperclipException @@ -66,7 +66,7 @@ def copy_qt(text): def paste_qt(): cb = app.clipboard() - return text_type(cb.text()) + return str(cb.text()) return copy_qt, paste_qt diff --git a/pandas/io/common.py b/pandas/io/common.py index a042a0ae3d7d7..2fe198c8b3cf5 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -11,7 +11,7 @@ import zipfile import pandas.compat as compat -from pandas.compat import BytesIO, StringIO, string_types, text_type +from pandas.compat import BytesIO, StringIO from pandas.errors import ( # noqa AbstractMethodError, DtypeWarning, EmptyDataError, ParserError, ParserWarning) @@ -107,7 +107,7 @@ def 
_expand_user(filepath_or_buffer): expanded_filepath_or_buffer : an expanded filepath or the input if not expandable """ - if isinstance(filepath_or_buffer, string_types): + if isinstance(filepath_or_buffer, str): return os.path.expanduser(filepath_or_buffer) return filepath_or_buffer @@ -157,7 +157,7 @@ def _stringify_path(filepath_or_buffer): if hasattr(filepath_or_buffer, '__fspath__'): return filepath_or_buffer.__fspath__() if _PATHLIB_INSTALLED and isinstance(filepath_or_buffer, pathlib.Path): - return text_type(filepath_or_buffer) + return str(filepath_or_buffer) if _PY_PATH_INSTALLED and isinstance(filepath_or_buffer, LocalPath): return filepath_or_buffer.strpath return _expand_user(filepath_or_buffer) @@ -225,9 +225,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, compression=compression, mode=mode) - if isinstance(filepath_or_buffer, (compat.string_types, - compat.binary_type, - mmap.mmap)): + if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): return _expand_user(filepath_or_buffer), None, compression, False if not is_file_like(filepath_or_buffer): @@ -294,7 +292,7 @@ def _infer_compression(filepath_or_buffer, compression): if compression == 'infer': # Convert all path types (e.g. 
pathlib.Path) to strings filepath_or_buffer = _stringify_path(filepath_or_buffer) - if not isinstance(filepath_or_buffer, compat.string_types): + if not isinstance(filepath_or_buffer, str): # Cannot infer compression of a buffer, assume no compression return None @@ -354,7 +352,7 @@ def _get_handle(path_or_buf, mode, encoding=None, compression=None, # Convert pathlib.Path/py.path.local or string path_or_buf = _stringify_path(path_or_buf) - is_path = isinstance(path_or_buf, compat.string_types) + is_path = isinstance(path_or_buf, str) if is_path: compression = _infer_compression(path_or_buf, compression) @@ -560,7 +558,7 @@ def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): def __next__(self): row = next(self.reader) - return [compat.text_type(s, "utf-8") for s in row] + return [str(s, "utf-8") for s in row] class UnicodeWriter(object): diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index a7955bdb5240b..91741bd01c787 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -8,7 +8,7 @@ from pandas._config import config import pandas.compat as compat -from pandas.compat import add_metaclass, string_types +from pandas.compat import add_metaclass from pandas.errors import EmptyDataError from pandas.util._decorators import Appender, deprecate_kwarg @@ -394,7 +394,7 @@ def parse(self, if verbose: print("Reading sheet {sheet}".format(sheet=asheetname)) - if isinstance(asheetname, compat.string_types): + if isinstance(asheetname, str): sheet = self.get_sheet_by_name(asheetname) else: # assume an integer if not a string sheet = self.get_sheet_by_index(asheetname) @@ -579,9 +579,9 @@ def __new__(cls, path, engine=None, **kwargs): # only switch class if generic(ExcelWriter) if issubclass(cls, ExcelWriter): - if engine is None or (isinstance(engine, string_types) and + if engine is None or (isinstance(engine, str) and engine == 'auto'): - if isinstance(path, string_types): + if isinstance(path, str): ext = 
os.path.splitext(path)[-1][1:] else: ext = 'xlsx' @@ -643,7 +643,7 @@ def __init__(self, path, engine=None, date_format=None, datetime_format=None, mode='w', **engine_kwargs): # validate that this engine can handle the extension - if isinstance(path, string_types): + if isinstance(path, str): ext = os.path.splitext(path)[-1] else: ext = 'xls' if engine == 'xlwt' else 'xlsx' diff --git a/pandas/io/excel/_util.py b/pandas/io/excel/_util.py index ebde1f954e0e8..5f22e77695f5c 100644 --- a/pandas/io/excel/_util.py +++ b/pandas/io/excel/_util.py @@ -144,7 +144,7 @@ def _maybe_convert_usecols(usecols): FutureWarning, stacklevel=2) return lrange(usecols + 1) - if isinstance(usecols, compat.string_types): + if isinstance(usecols, str): return _range2cols(usecols) return usecols @@ -194,7 +194,7 @@ def _maybe_convert_to_string(row): converted = [] for i in range(len(row)): - if isinstance(row[i], compat.string_types): + if isinstance(row[i], str): try: converted.append(str(row[i])) except UnicodeEncodeError: diff --git a/pandas/io/excel/_xlrd.py b/pandas/io/excel/_xlrd.py index 03495fa7faf0c..019409b20a6c9 100644 --- a/pandas/io/excel/_xlrd.py +++ b/pandas/io/excel/_xlrd.py @@ -4,8 +4,6 @@ import numpy as np -import pandas.compat as compat - from pandas.io.common import _is_url, _urlopen, get_filepath_or_buffer from pandas.io.excel._base import _BaseExcelReader @@ -55,7 +53,7 @@ def __init__(self, filepath_or_buffer): data = filepath_or_buffer.read() self.book = xlrd.open_workbook(file_contents=data) - elif isinstance(filepath_or_buffer, compat.string_types): + elif isinstance(filepath_or_buffer, str): self.book = xlrd.open_workbook(filepath_or_buffer) else: raise ValueError('Must explicitly set engine if not passing in' diff --git a/pandas/io/excel/_xlsxwriter.py b/pandas/io/excel/_xlsxwriter.py index 531a3657cac6f..a7c9884e993a7 100644 --- a/pandas/io/excel/_xlsxwriter.py +++ b/pandas/io/excel/_xlsxwriter.py @@ -1,5 +1,4 @@ import pandas._libs.json as json -from 
pandas.compat import string_types from pandas.io.excel._base import ExcelWriter from pandas.io.excel._util import _validate_freeze_panes @@ -121,12 +120,12 @@ def convert(cls, style_dict, num_format_str=None): else: props[dst] = v - if isinstance(props.get('pattern'), string_types): + if isinstance(props.get('pattern'), str): # TODO: support other fill patterns props['pattern'] = 0 if props['pattern'] == 'none' else 1 for k in ['border', 'top', 'right', 'bottom', 'left']: - if isinstance(props.get(k), string_types): + if isinstance(props.get(k), str): try: props[k] = ['none', 'thin', 'medium', 'dashed', 'dotted', 'thick', 'double', 'hair', 'mediumDashed', @@ -136,11 +135,11 @@ def convert(cls, style_dict, num_format_str=None): except ValueError: props[k] = 2 - if isinstance(props.get('font_script'), string_types): + if isinstance(props.get('font_script'), str): props['font_script'] = ['baseline', 'superscript', 'subscript'].index(props['font_script']) - if isinstance(props.get('underline'), string_types): + if isinstance(props.get('underline'), str): props['underline'] = {'none': 0, 'single': 1, 'double': 2, 'singleAccounting': 33, 'doubleAccounting': 34}[props['underline']] diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index b7694c4cb8592..2769f12a8a472 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -5,6 +5,7 @@ """ from functools import partial +from unicodedata import east_asian_width import numpy as np @@ -130,7 +131,7 @@ def _get_footer(self): footer += '\n' footer += level_info - return compat.text_type(footer) + return str(footer) def _get_formatted_values(self): return format_array(self.categorical.get_values(), None, @@ -156,7 +157,7 @@ def to_string(self): if footer: result.append(footer) - return compat.text_type('\n'.join(result)) + return str('\n'.join(result)) class SeriesFormatter(object): @@ -235,7 +236,7 @@ def _get_footer(self): footer += "\n" footer += level_info - return 
compat.text_type(footer) + return str(footer) def _get_formatted_index(self): index = self.tr_series.index @@ -289,7 +290,7 @@ def to_string(self): if footer: result += '\n' + footer - return compat.text_type(''.join(result)) + return str(''.join(result)) class TextAdjustment(object): @@ -298,7 +299,7 @@ def __init__(self): self.encoding = get_option("display.encoding") def len(self, text): - return compat.strlen(text, encoding=self.encoding) + return len(text) def justify(self, texts, max_len, mode='right'): return justify(texts, max_len, mode=mode) @@ -317,9 +318,20 @@ def __init__(self): else: self.ambiguous_width = 1 + # Definition of East Asian Width + # http://unicode.org/reports/tr11/ + # Ambiguous width can be changed by option + self._EAW_MAP = {'Na': 1, 'N': 1, 'W': 2, 'F': 2, 'H': 1} + def len(self, text): - return compat.east_asian_len(text, encoding=self.encoding, - ambiguous_width=self.ambiguous_width) + """ + Calculate display width considering unicode East Asian Width + """ + if not isinstance(text, str): + return len(text) + + return sum(self._EAW_MAP.get(east_asian_width(c), self.ambiguous_width) + for c in text) def justify(self, texts, max_len, mode='right'): # re-calculate padding space per str considering East Asian Width @@ -693,7 +705,7 @@ def to_latex(self, column_format=None, longtable=False, encoding=None, if hasattr(self.buf, 'write'): latex_renderer.write_result(self.buf) - elif isinstance(self.buf, compat.string_types): + elif isinstance(self.buf, str): import codecs with codecs.open(self.buf, 'w', encoding=encoding) as f: latex_renderer.write_result(f) @@ -731,7 +743,7 @@ def to_html(self, classes=None, notebook=False, border=None): html = Klass(self, classes=classes, border=border).render() if hasattr(self.buf, 'write'): buffer_put_lines(self.buf, html) - elif isinstance(self.buf, compat.string_types): + elif isinstance(self.buf, str): with open(self.buf, 'w') as f: buffer_put_lines(f, html) else: @@ -1618,6 +1630,6 @@ def 
buffer_put_lines(buf, lines): lines The lines to append. """ - if any(isinstance(x, compat.text_type) for x in lines): - lines = [compat.text_type(x) for x in lines] + if any(isinstance(x, str) for x in lines): + lines = [str(x) for x in lines] buf.write('\n'.join(lines)) diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index c3976c8e2eb17..01b4f6f4dfd07 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -6,8 +6,6 @@ from pandas.core.dtypes.generic import ABCMultiIndex -from pandas import compat - from pandas.io.formats.format import TableFormatter @@ -98,7 +96,7 @@ def pad_empties(x): index_format = 'l' * self.frame.index.nlevels column_format = index_format + column_format elif not isinstance(column_format, - compat.string_types): # pragma: no cover + str): # pragma: no cover raise AssertionError('column_format must be str or unicode, ' 'not {typ}'.format(typ=type(column_format))) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 90013148a9e0f..4d68db6f619ae 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -62,7 +62,7 @@ def _join_unicode(lines, sep=''): try: return sep.join(lines) except UnicodeDecodeError: - sep = compat.text_type(sep) + sep = str(sep) return sep.join([x.decode('utf-8') if isinstance(x, str) else x for x in lines]) @@ -187,7 +187,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): # should deal with it himself. 
try: - result = compat.text_type(thing) # we should try this first + result = str(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") @@ -204,10 +204,10 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): for c in escape_chars: result = result.replace(c, translate[c]) - return compat.text_type(result) + return str(result) if (compat.PY3 and hasattr(thing, '__next__')) or hasattr(thing, 'next'): - return compat.text_type(thing) + return str(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl, quote_strings=True, @@ -217,7 +217,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, max_seq_items=max_seq_items) - elif isinstance(thing, compat.string_types) and quote_strings: + elif isinstance(thing, str) and quote_strings: if compat.PY3: fmt = "'{thing}'" else: @@ -226,7 +226,7 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): else: result = as_escaped_unicode(thing) - return compat.text_type(result) # always unicode + return str(result) # always unicode def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): diff --git a/pandas/io/html.py b/pandas/io/html.py index 7df7cc3326c77..80cc1721d758f 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -9,8 +9,7 @@ import re import pandas.compat as compat -from pandas.compat import ( - binary_type, iteritems, lmap, lrange, raise_with_traceback, string_types) +from pandas.compat import iteritems, lmap, lrange, raise_with_traceback from pandas.errors import AbstractMethodError, EmptyDataError from pandas.core.dtypes.common import is_list_like @@ -64,9 +63,6 @@ def _importers(): _RE_WHITESPACE = re.compile(r'[\r\n]+|\s{2,}') -char_types = string_types + (binary_type,) - - def _remove_whitespace(s, 
regex=_RE_WHITESPACE): """Replace extra whitespace inside of a string with a single space. @@ -130,7 +126,7 @@ def _read(obj): text = url.read() elif hasattr(obj, 'read'): text = obj.read() - elif isinstance(obj, char_types): + elif isinstance(obj, (str, bytes)): text = obj try: if os.path.isfile(text): @@ -859,15 +855,15 @@ def _print_as_set(s): def _validate_flavor(flavor): if flavor is None: flavor = 'lxml', 'bs4' - elif isinstance(flavor, string_types): + elif isinstance(flavor, str): flavor = flavor, elif isinstance(flavor, compat.Iterable): - if not all(isinstance(flav, string_types) for flav in flavor): + if not all(isinstance(flav, str) for flav in flavor): raise TypeError('Object of type {typ!r} is not an iterable of ' 'strings' .format(typ=type(flavor).__name__)) else: - fmt = '{flavor!r}' if isinstance(flavor, string_types) else '{flavor}' + fmt = '{flavor!r}' if isinstance(flavor, str) else '{flavor}' fmt += ' is not a valid flavor' raise ValueError(fmt.format(flavor=flavor)) diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 8c13d93a82bcf..7bb0a00e5ffe3 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -64,7 +64,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', if lines: s = _convert_to_line_delimits(s) - if isinstance(path_or_buf, compat.string_types): + if isinstance(path_or_buf, str): fh, handles = _get_handle(path_or_buf, 'w', compression=compression) try: fh.write(s) @@ -522,7 +522,7 @@ def _get_data_from_filepath(self, filepath_or_buffer): data = filepath_or_buffer exists = False - if isinstance(data, compat.string_types): + if isinstance(data, str): try: exists = os.path.exists(filepath_or_buffer) # gh-5874: if the filepath is too long will raise here @@ -956,7 +956,7 @@ def is_ok(col): """ Return if this col is ok to try for a date parse. 
""" - if not isinstance(col, compat.string_types): + if not isinstance(col, str): return False col_lower = col.lower() diff --git a/pandas/io/json/normalize.py b/pandas/io/json/normalize.py index 7a8188dd07b6b..a836faec2b04f 100644 --- a/pandas/io/json/normalize.py +++ b/pandas/io/json/normalize.py @@ -72,7 +72,7 @@ def nested_to_record(ds, prefix="", sep=".", level=0): new_d = copy.deepcopy(d) for k, v in d.items(): # each key gets renamed with prefix - if not isinstance(k, compat.string_types): + if not isinstance(k, str): k = str(k) if level == 0: newkey = k @@ -224,7 +224,7 @@ def _pull_field(js, spec): lengths = [] meta_vals = defaultdict(list) - if not isinstance(sep, compat.string_types): + if not isinstance(sep, str): sep = str(sep) meta_keys = [sep.join(val) for val in meta] diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 4a71338bfc686..946cc30f6ab9b 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -146,7 +146,7 @@ def writer(fh): fh.write(pack(a, **kwargs)) path_or_buf = _stringify_path(path_or_buf) - if isinstance(path_or_buf, compat.string_types): + if isinstance(path_or_buf, str): with open(path_or_buf, mode) as fh: writer(fh) elif path_or_buf is None: @@ -193,7 +193,7 @@ def read(fh): return unpacked_obj # see if we have an actual file - if isinstance(path_or_buf, compat.string_types): + if isinstance(path_or_buf, str): try: exists = os.path.exists(path_or_buf) except (TypeError, ValueError): @@ -203,7 +203,7 @@ def read(fh): with open(path_or_buf, 'rb') as fh: return read(fh) - if isinstance(path_or_buf, compat.binary_type): + if isinstance(path_or_buf, bytes): # treat as a binary-like fh = None try: @@ -796,7 +796,7 @@ def __iter__(self): try: # see if we have an actual file - if isinstance(self.path, compat.string_types): + if isinstance(self.path, str): try: path_exists = os.path.exists(self.path) diff --git a/pandas/io/parquet.py b/pandas/io/parquet.py index ba322f42c07c1..c126315cec63f 100644 --- 
a/pandas/io/parquet.py +++ b/pandas/io/parquet.py @@ -3,7 +3,6 @@ from distutils.version import LooseVersion from warnings import catch_warnings -from pandas.compat import string_types from pandas.errors import AbstractMethodError from pandas import DataFrame, get_option @@ -59,7 +58,7 @@ def validate_dataframe(df): # index level names must be strings valid_names = all( - isinstance(name, string_types) + isinstance(name, str) for name in df.index.names if name is not None ) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 6fba3981aa83c..bc67b8f365cbe 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -17,7 +17,7 @@ import pandas._libs.parsers as parsers from pandas._libs.tslibs import parsing import pandas.compat as compat -from pandas.compat import PY3, StringIO, lrange, lzip, string_types +from pandas.compat import PY3, StringIO, lrange, lzip from pandas.errors import ( AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender @@ -1005,7 +1005,7 @@ def _clean_options(self, options, engine): quotechar = options['quotechar'] if (quotechar is not None and - isinstance(quotechar, (str, compat.text_type, bytes))): + isinstance(quotechar, (str, bytes))): if (len(quotechar) == 1 and ord(quotechar) > 127 and engine not in ('python', 'python-fwf')): fallback_reason = ("ord(quotechar) > 127, meaning the " @@ -1566,7 +1566,7 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): def _get_simple_index(self, data, columns): def ix(col): - if not isinstance(col, compat.string_types): + if not isinstance(col, str): return col raise ValueError('Index {col} invalid'.format(col=col)) @@ -1588,7 +1588,7 @@ def ix(col): def _get_complex_date_index(self, data, col_names): def _get_name(icol): - if isinstance(icol, compat.string_types): + if isinstance(icol, str): return icol if col_names is None: @@ -1841,7 +1841,7 @@ def __init__(self, src, **kwds): if (kwds.get('compression') is None
and 'utf-16' in (kwds.get('encoding') or '')): # if source is utf-16 plain text, convert source to utf-8 - if isinstance(src, compat.string_types): + if isinstance(src, str): src = open(src, 'rb') self.handles.append(src) src = UTF8Recoder(src, kwds['encoding']) @@ -2194,7 +2194,7 @@ def __init__(self, f, **kwds): self.delimiter = kwds['delimiter'] self.quotechar = kwds['quotechar'] - if isinstance(self.quotechar, compat.text_type): + if isinstance(self.quotechar, str): self.quotechar = str(self.quotechar) self.escapechar = kwds['escapechar'] @@ -2664,14 +2664,14 @@ def _handle_usecols(self, columns, usecols_key): if self.usecols is not None: if callable(self.usecols): col_indices = _evaluate_usecols(self.usecols, usecols_key) - elif any(isinstance(u, string_types) for u in self.usecols): + elif any(isinstance(u, str) for u in self.usecols): if len(columns) > 1: raise ValueError("If using multiple headers, usecols must " "be integers.") col_indices = [] for col in self.usecols: - if isinstance(col, string_types): + if isinstance(col, str): try: col_indices.append(usecols_key.index(col)) except ValueError: @@ -2711,7 +2711,7 @@ def _check_for_bom(self, first_row): # The first element of this row is the one that could have the # BOM that we want to remove. Check that the first element is a # string before proceeding. 
- if not isinstance(first_row[0], compat.string_types): + if not isinstance(first_row[0], str): return first_row # Check that the string is not empty, as that would @@ -2878,7 +2878,7 @@ def _check_comments(self, lines): for l in lines: rl = [] for x in l: - if (not isinstance(x, compat.string_types) or + if (not isinstance(x, str) or self.comment not in x): rl.append(x) else: @@ -2909,7 +2909,7 @@ def _remove_empty_lines(self, lines): for l in lines: # Remove empty lines and lines with only one whitespace value if (len(l) > 1 or len(l) == 1 and - (not isinstance(l[0], compat.string_types) or + (not isinstance(l[0], str) or l[0].strip())): ret.append(l) return ret @@ -2927,7 +2927,7 @@ def _search_replace_num_columns(self, lines, search, replace): for l in lines: rl = [] for i, x in enumerate(l): - if (not isinstance(x, compat.string_types) or + if (not isinstance(x, str) or search not in x or (self._no_thousands_columns and i in self._no_thousands_columns) or @@ -3325,7 +3325,7 @@ def _clean_index_names(columns, index_col, unnamed_cols): index_col = list(index_col) for i, c in enumerate(index_col): - if isinstance(c, compat.string_types): + if isinstance(c, str): index_names.append(c) for j, name in enumerate(cp_cols): if name == c: @@ -3339,7 +3339,7 @@ def _clean_index_names(columns, index_col, unnamed_cols): # Only clean index names that were placeholders. 
for i, name in enumerate(index_names): - if isinstance(name, compat.string_types) and name in unnamed_cols: + if isinstance(name, str) and name in unnamed_cols: index_names[i] = None return index_names, columns, index_col @@ -3478,15 +3478,15 @@ def _get_col_names(colspec, columns): def _concat_date_cols(date_cols): if len(date_cols) == 1: if compat.PY3: - return np.array([compat.text_type(x) for x in date_cols[0]], + return np.array([str(x) for x in date_cols[0]], dtype=object) else: return np.array([ - str(x) if not isinstance(x, compat.string_types) else x + str(x) if not isinstance(x, str) else x for x in date_cols[0] ], dtype=object) - rs = np.array([' '.join(compat.text_type(y) for y in x) + rs = np.array([' '.join(str(y) for y in x) for x in zip(*date_cols)], dtype=object) return rs diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index fadb9a5c6c7cb..80de7f3bed170 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -19,7 +19,7 @@ from pandas._libs import lib, writers as libwriters from pandas._libs.tslibs import timezones -from pandas.compat import PY3, lrange, string_types +from pandas.compat import PY3, lrange from pandas.errors import PerformanceWarning from pandas.core.dtypes.common import ( @@ -72,8 +72,8 @@ def _ensure_str(name): https://github.com/pandas-dev/pandas/issues/13492 """ - if isinstance(name, compat.string_types): - name = compat.text_type(name) + if isinstance(name, str): + name = str(name) return name @@ -257,7 +257,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, f = lambda store: store.put(key, value, **kwargs) path_or_buf = _stringify_path(path_or_buf) - if isinstance(path_or_buf, string_types): + if isinstance(path_or_buf, str): with HDFStore(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: f(store) @@ -340,7 +340,7 @@ def read_hdf(path_or_buf, key=None, mode='r', **kwargs): auto_close = False else: path_or_buf = _stringify_path(path_or_buf) - 
if not isinstance(path_or_buf, string_types): + if not isinstance(path_or_buf, str): raise NotImplementedError('Support for generic buffers has not ' 'been implemented.') try: @@ -791,7 +791,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, where = _ensure_term(where, scope_level=1) if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] - if isinstance(keys, string_types): + if isinstance(keys, str): return self.select(key=keys, where=where, columns=columns, start=start, stop=stop, iterator=iterator, chunksize=chunksize, **kwargs) @@ -4583,7 +4583,7 @@ def _unconvert_string_array(data, nan_rep=None, encoding=None, else: dtype = "S{0}".format(itemsize) - if isinstance(data[0], compat.binary_type): + if isinstance(data[0], bytes): data = Series(data).str.decode(encoding, errors=errors).values else: data = data.astype(dtype, copy=False).astype(object, copy=False) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index eb77f79d38d59..87089e204eef0 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -21,7 +21,6 @@ from pandas.errors import EmptyDataError import pandas as pd -from pandas import compat from pandas.io.common import BaseIterator, get_filepath_or_buffer from pandas.io.sas._sas import Parser @@ -96,7 +95,7 @@ def __init__(self, path_or_buf, index=None, convert_dates=True, self._current_row_in_file_index = 0 self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf) - if isinstance(self._path_or_buf, compat.string_types): + if isinstance(self._path_or_buf, str): self._path_or_buf = open(self._path_or_buf, 'rb') self.handle = self._path_or_buf diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index 3c607d62b4286..c95551d429bfd 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -240,7 +240,7 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1', compression, should_close) = get_filepath_or_buffer( 
filepath_or_buffer, encoding=encoding) - if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)): + if isinstance(filepath_or_buffer, (str, bytes)): self.filepath_or_buffer = open(filepath_or_buffer, 'rb') else: # Copy to BytesIO, and ensure no encoding diff --git a/pandas/io/sas/sasreader.py b/pandas/io/sas/sasreader.py index 9fae0da670bec..0726e17e3bbab 100644 --- a/pandas/io/sas/sasreader.py +++ b/pandas/io/sas/sasreader.py @@ -1,8 +1,6 @@ """ Read SAS sas7bdat or xport files. """ -from pandas import compat - from pandas.io.common import _stringify_path @@ -37,7 +35,7 @@ def read_sas(filepath_or_buffer, format=None, index=None, encoding=None, "than a string name, you must specify " "a format string") filepath_or_buffer = _stringify_path(filepath_or_buffer) - if not isinstance(filepath_or_buffer, compat.string_types): + if not isinstance(filepath_or_buffer, str): raise ValueError(buffer_error_msg) fname = filepath_or_buffer.lower() if fname.endswith(".xpt"): diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 266535fb6fcbd..6a2fc08e9dd65 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -13,7 +13,7 @@ import numpy as np import pandas._libs.lib as lib -from pandas.compat import raise_with_traceback, string_types, text_type +from pandas.compat import raise_with_traceback from pandas.core.dtypes.common import ( is_datetime64tz_dtype, is_dict_like, is_list_like) @@ -499,7 +499,7 @@ def _engine_builder(con): else it just return con without modifying it. 
""" global _SQLALCHEMY_INSTALLED - if isinstance(con, string_types): + if isinstance(con, str): try: import sqlalchemy except ImportError: @@ -522,7 +522,7 @@ def pandasSQL_builder(con, schema=None, meta=None, con = _engine_builder(con) if _is_sqlalchemy_connectable(con): return SQLDatabase(con, schema=schema, meta=meta) - elif isinstance(con, string_types): + elif isinstance(con, str): raise ImportError("Using URI string without sqlalchemy installed.") else: return SQLiteDatabase(con, is_cursor=is_cursor) @@ -626,7 +626,7 @@ def insert_data(self): else: temp = self.frame - column_names = list(map(text_type, temp.columns)) + column_names = list(map(str, temp.columns)) ncols = len(column_names) data_list = [None] * ncols blocks = temp._data.blocks @@ -764,7 +764,7 @@ def _index_name(self, index, index_label): for i, l in enumerate(self.frame.index.names)] # for reading: index=(list of) string to specify column to set as index - elif isinstance(index, string_types): + elif isinstance(index, str): return [index] elif isinstance(index, list): return index @@ -777,11 +777,10 @@ def _get_column_names_and_types(self, dtype_mapper): for i, idx_label in enumerate(self.index): idx_type = dtype_mapper( self.frame.index._get_level_values(i)) - column_names_and_types.append((text_type(idx_label), - idx_type, True)) + column_names_and_types.append((str(idx_label), idx_type, True)) column_names_and_types += [ - (text_type(self.frame.columns[i]), + (str(self.frame.columns[i]), dtype_mapper(self.frame.iloc[:, i]), False) for i in range(len(self.frame.columns)) @@ -1251,7 +1250,7 @@ def _create_sql_schema(self, frame, table_name, keys=None, dtype=None): def _get_unicode_name(name): try: - uname = text_type(name).encode("utf-8", "strict").decode("utf-8") + uname = str(name).encode("utf-8", "strict").decode("utf-8") except UnicodeError: raise ValueError( "Cannot convert identifier to UTF-8: '{name}'".format(name=name)) @@ -1305,7 +1304,7 @@ def _execute_create(self): 
conn.execute(stmt) def insert_statement(self): - names = list(map(text_type, self.frame.columns)) + names = list(map(str, self.frame.columns)) wld = '?' # wildcard char escape = _get_valid_sqlite_name diff --git a/pandas/io/stata.py b/pandas/io/stata.py index ec768d68a2ffd..4693584227f84 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -23,7 +23,7 @@ from pandas._libs.lib import infer_dtype from pandas._libs.tslibs import NaT, Timestamp from pandas._libs.writers import max_len_string_array -from pandas.compat import BytesIO, lmap, lrange, lzip, string_types, text_type +from pandas.compat import BytesIO, lmap, lrange, lzip from pandas.util._decorators import Appender, deprecate_kwarg from pandas.core.dtypes.common import ( @@ -630,7 +630,7 @@ def __init__(self, catarray): # Compute lengths and setup lists of offsets and labels for vl in self.value_labels: category = vl[1] - if not isinstance(category, string_types): + if not isinstance(category, str): category = str(category) warnings.warn(value_label_mismatch_doc.format(catarray.name), ValueLabelTypeMismatch) @@ -989,7 +989,7 @@ def __init__(self, path_or_buf, convert_dates=True, path_or_buf, encoding, _, should_close = get_filepath_or_buffer( path_or_buf) - if isinstance(path_or_buf, (str, text_type, bytes)): + if isinstance(path_or_buf, (str, bytes)): self.path_or_buf = open(path_or_buf, 'rb') else: # Copy to BytesIO, and ensure no encoding @@ -2092,8 +2092,8 @@ def _check_column_names(self, data): duplicate_var_id = 0 for j, name in enumerate(columns): orig_name = name - if not isinstance(name, string_types): - name = text_type(name) + if not isinstance(name, str): + name = str(name) for c in name: if ((c < 'A' or c > 'Z') and (c < 'a' or c > 'z') and @@ -2478,7 +2478,7 @@ def _pad_bytes_new(name, length): """ Takes a bytes instance and pads it with null bytes until it's length chars. 
""" - if isinstance(name, string_types): + if isinstance(name, str): name = _bytes(name, 'utf-8') return name + b'\x00' * (length - len(name)) @@ -2602,7 +2602,7 @@ def _encode(self, s): if compat.PY3: return s.encode(self._encoding) else: - if isinstance(s, text_type): + if isinstance(s, str): return s.encode(self._encoding) return s diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index aaa7aa04acf48..4b2080198fa9a 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -12,7 +12,6 @@ from pandas._libs import lib, tslibs from pandas._libs.tslibs import resolution from pandas._libs.tslibs.frequencies import FreqGroup, get_freq -import pandas.compat as compat from pandas.compat import lrange from pandas.core.dtypes.common import ( @@ -137,7 +136,7 @@ def _to_ordinalf(tm): def time2num(d): - if isinstance(d, compat.string_types): + if isinstance(d, str): parsed = tools.to_datetime(d) if not isinstance(parsed, datetime): raise ValueError('Could not parse time {d}'.format(d=d)) @@ -235,8 +234,8 @@ def convert(values, units, axis): def _convert_1d(values, units, axis): if not hasattr(axis, 'freq'): raise TypeError('Axis must have `freq` set to convert to Periods') - valid_types = (compat.string_types, datetime, - Period, pydt.date, pydt.time, np.datetime64) + valid_types = (str, datetime, Period, pydt.date, pydt.time, + np.datetime64) if (isinstance(values, valid_types) or is_integer(values) or is_float(values)): return get_datevalue(values, axis.freq) @@ -256,8 +255,8 @@ def _convert_1d(values, units, axis): def get_datevalue(date, freq): if isinstance(date, Period): return date.asfreq(freq).ordinal - elif isinstance(date, (compat.string_types, datetime, - pydt.date, pydt.time, np.datetime64)): + elif isinstance(date, (str, datetime, pydt.date, pydt.time, + np.datetime64)): return Period(date, freq).ordinal elif (is_integer(date) or is_float(date) or (isinstance(date, (np.ndarray, Index)) and (date.size == 1))): @@ 
-311,7 +310,7 @@ def try_parse(values): return dates.date2num(values) elif (is_integer(values) or is_float(values)): return values - elif isinstance(values, compat.string_types): + elif isinstance(values, str): return try_parse(values) elif isinstance(values, (list, tuple, np.ndarray, Index, ABCSeries)): if isinstance(values, ABCSeries): @@ -952,7 +951,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): - if isinstance(freq, compat.string_types): + if isinstance(freq, str): freq = get_freq(freq) fgroup = resolution.get_freq_group(freq) @@ -989,7 +988,7 @@ class TimeSeries_DateLocator(Locator): def __init__(self, freq, minor_locator=False, dynamic_mode=True, base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, compat.string_types): + if isinstance(freq, str): freq = get_freq(freq) self.freq = freq self.base = base @@ -1070,7 +1069,7 @@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): - if isinstance(freq, compat.string_types): + if isinstance(freq, str): freq = get_freq(freq) self.format = None self.freq = freq diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 59d3bb355c1d7..46f1edd4bfd10 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -10,7 +10,7 @@ from pandas._config import get_option import pandas.compat as compat -from pandas.compat import lrange, string_types +from pandas.compat import lrange from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, cache_readonly @@ -723,7 +723,7 @@ def match_labels(data, e): err = np.tile(err, (self.nseries, 1)) # errors are a column in the dataframe - elif isinstance(err, string_types): + elif isinstance(err, str): evalues = self.data[err].values self.data = self.data[self.data.columns.drop(err)] err = np.atleast_2d(evalues) @@ -1786,7 +1786,7 @@ def _plot(data, x=None, y=None, subplots=False, label_kw = kwds['label'] if 'label' in kwds 
else False for kw in ['xerr', 'yerr']: if (kw in kwds) and \ - (isinstance(kwds[kw], string_types) or + (isinstance(kwds[kw], str) or is_integer(kwds[kw])): try: kwds[kw] = data[kwds[kw]] diff --git a/pandas/plotting/_style.py b/pandas/plotting/_style.py index 5de7f333fe280..43365bd91c724 100644 --- a/pandas/plotting/_style.py +++ b/pandas/plotting/_style.py @@ -5,7 +5,6 @@ import numpy as np -import pandas.compat as compat from pandas.compat import lmap, lrange from pandas.core.dtypes.common import is_list_like @@ -16,7 +15,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', import matplotlib.pyplot as plt if color is None and colormap is not None: - if isinstance(colormap, compat.string_types): + if isinstance(colormap, str): import matplotlib.cm as cm cmap = colormap colormap = cm.get_cmap(colormap) @@ -38,7 +37,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', except KeyError: colors = list(plt.rcParams.get('axes.color_cycle', list('bgrcmyk'))) - if isinstance(colors, compat.string_types): + if isinstance(colors, str): colors = list(colors) colors = colors[0:num_colors] @@ -55,7 +54,7 @@ def random_color(column): else: raise ValueError("color_type must be either 'default' or 'random'") - if isinstance(colors, compat.string_types): + if isinstance(colors, str): import matplotlib.colors conv = matplotlib.colors.ColorConverter() diff --git a/pandas/plotting/_timeseries.py b/pandas/plotting/_timeseries.py index 51b0629005942..f836bd0cd52d7 100644 --- a/pandas/plotting/_timeseries.py +++ b/pandas/plotting/_timeseries.py @@ -8,7 +8,6 @@ from pandas._libs.tslibs.frequencies import ( FreqGroup, get_base_alias, get_freq, is_subperiod, is_superperiod) from pandas._libs.tslibs.period import Period -import pandas.compat as compat from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCPeriodIndex, ABCTimedeltaIndex) @@ -144,7 +143,7 @@ def _replot_ax(ax, freq, kwargs): ax._plot_data.append((series, 
plotf, kwds)) # for tsplot - if isinstance(plotf, compat.string_types): + if isinstance(plotf, str): from pandas.plotting._core import _plot_klass plotf = _plot_klass[plotf]._plot diff --git a/pandas/tests/extension/base/printing.py b/pandas/tests/extension/base/printing.py index b2ba1d95cf33e..8b33ce173c786 100644 --- a/pandas/tests/extension/base/printing.py +++ b/pandas/tests/extension/base/printing.py @@ -3,7 +3,6 @@ import pytest import pandas as pd -from pandas import compat from .base import BaseExtensionTests @@ -26,8 +25,8 @@ def test_array_repr(self, data, size): assert '...' in result def test_array_repr_unicode(self, data): - result = compat.text_type(data) - assert isinstance(result, compat.text_type) + result = str(data) + assert isinstance(result, str) def test_series_repr(self, data): ser = pd.Series(data) diff --git a/pandas/tests/frame/common.py b/pandas/tests/frame/common.py index 5624f7c1303b6..5e9a73719f67b 100644 --- a/pandas/tests/frame/common.py +++ b/pandas/tests/frame/common.py @@ -111,7 +111,7 @@ def simple(self): def _check_mixed_float(df, dtype=None): # float16 are most likely to be upcasted to float32 dtypes = dict(A='float32', B='float32', C='float16', D='float64') - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): dtypes = {k: dtype for k, v in dtypes.items()} elif isinstance(dtype, dict): dtypes.update(dtype) @@ -127,7 +127,7 @@ def _check_mixed_float(df, dtype=None): def _check_mixed_int(df, dtype=None): dtypes = dict(A='int32', B='uint64', C='uint8', D='int64') - if isinstance(dtype, compat.string_types): + if isinstance(dtype, str): dtypes = {k: dtype for k, v in dtypes.items()} elif isinstance(dtype, dict): dtypes.update(dtype) diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index 3fa4ed32790bd..b52b5222ea30a 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -19,11 +19,6 @@ assert_frame_equal, assert_series_equal, 
makeCustomDataframe as mkdf) -@pytest.fixture(params=[str, compat.text_type]) -def text_dtype(request): - return request.param - - class TestDataFrameDataTypes(TestData): def test_concat_empty_dataframe_dtypes(self): @@ -355,7 +350,6 @@ def test_select_dtypes_datetime_with_tz(self): @pytest.mark.parametrize("dtype", [ str, "str", np.string_, "S1", "unicode", np.unicode_, "U1", - compat.text_type ]) @pytest.mark.parametrize("arg", ["include", "exclude"]) def test_select_dtypes_str_raises(self, dtype, arg): @@ -514,7 +508,7 @@ def test_astype_cast_nan_inf_int(self, val, dtype): with pytest.raises(ValueError, match=msg): df.astype(dtype) - def test_astype_str(self, text_dtype): + def test_astype_str(self): # see gh-9757 a = Series(date_range("2010-01-04", periods=5)) b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) @@ -525,29 +519,28 @@ def test_astype_str(self, text_dtype): df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e}) # Datetime-like - # Test str and unicode on Python 2.x and just str on Python 3.x - result = df.astype(text_dtype) + result = df.astype(str) expected = DataFrame({ - "a": list(map(text_dtype, + "a": list(map(str, map(lambda x: Timestamp(x)._date_repr, a._values))), - "b": list(map(text_dtype, map(Timestamp, b._values))), - "c": list(map(text_dtype, + "b": list(map(str, map(Timestamp, b._values))), + "c": list(map(str, map(lambda x: Timedelta(x)._repr_base(format="all"), c._values))), - "d": list(map(text_dtype, d._values)), - "e": list(map(text_dtype, e._values)), + "d": list(map(str, d._values)), + "e": list(map(str, e._values)), }) assert_frame_equal(result, expected) - def test_astype_str_float(self, text_dtype): + def test_astype_str_float(self): # see gh-11302 - result = DataFrame([np.NaN]).astype(text_dtype) + result = DataFrame([np.NaN]).astype(str) expected = DataFrame(["nan"]) assert_frame_equal(result, expected) - result = DataFrame([1.12345678901234567890]).astype(text_dtype) + result = 
DataFrame([1.12345678901234567890]).astype(str) # < 1.14 truncates # >= 1.14 preserves the full repr diff --git a/pandas/tests/frame/test_duplicates.py b/pandas/tests/frame/test_duplicates.py index 3ea2eba6dd751..4b25aaf565ac8 100644 --- a/pandas/tests/frame/test_duplicates.py +++ b/pandas/tests/frame/test_duplicates.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from pandas.compat import lrange, string_types +from pandas.compat import lrange from pandas import DataFrame, Series import pandas.util.testing as tm @@ -74,7 +74,7 @@ def test_duplicated_subset(subset, keep): if subset is None: subset = list(df.columns) - elif isinstance(subset, string_types): + elif isinstance(subset, str): # need to have a DataFrame, not a Series # -> select columns with singleton list, not string subset = [subset] diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 180c752d246ff..7ae941addef6a 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -11,8 +11,8 @@ import pandas as pd from pandas import ( - DataFrame, Index, MultiIndex, Series, Timestamp, compat, date_range, - read_csv, to_datetime) + DataFrame, Index, MultiIndex, Series, Timestamp, date_range, read_csv, + to_datetime) import pandas.core.common as com from pandas.tests.frame.common import TestData import pandas.util.testing as tm @@ -275,7 +275,7 @@ def _do_test(df, r_dtype=None, c_dtype=None, recons = self.read_csv(path, **kwargs) def _to_uni(x): - if not isinstance(x, compat.text_type): + if not isinstance(x, str): return x.decode('utf8') return x if dupe_col: diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index d4244da1bc677..721de77171c1e 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -6,13 +6,13 @@ import pytest import pandas as pd -from pandas import MultiIndex, compat +from pandas import MultiIndex import pandas.util.testing as 
tm def test_dtype_str(indices): dtype = indices.dtype_str - assert isinstance(dtype, compat.string_types) + assert isinstance(dtype, str) assert dtype == str(indices.dtype) diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index cae055aecde85..536aa6fcef214 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -2,7 +2,7 @@ import pytest from pandas._libs.tslibs.period import IncompatibleFrequency -from pandas.compat import lmap, lrange, text_type +from pandas.compat import lmap, lrange from pandas.core.dtypes.dtypes import PeriodDtype @@ -513,20 +513,18 @@ def test_recreate_from_data(self, freq): def test_map_with_string_constructor(self): raw = [2005, 2007, 2009] index = PeriodIndex(raw, freq='A') - types = [str, text_type] - for t in types: - expected = Index(lmap(t, raw)) - res = index.map(t) + expected = Index(lmap(str, raw)) + res = index.map(str) - # should return an Index - assert isinstance(res, Index) + # should return an Index + assert isinstance(res, Index) - # preserve element types - assert all(isinstance(resi, t) for resi in res) + # preserve element types + assert all(isinstance(resi, str) for resi in res) - # lastly, values should compare equal - tm.assert_index_equal(res, expected) + # lastly, values should compare equal + tm.assert_index_equal(res, expected) class TestSeriesPeriod(object): diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 03448129a48fc..56e87cc32340f 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -13,7 +13,7 @@ from pandas.core.dtypes.common import needs_i8_conversion import pandas as pd -from pandas import CategoricalIndex, MultiIndex, RangeIndex, compat +from pandas import CategoricalIndex, MultiIndex, RangeIndex import pandas.util.testing as tm @@ -156,7 +156,7 @@ def test_set_name_methods(self, indices): def 
test_dtype_str(self, indices): dtype = indices.dtype_str - assert isinstance(dtype, compat.string_types) + assert isinstance(dtype, str) assert dtype == str(indices.dtype) def test_hash_error(self, indices): diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index bb065e7da53f6..83ed6007aab2b 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -3,8 +3,6 @@ import numpy as np import pytest -import pandas.compat as compat - from pandas.core.dtypes.common import is_categorical_dtype from pandas.core.dtypes.dtypes import CategoricalDtype @@ -152,7 +150,7 @@ def test_slicing_and_getting_ops(self): # row res_row = df.iloc[2, :] tm.assert_series_equal(res_row, exp_row) - assert isinstance(res_row["cats"], compat.string_types) + assert isinstance(res_row["cats"], str) # col res_col = df.iloc[:, 0] @@ -172,7 +170,7 @@ def test_slicing_and_getting_ops(self): # row res_row = df.loc["j", :] tm.assert_series_equal(res_row, exp_row) - assert isinstance(res_row["cats"], compat.string_types) + assert isinstance(res_row["cats"], str) # col res_col = df.loc[:, "cats"] @@ -193,7 +191,7 @@ def test_slicing_and_getting_ops(self): # row res_row = df.loc["j", :] tm.assert_series_equal(res_row, exp_row) - assert isinstance(res_row["cats"], compat.string_types) + assert isinstance(res_row["cats"], str) # col res_col = df.loc[:, "cats"] @@ -227,7 +225,7 @@ def test_slicing_and_getting_ops(self): # i : int, slice, or sequence of integers res_row = df.iloc[2] tm.assert_series_equal(res_row, exp_row) - assert isinstance(res_row["cats"], compat.string_types) + assert isinstance(res_row["cats"], str) res_df = df.iloc[slice(2, 4)] tm.assert_frame_equal(res_df, exp_df) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 1966e7bb8cc8e..9d240873a40a3 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ 
-18,7 +18,6 @@ import pytest import pytz -import pandas.compat as compat from pandas.compat import ( StringIO, is_platform_32bit, is_platform_windows, lrange, lzip) @@ -485,7 +484,7 @@ def test_to_string_unicode_columns(self): buf.getvalue() result = self.frame.to_string() - assert isinstance(result, compat.text_type) + assert isinstance(result, str) def test_to_string_utf8_columns(self): n = "\u05d0".encode('utf-8') @@ -956,7 +955,7 @@ def test_nonunicode_nonascii_alignment(self): def test_unicode_problem_decoding_as_ascii(self): dm = DataFrame({'c/\u03c3': Series({'test': np.nan})}) - compat.text_type(dm.to_string()) + str(dm.to_string()) def test_string_repr_encoding(self, datapath): filepath = datapath('io', 'parser', 'data', 'unicode_series.csv') @@ -1192,7 +1191,7 @@ def test_to_string(self): assert retval is None assert buf.getvalue() == s - assert isinstance(s, compat.string_types) + assert isinstance(s, str) # print in right order result = biggie.to_string(columns=['B', 'A'], col_space=17, diff --git a/pandas/tests/io/formats/test_printing.py b/pandas/tests/io/formats/test_printing.py index ac99dbb8d667d..1c4c97e832931 100644 --- a/pandas/tests/io/formats/test_printing.py +++ b/pandas/tests/io/formats/test_printing.py @@ -23,12 +23,11 @@ def test_adjoin(): def test_repr_binary_type(): import string letters = string.ascii_letters - btype = compat.binary_type try: - raw = btype(letters, encoding=cf.get_option('display.encoding')) + raw = bytes(letters, encoding=cf.get_option('display.encoding')) except TypeError: - raw = btype(letters) - b = compat.text_type(compat.bytes_to_str(raw)) + raw = bytes(letters) + b = str(compat.bytes_to_str(raw)) res = printing.pprint_thing(b, quote_strings=True) assert res == repr(b) res = printing.pprint_thing(b, quote_strings=False) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 148a42a332661..d146e9c16e114 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ 
b/pandas/tests/io/formats/test_to_html.py @@ -10,7 +10,7 @@ from pandas.compat import StringIO, lrange import pandas as pd -from pandas import DataFrame, Index, MultiIndex, compat, option_context +from pandas import DataFrame, Index, MultiIndex, option_context from pandas.util import testing as tm import pandas.io.formats.format as fmt @@ -265,7 +265,7 @@ def test_to_html(biggie_df_fixture): assert retval is None assert buf.getvalue() == s - assert isinstance(s, compat.string_types) + assert isinstance(s, str) df.to_html(columns=['B', 'A'], col_space=17) df.to_html(columns=['B', 'A'], diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index fc7c1dce0adad..b9f6f2ee8c7c1 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -123,7 +123,7 @@ def test_read_csv_local(all_parsers, csv1): prefix = "file:///" if compat.is_platform_windows() else "file://" parser = all_parsers - fname = prefix + compat.text_type(os.path.abspath(csv1)) + fname = prefix + str(os.path.abspath(csv1)) result = parser.read_csv(fname, index_col=0, parse_dates=True) expected = DataFrame([[0.980269, 3.685731, -0.364216805298, -1.159738], diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index b0bceaba3829c..679a24d374115 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -11,7 +11,7 @@ from pandas.compat import ( PY35, PY36, BytesIO, is_platform_little_endian, is_platform_windows, - lrange, text_type) + lrange) import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_categorical_dtype @@ -2466,8 +2466,8 @@ def test_store_index_name_numpy_str(self, table_format): assert_frame_equal(df, df2, check_names=True) - assert type(df2.index.name) == text_type - assert type(df2.columns.name) == text_type + assert type(df2.index.name) == str + assert type(df2.columns.name) == str def test_store_series_name(self): df = tm.makeDataFrame() 
diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 80bf922b11a9a..959682703ec2b 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -26,7 +26,7 @@ import pytest import pandas.compat as compat -from pandas.compat import PY36, lrange, string_types +from pandas.compat import PY36, lrange from pandas.core.dtypes.common import ( is_datetime64_dtype, is_datetime64tz_dtype) @@ -1392,7 +1392,7 @@ def test_datetime_with_timezone_roundtrip(self): ) if self.flavor == 'sqlite': # read_sql_query does not return datetime type like read_sql_table - assert isinstance(result.loc[0, 'A'], string_types) + assert isinstance(result.loc[0, 'A'], str) result['A'] = to_datetime(result['A']) tm.assert_frame_equal(result, expected) @@ -1451,7 +1451,7 @@ def test_datetime(self): result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) result = result.drop('index', axis=1) if self.flavor == 'sqlite': - assert isinstance(result.loc[0, 'A'], string_types) + assert isinstance(result.loc[0, 'A'], str) result['A'] = to_datetime(result['A']) tm.assert_frame_equal(result, df) else: @@ -1470,7 +1470,7 @@ def test_datetime_NaT(self): # with read_sql -> no type information -> sqlite has no native result = sql.read_sql_query('SELECT * FROM test_datetime', self.conn) if self.flavor == 'sqlite': - assert isinstance(result.loc[0, 'A'], string_types) + assert isinstance(result.loc[0, 'A'], str) result['A'] = to_datetime(result['A'], errors='coerce') tm.assert_frame_equal(result, df) else: @@ -2192,8 +2192,7 @@ def date_format(dt): datetime: lambda dt: "'%s'" % date_format(dt), str: lambda x: "'%s'" % x, np.str_: lambda x: "'%s'" % x, - compat.text_type: lambda x: "'%s'" % x, - compat.binary_type: lambda x: "'%s'" % x, + bytes: lambda x: "'%s'" % x, float: lambda x: "%.8f" % x, int: lambda x: "%s" % x, type(None): lambda x: "NULL", diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index f29d592d323e1..420ccfc885ef0 
100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -372,7 +372,7 @@ def test_encoding(self, version): expected = raw.kreis1849[0] assert result == expected - assert isinstance(result, compat.string_types) + assert isinstance(result, str) with tm.ensure_clean() as path: with tm.assert_produces_warning(FutureWarning): diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 8ca19745055a3..a7c2768c5b319 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -10,7 +10,7 @@ from pandas._libs.tslibs.parsing import DateParseError from pandas._libs.tslibs.period import IncompatibleFrequency from pandas._libs.tslibs.timezones import dateutil_gettz, maybe_get_tz -from pandas.compat import iteritems, text_type +from pandas.compat import iteritems from pandas.compat.numpy import np_datetime64_compat import pandas as pd @@ -653,7 +653,7 @@ def test_strftime(self): p = Period('2000-1-1 12:34:12', freq='S') res = p.strftime('%Y-%m-%d %H:%M:%S') assert res == '2000-01-01 12:34:12' - assert isinstance(res, text_type) + assert isinstance(res, str) class TestPeriodProperties(object): diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index b8f6482e9d674..e4f8bf1dee18b 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -8,7 +8,7 @@ import pytest import pandas.compat as compat -from pandas.compat import isidentifier, lzip, string_types +from pandas.compat import isidentifier, lzip import pandas as pd from pandas import ( @@ -281,7 +281,7 @@ def test_index_tab_completion(self, index): dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: - assert (not isinstance(x, string_types) or + assert (not isinstance(x, str) or not isidentifier(x) or x in dir_s) else: assert x not in dir_s diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 
636c0865a5b68..74e1ab87bf36e 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -10,7 +10,6 @@ import pytest from pandas._libs.tslibs import iNaT -import pandas.compat as compat from pandas.compat import lrange import pandas as pd @@ -131,7 +130,7 @@ def test_astype_datetime64tz(self): expected = Series(date_range('20130101 06:00:00', periods=3, tz='CET')) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [compat.text_type, np.str_]) + @pytest.mark.parametrize("dtype", [str, np.str_]) @pytest.mark.parametrize("series", [Series([string.digits * 10, tm.rands(63), tm.rands(64), @@ -142,10 +141,10 @@ def test_astype_datetime64tz(self): def test_astype_str_map(self, dtype, series): # see gh-4405 result = series.astype(dtype) - expected = series.map(compat.text_type) + expected = series.map(str) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [str, compat.text_type]) + @pytest.mark.parametrize("dtype", [str]) def test_astype_str_cast(self, dtype): # see gh-9757: test str and unicode on python 2.x # and just str on python 3.x @@ -184,7 +183,7 @@ def test_astype_unicode(self): for s in test_series: res = s.astype("unicode") - expec = s.map(compat.text_type) + expec = s.map(str) tm.assert_series_equal(res, expec) # Restore the former encoding diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index c20ee33ca3c47..bdb5066a77d1d 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -9,8 +9,6 @@ from numpy.random import randint import pytest -import pandas.compat as compat - from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna import pandas.core.strings as strings import pandas.util.testing as tm @@ -302,7 +300,7 @@ def test_iter(self): for el in s: # each element of the series is either a basestring/str or nan - assert isinstance(el, compat.string_types) or 
isna(el) # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index bda150e5ef310..2bc33cf4c9547 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -15,7 +15,6 @@ import pandas._libs.tslibs.resolution as libresolution from pandas._libs.tslibs.resolution import Resolution from pandas._libs.tslibs.timezones import UTC -import pandas.compat as compat from pandas.util._decorators import cache_readonly from pandas.core.dtypes.common import ( @@ -107,7 +106,7 @@ def to_offset(freq): if isinstance(freq, tuple): name = freq[0] stride = freq[1] - if isinstance(stride, compat.string_types): + if isinstance(stride, str): name, stride = stride, name name, _ = libfreqs._base_and_stride(name) delta = get_offset(name) * stride diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index bda0c4ec1237a..ec8dcba9fe63f 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -13,7 +13,6 @@ from pandas._libs.tslibs.offsets import ( ApplyTypeError, BaseOffset, _get_calendar, _is_normalized, _to_dt64, apply_index_wraps, as_datetime, roll_yearday, shift_month) -import pandas.compat as compat from pandas.errors import AbstractMethodError from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -2269,7 +2268,7 @@ def __add__(self, other): "will overflow".format(self=self, other=other)) def __eq__(self, other): - if isinstance(other, compat.string_types): + if isinstance(other, str): from pandas.tseries.frequencies import to_offset try: # GH#23524 if to_offset fails, we are dealing with an @@ -2290,7 +2289,7 @@ def __hash__(self): return hash(self._params) def __ne__(self, other): - if isinstance(other, compat.string_types): + if isinstance(other, str): from pandas.tseries.frequencies import to_offset try: # GH#23524 if to_offset fails, we are dealing 
with an diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 2664e60ce8616..e4167e0ab7066 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -23,7 +23,7 @@ from pandas._libs import testing as _testing import pandas.compat as compat from pandas.compat import ( - PY2, PY3, httplib, lmap, lrange, lzip, raise_with_traceback, string_types) + PY3, httplib, lmap, lrange, lzip, raise_with_traceback) from pandas.core.dtypes.common import ( is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -822,19 +822,11 @@ def raise_assert_detail(obj, message, left, right, diff=None): elif is_categorical_dtype(left): left = repr(left) - if PY2 and isinstance(left, string_types): - # left needs to be printable in native text type in python2 - left = left.encode('utf-8') - if isinstance(right, np.ndarray): right = pprint_thing(right) elif is_categorical_dtype(right): right = repr(right) - if PY2 and isinstance(right, string_types): - # right needs to be printable in native text type in python2 - right = right.encode('utf-8') - msg = """{obj} are different {message} @@ -1782,7 +1774,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, names = None # make singelton case uniform - if isinstance(names, compat.string_types) and nlevels == 1: + if isinstance(names, str) and nlevels == 1: names = [names] # specific 1D index type requested? 
diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 929e8f343e520..5a60bf708e4cb 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -16,7 +16,7 @@ import os import argparse from collections import namedtuple -from pandas.compat import lrange, string_types, text_type, parse_date +from pandas.compat import lrange, parse_date try: import sh except ImportError: @@ -102,7 +102,7 @@ def get_commit_info(c, fmt, sep='\t'): "-n", "1", _tty_out=False) - return text_type(r).split(sep) + return str(r).split(sep) def get_commit_vitals(c, hlen=HASH_LEN): @@ -198,11 +198,11 @@ def main(): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! """) return - if isinstance(args.file_masks, string_types): + if isinstance(args.file_masks, str): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks, string_types): + if isinstance(args.path_masks, str): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks, string_types): + if isinstance(args.dir_masks, str): args.dir_masks = args.dir_masks.split(',') logger.setLevel(getattr(logging, args.debug_level)) diff --git a/scripts/merge-pr.py b/scripts/merge-pr.py index bd22ea21a615e..b9233abe56149 100755 --- a/scripts/merge-pr.py +++ b/scripts/merge-pr.py @@ -27,7 +27,6 @@ import requests import os -import six import sys import textwrap @@ -80,12 +79,12 @@ def fail(msg): def run_cmd(cmd): - if isinstance(cmd, six.string_types): + if isinstance(cmd, str): cmd = cmd.split(' ') output = check_output(cmd) - if isinstance(output, six.binary_type): + if isinstance(output, bytes): output = output.decode('utf-8') return output From 0fa2947ece177fd577c388bbab3443fa36d7e7ce Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Thu, 28 Mar 2019 00:18:36 -0600 Subject: [PATCH 2/3] review edits and additional cleanup --- pandas/core/frame.py | 6 ++---- pandas/core/indexes/multi.py | 3 +-- 
pandas/core/indexing.py | 3 +-- pandas/core/internals/blocks.py | 3 +-- pandas/core/reshape/pivot.py | 2 +- pandas/core/reshape/reshape.py | 4 ---- pandas/core/window.py | 3 +-- pandas/io/formats/latex.py | 3 +-- pandas/io/parsers.py | 5 ++--- pandas/io/pytables.py | 6 +++--- pandas/io/sas/sas_xport.py | 2 +- pandas/tests/frame/test_dtypes.py | 4 +--- pandas/tests/series/test_dtypes.py | 18 ++++++++---------- 13 files changed, 23 insertions(+), 39 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ef4780752341f..dabb68417cdda 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1660,8 +1660,7 @@ def to_records(self, index=True, convert_datetime64=None, elif index_names[0] is None: index_names = ['index'] - names = (lmap(str, index_names) + - lmap(str, self.columns)) + names = lmap(str, index_names) + map(str, self.columns) else: arrays = [self[c].get_values() for c in self.columns] names = lmap(str, self.columns) @@ -1710,8 +1709,7 @@ def to_records(self, index=True, convert_datetime64=None, # string naming a type. if dtype_mapping is None: formats.append(v.dtype) - elif isinstance(dtype_mapping, (type, np.dtype, - str)): + elif isinstance(dtype_mapping, (type, np.dtype, str)): formats.append(dtype_mapping) else: element = "row" if i < index_len else "column" diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 697f766511db3..8ae3c6f7a8d3b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1340,8 +1340,7 @@ def _try_mi(k): # rather than a KeyError, try it here # note that a string that 'looks' like a Timestamp will raise # a KeyError! 
(GH5725) - if (isinstance(key, (datetime.datetime, np.datetime64)) or - (compat.PY3 and isinstance(key, str))): + if isinstance(key, (datetime.datetime, np.datetime64, str)): try: return _try_mi(key) except KeyError: diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ae5b1c3cdd097..79bdfebd9f90b 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1831,8 +1831,7 @@ def _get_partial_string_timestamp_match_key(self, key, labels): """Translate any partial string timestamp matches in key, returning the new key (GH 10331)""" if isinstance(labels, MultiIndex): - if (isinstance(key, str) and - labels.levels[0].is_all_dates): + if (isinstance(key, str) and labels.levels[0].is_all_dates): # Convert key '2016-01-01' to # ('2016-01-01'[, slice(None, None, None)]+) key = tuple([key] + [slice(None)] * (len(labels.levels) - 1)) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 80e6e3befa0ee..f10252f0261d8 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -601,8 +601,7 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, if self.is_extension: values = self.values.astype(dtype) else: - if issubclass(dtype.type, - (str, str)): + if issubclass(dtype.type, str): # use native type formatting for datetime/tz/timedelta if self.is_datelike: diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 89d46eea72185..1fa69b9b5021a 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -529,7 +529,7 @@ def crosstab(index, columns, values=None, rownames=None, colnames=None, def _normalize(table, normalize, margins, margins_name='All'): - if not isinstance(normalize, bool) and not isinstance(normalize, str): + if not isinstance(normalize, (bool, str)): axis_subs = {0: 'index', 1: 'columns'} try: normalize = axis_subs[normalize] diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 
4898259dcff9b..5b156b4f1043d 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -910,10 +910,6 @@ def get_empty_frame(data): # PY2 embedded unicode, gh-22084 def _make_col_name(prefix, prefix_sep, level): fstr = '{prefix}{prefix_sep}{level}' - if PY2 and (isinstance(prefix, str) or - isinstance(prefix_sep, str) or - isinstance(level, str)): - fstr = fstr return fstr.format(prefix=prefix, prefix_sep=prefix_sep, level=level) diff --git a/pandas/core/window.py b/pandas/core/window.py index 3325560a39d81..416647831880d 100644 --- a/pandas/core/window.py +++ b/pandas/core/window.py @@ -1569,8 +1569,7 @@ def validate(self): # we allow rolling on a datetimelike index if ((self.obj.empty or self.is_datetimelike) and - isinstance(self.window, (str, ABCDateOffset, - timedelta))): + isinstance(self.window, (str, ABCDateOffset, timedelta))): self._validate_monotonic() freq = self._validate_freq() diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 01b4f6f4dfd07..24853bf96a90a 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -95,8 +95,7 @@ def pad_empties(x): if self.fmt.index: index_format = 'l' * self.frame.index.nlevels column_format = index_format + column_format - elif not isinstance(column_format, - str): # pragma: no cover + elif not isinstance(column_format, str): # pragma: no cover raise AssertionError('column_format must be str or unicode, ' 'not {typ}'.format(typ=type(column_format))) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index bc67b8f365cbe..bddfc5be1a5ca 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1005,7 +1005,7 @@ def _clean_options(self, options, engine): quotechar = options['quotechar'] if (quotechar is not None and - isinstance(quotechar, (str, str, bytes))): + isinstance(quotechar, (str, bytes))): if (len(quotechar) == 1 and ord(quotechar) > 127 and engine not in ('python', 'python-fwf')): fallback_reason = ("ord(quotechar) > 127, meaning 
the " @@ -2909,8 +2909,7 @@ def _remove_empty_lines(self, lines): for l in lines: # Remove empty lines and lines with only one whitespace value if (len(l) > 1 or len(l) == 1 and - (not isinstance(l[0], str) or - l[0].strip())): + (not isinstance(l[0], str) or l[0].strip())): ret.append(l) return ret diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 80de7f3bed170..3f1e0c1056247 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -66,9 +66,9 @@ def _ensure_encoding(encoding): def _ensure_str(name): - """Ensure that an index / column name is a str (python 3) or - unicode (python 2); otherwise they may be np.string dtype. - Non-string dtypes are passed through unchanged. + """ + Ensure that an index / column name is a str (python 3); otherwise they + may be np.string dtype. Non-string dtypes are passed through unchanged. https://github.com/pandas-dev/pandas/issues/13492 """ diff --git a/pandas/io/sas/sas_xport.py b/pandas/io/sas/sas_xport.py index c95551d429bfd..92a2cc1a43066 100644 --- a/pandas/io/sas/sas_xport.py +++ b/pandas/io/sas/sas_xport.py @@ -240,7 +240,7 @@ def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1', compression, should_close) = get_filepath_or_buffer( filepath_or_buffer, encoding=encoding) - if isinstance(filepath_or_buffer, (str, str, bytes)): + if isinstance(filepath_or_buffer, (str, bytes)): self.filepath_or_buffer = open(filepath_or_buffer, 'rb') else: # Copy to BytesIO, and ensure no encoding diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index b52b5222ea30a..1275a5bccd612 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -349,9 +349,7 @@ def test_select_dtypes_datetime_with_tz(self): assert_frame_equal(result, expected) @pytest.mark.parametrize("dtype", [ - str, "str", np.string_, "S1", "unicode", np.unicode_, "U1", - str - ]) + str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"]) 
@pytest.mark.parametrize("arg", ["include", "exclude"]) def test_select_dtypes_str_raises(self, dtype, arg): df = DataFrame({"a": list("abc"), diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 74e1ab87bf36e..d044a220965f9 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -144,26 +144,24 @@ def test_astype_str_map(self, dtype, series): expected = series.map(str) tm.assert_series_equal(result, expected) - @pytest.mark.parametrize("dtype", [str, str]) - def test_astype_str_cast(self, dtype): - # see gh-9757: test str and unicode on python 2.x - # and just str on python 3.x + def test_astype_str_cast(self): + # see gh-9757 ts = Series([Timestamp('2010-01-04 00:00:00')]) - s = ts.astype(dtype) + s = ts.astype(str) - expected = Series([dtype('2010-01-04')]) + expected = Series([str('2010-01-04')]) tm.assert_series_equal(s, expected) ts = Series([Timestamp('2010-01-04 00:00:00', tz='US/Eastern')]) - s = ts.astype(dtype) + s = ts.astype(str) - expected = Series([dtype('2010-01-04 00:00:00-05:00')]) + expected = Series([str('2010-01-04 00:00:00-05:00')]) tm.assert_series_equal(s, expected) td = Series([Timedelta(1, unit='d')]) - s = td.astype(dtype) + s = td.astype(str) - expected = Series([dtype('1 days 00:00:00.000000000')]) + expected = Series([str('1 days 00:00:00.000000000')]) tm.assert_series_equal(s, expected) def test_astype_unicode(self): From 4935190f99479afce39b11fc52d31042e7f8ec3d Mon Sep 17 00:00:00 2001 From: Jeremy Schendel Date: Thu, 28 Mar 2019 00:54:27 -0600 Subject: [PATCH 3/3] small fixes --- pandas/core/frame.py | 2 +- pandas/core/indexes/multi.py | 1 - pandas/core/reshape/reshape.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dabb68417cdda..5a3956b28e123 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -1660,7 +1660,7 @@ def to_records(self, index=True, convert_datetime64=None, elif 
index_names[0] is None: index_names = ['index'] - names = lmap(str, index_names) + map(str, self.columns) + names = lmap(str, index_names) + lmap(str, self.columns) else: arrays = [self[c].get_values() for c in self.columns] names = lmap(str, self.columns) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8ae3c6f7a8d3b..f9347117bec23 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -10,7 +10,6 @@ from pandas._libs import ( Timestamp, algos as libalgos, index as libindex, lib, tslibs) -import pandas.compat as compat from pandas.compat import lrange, lzip from pandas.compat.numpy import function as nv from pandas.errors import PerformanceWarning, UnsortedIndexError diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 5b156b4f1043d..33b8384f1dd86 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -7,7 +7,6 @@ from pandas._libs import algos as _algos, reshape as _reshape from pandas._libs.sparse import IntIndex -from pandas.compat import PY2 from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import (