diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 3a811ba7706c9..fcdbfc0639157 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -7,7 +7,6 @@ Key items to import for 2/3 compatible code: * iterators: reduce() * lists: lrange(), lmap(), lzip(), lfilter() -* unicode: u() [no unicode builtin in Python 3] * longs: long (int in Python 3) * iterable method compatibility: iteritems, iterkeys, itervalues * Uses the original method if available, otherwise uses items, keys, values. @@ -256,12 +255,6 @@ class to receive bound method text_type = str binary_type = bytes - def u(s): - return s - - def u_safe(s): - return s - def to_str(s): """ Convert bytes and non-string into Python 3 str @@ -305,15 +298,6 @@ def set_function_name(f, name, cls): text_type = unicode binary_type = str - def u(s): - return unicode(s, "unicode_escape") - - def u_safe(s): - try: - return unicode(s, "unicode_escape") - except: - return s - def to_str(s): """ Convert unicode and non-string into Python 2 str diff --git a/pandas/compat/pickle_compat.py b/pandas/compat/pickle_compat.py index 8f16f8154b952..5219dffd9c8e3 100644 --- a/pandas/compat/pickle_compat.py +++ b/pandas/compat/pickle_compat.py @@ -6,7 +6,7 @@ import pickle as pkl import sys -from pandas.compat import string_types, u # noqa +from pandas.compat import string_types # noqa import pandas # noqa from pandas import Index, compat diff --git a/pandas/conftest.py b/pandas/conftest.py index acda660edf84b..3140efa6503e3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -9,7 +9,7 @@ import pytest from pytz import FixedOffset, utc -from pandas.compat import PY3, u +from pandas.compat import PY3 import pandas.util._test_decorators as td import pandas as pd @@ -561,7 +561,7 @@ def any_numpy_dtype(request): # categoricals are handled separately _any_skipna_inferred_dtype = [ ('string', ['a', np.nan, 'c']), - ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]), + ('unicode' if not 
PY3 else 'string', ['a', np.nan, 'c']), ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']), ('empty', [np.nan, np.nan, np.nan]), ('empty', []), diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 7f77a5dcce613..48a8f6f6bf415 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -7,7 +7,7 @@ from pandas._libs import algos as libalgos, lib import pandas.compat as compat -from pandas.compat import lzip, u +from pandas.compat import lzip from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) @@ -1947,10 +1947,10 @@ def _tidy_repr(self, max_vals=10, footer=True): head = self[:num]._get_repr(length=False, footer=False) tail = self[-(max_vals - num):]._get_repr(length=False, footer=False) - result = u('{head}, ..., {tail}').format(head=head[:-1], tail=tail[1:]) + result = '{head}, ..., {tail}'.format(head=head[:-1], tail=tail[1:]) if footer: - result = u('{result}\n{footer}').format(result=result, - footer=self._repr_footer()) + result = '{result}\n{footer}'.format( + result=result, footer=self._repr_footer()) return compat.text_type(result) @@ -2008,7 +2008,7 @@ def _repr_categories_info(self): def _repr_footer(self): - return u('Length: {length}\n{info}').format( + return 'Length: {length}\n{info}'.format( length=len(self), info=self._repr_categories_info()) def _get_repr(self, length=True, na_rep='NaN', footer=True): diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 18f13e17c046e..833650fb68a54 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -6,7 +6,7 @@ import numpy as np from pandas._libs.tslibs import Timedelta, Timestamp -from pandas.compat import DeepChainMap, string_types, u +from pandas.compat import DeepChainMap, string_types from pandas.core.dtypes.common import is_list_like @@ -182,7 +182,7 @@ def 
stringify(value): kind = _ensure_decoded(self.kind) meta = _ensure_decoded(self.meta) - if kind == u('datetime64') or kind == u('datetime'): + if kind == 'datetime64' or kind == 'datetime': if isinstance(v, (int, float)): v = stringify(v) v = _ensure_decoded(v) @@ -190,10 +190,10 @@ def stringify(value): if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v, v.value, kind) - elif kind == u('timedelta64') or kind == u('timedelta'): + elif kind == 'timedelta64' or kind == 'timedelta': v = Timedelta(v, unit='s').value return TermValue(int(v), v, kind) - elif meta == u('category'): + elif meta == 'category': metadata = com.values_from_object(self.metadata) result = metadata.searchsorted(v, side='left') @@ -201,24 +201,24 @@ def stringify(value): # check that metadata contains v if not result and v not in metadata: result = -1 - return TermValue(result, result, u('integer')) - elif kind == u('integer'): + return TermValue(result, result, 'integer') + elif kind == 'integer': v = int(float(v)) return TermValue(v, v, kind) - elif kind == u('float'): + elif kind == 'float': v = float(v) return TermValue(v, v, kind) - elif kind == u('bool'): + elif kind == 'bool': if isinstance(v, string_types): - v = not v.strip().lower() in [u('false'), u('f'), u('no'), - u('n'), u('none'), u('0'), - u('[]'), u('{}'), u('')] + v = not v.strip().lower() in ['false', 'f', 'no', + 'n', 'none', '0', + '[]', '{}', ''] else: v = bool(v) return TermValue(v, v, kind) elif isinstance(v, string_types): # string quoting - return TermValue(v, stringify(v), u('string')) + return TermValue(v, stringify(v), 'string') else: raise TypeError("Cannot compare {v} of type {typ} to {kind} column" .format(v=v, typ=type(v), kind=kind)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3862fa86da6ff..f43c8a8b38a70 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -35,7 +35,7 @@ from pandas import compat from pandas.compat import ( PY36, Iterator, StringIO, lmap, lzip, 
raise_with_traceback, - string_and_binary_types, u) + string_and_binary_types) from pandas.compat.numpy import function as nv from pandas.core.dtypes.cast import ( maybe_upcast, @@ -620,7 +620,7 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ - buf = StringIO(u("")) + buf = StringIO("") if self._info_repr(): self.info(buf=buf) return buf.getvalue() @@ -644,7 +644,7 @@ def _repr_html_(self): Mainly for IPython notebook. """ if self._info_repr(): - buf = StringIO(u("")) + buf = StringIO("") self.info(buf=buf) # need to escape the , should be the first line. val = buf.getvalue().replace('<', r'<', 1) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index cf85c10bf47e0..68bbe0f26784d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -12,7 +12,7 @@ from pandas._libs.tslibs import OutOfBoundsDatetime, Timedelta, Timestamp from pandas._libs.tslibs.timezones import tz_compare import pandas.compat as compat -from pandas.compat import set_function_name, u +from pandas.compat import set_function_name from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, cache_readonly @@ -931,14 +931,14 @@ def __unicode__(self): attrs = self._format_attrs() space = self._format_space() - prepr = (u(",%s") % - space).join(u("%s=%s") % (k, v) for k, v in attrs) + prepr = (",%s" % + space).join("%s=%s" % (k, v) for k, v in attrs) # no data provided, just attributes if data is None: data = '' - res = u("%s(%s%s)") % (klass, data, prepr) + res = "%s(%s%s)" % (klass, data, prepr) return res diff --git a/pandas/core/panel.py b/pandas/core/panel.py index c218e80daed9c..ced30bb88058e 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -10,7 +10,6 @@ import numpy as np import pandas.compat as compat -from pandas.compat import u from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, 
Substitution, deprecate_kwarg from pandas.util._validators import validate_axis_style_args @@ -356,18 +355,18 @@ def __unicode__(self): class_name = str(self.__class__) - dims = u('Dimensions: {dimensions}'.format(dimensions=' x '.join( + dims = 'Dimensions: {dimensions}'.format(dimensions=' x '.join( ["{shape} ({axis})".format(shape=shape, axis=axis) for axis, shape - in zip(self._AXIS_ORDERS, self.shape)]))) + in zip(self._AXIS_ORDERS, self.shape)])) def axis_pretty(a): v = getattr(self, a) if len(v) > 0: - return u('{ax} axis: {x} to {y}'.format(ax=a.capitalize(), - x=pprint_thing(v[0]), - y=pprint_thing(v[-1]))) + return '{ax} axis: {x} to {y}'.format(ax=a.capitalize(), + x=pprint_thing(v[0]), + y=pprint_thing(v[-1])) else: - return u('{ax} axis: None'.format(ax=a.capitalize())) + return '{ax} axis: None'.format(ax=a.capitalize()) output = '\n'.join( [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 68ea7932e3718..aa84d3886ec54 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -7,7 +7,7 @@ from pandas._libs import algos as _algos, reshape as _reshape from pandas._libs.sparse import IntIndex -from pandas.compat import PY2, text_type, u +from pandas.compat import PY2, text_type from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( @@ -914,7 +914,7 @@ def _make_col_name(prefix, prefix_sep, level): if PY2 and (isinstance(prefix, text_type) or isinstance(prefix_sep, text_type) or isinstance(level, text_type)): - fstr = u(fstr) + fstr = fstr return fstr.format(prefix=prefix, prefix_sep=prefix_sep, level=level) diff --git a/pandas/core/series.py b/pandas/core/series.py index 272b621f6b328..efad1c984eef3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -11,7 +11,7 @@ from pandas._libs import iNaT, index as libindex, lib, tslibs import pandas.compat as compat -from pandas.compat import 
PY36, StringIO, u +from pandas.compat import PY36, StringIO from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, Substitution, deprecate from pandas.util._validators import validate_bool_kwarg @@ -1379,7 +1379,7 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ - buf = StringIO(u("")) + buf = StringIO("") width, height = get_terminal_size() max_rows = (height if get_option("display.max_rows") == 0 else get_option("display.max_rows")) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 90623737dfc0c..579ca75d3685f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -2833,7 +2833,7 @@ def normalize(self, form): normalized : Series/Index of objects """ import unicodedata - f = lambda x: unicodedata.normalize(form, compat.u_safe(x)) + f = lambda x: unicodedata.normalize(form, x) result = _na_map(f, self._parent) return self._wrap_result(result) @@ -3187,10 +3187,10 @@ def rindex(self, sub, start=0, end=None): istitle = _noarg_wrapper(lambda x: x.istitle(), docstring=_shared_docs['ismethods'] % _shared_docs['istitle']) - isnumeric = _noarg_wrapper(lambda x: compat.u_safe(x).isnumeric(), + isnumeric = _noarg_wrapper(lambda x: x.isnumeric(), docstring=_shared_docs['ismethods'] % _shared_docs['isnumeric']) - isdecimal = _noarg_wrapper(lambda x: compat.u_safe(x).isdecimal(), + isdecimal = _noarg_wrapper(lambda x: x.isdecimal(), docstring=_shared_docs['ismethods'] % _shared_docs['isdecimal']) diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 4d42b336b01b8..e8a2e45a94bc3 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -6,7 +6,7 @@ import warnings import pandas.compat as compat -from pandas.compat import add_metaclass, string_types, u +from pandas.compat import add_metaclass, string_types from pandas.errors import EmptyDataError from pandas.util._decorators import Appender, deprecate_kwarg @@ -715,7 +715,7 
@@ def check_extension(cls, ext): if ext.startswith('.'): ext = ext[1:] if not any(ext in extension for extension in cls.supported_extensions): - msg = (u("Invalid extension for engine '{engine}': '{ext}'") + msg = ("Invalid extension for engine '{engine}': '{ext}'" .format(engine=pprint_thing(cls.engine), ext=pprint_thing(ext))) raise ValueError(msg) diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 016b052fdc6bb..edbef1191dddd 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -13,7 +13,7 @@ from pandas._libs import lib from pandas._libs.tslib import format_array_from_datetime from pandas._libs.tslibs import NaT, Timedelta, Timestamp, iNaT -from pandas.compat import StringIO, lzip, u +from pandas.compat import StringIO, lzip from pandas.core.dtypes.common import ( is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -111,7 +111,7 @@ class CategoricalFormatter(object): def __init__(self, categorical, buf=None, length=True, na_rep='NaN', footer=True): self.categorical = categorical - self.buf = buf if buf is not None else StringIO(u("")) + self.buf = buf if buf is not None else StringIO("") self.na_rep = na_rep self.length = length self.footer = footer @@ -144,20 +144,20 @@ def to_string(self): if self.footer: return self._get_footer() else: - return u('') + return '' fmt_values = self._get_formatted_values() - result = [u('{i}').format(i=i) for i in fmt_values] + result = ['{i}'.format(i=i) for i in fmt_values] result = [i.strip() for i in result] - result = u(', ').join(result) - result = [u('[') + result + u(']')] + result = ', '.join(result) + result = ['[' + result + ']'] if self.footer: footer = self._get_footer() if footer: result.append(footer) - return compat.text_type(u('\n').join(result)) + return compat.text_type('\n'.join(result)) class SeriesFormatter(object): @@ -201,7 +201,7 @@ def _chk_truncate(self): def _get_footer(self): name = self.series.name - footer = u('') + footer = '' 
if getattr(self.series.index, 'freq', None) is not None: footer += 'Freq: {freq}'.format(freq=self.series.index.freqstr) @@ -290,7 +290,7 @@ def to_string(self): if footer: result += '\n' + footer - return compat.text_type(u('').join(result)) + return compat.text_type(''.join(result)) class TextAdjustment(object): @@ -591,10 +591,10 @@ def to_string(self): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + info_line = ('Empty {name}\nColumns: {col}\nIndex: {idx}' .format(name=type(self.frame).__name__, - col=pprint_thing(frame.columns), - idx=pprint_thing(frame.index))) + col=pprint_thing(frame.columns), + idx=pprint_thing(frame.index))) text = info_line else: @@ -1525,9 +1525,9 @@ def __call__(self, num): mant = sign * dnum / (10**pow10) if self.accuracy is None: # pragma: no cover - format_str = u("{mant: g}{prefix}") + format_str = "{mant: g}{prefix}" else: - format_str = (u("{{mant: .{acc:d}f}}{{prefix}}") + format_str = ("{{mant: .{acc:d}f}}{{prefix}}" .format(acc=self.accuracy)) formatted = format_str.format(mant=mant, prefix=prefix) diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 388252cdb4214..07b835007b43f 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -8,7 +8,7 @@ from collections import OrderedDict from textwrap import dedent -from pandas.compat import lzip, u, unichr +from pandas.compat import lzip, unichr from pandas.core.dtypes.generic import ABCMultiIndex @@ -147,7 +147,7 @@ def render(self): if self.should_show_dimensions: by = chr(215) if compat.PY3 else unichr(215) # × - self.write(u('

<p>{rows} rows {by} {cols} columns</p>

') + self.write('

<p>{rows} rows {by} {cols} columns</p>

' .format(rows=len(self.frame), by=by, cols=len(self.frame.columns))) @@ -211,7 +211,7 @@ def _write_col_header(self, indent): elif tag + span > ins_col: recs_new[tag] = span + 1 if lnum == inner_lvl: - values = (values[:ins_col] + (u('...'),) + + values = (values[:ins_col] + ('...',) + values[ins_col:]) else: # sparse col headers do not receive a ... @@ -224,7 +224,7 @@ def _write_col_header(self, indent): # get ... if tag + span == ins_col: recs_new[ins_col] = 1 - values = (values[:ins_col] + (u('...'),) + + values = (values[:ins_col] + ('...',) + values[ins_col:]) records = recs_new inner_lvl = len(level_lengths) - 1 @@ -239,7 +239,7 @@ def _write_col_header(self, indent): recs_new[tag] = span recs_new[ins_col] = 1 records = recs_new - values = (values[:ins_col] + [u('...')] + + values = (values[:ins_col] + ['...'] + values[ins_col:]) # see gh-22579 @@ -414,12 +414,12 @@ def _write_hierarchical_rows(self, fmt_values, indent): # GH 14882 - Make sure insertion done once if not inserted: dot_row = list(idx_values[ins_row - 1]) - dot_row[-1] = u('...') + dot_row[-1] = '...' idx_values.insert(ins_row, tuple(dot_row)) inserted = True else: dot_row = list(idx_values[ins_row]) - dot_row[inner_lvl - lnum] = u('...') + dot_row[inner_lvl - lnum] = '...' idx_values[ins_row] = tuple(dot_row) else: rec_new[tag] = span @@ -429,12 +429,12 @@ def _write_hierarchical_rows(self, fmt_values, indent): rec_new[ins_row] = 1 if lnum == 0: idx_values.insert(ins_row, tuple( - [u('...')] * len(level_lengths))) + ['...'] * len(level_lengths))) # GH 14882 - Place ... in correct level elif inserted: dot_row = list(idx_values[ins_row]) - dot_row[inner_lvl - lnum] = u('...') + dot_row[inner_lvl - lnum] = '...' 
idx_values[ins_row] = tuple(dot_row) level_lengths[lnum] = rec_new diff --git a/pandas/io/formats/latex.py b/pandas/io/formats/latex.py index 79835c8275929..6ac8071d426cb 100644 --- a/pandas/io/formats/latex.py +++ b/pandas/io/formats/latex.py @@ -6,8 +6,6 @@ import numpy as np -from pandas.compat import u - from pandas.core.dtypes.generic import ABCMultiIndex from pandas import compat @@ -51,7 +49,7 @@ def write_result(self, buf): # string representation of the columns if len(self.frame.columns) == 0 or len(self.frame.index) == 0: - info_line = (u('Empty {name}\nColumns: {col}\nIndex: {idx}') + info_line = ('Empty {name}\nColumns: {col}\nIndex: {idx}' .format(name=type(self.frame).__name__, col=self.frame.columns, idx=self.frame.index)) diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 6d45d1e5dfcee..ebd0113457bad 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -4,8 +4,6 @@ import sys -from pandas.compat import u - from pandas.core.dtypes.inference import is_sequence from pandas import compat @@ -100,9 +98,9 @@ def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq, set): - fmt = u("{{{body}}}") + fmt = "{{{body}}}" else: - fmt = u("[{body}]") if hasattr(seq, '__setitem__') else u("({body})") + fmt = "[{body}]" if hasattr(seq, '__setitem__') else "({body})" if max_seq_items is False: nitems = len(seq) @@ -129,10 +127,10 @@ def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. 
""" - fmt = u("{{{things}}}") + fmt = "{{{things}}}" pairs = [] - pfmt = u("{key}: {val}") + pfmt = "{key}: {val}" if max_seq_items is False: nitems = len(seq) @@ -220,9 +218,9 @@ def as_escaped_unicode(thing, escape_chars=escape_chars): max_seq_items=max_seq_items) elif isinstance(thing, compat.string_types) and quote_strings: if compat.PY3: - fmt = u("'{thing}'") + fmt = "'{thing}'" else: - fmt = u("u'{thing}'") + fmt = "u'{thing}'" result = fmt.format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) diff --git a/pandas/io/html.py b/pandas/io/html.py index 347bb3eec54af..7df7cc3326c77 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -10,8 +10,7 @@ import pandas.compat as compat from pandas.compat import ( - binary_type, iteritems, lmap, lrange, raise_with_traceback, string_types, - u) + binary_type, iteritems, lmap, lrange, raise_with_traceback, string_types) from pandas.errors import AbstractMethodError, EmptyDataError from pandas.core.dtypes.common import is_list_like @@ -622,8 +621,8 @@ def _build_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = [u("@{key}={val!r}").format(key=k, val=v) for k, v in iteritems(attrs)] - return u('[{expr}]').format(expr=' and '.join(s)) + s = ["@{key}={val!r}".format(key=k, val=v) for k, v in iteritems(attrs)] + return '[{expr}]'.format(expr=' and '.join(s)) _re_namespace = {'re': 'http://exslt.org/regular-expressions'} @@ -665,7 +664,7 @@ def _parse_tables(self, doc, match, kwargs): # 1. check all descendants for the given pattern and only search tables # 2. 
go up the tree until we find a table query = '//table//*[re:test(text(), {patt!r})]/ancestor::table' - xpath_expr = u(query).format(patt=pattern) + xpath_expr = query.format(patt=pattern) # if any table attributes were given build an xpath expression to # search for them diff --git a/pandas/io/json/json.py b/pandas/io/json/json.py index 4bae067ee5196..3eca54cc42ef4 100644 --- a/pandas/io/json/json.py +++ b/pandas/io/json/json.py @@ -6,7 +6,7 @@ import pandas._libs.json as json from pandas._libs.tslibs import iNaT -from pandas.compat import StringIO, long, to_str, u +from pandas.compat import StringIO, long, to_str from pandas.errors import AbstractMethodError from pandas.core.dtypes.common import is_period_dtype @@ -662,7 +662,7 @@ def check_keys_split(self, decoded): bad_keys = set(decoded.keys()).difference(set(self._split_keys)) if bad_keys: bad_keys = ", ".join(bad_keys) - raise ValueError(u("JSON data had unexpected key(s): {bad_keys}") + raise ValueError("JSON data had unexpected key(s): {bad_keys}" .format(bad_keys=pprint_thing(bad_keys))) def parse(self): diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 588d63d73515f..57e941bbb8067 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -47,7 +47,6 @@ import numpy as np import pandas.compat as compat -from pandas.compat import u, u_safe from pandas.errors import PerformanceWarning from pandas.util._move import ( BadMove as _BadMove, move_into_mutable_buffer as _move_into_mutable_buffer) @@ -136,8 +135,6 @@ def to_msgpack(path_or_buf, *args, **kwargs): """ global compressor compressor = kwargs.pop('compress', None) - if compressor: - compressor = u(compressor) append = kwargs.pop('append', None) if append: mode = 'a+b' @@ -227,11 +224,11 @@ def read(fh): dtype_dict = {21: np.dtype('M8[ns]'), - u('datetime64[ns]'): np.dtype('M8[ns]'), - u('datetime64[us]'): np.dtype('M8[us]'), + 'datetime64[ns]': np.dtype('M8[ns]'), + 'datetime64[us]': np.dtype('M8[us]'), 22: np.dtype('m8[ns]'), - 
u('timedelta64[ns]'): np.dtype('m8[ns]'), - u('timedelta64[us]'): np.dtype('m8[us]'), + 'timedelta64[ns]': np.dtype('m8[ns]'), + 'timedelta64[us]': np.dtype('m8[us]'), # this is platform int, which we need to remap to np.int64 # for compat on windows platforms @@ -372,17 +369,17 @@ def encode(obj): if isinstance(obj, Index): if isinstance(obj, RangeIndex): return {u'typ': u'range_index', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), u'start': getattr(obj, '_start', None), u'stop': getattr(obj, '_stop', None), u'step': getattr(obj, '_step', None)} elif isinstance(obj, PeriodIndex): return {u'typ': u'period_index', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), - u'freq': u_safe(getattr(obj, 'freqstr', None)), - u'dtype': u(obj.dtype.name), + u'freq': getattr(obj, 'freqstr', None), + u'dtype': obj.dtype.name, u'data': convert(obj.asi8), u'compress': compressor} elif isinstance(obj, DatetimeIndex): @@ -390,14 +387,14 @@ def encode(obj): # store tz info and data as UTC if tz is not None: - tz = u(tz.zone) + tz = tz.zone obj = obj.tz_convert('UTC') return {u'typ': u'datetime_index', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), - u'dtype': u(obj.dtype.name), + u'dtype': obj.dtype.name, u'data': convert(obj.asi8), - u'freq': u_safe(getattr(obj, 'freqstr', None)), + u'freq': getattr(obj, 'freqstr', None), u'tz': tz, u'compress': compressor} elif isinstance(obj, (IntervalIndex, IntervalArray)): @@ -406,29 +403,29 @@ def encode(obj): else: typ = u'interval_array' return {u'typ': typ, - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), u'left': getattr(obj, 'left', None), u'right': getattr(obj, 'right', None), u'closed': getattr(obj, 'closed', None)} elif isinstance(obj, MultiIndex): return {u'typ': u'multi_index', - 
u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'names': getattr(obj, 'names', None), - u'dtype': u(obj.dtype.name), + u'dtype': obj.dtype.name, u'data': convert(obj.values), u'compress': compressor} else: return {u'typ': u'index', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), - u'dtype': u(obj.dtype.name), + u'dtype': obj.dtype.name, u'data': convert(obj.values), u'compress': compressor} elif isinstance(obj, Categorical): return {u'typ': u'category', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), u'codes': obj.codes, u'categories': obj.categories, @@ -452,10 +449,10 @@ def encode(obj): # return d else: return {u'typ': u'series', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'name': getattr(obj, 'name', None), u'index': obj.index, - u'dtype': u(obj.dtype.name), + u'dtype': obj.dtype.name, u'data': convert(obj.values), u'compress': compressor} elif issubclass(tobj, NDFrame): @@ -479,13 +476,13 @@ def encode(obj): # the block manager return {u'typ': u'block_manager', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'axes': data.axes, u'blocks': [{u'locs': b.mgr_locs.as_array, u'values': convert(b.values), u'shape': b.values.shape, - u'dtype': u(b.dtype.name), - u'klass': u(b.__class__.__name__), + u'dtype': b.dtype.name, + u'klass': b.__class__.__name__, u'compress': compressor} for b in data.blocks] } @@ -494,10 +491,10 @@ def encode(obj): if isinstance(obj, Timestamp): tz = obj.tzinfo if tz is not None: - tz = u(tz.zone) + tz = tz.zone freq = obj.freq if freq is not None: - freq = u(freq.freqstr) + freq = freq.freqstr return {u'typ': u'timestamp', u'value': obj.value, u'freq': freq, @@ -512,19 +509,19 @@ def encode(obj): u'data': (obj.days, obj.seconds, obj.microseconds)} elif isinstance(obj, np.datetime64): return {u'typ': u'datetime64', - u'data': 
u(str(obj))} + u'data': str(obj)} elif isinstance(obj, datetime): return {u'typ': u'datetime', - u'data': u(obj.isoformat())} + u'data': obj.isoformat()} elif isinstance(obj, date): return {u'typ': u'date', - u'data': u(obj.isoformat())} + u'data': obj.isoformat()} raise Exception( "cannot encode this datetimelike object: {obj}".format(obj=obj)) elif isinstance(obj, Period): return {u'typ': u'period', u'ordinal': obj.ordinal, - u'freq': u_safe(obj.freqstr)} + u'freq': obj.freqstr} elif isinstance(obj, Interval): return {u'typ': u'interval', u'left': obj.left, @@ -532,37 +529,37 @@ def encode(obj): u'closed': obj.closed} elif isinstance(obj, BlockIndex): return {u'typ': u'block_index', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'blocs': obj.blocs, u'blengths': obj.blengths, u'length': obj.length} elif isinstance(obj, IntIndex): return {u'typ': u'int_index', - u'klass': u(obj.__class__.__name__), + u'klass': obj.__class__.__name__, u'indices': obj.indices, u'length': obj.length} elif isinstance(obj, np.ndarray): return {u'typ': u'ndarray', u'shape': obj.shape, u'ndim': obj.ndim, - u'dtype': u(obj.dtype.name), + u'dtype': obj.dtype.name, u'data': convert(obj), u'compress': compressor} elif isinstance(obj, np.number): if np.iscomplexobj(obj): return {u'typ': u'np_scalar', u'sub_typ': u'np_complex', - u'dtype': u(obj.dtype.name), - u'real': u(obj.real.__repr__()), - u'imag': u(obj.imag.__repr__())} + u'dtype': obj.dtype.name, + u'real': obj.real.__repr__(), + u'imag': obj.imag.__repr__()} else: return {u'typ': u'np_scalar', - u'dtype': u(obj.dtype.name), - u'data': u(obj.__repr__())} + u'dtype': obj.dtype.name, + u'data': obj.__repr__()} elif isinstance(obj, complex): return {u'typ': u'np_complex', - u'real': u(obj.real.__repr__()), - u'imag': u(obj.imag.__repr__())} + u'real': obj.real.__repr__(), + u'imag': obj.imag.__repr__()} return obj diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 616bbe1047f9d..5400d9bc60218 100755 
--- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -19,7 +19,7 @@ import pandas._libs.parsers as parsers from pandas._libs.tslibs import parsing import pandas.compat as compat -from pandas.compat import PY3, StringIO, lrange, lzip, string_types, u +from pandas.compat import PY3, StringIO, lrange, lzip, string_types from pandas.errors import ( AbstractMethodError, EmptyDataError, ParserError, ParserWarning) from pandas.util._decorators import Appender @@ -50,7 +50,7 @@ # This exists at the beginning of a file to indicate endianness # of a file (stream). Unfortunately, this marker screws up parsing, # so we need to remove it if we see it. -_BOM = u('\ufeff') +_BOM = '\ufeff' _doc_read_csv_and_table = r""" {summary} @@ -2724,15 +2724,6 @@ def _check_for_bom(self, first_row): # Since the string is non-empty, check that it does # in fact begin with a BOM. first_elt = first_row[0][0] - - # This is to avoid warnings we get in Python 2.x if - # we find ourselves comparing with non-Unicode - if compat.PY2 and not isinstance(first_elt, unicode): # noqa - try: - first_elt = u(first_elt) - except UnicodeDecodeError: - return first_row - if first_elt != _BOM: return first_row diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 05d1a0b2a8b8b..b71eadb4a4711 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -2,8 +2,6 @@ import numpy as np -from pandas.compat import u - from pandas import ( Categorical, CategoricalIndex, Series, date_range, period_range, timedelta_range) @@ -53,8 +51,8 @@ def test_empty_print(self): def test_print_none_width(self): # GH10087 a = Series(Categorical([1, 2, 3, 4])) - exp = u("0 1\n1 2\n2 3\n3 4\n" + - "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") + exp = ("0 1\n1 2\n2 3\n3 4\n" + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") with option_context("display.width", None): assert exp == repr(a) diff --git 
a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index 68857d6cc6902..b534e38d6304e 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -17,7 +17,7 @@ import pytz from pandas._libs import iNaT, lib, missing as libmissing -from pandas.compat import StringIO, lrange, u +from pandas.compat import StringIO, lrange import pandas.util._test_decorators as td from pandas.core.dtypes import inference @@ -108,7 +108,6 @@ def test_is_sequence(): assert (is_seq((1, 2))) assert (is_seq([1, 2])) assert (not is_seq("abcd")) - assert (not is_seq(u("abcd"))) assert (not is_seq(np.int64)) class A(object): @@ -300,10 +299,10 @@ def test_is_re_fails(ll): @pytest.mark.parametrize( - "ll", [r'a', u('x'), + "ll", [r'a', 'x', r'asdf', re.compile('adsf'), - u(r'\u2233\s*'), + r'\u2233\s*', re.compile(r'')]) def test_is_recompilable_passes(ll): assert inference.is_re_compilable(ll) @@ -369,7 +368,7 @@ def test_maybe_convert_numeric_infinities(self): tm.assert_numpy_array_equal(out, neg) out = lib.maybe_convert_numeric( - np.array([u(infinity)], dtype=object), + np.array([infinity], dtype=object), na_values, maybe_int) tm.assert_numpy_array_equal(out, pos) @@ -1239,7 +1238,6 @@ def test_is_scalar_builtin_scalars(self): assert is_scalar(np.nan) assert is_scalar('foobar') assert is_scalar(b'foobar') - assert is_scalar(u('efoobar')) assert is_scalar(datetime(2014, 1, 1)) assert is_scalar(date(2014, 1, 1)) assert is_scalar(time(12, 0)) @@ -1261,7 +1259,7 @@ def test_is_scalar_numpy_array_scalars(self): assert is_scalar(np.int32(1)) assert is_scalar(np.object_('foobar')) assert is_scalar(np.str_('foobar')) - assert is_scalar(np.unicode_(u('foobar'))) + assert is_scalar(np.unicode_('foobar')) assert is_scalar(np.bytes_(b'foobar')) assert is_scalar(np.datetime64('2014-01-01')) assert is_scalar(np.timedelta64(1, 'h')) diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 
7ca01e13a33a9..372be27b5b469 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -9,7 +9,6 @@ from pandas._libs import missing as libmissing from pandas._libs.tslibs import iNaT, is_null_datetimelike -from pandas.compat import u from pandas.core.dtypes.common import is_scalar from pandas.core.dtypes.dtypes import ( @@ -108,7 +107,7 @@ def test_isna_lists(self): exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) - result = isna([u('foo'), u('bar')]) + result = isna(['foo', 'bar']) exp = np.array([False, False]) tm.assert_numpy_array_equal(result, exp) diff --git a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 3e45f23ac70d2..560fb8dfaaf4f 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import lrange, lzip, u +from pandas.compat import lrange, lzip from pandas.errors import PerformanceWarning import pandas as pd @@ -848,7 +848,7 @@ def test_filter(self): assert 'foo' in filtered # unicode columns, won't ascii-encode - df = self.frame.rename(columns={'B': u('\u2202')}) + df = self.frame.rename(columns={'B': '\u2202'}) filtered = df.filter(like='C') assert 'C' in filtered diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py index ca54993712439..90a21961ef78d 100644 --- a/pandas/tests/frame/test_dtypes.py +++ b/pandas/tests/frame/test_dtypes.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas.compat import u - from pandas.core.dtypes.dtypes import CategoricalDtype, DatetimeTZDtype import pandas as pd @@ -364,7 +362,7 @@ def test_select_dtypes_datetime_with_tz(self): @pytest.mark.parametrize("arg", ["include", "exclude"]) def test_select_dtypes_str_raises(self, dtype, arg): df = DataFrame({"a": list("abc"), - "g": list(u("abc")), + "g": list("abc"), "b": list(range(1, 4)), "c": 
np.arange(3, 6).astype("u1"), "d": np.arange(4.0, 7.0, dtype="float64"), @@ -378,7 +376,7 @@ def test_select_dtypes_str_raises(self, dtype, arg): def test_select_dtypes_bad_arg_raises(self): df = DataFrame({'a': list('abc'), - 'g': list(u('abc')), + 'g': list('abc'), 'b': list(range(1, 4)), 'c': np.arange(3, 6).astype('u1'), 'd': np.arange(4.0, 7.0, dtype='float64'), diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index 799d548100b5e..c96e5232ecf41 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -5,7 +5,7 @@ import numpy as np import pytest -from pandas.compat import lrange, u +from pandas.compat import lrange import pandas as pd from pandas import DataFrame, MultiIndex, Series, date_range @@ -162,7 +162,7 @@ def check(result, expected=None): df5 = DataFrame({'RPT_Date': [20120930, 20121231, 20130331], 'STK_ID': [600809] * 3, - 'STK_Name': [u('饡驦'), u('饡驦'), u('饡驦')], + 'STK_Name': ['饡驦', '饡驦', '饡驦'], 'TClose': [38.05, 41.66, 30.01]}, index=MultiIndex.from_tuples( [(600809, 20120930), @@ -177,7 +177,7 @@ def check(result, expected=None): result.dtypes expected = (DataFrame([[0.0454, 22.02, 0.0422, 20130331, 600809, - u('饡驦'), 30.01]], + '饡驦', 30.01]], columns=['RT', 'TClose', 'TExg', 'RPT_Date', 'STK_ID', 'STK_Name', 'QT_Close']) diff --git a/pandas/tests/frame/test_repr_info.py b/pandas/tests/frame/test_repr_info.py index d0125c0a94361..d97bd93264b9b 100644 --- a/pandas/tests/frame/test_repr_info.py +++ b/pandas/tests/frame/test_repr_info.py @@ -10,7 +10,7 @@ import numpy as np import pytest -from pandas.compat import PYPY, StringIO, lrange, u +from pandas.compat import PYPY, StringIO, lrange import pandas as pd from pandas import ( @@ -125,7 +125,7 @@ def test_repr_unsortable(self): warnings.filters = warn_filters def test_repr_unicode(self): - uval = u('\u03c3\u03c3\u03c3\u03c3') + uval = '\u03c3\u03c3\u03c3\u03c3' # TODO(wesm): is this 
supposed to be used? bval = uval.encode('utf-8') # noqa @@ -141,11 +141,11 @@ def test_repr_unicode(self): assert result.split('\n')[0].rstrip() == ex_top def test_unicode_string_with_unicode(self): - df = DataFrame({'A': [u("\u05d0")]}) + df = DataFrame({'A': ["\u05d0"]}) str(df) def test_bytestring_with_unicode(self): - df = DataFrame({'A': [u("\u05d0")]}) + df = DataFrame({'A': ["\u05d0"]}) bytes(df) def test_very_wide_info_repr(self): diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py index 8abf3a6706886..301498547aad7 100644 --- a/pandas/tests/frame/test_reshape.py +++ b/pandas/tests/frame/test_reshape.py @@ -8,8 +8,6 @@ import numpy as np import pytest -from pandas.compat import u - import pandas as pd from pandas import ( DataFrame, Index, MultiIndex, Period, Series, Timedelta, date_range) @@ -452,10 +450,10 @@ def test_unstack_bool(self): def test_unstack_level_binding(self): # GH9856 mi = pd.MultiIndex( - levels=[[u('foo'), u('bar')], [u('one'), u('two')], - [u('a'), u('b')]], + levels=[['foo', 'bar'], ['one', 'two'], + ['a', 'b']], codes=[[0, 0, 1, 1], [0, 1, 0, 1], [1, 0, 1, 0]], - names=[u('first'), u('second'), u('third')]) + names=['first', 'second', 'third']) s = pd.Series(0, index=mi) result = s.unstack([1, 2]).stack(0) diff --git a/pandas/tests/frame/test_to_csv.py b/pandas/tests/frame/test_to_csv.py index 719294db053e5..917cd9a04efaf 100644 --- a/pandas/tests/frame/test_to_csv.py +++ b/pandas/tests/frame/test_to_csv.py @@ -8,7 +8,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, lmap, lrange, u +from pandas.compat import StringIO, lmap, lrange from pandas.errors import ParserError import pandas as pd @@ -783,7 +783,7 @@ def test_to_csv_bug(self): def test_to_csv_unicode(self): - df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) + df = DataFrame({'c/\u03c3': [1, 2, 3]}) with ensure_clean() as path: df.to_csv(path, encoding='UTF-8') @@ -797,10 +797,10 @@ def test_to_csv_unicode(self): def 
test_to_csv_unicode_index_col(self): buf = StringIO('') df = DataFrame( - [[u("\u05d0"), "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], - columns=[u("\u05d0"), - u("\u05d1"), u("\u05d2"), u("\u05d3")], - index=[u("\u05d0"), u("\u05d1")]) + [["\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=["\u05d0", + "\u05d1", "\u05d2", "\u05d3"], + index=["\u05d0", "\u05d1"]) df.to_csv(buf, encoding='UTF-8') buf.seek(0) diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index 3ba41190a297a..73e1ce16e9e3e 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -6,7 +6,6 @@ import pytest from pandas._libs import hashtable -from pandas.compat import u from pandas import DatetimeIndex, MultiIndex import pandas.util.testing as tm @@ -158,24 +157,24 @@ def test_has_duplicates(idx, idx_dup): def test_has_duplicates_from_tuples(): # GH 9075 - t = [(u('x'), u('out'), u('z'), 5, u('y'), u('in'), u('z'), 169), - (u('x'), u('out'), u('z'), 7, u('y'), u('in'), u('z'), 119), - (u('x'), u('out'), u('z'), 9, u('y'), u('in'), u('z'), 135), - (u('x'), u('out'), u('z'), 13, u('y'), u('in'), u('z'), 145), - (u('x'), u('out'), u('z'), 14, u('y'), u('in'), u('z'), 158), - (u('x'), u('out'), u('z'), 16, u('y'), u('in'), u('z'), 122), - (u('x'), u('out'), u('z'), 17, u('y'), u('in'), u('z'), 160), - (u('x'), u('out'), u('z'), 18, u('y'), u('in'), u('z'), 180), - (u('x'), u('out'), u('z'), 20, u('y'), u('in'), u('z'), 143), - (u('x'), u('out'), u('z'), 21, u('y'), u('in'), u('z'), 128), - (u('x'), u('out'), u('z'), 22, u('y'), u('in'), u('z'), 129), - (u('x'), u('out'), u('z'), 25, u('y'), u('in'), u('z'), 111), - (u('x'), u('out'), u('z'), 28, u('y'), u('in'), u('z'), 114), - (u('x'), u('out'), u('z'), 29, u('y'), u('in'), u('z'), 121), - (u('x'), u('out'), u('z'), 31, u('y'), u('in'), u('z'), 126), - (u('x'), u('out'), u('z'), 32, u('y'), u('in'), u('z'), 155), - (u('x'), 
u('out'), u('z'), 33, u('y'), u('in'), u('z'), 123), - (u('x'), u('out'), u('z'), 12, u('y'), u('in'), u('z'), 144)] + t = [('x', 'out', 'z', 5, 'y', 'in', 'z', 169), + ('x', 'out', 'z', 7, 'y', 'in', 'z', 119), + ('x', 'out', 'z', 9, 'y', 'in', 'z', 135), + ('x', 'out', 'z', 13, 'y', 'in', 'z', 145), + ('x', 'out', 'z', 14, 'y', 'in', 'z', 158), + ('x', 'out', 'z', 16, 'y', 'in', 'z', 122), + ('x', 'out', 'z', 17, 'y', 'in', 'z', 160), + ('x', 'out', 'z', 18, 'y', 'in', 'z', 180), + ('x', 'out', 'z', 20, 'y', 'in', 'z', 143), + ('x', 'out', 'z', 21, 'y', 'in', 'z', 128), + ('x', 'out', 'z', 22, 'y', 'in', 'z', 129), + ('x', 'out', 'z', 25, 'y', 'in', 'z', 111), + ('x', 'out', 'z', 28, 'y', 'in', 'z', 114), + ('x', 'out', 'z', 29, 'y', 'in', 'z', 121), + ('x', 'out', 'z', 31, 'y', 'in', 'z', 126), + ('x', 'out', 'z', 32, 'y', 'in', 'z', 155), + ('x', 'out', 'z', 33, 'y', 'in', 'z', 123), + ('x', 'out', 'z', 12, 'y', 'in', 'z', 144)] mi = MultiIndex.from_tuples(t) assert not mi.has_duplicates diff --git a/pandas/tests/indexes/multi/test_format.py b/pandas/tests/indexes/multi/test_format.py index bb431a1a67b32..61fa9c039e844 100644 --- a/pandas/tests/indexes/multi/test_format.py +++ b/pandas/tests/indexes/multi/test_format.py @@ -5,8 +5,6 @@ import pytest -from pandas.compat import u - import pandas as pd from pandas import MultiIndex, compat import pandas.util.testing as tm @@ -55,7 +53,7 @@ def test_format_sparse_display(): def test_repr_with_unicode_data(): with pd.core.config.option_context("display.encoding", 'UTF-8'): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index assert "\\u" not in repr(index) # we don't want unicode-escaped @@ -88,13 +86,13 @@ def test_repr_roundtrip(): def test_unicode_string_with_unicode(): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 
8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index str(idx) def test_bytestring_with_unicode(): - d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index bytes(idx) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 8a92d9b42fa6e..a0c41ab4beb4b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -11,7 +11,7 @@ import pytest from pandas._libs.tslib import Timestamp -from pandas.compat import PY36, StringIO, lrange, lzip, u +from pandas.compat import PY36, StringIO, lrange, lzip from pandas.compat.numpy import np_datetime64_compat from pandas.core.dtypes.common import is_unsigned_integer_dtype @@ -1285,7 +1285,7 @@ def test_format_missing(self, vals, nulls_fixture): index = Index(vals) formatted = index.format() - expected = [str(index[0]), str(index[1]), str(index[2]), u('NaN')] + expected = [str(index[0]), str(index[1]), str(index[2]), 'NaN'] assert formatted == expected assert index[3] is nulls_fixture @@ -2388,7 +2388,7 @@ def test_int_name_format(self, klass): assert '0' in repr(result) def test_print_unicode_columns(self): - df = pd.DataFrame({u("\u05d0"): [1, 2, 3], + df = pd.DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError diff --git a/pandas/tests/indexes/test_frozen.py b/pandas/tests/indexes/test_frozen.py index c2931b10233e0..885d078b16f56 100644 --- a/pandas/tests/indexes/test_frozen.py +++ b/pandas/tests/indexes/test_frozen.py @@ -2,8 +2,6 @@ import numpy as np -from pandas.compat import u - from pandas.core.indexes.frozen import FrozenList, FrozenNDArray from pandas.tests.test_base import CheckImmutable, CheckStringMixin from pandas.util import testing as tm @@ -11,7 +9,7 @@ class TestFrozenList(CheckImmutable, CheckStringMixin): mutable_methods = ('extend', 'pop', 'remove', 
'insert') - unicode_container = FrozenList([u("\u05d0"), u("\u05d1"), "c"]) + unicode_container = FrozenList(["\u05d0", "\u05d1", "c"]) def setup_method(self, _): self.lst = [1, 2, 3, 4, 5] @@ -63,8 +61,7 @@ def setup_method(self, _): warnings.simplefilter("ignore", FutureWarning) self.container = FrozenNDArray(self.lst) - self.unicode_container = FrozenNDArray( - [u("\u05d0"), u("\u05d1"), "c"]) + self.unicode_container = FrozenNDArray(["\u05d0", "\u05d1", "c"]) def test_constructor_warns(self): # see gh-9031 diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index 158327b0769fc..d3a4743599ec3 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -5,8 +5,6 @@ import numpy as np import pytest -from pandas.compat import u - import pandas as pd from pandas import Float64Index, Index, Int64Index, RangeIndex, Series import pandas.util.testing as tm @@ -672,7 +670,7 @@ def test_take_fill_value(self): idx.take(np.array([1, -5])) def test_print_unicode_columns(self): - df = pd.DataFrame({u("\u05d0"): [1, 2, 3], + df = pd.DataFrame({"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 490a9bf08ca3e..1080c3ecdcf1f 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas.compat import u - from pandas import DataFrame, Index, MultiIndex, Series from pandas.core.indexing import IndexingError from pandas.util import testing as tm @@ -156,19 +154,11 @@ def test_frame_getitem_toplevel( tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize('unicode_strings', [True, False]) -def test_frame_mixed_depth_get(unicode_strings): - # If unicode_strings is True, the column labels in dataframe - # construction 
will use unicode strings in Python 2 (pull request - # #17099). - +def test_frame_mixed_depth_get(): arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - if unicode_strings: - arrays = [[u(s) for s in arr] for arr in arrays] - tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(np.random.randn(4, 6), columns=index) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 058afdc4c2c83..6c672db4070fb 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -12,7 +12,7 @@ import pytest from pandas._libs.internals import BlockPlacement -from pandas.compat import lrange, u +from pandas.compat import lrange import pandas as pd from pandas import ( @@ -784,12 +784,12 @@ def test_get_bool_data(self): np.array([True, False, True])) def test_unicode_repr_doesnt_raise(self): - repr(create_mgr(u('b,\u05d0: object'))) + repr(create_mgr('b,\u05d0: object')) def test_missing_unicode_key(self): df = DataFrame({"a": [1]}) try: - df.loc[:, u("\u05d0")] # should not raise UnicodeEncodeError + df.loc[:, "\u05d0"] # should not raise UnicodeEncodeError except KeyError: pass # this is the expected exception diff --git a/pandas/tests/io/formats/test_eng_formatting.py b/pandas/tests/io/formats/test_eng_formatting.py index 455b6454d73ff..982212b46dacc 100644 --- a/pandas/tests/io/formats/test_eng_formatting.py +++ b/pandas/tests/io/formats/test_eng_formatting.py @@ -1,7 +1,5 @@ import numpy as np -from pandas.compat import u - import pandas as pd from pandas import DataFrame from pandas.util import testing as tm @@ -170,14 +168,14 @@ def test_rounding(self): formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) result = formatter(0) - assert result == u(' 0.000') + assert result == ' 0.000' def test_nan(self): # Issue #11981 formatter = 
fmt.EngFormatter(accuracy=1, use_eng_prefix=True) result = formatter(np.nan) - assert result == u('NaN') + assert result == 'NaN' df = pd.DataFrame({'a': [1.5, 10.3, 20.5], 'b': [50.3, 60.67, 70.12], @@ -193,4 +191,4 @@ def test_inf(self): formatter = fmt.EngFormatter(accuracy=1, use_eng_prefix=True) result = formatter(np.inf) - assert result == u('inf') + assert result == 'inf' diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index c39cc2443a18a..388903bad875d 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -22,7 +22,7 @@ import pandas.compat as compat from pandas.compat import ( - StringIO, is_platform_32bit, is_platform_windows, lrange, lzip, u) + StringIO, is_platform_32bit, is_platform_windows, lrange, lzip) import pandas as pd from pandas import ( @@ -260,8 +260,8 @@ def test_repr_should_return_str(self): # (str on py2.x, str (unicode) on py3) data = [8, 5, 3, 5] - index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] - cols = [u("\u03c8")] + index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"] + cols = ["\u03c8"] df = DataFrame(data, columns=cols, index=index1) assert type(df.__repr__()) == str # both py2 / 3 @@ -448,7 +448,7 @@ def test_auto_detect(self): def test_to_string_repr_unicode(self): buf = StringIO() - unicode_values = [u('\u03c3')] * 10 + unicode_values = ['\u03c3'] * 10 unicode_values = np.array(unicode_values, dtype=object) df = DataFrame({'unicode': unicode_values}) df.to_string(col_space=10, buf=buf) @@ -456,7 +456,7 @@ def test_to_string_repr_unicode(self): # it works! 
repr(df) - idx = Index(['abc', u('\u03c3a'), 'aegdvg']) + idx = Index(['abc', '\u03c3a', 'aegdvg']) ser = Series(np.random.randn(len(idx)), idx) rs = repr(ser).split('\n') line_len = len(rs[0]) @@ -477,7 +477,7 @@ def test_to_string_repr_unicode(self): sys.stdin = _stdin def test_to_string_unicode_columns(self): - df = DataFrame({u('\u03c3'): np.arange(10.)}) + df = DataFrame({'\u03c3': np.arange(10.)}) buf = StringIO() df.to_string(buf=buf) @@ -491,14 +491,14 @@ def test_to_string_unicode_columns(self): assert isinstance(result, compat.text_type) def test_to_string_utf8_columns(self): - n = u("\u05d0").encode('utf-8') + n = "\u05d0".encode('utf-8') with option_context('display.max_rows', 1): df = DataFrame([1, 2], columns=[n]) repr(df) def test_to_string_unicode_two(self): - dm = DataFrame({u('c/\u03c3'): []}) + dm = DataFrame({'c/\u03c3': []}) buf = StringIO() dm.to_string(buf) @@ -547,10 +547,10 @@ def format_func(x): assert result.strip() == expected def test_to_string_with_formatters_unicode(self): - df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) + df = DataFrame({'c/\u03c3': [1, 2, 3]}) result = df.to_string( - formatters={u('c/\u03c3'): lambda x: '{x}'.format(x=x)}) - assert result == u(' c/\u03c3\n') + '0 1\n1 2\n2 3' + formatters={'c/\u03c3': lambda x: '{x}'.format(x=x)}) + assert result == ' c/\u03c3\n' + '0 1\n1 2\n2 3' def test_east_asian_unicode_false(self): # not alighned properly because of east asian width @@ -791,8 +791,8 @@ def test_east_asian_unicode_true(self): def test_to_string_buffer_all_unicode(self): buf = StringIO() - empty = DataFrame({u('c/\u03c3'): Series()}) - nonempty = DataFrame({u('c/\u03c3'): Series([1, 2, 3])}) + empty = DataFrame({'c/\u03c3': Series()}) + nonempty = DataFrame({'c/\u03c3': Series([1, 2, 3])}) print(empty, file=buf) print(nonempty, file=buf) @@ -958,7 +958,7 @@ def test_nonunicode_nonascii_alignment(self): assert len(lines[1]) == len(lines[2]) def test_unicode_problem_decoding_as_ascii(self): - dm = 
DataFrame({u('c/\u03c3'): Series({'test': np.nan})}) + dm = DataFrame({'c/\u03c3': Series({'test': np.nan})}) compat.text_type(dm.to_string()) def test_string_repr_encoding(self, datapath): @@ -1128,25 +1128,22 @@ def test_index_with_nan(self): # multi-index y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = u( - ' value\nid1 id2 id3 \n' - '1a3 NaN 78d 123\n9h4 d67 79d 64') + expected = (' value\nid1 id2 id3 \n' + '1a3 NaN 78d 123\n9h4 d67 79d 64') assert result == expected # index y = df.set_index('id2') result = y.to_string() - expected = u( - ' id1 id3 value\nid2 \n' - 'NaN 1a3 78d 123\nd67 9h4 79d 64') + expected = (' id1 id3 value\nid2 \n' + 'NaN 1a3 78d 123\nd67 9h4 79d 64') assert result == expected # with append (this failed in 0.12) y = df.set_index(['id1', 'id2']).set_index('id3', append=True) result = y.to_string() - expected = u( - ' value\nid1 id2 id3 \n' - '1a3 NaN 78d 123\n9h4 d67 79d 64') + expected = (' value\nid1 id2 id3 \n' + '1a3 NaN 78d 123\n9h4 d67 79d 64') assert result == expected # all-nan in mi @@ -1154,9 +1151,8 @@ def test_index_with_nan(self): df2.loc[:, 'id2'] = np.nan y = df2.set_index('id2') result = y.to_string() - expected = u( - ' id1 id3 value\nid2 \n' - 'NaN 1a3 78d 123\nNaN 9h4 79d 64') + expected = (' id1 id3 value\nid2 \n' + 'NaN 1a3 78d 123\nNaN 9h4 79d 64') assert result == expected # partial nan in mi @@ -1164,9 +1160,8 @@ def test_index_with_nan(self): df2.loc[:, 'id2'] = np.nan y = df2.set_index(['id2', 'id3']) result = y.to_string() - expected = u( - ' id1 value\nid2 id3 \n' - 'NaN 78d 1a3 123\n 79d 9h4 64') + expected = (' id1 value\nid2 id3 \n' + 'NaN 78d 1a3 123\n 79d 9h4 64') assert result == expected df = DataFrame({'id1': {0: np.nan, @@ -1180,9 +1175,8 @@ def test_index_with_nan(self): y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = u( - ' value\nid1 id2 id3 \n' - 'NaN NaN NaN 123\n9h4 d67 79d 64') + expected = (' value\nid1 id2 id3 \n' + 'NaN NaN NaN 123\n9h4 
d67 79d 64') assert result == expected def test_to_string(self): @@ -1384,7 +1378,7 @@ def test_to_string_float_index(self): assert result == expected def test_to_string_ascii_error(self): - data = [('0 ', u(' .gitignore '), u(' 5 '), + data = [('0 ', ' .gitignore ', ' 5 ', ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80' '\xa2\xe2\x80\xa2\xe2\x80\xa2')] df = DataFrame(data) @@ -1586,8 +1580,8 @@ def test_repr_html_long(self): long_repr = df._repr_html_() assert '..' in long_repr assert str(41 + max_rows // 2) not in long_repr - assert u('{h} rows ').format(h=h) in long_repr - assert u('2 columns') in long_repr + assert '{h} rows '.format(h=h) in long_repr + assert '2 columns' in long_repr def test_repr_html_float(self): with option_context('display.max_rows', 60): @@ -1608,8 +1602,8 @@ def test_repr_html_float(self): long_repr = df._repr_html_() assert '..' in long_repr assert '{val}'.format(val='31') not in long_repr - assert u('{h} rows ').format(h=h) in long_repr - assert u('2 columns') in long_repr + assert '{h} rows '.format(h=h) in long_repr + assert '2 columns' in long_repr def test_repr_html_long_multiindex(self): max_rows = 60 @@ -1778,10 +1772,10 @@ def setup_method(self, method): self.ts = tm.makeTimeSeries() def test_repr_unicode(self): - s = Series([u('\u03c3')] * 10) + s = Series(['\u03c3'] * 10) repr(s) - a = Series([u("\u05d0")] * 1000) + a = Series(["\u05d0"] * 1000) a.name = 'title1' repr(a) @@ -1826,19 +1820,19 @@ def test_freq_name_separation(self): def test_to_string_mixed(self): s = Series(['foo', np.nan, -1.23, 4.56]) result = s.to_string() - expected = (u('0 foo\n') + u('1 NaN\n') + u('2 -1.23\n') + - u('3 4.56')) + expected = ('0 foo\n' + '1 NaN\n' + '2 -1.23\n' + + '3 4.56') assert result == expected # but don't count NAs as floats s = Series(['foo', np.nan, 'bar', 'baz']) result = s.to_string() - expected = (u('0 foo\n') + '1 NaN\n' + '2 bar\n' + '3 baz') + expected = ('0 foo\n' + '1 NaN\n' + '2 bar\n' + '3 baz') assert result == expected s = 
Series(['foo', 5, 'bar', 'baz']) result = s.to_string() - expected = (u('0 foo\n') + '1 5\n' + '2 bar\n' + '3 baz') + expected = ('0 foo\n' + '1 5\n' + '2 bar\n' + '3 baz') assert result == expected def test_to_string_float_na_spacing(self): @@ -1846,7 +1840,7 @@ def test_to_string_float_na_spacing(self): s[::2] = np.nan result = s.to_string() - expected = (u('0 NaN\n') + '1 1.5678\n' + '2 NaN\n' + + expected = ('0 NaN\n' + '1 1.5678\n' + '2 NaN\n' + '3 -3.0000\n' + '4 NaN') assert result == expected @@ -1854,12 +1848,12 @@ def test_to_string_without_index(self): # GH 11729 Test index=False option s = Series([1, 2, 3, 4]) result = s.to_string(index=False) - expected = (u(' 1\n') + ' 2\n' + ' 3\n' + ' 4') + expected = (' 1\n' + ' 2\n' + ' 3\n' + ' 4') assert result == expected def test_unicode_name_in_footer(self): - s = Series([1, 2], name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) - sf = fmt.SeriesFormatter(s, name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) + s = Series([1, 2], name='\u05e2\u05d1\u05e8\u05d9\u05ea') + sf = fmt.SeriesFormatter(s, name='\u05e2\u05d1\u05e8\u05d9\u05ea') sf._get_footer() # should not raise exception def test_east_asian_unicode_series(self): @@ -2130,7 +2124,7 @@ def test_timedelta64(self): # no boxing of the actual elements td = Series(pd.timedelta_range('1 days', periods=3)) result = td.to_string() - assert result == u("0 1 days\n1 2 days\n2 3 days") + assert result == "0 1 days\n1 2 days\n2 3 days" def test_mixed_datetime64(self): df = DataFrame({'A': [1, 2], 'B': ['2012-01-01', '2012-01-02']}) diff --git a/pandas/tests/io/formats/test_to_html.py b/pandas/tests/io/formats/test_to_html.py index 9cb2704f65587..148a42a332661 100644 --- a/pandas/tests/io/formats/test_to_html.py +++ b/pandas/tests/io/formats/test_to_html.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, lrange, u +from pandas.compat import StringIO, lrange import pandas as pd from pandas import DataFrame, Index, MultiIndex, compat, 
option_context @@ -89,8 +89,8 @@ def test_to_html_with_empty_string_label(): @pytest.mark.parametrize('df,expected', [ - (DataFrame({u('\u03c3'): np.arange(10.)}), 'unicode_1'), - (DataFrame({'A': [u('\u03c3')]}), 'unicode_2') + (DataFrame({'\u03c3': np.arange(10.)}), 'unicode_1'), + (DataFrame({'A': ['\u03c3']}), 'unicode_2') ]) def test_to_html_unicode(df, expected, datapath): expected = expected_html(datapath, expected) @@ -201,13 +201,13 @@ def test_to_html_formatters(df, formatters, expected, datapath): def test_to_html_regression_GH6098(): df = DataFrame({ - u('clé1'): [u('a'), u('a'), u('b'), u('b'), u('a')], - u('clé2'): [u('1er'), u('2ème'), u('1er'), u('2ème'), u('1er')], + 'clé1': ['a', 'a', 'b', 'b', 'a'], + 'clé2': ['1er', '2ème', '1er', '2ème', '1er'], 'données1': np.random.randn(5), 'données2': np.random.randn(5)}) # it works - df.pivot_table(index=[u('clé1')], columns=[u('clé2')])._repr_html_() + df.pivot_table(index=['clé1'], columns=['clé2'])._repr_html_() def test_to_html_truncate(datapath): diff --git a/pandas/tests/io/formats/test_to_latex.py b/pandas/tests/io/formats/test_to_latex.py index 2efa33b1e95bb..8a7cb2d9a70b4 100644 --- a/pandas/tests/io/formats/test_to_latex.py +++ b/pandas/tests/io/formats/test_to_latex.py @@ -3,8 +3,6 @@ import pytest -from pandas.compat import u - import pandas as pd from pandas import DataFrame, Series from pandas.util import testing as tm @@ -343,9 +341,9 @@ def test_to_latex_escape(self): a = 'a' b = 'b' - test_dict = {u('co$e^x$'): {a: "a", + test_dict = {'co$e^x$': {a: "a", b: "b"}, - u('co^l1'): {a: "a", + 'co^l1': {a: "a", b: "b"}} unescaped_result = DataFrame(test_dict).to_latex(escape=False) diff --git a/pandas/tests/io/generate_legacy_storage_files.py b/pandas/tests/io/generate_legacy_storage_files.py index 815b8d4deaa66..5cb4436510bb9 100755 --- a/pandas/tests/io/generate_legacy_storage_files.py +++ b/pandas/tests/io/generate_legacy_storage_files.py @@ -44,8 +44,6 @@ import numpy as np -from 
pandas.compat import u - import pandas from pandas import ( Categorical, DataFrame, Index, MultiIndex, NaT, Period, Series, @@ -254,7 +252,7 @@ def create_pickle_data(): def _u(x): - return {u(k): _u(x[k]) for k in x} if isinstance(x, dict) else x + return {k: _u(x[k]) for k in x} if isinstance(x, dict) else x def create_msgpack_data(): diff --git a/pandas/tests/io/json/test_ujson.py b/pandas/tests/io/json/test_ujson.py index 928295f8c4be9..126a6851ed482 100644 --- a/pandas/tests/io/json/test_ujson.py +++ b/pandas/tests/io/json/test_ujson.py @@ -20,7 +20,7 @@ import pandas._libs.json as ujson from pandas._libs.tslib import Timestamp import pandas.compat as compat -from pandas.compat import StringIO, u +from pandas.compat import StringIO from pandas import DataFrame, DatetimeIndex, Index, NaT, Series, date_range import pandas.util.testing as tm @@ -147,7 +147,7 @@ def helper(expected_output, **encode_kwargs): -4342969734183514, -12345678901234.56789012, -528656961.4399388 ]) def test_double_long_numbers(self, long_number): - sut = {u("a"): long_number} + sut = {"a": long_number} encoded = ujson.encode(sut, double_precision=15) decoded = ujson.decode(encoded) @@ -165,7 +165,7 @@ def test_encode_non_c_locale(self): break def test_decimal_decode_test_precise(self): - sut = {u("a"): 4.56} + sut = {"a": 4.56} encoded = ujson.encode(sut) decoded = ujson.decode(encoded, precise_float=True) assert sut == decoded @@ -181,10 +181,10 @@ def test_encode_double_tiny_exponential(self): assert np.allclose(num, ujson.decode(ujson.encode(num))) @pytest.mark.parametrize("unicode_key", [ - u("key1"), u("بن") + "key1", "بن" ]) def test_encode_dict_with_unicode_keys(self, unicode_key): - unicode_dict = {unicode_key: u("value1")} + unicode_dict = {unicode_key: "value1"} assert unicode_dict == ujson.decode(ujson.encode(unicode_dict)) @pytest.mark.parametrize("double_input", [ @@ -430,7 +430,7 @@ def test_encode_to_utf8(self): assert dec == json.loads(enc) def 
test_decode_from_unicode(self): - unicode_input = u("{\"obj\": 31337}") + unicode_input = "{\"obj\": 31337}" dec1 = ujson.decode(unicode_input) dec2 = ujson.decode(str(unicode_input)) @@ -529,7 +529,7 @@ def test_encode_null_character(self): assert alone_input == json.loads(output) assert output == json.dumps(alone_input) assert alone_input == ujson.decode(output) - assert '" \\u0000\\r\\n "' == ujson.dumps(u(" \u0000\r\n ")) + assert '" \\u0000\\r\\n "' == ujson.dumps(" \u0000\r\n ") def test_decode_null_character(self): wrapped_input = "\"31337 \\u0000 31337\"" @@ -660,7 +660,7 @@ def test_decode_big_escape(self): ujson.decode(escape_input) def test_to_dict(self): - d = {u("key"): 31337} + d = {"key": 31337} class DictTest(object): def toDict(self): @@ -860,7 +860,7 @@ def test_array_numpy_labelled(self): labelled_input = [{"a": 42}] output = ujson.loads(ujson.dumps(labelled_input), numpy=True, labelled=True) - assert (np.array([u("a")]) == output[2]).all() + assert (np.array(["a"]) == output[2]).all() assert (np.array([42]) == output[0]).all() assert output[1] is None @@ -873,7 +873,7 @@ def test_array_numpy_labelled(self): [42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3, 2)) assert (expected_vals == output[0]).all() assert output[1] is None - assert (np.array([u("a"), "b"]) == output[2]).all() + assert (np.array(["a", "b"]) == output[2]).all() input_dumps = ('{"1": {"a": 42, "b":31}, "2": {"a": 24, "c": 99}, ' '"3": {"a": 2.4, "b": 78}}') diff --git a/pandas/tests/io/msgpack/test_pack.py b/pandas/tests/io/msgpack/test_pack.py index 078d9f4ceb649..a5fb292a451f5 100644 --- a/pandas/tests/io/msgpack/test_pack.py +++ b/pandas/tests/io/msgpack/test_pack.py @@ -4,8 +4,6 @@ import pytest -from pandas.compat import u - from pandas import compat from pandas.io.msgpack import Packer, Unpacker, packb, unpackb @@ -32,7 +30,7 @@ def testPack(self): self.check(td) def testPackUnicode(self): - test_data = [u(""), u("abcd"), [u("defgh")], u("Русский текст"), ] + test_data = 
["", "abcd", ["defgh"], "Русский текст", ] for td in test_data: re = unpackb( packb(td, encoding='utf-8'), use_list=1, encoding='utf-8') @@ -44,12 +42,7 @@ def testPackUnicode(self): assert re == td def testPackUTF32(self): - test_data = [ - compat.u(""), - compat.u("abcd"), - [compat.u("defgh")], - compat.u("Русский текст"), - ] + test_data = ["", "abcd", ["defgh"], "Русский текст"] for td in test_data: re = unpackb( packb(td, encoding='utf-32'), use_list=1, encoding='utf-32') @@ -76,20 +69,18 @@ def testStrictUnicodePack(self): msg = (r"'ascii' codec can't encode character u*'\\xed' in position 3:" r" ordinal not in range\(128\)") with pytest.raises(UnicodeEncodeError, match=msg): - packb(compat.u("abc\xeddef"), encoding='ascii', - unicode_errors='strict') + packb("abc\xeddef", encoding='ascii', unicode_errors='strict') def testIgnoreErrorsPack(self): re = unpackb( - packb( - compat.u("abcФФФdef"), encoding='ascii', - unicode_errors='ignore'), encoding='utf-8', use_list=1) - assert re == compat.u("abcdef") + packb("abcФФФdef", encoding='ascii', unicode_errors='ignore'), + encoding='utf-8', use_list=1) + assert re == "abcdef" def testNoEncoding(self): msg = "Can't encode unicode string: no encoding is specified" with pytest.raises(TypeError, match=msg): - packb(compat.u("abc"), encoding=None) + packb("abc", encoding=None) def testDecodeBinary(self): re = unpackb(packb("abc"), encoding=None, use_list=1) diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index 30923bb7f5004..30799eed99367 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -17,7 +17,7 @@ import pytest from pandas._libs.tslib import Timestamp -from pandas.compat import BytesIO, StringIO, lrange, u +from pandas.compat import BytesIO, StringIO, lrange from pandas.errors import DtypeWarning, EmptyDataError, ParserError from pandas import DataFrame, Index, MultiIndex, Series, compat, concat @@ -120,7 +120,7 @@ def 
test_bad_stream_exception(all_parsers, csv_dir_path): def test_read_csv_local(all_parsers, csv1): - prefix = u("file:///") if compat.is_platform_windows() else u("file://") + prefix = "file:///" if compat.is_platform_windows() else "file://" parser = all_parsers fname = prefix + compat.text_type(os.path.abspath(csv1)) @@ -310,10 +310,10 @@ def test_read_csv_no_index_name(all_parsers, csv_dir_path): def test_read_csv_unicode(all_parsers): parser = all_parsers - data = BytesIO(u("\u0141aski, Jan;1").encode("utf-8")) + data = BytesIO("\u0141aski, Jan;1".encode("utf-8")) result = parser.read_csv(data, sep=";", encoding="utf-8", header=None) - expected = DataFrame([[u("\u0141aski, Jan"), 1]]) + expected = DataFrame([["\u0141aski, Jan", 1]]) tm.assert_frame_equal(result, expected) @@ -941,11 +941,11 @@ def test_skip_initial_space(all_parsers): def test_utf16_bom_skiprows(all_parsers, sep, encoding): # see gh-2298 parser = all_parsers - data = u("""skip this + data = """skip this skip this too A,B,C 1,2,3 -4,5,6""").replace(",", sep) +4,5,6""".replace(",", sep) path = "__%s__.csv" % tm.rands(10) kwargs = dict(sep=sep, skiprows=2) utf8 = "utf-8" @@ -982,7 +982,7 @@ def test_unicode_encoding(all_parsers, csv_dir_path): result = result.set_index(0) got = result[1][1632] - expected = u('\xc1 k\xf6ldum klaka (Cold Fever) (1994)') + expected = '\xc1 k\xf6ldum klaka (Cold Fever) (1994)' assert got == expected @@ -1686,7 +1686,7 @@ def test_null_byte_char(all_parsers): def test_utf8_bom(all_parsers, data, kwargs, expected): # see gh-4793 parser = all_parsers - bom = u("\ufeff") + bom = "\ufeff" utf8 = "utf-8" def _encode_data_with_bom(_data): diff --git a/pandas/tests/io/parser/test_header.py b/pandas/tests/io/parser/test_header.py index 38f4cc42357fa..b6854e0868aa0 100644 --- a/pandas/tests/io/parser/test_header.py +++ b/pandas/tests/io/parser/test_header.py @@ -10,7 +10,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, u +from pandas.compat import 
StringIO from pandas.errors import ParserError from pandas import DataFrame, Index, MultiIndex @@ -233,11 +233,11 @@ def test_header_multi_index_common_format_malformed1(all_parsers): expected = DataFrame(np.array( [[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), index=Index([1, 7]), - columns=MultiIndex(levels=[[u("a"), u("b"), u("c")], - [u("r"), u("s"), u("t"), - u("u"), u("v")]], + columns=MultiIndex(levels=[["a", "b", "c"], + ["r", "s", "t", + "u", "v"]], codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], - names=[u("a"), u("q")])) + names=["a", "q"])) data = """a,a,a,b,c,c q,r,s,t,u,v 1,2,3,4,5,6 @@ -252,11 +252,11 @@ def test_header_multi_index_common_format_malformed2(all_parsers): expected = DataFrame(np.array( [[2, 3, 4, 5, 6], [8, 9, 10, 11, 12]], dtype="int64"), index=Index([1, 7]), - columns=MultiIndex(levels=[[u("a"), u("b"), u("c")], - [u("r"), u("s"), u("t"), - u("u"), u("v")]], + columns=MultiIndex(levels=[["a", "b", "c"], + ["r", "s", "t", + "u", "v"]], codes=[[0, 0, 1, 2, 2], [0, 1, 2, 3, 4]], - names=[None, u("q")])) + names=[None, "q"])) data = """,a,a,b,c,c q,r,s,t,u,v @@ -273,10 +273,10 @@ def test_header_multi_index_common_format_malformed3(all_parsers): [[3, 4, 5, 6], [9, 10, 11, 12]], dtype="int64"), index=MultiIndex(levels=[[1, 7], [2, 8]], codes=[[0, 1], [0, 1]]), - columns=MultiIndex(levels=[[u("a"), u("b"), u("c")], - [u("s"), u("t"), u("u"), u("v")]], + columns=MultiIndex(levels=[["a", "b", "c"], + ["s", "t", "u", "v"]], codes=[[0, 1, 2, 2], [0, 1, 2, 3]], - names=[None, u("q")])) + names=[None, "q"])) data = """,a,a,b,c,c q,r,s,t,u,v 1,2,3,4,5,6 diff --git a/pandas/tests/io/parser/test_python_parser_only.py b/pandas/tests/io/parser/test_python_parser_only.py index 51981b9373867..ad50f729a69f9 100644 --- a/pandas/tests/io/parser/test_python_parser_only.py +++ b/pandas/tests/io/parser/test_python_parser_only.py @@ -11,7 +11,7 @@ import pytest -from pandas.compat import BytesIO, StringIO, u +from pandas.compat import BytesIO, StringIO from 
pandas.errors import ParserError from pandas import DataFrame, Index, MultiIndex @@ -82,7 +82,7 @@ def test_sniff_delimiter_encoding(python_parser_only, encoding): if encoding is not None: from io import TextIOWrapper - data = u(data).encode(encoding) + data = data.encode(encoding) data = BytesIO(data) data = TextIOWrapper(data, encoding=encoding) else: diff --git a/pandas/tests/io/parser/test_quoting.py b/pandas/tests/io/parser/test_quoting.py index 49c1b08974545..961b276c2004f 100644 --- a/pandas/tests/io/parser/test_quoting.py +++ b/pandas/tests/io/parser/test_quoting.py @@ -9,7 +9,7 @@ import pytest -from pandas.compat import StringIO, u +from pandas.compat import StringIO from pandas.errors import ParserError from pandas import DataFrame @@ -125,7 +125,7 @@ def test_double_quote(all_parsers, doublequote, exp_data): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("quotechar", [u('"'), u('\u0001')]) +@pytest.mark.parametrize("quotechar", ['"', '\u0001']) def test_quotechar_unicode(all_parsers, quotechar): # see gh-14477 data = "a\n1" diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index d508049ed5c4c..26caddf509306 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -10,7 +10,7 @@ from numpy import nan import pytest -from pandas.compat import PY36, BytesIO, iteritems, u +from pandas.compat import PY36, BytesIO, iteritems import pandas.util._test_decorators as td import pandas as pd @@ -1707,7 +1707,7 @@ def test_to_excel_output_encoding(self, merge_cells, engine, ext): tm.assert_frame_equal(result, df) def test_to_excel_unicode_filename(self, merge_cells, engine, ext): - with ensure_clean(u("\u0192u.") + ext) as filename: + with ensure_clean("\u0192u." 
+ ext) as filename: try: f = open(filename, "wb") except UnicodeEncodeError: diff --git a/pandas/tests/io/test_packers.py b/pandas/tests/io/test_packers.py index d72bc3cf3fbae..4a17cb321d3ba 100644 --- a/pandas/tests/io/test_packers.py +++ b/pandas/tests/io/test_packers.py @@ -8,7 +8,6 @@ import pytest from pandas._libs.tslib import iNaT -from pandas.compat import u from pandas.errors import PerformanceWarning import pandas @@ -272,7 +271,7 @@ def test_numpy_array_complex(self): x.dtype == x_rec.dtype) def test_list_mixed(self): - x = [1.0, np.float32(3.5), np.complex128(4.25), u('foo'), np.bool_(1)] + x = [1.0, np.float32(3.5), np.complex128(4.25), 'foo', np.bool_(1)] x_rec = self.encode_decode(x) # current msgpack cannot distinguish list/tuple tm.assert_almost_equal(tuple(x), x_rec) @@ -801,7 +800,7 @@ class TestEncoding(TestPackers): def setup_method(self, method): super(TestEncoding, self).setup_method(method) data = { - 'A': [compat.u('\u2019')] * 1000, + 'A': ['\u2019'] * 1000, 'B': np.arange(1000, dtype=np.int32), 'C': list(100 * 'abcdefghij'), 'D': date_range(datetime.datetime(2015, 4, 1), periods=1000), diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 2483763c2e66f..42641d2c35c39 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -11,7 +11,7 @@ from pandas.compat import ( PY35, PY36, BytesIO, is_platform_little_endian, is_platform_windows, - lrange, text_type, u) + lrange, text_type) import pandas.util._test_decorators as td from pandas.core.dtypes.common import is_categorical_dtype @@ -2424,10 +2424,10 @@ def test_store_index_name_numpy_str(self, table_format): # GH #13492 idx = pd.Index(pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]), - name=u('cols\u05d2')) + name='cols\u05d2') idx1 = pd.Index(pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]), - name=u('rows\u05d0')) + name='rows\u05d0') df = pd.DataFrame(np.arange(4).reshape(2, 2), 
columns=idx, index=idx1) # This used to fail, returning numpy strings instead of python strings. @@ -4100,7 +4100,7 @@ def test_tseries_indices_frame(self): def test_unicode_index(self): - unicode_values = [u('\u03c3'), u('\u03c3\u03c3')] + unicode_values = ['\u03c3', '\u03c3\u03c3'] # PerformanceWarning with catch_warnings(record=True): diff --git a/pandas/tests/io/test_stata.py b/pandas/tests/io/test_stata.py index 3f23f34b3f860..8410eb61f76d4 100644 --- a/pandas/tests/io/test_stata.py +++ b/pandas/tests/io/test_stata.py @@ -383,7 +383,7 @@ def test_encoding(self, version): def test_read_write_dta11(self): original = DataFrame([(1, 2, 3, 4)], - columns=['good', compat.u('b\u00E4d'), '8number', + columns=['good', 'b\u00E4d', '8number', 'astringwithmorethan32characters______']) formatted = DataFrame([(1, 2, 3, 4)], columns=['good', 'b_d', '_8number', diff --git a/pandas/tests/plotting/test_converter.py b/pandas/tests/plotting/test_converter.py index 7dfc21562cc5d..95751752032be 100644 --- a/pandas/tests/plotting/test_converter.py +++ b/pandas/tests/plotting/test_converter.py @@ -5,7 +5,6 @@ import numpy as np import pytest -from pandas.compat import u from pandas.compat.numpy import np_datetime64_compat from pandas import Index, Period, Series, Timestamp, date_range @@ -20,7 +19,7 @@ def test_timtetonum_accepts_unicode(): - assert (converter.time2num("00:01") == converter.time2num(u("00:01"))) + assert (converter.time2num("00:01") == converter.time2num("00:01")) class TestRegistration(object): @@ -151,7 +150,7 @@ def setup_method(self, method): def test_convert_accepts_unicode(self): r1 = self.dtc.convert("12:22", None, None) - r2 = self.dtc.convert(u("12:22"), None, None) + r2 = self.dtc.convert("12:22", None, None) assert (r1 == r2), "DatetimeConverter.convert should accept unicode" def test_conversion(self): @@ -296,7 +295,7 @@ class Axis(object): def test_convert_accepts_unicode(self): r1 = self.pc.convert("2012-1-1", None, self.axis) - r2 = 
self.pc.convert(u("2012-1-1"), None, self.axis) + r2 = self.pc.convert("2012-1-1", None, self.axis) assert r1 == r2 def test_conversion(self): diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 2b17377c7b9bc..4c22c3245b788 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -10,7 +10,7 @@ from numpy.random import rand, randn import pytest -from pandas.compat import lmap, lrange, lzip, u +from pandas.compat import lmap, lrange, lzip import pandas.util._test_decorators as td from pandas.core.dtypes.api import is_list_like @@ -113,21 +113,20 @@ def test_plot(self): _check_plot_works(df.plot, use_index=True) # unicode - index = MultiIndex.from_tuples([(u('\u03b1'), 0), - (u('\u03b1'), 1), - (u('\u03b2'), 2), - (u('\u03b2'), 3), - (u('\u03b3'), 4), - (u('\u03b3'), 5), - (u('\u03b4'), 6), - (u('\u03b4'), 7)], names=['i0', 'i1']) - columns = MultiIndex.from_tuples([('bar', u('\u0394')), - ('bar', u('\u0395'))], names=['c0', - 'c1']) + index = MultiIndex.from_tuples([('\u03b1', 0), + ('\u03b1', 1), + ('\u03b2', 2), + ('\u03b2', 3), + ('\u03b3', 4), + ('\u03b3', 5), + ('\u03b4', 6), + ('\u03b4', 7)], names=['i0', 'i1']) + columns = MultiIndex.from_tuples( + [('bar', '\u0394'), ('bar', '\u0395')], names=['c0', 'c1']) df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) - _check_plot_works(df.plot, title=u('\u03A3')) + _check_plot_works(df.plot, title='\u03A3') # GH 6951 # Test with single column @@ -2471,7 +2470,7 @@ def test_errorbar_plot(self): self._check_has_errorbars(ax, xerr=0, yerr=2) # yerr is column name - for yerr in ['yerr', u('誤差')]: + for yerr in ['yerr', '誤差']: s_df = df.copy() s_df[yerr] = np.ones(12) * 0.2 ax = _check_plot_works(s_df.plot, yerr=yerr) diff --git a/pandas/tests/reshape/test_reshape.py b/pandas/tests/reshape/test_reshape.py index 9d75f5f4f2ca4..9b399b58a5403 100644 --- a/pandas/tests/reshape/test_reshape.py +++ 
b/pandas/tests/reshape/test_reshape.py @@ -7,8 +7,6 @@ from numpy import nan import pytest -from pandas.compat import u - from pandas.core.dtypes.common import is_integer_dtype import pandas as pd @@ -170,7 +168,7 @@ def test_unicode(self, sparse): s = [e, eacute, eacute] res = get_dummies(s, prefix='letter', sparse=sparse) exp = DataFrame({'letter_e': [1, 0, 0], - u('letter_%s') % eacute: [0, 1, 1]}, + 'letter_%s' % eacute: [0, 1, 1]}, dtype=np.uint8) if sparse: exp = exp.apply(pd.SparseArray, fill_value=0) diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index cb457b7c1a200..f27ae68080794 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -10,7 +10,7 @@ from pandas._libs.tslibs import iNaT import pandas.compat as compat -from pandas.compat import lrange, u +from pandas.compat import lrange import pandas as pd from pandas import ( @@ -172,13 +172,13 @@ def test_astype_unicode(self): digits = string.digits test_series = [ Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]), - Series([u('データーサイエンス、お前はもう死んでいる')]), + Series(['データーサイエンス、お前はもう死んでいる']), ] former_encoding = None if sys.getdefaultencoding() == "utf-8": - test_series.append(Series([u('野菜食べないとやばい') + test_series.append(Series(['野菜食べないとやばい' .encode("utf-8")])) for s in test_series: diff --git a/pandas/tests/series/test_io.py b/pandas/tests/series/test_io.py index 5749b0c6551d6..4458cf788133c 100644 --- a/pandas/tests/series/test_io.py +++ b/pandas/tests/series/test_io.py @@ -7,7 +7,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, u +from pandas.compat import StringIO import pandas as pd from pandas import DataFrame, Series @@ -121,7 +121,7 @@ def test_to_csv(self, datetime_series): def test_to_csv_unicode_index(self): buf = StringIO() - s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) + s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"]) s.to_csv(buf, encoding="UTF-8", 
header=False) buf.seek(0) diff --git a/pandas/tests/series/test_repr.py b/pandas/tests/series/test_repr.py index 108785e2bce27..6c090e43e0946 100644 --- a/pandas/tests/series/test_repr.py +++ b/pandas/tests/series/test_repr.py @@ -5,7 +5,7 @@ import numpy as np -from pandas.compat import lrange, u +from pandas.compat import lrange import pandas as pd from pandas import ( @@ -80,11 +80,11 @@ def test_repr(self): repr(ots) # various names - for name in ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'), + for name in ['', 1, 1.2, 'foo', '\u03B1\u03B2\u03B3', 'loooooooooooooooooooooooooooooooooooooooooooooooooooong', ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3), - (u('\u03B1'), u('\u03B2'), u('\u03B3')), - (u('\u03B1'), 'bar')]: + ('\u03B1', '\u03B2', '\u03B3'), + ('\u03B1', 'bar')]: self.series.name = name repr(self.series) @@ -115,7 +115,7 @@ def test_repr(self): assert repr(s) == 'Series([], dtype: int64)' def test_tidy_repr(self): - a = Series([u("\u05d0")] * 1000) + a = Series(["\u05d0"] * 1000) a.name = 'title1' repr(a) # should not raise exception @@ -134,7 +134,7 @@ def test_repr_name_iterable_indexable(self): # it works! 
repr(s) - s.name = (u("\u05d0"), ) * 2 + s.name = ("\u05d0", ) * 2 repr(s) def test_repr_should_return_str(self): @@ -144,7 +144,7 @@ def test_repr_should_return_str(self): # (str on py2.x, str (unicode) on py3) data = [8, 5, 3, 5] - index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] + index1 = ["\u03c3", "\u03c4", "\u03c5", "\u03c6"] df = Series(data, index=index1) assert type(df.__repr__() == str) # both py2 / 3 @@ -154,11 +154,11 @@ def test_repr_max_rows(self): str(Series(range(1001))) # should not raise exception def test_unicode_string_with_unicode(self): - df = Series([u("\u05d0")], name=u("\u05d1")) + df = Series(["\u05d0"], name="\u05d1") str(df) def test_bytestring_with_unicode(self): - df = Series([u("\u05d0")], name=u("\u05d1")) + df = Series(["\u05d0"], name="\u05d1") bytes(df) def test_timeseries_repr_object_dtype(self): @@ -223,21 +223,21 @@ def __unicode__(self): def test_categorical_repr(self): a = Series(Categorical([1, 2, 3, 4])) - exp = u("0 1\n1 2\n2 3\n3 4\n" + - "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") + exp = ("0 1\n1 2\n2 3\n3 4\n" + + "dtype: category\nCategories (4, int64): [1, 2, 3, 4]") assert exp == a.__unicode__() a = Series(Categorical(["a", "b"] * 25)) - exp = u("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + - "Length: 50, dtype: category\nCategories (2, object): [a, b]") + exp = ("0 a\n1 b\n" + " ..\n" + "48 a\n49 b\n" + + "Length: 50, dtype: category\nCategories (2, object): [a, b]") with option_context("display.max_rows", 5): assert exp == repr(a) levs = list("abcdefghijklmnopqrstuvwxyz") a = Series(Categorical(["a", "b"], categories=levs, ordered=True)) - exp = u("0 a\n1 b\n" + "dtype: category\n" - "Categories (26, object): [a < b < c < d ... w < x < y < z]") + exp = ("0 a\n1 b\n" + "dtype: category\n" + "Categories (26, object): [a < b < c < d ... 
w < x < y < z]") assert exp == a.__unicode__() def test_categorical_series_repr(self): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 9f4fbbaaa4a24..da8142a3c67fa 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -9,7 +9,7 @@ import pytest import pytz -from pandas.compat import StringIO, lrange, lzip, product as cart_product, u +from pandas.compat import StringIO, lrange, lzip, product as cart_product from pandas.core.dtypes.common import is_float_dtype, is_integer_dtype @@ -1303,7 +1303,7 @@ def test_drop_preserve_names(self): assert result.index.names == ('one', 'two') def test_unicode_repr_issues(self): - levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]), + levels = [Index(['a/\u03c3', 'b/\u03c3', 'c/\u03c3']), Index([0, 1])] codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] index = MultiIndex(levels=levels, codes=codes) @@ -1315,7 +1315,7 @@ def test_unicode_repr_issues(self): def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], - names=[u('\u0394'), 'i1']) + names=['\u0394', 'i1']) s = Series(lrange(2), index=index) df = DataFrame(np.random.randn(2, 4), index=index) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index cf322dc330799..025cdf205a7af 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -10,7 +10,6 @@ import pytest import pandas.compat as compat -from pandas.compat import u from pandas import DataFrame, Index, MultiIndex, Series, concat, isna, notna import pandas.core.strings as strings @@ -120,7 +119,6 @@ def any_string_method(request): # subset of the full set from pandas/conftest.py _any_allowed_skipna_inferred_dtype = [ ('string', ['a', np.nan, 'c']), - ('string', [u('a'), np.nan, u('c')]), ('bytes', [b'a', np.nan, b'c']), ('empty', [np.nan, np.nan, np.nan]), ('empty', []), @@ -707,18 +705,6 @@ def test_count(self): assert isinstance(rs, Series) 
tm.assert_series_equal(rs, xp) - # unicode - values = [u('foo'), u('foofoo'), NA, u('foooofooofommmfoo')] - - result = strings.str_count(values, 'f[o]+') - exp = np.array([1, 2, NA, 4]) - tm.assert_numpy_array_equal(result, exp) - - result = Series(values).str.count('f[o]+') - exp = Series([1, 2, NA, 4]) - assert isinstance(result, Series) - tm.assert_series_equal(result, exp) - def test_contains(self): values = np.array(['foo', NA, 'fooommm__foo', 'mmm_', 'foommm[_]+bar'], dtype=np.object_) @@ -811,6 +797,9 @@ def test_startswith(self): exp = Series([False, NA, True, False, False, NA, True]) tm.assert_series_equal(result, exp) + result = values.str.startswith('foo', na=True) + tm.assert_series_equal(result, exp.fillna(True).astype(bool)) + # mixed mixed = np.array(['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.], dtype=np.object_) @@ -824,17 +813,6 @@ def test_startswith(self): xp = Series([False, NA, False, NA, NA, True, NA, NA, NA]) tm.assert_series_equal(rs, xp) - # unicode - values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, - u('foo')]) - - result = values.str.startswith('foo') - exp = Series([False, NA, True, False, False, NA, True]) - tm.assert_series_equal(result, exp) - - result = values.str.startswith('foo', na=True) - tm.assert_series_equal(result, exp.fillna(True).astype(bool)) - def test_endswith(self): values = Series(['om', NA, 'foo_nom', 'nom', 'bar_foo', NA, 'foo']) @@ -842,6 +820,9 @@ def test_endswith(self): exp = Series([False, NA, False, False, True, NA, True]) tm.assert_series_equal(result, exp) + result = values.str.endswith('foo', na=False) + tm.assert_series_equal(result, exp.fillna(False).astype(bool)) + # mixed mixed = ['a', NA, 'b', True, datetime.today(), 'foo', None, 1, 2.] 
rs = strings.str_endswith(mixed, 'f') @@ -854,17 +835,6 @@ def test_endswith(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) - # unicode - values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, - u('foo')]) - - result = values.str.endswith('foo') - exp = Series([False, NA, False, False, True, NA, True]) - tm.assert_series_equal(result, exp) - - result = values.str.endswith('foo', na=False) - tm.assert_series_equal(result, exp.fillna(False).astype(bool)) - def test_title(self): values = Series(["FOO", "BAR", NA, "Blah", "blurg"]) @@ -879,14 +849,6 @@ def test_title(self): exp = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA]) tm.assert_almost_equal(mixed, exp) - # unicode - values = Series([u("FOO"), NA, u("bar"), u("Blurg")]) - - results = values.str.title() - exp = Series([u("Foo"), NA, u("Bar"), u("Blurg")]) - - tm.assert_series_equal(results, exp) - def test_lower_upper(self): values = Series(['om', NA, 'nom', 'nom']) @@ -906,16 +868,6 @@ def test_lower_upper(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) - # unicode - values = Series([u('om'), NA, u('nom'), u('nom')]) - - result = values.str.upper() - exp = Series([u('OM'), NA, u('NOM'), u('NOM')]) - tm.assert_series_equal(result, exp) - - result = result.str.lower() - tm.assert_series_equal(result, values) - def test_capitalize(self): values = Series(["FOO", "BAR", NA, "Blah", "blurg"]) result = values.str.capitalize() @@ -929,12 +881,6 @@ def test_capitalize(self): exp = Series(["Foo", NA, "Bar", NA, NA, "Blah", NA, NA, NA]) tm.assert_almost_equal(mixed, exp) - # unicode - values = Series([u("FOO"), NA, u("bar"), u("Blurg")]) - results = values.str.capitalize() - exp = Series([u("Foo"), NA, u("Bar"), u("Blurg")]) - tm.assert_series_equal(results, exp) - def test_swapcase(self): values = Series(["FOO", "BAR", NA, "Blah", "blurg"]) result = values.str.swapcase() @@ -948,12 +894,6 @@ def test_swapcase(self): exp = Series(["foo", NA, "BAR", NA, NA, 
"bLAH", NA, NA, NA]) tm.assert_almost_equal(mixed, exp) - # unicode - values = Series([u("FOO"), NA, u("bar"), u("Blurg")]) - results = values.str.swapcase() - exp = Series([u("foo"), NA, u("BAR"), u("bLURG")]) - tm.assert_series_equal(results, exp) - def test_casemethods(self): values = ['aaa', 'bbb', 'CCC', 'Dddd', 'eEEE'] s = Series(values) @@ -983,17 +923,6 @@ def test_replace(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('fooBAD__barBAD'), NA]) - - result = values.str.replace('BAD[_]*', '') - exp = Series([u('foobar'), NA]) - tm.assert_series_equal(result, exp) - - result = values.str.replace('BAD[_]*', '', n=1) - exp = Series([u('foobarBAD'), NA]) - tm.assert_series_equal(result, exp) - # flags + unicode values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) @@ -1053,6 +982,10 @@ def test_replace_compiled_regex(self): exp = Series(['foobar', NA]) tm.assert_series_equal(result, exp) + result = values.str.replace(pat, '', n=1) + exp = Series(['foobarBAD', NA]) + tm.assert_series_equal(result, exp) + # mixed mixed = Series(['aBAD', NA, 'bBAD', True, datetime.today(), 'fooBAD', None, 1, 2.]) @@ -1062,17 +995,6 @@ def test_replace_compiled_regex(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('fooBAD__barBAD'), NA]) - - result = values.str.replace(pat, '') - exp = Series([u('foobar'), NA]) - tm.assert_series_equal(result, exp) - - result = values.str.replace(pat, '', n=1) - exp = Series([u('foobarBAD'), NA]) - tm.assert_series_equal(result, exp) - # flags + unicode values = Series([b"abcd,\xc3\xa0".decode("utf-8")]) exp = Series([b"abcd, \xc3\xa0".decode("utf-8")]) @@ -1150,17 +1072,6 @@ def test_repeat(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) - # unicode - values = Series([u('a'), u('b'), NA, u('c'), NA, u('d')]) - - result = values.str.repeat(3) - exp = Series([u('aaa'), u('bbb'), 
NA, u('ccc'), NA, u('ddd')]) - tm.assert_series_equal(result, exp) - - result = values.str.repeat([1, 2, 3, 4, 5, 6]) - exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, u('dddddd')]) - tm.assert_series_equal(result, exp) - def test_match(self): # New match behavior introduced in 0.13 values = Series(['fooBAD__barBAD', NA, 'foo']) @@ -1181,12 +1092,6 @@ def test_match(self): assert isinstance(rs, Series) tm.assert_series_equal(rs, xp) - # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([True, NA, False]) - tm.assert_series_equal(result, exp) - # na GH #6609 res = Series(['a', 0, np.nan]).str.match('a', na=False) exp = Series([True, False, False]) @@ -1227,10 +1132,10 @@ def test_extract_expand_False(self): tm.assert_frame_equal(rs, exp) # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo')]) + values = Series(['fooBAD__barBAD', NA, 'foo']) result = values.str.extract('.*(BAD[_]+).*(BAD)', expand=False) - exp = DataFrame([[u('BAD__'), u('BAD')], er, er]) + exp = DataFrame([['BAD__', 'BAD'], er, er]) tm.assert_frame_equal(result, exp) # GH9980 @@ -1379,13 +1284,6 @@ def test_extract_expand_True(self): er, er, er, er]) tm.assert_frame_equal(rs, exp) - # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo')]) - - result = values.str.extract('.*(BAD[_]+).*(BAD)', expand=True) - exp = DataFrame([[u('BAD__'), u('BAD')], er, er]) - tm.assert_frame_equal(result, exp) - # these should work for both Series and Index for klass in [Series, Index]: # no groups @@ -2013,11 +1911,6 @@ def test_join(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')]) - result = values.str.split('_').str.join('_') - tm.assert_series_equal(values, result) - def test_len(self): values = Series(['foo', 'fooo', 'fooooo', np.nan, 'fooooooo']) @@ -2035,14 +1928,6 @@ def test_len(self): assert isinstance(rs, Series) 
tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('foo'), u('fooo'), u('fooooo'), np.nan, u( - 'fooooooo')]) - - result = values.str.len() - exp = values.map(lambda x: len(x) if notna(x) else NA) - tm.assert_series_equal(result, exp) - def test_findall(self): values = Series(['fooBAD__barBAD', NA, 'foo', 'BAD']) @@ -2060,13 +1945,6 @@ def test_findall(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('fooBAD__barBAD'), NA, u('foo'), u('BAD')]) - - result = values.str.findall('BAD[_]*') - exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]]) - tm.assert_almost_equal(result, exp) - def test_find(self): values = Series(['ABCDEFG', 'BCDEFEF', 'DEFGHIJEF', 'EFGHEF', 'XXXX']) result = values.str.find('EF') @@ -2237,21 +2115,6 @@ def test_pad(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('a'), u('b'), NA, u('c'), NA, u('eeeeee')]) - - result = values.str.pad(5, side='left') - exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, u('eeeeee')]) - tm.assert_almost_equal(result, exp) - - result = values.str.pad(5, side='right') - exp = Series([u('a '), u('b '), NA, u('c '), NA, u('eeeeee')]) - tm.assert_almost_equal(result, exp) - - result = values.str.pad(5, side='both') - exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, u('eeeeee')]) - tm.assert_almost_equal(result, exp) - def test_pad_fillchar(self): values = Series(['a', 'b', NA, 'c', NA, 'eeeeee']) @@ -2347,21 +2210,6 @@ def test_center_ljust_rjust(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('a'), u('b'), NA, u('c'), NA, u('eeeeee')]) - - result = values.str.center(5) - exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, u('eeeeee')]) - tm.assert_almost_equal(result, exp) - - result = values.str.ljust(5) - exp = Series([u('a '), u('b '), NA, u('c '), NA, u('eeeeee')]) - tm.assert_almost_equal(result, exp) - - result = values.str.rjust(5) - exp 
= Series([u(' a'), u(' b'), NA, u(' c'), NA, u('eeeeee')]) - tm.assert_almost_equal(result, exp) - def test_center_ljust_rjust_fillchar(self): values = Series(['a', 'bb', 'cccc', 'ddddd', 'eeeeee']) @@ -2459,22 +2307,10 @@ def test_split(self): assert isinstance(result, Series) tm.assert_almost_equal(result, exp) - # unicode - values = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')]) - - result = values.str.split('_') - exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA, - [u('f'), u('g'), u('h')]]) - tm.assert_series_equal(result, exp) - - result = values.str.split('_', expand=False) - tm.assert_series_equal(result, exp) - # regex split - values = Series([u('a,b_c'), u('c_d,e'), NA, u('f,g,h')]) + values = Series(['a,b_c', 'c_d,e', NA, 'f,g,h']) result = values.str.split('[,_]') - exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA, - [u('f'), u('g'), u('h')]]) + exp = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']]) tm.assert_series_equal(result, exp) def test_rsplit(self): @@ -2504,20 +2340,10 @@ def test_rsplit(self): assert isinstance(result, Series) tm.assert_almost_equal(result, exp) - # unicode - values = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')]) - result = values.str.rsplit('_') - exp = Series([[u('a'), u('b'), u('c')], [u('c'), u('d'), u('e')], NA, - [u('f'), u('g'), u('h')]]) - tm.assert_series_equal(result, exp) - - result = values.str.rsplit('_', expand=False) - tm.assert_series_equal(result, exp) - # regex split is not supported by rsplit - values = Series([u('a,b_c'), u('c_d,e'), NA, u('f,g,h')]) + values = Series(['a,b_c', 'c_d,e', NA, 'f,g,h']) result = values.str.rsplit('[,_]') - exp = Series([[u('a,b_c')], [u('c_d,e')], NA, [u('f,g,h')]]) + exp = Series([['a,b_c'], ['c_d,e'], NA, ['f,g,h']]) tm.assert_series_equal(result, exp) # setting max number of splits, make sure it's from reverse @@ -2947,17 +2773,6 @@ def test_slice(self): rs = Series(mixed).str.slice(2, 5, -1) xp = Series(['oof', 
NA, 'rab', NA, NA, NA, NA, NA]) - # unicode - values = Series([u('aafootwo'), u('aabartwo'), NA, u('aabazqux')]) - - result = values.str.slice(2, 5) - exp = Series([u('foo'), u('bar'), NA, u('baz')]) - tm.assert_series_equal(result, exp) - - result = values.str.slice(0, -1, 2) - exp = Series([u('afow'), u('abrw'), NA, u('abzu')]) - tm.assert_series_equal(result, exp) - def test_slice_replace(self): values = Series(['short', 'a bit longer', 'evenlongerthanthat', '', NA ]) @@ -3034,22 +2849,6 @@ def test_strip_lstrip_rstrip_mixed(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - def test_strip_lstrip_rstrip_unicode(self): - # unicode - values = Series([u(' aa '), u(' bb \n'), NA, u('cc ')]) - - result = values.str.strip() - exp = Series([u('aa'), u('bb'), NA, u('cc')]) - tm.assert_series_equal(result, exp) - - result = values.str.lstrip() - exp = Series([u('aa '), u('bb \n'), NA, u('cc ')]) - tm.assert_series_equal(result, exp) - - result = values.str.rstrip() - exp = Series([u(' aa'), u(' bb'), NA, u('cc')]) - tm.assert_series_equal(result, exp) - def test_strip_lstrip_rstrip_args(self): values = Series(['xxABCxx', 'xx BNSD', 'LDFJH xx']) @@ -3065,44 +2864,27 @@ def test_strip_lstrip_rstrip_args(self): xp = Series(['xxABC', 'xx BNSD', 'LDFJH ']) assert_series_equal(rs, xp) - def test_strip_lstrip_rstrip_args_unicode(self): - values = Series([u('xxABCxx'), u('xx BNSD'), u('LDFJH xx')]) - - rs = values.str.strip(u('x')) - xp = Series(['ABC', ' BNSD', 'LDFJH ']) - assert_series_equal(rs, xp) - - rs = values.str.lstrip(u('x')) - xp = Series(['ABCxx', ' BNSD', 'LDFJH xx']) - assert_series_equal(rs, xp) - - rs = values.str.rstrip(u('x')) - xp = Series(['xxABC', 'xx BNSD', 'LDFJH ']) - assert_series_equal(rs, xp) - def test_wrap(self): # test values are: two words less than width, two words equal to width, # two words greater than width, one word less than width, one word # equal to width, one word greater than width, multiple tokens with # trailing 
whitespace equal to width - values = Series([u('hello world'), u('hello world!'), u( - 'hello world!!'), u('abcdefabcde'), u('abcdefabcdef'), u( - 'abcdefabcdefa'), u('ab ab ab ab '), u('ab ab ab ab a'), u( - '\t')]) + values = Series(['hello world', 'hello world!', 'hello world!!', + 'abcdefabcde', 'abcdefabcdef', 'abcdefabcdefa', + 'ab ab ab ab ', 'ab ab ab ab a', '\t']) # expected values - xp = Series([u('hello world'), u('hello world!'), u('hello\nworld!!'), - u('abcdefabcde'), u('abcdefabcdef'), u('abcdefabcdef\na'), - u('ab ab ab ab'), u('ab ab ab ab\na'), u('')]) + xp = Series(['hello world', 'hello world!', 'hello\nworld!!', + 'abcdefabcde', 'abcdefabcdef', 'abcdefabcdef\na', + 'ab ab ab ab', 'ab ab ab ab\na', '']) rs = values.str.wrap(12, break_long_words=True) assert_series_equal(rs, xp) # test with pre and post whitespace (non-unicode), NaN, and non-ascii # Unicode - values = Series([' pre ', np.nan, u('\xac\u20ac\U00008000 abadcafe') - ]) - xp = Series([' pre', NA, u('\xac\u20ac\U00008000 ab\nadcafe')]) + values = Series([' pre ', np.nan, '\xac\u20ac\U00008000 abadcafe']) + xp = Series([' pre', NA, '\xac\u20ac\U00008000 ab\nadcafe']) rs = values.str.wrap(6) assert_series_equal(rs, xp) @@ -3123,13 +2905,6 @@ def test_get(self): assert isinstance(rs, Series) tm.assert_almost_equal(rs, xp) - # unicode - values = Series([u('a_b_c'), u('c_d_e'), np.nan, u('f_g_h')]) - - result = values.str.split('_').str.get(1) - expected = Series([u('b'), u('d'), np.nan, u('g')]) - tm.assert_series_equal(result, expected) - # bounds testing values = Series(['1_2_3_4_5', '6_7_8_9_10', '11_12']) @@ -3293,7 +3068,7 @@ def test_match_findall_flags(self): assert result[0] def test_encode_decode(self): - base = Series([u('a'), u('b'), u('a\xe4')]) + base = Series(['a', 'b', 'a\xe4']) series = base.str.encode('utf-8') f = lambda x: x.decode('utf-8') @@ -3303,7 +3078,7 @@ def test_encode_decode(self): tm.assert_series_equal(result, exp) def test_encode_decode_errors(self): - 
encodeBase = Series([u('a'), u('b'), u('a\x9d')]) + encodeBase = Series(['a', 'b', 'a\x9d']) msg = (r"'charmap' codec can't encode character '\\x9d' in position 1:" " character maps to ") @@ -3355,8 +3130,7 @@ def test_normalize(self): def test_index_str_accessor_visibility(self): from pandas.core.strings import StringMethods - cases = [(['a', 'b'], 'string'), (['a', u('b')], 'string'), - ([u('a'), u('b')], 'string'), + cases = [(['a', 'b'], 'string'), (['a', 'b', 1], 'mixed-integer'), (['a', 'b', 1.3], 'mixed'), (['a', 'b', 1.3, 1], 'mixed-integer'), diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 3e4470ee30a80..20c6e2644cda9 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -24,7 +24,7 @@ import pandas.compat as compat from pandas.compat import ( PY2, PY3, httplib, lmap, lrange, lzip, raise_with_traceback, string_types, - u, unichr) + unichr) from pandas.core.dtypes.common import ( is_bool, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, @@ -96,7 +96,7 @@ def round_trip_pickle(obj, path=None): """ if path is None: - path = u('__{random_bytes}__.pickle'.format(random_bytes=rands(10))) + path = '__{random_bytes}__.pickle'.format(random_bytes=rands(10)) with ensure_clean(path) as path: pd.to_pickle(obj, path) return pd.read_pickle(path) @@ -370,7 +370,7 @@ def randbool(size=(), p=0.5): RANDS_CHARS = np.array(list(string.ascii_letters + string.digits), dtype=(np.str_, 1)) -RANDU_CHARS = np.array(list(u("").join(map(unichr, lrange(1488, 1488 + 26))) + +RANDU_CHARS = np.array(list("".join(map(unichr, lrange(1488, 1488 + 26))) + string.digits), dtype=(np.unicode_, 1))