diff --git a/doc/source/conf.py b/doc/source/conf.py index 8693a97bc00f1..3e639b887fd5a 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -134,8 +134,8 @@ master_doc = 'index' # General information about the project. -project = u'pandas' -copyright = u'2008-2014, the pandas development team' +project = 'pandas' +copyright = '2008-2014, the pandas development team' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the diff --git a/doc/source/user_guide/advanced.rst b/doc/source/user_guide/advanced.rst index 68f17a68784c9..0e68cddde8bc7 100644 --- a/doc/source/user_guide/advanced.rst +++ b/doc/source/user_guide/advanced.rst @@ -797,7 +797,7 @@ values **not** in the categories, similarly to how you can reindex **any** panda In [11]: df3 = df3.set_index('B') In [11]: df3.index - Out[11]: CategoricalIndex([u'a', u'a', u'b', u'b', u'c', u'a'], categories=[u'a', u'b', u'c'], ordered=False, name=u'B', dtype='category') + Out[11]: CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, name='B', dtype='category') In [12]: pd.concat([df2, df3]) TypeError: categories must match existing categories when appending diff --git a/doc/source/user_guide/options.rst b/doc/source/user_guide/options.rst index d640d8b1153c5..4b466c2c44d49 100644 --- a/doc/source/user_guide/options.rst +++ b/doc/source/user_guide/options.rst @@ -484,7 +484,7 @@ If a DataFrame or Series contains these characters, the default output mode may .. ipython:: python - df = pd.DataFrame({u'国籍': ['UK', u'日本'], u'名前': ['Alice', u'しのぶ']}) + df = pd.DataFrame({'国籍': ['UK', '日本'], '名前': ['Alice', 'しのぶ']}) df .. image:: ../_static/option_unicode01.png @@ -507,7 +507,7 @@ By default, an "Ambiguous" character's width, such as "¡" (inverted exclamation .. ipython:: python - df = pd.DataFrame({'a': ['xxx', u'¡¡'], 'b': ['yyy', u'¡¡']}) + df = pd.DataFrame({'a': ['xxx', '¡¡'], 'b': ['yyy', '¡¡']}) df .. image:: ../_static/option_unicode03.png diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst index 5c11be34e6ed4..28bf46cd4c583 100644 --- a/doc/source/user_guide/reshaping.rst +++ b/doc/source/user_guide/reshaping.rst @@ -695,7 +695,7 @@ handling of NaN: In [2]: pd.factorize(x, sort=True) Out[2]: (array([ 2, 2, -1, 3, 0, 1]), - Index([3.14, inf, u'A', u'B'], dtype='object')) + Index([3.14, inf, 'A', 'B'], dtype='object')) In [3]: np.unique(x, return_inverse=True)[::-1] Out[3]: (array([3, 3, 0, 4, 1, 2]), array([nan, 3.14, inf, 'A', 'B'], dtype=object)) diff --git a/doc/sphinxext/announce.py b/doc/sphinxext/announce.py index 6bc53d3e96d01..c61db6935a043 100755 --- a/doc/sphinxext/announce.py +++ b/doc/sphinxext/announce.py @@ -56,7 +56,7 @@ def get_authors(revision_range): - pat = u'^.*\\t(.*)$' + pat = '^.*\\t(.*)$' lst_release, cur_release = [r.strip() for r in revision_range.split('..')] # authors, in current release and previous to current release. @@ -70,7 +70,7 @@ def get_authors(revision_range): pre.discard('Homu') # Append '+' to new authors. 
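The whole patch rests on a single Python 3 fact: PEP 414 reinstated the `u''` prefix for compatibility, but it produces an ordinary `str`, so dropping it is behavior-preserving everywhere. A minimal check, purely illustrative:

```python
# Under Python 3, u'...' and '...' denote the very same str type.
s_prefixed = u'pandas'
s_plain = 'pandas'

assert type(s_prefixed) is str
assert s_prefixed == s_plain  # equal values; the prefix is inert
```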
- authors = [s + u' +' for s in cur - pre] + [s for s in cur & pre] + authors = [s + ' +' for s in cur - pre] + [s for s in cur & pre] authors.sort() return authors @@ -81,17 +81,17 @@ def get_pull_requests(repo, revision_range): # From regular merges merges = this_repo.git.log( '--oneline', '--merges', revision_range) - issues = re.findall(u"Merge pull request \\#(\\d*)", merges) + issues = re.findall("Merge pull request \\#(\\d*)", merges) prnums.extend(int(s) for s in issues) # From Homu merges (Auto merges) - issues = re.findall(u"Auto merge of \\#(\\d*)", merges) + issues = re.findall("Auto merge of \\#(\\d*)", merges) prnums.extend(int(s) for s in issues) # From fast forward squash-merges commits = this_repo.git.log( '--oneline', '--no-merges', '--first-parent', revision_range) - issues = re.findall(u'^.*\\(\\#(\\d+)\\)$', commits, re.M) + issues = re.findall('^.*\\(\\#(\\d+)\\)$', commits, re.M) prnums.extend(int(s) for s in issues) # get PR data from github repo diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index f7d427ce26e6a..79ccdb3a3278f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -839,16 +839,16 @@ def __repr__(self): from pandas.io.formats.printing import format_object_summary template = ( - u'{class_name}' - u'{data}\n' - u'Length: {length}, dtype: {dtype}' + '{class_name}' + '{data}\n' + 'Length: {length}, dtype: {dtype}' ) # the short repr has no trailing newline, while the truncated # repr does. So we include a newline in our template, and strip # any trailing newlines from format_object_summary data = format_object_summary(self, self._formatter(), indent_for_name=False).rstrip(', \n') - class_name = u'<{}>\n'.format(self.__class__.__name__) + class_name = '<{}>\n'.format(self.__class__.__name__) return template.format(class_name=class_name, data=data, length=len(self), dtype=self.dtype) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 2a6cc6c742a52..75b64a06fe8e8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2147,7 +2147,7 @@ def _reverse_indexer(self): Categories (3, object): [a, b, c] In [3]: c.categories - Out[3]: Index([u'a', u'b', u'c'], dtype='object') + Out[3]: Index(['a', 'b', 'c'], dtype='object') In [4]: c.codes Out[4]: array([0, 0, 1, 2, 0], dtype=int8) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 400d4c9b93da2..a48621d8fffa3 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -476,7 +476,7 @@ def asfreq(self, freq=None, how='E'): # ------------------------------------------------------------------ # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): + def _format_native_types(self, na_rep='NaT', date_format=None, **kwargs): """ actually format my specific types """ @@ -485,7 +485,7 @@ def _format_native_types(self, na_rep=u'NaT', date_format=None, **kwargs): if date_format: formatter = lambda dt: dt.strftime(date_format) else: - formatter = lambda dt: u'%s' % dt + formatter = lambda dt: '%s' % dt if self._hasnans: mask = self._isnan diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index 833650fb68a54..070d116e248b4 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -584,11 +584,11 @@ def __init__(self, value, converted, kind): def tostring(self, encoding): """ quote the string if not encoded else encode and return """ - if self.kind ==
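The regex literals in `announce.py` keep their doubled backslashes rather than switching to raw strings; the two spellings are equivalent, as this aside (not part of the patch) shows:

```python
import re

pat_escaped = '^.*\\t(.*)$'  # spelling kept by the patch
pat_raw = r'^.*\t(.*)$'      # raw-string equivalent
assert pat_escaped == pat_raw

# The pattern captures the author field after the tab in git log
# output formatted as '<hash>\t<author>'.
match = re.match(pat_raw, 'abc123\tJane Doe')
assert match is not None and match.group(1) == 'Jane Doe'
```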
u'string': + if self.kind == 'string': if encoding is not None: return self.converted return '"{converted}"'.format(converted=self.converted) - elif self.kind == u'float': + elif self.kind == 'float': # python 2 str(float) is not always # round-trippable so use repr() return repr(self.converted) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index b1fe2940fe5c6..34b4c3eac99c1 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -393,9 +393,9 @@ def __eq__(self, other): return hash(self) == hash(other) def __repr__(self): - tpl = u'CategoricalDtype(categories={}ordered={})' + tpl = 'CategoricalDtype(categories={}ordered={})' if self.categories is None: - data = u"None, " + data = "None, " else: data = self.categories._format_data(name=self.__class__.__name__) return tpl.format(data, self.ordered) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8aa294d426253..92db102543c43 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8063,4 +8063,4 @@ def _from_nested_dict(data): def _put_str(s, space): - return u'{s}'.format(s=s)[:space].ljust(space) + return '{s}'.format(s=s)[:space].ljust(space) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e9373c914ade1..e046ebaed7342 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -379,8 +379,8 @@ def from_tuples(cls, tuples, sortorder=None, names=None): Examples -------- - >>> tuples = [(1, u'red'), (1, u'blue'), - ... (2, u'red'), (2, u'blue')] + >>> tuples = [(1, 'red'), (1, 'blue'), + ... (2, 'red'), (2, 'blue')] >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) MultiIndex(levels=[[1, 2], ['blue', 'red']], codes=[[0, 0, 1, 1], [1, 0, 1, 0]], @@ -621,25 +621,25 @@ def set_levels(self, levels, level=None, inplace=False, Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')], + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), + (2, 'one'), (2, 'two')], names=['foo', 'bar']) >>> idx.set_levels([['a','b'], [1,2]]) - MultiIndex(levels=[[u'a', u'b'], [1, 2]], + MultiIndex(levels=[['a', 'b'], [1, 2]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) >>> idx.set_levels(['a','b'], level=0) - MultiIndex(levels=[[u'a', u'b'], [u'one', u'two']], + MultiIndex(levels=[['a', 'b'], ['one', 'two']], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) >>> idx.set_levels(['a','b'], level='bar') - MultiIndex(levels=[[1, 2], [u'a', u'b']], + MultiIndex(levels=[[1, 2], ['a', 'b']], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) >>> idx.set_levels([['a','b'], [1,2]], level=[0,1]) - MultiIndex(levels=[[u'a', u'b'], [1, 2]], + MultiIndex(levels=[['a', 'b'], [1, 2]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) """ if is_list_like(levels) and not isinstance(levels, Index): levels = list(levels) @@ -740,25 +740,25 @@ def set_codes(self, codes, level=None, inplace=False, Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')], + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), + (2, 'one'), (2, 'two')], names=['foo', 'bar']) >>> idx.set_codes([[1,0,1,0], [0,0,1,1]]) - MultiIndex(levels=[[1, 2], [u'one', u'two']], + MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[1, 0, 1, 0], [0, 0, 1, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) >>> 
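`_put_str` in `frame.py` changes only its literal prefix; what it does — stringify, truncate to `space` characters, left-justify — is unchanged. A quick sketch of that behavior:

```python
def _put_str(s, space):
    # Stringify, clip to `space` characters, then pad out to `space`.
    return '{s}'.format(s=s)[:space].ljust(space)

assert _put_str('category', 5) == 'categ'   # truncation
assert _put_str(42, 5) == '42   '           # padding
```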
idx.set_codes([1,0,1,0], level=0) - MultiIndex(levels=[[1, 2], [u'one', u'two']], + MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[1, 0, 1, 0], [0, 1, 0, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) >>> idx.set_codes([0,0,1,1], level='bar') - MultiIndex(levels=[[1, 2], [u'one', u'two']], + MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[0, 0, 1, 1], [0, 0, 1, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) >>> idx.set_codes([[1,0,1,0], [0,0,1,1]], level=[0,1]) - MultiIndex(levels=[[1, 2], [u'one', u'two']], + MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[1, 0, 1, 0], [0, 0, 1, 1]], - names=[u'foo', u'bar']) + names=['foo', 'bar']) """ if level is not None and not is_list_like(level): if not is_list_like(codes): @@ -1512,10 +1512,10 @@ def to_hierarchical(self, n_repeat, n_shuffle=1): Examples -------- - >>> idx = pd.MultiIndex.from_tuples([(1, u'one'), (1, u'two'), - (2, u'one'), (2, u'two')]) + >>> idx = pd.MultiIndex.from_tuples([(1, 'one'), (1, 'two'), + (2, 'one'), (2, 'two')]) >>> idx.to_hierarchical(3) - MultiIndex(levels=[[1, 2], [u'one', u'two']], + MultiIndex(levels=[[1, 2], ['one', 'two']], codes=[[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1], [0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 1]]) """ diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index a4bd7f9017eb4..fb3d4f09cfe5e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -400,7 +400,7 @@ def _maybe_convert_timedelta(self, other): # ------------------------------------------------------------------------ # Rendering Methods - def _format_native_types(self, na_rep=u'NaT', quoting=None, **kwargs): + def _format_native_types(self, na_rep='NaT', quoting=None, **kwargs): # just dispatch, return ndarray return self._data._format_native_types(na_rep=na_rep, quoting=quoting, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index ebec8b91a5aba..7a87e8dc57164 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1242,7 +1242,7 @@ def _validate_read_indexer(self, key, indexer, axis, raise_missing=False): if missing: if missing == len(indexer): raise KeyError( - u"None of [{key}] are in the [{axis}]".format( + "None of [{key}] are in the [{axis}]".format( key=key, axis=self.obj._get_axis_name(axis))) # We (temporarily) allow for some missing keys with .loc, except in diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 3ea810f44791a..be2b56881db71 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -296,12 +296,12 @@ def __unicode__(self): output = pprint_thing(self.__class__.__name__) for i, ax in enumerate(self.axes): if i == 0: - output += u'\nItems: {ax}'.format(ax=ax) + output += '\nItems: {ax}'.format(ax=ax) else: - output += u'\nAxis {i}: {ax}'.format(i=i, ax=ax) + output += '\nAxis {i}: {ax}'.format(i=i, ax=ax) for block in self.blocks: - output += u'\n{block}'.format(block=pprint_thing(block)) + output += '\n{block}'.format(block=pprint_thing(block)) return output def _verify_integrity(self): diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index dbff2a069d73d..4bfb27c6f148c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -154,7 +154,7 @@ def _add_margins(table, data, values, rows, cols, aggfunc, if not isinstance(margins_name, compat.string_types): raise ValueError('margins_name argument must be a string') - msg = u'Conflicting name "{name}" in margins'.format(name=margins_name) + msg = 'Conflicting 
name "{name}" in margins'.format(name=margins_name) for level in table.index.names: if margins_name in table.index.get_level_values(level): raise ValueError(msg) diff --git a/pandas/errors/__init__.py b/pandas/errors/__init__.py index 7d5a7f1a99e41..3b8904f4c1ef6 100644 --- a/pandas/errors/__init__.py +++ b/pandas/errors/__init__.py @@ -138,7 +138,7 @@ class ParserWarning(Warning): Using a `sep` in `pd.read_csv` other than a single character: >>> import io - >>> csv = u'''a;b;c + >>> csv = '''a;b;c ... 1;1,8 ... 1;2,1''' >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index f69678ca07831..8d7116388c3d9 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -213,7 +213,7 @@ def _get_footer(self): series_name = pprint_thing(name, escape_chars=('\t', '\r', '\n')) - footer += ((u"Name: {sname}".format(sname=series_name)) + footer += (("Name: {sname}".format(sname=series_name)) if name is not None else "") if (self.length is True or @@ -227,7 +227,7 @@ def _get_footer(self): if name: if footer: footer += ', ' - footer += u'dtype: {typ}'.format(typ=pprint_thing(name)) + footer += 'dtype: {typ}'.format(typ=pprint_thing(name)) # level infos are added to the end and in a new line, like it is done # for Categoricals @@ -949,10 +949,10 @@ def _format(x): return 'NaT' return self.na_rep elif isinstance(x, PandasObject): - return u'{x}'.format(x=x) + return '{x}'.format(x=x) else: # object dtype - return u'{x}'.format(x=formatter(x)) + return '{x}'.format(x=formatter(x)) vals = self.values if isinstance(vals, Index): @@ -968,16 +968,16 @@ def _format(x): fmt_values = [] for i, v in enumerate(vals): if not is_float_type[i] and leading_space: - fmt_values.append(u' {v}'.format(v=_format(v))) + fmt_values.append(' {v}'.format(v=_format(v))) elif is_float_type[i]: fmt_values.append(float_format(v)) else: if leading_space is False: # False specifically, so that the default is # to include a space if we get here. 
- tpl = u'{v}' + tpl = '{v}' else: - tpl = u' {v}' + tpl = ' {v}' fmt_values.append(tpl.format(v=_format(v))) return fmt_values diff --git a/pandas/io/formats/html.py b/pandas/io/formats/html.py index 4c235fed3c026..982e51ae8be65 100644 --- a/pandas/io/formats/html.py +++ b/pandas/io/formats/html.py @@ -118,7 +118,7 @@ def _write_cell(self, s, kind='td', indent=0, tags=None): else: end_a = '' - self.write(u'{start}{rs}{end_a}'.format( + self.write('{start}{rs}{end_a}'.format( start=start_tag, rs=rs, end_a=end_a, kind=kind), indent) def write_tr(self, line, indent=0, indent_delta=0, header=False, diff --git a/pandas/io/formats/printing.py b/pandas/io/formats/printing.py index 70457e0bf9eb5..90013148a9e0f 100644 --- a/pandas/io/formats/printing.py +++ b/pandas/io/formats/printing.py @@ -336,17 +336,17 @@ def best_len(values): else: return 0 - close = u', ' + close = ', ' if n == 0: - summary = u'[]{}'.format(close) + summary = '[]{}'.format(close) elif n == 1: first = formatter(obj[0]) - summary = u'[{}]{}'.format(first, close) + summary = '[{}]{}'.format(first, close) elif n == 2: first = formatter(obj[0]) last = formatter(obj[-1]) - summary = u'[{}, {}]{}'.format(first, last, close) + summary = '[{}, {}]{}'.format(first, last, close) else: if n > max_seq_items: diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 57e941bbb8067..4a71338bfc686 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -322,10 +322,10 @@ def unconvert(values, dtype, compress=None): values = values.encode('latin1') if compress: - if compress == u'zlib': + if compress == 'zlib': _check_zlib() decompress = zlib.decompress - elif compress == u'blosc': + elif compress == 'blosc': _check_blosc() decompress = blosc.decompress else: @@ -368,20 +368,20 @@ def encode(obj): tobj = type(obj) if isinstance(obj, Index): if isinstance(obj, RangeIndex): - return {u'typ': u'range_index', - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'start': getattr(obj, '_start', None), - u'stop': getattr(obj, '_stop', None), - u'step': getattr(obj, '_step', None)} + return {'typ': 'range_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'start': getattr(obj, '_start', None), + 'stop': getattr(obj, '_stop', None), + 'step': getattr(obj, '_step', None)} elif isinstance(obj, PeriodIndex): - return {u'typ': u'period_index', - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'freq': getattr(obj, 'freqstr', None), - u'dtype': obj.dtype.name, - u'data': convert(obj.asi8), - u'compress': compressor} + return {'typ': 'period_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'freq': getattr(obj, 'freqstr', None), + 'dtype': obj.dtype.name, + 'data': convert(obj.asi8), + 'compress': compressor} elif isinstance(obj, DatetimeIndex): tz = getattr(obj, 'tz', None) @@ -389,48 +389,48 @@ def encode(obj): if tz is not None: tz = tz.zone obj = obj.tz_convert('UTC') - return {u'typ': u'datetime_index', - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'dtype': obj.dtype.name, - u'data': convert(obj.asi8), - u'freq': getattr(obj, 'freqstr', None), - u'tz': tz, - u'compress': compressor} + return {'typ': 'datetime_index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'dtype': obj.dtype.name, + 'data': convert(obj.asi8), + 'freq': getattr(obj, 'freqstr', None), + 'tz': tz, + 'compress': compressor} elif isinstance(obj, (IntervalIndex, IntervalArray)): if isinstance(obj, 
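In the `packers.py` hunks, `unconvert` dispatches on a plain-string `compress` tag. The `if/elif` chain amounts to a table lookup; a simplified stand-in (names hypothetical, `blosc` omitted as an optional dependency):

```python
import zlib

# Hypothetical condensation of unconvert()'s compression dispatch.
_DECOMPRESSORS = {
    'zlib': zlib.decompress,
    # 'blosc': blosc.decompress,  # optional dependency, not imported here
}

payload = zlib.compress(b'msgpack payload bytes')
assert _DECOMPRESSORS['zlib'](payload) == b'msgpack payload bytes'
```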
IntervalIndex): - typ = u'interval_index' + typ = 'interval_index' else: - typ = u'interval_array' - return {u'typ': typ, - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'left': getattr(obj, 'left', None), - u'right': getattr(obj, 'right', None), - u'closed': getattr(obj, 'closed', None)} + typ = 'interval_array' + return {'typ': typ, + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'left': getattr(obj, 'left', None), + 'right': getattr(obj, 'right', None), + 'closed': getattr(obj, 'closed', None)} elif isinstance(obj, MultiIndex): - return {u'typ': u'multi_index', - u'klass': obj.__class__.__name__, - u'names': getattr(obj, 'names', None), - u'dtype': obj.dtype.name, - u'data': convert(obj.values), - u'compress': compressor} + return {'typ': 'multi_index', + 'klass': obj.__class__.__name__, + 'names': getattr(obj, 'names', None), + 'dtype': obj.dtype.name, + 'data': convert(obj.values), + 'compress': compressor} else: - return {u'typ': u'index', - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'dtype': obj.dtype.name, - u'data': convert(obj.values), - u'compress': compressor} + return {'typ': 'index', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'dtype': obj.dtype.name, + 'data': convert(obj.values), + 'compress': compressor} elif isinstance(obj, Categorical): - return {u'typ': u'category', - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'codes': obj.codes, - u'categories': obj.categories, - u'ordered': obj.ordered, - u'compress': compressor} + return {'typ': 'category', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'codes': obj.codes, + 'categories': obj.categories, + 'ordered': obj.ordered, + 'compress': compressor} elif isinstance(obj, Series): if isinstance(obj, SparseSeries): @@ -448,13 +448,13 @@ def encode(obj): # d[f] = getattr(obj, f, None) # return d else: - return {u'typ': u'series', - u'klass': obj.__class__.__name__, - u'name': getattr(obj, 'name', None), - u'index': obj.index, - u'dtype': obj.dtype.name, - u'data': convert(obj.values), - u'compress': compressor} + return {'typ': 'series', + 'klass': obj.__class__.__name__, + 'name': getattr(obj, 'name', None), + 'index': obj.index, + 'dtype': obj.dtype.name, + 'data': convert(obj.values), + 'compress': compressor} elif issubclass(tobj, NDFrame): if isinstance(obj, SparseDataFrame): raise NotImplementedError( @@ -475,15 +475,15 @@ def encode(obj): data = data.consolidate() # the block manager - return {u'typ': u'block_manager', - u'klass': obj.__class__.__name__, - u'axes': data.axes, - u'blocks': [{u'locs': b.mgr_locs.as_array, - u'values': convert(b.values), - u'shape': b.values.shape, - u'dtype': b.dtype.name, - u'klass': b.__class__.__name__, - u'compress': compressor} for b in data.blocks] + return {'typ': 'block_manager', + 'klass': obj.__class__.__name__, + 'axes': data.axes, + 'blocks': [{'locs': b.mgr_locs.as_array, + 'values': convert(b.values), + 'shape': b.values.shape, + 'dtype': b.dtype.name, + 'klass': b.__class__.__name__, + 'compress': compressor} for b in data.blocks] } elif isinstance(obj, (datetime, date, np.datetime64, timedelta, @@ -495,71 +495,71 @@ def encode(obj): freq = obj.freq if freq is not None: freq = freq.freqstr - return {u'typ': u'timestamp', - u'value': obj.value, - u'freq': freq, - u'tz': tz} + return {'typ': 'timestamp', + 'value': obj.value, + 'freq': freq, + 'tz': tz} if obj is NaT: - return {u'typ': u'nat'} + return 
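`encode` tags every pandas object with a `'typ'` key that `decode` later switches on; values round-trip through plain dicts. A toy version of that protocol (illustrative names, ndarray only):

```python
import numpy as np

def toy_encode(obj):
    # Tag the payload with enough metadata for the decoder to rebuild it.
    if isinstance(obj, np.ndarray):
        return {'typ': 'ndarray', 'dtype': obj.dtype.name,
                'shape': obj.shape, 'data': obj.tobytes()}
    return obj

def toy_decode(obj):
    if isinstance(obj, dict) and obj.get('typ') == 'ndarray':
        flat = np.frombuffer(obj['data'], dtype=obj['dtype'])
        return flat.reshape(obj['shape'])
    return obj

arr = np.arange(6).reshape(2, 3)
assert (toy_decode(toy_encode(arr)) == arr).all()
```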
{'typ': 'nat'} elif isinstance(obj, np.timedelta64): - return {u'typ': u'timedelta64', - u'data': obj.view('i8')} + return {'typ': 'timedelta64', + 'data': obj.view('i8')} elif isinstance(obj, timedelta): - return {u'typ': u'timedelta', - u'data': (obj.days, obj.seconds, obj.microseconds)} + return {'typ': 'timedelta', + 'data': (obj.days, obj.seconds, obj.microseconds)} elif isinstance(obj, np.datetime64): - return {u'typ': u'datetime64', - u'data': str(obj)} + return {'typ': 'datetime64', + 'data': str(obj)} elif isinstance(obj, datetime): - return {u'typ': u'datetime', - u'data': obj.isoformat()} + return {'typ': 'datetime', + 'data': obj.isoformat()} elif isinstance(obj, date): - return {u'typ': u'date', - u'data': obj.isoformat()} + return {'typ': 'date', + 'data': obj.isoformat()} raise Exception( "cannot encode this datetimelike object: {obj}".format(obj=obj)) elif isinstance(obj, Period): - return {u'typ': u'period', - u'ordinal': obj.ordinal, - u'freq': obj.freqstr} + return {'typ': 'period', + 'ordinal': obj.ordinal, + 'freq': obj.freqstr} elif isinstance(obj, Interval): - return {u'typ': u'interval', - u'left': obj.left, - u'right': obj.right, - u'closed': obj.closed} + return {'typ': 'interval', + 'left': obj.left, + 'right': obj.right, + 'closed': obj.closed} elif isinstance(obj, BlockIndex): - return {u'typ': u'block_index', - u'klass': obj.__class__.__name__, - u'blocs': obj.blocs, - u'blengths': obj.blengths, - u'length': obj.length} + return {'typ': 'block_index', + 'klass': obj.__class__.__name__, + 'blocs': obj.blocs, + 'blengths': obj.blengths, + 'length': obj.length} elif isinstance(obj, IntIndex): - return {u'typ': u'int_index', - u'klass': obj.__class__.__name__, - u'indices': obj.indices, - u'length': obj.length} + return {'typ': 'int_index', + 'klass': obj.__class__.__name__, + 'indices': obj.indices, + 'length': obj.length} elif isinstance(obj, np.ndarray): - return {u'typ': u'ndarray', - u'shape': obj.shape, - u'ndim': obj.ndim, - u'dtype': obj.dtype.name, - u'data': convert(obj), - u'compress': compressor} + return {'typ': 'ndarray', + 'shape': obj.shape, + 'ndim': obj.ndim, + 'dtype': obj.dtype.name, + 'data': convert(obj), + 'compress': compressor} elif isinstance(obj, np.number): if np.iscomplexobj(obj): - return {u'typ': u'np_scalar', - u'sub_typ': u'np_complex', - u'dtype': obj.dtype.name, - u'real': obj.real.__repr__(), - u'imag': obj.imag.__repr__()} + return {'typ': 'np_scalar', + 'sub_typ': 'np_complex', + 'dtype': obj.dtype.name, + 'real': obj.real.__repr__(), + 'imag': obj.imag.__repr__()} else: - return {u'typ': u'np_scalar', - u'dtype': obj.dtype.name, - u'data': obj.__repr__()} + return {'typ': 'np_scalar', + 'dtype': obj.dtype.name, + 'data': obj.__repr__()} elif isinstance(obj, complex): - return {u'typ': u'np_complex', - u'real': obj.real.__repr__(), - u'imag': obj.imag.__repr__()} + return {'typ': 'np_complex', + 'real': obj.real.__repr__(), + 'imag': obj.imag.__repr__()} return obj @@ -569,110 +569,110 @@ def decode(obj): Decoder for deserializing numpy data types. 
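The `np_complex` branch serializes the real and imaginary parts as `repr()` strings, and `decode` reassembles them textually. In isolation:

```python
value = complex(1.5, 2.0)
encoded = {'typ': 'np_complex',
           'real': value.real.__repr__(),
           'imag': value.imag.__repr__()}

# Textual reassembly, as the decode() hunk below does it.
decoded = complex(encoded['real'] + '+' + encoded['imag'] + 'j')
assert decoded == value

# Caveat (an observation, not something this patch changes): a negative
# imaginary part yields e.g. '1.5+-2.0j', which complex() rejects.
```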
""" - typ = obj.get(u'typ') + typ = obj.get('typ') if typ is None: return obj - elif typ == u'timestamp': - freq = obj[u'freq'] if 'freq' in obj else obj[u'offset'] - return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq) - elif typ == u'nat': + elif typ == 'timestamp': + freq = obj['freq'] if 'freq' in obj else obj['offset'] + return Timestamp(obj['value'], tz=obj['tz'], freq=freq) + elif typ == 'nat': return NaT - elif typ == u'period': - return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq']) - elif typ == u'index': - dtype = dtype_for(obj[u'dtype']) - data = unconvert(obj[u'data'], dtype, - obj.get(u'compress')) - return Index(data, dtype=dtype, name=obj[u'name']) - elif typ == u'range_index': - return RangeIndex(obj[u'start'], - obj[u'stop'], - obj[u'step'], - name=obj[u'name']) - elif typ == u'multi_index': - dtype = dtype_for(obj[u'dtype']) - data = unconvert(obj[u'data'], dtype, - obj.get(u'compress')) + elif typ == 'period': + return Period(ordinal=obj['ordinal'], freq=obj['freq']) + elif typ == 'index': + dtype = dtype_for(obj['dtype']) + data = unconvert(obj['data'], dtype, + obj.get('compress')) + return Index(data, dtype=dtype, name=obj['name']) + elif typ == 'range_index': + return RangeIndex(obj['start'], + obj['stop'], + obj['step'], + name=obj['name']) + elif typ == 'multi_index': + dtype = dtype_for(obj['dtype']) + data = unconvert(obj['data'], dtype, + obj.get('compress')) data = [tuple(x) for x in data] - return MultiIndex.from_tuples(data, names=obj[u'names']) - elif typ == u'period_index': - data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) - d = dict(name=obj[u'name'], freq=obj[u'freq']) + return MultiIndex.from_tuples(data, names=obj['names']) + elif typ == 'period_index': + data = unconvert(obj['data'], np.int64, obj.get('compress')) + d = dict(name=obj['name'], freq=obj['freq']) freq = d.pop('freq', None) return PeriodIndex(PeriodArray(data, freq), **d) - elif typ == u'datetime_index': - data = unconvert(obj[u'data'], np.int64, obj.get(u'compress')) - d = dict(name=obj[u'name'], freq=obj[u'freq']) + elif typ == 'datetime_index': + data = unconvert(obj['data'], np.int64, obj.get('compress')) + d = dict(name=obj['name'], freq=obj['freq']) result = DatetimeIndex(data, **d) - tz = obj[u'tz'] + tz = obj['tz'] # reverse tz conversion if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result - elif typ in (u'interval_index', 'interval_array'): - return globals()[obj[u'klass']].from_arrays(obj[u'left'], - obj[u'right'], - obj[u'closed'], - name=obj[u'name']) - elif typ == u'category': - from_codes = globals()[obj[u'klass']].from_codes - return from_codes(codes=obj[u'codes'], - categories=obj[u'categories'], - ordered=obj[u'ordered']) - - elif typ == u'interval': - return Interval(obj[u'left'], obj[u'right'], obj[u'closed']) - elif typ == u'series': - dtype = dtype_for(obj[u'dtype']) + elif typ in ('interval_index', 'interval_array'): + return globals()[obj['klass']].from_arrays(obj['left'], + obj['right'], + obj['closed'], + name=obj['name']) + elif typ == 'category': + from_codes = globals()[obj['klass']].from_codes + return from_codes(codes=obj['codes'], + categories=obj['categories'], + ordered=obj['ordered']) + + elif typ == 'interval': + return Interval(obj['left'], obj['right'], obj['closed']) + elif typ == 'series': + dtype = dtype_for(obj['dtype']) pd_dtype = pandas_dtype(dtype) - index = obj[u'index'] - result = Series(unconvert(obj[u'data'], dtype, obj[u'compress']), + index = obj['index'] + result = 
Series(unconvert(obj['data'], dtype, obj['compress']), index=index, dtype=pd_dtype, - name=obj[u'name']) + name=obj['name']) return result - elif typ == u'block_manager': - axes = obj[u'axes'] + elif typ == 'block_manager': + axes = obj['axes'] def create_block(b): values = _safe_reshape(unconvert( - b[u'values'], dtype_for(b[u'dtype']), - b[u'compress']), b[u'shape']) + b['values'], dtype_for(b['dtype']), + b['compress']), b['shape']) # locs handles duplicate column names, and should be used instead # of items; see GH 9618 - if u'locs' in b: - placement = b[u'locs'] + if 'locs' in b: + placement = b['locs'] else: - placement = axes[0].get_indexer(b[u'items']) + placement = axes[0].get_indexer(b['items']) - if is_datetime64tz_dtype(b[u'dtype']): + if is_datetime64tz_dtype(b['dtype']): assert isinstance(values, np.ndarray), type(values) assert values.dtype == 'M8[ns]', values.dtype - values = DatetimeArray(values, dtype=b[u'dtype']) + values = DatetimeArray(values, dtype=b['dtype']) return make_block(values=values, - klass=getattr(internals, b[u'klass']), + klass=getattr(internals, b['klass']), placement=placement, - dtype=b[u'dtype']) - - blocks = [create_block(b) for b in obj[u'blocks']] - return globals()[obj[u'klass']](BlockManager(blocks, axes)) - elif typ == u'datetime': - return parse(obj[u'data']) - elif typ == u'datetime64': - return np.datetime64(parse(obj[u'data'])) - elif typ == u'date': - return parse(obj[u'data']).date() - elif typ == u'timedelta': - return timedelta(*obj[u'data']) - elif typ == u'timedelta64': - return np.timedelta64(int(obj[u'data'])) + dtype=b['dtype']) + + blocks = [create_block(b) for b in obj['blocks']] + return globals()[obj['klass']](BlockManager(blocks, axes)) + elif typ == 'datetime': + return parse(obj['data']) + elif typ == 'datetime64': + return np.datetime64(parse(obj['data'])) + elif typ == 'date': + return parse(obj['data']).date() + elif typ == 'timedelta': + return timedelta(*obj['data']) + elif typ == 'timedelta64': + return np.timedelta64(int(obj['data'])) # elif typ == 'sparse_series': # dtype = dtype_for(obj['dtype']) # return SparseSeries( @@ -690,25 +690,25 @@ def create_block(b): # obj['data'], items=obj['items'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind']) - elif typ == u'block_index': - return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'], - obj[u'blengths']) - elif typ == u'int_index': - return globals()[obj[u'klass']](obj[u'length'], obj[u'indices']) - elif typ == u'ndarray': - return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']], - obj.get(u'compress')).reshape(obj[u'shape']) - elif typ == u'np_scalar': - if obj.get(u'sub_typ') == u'np_complex': - return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype']) + elif typ == 'block_index': + return globals()[obj['klass']](obj['length'], obj['blocs'], + obj['blengths']) + elif typ == 'int_index': + return globals()[obj['klass']](obj['length'], obj['indices']) + elif typ == 'ndarray': + return unconvert(obj['data'], np.typeDict[obj['dtype']], + obj.get('compress')).reshape(obj['shape']) + elif typ == 'np_scalar': + if obj.get('sub_typ') == 'np_complex': + return c2f(obj['real'], obj['imag'], obj['dtype']) else: - dtype = dtype_for(obj[u'dtype']) + dtype = dtype_for(obj['dtype']) try: - return dtype(obj[u'data']) + return dtype(obj['data']) except (ValueError, TypeError): - return dtype.type(obj[u'data']) - elif typ == u'np_complex': - return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j') + return dtype.type(obj['data']) + elif typ == 
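Plain datetimes travel as ISO-8601 text: `encode` stores `obj.isoformat()` and `decode` hands it to `dateutil`'s `parse`. The round-trip in miniature:

```python
from datetime import datetime
from dateutil.parser import parse

stamp = datetime(2014, 1, 2, 3, 4, 5, 123456)
encoded = {'typ': 'datetime', 'data': stamp.isoformat()}
assert parse(encoded['data']) == stamp  # lossless down to microseconds
```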
'np_complex': + return complex(obj['real'] + '+' + obj['imag'] + 'j') elif isinstance(obj, (dict, list, set)): return obj else: diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index b0c0727c638be..fadb9a5c6c7cb 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -148,10 +148,10 @@ class DuplicateWarning(Warning): # formats _FORMAT_MAP = { - u'f': 'fixed', - u'fixed': 'fixed', - u't': 'table', - u'table': 'table', + 'f': 'fixed', + 'fixed': 'fixed', + 't': 'table', + 'table': 'table', } format_deprecate_doc = """ @@ -166,31 +166,31 @@ class DuplicateWarning(Warning): # map object types _TYPE_MAP = { - Series: u'series', - SparseSeries: u'sparse_series', - DataFrame: u'frame', - SparseDataFrame: u'sparse_frame', + Series: 'series', + SparseSeries: 'sparse_series', + DataFrame: 'frame', + SparseDataFrame: 'sparse_frame', } # storer class map _STORER_MAP = { - u'Series': 'LegacySeriesFixed', - u'DataFrame': 'LegacyFrameFixed', - u'DataMatrix': 'LegacyFrameFixed', - u'series': 'SeriesFixed', - u'sparse_series': 'SparseSeriesFixed', - u'frame': 'FrameFixed', - u'sparse_frame': 'SparseFrameFixed', + 'Series': 'LegacySeriesFixed', + 'DataFrame': 'LegacyFrameFixed', + 'DataMatrix': 'LegacyFrameFixed', + 'series': 'SeriesFixed', + 'sparse_series': 'SparseSeriesFixed', + 'frame': 'FrameFixed', + 'sparse_frame': 'SparseFrameFixed', } # table class map _TABLE_MAP = { - u'generic_table': 'GenericTable', - u'appendable_series': 'AppendableSeriesTable', - u'appendable_multiseries': 'AppendableMultiSeriesTable', - u'appendable_frame': 'AppendableFrameTable', - u'appendable_multiframe': 'AppendableMultiFrameTable', - u'worm': 'WORMTable', + 'generic_table': 'GenericTable', + 'appendable_series': 'AppendableSeriesTable', + 'appendable_multiseries': 'AppendableMultiSeriesTable', + 'appendable_frame': 'AppendableFrameTable', + 'appendable_multiframe': 'AppendableMultiFrameTable', + 'worm': 'WORMTable', } # axes map @@ -1091,7 +1091,7 @@ def groups(self): (getattr(g._v_attrs, 'pandas_type', None) or getattr(g, 'table', None) or (isinstance(g, _table_mod.table.Table) and - g._v_name != u'table'))) + g._v_name != 'table'))) ] def walk(self, where="/"): @@ -1286,8 +1286,8 @@ def error(t): _tables() if (getattr(group, 'table', None) or isinstance(group, _table_mod.table.Table)): - pt = u'frame_table' - tt = u'generic_table' + pt = 'frame_table' + tt = 'generic_table' else: raise TypeError( "cannot create a storer if the object is not existing " @@ -1301,10 +1301,10 @@ def error(t): # we are actually a table if format == 'table': - pt += u'_table' + pt += '_table' # a storer node - if u'table' not in pt: + if 'table' not in pt: try: return globals()[_STORER_MAP[pt]](self, group, **kwargs) except KeyError: @@ -1316,33 +1316,33 @@ def error(t): # if we are a writer, determine the tt if value is not None: - if pt == u'series_table': + if pt == 'series_table': index = getattr(value, 'index', None) if index is not None: if index.nlevels == 1: - tt = u'appendable_series' + tt = 'appendable_series' elif index.nlevels > 1: - tt = u'appendable_multiseries' - elif pt == u'frame_table': + tt = 'appendable_multiseries' + elif pt == 'frame_table': index = getattr(value, 'index', None) if index is not None: if index.nlevels == 1: - tt = u'appendable_frame' + tt = 'appendable_frame' elif index.nlevels > 1: - tt = u'appendable_multiframe' - elif pt == u'wide_table': - tt = u'appendable_panel' - elif pt == u'ndim_table': - tt = u'appendable_ndim' + tt = 'appendable_multiframe' + elif pt == 'wide_table': + tt 
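`pytables.py` resolves storer and table classes by chaining string maps into a `globals()` lookup. Condensed to its core (a sketch with a stand-in class, not the real resolution logic):

```python
_STORER_MAP = {'frame': 'FrameFixed'}

class FrameFixed:  # stand-in for the real storer class
    pandas_kind = 'frame'

def resolve_storer(pt, namespace):
    # pandas_type string -> class name -> class object.
    return namespace[_STORER_MAP[pt]]

assert resolve_storer('frame', globals()) is FrameFixed
```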
= 'appendable_panel' + elif pt == 'ndim_table': + tt = 'appendable_ndim' else: # distiguish between a frame/table - tt = u'legacy_panel' + tt = 'legacy_panel' try: fields = group.table._v_attrs.fields - if len(fields) == 1 and fields[0] == u'value': - tt = u'legacy_frame' + if len(fields) == 1 and fields[0] == 'value': + tt = 'legacy_frame' except IndexError: pass @@ -1677,7 +1677,7 @@ def maybe_set_size(self, min_itemsize=None): """ maybe set a string col itemsize: min_itemsize can be an integer or a dict with this columns name with an integer size """ - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == 'string': if isinstance(min_itemsize, dict): min_itemsize = min_itemsize.get(self.name) @@ -1704,7 +1704,7 @@ def validate_col(self, itemsize=None): """ validate this column: return the compared against itemsize """ # validate this column for string truncation (or reset to the max size) - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == 'string': c = self.col if c is not None: if itemsize is None: @@ -1865,9 +1865,9 @@ def __init__(self, values=None, kind=None, typ=None, super(DataCol, self).__init__(values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None - self.dtype_attr = u'{name}_dtype'.format(name=self.name) + self.dtype_attr = '{name}_dtype'.format(name=self.name) self.meta = meta - self.meta_attr = u'{name}_meta'.format(name=self.name) + self.meta_attr = '{name}_meta'.format(name=self.name) self.set_data(data) self.set_metadata(metadata) @@ -1915,19 +1915,19 @@ def set_kind(self): if self.dtype is not None: dtype = _ensure_decoded(self.dtype) - if dtype.startswith(u'string') or dtype.startswith(u'bytes'): + if dtype.startswith('string') or dtype.startswith('bytes'): self.kind = 'string' - elif dtype.startswith(u'float'): + elif dtype.startswith('float'): self.kind = 'float' - elif dtype.startswith(u'complex'): + elif dtype.startswith('complex'): self.kind = 'complex' - elif dtype.startswith(u'int') or dtype.startswith(u'uint'): + elif dtype.startswith('int') or dtype.startswith('uint'): self.kind = 'integer' - elif dtype.startswith(u'date'): + elif dtype.startswith('date'): self.kind = 'datetime' - elif dtype.startswith(u'timedelta'): + elif dtype.startswith('timedelta'): self.kind = 'timedelta' - elif dtype.startswith(u'bool'): + elif dtype.startswith('bool'): self.kind = 'bool' else: raise AssertionError( @@ -2172,14 +2172,14 @@ def convert(self, values, nan_rep, encoding, errors): dtype = _ensure_decoded(self.dtype) # reverse converts - if dtype == u'datetime64': + if dtype == 'datetime64': # recreate with tz if indicated self.data = _set_tz(self.data, self.tz, coerce=True) - elif dtype == u'timedelta64': + elif dtype == 'timedelta64': self.data = np.asarray(self.data, dtype='m8[ns]') - elif dtype == u'date': + elif dtype == 'date': try: self.data = np.asarray( [date.fromordinal(v) for v in self.data], dtype=object) @@ -2187,12 +2187,12 @@ def convert(self, values, nan_rep, encoding, errors): self.data = np.asarray( [date.fromtimestamp(v) for v in self.data], dtype=object) - elif dtype == u'datetime': + elif dtype == 'datetime': self.data = np.asarray( [datetime.fromtimestamp(v) for v in self.data], dtype=object) - elif meta == u'category': + elif meta == 'category': # we have a categorical categories = self.metadata @@ -2225,7 +2225,7 @@ def convert(self, values, nan_rep, encoding, errors): self.data = self.data.astype('O', copy=False) # convert nans / decode - if _ensure_decoded(self.kind) == 
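`set_kind` classifies a column by the textual prefix of its decoded dtype name; `_ensure_decoded` exists because HDF5 attributes can surface as `bytes`. The intent, condensed (local stand-ins, not the patch's code):

```python
def ensure_decoded(s):
    # HDF5 attrs may come back as bytes rather than str.
    return s.decode('UTF-8') if isinstance(s, bytes) else s

_PREFIX_KINDS = [('string', 'string'), ('bytes', 'string'),
                 ('float', 'float'), ('complex', 'complex'),
                 ('int', 'integer'), ('uint', 'integer'),
                 ('date', 'datetime'), ('timedelta', 'timedelta'),
                 ('bool', 'bool')]

def classify(dtype_name):
    dtype = ensure_decoded(dtype_name)
    for prefix, kind in _PREFIX_KINDS:
        if dtype.startswith(prefix):
            return kind
    raise AssertionError('cannot interpret dtype {}'.format(dtype))

assert classify(b'float64') == 'float'
assert classify('datetime64') == 'datetime'
```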
u'string': + if _ensure_decoded(self.kind) == 'string': self.data = _unconvert_string_array( self.data, nan_rep=nan_rep, encoding=encoding, errors=errors) @@ -2537,12 +2537,12 @@ def read_array(self, key, start=None, stop=None): else: ret = node[start:stop] - if dtype == u'datetime64': + if dtype == 'datetime64': # reconstruct a timezone if indicated ret = _set_tz(ret, getattr(attrs, 'tz', None), coerce=True) - elif dtype == u'timedelta64': + elif dtype == 'timedelta64': ret = np.asarray(ret, dtype='m8[ns]') if transposed: @@ -2554,13 +2554,13 @@ def read_index(self, key, **kwargs): variety = _ensure_decoded( getattr(self.attrs, '{key}_variety'.format(key=key))) - if variety == u'multi': + if variety == 'multi': return self.read_multi_index(key, **kwargs) - elif variety == u'block': + elif variety == 'block': return self.read_block_index(key, **kwargs) - elif variety == u'sparseint': + elif variety == 'sparseint': return self.read_sparse_intindex(key, **kwargs) - elif variety == u'regular': + elif variety == 'regular': _, index = self.read_index_node(getattr(self.group, key), **kwargs) return index else: # pragma: no cover @@ -2681,13 +2681,13 @@ def read_index_node(self, node, start=None, stop=None): factory = self._get_index_factory(index_class) kwargs = {} - if u'freq' in node._v_attrs: + if 'freq' in node._v_attrs: kwargs['freq'] = node._v_attrs['freq'] - if u'tz' in node._v_attrs: + if 'tz' in node._v_attrs: kwargs['tz'] = node._v_attrs['tz'] - if kind in (u'date', u'datetime'): + if kind in ('date', 'datetime'): index = factory(_unconvert_index(data, kind, encoding=self.encoding, errors=self.errors), @@ -2833,7 +2833,7 @@ def read(self, **kwargs): class SeriesFixed(GenericFixed): - pandas_kind = u'series' + pandas_kind = 'series' attributes = ['name'] @property @@ -2870,7 +2870,7 @@ def validate_read(self, kwargs): class SparseSeriesFixed(SparseFixed): - pandas_kind = u'sparse_series' + pandas_kind = 'sparse_series' attributes = ['name', 'fill_value', 'kind'] def read(self, **kwargs): @@ -2879,7 +2879,7 @@ def read(self, **kwargs): sp_values = self.read_array('sp_values') sp_index = self.read_index('sp_index') return SparseSeries(sp_values, index=index, sparse_index=sp_index, - kind=self.kind or u'block', + kind=self.kind or 'block', fill_value=self.fill_value, name=self.name) @@ -2894,7 +2894,7 @@ def write(self, obj, **kwargs): class SparseFrameFixed(SparseFixed): - pandas_kind = u'sparse_frame' + pandas_kind = 'sparse_frame' attributes = ['default_kind', 'default_fill_value'] def read(self, **kwargs): @@ -3013,7 +3013,7 @@ def write(self, obj, **kwargs): class FrameFixed(BlockManagerFixed): - pandas_kind = u'frame' + pandas_kind = 'frame' obj_type = DataFrame @@ -3042,7 +3042,7 @@ class Table(Fixed): metadata : the names of the metadata columns """ - pandas_kind = u'wide_table' + pandas_kind = 'wide_table' table_type = None levels = 1 is_table = True @@ -3154,7 +3154,7 @@ def nrows_expected(self): @property def is_exists(self): """ has this table been created """ - return u'table' in self.group + return 'table' in self.group @property def storable(self): @@ -3837,7 +3837,7 @@ class WORMTable(Table): table. 
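`read_index_node` and the `datetime_index` decoder rebuild timezone-aware indexes the same way: materialize naive values understood to be UTC, localize to UTC, then convert. Stand-alone (assumes `pytz`-style zone names, as pandas of this vintage used):

```python
import pandas as pd

# Stored form: naive timestamps that are understood to be UTC.
naive_utc = pd.DatetimeIndex(['2014-01-01 00:00', '2014-01-02 00:00'])

restored = naive_utc.tz_localize('UTC').tz_convert('US/Eastern')
assert str(restored.tz) == 'US/Eastern'
```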
writing is a one-time operation the data are stored in a format that allows for searching the data on disk """ - table_type = u'worm' + table_type = 'worm' def read(self, **kwargs): """ read the indices and the indexing array, calculate offset rows and @@ -3865,7 +3865,7 @@ class LegacyTable(Table): IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'), DataCol(name='fields', cname='values', kind_attr='fields', pos=2) ] - table_type = u'legacy' + table_type = 'legacy' ndim = 3 def write(self, **kwargs): @@ -3885,7 +3885,7 @@ def read(self, where=None, columns=None, **kwargs): class AppendableTable(LegacyTable): """ support the new appendable table formats """ _indexables = None - table_type = u'appendable' + table_type = 'appendable' def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, @@ -4116,8 +4116,8 @@ def delete(self, where=None, start=None, stop=None, **kwargs): class AppendableFrameTable(AppendableTable): """ support the new appendable table formats """ - pandas_kind = u'frame_table' - table_type = u'appendable_frame' + pandas_kind = 'frame_table' + table_type = 'appendable_frame' ndim = 2 obj_type = DataFrame @@ -4182,8 +4182,8 @@ def read(self, where=None, columns=None, **kwargs): class AppendableSeriesTable(AppendableFrameTable): """ support the new appendable table formats """ - pandas_kind = u'series_table' - table_type = u'appendable_series' + pandas_kind = 'series_table' + table_type = 'appendable_series' ndim = 2 obj_type = Series storage_obj_type = DataFrame @@ -4225,8 +4225,8 @@ def read(self, columns=None, **kwargs): class AppendableMultiSeriesTable(AppendableSeriesTable): """ support the new appendable table formats """ - pandas_kind = u'series_table' - table_type = u'appendable_multiseries' + pandas_kind = 'series_table' + table_type = 'appendable_multiseries' def write(self, obj, **kwargs): """ we are going to write this as a frame table """ @@ -4240,8 +4240,8 @@ def write(self, obj, **kwargs): class GenericTable(AppendableFrameTable): """ a table that read/writes the generic pytables table format """ - pandas_kind = u'frame_table' - table_type = u'generic_table' + pandas_kind = 'frame_table' + table_type = 'generic_table' ndim = 2 obj_type = DataFrame @@ -4290,14 +4290,14 @@ def write(self, **kwargs): class AppendableMultiFrameTable(AppendableFrameTable): """ a frame with a multi-index """ - table_type = u'appendable_multiframe' + table_type = 'appendable_multiframe' obj_type = DataFrame ndim = 2 _re_levels = re.compile(r"^level_\d+$") @property def table_type_short(self): - return u'appendable_multi' + return 'appendable_multi' def write(self, obj, data_columns=None, **kwargs): if data_columns is None: @@ -4480,26 +4480,26 @@ def _convert_index(index, encoding=None, errors='strict', format_type=None): def _unconvert_index(data, kind, encoding=None, errors='strict'): kind = _ensure_decoded(kind) - if kind == u'datetime64': + if kind == 'datetime64': index = DatetimeIndex(data) - elif kind == u'timedelta64': + elif kind == 'timedelta64': index = TimedeltaIndex(data) - elif kind == u'datetime': + elif kind == 'datetime': index = np.asarray([datetime.fromtimestamp(v) for v in data], dtype=object) - elif kind == u'date': + elif kind == 'date': try: index = np.asarray( [date.fromordinal(v) for v in data], dtype=object) except (ValueError): index = np.asarray( [date.fromtimestamp(v) for v in data], dtype=object) - elif kind in (u'integer', u'float'): + elif kind in ('integer', 'float'): index = 
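`AppendableMultiFrameTable` recognizes auto-generated index columns via the `_re_levels` pattern kept in this hunk; what that anchored regex admits is easy to pin down:

```python
import re

_re_levels = re.compile(r"^level_\d+$")

assert _re_levels.match('level_0')
assert _re_levels.match('level_12')
assert not _re_levels.match('level_')      # at least one digit required
assert not _re_levels.match('my_level_0')  # anchored at both ends
```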
np.asarray(data) - elif kind in (u'string'): + elif kind in ('string'): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding, errors=errors) - elif kind == u'object': + elif kind == 'object': index = np.asarray(data[0]) else: # pragma: no cover raise ValueError('unrecognized index type {kind}'.format(kind=kind)) @@ -4509,11 +4509,11 @@ def _unconvert_index(data, kind, encoding=None, errors='strict'): def _unconvert_index_legacy(data, kind, legacy=False, encoding=None, errors='strict'): kind = _ensure_decoded(kind) - if kind == u'datetime': + if kind == 'datetime': index = to_datetime(data) - elif kind in (u'integer'): + elif kind in ('integer'): index = np.asarray(data, dtype=object) - elif kind in (u'string'): + elif kind in ('string'): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding, errors=errors) else: # pragma: no cover @@ -4618,7 +4618,7 @@ def _get_converter(kind, encoding, errors): def _need_convert(kind): kind = _ensure_decoded(kind) - if kind in (u'datetime', u'datetime64', u'string'): + if kind in ('datetime', 'datetime64', 'string'): return True return False diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 3540daab13b54..09d86ef4afcfe 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -1318,7 +1318,7 @@ def insert_statement(self): col_names = ','.join(bracketed_names) wildcards = ','.join([wld] * len(names)) insert_statement = \ - u'INSERT INTO {table} ({columns}) VALUES ({wld})'.format( + 'INSERT INTO {table} ({columns}) VALUES ({wld})'.format( table=escape(self.name), columns=col_names, wld=wildcards) return insert_statement diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index e9ff0f1a7d591..98e255692acc8 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -58,15 +58,15 @@ def test_print_none_width(self): def test_unicode_print(self): c = Categorical(['aaaaa', 'bb', 'cccc'] * 20) - expected = u"""\ + expected = """\ [aaaaa, bb, cccc, aaaaa, bb, ..., bb, cccc, aaaaa, bb, cccc] Length: 60 Categories (3, object): [aaaaa, bb, cccc]""" assert repr(c) == expected - c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) - expected = u"""\ + c = Categorical(['ああああ', 'いいいいい', 'ううううううう'] * 20) + expected = """\ [ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa @@ -77,8 +77,8 @@ def test_unicode_print(self): # the repr width with option_context('display.unicode.east_asian_width', True): - c = Categorical([u'ああああ', u'いいいいい', u'ううううううう'] * 20) - expected = u"""[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] + c = Categorical(['ああああ', 'いいいいい', 'ううううううう'] * 20) + expected = """[ああああ, いいいいい, ううううううう, ああああ, いいいいい, ..., いいいいい, ううううううう, ああああ, いいいいい, ううううううう] Length: 60 Categories (3, object): [ああああ, いいいいい, ううううううう]""" # noqa diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index b534e38d6304e..9731b9b63d028 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -639,11 +639,11 @@ def test_string(self): pass def test_unicode(self): - arr = [u'a', np.nan, u'c'] + arr = ['a', np.nan, 'c'] result = lib.infer_dtype(arr, skipna=False) assert result == 'mixed' - arr = [u'a', np.nan, u'c'] + arr = ['a', np.nan, 'c'] result = lib.infer_dtype(arr, skipna=True) expected = 'string' assert result == expected diff --git 
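One pre-existing quirk survives the rewrite untouched: in `_unconvert_index` (and `_unconvert_index_legacy`), `kind in ('string')` compares against a plain parenthesized string, not a 1-tuple, so `in` performs substring matching rather than membership. The patch deliberately changes only the literal prefix; the difference, for the record:

```python
kind = 'string'
assert (kind in ('string')) is True      # substring test: 'string' in 'string'
assert (kind in ('string',)) is True     # membership in an actual tuple

assert ('str' in ('string')) is True     # partial match slips through
assert ('str' in ('string',)) is False   # tuple membership rejects it
```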
a/pandas/tests/frame/test_axis_select_reindex.py b/pandas/tests/frame/test_axis_select_reindex.py index 560fb8dfaaf4f..f0ffa7f310a1f 100644 --- a/pandas/tests/frame/test_axis_select_reindex.py +++ b/pandas/tests/frame/test_axis_select_reindex.py @@ -872,18 +872,18 @@ def test_filter_regex_search(self): assert_frame_equal(result, exp) @pytest.mark.parametrize('name,expected', [ - ('a', DataFrame({u'a': [1, 2]})), - (u'a', DataFrame({u'a': [1, 2]})), - (u'あ', DataFrame({u'あ': [3, 4]})) + ('a', DataFrame({'a': [1, 2]})), + ('a', DataFrame({'a': [1, 2]})), + ('あ', DataFrame({'あ': [3, 4]})) ]) def test_filter_unicode(self, name, expected): # GH13101 - df = DataFrame({u'a': [1, 2], u'あ': [3, 4]}) + df = DataFrame({'a': [1, 2], 'あ': [3, 4]}) assert_frame_equal(df.filter(like=name), expected) assert_frame_equal(df.filter(regex=name), expected) - @pytest.mark.parametrize('name', ['a', u'a']) + @pytest.mark.parametrize('name', ['a', 'a']) def test_filter_bytestring(self, name): # GH13101 df = DataFrame({b'a': [1, 2], b'b': [3, 4]}) diff --git a/pandas/tests/frame/test_convert_to.py b/pandas/tests/frame/test_convert_to.py index db60fbf0f8563..98302730b4be0 100644 --- a/pandas/tests/frame/test_convert_to.py +++ b/pandas/tests/frame/test_convert_to.py @@ -154,7 +154,7 @@ def test_to_records_index_name(self): def test_to_records_with_unicode_index(self): # GH13172 # unicode_literals conflict with to_records - result = DataFrame([{u'a': u'x', u'b': 'y'}]).set_index(u'a') \ + result = DataFrame([{'a': 'x', 'b': 'y'}]).set_index('a') \ .to_records() expected = np.rec.array([('x', 'y')], dtype=[('a', 'O'), ('b', 'O')]) tm.assert_almost_equal(result, expected) @@ -163,13 +163,13 @@ def test_to_records_with_unicode_column_names(self): # xref issue: https://github.com/numpy/numpy/issues/2407 # Issue #11879. to_records used to raise an exception when used # with column names containing non-ascii characters in Python 2 - result = DataFrame(data={u"accented_name_é": [1.0]}).to_records() + result = DataFrame(data={"accented_name_é": [1.0]}).to_records() # Note that numpy allows for unicode field names but dtypes need # to be specified using dictionary instead of list of tuples. expected = np.rec.array( [(0, 1.0)], - dtype={"names": ["index", u"accented_name_é"], + dtype={"names": ["index", "accented_name_é"], "formats": ['=i8', '=f8']} ) tm.assert_almost_equal(result, expected) @@ -314,8 +314,8 @@ def test_to_records_dtype(self, kwargs, expected): columns=MultiIndex.from_tuples([("a", "d"), ("b", "e"), ("c", "f")])), dict(column_dtypes={0: "