diff --git a/LICENSES/SIX b/LICENSES/SIX new file mode 100644 index 0000000000000..6fd669af222d3 --- /dev/null +++ b/LICENSES/SIX @@ -0,0 +1,21 @@ +six license (substantial portions used in the python 3 compatibility module) +=========================================================================== +Copyright (c) 2010-2013 Benjamin Peterson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/bench/alignment.py b/bench/alignment.py index bf5d5604d913e..bc3134f597ee0 100644 --- a/bench/alignment.py +++ b/bench/alignment.py @@ -1,4 +1,5 @@ # Setup +from pandas.compat import range, lrange import numpy as np import pandas import la @@ -6,8 +7,8 @@ K = 50 arr1 = np.random.randn(N, K) arr2 = np.random.randn(N, K) -idx1 = range(N) -idx2 = range(K) +idx1 = lrange(N) +idx2 = lrange(K) # pandas dma1 = pandas.DataFrame(arr1, idx1, idx2) diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py index 419e8f603e5ae..427e0b1b10a22 100644 --- a/bench/bench_get_put_value.py +++ b/bench/bench_get_put_value.py @@ -1,12 +1,13 @@ from pandas import * from pandas.util.testing import rands +from pandas.compat import range N = 1000 K = 50 def _random_index(howmany): - return Index([rands(10) for _ in xrange(howmany)]) + return Index([rands(10) for _ in range(howmany)]) df = DataFrame(np.random.randn(N, K), index=_random_index(N), columns=_random_index(K)) diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py index 807d3449e1fcb..a86e8ed623ef7 100644 --- a/bench/bench_groupby.py +++ b/bench/bench_groupby.py @@ -1,5 +1,6 @@ from pandas import * from pandas.util.testing import rands +from pandas.compat import range import string import random @@ -7,7 +8,7 @@ k = 20000 n = 10 -foo = np.tile(np.array([rands(10) for _ in xrange(k)], dtype='O'), n) +foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) foo2 = list(foo) random.shuffle(foo) random.shuffle(foo2) diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index 0e484fb496036..f3c3f8ba15f70 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -35,7 +35,7 @@ def reindex_on_axis(panels, axis, axis_reindex): # concatenate values try: values = np.concatenate([p.values for p in panels], axis=1) - except (Exception), detail: + except Exception as detail: raise Exception("cannot append values that dont' match dimensions!
-> [%s] %s" % (','.join(["%s" % p for p in panels]), str(detail))) # pm('append - create_panel') diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py index fce3288e3294d..054fc36131b65 100644 --- a/bench/bench_khash_dict.py +++ b/bench/bench_khash_dict.py @@ -1,12 +1,14 @@ """ Some comparisons of khash.h to Python dict """ +from __future__ import print_function import numpy as np import os from vbench.api import Benchmark from pandas.util.testing import rands +from pandas.compat import range import pandas._tseries as lib import pandas._sandbox as sbx import time @@ -22,7 +24,7 @@ def object_test_data(n): def string_test_data(n): - return np.array([rands(10) for _ in xrange(n)], dtype='O') + return np.array([rands(10) for _ in range(n)], dtype='O') def int_test_data(n): @@ -50,7 +52,7 @@ def f(): def _timeit(f, iterations=10): start = time.time() - for _ in xrange(iterations): + for _ in range(iterations): foo = f() elapsed = time.time() - start return elapsed @@ -73,8 +75,8 @@ def lookup_khash(values): def leak(values): - for _ in xrange(100): - print proc.get_memory_info() + for _ in range(100): + print(proc.get_memory_info()) table = lookup_khash(values) # table.destroy() diff --git a/bench/bench_merge.py b/bench/bench_merge.py index 11f8c29a2897b..330dba7b9af69 100644 --- a/bench/bench_merge.py +++ b/bench/bench_merge.py @@ -1,13 +1,16 @@ +import random +import gc +import time from pandas import * +from pandas.compat import range, lrange, StringIO from pandas.util.testing import rands -import random N = 10000 ngroups = 10 def get_test_data(ngroups=100, n=N): - unique_groups = range(ngroups) + unique_groups = lrange(ngroups) arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) if len(arr) < n: @@ -28,14 +31,10 @@ def get_test_data(ngroups=100, n=N): # 'value' : np.random.randn(N // 10)}) # result = merge.merge(df, df2, on='key2') -from collections import defaultdict -import gc -import time -from pandas.util.testing import rands N = 10000 -indices = np.array([rands(10) for _ in xrange(N)], dtype='O') -indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O') +indices = np.array([rands(10) for _ in range(N)], dtype='O') +indices2 = np.array([rands(10) for _ in range(N)], dtype='O') key = np.tile(indices[:8000], 10) key2 = np.tile(indices2[:8000], 10) @@ -55,7 +54,7 @@ def get_test_data(ngroups=100, n=N): f = lambda: merge(left, right, how=join_method, sort=sort) gc.disable() start = time.time() - for _ in xrange(niter): + for _ in range(niter): f() elapsed = (time.time() - start) / niter gc.enable() @@ -65,7 +64,6 @@ def get_test_data(ngroups=100, n=N): # R results -from StringIO import StringIO # many to one r_results = read_table(StringIO(""" base::merge plyr data.table inner 0.2475 0.1183 0.1100 @@ -93,7 +91,6 @@ def get_test_data(ngroups=100, n=N): # many to many -from StringIO import StringIO # many to one r_results = read_table(StringIO("""base::merge plyr data.table inner 0.4610 0.1276 0.1269 diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py index d13b296698b97..3ad4b810119c3 100644 --- a/bench/bench_merge_sqlite.py +++ b/bench/bench_merge_sqlite.py @@ -4,12 +4,13 @@ import time from pandas import DataFrame from pandas.util.testing import rands +from pandas.compat import range, zip import random N = 10000 -indices = np.array([rands(10) for _ in xrange(N)], dtype='O') -indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O') +indices = np.array([rands(10) for _ in range(N)], dtype='O') +indices2 = np.array([rands(10) for _ 
in range(N)], dtype='O') key = np.tile(indices[:8000], 10) key2 = np.tile(indices2[:8000], 10) @@ -67,7 +68,7 @@ g = lambda: conn.execute(sql) # list fetches results gc.disable() start = time.time() - # for _ in xrange(niter): + # for _ in range(niter): g() elapsed = (time.time() - start) / niter gc.enable() diff --git a/bench/bench_sparse.py b/bench/bench_sparse.py index 600b3d05c5f78..7dc2db05cfe20 100644 --- a/bench/bench_sparse.py +++ b/bench/bench_sparse.py @@ -3,6 +3,7 @@ from pandas import * import pandas.core.sparse as spm +import pandas.compat as compat reload(spm) from pandas.core.sparse import * @@ -41,7 +42,7 @@ def new_data_like(sdf): new_data = {} - for col, series in sdf.iteritems(): + for col, series in compat.iteritems(sdf): new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)), index=sdf.index, sparse_index=series.sp_index, diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py index 3ddd647a35bf6..5fb584bcfe45f 100644 --- a/bench/bench_take_indexing.py +++ b/bench/bench_take_indexing.py @@ -1,3 +1,4 @@ +from __future__ import print_function import numpy as np from pandas import * @@ -5,6 +6,7 @@ from pandas import DataFrame import timeit +from pandas.compat import zip setup = """ from pandas import Series @@ -35,7 +37,7 @@ def _timeit(stmt, size, k=5, iters=1000): return timer.timeit(n) / n for sz, its in zip(sizes, iters): - print sz + print(sz) fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its)) cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) @@ -44,7 +46,7 @@ def _timeit(stmt, size, k=5, iters=1000): 'take': take_2d, 'cython': cython_2d}) -print df +print(df) from pandas.rpy.common import r r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') diff --git a/bench/bench_unique.py b/bench/bench_unique.py index 392d3b326bf09..87bd2f2df586c 100644 --- a/bench/bench_unique.py +++ b/bench/bench_unique.py @@ -1,5 +1,7 @@ +from __future__ import print_function from pandas import * from pandas.util.testing import rands +from pandas.compat import range, zip import pandas._tseries as lib import numpy as np import matplotlib.pyplot as plt @@ -7,8 +9,8 @@ N = 50000 K = 10000 -groups = np.array([rands(10) for _ in xrange(K)], dtype='O') -groups2 = np.array([rands(10) for _ in xrange(K)], dtype='O') +groups = np.array([rands(10) for _ in range(K)], dtype='O') +groups2 = np.array([rands(10) for _ in range(K)], dtype='O') labels = np.tile(groups, N // K) labels2 = np.tile(groups2, N // K) @@ -20,7 +22,7 @@ def timeit(f, niter): import time gc.disable() start = time.time() - for _ in xrange(niter): + for _ in range(niter): f() elapsed = (time.time() - start) / niter gc.enable() @@ -75,9 +77,8 @@ def algo3_sort(): def f(): - from itertools import izip # groupby sum - for k, v in izip(x, data): + for k, v in zip(x, data): try: counts[k] += v except KeyError: @@ -128,7 +129,7 @@ def algo4(): # N = 10000000 # K = 500000 -# groups = np.array([rands(10) for _ in xrange(K)], dtype='O') +# groups = np.array([rands(10) for _ in range(K)], dtype='O') # labels = np.tile(groups, N // K) data = np.random.randn(N) @@ -232,11 +233,11 @@ def hash_bench(): khash_hint = [] khash_nohint = [] for K in Ks: - print K - # groups = np.array([rands(10) for _ in xrange(K)]) + print(K) + # groups = np.array([rands(10) for _ in range(K)]) # labels = np.tile(groups, N // K).astype('O') - groups = np.random.randint(0, 100000000000L, size=K) + groups = np.random.randint(0, 
long(100000000000), size=K) labels = np.tile(groups, N // K) dict_based.append(timeit(lambda: dict_unique(labels, K), 20)) khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20)) @@ -245,11 +246,11 @@ def hash_bench(): # memory, hard to get # dict_based.append(np.mean([dict_unique(labels, K, memory=True) - # for _ in xrange(10)])) + # for _ in range(10)])) # khash_nohint.append(np.mean([khash_unique(labels, K, memory=True) - # for _ in xrange(10)])) + # for _ in range(10)])) # khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True) - # for _ in xrange(10)])) + # for _ in range(10)])) # dict_based_sort.append(timeit(lambda: dict_unique(labels, K, # sort=True), 10)) diff --git a/bench/better_unique.py b/bench/better_unique.py index 982dd88e879da..e03a4f433ce66 100644 --- a/bench/better_unique.py +++ b/bench/better_unique.py @@ -1,9 +1,12 @@ +from __future__ import print_function from pandas import DataFrame +from pandas.compat import range, zip import timeit setup = """ from pandas import Series import pandas._tseries as _tseries +from pandas.compat import range import random import numpy as np @@ -48,11 +51,11 @@ def get_test_data(ngroups=100, n=tot): numpy_timer = timeit.Timer(stmt='np.unique(arr)', setup=setup % sz) - print n + print(n) numpy_result = numpy_timer.timeit(number=n) / n wes_result = wes_timer.timeit(number=n) / n - print 'Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result) + print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)) wes.append(wes_result) numpy.append(numpy_result) diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py index a9711dbb83b8a..e389481d1aabc 100644 --- a/bench/io_roundtrip.py +++ b/bench/io_roundtrip.py @@ -1,16 +1,18 @@ +from __future__ import print_function import time import os import numpy as np import la import pandas +from pandas.compat import range from pandas import datetools, DateRange def timeit(f, iterations): start = time.clock() - for i in xrange(iterations): + for i in range(iterations): f() return time.clock() - start @@ -54,11 +56,11 @@ def rountrip_archive(N, K=50, iterations=10): pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pandas_time = timeit(pandas_f, iterations) / iterations - print 'pandas (HDF5) %7.4f seconds' % pandas_time + print('pandas (HDF5) %7.4f seconds' % pandas_time) pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pickle_time = timeit(pickle_f, iterations) / iterations - print 'pandas (pickle) %7.4f seconds' % pickle_time + print('pandas (pickle) %7.4f seconds' % pickle_time) # print 'Numpy (npz) %7.4f seconds' % numpy_time # print 'larry (HDF5) %7.4f seconds' % larry_time diff --git a/bench/serialize.py b/bench/serialize.py index 63f885a4efa88..b0edd6a5752d2 100644 --- a/bench/serialize.py +++ b/bench/serialize.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from pandas.compat import range, lrange import time import os import numpy as np @@ -9,7 +11,7 @@ def timeit(f, iterations): start = time.clock() - for i in xrange(iterations): + for i in range(iterations): f() return time.clock() - start @@ -20,7 +22,7 @@ def roundtrip_archive(N, iterations=10): # Create data arr = np.random.randn(N, N) lar = la.larry(arr) - dma = pandas.DataFrame(arr, range(N), range(N)) + dma = pandas.DataFrame(arr, lrange(N), lrange(N)) # filenames filename_numpy = '/Users/wesm/tmp/numpy.npz' @@ -51,9 +53,9 @@ def roundtrip_archive(N, iterations=10): pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pandas_time = 
timeit(pandas_f, iterations) / iterations - print 'Numpy (npz) %7.4f seconds' % numpy_time - print 'larry (HDF5) %7.4f seconds' % larry_time - print 'pandas (HDF5) %7.4f seconds' % pandas_time + print('Numpy (npz) %7.4f seconds' % numpy_time) + print('larry (HDF5) %7.4f seconds' % larry_time) + print('pandas (HDF5) %7.4f seconds' % pandas_time) def numpy_roundtrip(filename, arr1, arr2): diff --git a/bench/test.py b/bench/test.py index 2ac91468d7b73..2339deab313a1 100644 --- a/bench/test.py +++ b/bench/test.py @@ -2,6 +2,7 @@ import itertools import collections import scipy.ndimage as ndi +from pandas.compat import zip, range N = 10000 diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index ac77449b2df02..5038b9e2b6552 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -1,6 +1,6 @@ numpy==1.6.1 cython==0.19.1 -python-dateutil==2.1 +python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2 diff --git a/doc/make.py b/doc/make.py index adf34920b9ede..dbce5aaa7a1b4 100755 --- a/doc/make.py +++ b/doc/make.py @@ -14,6 +14,7 @@ python make.py clean python make.py html """ +from __future__ import print_function import glob import os @@ -60,7 +61,7 @@ def upload_prev(ver, doc_root='./'): remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver cmd = 'cd %s; rsync -avz . pandas@pandas.pydata.org:%s -essh' cmd = cmd % (local_dir, remote_dir) - print cmd + print(cmd) if os.system(cmd): raise SystemExit( 'Upload to %s from %s failed' % (remote_dir, local_dir)) @@ -154,7 +155,7 @@ def auto_dev_build(debug=False): upload_dev_pdf() if not debug: sendmail(step) - except (Exception, SystemExit), inst: + except (Exception, SystemExit) as inst: msg = str(inst) + '\n' sendmail(step, '[ERROR] ' + msg) @@ -258,7 +259,7 @@ def _get_config(): func = funcd.get(arg) if func is None: raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, funcd.keys())) + arg, list(funcd.keys()))) func() else: small_docs = False diff --git a/doc/plots/stats/moment_plots.py b/doc/plots/stats/moment_plots.py index 9e3a902592c6b..86ec1d10de520 100644 --- a/doc/plots/stats/moment_plots.py +++ b/doc/plots/stats/moment_plots.py @@ -1,3 +1,4 @@ +from pandas.compat import range import numpy as np import matplotlib.pyplot as plt diff --git a/doc/source/conf.py b/doc/source/conf.py index 99d1703b9ca34..99da77dd5d570 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -12,6 +12,7 @@ import sys import os +from pandas.compat import u # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -63,8 +64,8 @@ master_doc = 'index' # General information about the project. -project = u'pandas' -copyright = u'2008-2012, the pandas development team' +project = u('pandas') +copyright = u('2008-2012, the pandas development team') # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -211,8 +212,8 @@ # (source start file, target name, title, author, documentclass [howto/manual]). 
latex_documents = [ ('index', 'pandas.tex', - u'pandas: powerful Python data analysis toolkit', - u'Wes McKinney\n\& PyData Development Team', 'manual'), + u('pandas: powerful Python data analysis toolkit'), + u('Wes McKinney\n\& PyData Development Team'), 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/doc/source/release.rst b/doc/source/release.rst index fdcd0863d9f59..90d5b1600b4eb 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -47,6 +47,22 @@ pandas 0.13 **API Changes** + - ``pandas`` is now Python 2/3 compatible without the need for 2to3, thanks to + @jtratner. As a result, pandas now uses iterators more extensively. This + also led to the introduction of substantive parts of Benjamin + Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, + :issue:`4372`) + - ``pandas.util.compat`` and ``pandas.util.py3compat`` have been merged into + ``pandas.compat``. ``pandas.compat`` now includes many functions allowing + 2/3 compatibility. It contains both list and iterator versions of range, + filter, map and zip, plus other necessary elements for Python 3 + compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce + lists instead of iterators, for compatibility with ``numpy``, subscripting + and ``pandas`` constructors. (:issue:`4384`, :issue:`4375`, :issue:`4372`) + - deprecated ``iterkv``, which will be removed in a future release (it was just + an alias of ``iteritems`` used to work around ``2to3``'s changes). + (:issue:`4384`, :issue:`4375`, :issue:`4372`) + **Experimental Features** **Bug Fixes** diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 1264f649ace21..9f2f7c870f849 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -12,8 +12,23 @@ API changes - ``read_excel`` now supports an integer in its ``sheetname`` argument giving the index of the sheet to read in (:issue:`4301`). - Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", - "iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting + "iNf", etc.) as infinity. (:issue:`4220`, :issue:`4219`), affecting ``read_table``, ``read_csv``, etc. + - ``pandas`` is now Python 2/3 compatible without the need for 2to3, thanks to + @jtratner. As a result, pandas now uses iterators more extensively. This + also led to the introduction of substantive parts of Benjamin + Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, + :issue:`4372`) + - ``pandas.util.compat`` and ``pandas.util.py3compat`` have been merged into + ``pandas.compat``. ``pandas.compat`` now includes many functions allowing + 2/3 compatibility. It contains both list and iterator versions of range, + filter, map and zip, plus other necessary elements for Python 3 + compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce + lists instead of iterators, for compatibility with ``numpy``, subscripting + and ``pandas`` constructors. (:issue:`4384`, :issue:`4375`, :issue:`4372`) + - deprecated ``iterkv``, which will be removed in a future release (it was just + an alias of ``iteritems`` used to work around ``2to3``'s changes). + (:issue:`4384`, :issue:`4375`, :issue:`4372`)
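To make the notes above concrete, here is a minimal usage sketch of the new ``pandas.compat`` idioms. It assumes a pandas build that includes the ``pandas/compat/__init__.py`` added later in this diff; every imported name (``range``, ``lrange``, ``lzip``, ``iteritems``, ``u``, ``text_type``, ``string_types``) is defined there:

    from pandas.compat import (range, lrange, lzip, iteritems, u,
                               text_type, string_types)

    # Iterator versions: xrange/izip on Python 2, the builtins on Python 3.
    total = sum(i * i for i in range(10))

    # List-producing versions: safe to index, slice, or hand to numpy/pandas.
    idx = lrange(3)              # [0, 1, 2] on both Python 2 and 3
    pairs = lzip('abc', idx)     # [('a', 0), ('b', 1), ('c', 2)]

    # Dict iteration without 2to3: .iteritems() on Python 2, .items() on 3.
    for key, value in iteritems({'a': 1, 'b': 2}):
        assert isinstance(key, string_types)

    # u'' literals are a syntax error on Python 3.0-3.2, so u() wraps instead:
    label = u('pandas')          # unicode on Python 2, str on Python 3
    assert isinstance(label, text_type)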
Enhancements ~~~~~~~~~~~~ diff --git a/doc/sphinxext/__init__.py b/doc/sphinxext/__init__.py index ae9073bc4115f..68dbbb00a7cfb 100755 --- a/doc/sphinxext/__init__.py +++ b/doc/sphinxext/__init__.py @@ -1 +1 @@ -from numpydoc import setup +from .numpydoc import setup diff --git a/doc/sphinxext/comment_eater.py b/doc/sphinxext/comment_eater.py index e11eea9021073..1c6d46c5aed6c 100755 --- a/doc/sphinxext/comment_eater.py +++ b/doc/sphinxext/comment_eater.py @@ -1,10 +1,10 @@ -from cStringIO import StringIO +from pandas.compat import cStringIO as StringIO import compiler import inspect import textwrap import tokenize -from compiler_unparse import unparse +from .compiler_unparse import unparse class Comment(object): @@ -95,7 +95,7 @@ def new_noncomment(self, start_lineno, end_lineno): def new_comment(self, string, start, end, line): """ Possibly add a new comment. - + Only adds a new comment if this comment is the only thing on the line. Otherwise, it extends the noncomment block. """ diff --git a/doc/sphinxext/compiler_unparse.py b/doc/sphinxext/compiler_unparse.py index ffcf51b353a10..46b7257c455f7 100755 --- a/doc/sphinxext/compiler_unparse.py +++ b/doc/sphinxext/compiler_unparse.py @@ -12,11 +12,11 @@ """ import sys -import cStringIO +from pandas.compat import cStringIO as StringIO from compiler.ast import Const, Name, Tuple, Div, Mul, Sub, Add def unparse(ast, single_line_functions=False): - s = cStringIO.StringIO() + s = StringIO() UnparseCompilerAst(ast, s, single_line_functions) return s.getvalue().lstrip() @@ -101,13 +101,13 @@ def _And(self, t): if i != len(t.nodes)-1: self._write(") and (") self._write(")") - + def _AssAttr(self, t): """ Handle assigning an attribute of an object """ self._dispatch(t.expr) self._write('.'+t.attrname) - + def _Assign(self, t): """ Expression Assignment such as "a = 1". @@ -145,36 +145,36 @@ def _AssTuple(self, t): def _AugAssign(self, t): """ +=,-=,*=,/=,**=, etc. operations """ - + self._fill() self._dispatch(t.node) self._write(' '+t.op+' ') self._dispatch(t.expr) if not self._do_indent: self._write(';') - + def _Bitand(self, t): """ Bit and operation. """ - + for i, node in enumerate(t.nodes): self._write("(") self._dispatch(node) self._write(")") if i != len(t.nodes)-1: self._write(" & ") - + def _Bitor(self, t): """ Bit or operation """ - + for i, node in enumerate(t.nodes): self._write("(") self._dispatch(node) self._write(")") if i != len(t.nodes)-1: self._write(" | ") - + def _CallFunc(self, t): """ Function call. """ @@ -249,7 +249,7 @@ def _From(self, t): self._write(name) if asname is not None: self._write(" as "+asname) - + def _Function(self, t): """ Handle function definitions """ @@ -282,12 +282,12 @@ def _Getattr(self, t): self._write(')') else: self._dispatch(t.expr) - + self._write('.'+t.attrname) - + def _If(self, t): self._fill() - + for i, (compare,code) in enumerate(t.tests): if i == 0: self._write("if ") @@ -307,7 +307,7 @@ def _If(self, t): self._dispatch(t.else_) self._leave() self._write("\n") - + def _IfExp(self, t): self._dispatch(t.then) self._write(" if ") @@ -322,7 +322,7 @@ def _Import(self, t): """ Handle "import xyz.foo".
""" self._fill("import ") - + for i, (name,asname) in enumerate(t.names): if i != 0: self._write(", ") @@ -336,7 +336,7 @@ def _Keyword(self, t): self._write(t.name) self._write("=") self._dispatch(t.expr) - + def _List(self, t): self._write("[") for i,node in enumerate(t.nodes): @@ -358,12 +358,12 @@ def _Name(self, t): def _NoneType(self, t): self._write("None") - + def _Not(self, t): self._write('not (') self._dispatch(t.expr) self._write(')') - + def _Or(self, t): self._write(" (") for i, node in enumerate(t.nodes): @@ -371,7 +371,7 @@ def _Or(self, t): if i != len(t.nodes)-1: self._write(") or (") self._write(")") - + def _Pass(self, t): self._write("pass\n") @@ -452,7 +452,7 @@ def _TryExcept(self, t): self._enter() self._dispatch(handler[2]) self._leave() - + if t.else_: self._fill("else") self._enter() @@ -477,14 +477,14 @@ def _Tuple(self, t): self._dispatch(last_element) self._write(")") - + def _UnaryAdd(self, t): self._write("+") self._dispatch(t.expr) - + def _UnarySub(self, t): self._write("-") - self._dispatch(t.expr) + self._dispatch(t.expr) def _With(self, t): self._fill('with ') @@ -496,7 +496,7 @@ def _With(self, t): self._dispatch(t.body) self._leave() self._write('\n') - + def _int(self, t): self._write(repr(t)) @@ -533,7 +533,7 @@ def _float(self, t): def _str(self, t): self._write(repr(t)) - + def _tuple(self, t): self._write(str(t)) diff --git a/doc/sphinxext/docscrape.py b/doc/sphinxext/docscrape.py index 63fec42adaa41..3c2c303e85ccd 100755 --- a/doc/sphinxext/docscrape.py +++ b/doc/sphinxext/docscrape.py @@ -1,13 +1,14 @@ """Extract reference documentation from the NumPy source tree. """ +from __future__ import print_function import inspect import textwrap import re import pydoc -from StringIO import StringIO from warnings import warn +from pandas.compat import StringIO, callable class Reader(object): """A line-based string reader. @@ -113,7 +114,7 @@ def __getitem__(self,key): return self._parsed_data[key] def __setitem__(self,key,val): - if not self._parsed_data.has_key(key): + if key not in self._parsed_data: warn("Unknown section %s" % key) else: self._parsed_data[key] = val @@ -370,7 +371,7 @@ def _str_index(self): idx = self['index'] out = [] out += ['.. index:: %s' % idx.get('default','')] - for section, references in idx.iteritems(): + for section, references in compat.iteritems(idx): if section == 'default': continue out += [' :%s: %s' % (section, ', '.join(references))] @@ -427,7 +428,7 @@ def __init__(self, func, role='func', doc=None, config={}): argspec = inspect.formatargspec(*argspec) argspec = argspec.replace('*','\*') signature = '%s%s' % (func_name, argspec) - except TypeError, e: + except TypeError as e: signature = '%s()' % func_name self['Signature'] = signature @@ -449,8 +450,8 @@ def __str__(self): 'meth': 'method'} if self._role: - if not roles.has_key(self._role): - print "Warning: invalid role %s" % self._role + if self._role not in roles: + print("Warning: invalid role %s" % self._role) out += '.. 
%s:: %s\n \n\n' % (roles.get(self._role,''), func_name) diff --git a/doc/sphinxext/docscrape_sphinx.py b/doc/sphinxext/docscrape_sphinx.py index 9f4350d4601ad..650a2d8f33dd0 100755 --- a/doc/sphinxext/docscrape_sphinx.py +++ b/doc/sphinxext/docscrape_sphinx.py @@ -1,6 +1,7 @@ import re, inspect, textwrap, pydoc import sphinx -from docscrape import NumpyDocString, FunctionDoc, ClassDoc +from .docscrape import NumpyDocString, FunctionDoc, ClassDoc +from pandas.compat import callable, iteritems class SphinxDocString(NumpyDocString): def __init__(self, docstring, config={}): @@ -127,7 +128,7 @@ def _str_index(self): return out out += ['.. index:: %s' % idx.get('default','')] - for section, references in idx.iteritems(): + for section, references in iteritems(idx): if section == 'default': continue elif section == 'refguide': diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py index 0c28e397a0005..948d60c3760e9 100644 --- a/doc/sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_directive.py @@ -51,14 +51,15 @@ - Václav Šmilauer : Prompt generalizations. - Skipper Seabold, refactoring, cleanups, pure python addition """ +from __future__ import print_function #----------------------------------------------------------------------------- # Imports #----------------------------------------------------------------------------- # Stdlib +from pandas.compat import zip, range, map, lmap, u, text_type, cStringIO as StringIO import ast -import cStringIO import os import re import sys @@ -114,7 +115,7 @@ def block_parser(part, rgxin, rgxout, fmtin, fmtout): N = len(lines) i = 0 decorator = None - while 1: + while True: if i==N: # nothing left to parse -- the last line @@ -186,7 +187,7 @@ class EmbeddedSphinxShell(object): def __init__(self): - self.cout = cStringIO.StringIO() + self.cout = StringIO() # Create config object for IPython config = Config() @@ -299,7 +300,7 @@ def process_input(self, data, input_prompt, lineno): def _remove_first_space_if_any(line): return line[1:] if line.startswith(' ') else line - input_lines = map(_remove_first_space_if_any, input.split('\n')) + input_lines = lmap(_remove_first_space_if_any, input.split('\n')) self.datacontent = data @@ -489,7 +490,7 @@ def process_pure_python(self, content): multiline = True cont_len = len(str(lineno)) + 2 line_to_process = line.strip('\\') - output.extend([u"%s %s" % (fmtin%lineno,line)]) + output.extend([u("%s %s") % (fmtin%lineno,line)]) continue else: # no we're still not line_to_process = line.strip('\\') @@ -497,12 +498,12 @@ def process_pure_python(self, content): line_to_process += line.strip('\\') if line_stripped.endswith('\\'): # and we still are continuation = '.'
* cont_len - output.extend([(u' %s: '+line_stripped) % continuation]) + output.extend([(u(' %s: ')+line_stripped) % continuation]) continue # else go ahead and run this multiline then carry on # get output of line - self.process_input_line(unicode(line_to_process.strip()), + self.process_input_line(text_type(line_to_process.strip()), store_history=False) out_line = self.cout.getvalue() self.clear_cout() @@ -516,15 +517,15 @@ def process_pure_python(self, content): # line numbers don't actually matter, they're replaced later if not multiline: - in_line = u"%s %s" % (fmtin%lineno,line) + in_line = u("%s %s") % (fmtin%lineno,line) output.extend([in_line]) else: - output.extend([(u' %s: '+line_stripped) % continuation]) + output.extend([(u(' %s: ')+line_stripped) % continuation]) multiline = False if len(out_line): output.extend([out_line]) - output.extend([u'']) + output.extend([u('')]) return output @@ -566,19 +567,19 @@ def process_pure_python2(self, content): output.extend([line]) continue - continuation = u' %s:'% ''.join(['.']*(len(str(ct))+2)) + continuation = u(' %s:')% ''.join(['.']*(len(str(ct))+2)) if not multiline: - modified = u"%s %s" % (fmtin % ct, line_stripped) + modified = u("%s %s") % (fmtin % ct, line_stripped) output.append(modified) ct += 1 try: ast.parse(line_stripped) - output.append(u'') + output.append(u('')) except Exception: multiline = True multiline_start = lineno else: - modified = u'%s %s' % (continuation, line) + modified = u('%s %s') % (continuation, line) output.append(modified) try: @@ -590,7 +591,7 @@ def process_pure_python2(self, content): continue - output.extend([continuation, u'']) + output.extend([continuation, u('')]) multiline = False except Exception: pass @@ -732,7 +733,7 @@ def run(self): #print lines if len(lines)>2: if debug: - print '\n'.join(lines) + print('\n'.join(lines)) else: #NOTE: this raises some errors, what's it for? #print 'INSERTING %d lines'%len(lines) self.state_machine.insert_input( @@ -910,4 +911,4 @@ def test(): if not os.path.isdir('_static'): os.mkdir('_static') test() - print 'All OK? Check figures in _static/' + print('All OK?
Check figures in _static/') diff --git a/doc/sphinxext/numpydoc.py b/doc/sphinxext/numpydoc.py index 43c67336b5c03..6f79703380a3d 100755 --- a/doc/sphinxext/numpydoc.py +++ b/doc/sphinxext/numpydoc.py @@ -22,7 +22,8 @@ raise RuntimeError("Sphinx 1.0.1 or newer is required") import os, re, pydoc -from docscrape_sphinx import get_doc_object, SphinxDocString +from .docscrape_sphinx import get_doc_object, SphinxDocString +from pandas.compat import u, callable, text_type from sphinx.util.compat import Directive import inspect @@ -34,28 +35,28 @@ def mangle_docstrings(app, what, name, obj, options, lines, if what == 'module': # Strip top title - title_re = re.compile(ur'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', + title_re = re.compile(u(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*'), re.I|re.S) - lines[:] = title_re.sub(u'', u"\n".join(lines)).split(u"\n") + lines[:] = title_re.sub(u(''), u("\n").join(lines)).split(u("\n")) else: - doc = get_doc_object(obj, what, u"\n".join(lines), config=cfg) - lines[:] = unicode(doc).split(u"\n") + doc = get_doc_object(obj, what, u("\n").join(lines), config=cfg) + lines[:] = text_type(doc).split(u("\n")) if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ obj.__name__: if hasattr(obj, '__module__'): - v = dict(full_name=u"%s.%s" % (obj.__module__, obj.__name__)) + v = dict(full_name=u("%s.%s") % (obj.__module__, obj.__name__)) else: v = dict(full_name=obj.__name__) - lines += [u'', u'.. htmlonly::', ''] - lines += [u' %s' % x for x in + lines += [u(''), u('.. htmlonly::'), ''] + lines += [u(' %s') % x for x in (app.config.numpydoc_edit_link % v).split("\n")] # replace reference numbers so that there are no duplicates references = [] for line in lines: line = line.strip() - m = re.match(ur'^.. \[([a-z0-9_.-])\]', line, re.I) + m = re.match(u(r'^.. \[([a-z0-9_.-])\]'), line, re.I) if m: references.append(m.group(1)) @@ -64,14 +65,14 @@ def mangle_docstrings(app, what, name, obj, options, lines, if references: for i, line in enumerate(lines): for r in references: - if re.match(ur'^\d+$', r): - new_r = u"R%d" % (reference_offset[0] + int(r)) + if re.match(u(r'^\d+$'), r): + new_r = u("R%d") % (reference_offset[0] + int(r)) else: - new_r = u"%s%d" % (r, reference_offset[0]) - lines[i] = lines[i].replace(u'[%s]_' % r, - u'[%s]_' % new_r) - lines[i] = lines[i].replace(u'.. [%s]' % r, - u'.. [%s]' % new_r) + new_r = u("%s%d") % (r, reference_offset[0]) + lines[i] = lines[i].replace(u('[%s]_') % r, + u('[%s]_') % new_r) + lines[i] = lines[i].replace(u('.. [%s]') % r, + u('.. [%s]') % new_r) reference_offset[0] += len(references) @@ -87,8 +88,8 @@ def mangle_signature(app, what, name, obj, options, sig, retann): doc = SphinxDocString(pydoc.getdoc(obj)) if doc['Signature']: - sig = re.sub(u"^[^(]*", u"", doc['Signature']) - return sig, u'' + sig = re.sub(u("^[^(]*"), u(""), doc['Signature']) + return sig, u('') def setup(app, get_doc_object_=get_doc_object): global get_doc_object diff --git a/doc/sphinxext/phantom_import.py b/doc/sphinxext/phantom_import.py index c77eeb544e78b..a92eb96e589c8 100755 --- a/doc/sphinxext/phantom_import.py +++ b/doc/sphinxext/phantom_import.py @@ -14,6 +14,7 @@ .. [1] http://code.google.com/p/pydocweb """ +from __future__ import print_function import imp, sys, compiler, types, os, inspect, re def setup(app): @@ -23,7 +24,7 @@ def setup(app): def initialize(app): fn = app.config.phantom_import_file if (fn and os.path.isfile(fn)): - print "[numpydoc] Phantom importing modules from", fn, "..."
+ print("[numpydoc] Phantom importing modules from", fn, "...") import_phantom_module(fn) #------------------------------------------------------------------------------ @@ -129,7 +130,7 @@ def base_cmp(a, b): doc = "%s%s\n\n%s" % (funcname, argspec, doc) obj = lambda: 0 obj.__argspec_is_invalid_ = True - obj.func_name = funcname + obj.__name__ = funcname obj.__name__ = name obj.__doc__ = doc if inspect.isclass(object_cache[parent]): diff --git a/doc/sphinxext/plot_directive.py b/doc/sphinxext/plot_directive.py index cacd53dbc2699..b86c43249dbe8 100755 --- a/doc/sphinxext/plot_directive.py +++ b/doc/sphinxext/plot_directive.py @@ -75,7 +75,8 @@ """ -import sys, os, glob, shutil, imp, warnings, cStringIO, re, textwrap, traceback +from pandas.compat import range, cStringIO as StringIO, map +import sys, os, glob, shutil, imp, warnings, re, textwrap, traceback import sphinx import warnings @@ -257,7 +258,7 @@ def run(arguments, content, options, state_machine, state, lineno): # is it in doctest format? is_doctest = contains_doctest(code) - if options.has_key('format'): + if 'format' in options: if options['format'] == 'python': is_doctest = False else: @@ -291,7 +292,7 @@ def run(arguments, content, options, state_machine, state, lineno): results = makefig(code, source_file_name, build_dir, output_base, config) errors = [] - except PlotError, err: + except PlotError as err: reporter = state.memo.reporter sm = reporter.system_message( 2, "Exception occurred in plotting %s: %s" % (output_base, err), @@ -448,7 +449,7 @@ def run_code(code, code_path, ns=None): # Redirect stdout stdout = sys.stdout - sys.stdout = cStringIO.StringIO() + sys.stdout = StringIO() # Reset sys.argv old_sys_argv = sys.argv @@ -460,9 +461,9 @@ def run_code(code, code_path, ns=None): if ns is None: ns = {} if not ns: - exec setup.config.plot_pre_code in ns - exec code in ns - except (Exception, SystemExit), err: + exec(setup.config.plot_pre_code, ns) + exec(code, ns) + except (Exception, SystemExit) as err: raise PlotError(traceback.format_exc()) finally: os.chdir(pwd) @@ -524,7 +525,7 @@ def makefig(code, code_path, output_dir, output_base, config): all_exists = True for i, code_piece in enumerate(code_pieces): images = [] - for j in xrange(1000): + for j in range(1000): img = ImageFile('%s_%02d_%02d' % (output_base, i, j), output_dir) for format, dpi in formats: if out_of_date(code_path, img.filename(format)): @@ -570,7 +571,7 @@ def makefig(code, code_path, output_dir, output_base, config): try: figman.canvas.figure.savefig(img.filename(format), dpi=dpi, bbox_inches='tight') - except exceptions.BaseException, err: + except exceptions.BaseException as err: raise PlotError(traceback.format_exc()) img.formats.append(format) diff --git a/doc/sphinxext/tests/test_docscrape.py b/doc/sphinxext/tests/test_docscrape.py index 1d775e99e4f4f..ef2dfacc5b560 100755 --- a/doc/sphinxext/tests/test_docscrape.py +++ b/doc/sphinxext/tests/test_docscrape.py @@ -1,3 +1,4 @@ +from __future__ import print_function # -*- encoding:utf-8 -*- import sys, os @@ -6,6 +7,7 @@ from docscrape import NumpyDocString, FunctionDoc, ClassDoc from docscrape_sphinx import SphinxDocString, SphinxClassDoc from nose.tools import * +from pandas.compat import u doc_txt = '''\ numpy.multivariate_normal(mean, cov, shape=None) @@ -143,7 +145,7 @@ def test_examples(): def test_index(): assert_equal(doc['index']['default'], 'random') - print doc['index'] + print(doc['index']) assert_equal(len(doc['index']), 2) assert_equal(len(doc['index']['refguide']), 2) @@ -287,7 
+289,7 @@ def test_sphinx_str(): The drawn samples, arranged according to `shape`. If the shape given is (m,n,...), then the shape of `out` is is (m,n,...,N). - + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional value drawn from the distribution. @@ -296,12 +298,12 @@ def test_sphinx_str(): Certain warnings apply. .. seealso:: - + :obj:`some`, :obj:`other`, :obj:`funcs` - + :obj:`otherfunc` relationship - + .. rubric:: Notes Instead of specifying the full covariance matrix, popular @@ -348,7 +350,7 @@ def test_sphinx_str(): [True, True] """) - + doc2 = NumpyDocString(""" Returns array of indices of the maximum values of along the given axis. @@ -491,7 +493,7 @@ def test_unicode(): äää """) - assert doc['Summary'][0] == u'öäöäöäöäöåååå'.encode('utf-8') + assert doc['Summary'][0] == u('öäöäöäöäöåååå').encode('utf-8') def test_plot_examples(): cfg = dict(use_plots=True) @@ -509,7 +511,7 @@ def test_plot_examples(): Examples -------- .. plot:: - + import matplotlib.pyplot as plt plt.plot([1,2,3],[4,5,6]) plt.show() diff --git a/doc/sphinxext/traitsdoc.py b/doc/sphinxext/traitsdoc.py index 0fcf2c1cd38c9..8ec57a607ffb9 100755 --- a/doc/sphinxext/traitsdoc.py +++ b/doc/sphinxext/traitsdoc.py @@ -18,13 +18,14 @@ import os import pydoc -import docscrape -import docscrape_sphinx -from docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString +from pandas.compat import callable +from . import docscrape +from . import docscrape_sphinx +from .docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString -import numpydoc +from . import numpydoc -import comment_eater +from . import comment_eater class SphinxTraitsDoc(SphinxClassDoc): def __init__(self, cls, modulename='', func_doc=SphinxFunctionDoc): diff --git a/examples/finance.py b/examples/finance.py index 24aa337a84024..91ac57f67d91d 100644 --- a/examples/finance.py +++ b/examples/finance.py @@ -3,6 +3,7 @@ """ from datetime import datetime +from pandas.compat import zip import matplotlib.finance as fin import numpy as np diff --git a/ez_setup.py b/ez_setup.py index de65d3c1f0375..6f63b856f06c9 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -13,6 +13,7 @@ This file can also be run as a script to install or upgrade setuptools. """ +from __future__ import print_function import sys DEFAULT_VERSION = "0.6c11" DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[ @@ -75,10 +76,10 @@ def _validate_md5(egg_name, data): if egg_name in md5_data: digest = md5(data).hexdigest() if digest != md5_data[egg_name]: - print >>sys.stderr, ( + print(( "md5 validation of %s failed! (Possible download problem?)" % egg_name - ) + ), file=sys.stderr) sys.exit(2) return data @@ -113,14 +114,14 @@ def do_download(): try: pkg_resources.require("setuptools>=" + version) return - except pkg_resources.VersionConflict, e: + except pkg_resources.VersionConflict as e: if was_imported: - print >>sys.stderr, ( + print(( "The required version of setuptools (>=%s) is not available, and\n" "can't be installed while this script is running. Please install\n" " a more recent version first, using 'easy_install -U setuptools'." "\n\n(Currently using %r)" - ) % (version, e.args[0]) + ) % (version, e.args[0]), file=sys.stderr) sys.exit(2) else: del pkg_resources, sys.modules['pkg_resources'] # reload ok @@ -199,10 +200,10 @@ def main(argv, version=DEFAULT_VERSION): os.unlink(egg) else: if setuptools.__version__ == '0.0.1': - print >>sys.stderr, ( + print(( "You have an obsolete version of setuptools installed. 
Please\n" "remove it from your system entirely before rerunning this script." - ) + ), file=sys.stderr) sys.exit(2) req = "setuptools>=" + version @@ -221,8 +222,8 @@ def main(argv, version=DEFAULT_VERSION): from setuptools.command.easy_install import main main(argv) else: - print "Setuptools version", version, "or greater has been installed." - print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + print("Setuptools version", version, "or greater has been installed.") + print('(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)') def update_md5(filenames): @@ -236,8 +237,7 @@ def update_md5(filenames): md5_data[base] = md5(f.read()).hexdigest() f.close() - data = [" %r: %r,\n" % it for it in md5_data.items()] - data.sort() + data = sorted([" %r: %r,\n" % it for it in md5_data.items()]) repl = "".join(data) import inspect @@ -248,7 +248,7 @@ def update_md5(filenames): match = re.search("\nmd5_data = {\n([^}]+)}", src) if not match: - print >>sys.stderr, "Internal error!" + print("Internal error!", file=sys.stderr) sys.exit(2) src = src[:match.start(1)] + repl + src[match.end(1):] diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index e69de29bb2d1d..eaf2928e4482c 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -0,0 +1,698 @@ +""" +compat +====== + +Cross-compatible functions for Python 2 and 3. + +Key items to import for 2/3 compatible code: +* iterators: range(), map(), zip(), filter(), reduce() +* lists: lrange(), lmap(), lzip(), lfilter() +* unicode: u() [u"" is a syntax error in Python 3.0-3.2] +* longs: long (int in Python 3) +* callable +* iterable method compatibility: iteritems, iterkeys, itervalues + * Uses the original method if available, otherwise uses items, keys, values. +* types: + * text_type: unicode in Python 2, str in Python 3 + * binary_type: str in Python 2, bythes in Python 3 + * string_types: basestring in Python 2, str in Python 3 +* bind_method: binds functions to classes + +Python 2.6 compatibility: +* OrderedDict +* Counter + +Other items: +* OrderedDefaultDict +""" +# pylint disable=W0611 +import functools +import itertools +from distutils.version import LooseVersion +from itertools import product +import sys +import types + +PY3 = (sys.version_info[0] >= 3) +# import iterator versions of these functions + +try: + import __builtin__ as builtins + # not writeable when instantiated with string, doesn't handle unicode well + from cStringIO import StringIO as cStringIO + # always writeable + from StringIO import StringIO + BytesIO = StringIO + import cPickle +except ImportError: + import builtins + from io import StringIO, BytesIO + cStringIO = StringIO + import pickle as cPickle + + +if PY3: + def isidentifier(s): + return s.isidentifier() + + def str_to_bytes(s, encoding='ascii'): + return s.encode(encoding) + + def bytes_to_str(b, encoding='utf-8'): + return b.decode(encoding) + + # have to explicitly put builtins into the namespace + range = range + map = map + zip = zip + filter = filter + reduce = functools.reduce + long = int + unichr = chr + + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) +else: + # Python 2 + import re + _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") + + def isidentifier(s, 
dotted=False): + return bool(_name_re.match(s)) + + def str_to_bytes(s, encoding='ascii'): + return s + + def bytes_to_str(b, encoding='ascii'): + return b + + range = xrange + zip = itertools.izip + filter = itertools.ifilter + map = itertools.imap + reduce = reduce + long = long + unichr = unichr + + # Python 2-builtin ranges produce lists + lrange = builtins.range + lzip = builtins.zip + lmap = builtins.map + lfilter = builtins.filter + + +def iteritems(obj, **kwargs): + """replacement for six's iteritems for Python2/3 compat + uses 'iteritems' if available and otherwise uses 'items'. + + Passes kwargs to method.""" + func = getattr(obj, "iteritems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def iterkeys(obj, **kwargs): + func = getattr(obj, "iterkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def itervalues(obj, **kwargs): + func = getattr(obj, "itervalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def bind_method(cls, name, func): + """Bind a method to class, python 2 and python 3 compatible. + + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + + Returns + ------- + None + """ + # only python 2 has bound/unbound method issue + if not PY3: + setattr(cls, name, types.MethodType(func, None, cls)) + else: + setattr(cls, name, func) +# ---------------------------------------------------------------------------- +# functions largely based / taken from the six module + +# Much of the code in this module comes from Benjamin Peterson's six library. +# The license for this library can be found in LICENSES/SIX and the code can be +# found at https://bitbucket.org/gutworth/six + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + def u(s): + return s +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + def u(s): + return unicode(s, "unicode_escape") + +try: + # callable reintroduced in later versions of Python + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + +# ---------------------------------------------------------------------------- +# Python 2.6 compatibility shims +# + +# OrderedDict Shim from Raymond Hettinger, python core dev +# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ +# here to support versions before 2.6 +if not PY3: + # don't need this except in 2.6 + try: + from thread import get_ident as _get_ident + except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class _OrderedDict(dict): + + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular + # dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked + # list. The circular doubly linked list starts and ends with a sentinel + # element. The sentinel element never gets deleted (this simplifies the + # algorithm). Each link is stored as a list of length three: [PREV, NEXT, + # KEY]. 
+ + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the + # linked list, and the inherited dictionary is updated with the new + # key/value pair. + if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor + # nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in itervalues(self.__map): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if + false. + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. 
+ + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does:for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + # let subclasses override update without breaking __init__ + __update = update + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the\ + corresponding value. If key is not found, d is returned if given, + otherwise KeyError is raised. + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and + values equal to v (which defaults to None). + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is + order-sensitive while comparison to a regular mapping is + order-insensitive. + ''' + if isinstance(other, OrderedDict): + return (len(self) == len(other) and + list(self.items()) == list(other.items())) + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) + + +# {{{ http://code.activestate.com/recipes/576611/ (r11) + +try: + from operator import itemgetter + from heapq import nlargest +except ImportError: + pass + + +class _Counter(dict): + + '''Dict subclass for counting hashable objects. 
Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> Counter('zyzygy') + Counter({'y': 3, 'z': 2, 'g': 1}) + + ''' + + def __init__(self, iterable=None, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. + + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + self.update(iterable, **kwds) + + def __missing__(self, key): + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. + + >>> Counter('abracadabra').most_common(3) + [('a', 5), ('r', 2), ('b', 2)] + + ''' + if n is None: + return sorted(iteritems(self), key=itemgetter(1), reverse=True) + return nlargest(n, iteritems(self), key=itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + If an element's count has been set to zero or is a negative number, + elements() will ignore it. + + ''' + for elem, count in iteritems(self): + for _ in range(count): + yield elem + + # Override dict methods where the meaning changes for Counter objects. + + @classmethod + def fromkeys(cls, iterable, v=None): + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(self, iterable=None, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + if iterable is not None: + if hasattr(iterable, 'iteritems'): + if self: + self_get = self.get + for elem, count in iteritems(iterable): + self[elem] = self_get(elem, 0) + count + else: + dict.update( + self, iterable) # fast path when counter is empty + else: + self_get = self.get + for elem in iterable: + self[elem] = self_get(elem, 0) + 1 + if kwds: + self.update(kwds) + + def copy(self): + 'Like dict.copy() but returns a Counter instance instead of a dict.' + return Counter(self) + + def __delitem__(self, elem): + '''Like dict.__delitem__() but does not raise KeyError for missing + values.''' + if elem in self: + dict.__delitem__(self, elem) + + def __repr__(self): + if not self: + return '%s()' % self.__class__.__name__ + items = ', '.join(map('%r: %r'.__mod__, self.most_common())) + return '%s({%s})' % (self.__class__.__name__, items) + + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. 
+ + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] + other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. + + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] - other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. + + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _max = max + result = Counter() + for elem in set(self) | set(other): + newcount = _max(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _min = min + result = Counter() + if len(self) < len(other): + self, other = other, self + for elem in filter(self.__contains__, other): + newcount = _min(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + +if sys.version_info[:2] < (2, 7): + OrderedDict = _OrderedDict + Counter = _Counter +else: + from collections import OrderedDict, Counter + +# http://stackoverflow.com/questions/4126348 +# Thanks to @martineau at SO + +from dateutil import parser as _date_parser +import dateutil +if LooseVersion(dateutil.__version__) < '2.0': + @functools.wraps(_date_parser.parse) + def parse_date(timestr, *args, **kwargs): + timestr = bytes(timestr) + return _date_parser.parse(timestr, *args, **kwargs) +else: + parse_date = _date_parser.parse + +class OrderedDefaultdict(OrderedDict): + + def __init__(self, *args, **kwargs): + newdefault = None + newargs = () + if args: + newdefault = args[0] + if not (newdefault is None or callable(newdefault)): + raise TypeError('first argument must be callable or None') + newargs = args[1:] + self.default_factory = newdefault + super(self.__class__, self).__init__(*newargs, **kwargs) + + def __missing__(self, key): + if self.default_factory is None: + raise KeyError(key) + self[key] = value = self.default_factory() + return value + + def __reduce__(self): # optional, for pickle support + args = self.default_factory if self.default_factory else tuple() + return type(self), args, None, None, list(self.items()) diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py index 59a9bbdfbdb9e..3dab5b1f0451e 100644 --- a/pandas/compat/scipy.py +++ b/pandas/compat/scipy.py @@ -2,6 +2,7 @@ Shipping functions from SciPy to reduce dependency on having SciPy installed """ +from pandas.compat import range, lrange import numpy as np @@ -118,12 +119,12 @@ def rankdata(a): sumranks = 0 dupcount = 0 newarray = np.zeros(n, float) - for i in xrange(n): + for i in range(n): sumranks += i dupcount += 1 if i == n - 1 or svec[i] != svec[i + 1]: averank = sumranks / float(dupcount) + 1 - for j in xrange(i - dupcount + 1, i + 1): + for j in range(i - dupcount + 1, i + 1): newarray[ivec[j]] = averank sumranks = 0 dupcount = 0 @@ -223,9 
+224,9 @@ def percentileofscore(a, score, kind='rank'): if kind == 'rank': if not(np.any(a == score)): a = np.append(a, score) - a_len = np.array(range(len(a))) + a_len = np.array(lrange(len(a))) else: - a_len = np.array(range(len(a))) + 1.0 + a_len = np.array(lrange(len(a))) + 1.0 a = np.sort(a) idx = [a == score] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4bb990a57cb4d..a649edfada739 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -8,6 +8,7 @@ import pandas.core.common as com import pandas.algos as algos import pandas.hashtable as htable +import pandas.compat as compat def match(to_match, values, na_sentinel=-1): @@ -31,7 +32,7 @@ def match(to_match, values, na_sentinel=-1): match : ndarray of integers """ values = com._asarray_tuplesafe(values) - if issubclass(values.dtype.type, basestring): + if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype='O') f = lambda htype, caster: _match_generic(to_match, values, htype, caster) diff --git a/pandas/core/array.py b/pandas/core/array.py index 0026dfcecc445..c9a8a00b7f2d7 100644 --- a/pandas/core/array.py +++ b/pandas/core/array.py @@ -16,7 +16,7 @@ _lift_types = [] -for _k, _v in _dtypes.iteritems(): +for _k, _v in _dtypes.items(): for _i in _v: _lift_types.append(_k + str(_i)) diff --git a/pandas/core/base.py b/pandas/core/base.py index 6122e78fa8bce..16fe28a804b6b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1,7 +1,7 @@ """ Base class(es) for all pandas objects. """ -from pandas.util import py3compat +from pandas import compat class StringMixin(object): """implements string methods so long as object defines a `__unicode__` method. @@ -15,7 +15,7 @@ def __str__(self): Yields Bytestring in Py2, Unicode String in py3. 
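(Annotation: a minimal standalone sketch of the dispatch this mixin performs, assuming only that `compat.PY3` is a boolean version flag; `StringMixinSketch` is a hypothetical name, not pandas code.)

    import sys

    PY3 = sys.version_info[0] >= 3   # stand-in for pandas.compat.PY3

    class StringMixinSketch(object):
        def __unicode__(self):
            return u"payload"

        def __bytes__(self):
            return self.__unicode__().encode('utf-8')

        def __str__(self):
            # text on Python 3, encoded bytes on Python 2
            if PY3:
                return self.__unicode__()
            return self.__bytes__()

Subclasses define `__unicode__` once and the mixin routes `str()` to the right text type for the running interpreter.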
""" - if py3compat.PY3: + if compat.PY3: return self.__unicode__() return self.__bytes__() diff --git a/pandas/core/common.py b/pandas/core/common.py index eba0379a2c824..7e835a5b8a7ac 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2,9 +2,10 @@ Misc tools for implementing data structures """ -import itertools import re from datetime import datetime +import codecs +import csv from numpy.lib.format import read_array, write_array import numpy as np @@ -13,11 +14,9 @@ import pandas.lib as lib import pandas.tslib as tslib -from pandas.util import py3compat -import codecs -import csv +from pandas import compat +from pandas.compat import StringIO, BytesIO, range, long, u, zip, map -from pandas.util.py3compat import StringIO, BytesIO from pandas.core.config import get_option from pandas.core import array as pa @@ -688,7 +687,7 @@ def _infer_dtype_from_scalar(val): dtype = val.dtype val = val.item() - elif isinstance(val, basestring): + elif isinstance(val, compat.string_types): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry @@ -781,7 +780,7 @@ def _maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ # in case we have a string that looked like a number - if issubclass(np.dtype(dtype).type, basestring): + if issubclass(np.dtype(dtype).type, compat.string_types): dtype = np.object_ return dtype, fill_value @@ -1168,7 +1167,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ if dtype is not None: - if isinstance(dtype, basestring): + if isinstance(dtype, compat.string_types): dtype = np.dtype(dtype) is_datetime64 = is_datetime64_dtype(dtype) @@ -1338,7 +1337,7 @@ def _join_unicode(lines, sep=''): try: return sep.join(lines) except UnicodeDecodeError: - sep = unicode(sep) + sep = compat.text_type(sep) return sep.join([x.decode('utf-8') if isinstance(x, str) else x for x in lines]) @@ -1363,7 +1362,7 @@ def iterpairs(seq): seq_it_next = iter(seq) next(seq_it_next) - return itertools.izip(seq_it, seq_it_next) + return zip(seq_it, seq_it_next) def split_ranges(mask): @@ -1398,7 +1397,7 @@ def banner(message): return '%s\n%s\n%s' % (bar, message, bar) def _long_prod(vals): - result = 1L + result = long(1) for x in vals: result *= x return result @@ -1478,7 +1477,7 @@ def _asarray_tuplesafe(values, dtype=None): result = np.asarray(values, dtype=dtype) - if issubclass(result.dtype.type, basestring): + if issubclass(result.dtype.type, compat.string_types): result = np.asarray(values, dtype=object) if result.ndim == 2: @@ -1494,7 +1493,7 @@ def _asarray_tuplesafe(values, dtype=None): def _index_labels_to_array(labels): - if isinstance(labels, (basestring, tuple)): + if isinstance(labels, (compat.string_types, tuple)): labels = [labels] if not isinstance(labels, (list, np.ndarray)): @@ -1609,13 +1608,13 @@ def is_re_compilable(obj): def is_list_like(arg): - return hasattr(arg, '__iter__') and not isinstance(arg, basestring) + return hasattr(arg, '__iter__') and not isinstance(arg, compat.string_types) def _is_sequence(x): try: iter(x) len(x) # it has a length - return not isinstance(x, basestring) and True + return not isinstance(x, compat.string_types) and True except Exception: return False @@ -1649,7 +1648,7 @@ def _astype_nansafe(arr, dtype, copy = True): return arr.astype(object) # in py3, timedelta64[ns] are int64 - elif (py3compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not py3compat.PY3 and 
dtype != _TD_DTYPE): + elif (compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not compat.PY3 and dtype != _TD_DTYPE): raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype)) return arr.astype(_TD_DTYPE) elif (np.issubdtype(arr.dtype, np.floating) and @@ -1703,7 +1702,10 @@ def readline(self): return self.reader.readline().encode('utf-8') def next(self): - return self.reader.next().encode("utf-8") + return next(self.reader).encode("utf-8") + + # Python 3 iterator + __next__ = next def _get_handle(path, mode, encoding=None, compression=None): @@ -1721,7 +1723,7 @@ def _get_handle(path, mode, encoding=None, compression=None): raise ValueError('Unrecognized compression type: %s' % compression) - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover if encoding: f = open(path, mode, encoding=encoding) else: @@ -1730,7 +1732,7 @@ def _get_handle(path, mode, encoding=None, compression=None): f = open(path, mode) return f -if py3compat.PY3: # pragma: no cover +if compat.PY3: # pragma: no cover def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): # ignore encoding return csv.reader(f, dialect=dialect, **kwds) @@ -1752,8 +1754,11 @@ def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): self.reader = csv.reader(f, dialect=dialect, **kwds) def next(self): - row = self.reader.next() - return [unicode(s, "utf-8") for s in row] + row = next(self.reader) + return [compat.text_type(s, "utf-8") for s in row] + + # python 3 iterator + __next__ = next def __iter__(self): # pragma: no cover return self @@ -1951,9 +1956,9 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq,set): - fmt = u"set([%s])" + fmt = u("set([%s])") else: - fmt = u"[%s]" if hasattr(seq, '__setitem__') else u"(%s)" + fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") nitems = get_option("max_seq_items") or len(seq) @@ -1976,14 +1981,14 @@ def _pprint_dict(seq, _nest_lvl=0,**kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. """ - fmt = u"{%s}" + fmt = u("{%s}") pairs = [] - pfmt = u"%s: %s" + pfmt = u("%s: %s") nitems = get_option("max_seq_items") or len(seq) - for k, v in seq.items()[:nitems]: + for k, v in list(seq.items())[:nitems]: pairs.append(pfmt % (pprint_thing(k,_nest_lvl+1,**kwds), pprint_thing(v,_nest_lvl+1,**kwds))) @@ -2025,7 +2030,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): #should deal with it himself. 
try: - result = unicode(thing) # we should try this first + result = compat.text_type(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") @@ -2039,17 +2044,17 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): translate.update(escape_chars) else: translate = escape_chars - escape_chars = escape_chars.keys() + escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() for c in escape_chars: result = result.replace(c, translate[c]) - return unicode(result) + return compat.text_type(result) - if (py3compat.PY3 and hasattr(thing, '__next__')) or \ + if (compat.PY3 and hasattr(thing, '__next__')) or \ hasattr(thing, 'next'): - return unicode(thing) + return compat.text_type(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl,quote_strings=True) @@ -2057,8 +2062,8 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): get_option("display.pprint_nest_depth"): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings) - elif isinstance(thing,basestring) and quote_strings: - if py3compat.PY3: + elif isinstance(thing,compat.string_types) and quote_strings: + if compat.PY3: fmt = "'%s'" else: fmt = "u'%s'" @@ -2066,7 +2071,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): else: result = as_escaped_unicode(thing) - return unicode(result) # always unicode + return compat.text_type(result) # always unicode def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): diff --git a/pandas/core/config.py b/pandas/core/config.py index ae7c71d082a89..a14e8afa21322 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -1,9 +1,7 @@ """ The config module holds package-wide configurables and provides a uniform API for working with them. -""" -""" Overview ======== @@ -54,6 +52,8 @@ from collections import namedtuple import warnings +from pandas.compat import map, lmap, u +import pandas.compat as compat DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple( @@ -128,8 +128,8 @@ def _set_option(*args, **kwargs): # if 1 kwarg then it must be silent=True or silent=False if nkwargs: - k, = kwargs.keys() - v, = kwargs.values() + k, = list(kwargs.keys()) + v, = list(kwargs.values()) if k != 'silent': raise ValueError("the only allowed keyword argument is 'silent', " @@ -149,7 +149,7 @@ def _describe_option(pat='', _print_desc=True): if len(keys) == 0: raise KeyError('No such keys(s)') - s = u'' + s = u('') for k in keys: # filter by pat s += _build_option_description(k) @@ -209,7 +209,7 @@ def __getattr__(self, key): return _get_option(prefix) def __dir__(self): - return self.d.keys() + return list(self.d.keys()) # For user convenience, we'd like to have the available options described # in the docstring. For dev convenience we'd like to generate the docstrings @@ -232,7 +232,7 @@ def __call__(self, *args, **kwds): @property def __doc__(self): opts_desc = _describe_option('all', _print_desc=False) - opts_list = pp_options_list(_registered_options.keys()) + opts_list = pp_options_list(list(_registered_options.keys())) return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list) @@ -351,7 +351,7 @@ def __init__(self, *args): errmsg = "Need to invoke as option_context(pat,val,[(pat,val),..))." 
raise AssertionError(errmsg) - ops = zip(args[::2], args[1::2]) + ops = list(zip(args[::2], args[1::2])) undo = [] for pat, val in ops: undo.append((pat, _get_option(pat, silent=True))) @@ -588,9 +588,9 @@ def _build_option_description(k): o = _get_registered_option(k) d = _get_deprecated_option(k) - s = u'%s: ' % k + s = u('%s: ') % k if o: - s += u'[default: %s] [currently: %s]' % (o.defval, _get_option(k, True)) + s += u('[default: %s] [currently: %s]') % (o.defval, _get_option(k, True)) if o.doc: s += '\n' + '\n '.join(o.doc.strip().split('\n')) @@ -598,9 +598,9 @@ def _build_option_description(k): s += 'No description available.\n' if d: - s += u'\n\t(Deprecated' - s += (u', use `%s` instead.' % d.rkey if d.rkey else '') - s += u')\n' + s += u('\n\t(Deprecated') + s += (u(', use `%s` instead.') % d.rkey if d.rkey else '') + s += u(')\n') s += '\n' return s @@ -729,15 +729,16 @@ def is_instance_factory(_type): True if x is an instance of `_type` """ + if isinstance(_type, (tuple, list)): + _type = tuple(_type) + from pandas.core.common import pprint_thing + type_repr = "|".join(map(pprint_thing, _type)) + else: + type_repr = "'%s'" % _type def inner(x): - if isinstance(_type,(tuple,list)) : - if not any([isinstance(x,t) for t in _type]): - from pandas.core.common import pprint_thing as pp - pp_values = map(pp, _type) - raise ValueError("Value must be an instance of %s" % pp("|".join(pp_values))) - elif not isinstance(x, _type): - raise ValueError("Value must be an instance of '%s'" % str(_type)) + if not isinstance(x, _type): + raise ValueError("Value must be an instance of %s" % type_repr) return inner @@ -745,7 +746,7 @@ def is_one_of_factory(legal_values): def inner(x): from pandas.core.common import pprint_thing as pp if not x in legal_values: - pp_values = map(pp, legal_values) + pp_values = lmap(pp, legal_values) raise ValueError("Value must be one of %s" % pp("|".join(pp_values))) return inner @@ -756,5 +757,5 @@ def inner(x): is_bool = is_type_factory(bool) is_float = is_type_factory(float) is_str = is_type_factory(str) -is_unicode = is_type_factory(unicode) -is_text = is_instance_factory(basestring) +is_unicode = is_type_factory(compat.text_type) +is_text = is_instance_factory((str, bytes)) diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py index d6da94856b140..228dc7574f8f3 100644 --- a/pandas/core/datetools.py +++ b/pandas/core/datetools.py @@ -3,7 +3,6 @@ from pandas.tseries.tools import * from pandas.tseries.offsets import * from pandas.tseries.frequencies import * -from dateutil import parser day = DateOffset() bday = BDay() diff --git a/pandas/core/expressions.py b/pandas/core/expressions.py index abe891b82410c..27c06e23b5a9e 100644 --- a/pandas/core/expressions.py +++ b/pandas/core/expressions.py @@ -93,10 +93,10 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): local_dict={ 'a_value' : a_value, 'b_value' : b_value }, casting='safe', **eval_kwargs) - except (ValueError), detail: + except (ValueError) as detail: if 'unknown type object' in str(detail): pass - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError(str(detail)) @@ -126,10 +126,10 @@ def _where_numexpr(cond, a, b, raise_on_error = False): 'a_value' : a_value, 'b_value' : b_value }, casting='safe') - except (ValueError), detail: + except (ValueError) as detail: if 'unknown type object' in str(detail): pass - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError(str(detail)) diff 
--git a/pandas/core/format.py b/pandas/core/format.py index c9beb729b2436..30856d371c084 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,17 +1,13 @@ +from __future__ import print_function # pylint: disable=W0141 -from itertools import izip +from pandas import compat import sys -try: - from StringIO import StringIO -except: - from io import StringIO - +from pandas.compat import StringIO, lzip, range, map, zip, reduce, u, OrderedDict from pandas.core.common import adjoin, isnull, notnull from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.util import py3compat -from pandas.util.compat import OrderedDict +from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option, set_option, reset_option import pandas.core.common as com @@ -71,7 +67,7 @@ class SeriesFormatter(object): def __init__(self, series, buf=None, header=True, length=True, na_rep='NaN', name=False, float_format=None, dtype=True): self.series = series - self.buf = buf if buf is not None else StringIO(u"") + self.buf = buf if buf is not None else StringIO() self.name = name self.na_rep = na_rep self.length = length @@ -83,7 +79,7 @@ def __init__(self, series, buf=None, header=True, length=True, self.dtype = dtype def _get_footer(self): - footer = u'' + footer = u('') if self.name: if getattr(self.series.index, 'freq', None): @@ -108,7 +104,7 @@ def _get_footer(self): footer += ', ' footer += 'dtype: %s' % com.pprint_thing(self.series.dtype.name) - return unicode(footer) + return compat.text_type(footer) def _get_formatted_index(self): index = self.series.index @@ -131,7 +127,7 @@ def to_string(self): series = self.series if len(series) == 0: - return u'' + return u('') fmt_index, have_header = self._get_formatted_index() fmt_values = self._get_formatted_values() @@ -140,7 +136,7 @@ def to_string(self): pad_space = min(maxlen, 60) result = ['%s %s'] * len(fmt_values) - for i, (k, v) in enumerate(izip(fmt_index[1:], fmt_values)): + for i, (k, v) in enumerate(zip(fmt_index[1:], fmt_values)): idx = k.ljust(pad_space) result[i] = result[i] % (idx, v) @@ -151,10 +147,10 @@ def to_string(self): if footer: result.append(footer) - return unicode(u'\n'.join(result)) + return compat.text_type(u('\n').join(result)) def _strlen_func(): - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover _strlen = len else: encoding = get_option("display.encoding") @@ -285,7 +281,7 @@ def to_string(self, force_unicode=None): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, com.pprint_thing(frame.columns), com.pprint_thing(frame.index))) @@ -347,7 +343,7 @@ def get_col_type(dtype): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, frame.columns, frame.index)) strcols = [[info_line]] @@ -360,7 +356,7 @@ def get_col_type(dtype): column_format = 'l%s' % ''.join(map(get_col_type, dtypes)) else: column_format = '%s' % ''.join(map(get_col_type, dtypes)) - elif not isinstance(column_format, basestring): + elif not isinstance(column_format, compat.string_types): raise AssertionError(('column_format must be str or unicode, not %s' % type(column_format))) @@ -369,7 +365,7 @@ def write(buf, frame, column_format, strcols): buf.write('\\toprule\n') 
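(Annotation on the izip -> zip swap just below: `zip(*strcols)` transposes a list of columns into rows, and the lazy py3 zip is fine here because the result is iterated exactly once. A small standalone illustration:)

    strcols = [['a', '1', '2'],    # first column: header then values
               ['b', '3', '4']]    # second column
    rows = list(zip(*strcols))     # -> [('a', 'b'), ('1', '3'), ('2', '4')]
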
nlevels = frame.index.nlevels - for i, row in enumerate(izip(*strcols)): + for i, row in enumerate(zip(*strcols)): if i == nlevels: buf.write('\\midrule\n') # End of header crow = [(x.replace('_', '\\_') @@ -383,7 +379,7 @@ def write(buf, frame, column_format, strcols): if hasattr(self.buf, 'write'): write(self.buf, frame, column_format, strcols) - elif isinstance(self.buf, basestring): + elif isinstance(self.buf, compat.string_types): with open(self.buf, 'w') as f: write(f, frame, column_format, strcols) else: @@ -404,7 +400,7 @@ def to_html(self, classes=None): html_renderer = HTMLFormatter(self, classes=classes) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) - elif isinstance(self.buf, basestring): + elif isinstance(self.buf, compat.string_types): with open(self.buf, 'w') as f: html_renderer.write_result(f) else: @@ -419,13 +415,13 @@ def is_numeric_dtype(dtype): if isinstance(self.columns, MultiIndex): fmt_columns = self.columns.format(sparsify=False, adjoin=False) - fmt_columns = zip(*fmt_columns) + fmt_columns = lzip(*fmt_columns) dtypes = self.frame.dtypes.values need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) - str_columns = zip(*[[' ' + y + str_columns = list(zip(*[[' ' + y if y not in self.formatters and need_leadsp[x] else y for y in x] - for x in fmt_columns]) + for x in fmt_columns])) if self.sparsify: str_columns = _sparsify(str_columns) @@ -718,7 +714,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) - idx_values = zip(*idx_values) + idx_values = lzip(*idx_values) if self.fmt.sparsify: @@ -749,9 +745,9 @@ def _write_hierarchical_rows(self, fmt_values, indent): nindex_levels=len(levels) - sparse_offset) else: for i in range(len(frame)): - idx_values = zip(*frame.index.format(sparsify=False, + idx_values = list(zip(*frame.index.format(sparsify=False, adjoin=False, - names=False)) + names=False))) row = [] row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) @@ -872,7 +868,7 @@ def _helper_csv(self, writer, na_rep=None, cols=None, cols = self.columns series = {} - for k, v in self.obj._series.iteritems(): + for k, v in compat.iteritems(self.obj._series): series[k] = v.values @@ -1069,7 +1065,7 @@ def _save(self): chunksize = self.chunksize chunks = int(nrows / chunksize)+1 - for i in xrange(chunks): + for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, nrows) if start_i >= end_i: @@ -1304,7 +1300,7 @@ def _format_hierarchical_rows(self): index_labels = self.index_label # if index labels are not empty go ahead and dump - if (filter(lambda x: x is not None, index_labels) + if (any(x is not None for x in index_labels) and self.header is not False): # if isinstance(self.df.columns, MultiIndex): # self.rowcounter += 1 @@ -1836,9 +1832,9 @@ def __call__(self, num): mant = sign * dnum / (10 ** pow10) if self.accuracy is None: # pragma: no cover - format_str = u"% g%s" + format_str = u("% g%s") else: - format_str = (u"%% .%if%%s" % self.accuracy) + format_str = (u("%% .%if%%s") % self.accuracy) formatted = format_str % (mant, prefix) @@ -1864,8 +1860,8 @@ def set_eng_float_format(precision=None, accuracy=3, use_eng_prefix=False): def _put_lines(buf, lines): - if any(isinstance(x, unicode) for x in lines): - lines = [unicode(x) for x in lines] + if any(isinstance(x, compat.text_type) for x in lines): + lines = [compat.text_type(x) for x in lines] buf.write('\n'.join(lines)) @@ -1900,4 +1896,4 @@ def 
_binify(cols, line_width): 1134250., 1219550., 855736.85, 1042615.4286, 722621.3043, 698167.1818, 803750.]) fmt = FloatArrayFormatter(arr, digits=7) - print (fmt.get_result()) + print(fmt.get_result()) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 22dc27ff977d9..902a6c736b569 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,12 +12,12 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from itertools import izip -from StringIO import StringIO +from pandas.compat import range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict +from pandas import compat import operator import sys import collections -import itertools +import warnings from numpy import nan as NA import numpy as np @@ -37,8 +37,7 @@ from pandas.core.series import Series, _radd_compat import pandas.core.expressions as expressions from pandas.compat.scipy import scoreatpercentile as _quantile -from pandas.util.compat import OrderedDict -from pandas.util import py3compat +from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.util.decorators import deprecate, Appender, Substitution @@ -381,7 +380,7 @@ class DataFrame(NDFrame): 'columns': 1 } - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _AXIS_NAMES = dict((v, k) for k, v in compat.iteritems(_AXIS_NUMBERS)) def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): @@ -440,7 +439,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, 'incompatible data and dtype') if arr.ndim == 0 and index is not None and columns is not None: - if isinstance(data, basestring) and dtype is None: + if isinstance(data, compat.string_types) and dtype is None: dtype = np.object_ if dtype is None: dtype, data = _infer_dtype_from_scalar(data) @@ -490,10 +489,10 @@ def _init_dict(self, data, index, columns, dtype=None): # prefilter if columns passed - data = dict((k, v) for k, v in data.iteritems() if k in columns) + data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) if index is None: - index = extract_index(data.values()) + index = extract_index(list(data.values())) else: index = _ensure_index(index) @@ -518,9 +517,9 @@ def _init_dict(self, data, index, columns, dtype=None): data_names.append(k) arrays.append(v) else: - keys = data.keys() + keys = list(data.keys()) if not isinstance(data, OrderedDict): - keys = _try_sort(data.keys()) + keys = _try_sort(list(data.keys())) columns = data_names = Index(keys) arrays = [data[k] for k in columns] @@ -566,14 +565,12 @@ def _wrap_array(self, arr, axes, copy=False): @property def _verbose_info(self): - import warnings warnings.warn('The _verbose_info property will be removed in version ' '0.13. please use "max_info_rows"', FutureWarning) return get_option('display.max_info_rows') is None @_verbose_info.setter def _verbose_info(self, value): - import warnings warnings.warn('The _verbose_info property will be removed in version ' '0.13. please use "max_info_rows"', FutureWarning) @@ -656,7 +653,7 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
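(Annotation for the `StringIO(u(""))` call just below: presumably pandas.compat resolves StringIO per interpreter, roughly as sketched here -- the exact py2 fallback chosen by the compat module may differ.)

    import sys

    if sys.version_info[0] >= 3:
        from io import StringIO, BytesIO
    else:                                  # py2 fallback (assumed)
        from cStringIO import StringIO
        from io import BytesIO
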
""" - buf = StringIO(u"") + buf = StringIO(u("")) fits_vertical = self._repr_fits_vertical_() fits_horizontal = False if fits_vertical: @@ -683,7 +680,7 @@ def __unicode__(self): self.info(buf=buf, verbose=verbose) value = buf.getvalue() - if not type(value) == unicode: + if not isinstance(value, compat.text_type): raise AssertionError() return value @@ -715,7 +712,7 @@ def _repr_html_(self): 'max-width:1500px;overflow:auto;">\n' + self.to_html() + '\n') else: - buf = StringIO(u"") + buf = StringIO(u("")) max_info_rows = get_option('display.max_info_rows') verbose = (max_info_rows is None or self.shape[0] <= max_info_rows) @@ -769,7 +766,7 @@ def iterrows(self): A generator that iterates over the rows of the frame. """ columns = self.columns - for k, v in izip(self.index, self.values): + for k, v in zip(self.index, self.values): s = v.view(Series) s.index = columns s.name = k @@ -785,11 +782,10 @@ def itertuples(self, index=True): arrays.append(self.index) # use integer indexing because of possible duplicate column names - arrays.extend(self.iloc[:, k] for k in xrange(len(self.columns))) - return izip(*arrays) + arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) + return zip(*arrays) - iterkv = iteritems - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover items = iteritems def __len__(self): @@ -851,7 +847,7 @@ def __contains__(self, key): __xor__ = _arith_method(operator.xor, '__xor__') # Python 2 division methods - if not py3compat.PY3: + if not compat.PY3: __div__ = _arith_method(operator.div, '__div__', '/', default_axis=None, fill_zeros=np.inf, truediv=False) __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', @@ -951,10 +947,10 @@ def from_dict(cls, data, orient='columns', dtype=None): if orient == 'index': if len(data) > 0: # TODO speed up Series case - if isinstance(data.values()[0], (Series, dict)): + if isinstance(list(data.values())[0], (Series, dict)): data = _from_nested_dict(data) else: - data, index = data.values(), data.keys() + data, index = list(data.values()), list(data.keys()) elif orient != 'columns': # pragma: no cover raise ValueError('only recognize index or columns for orient') @@ -978,16 +974,15 @@ def to_dict(self, outtype='dict'): ------- result : dict like {column -> {index -> value}} """ - import warnings if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " "columns will be omitted.", UserWarning) if outtype.lower().startswith('d'): - return dict((k, v.to_dict()) for k, v in self.iteritems()) + return dict((k, v.to_dict()) for k, v in compat.iteritems(self)) elif outtype.lower().startswith('l'): - return dict((k, v.tolist()) for k, v in self.iteritems()) + return dict((k, v.tolist()) for k, v in compat.iteritems(self)) elif outtype.lower().startswith('s'): - return dict((k, v) for k, v in self.iteritems()) + return dict((k, v) for k, v in compat.iteritems(self)) else: # pragma: no cover raise ValueError("outtype %s not understood" % outtype) @@ -1028,10 +1023,10 @@ def from_records(cls, data, index=None, exclude=None, columns=None, return cls() try: - if py3compat.PY3: + if compat.PY3: first_row = next(data) else: - first_row = data.next() + first_row = next(data) except StopIteration: return cls(index=index, columns=columns) @@ -1060,7 +1055,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, else: arrays = [] arr_columns = [] - for k, v in data.iteritems(): + for k, v in compat.iteritems(data): if k in columns: arr_columns.append(k) arrays.append(v) @@ -1093,7 
+1088,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, result_index = None if index is not None: - if (isinstance(index, basestring) or + if (isinstance(index, compat.string_types) or not hasattr(index, "__iter__")): i = columns.get_loc(index) exclude.add(index) @@ -1148,7 +1143,7 @@ def to_records(self, index=True, convert_datetime64=True): else: if isinstance(self.index, MultiIndex): # array of tuples to numpy cols. copy copy copy - ix_vals = map(np.array,zip(*self.index.values)) + ix_vals = lmap(np.array,zip(*self.index.values)) else: ix_vals = [self.index.values] @@ -1163,10 +1158,10 @@ def to_records(self, index=True, convert_datetime64=True): count += 1 elif index_names[0] is None: index_names = ['index'] - names = index_names + list(map(str, self.columns)) + names = index_names + lmap(str, self.columns) else: arrays = [self[c].values for c in self.columns] - names = list(map(str, self.columns)) + names = lmap(str, self.columns) dtype = np.dtype([(x, v.dtype) for x, v in zip(names, arrays)]) return np.rec.fromarrays(arrays, dtype=dtype, names=names) @@ -1194,7 +1189,7 @@ def from_items(cls, items, columns=None, orient='columns'): ------- frame : DataFrame """ - keys, values = zip(*items) + keys, values = lzip(*items) if orient == 'columns': if columns is not None: @@ -1393,7 +1388,6 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, or new (expanded format) if False) """ if nanRep is not None: # pragma: no cover - import warnings warnings.warn("nanRep is deprecated, use na_rep", FutureWarning) na_rep = nanRep @@ -1452,7 +1446,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', """ from pandas.io.excel import ExcelWriter need_save = False - if isinstance(excel_writer, basestring): + if isinstance(excel_writer, compat.string_types): excel_writer = ExcelWriter(excel_writer) need_save = True @@ -1529,7 +1523,6 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, """ Render a DataFrame to a console-friendly tabular output. """ - import warnings if force_unicode is not None: # pragma: no cover warnings.warn("force_unicode is deprecated, it will have no " "effect", FutureWarning) @@ -1578,7 +1571,6 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, Render a DataFrame as an HTML table. """ - import warnings if force_unicode is not None: # pragma: no cover warnings.warn("force_unicode is deprecated, it will have no " "effect", FutureWarning) @@ -1617,7 +1609,6 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, You can splice this into a LaTeX document. 
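(Annotation on the `first_row = next(data)` change earlier in this file's hunks: the builtin `next()` (Python 2.6+) calls `.next()` on py2 and `.__next__()` on py3, so one spelling serves both. Illustration:)

    records = iter([(1, 'a'), (2, 'b')])
    first_row = next(records)             # portable across 2.6+ and 3.x
    assert first_row == (1, 'a')
    assert next(records, None) == (2, 'b')
    assert next(records, None) is None    # default instead of StopIteration
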
""" - import warnings if force_unicode is not None: # pragma: no cover warnings.warn("force_unicode is deprecated, it will have no " "effect", FutureWarning) @@ -1679,7 +1670,7 @@ def info(self, verbose=True, buf=None, max_cols=None): counts = self.count() if len(cols) != len(counts): raise AssertionError('Columns must equal counts') - for col, count in counts.iteritems(): + for col, count in compat.iteritems(counts): col = com.pprint_thing(col) lines.append(_put_str(col, space) + '%d non-null values' % count) @@ -1687,7 +1678,7 @@ def info(self, verbose=True, buf=None, max_cols=None): lines.append(self.columns.summary(name='Columns')) counts = self.get_dtype_counts() - dtypes = ['%s(%d)' % k for k in sorted(counts.iteritems())] + dtypes = ['%s(%d)' % k for k in sorted(compat.iteritems(counts))] lines.append('dtypes: %s' % ', '.join(dtypes)) _put_lines(buf, lines) @@ -2016,7 +2007,6 @@ def _getitem_array(self, key): # go with the __setitem__ behavior since that is more consistent # with all other indexing behavior if isinstance(key, Series) and not key.index.equals(self.index): - import warnings warnings.warn("Boolean Series key will be reindexed to match " "DataFrame index.", UserWarning) elif len(key) != len(self.index): @@ -2419,8 +2409,6 @@ def lookup(self, row_labels, col_labels): The found values """ - from itertools import izip - n = len(row_labels) if n != len(col_labels): raise AssertionError('Row labels must have same size as ' @@ -2439,7 +2427,7 @@ def lookup(self, row_labels, col_labels): result = values.flat[flat_index] else: result = np.empty(n, dtype='O') - for i, (r, c) in enumerate(izip(row_labels, col_labels)): + for i, (r, c) in enumerate(zip(row_labels, col_labels)): result[i] = self.get_value(r, c) if result.dtype == 'O': @@ -2910,7 +2898,7 @@ def _maybe_cast(values, labels=None): if not drop: names = self.index.names - zipped = zip(self.index.levels, self.index.labels) + zipped = lzip(self.index.levels, self.index.labels) multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(zipped))): @@ -3030,7 +3018,7 @@ def filter(self, items=None, like=None, regex=None): if items is not None: return self.reindex(columns=[r for r in items if r in self]) elif like: - matchf = lambda x: (like in x if isinstance(x, basestring) + matchf = lambda x: (like in x if isinstance(x, compat.string_types) else like in str(x)) return self.select(matchf, axis=1) elif regex: @@ -3152,7 +3140,7 @@ def _m8_to_i8(x): if cols is None: values = list(_m8_to_i8(self.values.T)) else: - if np.iterable(cols) and not isinstance(cols, basestring): + if np.iterable(cols) and not isinstance(cols, compat.string_types): if isinstance(cols, tuple): if cols in self.columns: values = [self[cols]] @@ -3198,7 +3186,6 @@ def sort(self, columns=None, column=None, axis=0, ascending=True, sorted : DataFrame """ if column is not None: # pragma: no cover - import warnings warnings.warn("column is deprecated, use columns", FutureWarning) columns = column return self.sort_index(by=columns, axis=axis, ascending=ascending, @@ -3456,7 +3443,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, 'by column') result = self if inplace else self.copy() - for k, v in value.iteritems(): + for k, v in compat.iteritems(value): if k not in result: continue result[k].fillna(v, inplace=True) @@ -3580,13 +3567,11 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, raise AssertionError("'to_replace' must be 'None' if 'regex' is " "not a bool") if method is not 
None: - from warnings import warn - warn('the "method" argument is deprecated and will be removed in' + warnings.warn('the "method" argument is deprecated and will be removed in' 'v0.13; this argument has no effect') if axis is not None: - from warnings import warn - warn('the "axis" argument is deprecated and will be removed in' + warnings.warn('the "axis" argument is deprecated and will be removed in' 'v0.13; this argument has no effect') self._consolidate_inplace() @@ -3599,8 +3584,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, to_replace = regex regex = True - items = to_replace.items() - keys, values = itertools.izip(*items) + items = list(to_replace.items()) + keys, values = zip(*items) are_mappings = [isinstance(v, (dict, Series)) for v in values] @@ -3614,8 +3599,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, value_dict = {} for k, v in items: - to_rep_dict[k] = v.keys() - value_dict[k] = v.values() + to_rep_dict[k] = list(v.keys()) + value_dict[k] = list(v.values()) to_replace, value = to_rep_dict, value_dict else: @@ -3631,7 +3616,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if isinstance(to_replace, (dict, Series)): if isinstance(value, (dict, Series)): # {'A' : NA} -> {'A' : 0} new_data = self._data - for c, src in to_replace.iteritems(): + for c, src in compat.iteritems(to_replace): if c in value and c in self: new_data = new_data.replace(src, value[c], filter=[c], @@ -3640,7 +3625,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, elif not isinstance(value, (list, np.ndarray)): # {'A': NA} -> 0 new_data = self._data - for k, src in to_replace.iteritems(): + for k, src in compat.iteritems(to_replace): if k in self: new_data = new_data.replace(src, value, filter=[k], @@ -3680,7 +3665,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if isinstance(value, (dict, Series)): # NA -> {'A' : 0, 'B' : -1} new_data = self._data - for k, v in value.iteritems(): + for k, v in compat.iteritems(value): if k in self: new_data = new_data.replace(to_replace, v, filter=[k], @@ -3721,7 +3706,6 @@ def interpolate(self, to_replace, method='pad', axis=0, inplace=False, -------- reindex, replace, fillna """ - from warnings import warn warn('DataFrame.interpolate will be removed in v0.13, please use ' 'either DataFrame.fillna or DataFrame.replace instead', FutureWarning) @@ -3871,7 +3855,6 @@ def _combine_series_infer(self, other, func, fill_value=None): # teeny hack because one does DataFrame + TimeSeries all the time if self.index.is_all_dates and other.index.is_all_dates: - import warnings warnings.warn(("TimeSeries broadcasting along DataFrame index " "by default is deprecated. Please use " "DataFrame. 
to explicitly broadcast arithmetic " @@ -4315,7 +4298,7 @@ def shift(self, periods=1, freq=None, **kwds): offset = _resolve_offset(freq, kwds) - if isinstance(offset, basestring): + if isinstance(offset, compat.string_types): offset = datetools.to_offset(offset) if offset is None: @@ -4456,7 +4439,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): values = self.values series_gen = (Series.from_array(arr, index=res_columns, name=name) for i, (arr, name) in - enumerate(izip(values, res_index))) + enumerate(zip(values, res_index))) else: raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) @@ -4479,7 +4462,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): for i, v in enumerate(series_gen): results[i] = func(v) keys.append(v.name) - except Exception, e: + except Exception as e: try: if hasattr(e, 'args'): k = res_index[i] @@ -4863,7 +4846,7 @@ def describe(self, percentile_width=50): if len(numdata.columns) == 0: return DataFrame(dict((k, v.describe()) - for k, v in self.iteritems()), + for k, v in compat.iteritems(self)), columns=self.columns) lb = .5 * (1. - percentile_width / 100.) @@ -4888,7 +4871,7 @@ def pretty_name(x): series.min(), series.quantile(lb), series.median(), series.quantile(ub), series.max()]) - return self._constructor(map(list, zip(*destat)), index=destat_columns, + return self._constructor(lmap(list, zip(*destat)), index=destat_columns, columns=numdata.columns) #---------------------------------------------------------------------- @@ -4947,7 +4930,7 @@ def _count_level(self, level, axis=0, numeric_only=False): # python 2.5 mask = notnull(frame.values).view(np.uint8) - if isinstance(level, basestring): + if isinstance(level, compat.string_types): level = self.index._get_level_number(level) level_index = frame.index.levels[level] @@ -5734,7 +5717,7 @@ def extract_index(data): indexes.append(v.index) elif isinstance(v, dict): have_dicts = True - indexes.append(v.keys()) + indexes.append(list(v.keys())) elif isinstance(v, (list, tuple, np.ndarray)): have_raw_arrays = True raw_lengths.append(len(v)) @@ -5802,7 +5785,7 @@ def _rec_to_dict(arr): sdict = dict((k, arr[k]) for k in columns) elif isinstance(arr, DataFrame): columns = list(arr.columns) - sdict = dict((k, v.values) for k, v in arr.iteritems()) + sdict = dict((k, v.values) for k, v in compat.iteritems(arr)) elif isinstance(arr, dict): columns = sorted(arr) sdict = arr.copy() @@ -5849,7 +5832,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): return arrays, columns else: # last ditch effort - data = map(tuple, data) + data = lmap(tuple, data) return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) @@ -5894,7 +5877,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): if columns is None: - gen = (x.keys() for x in data) + gen = (list(x.keys()) for x in data) columns = lib.fast_unique_multiple_list_gen(gen) # assure that they are of the base dict class and not of derived @@ -5923,7 +5906,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def _get_names_from_index(data): - index = range(len(data)) + index = lrange(len(data)) has_some_name = any([s.name is not None for s in data]) if not has_some_name: return index @@ -5977,8 +5960,8 @@ def _homogenize(data, index, dtype=None): def _from_nested_dict(data): # TODO: this should be seriously cythonized new_data = OrderedDict() - for index, s in data.iteritems(): - 
for col, v in s.iteritems(): + for index, s in compat.iteritems(data): + for col, v in compat.iteritems(s): new_data[col] = new_data.get(col, OrderedDict()) new_data[col][index] = v return new_data @@ -5996,7 +5979,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrame) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.columns - if isinstance(c, basestring) and py3compat.isidentifier(c)] + if isinstance(c, compat.string_types) and compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6be5f456b50e6..0eaae228da627 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,6 @@ # pylint: disable=W0231,E1101 - +import warnings +from pandas import compat import numpy as np import pandas.lib as lib from pandas.core.base import PandasObject @@ -9,6 +10,7 @@ from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com +from pandas.compat import map, zip class PandasError(Exception): @@ -23,7 +25,7 @@ class PandasContainer(PandasObject): } _AXIS_ALIASES = {} - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _AXIS_NAMES = dict((v, k) for k, v in compat.iteritems(_AXIS_NUMBERS)) def to_pickle(self, path): """ @@ -38,13 +40,11 @@ def to_pickle(self, path): return to_pickle(self, path) def save(self, path): # TODO remove in 0.13 - import warnings from pandas.io.pickle import to_pickle warnings.warn("save is deprecated, use to_pickle", FutureWarning) return to_pickle(self, path) def load(self, path): # TODO remove in 0.13 - import warnings from pandas.io.pickle import read_pickle warnings.warn("load is deprecated, use pd.read_pickle", FutureWarning) return read_pickle(path) @@ -77,7 +77,7 @@ def _get_axis_number(self, axis): def _get_axis_name(self, axis): axis = self._AXIS_ALIASES.get(axis, axis) - if isinstance(axis, basestring): + if isinstance(axis, compat.string_types): if axis in self._AXIS_NUMBERS: return axis else: @@ -648,6 +648,9 @@ def empty(self): def __nonzero__(self): return not self.empty + # Python 3 compat + __bool__ = __nonzero__ + @property def ndim(self): return self._data.ndim @@ -712,6 +715,13 @@ def __delitem__(self, key): except KeyError: pass + # originally used to get around 2to3's changes to iteritems. + # Now unnecessary. 
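(Annotation on the `__bool__ = __nonzero__` alias added above: py2 consults `__nonzero__` for truth testing while py3 consults `__bool__`, so aliasing covers both. A minimal sketch with a hypothetical class:)

    class MaybeEmpty(object):
        def __init__(self, n):
            self.n = n

        def __nonzero__(self):      # truth hook on Python 2
            return self.n > 0

        __bool__ = __nonzero__      # truth hook on Python 3

    assert bool(MaybeEmpty(3)) is True
    assert bool(MaybeEmpty(0)) is False

The deprecated `iterkv` alias that follows is the companion pattern: keep the old name for one release, warn, and delegate to the new one.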
+ def iterkv(self, *args, **kwargs): + warnings.warn("iterkv is deprecated and will be removed in a future " + "release, use ``iteritems`` instead.", DeprecationWarning) + return self.iteritems(*args, **kwargs) + def get_dtype_counts(self): """ return the counts of dtypes in this frame """ from pandas import Series diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index cc0a2b7589bb6..e12795682460c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,7 +1,11 @@ -from itertools import izip import types import numpy as np +from pandas.compat import( + zip, builtins, range, long, lrange, lzip, OrderedDict, callable +) +from pandas import compat + from pandas.core.base import PandasObject from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame @@ -11,7 +15,6 @@ from pandas.core.series import Series from pandas.core.panel import Panel from pandas.util.decorators import cache_readonly, Appender -from pandas.util.compat import OrderedDict import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import _possibly_downcast_to_dtype, notnull @@ -484,7 +487,7 @@ def _python_agg_general(self, func, *args, **kwargs): if self.grouper._filter_empty_groups: mask = counts.ravel() > 0 - for name, result in output.iteritems(): + for name, result in compat.iteritems(output): # since we are masking, make sure that we have a float object values = result @@ -588,7 +591,7 @@ def get_iterator(self, data, axis=0, keep_internal=True): splitter = self._get_splitter(data, axis=axis, keep_internal=keep_internal) keys = self._get_group_keys() - for key, (i, group) in izip(keys, splitter): + for key, (i, group) in zip(keys, splitter): yield key, group def _get_splitter(self, data, axis=0, keep_internal=True): @@ -616,13 +619,13 @@ def apply(self, f, data, axis=0, keep_internal=False): try: values, mutated = splitter.fast_apply(f, group_keys) return group_keys, values, mutated - except (Exception), detail: + except (Exception) as detail: # we detect a mutatation of some kind # so take slow path pass result_values = [] - for key, (i, group) in izip(group_keys, splitter): + for key, (i, group) in zip(group_keys, splitter): object.__setattr__(group, 'name', key) # group might be modified @@ -671,7 +674,7 @@ def groups(self): if len(self.groupings) == 1: return self.groupings[0].groups else: - to_groupby = zip(*(ping.grouper for ping in self.groupings)) + to_groupby = lzip(*(ping.grouper for ping in self.groupings)) to_groupby = Index(to_groupby) return self.axis.groupby(to_groupby) @@ -727,12 +730,12 @@ def get_group_levels(self): return [self.groupings[0].group_index] if self._overflow_possible: - recons_labels = [np.array(x) for x in izip(*obs_ids)] + recons_labels = [np.array(x) for x in zip(*obs_ids)] else: recons_labels = decons_group_index(obs_ids, self.shape) name_list = [] - for ping, labels in izip(self.groupings, recons_labels): + for ping, labels in zip(self.groupings, recons_labels): labels = com._ensure_platform_int(labels) name_list.append(ping.group_index.take(labels)) @@ -1004,7 +1007,7 @@ def get_iterator(self, data, axis=0): """ if axis == 0: start = 0 - for edge, label in izip(self.bins, self.binlabels): + for edge, label in zip(self.bins, self.binlabels): yield label, data[start:edge] start = edge @@ -1012,14 +1015,14 @@ def get_iterator(self, data, axis=0): yield self.binlabels[-1], data[start:] else: start = 0 - for edge, label in izip(self.bins, self.binlabels): - inds = range(start, edge) + for edge, 
label in zip(self.bins, self.binlabels): + inds = lrange(start, edge) yield label, data.take(inds, axis=axis) start = edge n = len(data.axes[axis]) if start < n: - inds = range(start, n) + inds = lrange(start, n) yield self.binlabels[-1], data.take(inds, axis=axis) def apply(self, f, data, axis=0, keep_internal=False): @@ -1257,12 +1260,12 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): if level is not None: if not isinstance(group_axis, MultiIndex): - if isinstance(level, basestring): + if isinstance(level, compat.string_types): if obj.index.name != level: raise ValueError('level name %s is not the name of the index' % level) elif level > 0: raise ValueError('level > 0 only valid with MultiIndex') - + level = None key = group_axis @@ -1305,7 +1308,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): groupings = [] exclusions = [] - for i, (gpr, level) in enumerate(izip(keys, levels)): + for i, (gpr, level) in enumerate(zip(keys, levels)): name = None try: obj._data.items.get_loc(gpr) @@ -1334,7 +1337,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): def _is_label_like(val): - return isinstance(val, basestring) or np.isscalar(val) + return isinstance(val, compat.string_types) or np.isscalar(val) def _convert_grouper(axis, grouper): @@ -1406,7 +1409,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): ------- Series or DataFrame """ - if isinstance(func_or_funcs, basestring): + if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) if hasattr(func_or_funcs, '__iter__'): @@ -1434,23 +1437,23 @@ def aggregate(self, func_or_funcs, *args, **kwargs): def _aggregate_multiple_funcs(self, arg): if isinstance(arg, dict): - columns = arg.keys() - arg = arg.items() + columns = list(arg.keys()) + arg = list(arg.items()) elif any(isinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] # indicated column order - columns = list(zip(*arg))[0] + columns = lzip(*arg)[0] else: # list of functions / function names columns = [] for f in arg: - if isinstance(f, basestring): + if isinstance(f, compat.string_types): columns.append(f) else: columns.append(f.__name__) - arg = zip(columns, arg) + arg = lzip(columns, arg) results = {} @@ -1534,7 +1537,7 @@ def transform(self, func, *args, **kwargs): result = result.values dtype = result.dtype - if isinstance(func, basestring): + if isinstance(func, compat.string_types): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) @@ -1576,7 +1579,7 @@ def filter(self, func, dropna=True, *args, **kwargs): ------- filtered : Series """ - if isinstance(func, basestring): + if isinstance(func, compat.string_types): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) @@ -1690,7 +1693,7 @@ def _obj_with_exclusions(self): @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): - if isinstance(arg, basestring): + if isinstance(arg, compat.string_types): return getattr(self, arg)(*args, **kwargs) result = OrderedDict() @@ -1702,7 +1705,7 @@ def aggregate(self, arg, *args, **kwargs): if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): new_arg = OrderedDict() - for k, v in arg.iteritems(): + for k, v in compat.iteritems(arg): if not isinstance(v, (tuple, list, dict)): new_arg[k] = [v] else: @@ -1715,19 +1718,19 @@ def aggregate(self, arg, *args, **kwargs): if isinstance(subset, DataFrame): raise 
NotImplementedError - for fname, agg_how in arg.iteritems(): + for fname, agg_how in compat.iteritems(arg): colg = SeriesGroupBy(subset, selection=self._selection, grouper=self.grouper) result[fname] = colg.aggregate(agg_how) keys.append(fname) else: - for col, agg_how in arg.iteritems(): + for col, agg_how in compat.iteritems(arg): colg = SeriesGroupBy(obj[col], selection=col, grouper=self.grouper) result[col] = colg.aggregate(agg_how) keys.append(col) - if isinstance(result.values()[0], DataFrame): + if isinstance(list(result.values())[0], DataFrame): from pandas.tools.merge import concat result = concat([result[k] for k in keys], keys=keys, axis=1) else: @@ -1905,7 +1908,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): if not all_indexed_same: return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) - + try: if self.axis == 0: @@ -1998,13 +2001,13 @@ def transform(self, func, *args, **kwargs): return concatenated def _define_paths(self, func, *args, **kwargs): - if isinstance(func, basestring): + if isinstance(func, compat.string_types): fast_path = lambda group: getattr(group, func)(*args, **kwargs) slow_path = lambda group: group.apply(lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis) else: fast_path = lambda group: func(group, *args, **kwargs) slow_path = lambda group: group.apply(lambda x: func(x, *args, **kwargs), axis=self.axis) - return fast_path, slow_path + return fast_path, slow_path def _choose_path(self, fast_path, slow_path, group): path = slow_path @@ -2249,7 +2252,7 @@ def aggregate(self, arg, *args, **kwargs): ------- aggregated : Panel """ - if isinstance(arg, basestring): + if isinstance(arg, compat.string_types): return getattr(self, arg)(*args, **kwargs) return self._aggregate_generic(arg, *args, **kwargs) @@ -2332,7 +2335,7 @@ def __iter__(self): starts, ends = lib.generate_slices(self.slabels, self.ngroups) - for i, (start, end) in enumerate(izip(starts, ends)): + for i, (start, end) in enumerate(zip(starts, ends)): # Since I'm now compressing the group ids, it's now not "possible" # to produce empty slices because such groups would not be observed # in the data @@ -2436,7 +2439,7 @@ def get_group_index(label_list, shape): n = len(label_list[0]) group_index = np.zeros(n, dtype=np.int64) mask = np.zeros(n, dtype=bool) - for i in xrange(len(shape)): + for i in range(len(shape)): stride = np.prod([x for x in shape[i + 1:]], dtype=np.int64) group_index += com._ensure_int64(label_list[i]) * stride mask |= label_list[i] < 0 @@ -2448,7 +2451,7 @@ def get_group_index(label_list, shape): def _int64_overflow_possible(shape): - the_prod = 1L + the_prod = long(1) for x in shape: the_prod *= long(x) @@ -2461,7 +2464,7 @@ def decons_group_index(comp_labels, shape): factor = 1 y = 0 x = comp_labels - for i in reversed(xrange(len(shape))): + for i in reversed(range(len(shape))): labels = (x - y) % (factor * shape[i]) // factor np.putmask(labels, comp_labels < 0, -1) label_list.append(labels) @@ -2503,7 +2506,7 @@ def _lexsort_indexer(keys, orders=None): elif orders is None: orders = [True] * len(keys) - for key, order in izip(keys, orders): + for key, order in zip(keys, orders): rizer = _hash.Factorizer(len(key)) if not key.dtype == np.object_: @@ -2537,12 +2540,12 @@ def __init__(self, comp_ids, ngroups, labels, levels): self._populate_tables() def _populate_tables(self): - for labs, table in izip(self.labels, self.tables): + for labs, table in zip(self.labels, self.tables): table.map(self.comp_ids, labs.astype(np.int64)) 
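(Annotation: many hunks in this file swap zip/range for lzip/lrange wherever the result is indexed or reused; presumably those compat helpers are just list-forcing wrappers, roughly as below. `arg` here is an illustrative value, not pandas data.)

    def lzip(*args):
        return list(zip(*args))

    def lrange(*args):
        return list(range(*args))

    arg = [('mean', len), ('sum', sum)]
    columns = lzip(*arg)[0]        # indexable on py3, unlike a zip object
    assert columns == ('mean', 'sum')
    assert lrange(3, 6) == [3, 4, 5]
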
def get_key(self, comp_id): return tuple(level[table.get_item(comp_id)] - for table, level in izip(self.tables, self.levels)) + for table, level in zip(self.tables, self.levels)) def _get_indices_dict(label_list, keys): @@ -2603,14 +2606,14 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -import __builtin__ _func_table = { - __builtin__.sum: np.sum + builtins.sum: np.sum } + _cython_table = { - __builtin__.sum: 'sum', + builtins.sum: 'sum', np.sum: 'sum', np.mean: 'mean', np.prod: 'prod', @@ -2652,7 +2655,7 @@ def numpy_groupby(data, labels, axis=0): # Helper functions -from pandas.util import py3compat +from pandas import compat import sys @@ -2664,7 +2667,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrameGroupBy) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.obj.columns - if isinstance(c, basestring) and py3compat.isidentifier(c)] + if isinstance(c, compat.string_types) and compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/index.py b/pandas/core/index.py index 3eb804d3a70e6..5175e01d116c0 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 -from itertools import izip - +from pandas.compat import range, zip, lrange, lzip +from pandas import compat import numpy as np import pandas.tslib as tslib @@ -259,7 +259,7 @@ def get_duplicates(self): counter = defaultdict(lambda: 0) for k in self.values: counter[k] += 1 - return sorted(k for k, v in counter.iteritems() if v > 1) + return sorted(k for k, v in compat.iteritems(counter) if v > 1) _get_duplicates = get_duplicates @@ -722,7 +722,7 @@ def get_value(self, series, key): """ try: return self._engine.get_value(series, key) - except KeyError, e1: + except KeyError as e1: if len(self) > 0 and self.inferred_type == 'integer': raise @@ -1349,7 +1349,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None): data = list(data) data = np.asarray(data) - if issubclass(data.dtype.type, basestring): + if issubclass(data.dtype.type, compat.string_types): raise TypeError('String dtype not supported, you may need ' 'to explicitly cast to int') elif issubclass(data.dtype.type, np.integer): @@ -1593,7 +1593,7 @@ def has_duplicates(self): # has duplicates shape = [len(lev) for lev in self.levels] group_index = np.zeros(len(self), dtype='i8') - for i in xrange(len(shape)): + for i in range(len(shape)): stride = np.prod([x for x in shape[i + 1:]], dtype='i8') group_index += self.labels[i] * stride @@ -1610,7 +1610,7 @@ def get_value(self, series, key): # Label-based try: return self._engine.get_value(series, key) - except KeyError, e1: + except KeyError as e1: try: # TODO: what if a level contains tuples?? 
loc = self.get_loc(key) @@ -1800,7 +1800,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrays = zip(*tuples) + arrays = lzip(*tuples) return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) @@ -1940,7 +1940,7 @@ def drop(self, labels, level=None): if isinstance(loc, int): inds.append(loc) else: - inds.extend(range(loc.start, loc.stop)) + inds.extend(lrange(loc.start, loc.stop)) return self.delete(inds) @@ -2236,7 +2236,7 @@ def _partial_tup_index(self, tup, side='left'): n = len(tup) start, end = 0, len(self) - zipped = izip(tup, self.levels, self.labels) + zipped = zip(tup, self.levels, self.labels) for k, (lab, lev, labs) in enumerate(zipped): section = labs[start:end] @@ -2445,7 +2445,7 @@ def equals(self, other): if len(self) != len(other): return False - for i in xrange(self.nlevels): + for i in range(self.nlevels): svalues = com.take_nd(self.levels[i].values, self.labels[i], allow_fill=False) ovalues = com.take_nd(other.levels[i].values, other.labels[i], @@ -2463,7 +2463,7 @@ def equal_levels(self, other): if self.nlevels != other.nlevels: return False - for i in xrange(self.nlevels): + for i in range(self.nlevels): if not self.levels[i].equals(other.levels[i]): return False return True @@ -2488,7 +2488,7 @@ def union(self, other): result_names = self.names if self.names == other.names else None uniq_tuples = lib.fast_unique_multiple([self.values, other.values]) - return MultiIndex.from_arrays(zip(*uniq_tuples), sortorder=0, + return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) def intersection(self, other): @@ -2518,7 +2518,7 @@ def intersection(self, other): labels=[[]] * self.nlevels, names=result_names) else: - return MultiIndex.from_arrays(zip(*uniq_tuples), sortorder=0, + return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) def diff(self, other): @@ -2635,7 +2635,7 @@ def _wrap_joined_index(self, joined, other): # For utility purposes def _sparsify(label_list, start=0,sentinal=''): - pivoted = zip(*label_list) + pivoted = lzip(*label_list) k = len(label_list) result = pivoted[:start + 1] @@ -2659,7 +2659,7 @@ def _sparsify(label_list, start=0,sentinal=''): prev = cur - return zip(*result) + return lzip(*result) def _ensure_index(index_like): @@ -2702,7 +2702,7 @@ def _get_combined_index(indexes, intersect=False): def _get_distinct_indexes(indexes): - return dict((id(x), x) for x in indexes).values() + return list(dict((id(x), x) for x in indexes).values()) def _union_indexes(indexes): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0237cfde3b561..4d64b058a15d7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -3,6 +3,8 @@ from datetime import datetime from pandas.core.common import _asarray_tuplesafe from pandas.core.index import Index, MultiIndex, _ensure_index +from pandas.compat import range, zip +import pandas.compat as compat import pandas.core.common as com import pandas.lib as lib @@ -340,7 +342,7 @@ def _getitem_lowerdim(self, tup): except TypeError: # slices are unhashable pass - except Exception, e1: + except Exception as e1: if isinstance(tup[0], (slice, Index)): raise IndexingError @@ -707,7 +709,7 @@ def _getbool_axis(self, key, axis=0): inds, = key.nonzero() try: return self.obj.take(inds, axis=axis, convert=False) - except (Exception), detail: + except (Exception) as detail: raise self._exception(detail) def _get_slice_axis(self, slice_obj, 
axis=0): """ this is pretty simple as we just have to deal with labels """ @@ -920,7 +922,7 @@ def _convert_to_index_sliceable(obj, key): indexer = obj.ix._convert_to_indexer(key, axis=0) return indexer - elif isinstance(key, basestring): + elif isinstance(key, compat.string_types): # we are an actual column if key in obj._data.items: @@ -1077,7 +1079,7 @@ def _is_label_like(key): def _is_list_like(obj): # Consider namedtuples to be not list like as they are useful as indices return (np.iterable(obj) - and not isinstance(obj, basestring) + and not isinstance(obj, compat.string_types) and not (isinstance(obj, tuple) and type(obj) is not tuple)) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f23a89635aaf2..2d09bbec85ffa 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -17,7 +17,8 @@ import pandas.core.expressions as expressions from pandas.tslib import Timestamp -from pandas.util import py3compat +from pandas import compat +from pandas.compat import range, lrange, lmap, callable, map, zip class Block(PandasObject): @@ -471,7 +472,7 @@ def eval(self, func, other, raise_on_error = True, try_cast = False): args = [ values, other ] try: result = self._try_coerce_result(func(*args)) - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values [%s]' % (repr(other),str(detail))) @@ -546,7 +547,7 @@ def func(c,v,o): v, o = self._try_coerce_args(v, o) try: return self._try_coerce_result(expressions.where(c, v, o, raise_on_error=True)) - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values [%s]' % (repr(o),str(detail))) @@ -576,7 +577,7 @@ def func(c,v,o): # might need to separate out blocks axis = cond.ndim - 1 cond = cond.swapaxes(axis, 0) - mask = np.array([cond[i].all() for i in xrange(cond.shape[0])], + mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool) result_blocks = [] @@ -686,7 +687,7 @@ class ObjectBlock(Block): _can_hold_na = True def __init__(self, values, items, ref_items, ndim=2, fastpath=False, placement=None): - if issubclass(values.dtype.type, basestring): + if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object) super(ObjectBlock, self).__init__(values, items, ref_items, @@ -757,7 +758,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, inplace=inplace, filter=filter, regex=regex) elif both_lists: - for to_rep, v in itertools.izip(to_replace, value): + for to_rep, v in zip(to_replace, value): blk[0], = blk[0]._replace_single(to_rep, v, inplace=inplace, filter=filter, regex=regex) elif to_rep_is_list and regex: @@ -812,7 +813,7 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) - if isnull(value) or not isinstance(value, basestring): + if isnull(value) or not isinstance(value, compat.string_types): def re_replacer(s): try: return value if rx.search(s) is not None else s @@ -830,7 +831,7 @@ def re_replacer(s): f = np.vectorize(re_replacer, otypes=[self.dtype]) try: - filt = map(self.items.get_loc, filter) + filt = lmap(self.items.get_loc, filter) except TypeError: filt = slice(None) @@ -1013,6 +1014,9 @@ def make_empty(self): def __nonzero__(self): return True + # Python3 compat + __bool__ = __nonzero__ + @property def ndim(self): return len(self.axes) 
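
Review note (illustrative, not part of the patch): the hunk just above adds `__bool__ = __nonzero__` to `BlockManager`. Python 3 consults `__bool__` where Python 2 consulted `__nonzero__`, so aliasing one to the other is the standard way to keep truthiness working on both runtimes. A minimal toy sketch of the pattern, independent of pandas internals:

class Truthy(object):
    """Toy class showing the Python 2/3 truthiness bridge."""
    def __nonzero__(self):        # consulted by bool() on Python 2
        return True
    __bool__ = __nonzero__        # Python 3 looks up __bool__ instead

assert bool(Truthy()) is True     # holds on both Python 2 and 3

Without the alias, a Python 3 `bool()` call would fall back to the default (always true) behavior rather than the class's own logic.
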
@@ -1922,7 +1926,7 @@ def _add_new_block(self, item, value, loc=None): # need to shift elements to the right if self._ref_locs[loc] is not None: - for i in reversed(range(loc+1,len(self._ref_locs))): + for i in reversed(lrange(loc+1,len(self._ref_locs))): self._ref_locs[i] = self._ref_locs[i-1] self._ref_locs[loc] = (new_block, 0) @@ -2532,5 +2536,5 @@ def _possibly_convert_to_indexer(loc): if com._is_bool_indexer(loc): loc = [i for i, v in enumerate(loc) if v] elif isinstance(loc,slice): - loc = range(loc.start,loc.stop) + loc = lrange(loc.start,loc.stop) return loc diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b2ff366daa826..23cc4fe31eba1 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,3 +1,4 @@ +from pandas import compat import sys import itertools import functools @@ -11,6 +12,9 @@ import pandas.hashtable as _hash import pandas.tslib as tslib +from pandas.compat import builtins + + try: import bottleneck as bn _USE_BOTTLENECK = True @@ -30,7 +34,7 @@ def check(self, obj): def __call__(self, f): @functools.wraps(f) def _f(*args, **kwargs): - obj_iter = itertools.chain(args, kwargs.itervalues()) + obj_iter = itertools.chain(args, compat.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): raise TypeError('reduction operation {0!r} not allowed for ' 'this dtype'.format(f.__name__.replace('nan', @@ -55,7 +59,7 @@ def __call__(self, alt): @functools.wraps(alt) def f(values, axis=None, skipna=True, **kwds): if len(self.kwargs) > 0: - for k, v in self.kwargs.iteritems(): + for k, v in compat.iteritems(self.kwargs): if k not in kwds: kwds[k] = v try: @@ -284,12 +288,11 @@ def nanmin(values, axis=None, skipna=True): # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ and sys.version_info[0] >= 3): # pragma: no cover - import __builtin__ if values.ndim > 1: apply_ax = axis if axis is not None else 0 - result = np.apply_along_axis(__builtin__.min, apply_ax, values) + result = np.apply_along_axis(builtins.min, apply_ax, values) else: - result = __builtin__.min(values) + result = builtins.min(values) else: if ((axis is not None and values.shape[axis] == 0) or values.size == 0): @@ -309,13 +312,12 @@ def nanmax(values, axis=None, skipna=True): # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ and sys.version_info[0] >= 3): # pragma: no cover - import __builtin__ if values.ndim > 1: apply_ax = axis if axis is not None else 0 - result = np.apply_along_axis(__builtin__.max, apply_ax, values) + result = np.apply_along_axis(builtins.max, apply_ax, values) else: - result = __builtin__.max(values) + result = builtins.max(values) else: if ((axis is not None and values.shape[axis] == 0) or values.size == 0): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d33f7144c27b0..9f7785ae27465 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -3,6 +3,8 @@ """ # pylint: disable=E1103,W0231,W0212,W0621 +from pandas.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict +from pandas import compat import operator import sys import numpy as np @@ -20,7 +22,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.util import py3compat +from pandas import compat from pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com import pandas.core.nanops as nanops @@ -223,7 +225,7 @@ def _construct_axes_dict_for_slice(self, axes=None, **kwargs): __rfloordiv__ = 
_arith_method(lambda x, y: y // x, '__rfloordiv__') __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__') - if not py3compat.PY3: + if not compat.PY3: __div__ = _arith_method(operator.div, '__div__') __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__') @@ -271,21 +273,20 @@ def _from_axes(cls, data, axes): return cls(data, **d) def _init_dict(self, data, axes, dtype=None): - from pandas.util.compat import OrderedDict haxis = axes.pop(self._het_axis) # prefilter if haxis passed if haxis is not None: haxis = _ensure_index(haxis) data = OrderedDict((k, v) for k, v - in data.iteritems() if k in haxis) + in compat.iteritems(data) if k in haxis) else: - ks = data.keys() + ks = list(data.keys()) if not isinstance(data,OrderedDict): ks = _try_sort(ks) haxis = Index(ks) - for k, v in data.iteritems(): + for k, v in compat.iteritems(data): if isinstance(v, dict): data[k] = self._constructor_sliced(v) @@ -343,20 +344,19 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): ------- Panel """ - from pandas.util.compat import OrderedDict,OrderedDefaultdict orient = orient.lower() if orient == 'minor': new_data = OrderedDefaultdict(dict) - for col, df in data.iteritems(): - for item, s in df.iteritems(): + for col, df in compat.iteritems(data): + for item, s in compat.iteritems(df): new_data[item][col] = s data = new_data elif orient != 'items': # pragma: no cover raise ValueError('Orientation must be one of {items, minor}.') d = cls._homogenize_dict(cls, data, intersect=intersect, dtype=dtype) - ks = d['data'].keys() + ks = list(d['data'].keys()) if not isinstance(d['data'],OrderedDict): ks = list(sorted(ks)) d[cls._info_axis] = Index(ks) @@ -473,17 +473,17 @@ def __unicode__(self): class_name = str(self.__class__) shape = self.shape - dims = u'Dimensions: %s' % ' x '.join( + dims = u('Dimensions: %s') % ' x '.join( ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)]) def axis_pretty(a): v = getattr(self, a) if len(v) > 0: - return u'%s axis: %s to %s' % (a.capitalize(), + return u('%s axis: %s to %s') % (a.capitalize(), com.pprint_thing(v[0]), com.pprint_thing(v[-1])) else: - return u'%s axis: None' % a.capitalize() + return u('%s axis: None') % a.capitalize() output = '\n'.join( [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) @@ -496,10 +496,6 @@ def iteritems(self): for h in getattr(self, self._info_axis): yield h, self[h] - # Name that won't get automatically converted to items by 2to3. items is - # already in use for the first axis. 
- iterkv = iteritems - def _get_plane_axes(self, axis): """ Get my plane axes: these are already @@ -540,7 +536,7 @@ def to_sparse(self, fill_value=None, kind='block'): y : SparseDataFrame """ from pandas.core.sparse import SparsePanel - frames = dict(self.iterkv()) + frames = dict(compat.iteritems(self)) return SparsePanel(frames, items=self.items, major_axis=self.major_axis, minor_axis=self.minor_axis, @@ -560,7 +556,7 @@ def to_excel(self, path, na_rep=''): """ from pandas.io.excel import ExcelWriter writer = ExcelWriter(path) - for item, df in self.iteritems(): + for item, df in compat.iteritems(self): name = str(item) df.to_excel(writer, name, na_rep=na_rep) writer.save() @@ -804,13 +800,13 @@ def _reindex_multi(self, items, major, minor): new_minor, indexer2 = self.minor_axis.reindex(minor) if indexer0 is None: - indexer0 = range(len(new_items)) + indexer0 = lrange(len(new_items)) if indexer1 is None: - indexer1 = range(len(new_major)) + indexer1 = lrange(len(new_major)) if indexer2 is None: - indexer2 = range(len(new_minor)) + indexer2 = lrange(len(new_minor)) for i, ind in enumerate(indexer0): com.take_2d_multi(values[ind], (indexer1, indexer2), @@ -976,7 +972,7 @@ def fillna(self, value=None, method=None): if method is None: raise ValueError('must specify a fill method or value') result = {} - for col, s in self.iterkv(): + for col, s in compat.iteritems(self): result[col] = s.fillna(method=method, value=value) return self._constructor.from_dict(result) @@ -1133,11 +1129,11 @@ def transpose(self, *args, **kwargs): """ # construct the args args = list(args) - aliases = tuple(kwargs.iterkeys()) + aliases = tuple(compat.iterkeys(kwargs)) for a in self._AXIS_ORDERS: if not a in kwargs: - where = map(a.startswith, aliases) + where = lmap(a.startswith, aliases) if any(where): if sum(where) != 1: @@ -1483,7 +1479,7 @@ def _prep_ndarray(self, values, copy=True): if not isinstance(values, np.ndarray): values = np.asarray(values) # NumPy strings are a pain, convert to object - if issubclass(values.dtype.type, basestring): + if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object, copy=True) else: if copy: @@ -1507,14 +1503,13 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): ------- dict of aligned results & indicies """ - from pandas.util.compat import OrderedDict result = dict() if isinstance(frames,OrderedDict): # caller differs dict/ODict, presered type result = OrderedDict() adj_frames = OrderedDict() - for k, v in frames.iteritems(): + for k, v in compat.iteritems(frames): if isinstance(v, dict): adj_frames[k] = self._constructor_sliced(v) else: @@ -1527,7 +1522,7 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): reindex_dict = dict( [(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes]) reindex_dict['copy'] = False - for key, frame in adj_frames.iteritems(): + for key, frame in compat.iteritems(adj_frames): if frame is not None: result[key] = frame.reindex(**reindex_dict) else: @@ -1711,8 +1706,8 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(Panel) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.keys() - if isinstance(c, basestring) - and py3compat.isidentifier(c)] + if isinstance(c, compat.string_types) + and compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless # we're in IPython (when those modules are loaded anyway). 
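
Review note (illustrative, not part of the patch): two rewrites recur throughout the panel.py changes above. First, `d.iteritems()` becomes `compat.iteritems(d)` because Python 3 removed `iteritems()`/`iterkeys()`/`itervalues()` from dicts. Second, `u'...'` literals become `u('...')` calls, since the `u''` prefix is a SyntaxError on Python 3.0–3.2 (it only returned in 3.3 via PEP 414). pandas.compat provides the real helpers; the standalone sketch below merely mirrors the `iteritems` behavior these call sites assume:

from __future__ import print_function
import sys

if sys.version_info[0] >= 3:
    def iteritems(d):
        # Python 3: items() is already a lazy view
        return iter(d.items())
else:
    def iteritems(d):
        # Python 2: use the dedicated lazy iterator
        return d.iteritems()

for key, value in iteritems({'a': 1, 'b': 2}):
    print(key, value)

Routing every call through one shim keeps the per-call-site diff mechanical, which is why the hunks above are so uniform.
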
diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 08ff3b70dcb13..f43ec2c31ba96 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -1,6 +1,8 @@ """ Factory methods to create N-D panels """ import pandas.lib as lib +from pandas.compat import zip +import pandas.compat as compat def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_aliases=None, stat_axis=2,ns=None): @@ -27,7 +29,7 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a """ # if slicer is a name, get the object - if isinstance(slicer, basestring): + if isinstance(slicer, compat.string_types): import pandas try: slicer = getattr(pandas, slicer) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index cb34d0bad5df7..b69e4a6a96acc 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1,6 +1,8 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 +from pandas.compat import range, zip +from pandas import compat import itertools import numpy as np @@ -187,7 +189,7 @@ def get_new_values(self): new_mask = np.zeros(result_shape, dtype=bool) # is there a simpler / faster way of doing this? - for i in xrange(values.shape[1]): + for i in range(values.shape[1]): chunk = new_values[:, i * width: (i + 1) * width] mask_chunk = new_mask[:, i * width: (i + 1) * width] @@ -397,7 +399,7 @@ def _slow_pivot(index, columns, values): Could benefit from some Cython here. """ tree = {} - for i, (idx, col) in enumerate(itertools.izip(index, columns)): + for i, (idx, col) in enumerate(zip(index, columns)): if col not in tree: tree[col] = {} branch = tree[col] @@ -539,9 +541,9 @@ def _stack_multi_columns(frame, level=-1, dropna=True): # tuple list excluding level for grouping columns if len(frame.columns.levels) > 2: - tuples = zip(*[lev.values.take(lab) + tuples = list(zip(*[lev.values.take(lab) for lev, lab in zip(this.columns.levels[:-1], - this.columns.labels[:-1])]) + this.columns.labels[:-1])])) unique_groups = [key for key, _ in itertools.groupby(tuples)] new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) @@ -685,11 +687,11 @@ def melt(frame, id_vars=None, value_vars=None, var_name = frame.columns.names else: var_name = ['variable_%s' % i for i in - xrange(len(frame.columns.names))] + range(len(frame.columns.names))] else: var_name = [frame.columns.name if frame.columns.name is not None else 'variable'] - if isinstance(var_name, basestring): + if isinstance(var_name, compat.string_types): var_name = [var_name] N, K = frame.shape @@ -743,8 +745,8 @@ def lreshape(data, groups, dropna=True, label=None): reshaped : DataFrame """ if isinstance(groups, dict): - keys = groups.keys() - values = groups.values() + keys = list(groups.keys()) + values = list(groups.values()) else: keys, values = zip(*groups) @@ -772,7 +774,7 @@ def lreshape(data, groups, dropna=True, label=None): for c in pivot_cols: mask &= notnull(mdata[c]) if not mask.all(): - mdata = dict((k, v[mask]) for k, v in mdata.iteritems()) + mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) return DataFrame(mdata, columns=id_cols + pivot_cols) @@ -898,7 +900,7 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None): pvalues.fill(fill_value) values = values - for i in xrange(len(items)): + for i in range(len(items)): pvalues[i].flat[mask] = values[:, i] if ref_items is None: diff --git a/pandas/core/series.py b/pandas/core/series.py index b77dfbfd9618c..0e995f47935a0 100644 --- 
a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,10 +5,11 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from itertools import izip +from pandas import compat import operator from distutils.version import LooseVersion import types +import warnings from numpy import nan, ndarray import numpy as np @@ -25,8 +26,9 @@ _check_slice_bounds, _maybe_convert_indices) from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex, Period -from pandas.util import py3compat +from pandas import compat from pandas.util.terminal import get_terminal_size +from pandas.compat import zip, lzip, u, OrderedDict import pandas.core.array as pa @@ -425,7 +427,7 @@ class Series(generic.PandasContainer, pa.Array): 'index': 0 } - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _AXIS_NAMES = dict((v, k) for k, v in compat.iteritems(_AXIS_NUMBERS)) def __new__(cls, data=None, index=None, dtype=None, name=None, copy=False): @@ -448,7 +450,6 @@ def __new__(cls, data=None, index=None, dtype=None, name=None, data = data.reindex(index).values elif isinstance(data, dict): if index is None: - from pandas.util.compat import OrderedDict if isinstance(data, OrderedDict): index = Index(data) else: @@ -829,7 +830,7 @@ def __setitem__(self, key, value): return raise KeyError('%s not in this series!' % str(key)) - except TypeError, e: + except TypeError as e: # python 3 type errors should be raised if 'unorderable' in str(e): # pragma: no cover raise IndexError(key) @@ -1116,9 +1117,9 @@ def __unicode__(self): name=True, dtype=True) else: - result = u'Series([], dtype: %s)' % self.dtype + result = u('Series([], dtype: %s)') % self.dtype - if not ( type(result) == unicode): + if not (isinstance(result, compat.text_type)): raise AssertionError() return result @@ -1137,12 +1138,12 @@ def _tidy_repr(self, max_vals=20): result = head + '\n...\n' + tail result = '%s\n%s' % (result, self._repr_footer()) - return unicode(result) + return compat.text_type(result) def _repr_footer(self): - namestr = u"Name: %s, " % com.pprint_thing( + namestr = u("Name: %s, ") % com.pprint_thing( self.name) if self.name is not None else "" - return u'%sLength: %d, dtype: %s' % (namestr, len(self), + return u('%sLength: %d, dtype: %s') % (namestr, len(self), str(self.dtype.name)) def to_string(self, buf=None, na_rep='NaN', float_format=None, @@ -1180,7 +1181,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, length=length, dtype=dtype, name=name) # catch contract violations - if not type(the_repr) == unicode: + if not isinstance(the_repr, compat.text_type): raise AssertionError("expected unicode string") if buf is None: @@ -1203,7 +1204,7 @@ def _get_repr(self, name=False, print_header=False, length=True, dtype=True, length=length, dtype=dtype, na_rep=na_rep, float_format=float_format) result = formatter.to_string() - if not ( type(result) == unicode): + if not (isinstance(result, compat.text_type)): raise AssertionError() return result @@ -1217,10 +1218,14 @@ def iteritems(self): """ Lazily iterate over (index, value) tuples """ - return izip(iter(self.index), iter(self)) + return lzip(iter(self.index), iter(self)) - iterkv = iteritems - if py3compat.PY3: # pragma: no cover + def iterkv(self): + warnings.warn("iterkv is deprecated and will be removed in a future " + "release. 
Use ``iteritems`` instead", DeprecationWarning) + return self.iteritems() + + if compat.PY3: # pragma: no cover items = iteritems #---------------------------------------------------------------------- @@ -1273,7 +1278,7 @@ def __invert__(self): __ipow__ = __pow__ # Python 2 division operators - if not py3compat.PY3: + if not compat.PY3: __div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf) __rdiv__ = _arith_method(lambda x, y: y / x, '__div__', fill_zeros=np.inf) __idiv__ = __div__ @@ -1333,7 +1338,7 @@ def to_dict(self): ------- value_dict : dict """ - return dict(self.iteritems()) + return dict(compat.iteritems(self)) def to_sparse(self, kind='block', fill_value=None): """ @@ -1384,7 +1389,7 @@ def count(self, level=None): if level is not None: mask = notnull(self.values) - if isinstance(level, basestring): + if isinstance(level, compat.string_types): level = self.index._get_level_number(level) level_index = self.index.levels[level] @@ -2817,20 +2822,20 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} all_src = set() dd = {} # group by unique destination value - for s, d in to_rep.iteritems(): + for s, d in compat.iteritems(to_rep): dd.setdefault(d, []).append(s) all_src.add(s) if any(d in all_src for d in dd.keys()): # don't clobber each other at the cost of temporaries masks = {} - for d, sset in dd.iteritems(): # now replace by each dest + for d, sset in compat.iteritems(dd): # now replace by each dest masks[d] = com.mask_missing(rs.values, sset) - for d, m in masks.iteritems(): + for d, m in compat.iteritems(masks): com._maybe_upcast_putmask(rs.values,m,d,change=change) else: # if no risk of clobbering then simple - for d, sset in dd.iteritems(): + for d, sset in compat.iteritems(dd): _rep_one(rs, sset, d) if np.isscalar(to_replace): @@ -3046,7 +3051,7 @@ def shift(self, periods=1, freq=None, copy=True, **kwds): offset = _resolve_offset(freq, kwds) - if isinstance(offset, basestring): + if isinstance(offset, compat.string_types): offset = datetools.to_offset(offset) def _get_values(): @@ -3099,7 +3104,7 @@ def asof(self, where): ------- value or NaN """ - if isinstance(where, basestring): + if isinstance(where, compat.string_types): where = datetools.to_datetime(where) values = self.values @@ -3407,7 +3412,7 @@ def _try_cast(arr, take_fast_path): # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. 
- if issubclass(subarr.dtype.type, basestring): + if issubclass(subarr.dtype.type, compat.string_types): subarr = pa.array(data, dtype=object, copy=copy) return subarr @@ -3430,7 +3435,7 @@ def _resolve_offset(freq, kwds): if 'timeRule' in kwds or 'offset' in kwds: offset = kwds.get('offset', None) offset = kwds.get('timeRule', offset) - if isinstance(offset, basestring): + if isinstance(offset, compat.string_types): offset = datetools.getOffset(offset) warn = True else: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 1aa7fe87903d7..462ed81aaf875 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,8 +1,9 @@ import numpy as np -from itertools import izip +from pandas.compat import zip from pandas.core.common import isnull from pandas.core.series import Series +import pandas.compat as compat import re import pandas.lib as lib @@ -50,7 +51,7 @@ def str_cat(arr, others=None, sep=None, na_rep=None): notmask = -na_mask - tuples = izip(*[x[notmask] for x in arrays]) + tuples = zip(*[x[notmask] for x in arrays]) cats = [sep.join(tup) for tup in tuples] result[notmask] = cats @@ -282,16 +283,18 @@ def str_repeat(arr, repeats): if np.isscalar(repeats): def rep(x): try: - return str.__mul__(x, repeats) + return compat.binary_type.__mul__(x, repeats) except TypeError: - return unicode.__mul__(x, repeats) + return compat.text_type.__mul__(x, repeats) + return _na_map(rep, arr) else: def rep(x, r): try: - return str.__mul__(x, r) + return compat.binary_type.__mul__(x, r) except TypeError: - return unicode.__mul__(x, r) + return compat.text_type.__mul__(x, r) + repeats = np.asarray(repeats, dtype=object) result = lib.vec_binop(arr, repeats, rep) return result diff --git a/pandas/io/__init__.py b/pandas/io/__init__.py index a984c40cdc098..e69de29bb2d1d 100644 --- a/pandas/io/__init__.py +++ b/pandas/io/__init__.py @@ -1,2 +0,0 @@ -import sql -import stata diff --git a/pandas/io/auth.py b/pandas/io/auth.py index 6da497687cf25..15e3eb70d91b2 100644 --- a/pandas/io/auth.py +++ b/pandas/io/auth.py @@ -1,3 +1,4 @@ +from __future__ import print_function # see LICENSES directory for copyright and license import os import sys @@ -54,8 +55,8 @@ def process_flags(flags=[]): # Let the gflags module process the command-line arguments. try: FLAGS(flags) - except gflags.FlagsError, e: - print ('%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS)) + except gflags.FlagsError as e: + print('%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS)) sys.exit(1) # Set the logging according to the command-line flag. 
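
Review note (illustrative, not part of the patch): the auth.py hunk above shows the other two mechanical rewrites that dominate this patch. `except SomeError, e:` is Python-2-only syntax, while `except SomeError as e:` parses on 2.6+ and 3.x; likewise `print` becomes a function once a module opts in via `__future__`. A minimal sketch of both, using a made-up error message rather than anything from the patch:

from __future__ import print_function  # makes print a function on Python 2

try:
    int('not a number')
except ValueError as exc:   # 'except ValueError, exc' would fail on Python 3
    print('parse failed: %s' % exc)

The `from __future__ import print_function` lines being added at the top of bench and io modules throughout this diff serve exactly this purpose.
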
diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py index 08837474c11b4..798f30e85544f 100644 --- a/pandas/io/clipboard.py +++ b/pandas/io/clipboard.py @@ -1,5 +1,5 @@ """ io on the clipboard """ -from StringIO import StringIO +from pandas.compat import StringIO def read_clipboard(**kwargs): # pragma: no cover """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 33958ade2bcd6..a2cf057c8f531 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -1,18 +1,40 @@ """Common IO api utilities""" import sys -import urlparse -import urllib2 import zipfile from contextlib import contextmanager, closing -from StringIO import StringIO -from pandas.util import py3compat +from pandas.compat import StringIO +from pandas import compat + + +if compat.PY3: + from urllib.request import urlopen + _urlopen = urlopen + from urllib.parse import urlparse as parse_url + import urllib.parse as compat_parse + from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode + from urllib.error import URLError + from http.client import HTTPException +else: + from urllib2 import urlopen as _urlopen + from urllib import urlencode + from urlparse import urlparse as parse_url + from urlparse import uses_relative, uses_netloc, uses_params + from urllib2 import URLError + from httplib import HTTPException + from contextlib import contextmanager, closing + from functools import wraps + + # @wraps(_urlopen) + @contextmanager + def urlopen(*args, **kwargs): + with closing(_urlopen(*args, **kwargs)) as f: + yield f -_VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc + - urlparse.uses_params) -_VALID_URLS.discard('') +_VALID_URLS = set(uses_relative + uses_netloc + uses_params) +_VALID_URLS.discard('') class PerformanceWarning(Warning): pass @@ -31,7 +53,7 @@ def _is_url(url): If `url` has a valid protocol return True otherwise False. 
""" try: - return urlparse.urlparse(url).scheme in _VALID_URLS + return parse_url(url).scheme in _VALID_URLS except: return False @@ -60,18 +82,18 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): """ if _is_url(filepath_or_buffer): - from urllib2 import urlopen - filepath_or_buffer = urlopen(filepath_or_buffer) - if py3compat.PY3: # pragma: no cover + req = _urlopen(str(filepath_or_buffer)) + if compat.PY3: # pragma: no cover if encoding: errors = 'strict' else: errors = 'replace' encoding = 'utf-8' - bytes = filepath_or_buffer.read().decode(encoding, errors) - filepath_or_buffer = StringIO(bytes) - return filepath_or_buffer, encoding - return filepath_or_buffer, None + out = StringIO(req.read().decode(encoding, errors)) + else: + encoding = None + out = req + return out, encoding if _is_s3_url(filepath_or_buffer): try: @@ -80,7 +102,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): raise ImportError("boto is required to handle s3 files") # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY # are environment variables - parsed_url = urlparse.urlparse(filepath_or_buffer) + parsed_url = parse_url(filepath_or_buffer) conn = boto.connect_s3() b = conn.get_bucket(parsed_url.netloc) k = boto.s3.key.Key(b) @@ -91,16 +113,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): return filepath_or_buffer, None -# ---------------------- -# Prevent double closing -if py3compat.PY3: - urlopen = urllib2.urlopen -else: - @contextmanager - def urlopen(*args, **kwargs): - with closing(urllib2.urlopen(*args, **kwargs)) as f: - yield f - # ZipFile is not a context manager for <= 2.6 # must be tuple index here since 2.6 doesn't use namedtuple for version_info if sys.version_info[1] <= 6: diff --git a/pandas/io/data.py b/pandas/io/data.py index 1b51ae5ec8a02..e6d19aee4a9d6 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -5,20 +5,21 @@ """ import warnings import tempfile -import itertools import datetime as dt -import urllib import time from collections import defaultdict import numpy as np -from pandas.util.py3compat import StringIO, bytes_to_str +from pandas.compat import( + StringIO, bytes_to_str, range, lrange, lmap, zip +) +import pandas.compat as compat from pandas import Panel, DataFrame, Series, read_csv, concat from pandas.core.common import PandasError from pandas.io.parsers import TextParser -from pandas.io.common import urlopen, ZipFile +from pandas.io.common import urlopen, ZipFile, urlencode from pandas.util.testing import _network_error_classes @@ -95,26 +96,27 @@ def _in_chunks(seq, size): """ Return sequence in 'chunks' of size defined by size """ - return (seq[pos:pos + size] for pos in xrange(0, len(seq), size)) + return (seq[pos:pos + size] for pos in range(0, len(seq), size)) _yahoo_codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r', 'time': 't1', 'short_ratio': 's7'} + def get_quote_yahoo(symbols): """ Get current yahoo quote Returns a DataFrame """ - if isinstance(symbols, basestring): + if isinstance(symbols, compat.string_types): sym_list = symbols else: sym_list = '+'.join(symbols) # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm - request = ''.join(_yahoo_codes.itervalues()) # code request string - header = _yahoo_codes.keys() + request = ''.join(compat.itervalues(_yahoo_codes)) # code request string + header = list(_yahoo_codes.keys()) data = defaultdict(list) @@ -147,7 +149,7 @@ def get_quote_google(symbols): def _retry_read_url(url, retry_count, pause, name): - for _ in xrange(retry_count): + for 
_ in range(retry_count): time.sleep(pause) # kludge to close the socket ASAP @@ -201,11 +203,10 @@ def _get_hist_google(sym, start, end, retry_count, pause): google_URL = 'http://www.google.com/finance/historical?' # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv - url = google_URL + urllib.urlencode({"q": sym, - "startdate": start.strftime('%b %d, ' - '%Y'), - "enddate": end.strftime('%b %d, %Y'), - "output": "csv"}) + url = google_URL + urlencode({"q": sym, + "startdate": start.strftime('%b %d, ' '%Y'), + "enddate": end.strftime('%b %d, %Y'), + "output": "csv"}) return _retry_read_url(url, retry_count, pause, 'Google') @@ -322,6 +323,7 @@ def _dl_mult_symbols(symbols, start, end, chunksize, retry_count, pause, _source_functions = {'google': _get_hist_google, 'yahoo': _get_hist_yahoo} + def _get_data_from(symbols, start, end, retry_count, pause, adjust_price, ret_index, chunksize, source, name): if name is not None: @@ -332,7 +334,7 @@ def _get_data_from(symbols, start, end, retry_count, pause, adjust_price, src_fn = _source_functions[source] # If a single symbol, (e.g., 'GOOG') - if isinstance(symbols, (basestring, int)): + if isinstance(symbols, (compat.string_types, int)): hist_data = src_fn(symbols, start, end, retry_count, pause) # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) elif isinstance(symbols, DataFrame): @@ -465,15 +467,15 @@ def get_data_famafrench(name): with ZipFile(tmpf, 'r') as zf: data = zf.open(name + '.txt').readlines() - line_lengths = np.array(map(len, data)) + line_lengths = np.array(lmap(len, data)) file_edges = np.where(line_lengths == 2)[0] datasets = {} - edges = itertools.izip(file_edges + 1, file_edges[1:]) + edges = zip(file_edges + 1, file_edges[1:]) for i, (left_edge, right_edge) in enumerate(edges): dataset = [d.split() for d in data[left_edge:right_edge]] if len(dataset) > 10: - ncol_raw = np.array(map(len, dataset)) + ncol_raw = np.array(lmap(len, dataset)) ncol = np.median(ncol_raw) header_index = np.where(ncol_raw == ncol - 1)[0][-1] header = dataset[header_index] @@ -809,18 +811,18 @@ def get_forward_data(self, months, call=True, put=False, near=False, data : dict of str, DataFrame """ warnings.warn("get_forward_data() is deprecated", FutureWarning) - in_months = xrange(CUR_MONTH, CUR_MONTH + months + 1) + in_months = lrange(CUR_MONTH, CUR_MONTH + months + 1) in_years = [CUR_YEAR] * (months + 1) # Figure out how many items in in_months go past 12 to_change = 0 - for i in xrange(months): + for i in range(months): if in_months[i] > 12: in_months[i] -= 12 to_change += 1 # Change the corresponding items in the in_years list. 
- for i in xrange(1, to_change + 1): + for i in range(1, to_change + 1): in_years[-i] += 1 to_ret = Series({'calls': call, 'puts': put}) @@ -830,7 +832,7 @@ def get_forward_data(self, months, call=True, put=False, near=False, for name in to_ret: all_data = DataFrame() - for mon in xrange(months): + for mon in range(months): m2 = in_months[mon] y2 = in_years[mon] diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index c7a60d13f1778..2be477f49e28b 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,4 +1,5 @@ """This module is designed for community supported date conversion functions""" +from pandas.compat import range import numpy as np import pandas.lib as lib @@ -32,7 +33,7 @@ def generic_parser(parse_func, *cols): N = _check_columns(cols) results = np.empty(N, dtype=object) - for i in xrange(N): + for i in range(N): args = [c[i] for c in cols] results[i] = parse_func(*args) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index b3b48382faae0..534a88e303dbf 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -6,12 +6,14 @@ # ExcelFile class import datetime -from itertools import izip import numpy as np from pandas.io.parsers import TextParser from pandas.tseries.period import Period from pandas import json +from pandas.compat import map, zip, reduce, range, lrange +import pandas.compat as compat + def read_excel(path_or_buf, sheetname, kind=None, **kwds): """Read an Excel table into a pandas DataFrame @@ -65,15 +67,17 @@ class ExcelFile(object): def __init__(self, path_or_buf, kind=None, **kwds): self.kind = kind - import xlrd # throw an ImportError if we need to - ver = tuple(map(int,xlrd.__VERSION__.split(".")[:2])) + import xlrd # throw an ImportError if we need to + + ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): - raise ImportError("pandas requires xlrd >= 0.9.0 for excel support, current version "+xlrd.__VERSION__) + raise ImportError("pandas requires xlrd >= 0.9.0 for excel " + "support, current version " + xlrd.__VERSION__) self.path_or_buf = path_or_buf self.tmpfile = None - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, compat.string_types): self.book = xlrd.open_workbook(path_or_buf) else: data = path_or_buf.read() @@ -108,8 +112,8 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, na_values : list-like, default None List of additional strings to recognize as NA/NaN keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default NaN - values are overridden, otherwise they're appended to + If na_values are specified and keep_default_na is False the default + NaN values are overridden, otherwise they're appended to verbose : boolean, default False Indicate number of NA values placed in non-numeric columns @@ -124,14 +128,14 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, if skipfooter is not None: skip_footer = skipfooter - return self._parse_excel(sheetname, header=header, skiprows=skiprows, - index_col=index_col, - has_index_names=has_index_names, - parse_cols=parse_cols, - parse_dates=parse_dates, - date_parser=date_parser, na_values=na_values, - thousands=thousands, chunksize=chunksize, - skip_footer=skip_footer, **kwds) + return self._parse_excel(sheetname, header=header, skiprows=skiprows, + index_col=index_col, + has_index_names=has_index_names, + parse_cols=parse_cols, + parse_dates=parse_dates, + date_parser=date_parser, na_values=na_values, + thousands=thousands, 
chunksize=chunksize, + skip_footer=skip_footer, **kwds) def _should_parse(self, i, parse_cols): @@ -147,20 +151,21 @@ def _range2cols(areas): """ def _excel2num(x): "Convert Excel column name like 'AB' to 0-based column index" - return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, x.upper().strip(), 0) - 1 + return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, + x.upper().strip(), 0) - 1 cols = [] for rng in areas.split(','): if ':' in rng: rng = rng.split(':') - cols += range(_excel2num(rng[0]), _excel2num(rng[1]) + 1) + cols += lrange(_excel2num(rng[0]), _excel2num(rng[1]) + 1) else: cols.append(_excel2num(rng)) return cols if isinstance(parse_cols, int): return i <= parse_cols - elif isinstance(parse_cols, basestring): + elif isinstance(parse_cols, compat.string_types): return i in _range2cols(parse_cols) else: return i in parse_cols @@ -173,17 +178,17 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, XL_CELL_ERROR, XL_CELL_BOOLEAN) datemode = self.book.datemode - if isinstance(sheetname, basestring): + if isinstance(sheetname, compat.string_types): sheet = self.book.sheet_by_name(sheetname) else: # assume an integer if not a string sheet = self.book.sheet_by_index(sheetname) data = [] should_parse = {} - for i in xrange(sheet.nrows): + for i in range(sheet.nrows): row = [] - for j, (value, typ) in enumerate(izip(sheet.row_values(i), - sheet.row_types(i))): + for j, (value, typ) in enumerate(zip(sheet.row_values(i), + sheet.row_types(i))): if parse_cols is not None and j not in should_parse: should_parse[j] = self._should_parse(j, parse_cols) @@ -456,4 +461,3 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol): wks.write(startrow + cell.row, startcol + cell.col, val, style) - diff --git a/pandas/io/ga.py b/pandas/io/ga.py index 7d6277e2d45f9..dcbecd74886ac 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -5,6 +5,7 @@ 4. 
Download JSON secret file and move into same directory as this file """ from datetime import datetime +from pandas import compat import numpy as np from pandas import DataFrame import pandas as pd @@ -16,8 +17,9 @@ from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError +from pandas.compat import zip, u -TYPE_MAP = {u'INTEGER': int, u'FLOAT': float, u'TIME': int} +TYPE_MAP = {u('INTEGER'): int, u('FLOAT'): float, u('TIME'): int} NO_CALLBACK = auth.OOB_CALLBACK_URN DOC_URL = auth.DOC_URL @@ -261,7 +263,7 @@ def get_data(self, metrics, start_date, end_date=None, profile_id = profile.get('id') if index_col is None and dimensions is not None: - if isinstance(dimensions, basestring): + if isinstance(dimensions, compat.string_types): dimensions = [dimensions] index_col = _clean_index(list(dimensions), parse_dates) @@ -283,7 +285,7 @@ def _read(start, result_size): dayfirst=dayfirst, na_values=na_values, converters=converters, sort=sort) - except HttpError, inst: + except HttpError as inst: raise ValueError('Google API error %s: %s' % (inst.resp.status, inst._get_reason())) @@ -312,7 +314,7 @@ def _parse_data(self, rows, col_info, index_col, parse_dates=True, if isinstance(sort, bool) and sort: return df.sort_index() - elif isinstance(sort, (basestring, list, tuple, np.ndarray)): + elif isinstance(sort, (compat.string_types, list, tuple, np.ndarray)): return df.sort_index(by=sort) return df @@ -330,14 +332,14 @@ def create_query(self, profile_id, metrics, start_date, end_date=None, max_results=max_results, **kwargs) try: return self.service.data().ga().get(**qry) - except TypeError, error: + except TypeError as error: raise ValueError('Error making query: %s' % error) def format_query(ids, metrics, start_date, end_date=None, dimensions=None, segment=None, filters=None, sort=None, start_index=None, max_results=10000, **kwargs): - if isinstance(metrics, basestring): + if isinstance(metrics, compat.string_types): metrics = [metrics] met = ','.join(['ga:%s' % x for x in metrics]) @@ -356,7 +358,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, lst = [dimensions, filters, sort] [_maybe_add_arg(qry, n, d) for n, d in zip(names, lst)] - if isinstance(segment, basestring): + if isinstance(segment, compat.string_types): _maybe_add_arg(qry, 'segment', segment, 'dynamic::ga') elif isinstance(segment, int): _maybe_add_arg(qry, 'segment', segment, 'gaid:') @@ -374,7 +376,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, def _maybe_add_arg(query, field, data, prefix='ga'): if data is not None: - if isinstance(data, (basestring, int)): + if isinstance(data, (compat.string_types, int)): data = [data] data = ','.join(['%s:%s' % (prefix, x) for x in data]) query[field] = data @@ -382,8 +384,8 @@ def _maybe_add_arg(query, field, data, prefix='ga'): def _get_match(obj_store, name, id, **kwargs): key, val = None, None if len(kwargs) > 0: - key = kwargs.keys()[0] - val = kwargs.values()[0] + key = list(kwargs.keys())[0] + val = list(kwargs.values())[0] if name is None and id is None and key is None: return obj_store.get('items')[0] @@ -412,7 +414,7 @@ def _clean_index(index_dims, parse_dates): to_add.append('_'.join(lst)) to_remove.extend(lst) elif isinstance(parse_dates, dict): - for name, lst in parse_dates.iteritems(): + for name, lst in compat.iteritems(parse_dates): if isinstance(lst, (list, tuple, np.ndarray)): if _should_add(lst): to_add.append(name) @@ -435,12 +437,12 @@ def _get_column_types(header_info): def 
_get_dim_names(header_info): return [x['name'][3:] for x in header_info - if x['columnType'] == u'DIMENSION'] + if x['columnType'] == u('DIMENSION')] def _get_met_names(header_info): return [x['name'][3:] for x in header_info - if x['columnType'] == u'METRIC'] + if x['columnType'] == u('METRIC')] def _get_data_types(header_info): diff --git a/pandas/io/html.py b/pandas/io/html.py index 651a3eb507618..df94e0ffa2e79 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -6,8 +6,6 @@ import os import re import numbers -import urllib2 -import urlparse import collections from distutils.version import LooseVersion @@ -15,7 +13,9 @@ import numpy as np from pandas import DataFrame, MultiIndex, isnull -from pandas.io.common import _is_url, urlopen +from pandas.io.common import _is_url, urlopen, parse_url +from pandas.compat import range, lrange, lmap, u, map +from pandas import compat try: @@ -91,9 +91,9 @@ def _get_skiprows_iter(skiprows): A proper iterator to use to skip rows of a DataFrame. """ if isinstance(skiprows, slice): - return range(skiprows.start or 0, skiprows.stop, skiprows.step or 1) + return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1) elif isinstance(skiprows, numbers.Integral): - return range(skiprows) + return lrange(skiprows) elif isinstance(skiprows, collections.Container): return skiprows else: @@ -120,7 +120,7 @@ def _read(io): elif os.path.isfile(io): with open(io) as f: raw_text = f.read() - elif isinstance(io, basestring): + elif isinstance(io, compat.string_types): raw_text = io else: raise TypeError("Cannot read object of type " @@ -343,14 +343,14 @@ def _parse_raw_thead(self, table): thead = self._parse_thead(table) res = [] if thead: - res = map(self._text_getter, self._parse_th(thead[0])) + res = lmap(self._text_getter, self._parse_th(thead[0])) return np.array(res).squeeze() if res and len(res) == 1 else res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) res = [] if tfoot: - res = map(self._text_getter, self._parse_td(tfoot[0])) + res = lmap(self._text_getter, self._parse_td(tfoot[0])) return np.array(res).squeeze() if res and len(res) == 1 else res def _parse_raw_tbody(self, table): @@ -450,8 +450,8 @@ def _build_node_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = (u"@{k}='{v}'".format(k=k, v=v) for k, v in attrs.iteritems()) - return u'[{0}]'.format(' and '.join(s)) + s = (u("@{k}='{v}'").format(k=k, v=v) for k, v in compat.iteritems(attrs)) + return u('[{0}]').format(' and '.join(s)) _re_namespace = {'re': 'http://exslt.org/regular-expressions'} @@ -492,9 +492,9 @@ def _parse_tables(self, doc, match, kwargs): pattern = match.pattern # check all descendants for the given pattern - check_all_expr = u'//*' + check_all_expr = u('//*') if pattern: - check_all_expr += u"[re:test(text(), '{0}')]".format(pattern) + check_all_expr += u("[re:test(text(), '{0}')]").format(pattern) # go up the tree until we find a table check_table_expr = '/ancestor::table' @@ -549,7 +549,7 @@ def _build_doc(self): pass else: # not a url - scheme = urlparse.urlparse(self.io).scheme + scheme = parse_url(self.io).scheme if scheme not in _valid_schemes: # lxml can't parse it msg = ('{0} is not a valid url scheme, valid schemes are ' @@ -706,7 +706,7 @@ def _parser_dispatch(flavor): ImportError * If you do not have the requested `flavor` """ - valid_parsers = _valid_parsers.keys() + valid_parsers = list(_valid_parsers.keys()) if flavor not in valid_parsers: raise AssertionError('"{0!r}" is not a valid flavor, valid 
flavors are' ' {1}'.format(flavor, valid_parsers)) @@ -733,16 +733,16 @@ def _parser_dispatch(flavor): def _validate_parser_flavor(flavor): if flavor is None: flavor = ['lxml', 'bs4'] - elif isinstance(flavor, basestring): + elif isinstance(flavor, compat.string_types): flavor = [flavor] elif isinstance(flavor, collections.Iterable): - if not all(isinstance(flav, basestring) for flav in flavor): + if not all(isinstance(flav, compat.string_types) for flav in flavor): raise TypeError('{0} is not an iterable of strings'.format(flavor)) else: raise TypeError('{0} is not a valid "flavor"'.format(flavor)) flavor = list(flavor) - valid_flavors = _valid_parsers.keys() + valid_flavors = list(_valid_parsers.keys()) if not set(flavor) & set(valid_flavors): raise ValueError('{0} is not a valid set of flavors, valid flavors are' diff --git a/pandas/io/json.py b/pandas/io/json.py index d3bea36b57e77..7b6c97be21393 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -1,6 +1,7 @@ # pylint: disable-msg=E1101,W0613,W0603 -from StringIO import StringIO +from pandas.compat import StringIO, long +from pandas import compat import os from pandas import Series, DataFrame, to_datetime @@ -26,7 +27,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision else: raise NotImplementedError - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, compat.string_types): with open(path_or_buf,'w') as fh: fh.write(s) elif path_or_buf is None: @@ -182,7 +183,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, """ filepath_or_buffer,_ = get_filepath_or_buffer(path_or_buf) - if isinstance(filepath_or_buffer, basestring): + if isinstance(filepath_or_buffer, compat.string_types): if os.path.exists(filepath_or_buffer): with open(filepath_or_buffer,'r') as fh: json = fh.read() @@ -342,7 +343,7 @@ def _try_convert_to_date(self, data): # ignore numbers that are out of range if issubclass(new_data.dtype.type,np.number): - if not ((new_data == iNaT) | (new_data > 31536000000000000L)).all(): + if not ((new_data == iNaT) | (new_data > long(31536000000000000))).all(): return data, False try: @@ -369,9 +370,9 @@ def _parse_no_numpy(self): orient = self.orient if orient == "split": decoded = dict((str(k), v) - for k, v in loads( + for k, v in compat.iteritems(loads( json, - precise_float=self.precise_float).iteritems()) + precise_float=self.precise_float))) self.obj = Series(dtype=None, **decoded) else: self.obj = Series( @@ -384,7 +385,7 @@ def _parse_numpy(self): if orient == "split": decoded = loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - decoded = dict((str(k), v) for k, v in decoded.iteritems()) + decoded = dict((str(k), v) for k, v in compat.iteritems(decoded)) self.obj = Series(**decoded) elif orient == "columns" or orient == "index": self.obj = Series(*loads(json, dtype=None, numpy=True, @@ -417,7 +418,7 @@ def _parse_numpy(self): elif orient == "split": decoded = loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - decoded = dict((str(k), v) for k, v in decoded.iteritems()) + decoded = dict((str(k), v) for k, v in compat.iteritems(decoded)) self.obj = DataFrame(**decoded) elif orient == "values": self.obj = DataFrame(loads(json, dtype=None, numpy=True, @@ -436,9 +437,9 @@ def _parse_no_numpy(self): loads(json, precise_float=self.precise_float), dtype=None) elif orient == "split": decoded = dict((str(k), v) - for k, v in loads( + for k, v in compat.iteritems(loads( json, - 
precise_float=self.precise_float).iteritems()) + precise_float=self.precise_float))) self.obj = DataFrame(dtype=None, **decoded) elif orient == "index": self.obj = DataFrame( @@ -467,7 +468,7 @@ def _try_convert_dates(self): def is_ok(col): """ return if this col is ok to try for a date parse """ - if not isinstance(col, basestring): return False + if not isinstance(col, compat.string_types): return False if (col.endswith('_at') or col.endswith('_time') or diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3bcfb66d32092..f76b1c563a7a5 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1,9 +1,10 @@ """ Module contains tools for processing files into DataFrames or other objects """ -from StringIO import StringIO +from __future__ import print_function +from pandas.compat import range, lrange, StringIO, lzip, zip +from pandas import compat import re -from itertools import izip import csv from warnings import warn @@ -13,7 +14,7 @@ from pandas.core.frame import DataFrame import datetime import pandas.core.common as com -from pandas.util import py3compat +from pandas import compat from pandas.io.date_converters import generic_parser from pandas.io.common import get_filepath_or_buffer @@ -482,7 +483,7 @@ def _get_options_with_defaults(self, engine): kwds = self.orig_options options = {} - for argname, default in _parser_defaults.iteritems(): + for argname, default in compat.iteritems(_parser_defaults): if argname in kwds: value = kwds[argname] else: @@ -490,7 +491,7 @@ def _get_options_with_defaults(self, engine): options[argname] = value - for argname, default in _c_parser_defaults.iteritems(): + for argname, default in compat.iteritems(_c_parser_defaults): if argname in kwds: value = kwds[argname] if engine != 'c' and value != default: @@ -499,7 +500,7 @@ def _get_options_with_defaults(self, engine): options[argname] = value if engine == 'python-fwf': - for argname, default in _fwf_defaults.iteritems(): + for argname, default in compat.iteritems(_fwf_defaults): if argname in kwds: value = kwds[argname] options[argname] = value @@ -558,7 +559,7 @@ def _clean_options(self, options, engine): na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) if com.is_integer(skiprows): - skiprows = range(skiprows) + skiprows = lrange(skiprows) skiprows = set() if skiprows is None else set(skiprows) # put stuff back @@ -727,7 +728,7 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_ field_count = len(header[0]) def extract(r): return tuple([ r[i] for i in range(field_count) if i not in sic ]) - columns = zip(*[ extract(r) for r in header ]) + columns = lzip(*[ extract(r) for r in header ]) names = ic + columns # if we find 'Unnamed' all of a single level, then our header was too long @@ -784,7 +785,7 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): def _get_simple_index(self, data, columns): def ix(col): - if not isinstance(col, basestring): + if not isinstance(col, compat.string_types): return col raise ValueError('Index %s invalid' % col) index = None @@ -807,7 +808,7 @@ def ix(col): def _get_complex_date_index(self, data, col_names): def _get_name(icol): - if isinstance(icol, basestring): + if isinstance(icol, compat.string_types): return icol if col_names is None: @@ -851,7 +852,7 @@ def _agg_index(self, index, try_parse_dates=True): col_na_values, col_na_fvalues = _get_na_values(col_name, self.na_values, self.na_fvalues) - + arr, _ = self._convert_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) @@ 
-862,7 +863,7 @@ def _agg_index(self, index, try_parse_dates=True): def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, converters=None): result = {} - for c, values in dct.iteritems(): + for c, values in compat.iteritems(dct): conv_f = None if converters is None else converters.get(c, None) col_na_values, col_na_fvalues = _get_na_values(c, na_values, na_fvalues) coerce_type = True @@ -874,7 +875,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, coerce_type) result[c] = cvals if verbose and na_count: - print ('Filled %d NA values in column %s' % (na_count, str(c))) + print('Filled %d NA values in column %s' % (na_count, str(c))) return result def _convert_types(self, values, na_values, try_num_bool=True): @@ -928,7 +929,7 @@ def _exclude_implicit_index(self, alldata): offset += 1 data[col] = alldata[i + offset] else: - data = dict((k, v) for k, v in izip(self.orig_names, alldata)) + data = dict((k, v) for k, v in zip(self.orig_names, alldata)) return data @@ -946,7 +947,7 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) if 'utf-16' in (kwds.get('encoding') or ''): - if isinstance(src, basestring): + if isinstance(src, compat.string_types): src = open(src, 'rb') src = com.UTF8Recoder(src, kwds['encoding']) kwds['encoding'] = 'utf-8' @@ -976,7 +977,7 @@ def __init__(self, src, **kwds): self.names = ['X%d' % i for i in range(self._reader.table_width)] else: - self.names = range(self._reader.table_width) + self.names = lrange(self._reader.table_width) # XXX self._set_noconvert_columns() @@ -1227,7 +1228,7 @@ def __init__(self, f, **kwds): self.comment = kwds['comment'] self._comment_lines = [] - if isinstance(f, basestring): + if isinstance(f, compat.string_types): f = com._get_handle(f, 'r', encoding=self.encoding, compression=self.compression) elif self.compression: @@ -1317,7 +1318,7 @@ class MyDialect(csv.Dialect): def _read(): line = next(f) pat = re.compile(sep) - if (py3compat.PY3 and isinstance(line, bytes)): + if (compat.PY3 and isinstance(line, bytes)): yield pat.split(line.decode('utf-8').strip()) for line in f: yield pat.split(line.decode('utf-8').strip()) @@ -1375,7 +1376,7 @@ def _convert_data(self, data): # apply converters clean_conv = {} - for col, f in self.converters.iteritems(): + for col, f in compat.iteritems(self.converters): if isinstance(col, int) and col not in self.orig_names: col = self.orig_names[col] clean_conv[col] = f @@ -1450,7 +1451,7 @@ def _infer_columns(self): if self.prefix: columns = [ ['X%d' % i for i in range(ncols)] ] else: - columns = [ range(ncols) ] + columns = [ lrange(ncols) ] else: columns = [ names ] @@ -1487,7 +1488,7 @@ def _check_comments(self, lines): for l in lines: rl = [] for x in l: - if (not isinstance(x, basestring) or + if (not isinstance(x, compat.string_types) or self.comment not in x): rl.append(x) else: @@ -1506,7 +1507,7 @@ def _check_thousands(self, lines): for l in lines: rl = [] for x in l: - if (not isinstance(x, basestring) or + if (not isinstance(x, compat.string_types) or self.thousands not in x or nonnum.search(x.strip())): rl.append(x) @@ -1548,7 +1549,7 @@ def _get_index_name(self, columns): # column and index names on diff rows implicit_first_cols = 0 - self.index_col = range(len(line)) + self.index_col = lrange(len(line)) self.buf = self.buf[1:] for c in reversed(line): @@ -1559,7 +1560,7 @@ def _get_index_name(self, columns): if implicit_first_cols > 0: self._implicit_index = True if self.index_col is None: - self.index_col = 
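range, zip, and map return one-shot iterators on Python 3, so call sites that index, mutate, or reuse the result are rewritten with the list-returning wrappers lrange, lzip, and lmap seen throughout this diff. A minimal sketch of those wrappers:

    def lrange(*args, **kwargs):
        return list(range(*args, **kwargs))

    def lzip(*args, **kwargs):
        return list(zip(*args, **kwargs))

    def lmap(*args, **kwargs):
        return list(map(*args, **kwargs))

    skiprows = lrange(3)              # [0, 1, 2], a real list
    columns = lzip('ab', [1, 2])      # [('a', 1), ('b', 2)], indexable and reusable
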
range(implicit_first_cols) + self.index_col = lrange(implicit_first_cols) index_name = None else: @@ -1629,7 +1630,7 @@ def _get_lines(self, rows=None): new_rows = [] try: if rows is not None: - for _ in xrange(rows): + for _ in range(rows): new_rows.append(next(source)) lines.extend(new_rows) else: @@ -1638,7 +1639,7 @@ def _get_lines(self, rows=None): try: new_rows.append(next(source)) rows += 1 - except csv.Error, inst: + except csv.Error as inst: if 'newline inside string' in str(inst): row_num = str(self.pos + rows) msg = ('EOF inside string starting with line ' @@ -1729,7 +1730,7 @@ def _isindex(colspec): elif isinstance(parse_spec, dict): # dict of new name to column list - for new_name, colspec in parse_spec.iteritems(): + for new_name, colspec in compat.iteritems(parse_spec): if new_name in data_dict: raise ValueError('Date column %s already in dict' % new_name) @@ -1778,7 +1779,7 @@ def _clean_na_values(na_values, keep_default_na=True): na_fvalues = set() elif isinstance(na_values, dict): if keep_default_na: - for k, v in na_values.iteritems(): + for k, v in compat.iteritems(na_values): v = set(list(v)) | _NA_VALUES na_values[k] = v na_fvalues = dict([ (k, _floatify_na_values(v)) for k, v in na_values.items() ]) @@ -1806,7 +1807,7 @@ def _clean_index_names(columns, index_col): index_col = list(index_col) for i, c in enumerate(index_col): - if isinstance(c, basestring): + if isinstance(c, compat.string_types): index_names.append(c) for j, name in enumerate(cp_cols): if name == c: @@ -1819,7 +1820,7 @@ def _clean_index_names(columns, index_col): index_names.append(name) # hack - if isinstance(index_names[0], basestring) and 'Unnamed' in index_names[0]: + if isinstance(index_names[0], compat.string_types) and 'Unnamed' in index_names[0]: index_names[0] = None return index_names, columns, index_col @@ -1900,14 +1901,13 @@ def _get_col_names(colspec, columns): def _concat_date_cols(date_cols): if len(date_cols) == 1: - if py3compat.PY3: - return np.array([unicode(x) for x in date_cols[0]], dtype=object) + if compat.PY3: + return np.array([compat.text_type(x) for x in date_cols[0]], dtype=object) else: - return np.array([str(x) if not isinstance(x, basestring) else x + return np.array([str(x) if not isinstance(x, compat.string_types) else x for x in date_cols[0]], dtype=object) - # stripped = [map(str.strip, x) for x in date_cols] - rs = np.array([' '.join([unicode(y) for y in x]) + rs = np.array([' '.join([compat.text_type(y) for y in x]) for x in zip(*date_cols)], dtype=object) return rs diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 765c0cd46d4e5..efa8bdb0b123b 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,5 +1,4 @@ -import cPickle as pkl - +from pandas.compat import cPickle as pkl, PY3 def to_pickle(obj, path): """ @@ -36,7 +35,6 @@ def read_pickle(path): with open(path, 'rb') as fh: return pkl.load(fh) except: - from pandas.util.py3compat import PY3 if PY3: with open(path, 'rb') as fh: return pkl.load(fh, encoding='latin1') diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a5a8355567e23..a7daa7e7c8691 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,9 +2,12 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ +from __future__ import print_function # pylint: disable-msg=E1101,W0613,W0603 from datetime import datetime, date +from pandas.compat import map, range, zip, lrange, lmap, u +from pandas import compat import time import re import copy @@ -27,7 +30,7 @@ from 
pandas.core.index import _ensure_index import pandas.core.common as com from pandas.tools.merge import concat -from pandas.util import py3compat +from pandas import compat from pandas.io.common import PerformanceWarning import pandas.lib as lib @@ -53,7 +56,7 @@ def _ensure_decoded(s): def _ensure_encoding(encoding): # set the encoding if we need if encoding is None: - if py3compat.PY3: + if compat.PY3: encoding = _default_encoding return encoding @@ -87,40 +90,40 @@ class AttributeConflictWarning(Warning): # map object types _TYPE_MAP = { - Series : u'series', - SparseSeries : u'sparse_series', - TimeSeries : u'series', - DataFrame : u'frame', - SparseDataFrame : u'sparse_frame', - Panel : u'wide', - Panel4D : u'ndim', - SparsePanel : u'sparse_panel' + Series: u('series'), + SparseSeries: u('sparse_series'), + TimeSeries: u('series'), + DataFrame: u('frame'), + SparseDataFrame: u('sparse_frame'), + Panel: u('wide'), + Panel4D: u('ndim'), + SparsePanel: u('sparse_panel') } # storer class map _STORER_MAP = { - u'TimeSeries' : 'LegacySeriesStorer', - u'Series' : 'LegacySeriesStorer', - u'DataFrame' : 'LegacyFrameStorer', - u'DataMatrix' : 'LegacyFrameStorer', - u'series' : 'SeriesStorer', - u'sparse_series' : 'SparseSeriesStorer', - u'frame' : 'FrameStorer', - u'sparse_frame' : 'SparseFrameStorer', - u'wide' : 'PanelStorer', - u'sparse_panel' : 'SparsePanelStorer', + u('TimeSeries') : 'LegacySeriesStorer', + u('Series') : 'LegacySeriesStorer', + u('DataFrame') : 'LegacyFrameStorer', + u('DataMatrix') : 'LegacyFrameStorer', + u('series') : 'SeriesStorer', + u('sparse_series') : 'SparseSeriesStorer', + u('frame') : 'FrameStorer', + u('sparse_frame') : 'SparseFrameStorer', + u('wide') : 'PanelStorer', + u('sparse_panel') : 'SparsePanelStorer', } # table class map _TABLE_MAP = { - u'generic_table' : 'GenericTable', - u'appendable_frame' : 'AppendableFrameTable', - u'appendable_multiframe' : 'AppendableMultiFrameTable', - u'appendable_panel' : 'AppendablePanelTable', - u'appendable_ndim' : 'AppendableNDimTable', - u'worm' : 'WORMTable', - u'legacy_frame' : 'LegacyFrameTable', - u'legacy_panel' : 'LegacyPanelTable', + u('generic_table') : 'GenericTable', + u('appendable_frame') : 'AppendableFrameTable', + u('appendable_multiframe') : 'AppendableMultiFrameTable', + u('appendable_panel') : 'AppendablePanelTable', + u('appendable_ndim') : 'AppendableNDimTable', + u('worm') : 'WORMTable', + u('legacy_frame') : 'LegacyFrameTable', + u('legacy_panel') : 'LegacyPanelTable', } # axes map @@ -189,7 +192,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, app else: f = lambda store: store.put(key, value, **kwargs) - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, compat.string_types): with get_store(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: f(store) else: @@ -199,7 +202,7 @@ def read_hdf(path_or_buf, key, **kwargs): """ read from the store, closeit if we opened it """ f = lambda store, auto_close: store.select(key, auto_close=auto_close, **kwargs) - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, compat.string_types): # can't auto open/close if we are using an iterator # so delegate to the iterator @@ -319,7 +322,7 @@ def __len__(self): def __unicode__(self): output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) - if len(self.keys()): + if len(list(self.keys())): keys = [] values = [] @@ -367,6 +370,8 @@ def open(self, mode='a', warn=True): self._mode = mode if warn and mode == 'w': # 
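The wholesale u'...' to u('...') rewrite in pytables.py works around Python 3.2, which was still supported at the time and rejected the u string prefix outright (it returned in 3.3). A simplified sketch of the helper; the real six-style version also escapes backslashes before decoding:

    import sys

    if sys.version_info[0] >= 3:
        def u(s):
            return s                                # PY3 literals are unicode already
    else:
        def u(s):
            return unicode(s, 'unicode_escape')     # PY2: build a unicode object

    pandas_kind = u('frame_table')                  # unicode on PY2, str on PY3
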
pragma: no cover while True: + if compat.PY3: + raw_input = input response = raw_input("Re-opening as mode='w' will delete the " "current file. Continue (y/n)?") if response == 'y': @@ -385,9 +390,9 @@ def open(self, mode='a', warn=True): try: self._handle = h5_open(self._path, self._mode) - except IOError, e: # pragma: no cover + except IOError as e: # pragma: no cover if 'can not be written' in str(e): - print ('Opening %s in read-only mode' % self._path) + print('Opening %s in read-only mode' % self._path) self._handle = h5_open(self._path, 'r') else: raise @@ -513,7 +518,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star # default to single select if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] - if isinstance(keys, basestring): + if isinstance(keys, compat.string_types): return self.select(key=keys, where=where, columns=columns, start=start, stop=stop, iterator=iterator, chunksize=chunksize, **kwargs) if not isinstance(keys, (list, tuple)): @@ -545,7 +550,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star try: c = self.select_as_coordinates(selector, where, start=start, stop=stop) nrows = len(c) - except (Exception), detail: + except (Exception) as detail: raise ValueError("invalid selector [%s]" % selector) def func(_start, _stop): @@ -744,7 +749,7 @@ def groups(self): """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """ _tables() return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr( - g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u'table') ] + g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u('table')) ] def get_node(self, key): """ return the node with the key or None if it does not exist """ @@ -782,7 +787,7 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None """ new_store = HDFStore(file, mode = mode, complib = complib, complevel = complevel, fletcher32 = fletcher32) if keys is None: - keys = self.keys() + keys = list(self.keys()) if not isinstance(keys, (tuple,list)): keys = [ keys ] for k in keys: @@ -823,8 +828,8 @@ def error(t): _tables() if getattr(group,'table',None) or isinstance(group,_table_mod.table.Table): - pt = u'frame_table' - tt = u'generic_table' + pt = u('frame_table') + tt = u('generic_table') else: raise TypeError("cannot create a storer if the object is not existing nor a value are passed") else: @@ -836,10 +841,10 @@ def error(t): # we are actually a table if table or append: - pt += u'_table' + pt += u('_table') # a storer node - if u'table' not in pt: + if u('table') not in pt: try: return globals()[_STORER_MAP[pt]](self, group, **kwargs) except: @@ -851,26 +856,26 @@ def error(t): # if we are a writer, determin the tt if value is not None: - if pt == u'frame_table': + if pt == u('frame_table'): index = getattr(value,'index',None) if index is not None: if index.nlevels == 1: - tt = u'appendable_frame' + tt = u('appendable_frame') elif index.nlevels > 1: - tt = u'appendable_multiframe' - elif pt == u'wide_table': - tt = u'appendable_panel' - elif pt == u'ndim_table': - tt = u'appendable_ndim' + tt = u('appendable_multiframe') + elif pt == u('wide_table'): + tt = u('appendable_panel') + elif pt == u('ndim_table'): + tt = u('appendable_ndim') else: # distiguish between a frame/table - tt = u'legacy_panel' + tt = u('legacy_panel') try: fields = group.table._v_attrs.fields - if len(fields) == 1 and 
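Two more Python 3 fixes recur in the hunk above: raw_input was renamed to input, and the old "except Exc, name" syntax is gone in favor of "except Exc as name", which both interpreters accept. A short sketch of both patterns (demo path invented):

    import sys

    if sys.version_info[0] >= 3:
        raw_input = input               # rebind so the PY2 name works everywhere

    try:
        fh = open('/no/such/file/for/demo')
    except IOError as e:                # cross-version spelling of `except IOError, e`
        print('Opening failed: %s' % e)
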
fields[0] == u'value': - tt = u'legacy_frame' + if len(fields) == 1 and fields[0] == u('value'): + tt = u('legacy_frame') except: pass @@ -1140,7 +1145,7 @@ def __iter__(self): def maybe_set_size(self, min_itemsize=None, **kwargs): """ maybe set a string col itemsize: min_itemsize can be an interger or a dict with this columns name with an integer size """ - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == u('string'): if isinstance(min_itemsize, dict): min_itemsize = min_itemsize.get(self.name) @@ -1160,7 +1165,7 @@ def validate_col(self, itemsize=None): # validate this column for string truncation (or reset to the max size) dtype = getattr(self, 'dtype', None) - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == u('string'): c = self.col if c is not None: @@ -1290,7 +1295,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, data=None, bloc super(DataCol, self).__init__( values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None - self.dtype_attr = u"%s_dtype" % self.name + self.dtype_attr = u("%s_dtype") % self.name self.set_data(data) def __unicode__(self): @@ -1319,15 +1324,15 @@ def set_kind(self): # set my kind if we can if self.dtype is not None: dtype = _ensure_decoded(self.dtype) - if dtype.startswith(u'string') or dtype.startswith(u'bytes'): + if dtype.startswith(u('string')) or dtype.startswith(u('bytes')): self.kind = 'string' - elif dtype.startswith(u'float'): + elif dtype.startswith(u('float')): self.kind = 'float' - elif dtype.startswith(u'int') or dtype.startswith(u'uint'): + elif dtype.startswith(u('int')) or dtype.startswith(u('uint')): self.kind = 'integer' - elif dtype.startswith(u'date'): + elif dtype.startswith(u('date')): self.kind = 'datetime' - elif dtype.startswith(u'bool'): + elif dtype.startswith(u('bool')): self.kind = 'bool' else: raise AssertionError("cannot interpret dtype of [%s] in [%s]" % (dtype,self)) @@ -1501,7 +1506,7 @@ def convert(self, values, nan_rep, encoding): dtype = _ensure_decoded(self.dtype) # reverse converts - if dtype == u'datetime64': + if dtype == u('datetime64'): # recreate the timezone if self.tz is not None: @@ -1514,10 +1519,10 @@ def convert(self, values, nan_rep, encoding): else: self.data = np.asarray(self.data, dtype='M8[ns]') - elif dtype == u'date': + elif dtype == u('date'): self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) - elif dtype == u'datetime': + elif dtype == u('datetime'): self.data = np.array( [datetime.fromtimestamp(v) for v in self.data], dtype=object) @@ -1529,7 +1534,7 @@ def convert(self, values, nan_rep, encoding): self.data = self.data.astype('O') # convert nans / decode - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == u('string'): self.data = _unconvert_string_array(self.data, nan_rep=nan_rep, encoding=encoding) return self @@ -1553,7 +1558,7 @@ class DataIndexableCol(DataCol): @property def is_searchable(self): - return _ensure_decoded(self.kind) == u'string' + return _ensure_decoded(self.kind) == u('string') def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize) @@ -1724,7 +1729,7 @@ class GenericStorer(Storer): """ a generified storer version """ _index_type_map = { DatetimeIndex: 'datetime', PeriodIndex: 'period'} - _reverse_index_map = dict([ (v,k) for k, v in _index_type_map.iteritems() ]) + _reverse_index_map = dict([ (v,k) for k, v in compat.iteritems(_index_type_map) ]) attributes = [] # indexer helpders @@ 
-1790,7 +1795,7 @@ def read_array(self, key): else: ret = data - if dtype == u'datetime64': + if dtype == u('datetime64'): ret = np.array(ret, dtype='M8[ns]') if transposed: @@ -1801,13 +1806,13 @@ def read_array(self, key): def read_index(self, key): variety = _ensure_decoded(getattr(self.attrs, '%s_variety' % key)) - if variety == u'multi': + if variety == u('multi'): return self.read_multi_index(key) - elif variety == u'block': + elif variety == u('block'): return self.read_block_index(key) - elif variety == u'sparseint': + elif variety == u('sparseint'): return self.read_sparse_intindex(key) - elif variety == u'regular': + elif variety == u('regular'): _, index = self.read_index_node(getattr(self.group, key)) return index else: # pragma: no cover @@ -1916,13 +1921,13 @@ def read_index_node(self, node): factory = self._get_index_factory(index_class) kwargs = {} - if u'freq' in node._v_attrs: + if u('freq') in node._v_attrs: kwargs['freq'] = node._v_attrs['freq'] - if u'tz' in node._v_attrs: + if u('tz') in node._v_attrs: kwargs['tz'] = node._v_attrs['tz'] - if kind in (u'date', u'datetime'): + if kind in (u('date'), u('datetime')): index = factory(_unconvert_index(data, kind, encoding=self.encoding), dtype=object, **kwargs) else: @@ -2031,7 +2036,7 @@ def read(self, **kwargs): return DataFrame(values, index=index, columns=columns) class SeriesStorer(GenericStorer): - pandas_kind = u'series' + pandas_kind = u('series') attributes = ['name'] @property @@ -2058,7 +2063,7 @@ def write(self, obj, **kwargs): self.attrs.name = obj.name class SparseSeriesStorer(GenericStorer): - pandas_kind = u'sparse_series' + pandas_kind = u('sparse_series') attributes = ['name','fill_value','kind'] def read(self, **kwargs): @@ -2067,7 +2072,7 @@ def read(self, **kwargs): sp_values = self.read_array('sp_values') sp_index = self.read_index('sp_index') return SparseSeries(sp_values, index=index, sparse_index=sp_index, - kind=self.kind or u'block', fill_value=self.fill_value, + kind=self.kind or u('block'), fill_value=self.fill_value, name=self.name) def write(self, obj, **kwargs): @@ -2080,7 +2085,7 @@ def write(self, obj, **kwargs): self.attrs.kind = obj.kind class SparseFrameStorer(GenericStorer): - pandas_kind = u'sparse_frame' + pandas_kind = u('sparse_frame') attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2099,7 +2104,7 @@ def read(self, **kwargs): def write(self, obj, **kwargs): """ write it as a collection of individual sparse series """ super(SparseFrameStorer, self).write(obj, **kwargs) - for name, ss in obj.iteritems(): + for name, ss in compat.iteritems(obj): key = 'sparse_series_%s' % name if key not in self.group._v_children: node = self._handle.createGroup(self.group, key) @@ -2112,7 +2117,7 @@ def write(self, obj, **kwargs): self.write_index('columns', obj.columns) class SparsePanelStorer(GenericStorer): - pandas_kind = u'sparse_panel' + pandas_kind = u('sparse_panel') attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2135,7 +2140,7 @@ def write(self, obj, **kwargs): self.attrs.default_kind = obj.default_kind self.write_index('items', obj.items) - for name, sdf in obj.iterkv(): + for name, sdf in compat.iteritems(obj): key = 'sparse_frame_%s' % name if key not in self.group._v_children: node = self._handle.createGroup(self.group, key) @@ -2183,7 +2188,7 @@ def read(self, **kwargs): self.validate_read(kwargs) axes = [] - for i in xrange(self.ndim): + for i in range(self.ndim): ax = self.read_index('axis%d' % i) axes.append(ax) @@ 
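xrange no longer exists on Python 3; where a loop only iterates, a bare range works under both interpreters (on PY2 it eagerly builds a list, which is fine for small counts such as an axis count). That is all the xrange(self.ndim) to range(self.ndim) change above does:

    ndim = 3
    axes = ['axis%d' % i for i in range(ndim)]    # ['axis0', 'axis1', 'axis2']
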
-2216,11 +2221,11 @@ def write(self, obj, **kwargs): self.write_index('block%d_items' % i, blk.items) class FrameStorer(BlockManagerStorer): - pandas_kind = u'frame' + pandas_kind = u('frame') obj_type = DataFrame class PanelStorer(BlockManagerStorer): - pandas_kind = u'wide' + pandas_kind = u('wide') obj_type = Panel is_shape_reversed = True @@ -2245,7 +2250,7 @@ class Table(Storer): levels : the names of levels """ - pandas_kind = u'wide_table' + pandas_kind = u('wide_table') table_type = None levels = 1 is_table = True @@ -2319,7 +2324,7 @@ def nrows_expected(self): @property def is_exists(self): """ has this table been created """ - return u'table' in self.group + return u('table') in self.group @property def storable(self): @@ -2713,9 +2718,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, col.set_pos(j) self.values_axes.append(col) - except (NotImplementedError, ValueError, TypeError), e: + except (NotImplementedError, ValueError, TypeError) as e: raise e - except (Exception), detail: + except (Exception) as detail: raise Exception("cannot find the correct atom type -> [dtype->%s,items->%s] %s" % (b.dtype.name, b.items, str(detail))) j += 1 @@ -2838,7 +2843,7 @@ class WORMTable(Table): table. writing is a one-time operation the data are stored in a format that allows for searching the data on disk """ - table_type = u'worm' + table_type = u('worm') def read(self, **kwargs): """ read the indicies and the indexing array, calculate offset rows and @@ -2863,7 +2868,7 @@ class LegacyTable(Table): IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'), DataCol(name='fields', cname='values', kind_attr='fields', pos=2)] - table_type = u'legacy' + table_type = u('legacy') ndim = 3 def write(self, **kwargs): @@ -2953,8 +2958,8 @@ def read(self, where=None, columns=None, **kwargs): class LegacyFrameTable(LegacyTable): """ support the legacy frame table """ - pandas_kind = u'frame_table' - table_type = u'legacy_frame' + pandas_kind = u('frame_table') + table_type = u('legacy_frame') obj_type = Panel def read(self, *args, **kwargs): @@ -2963,14 +2968,14 @@ def read(self, *args, **kwargs): class LegacyPanelTable(LegacyTable): """ support the legacy panel table """ - table_type = u'legacy_panel' + table_type = u('legacy_panel') obj_type = Panel class AppendableTable(LegacyTable): """ suppor the new appendable table formats """ _indexables = None - table_type = u'appendable' + table_type = u('appendable') def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=None, @@ -3043,7 +3048,7 @@ def write_data(self, chunksize): rows = self.nrows_expected chunks = int(rows / chunksize) + 1 - for i in xrange(chunks): + for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, rows) if start_i >= end_i: @@ -3068,14 +3073,14 @@ def write_data_chunk(self, indexes, mask, search, values): args = list(indexes) args.extend([self.dtype, mask, search, values]) rows = func(*args) - except (Exception), detail: + except Exception as detail: raise Exception("cannot create row-data -> %s" % str(detail)) try: if len(rows): self.table.append(rows) self.table.flush() - except (Exception), detail: + except Exception as detail: raise Exception("tables cannot write this data -> %s" % str(detail)) def delete(self, where=None, **kwargs): @@ -3120,7 +3125,7 @@ def delete(self, where=None, **kwargs): # we must remove in reverse order! 
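write_data above splits the appended rows into fixed-size chunks before handing them to PyTables. The chunking arithmetic on its own, as a generator (function name is mine):

    def iter_chunks(nrows, chunksize):
        # yield (start, stop) pairs covering [0, nrows) in chunksize steps
        chunks = int(nrows / chunksize) + 1
        for i in range(chunks):
            start = i * chunksize
            stop = min((i + 1) * chunksize, nrows)
            if start >= stop:
                break
            yield start, stop

    list(iter_chunks(10, 4))    # [(0, 4), (4, 8), (8, 10)]
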
pg = groups.pop() for g in reversed(groups): - rows = l.take(range(g, pg)) + rows = l.take(lrange(g, pg)) table.removeRows(start=rows[rows.index[0] ], stop=rows[rows.index[-1]] + 1) pg = g @@ -3133,8 +3138,8 @@ def delete(self, where=None, **kwargs): class AppendableFrameTable(AppendableTable): """ suppor the new appendable table formats """ - pandas_kind = u'frame_table' - table_type = u'appendable_frame' + pandas_kind = u('frame_table') + table_type = u('appendable_frame') ndim = 2 obj_type = DataFrame @@ -3188,8 +3193,8 @@ def read(self, where=None, columns=None, **kwargs): class GenericTable(AppendableFrameTable): """ a table that read/writes the generic pytables table format """ - pandas_kind = u'frame_table' - table_type = u'generic_table' + pandas_kind = u('frame_table') + table_type = u('generic_table') ndim = 2 obj_type = DataFrame @@ -3233,13 +3238,13 @@ def write(self, **kwargs): class AppendableMultiFrameTable(AppendableFrameTable): """ a frame with a multi-index """ - table_type = u'appendable_multiframe' + table_type = u('appendable_multiframe') obj_type = DataFrame ndim = 2 @property def table_type_short(self): - return u'appendable_multi' + return u('appendable_multi') def write(self, obj, data_columns=None, **kwargs): if data_columns is None: @@ -3264,7 +3269,7 @@ def read(self, columns=None, **kwargs): class AppendablePanelTable(AppendableTable): """ suppor the new appendable table formats """ - table_type = u'appendable_panel' + table_type = u('appendable_panel') ndim = 3 obj_type = Panel @@ -3281,7 +3286,7 @@ def is_transposed(self): class AppendableNDimTable(AppendablePanelTable): """ suppor the new appendable table formats """ - table_type = u'appendable_ndim' + table_type = u('appendable_ndim') ndim = 4 obj_type = Panel4D @@ -3349,18 +3354,18 @@ def _convert_index(index, encoding=None): def _unconvert_index(data, kind, encoding=None): kind = _ensure_decoded(kind) - if kind == u'datetime64': + if kind == u('datetime64'): index = DatetimeIndex(data) - elif kind == u'datetime': + elif kind == u('datetime'): index = np.array([datetime.fromtimestamp(v) for v in data], dtype=object) - elif kind == u'date': + elif kind == u('date'): index = np.array([date.fromtimestamp(v) for v in data], dtype=object) - elif kind in (u'integer', u'float'): + elif kind in (u('integer'), u('float')): index = np.array(data) - elif kind in (u'string'): + elif kind in (u('string')): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) - elif kind == u'object': + elif kind == u('object'): index = np.array(data[0]) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) @@ -3368,11 +3373,11 @@ def _unconvert_index(data, kind, encoding=None): def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): kind = _ensure_decoded(kind) - if kind == u'datetime': + if kind == u('datetime'): index = lib.time64_to_datetime(data) - elif kind in (u'integer'): + elif kind in (u('integer')): index = np.array(data, dtype=object) - elif kind in (u'string'): + elif kind in (u('string')): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) @@ -3430,7 +3435,7 @@ def _get_converter(kind, encoding): def _need_convert(kind): kind = _ensure_decoded(kind) - if kind in (u'datetime', u'datetime64', u'string'): + if kind in (u('datetime'), u('datetime64'), u('string')): return True return False @@ -3496,7 +3501,7 @@ def __init__(self, field, op=None, value=None, queryables=None, 
encoding=None): self.value = field.value # a string expression (or just the field) - elif isinstance(field, basestring): + elif isinstance(field, compat.string_types): # is a term is passed s = self._search.match(field) @@ -3509,7 +3514,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.field = field # is an op passed? - if isinstance(op, basestring) and op in self._ops: + if isinstance(op, compat.string_types) and op in self._ops: self.op = op self.value = value else: @@ -3530,7 +3535,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): # we have valid conditions if self.op in ['>', '>=', '<', '<=']: - if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,basestring): + if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,compat.string_types): raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self)) if not is_list_like(self.value): @@ -3540,7 +3545,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.eval() def __unicode__(self): - attrs = map(pprint_thing, (self.field, self.op, self.value)) + attrs = lmap(pprint_thing, (self.field, self.op, self.value)) return "field->%s,op->%s,value->%s" % tuple(attrs) @property @@ -3620,32 +3625,36 @@ def stringify(value): return value kind = _ensure_decoded(self.kind) - if kind == u'datetime64' or kind == u'datetime' : + if kind == u('datetime64') or kind == u('datetime'): v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v,v.value,kind) - elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': + elif (isinstance(v, datetime) or hasattr(v, 'timetuple') + or kind == u('date')): v = time.mktime(v.timetuple()) return TermValue(v,Timestamp(v),kind) - elif kind == u'integer': + elif kind == u('integer'): v = int(float(v)) return TermValue(v,v,kind) - elif kind == u'float': + elif kind == u('float'): v = float(v) return TermValue(v,v,kind) - elif kind == u'bool': - if isinstance(v, basestring): - v = not v.strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] + elif kind == u('bool'): + if isinstance(v, compat.string_types): + poss_vals = [u('false'), u('f'), u('no'), + u('n'), u('none'), u('0'), + u('[]'), u('{}'), u('')] + v = not v.strip().lower() in poss_vals else: v = bool(v) return TermValue(v,v,kind) - elif not isinstance(v, basestring): + elif not isinstance(v, compat.string_types): v = stringify(v) - return TermValue(v,stringify(v),u'string') + return TermValue(v,stringify(v),u('string')) # string quoting - return TermValue(v,stringify(v),u'string') + return TermValue(v,stringify(v),u('string')) class TermValue(object): """ hold a term value the we use to construct a condition/filter """ @@ -3658,7 +3667,7 @@ def __init__(self, value, converted, kind): def tostring(self, encoding): """ quote the string if not encoded else encode and return """ - if self.kind == u'string': + if self.kind == u('string'): if encoding is not None: return self.converted return '"%s"' % self.converted @@ -3733,7 +3742,7 @@ def generate(self, where): # operands inside any terms if not any([isinstance(w, (list, tuple, Term)) for w in where]): - if not any([isinstance(w, basestring) and Term._search.match(w) for w in where]): + if not any([isinstance(w, compat.string_types) and Term._search.match(w) for w in where]): where = [where] queryables = self.table.queryables() diff --git 
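The bool branch of Term's value conversion above treats a fixed set of spellings as false and everything else as true. The same coercion as a standalone helper (the name and the str-only check are simplifications; the original tests compat.string_types):

    FALSE_STRINGS = ['false', 'f', 'no', 'n', 'none', '0', '[]', '{}', '']

    def coerce_bool(v):
        if isinstance(v, str):
            return v.strip().lower() not in FALSE_STRINGS
        return bool(v)

    coerce_bool('No')     # False
    coerce_bool('yes')    # True
    coerce_bool(0)        # False
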
a/pandas/io/sql.py b/pandas/io/sql.py index 11b139b620175..b65c35e6b352a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2,13 +2,16 @@ Collection of query wrappers / abstractions to both facilitate data retrieval and to reduce dependency on DB-specific API. """ +from __future__ import print_function from datetime import datetime, date +from pandas.compat import range, lzip, map, zip +import pandas.compat as compat import numpy as np import traceback from pandas.core.datetools import format as date_format -from pandas.core.api import DataFrame, isnull +from pandas.core.api import DataFrame #------------------------------------------------------------------------------ # Helper execution function @@ -51,7 +54,7 @@ def execute(sql, con, retry=True, cur=None, params=None): except Exception: # pragma: no cover pass - print ('Error on sql %s' % sql) + print('Error on sql %s' % sql) raise @@ -61,7 +64,7 @@ def _safe_fetch(cur): if not isinstance(result, list): result = list(result) return result - except Exception, e: # pragma: no cover + except Exception as e: # pragma: no cover excName = e.__class__.__name__ if excName == 'OperationalError': return [] @@ -91,7 +94,7 @@ def tquery(sql, con=None, cur=None, retry=True): try: cur.close() con.commit() - except Exception, e: + except Exception as e: excName = e.__class__.__name__ if excName == 'OperationalError': # pragma: no cover print ('Failed to commit, may need to restart interpreter') @@ -104,7 +107,7 @@ def tquery(sql, con=None, cur=None, retry=True): if result and len(result[0]) == 1: # python 3 compat - result = list(list(zip(*result))[0]) + result = list(lzip(*result)[0]) elif result is None: # pragma: no cover result = [] @@ -121,7 +124,7 @@ def uquery(sql, con=None, cur=None, retry=True, params=None): result = cur.rowcount try: con.commit() - except Exception, e: + except Exception as e: excName = e.__class__.__name__ if excName != 'OperationalError': raise @@ -172,6 +175,7 @@ def read_frame(sql, con, index_col=None, coerce_float=True, params=None): frame_query = read_frame read_sql = read_frame + def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): """ Write records stored in a DataFrame to a SQL database. @@ -193,12 +197,12 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): warnings.warn("append is deprecated, use if_exists instead", FutureWarning) if kwargs['append']: - if_exists='append' + if_exists = 'append' else: - if_exists='fail' + if_exists = 'fail' exists = table_exists(name, con, flavor) if if_exists == 'fail' and exists: - raise ValueError, "Table '%s' already exists." % name + raise ValueError("Table '%s' already exists." % name) #create or drop-recreate if necessary create = None @@ -215,8 +219,8 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): cur = con.cursor() # Replace spaces in DataFrame column names with _. 
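tquery above flattens a single-column result set with list(lzip(*result)[0]); the wrapper matters because a bare zip(*rows) is a one-shot iterator on Python 3 and cannot be indexed. The transpose idiom by itself:

    rows = [(1,), (2,), (3,)]       # e.g. a one-column cursor.fetchall() result
    cols = list(zip(*rows))         # [(1, 2, 3)]: rows transposed to columns
    values = list(cols[0])          # [1, 2, 3]
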
safe_names = [s.replace(' ', '_').strip() for s in frame.columns] - flavor_picker = {'sqlite' : _write_sqlite, - 'mysql' : _write_mysql} + flavor_picker = {'sqlite': _write_sqlite, + 'mysql': _write_mysql} func = flavor_picker.get(flavor, None) if func is None: @@ -225,6 +229,7 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): cur.close() con.commit() + def _write_sqlite(frame, table, names, cur): bracketed_names = ['[' + column + ']' for column in names] col_names = ','.join(bracketed_names) @@ -232,12 +237,13 @@ def _write_sqlite(frame, table, names, cur): insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( table, col_names, wildcards) # pandas types are badly handled if there is only 1 column ( Issue #3628 ) - if not len(frame.columns )==1 : + if not len(frame.columns) == 1: data = [tuple(x) for x in frame.values] - else : + else: data = [tuple(x) for x in frame.values.tolist()] cur.executemany(insert_query, data) + def _write_mysql(frame, table, names, cur): bracketed_names = ['`' + column + '`' for column in names] col_names = ','.join(bracketed_names) @@ -247,16 +253,18 @@ def _write_mysql(frame, table, names, cur): data = [tuple(x) for x in frame.values] cur.executemany(insert_query, data) + def table_exists(name, con, flavor): flavor_map = { 'sqlite': ("SELECT name FROM sqlite_master " "WHERE type='table' AND name='%s';") % name, - 'mysql' : "SHOW TABLES LIKE '%s'" % name} + 'mysql': "SHOW TABLES LIKE '%s'" % name} query = flavor_map.get(flavor, None) if query is None: raise NotImplementedError return len(tquery(query, con)) > 0 + def get_sqltype(pytype, flavor): sqltype = {'mysql': 'VARCHAR (63)', 'sqlite': 'TEXT'} @@ -284,12 +292,13 @@ def get_sqltype(pytype, flavor): return sqltype[flavor] + def get_schema(frame, name, flavor, keys=None): "Return a CREATE TABLE statement to suit the contents of a DataFrame." lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) # Replace spaces in DataFrame column names with _. safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] - column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) + column_types = lzip(safe_columns, map(lookup_type, frame.dtypes)) if flavor == 'sqlite': columns = ',\n '.join('[%s] %s' % x for x in column_types) else: @@ -297,7 +306,7 @@ def get_schema(frame, name, flavor, keys=None): keystr = '' if keys is not None: - if isinstance(keys, basestring): + if isinstance(keys, compat.string_types): keys = (keys,) keystr = ', PRIMARY KEY (%s)' % ','.join(keys) template = """CREATE TABLE %(name)s ( @@ -308,6 +317,7 @@ def get_schema(frame, name, flavor, keys=None): 'keystr': keystr} return create_statement + def sequence2dict(seq): """Helper function for cx_Oracle. 
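_write_sqlite and _write_mysql above build a parameterized INSERT and pass one tuple per row to executemany. A self-contained sqlite3 sketch of the same pattern (table and column names invented for the demo):

    import sqlite3

    con = sqlite3.connect(':memory:')
    con.execute('CREATE TABLE demo ([a] TEXT, [b] INTEGER)')

    names = ['a', 'b']
    wildcards = ','.join(['?'] * len(names))    # sqlite placeholders; mysql uses %s
    insert = 'INSERT INTO demo (%s) VALUES (%s)' % (','.join(names), wildcards)

    con.executemany(insert, [('x', 1), ('y', 2)])
    con.commit()
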
@@ -320,6 +330,6 @@ def sequence2dict(seq): http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ """ d = {} - for k,v in zip(range(1, 1 + len(seq)), seq): + for k, v in zip(range(1, 1 + len(seq)), seq): d[str(k)] = v return d diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 9257338cd4913..21cf6d40ddec9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -9,8 +9,7 @@ You can find more information on http://presbrey.mit.edu/PyDTA and http://statsmodels.sourceforge.net/devel/ """ - -from StringIO import StringIO +# TODO: Fix this module so it can use cross-compatible zip, map, and range import numpy as np import sys @@ -20,7 +19,9 @@ from pandas.core.series import Series from pandas.core.categorical import Categorical import datetime -from pandas.util import py3compat +from pandas import compat +from pandas import compat +from pandas.compat import StringIO, long, lrange, lmap, lzip from pandas import isnull from pandas.io.parsers import _parser_params, Appender from pandas.io.common import get_filepath_or_buffer @@ -225,7 +226,7 @@ def __init__(self, encoding): # we're going to drop the label and cast to int self.DTYPE_MAP = \ dict( - zip(range(1, 245), ['a' + str(i) for i in range(1, 245)]) + + lzip(range(1, 245), ['a' + str(i) for i in range(1, 245)]) + [ (251, np.int16), (252, np.int32), @@ -234,7 +235,7 @@ def __init__(self, encoding): (255, np.float64) ] ) - self.TYPE_MAP = range(251) + list('bhlfd') + self.TYPE_MAP = lrange(251) + list('bhlfd') #NOTE: technically, some of these are wrong. there are more numbers # that can be represented. it's the 27 ABOVE and BELOW the max listed # numeric data type in [U] 12.2.2 of the 11.2 manual @@ -255,7 +256,7 @@ def __init__(self, encoding): } def _decode_bytes(self, str, errors=None): - if py3compat.PY3: + if compat.PY3: return str.decode(self._encoding, errors) else: return str @@ -297,7 +298,7 @@ def __init__(self, path_or_buf, encoding=None): if encoding is not None: self._encoding = encoding - if type(path_or_buf) is str: + if isinstance(path_or_buf, (str, compat.text_type, bytes)): self.path_or_buf = open(path_or_buf, 'rb') else: self.path_or_buf = path_or_buf @@ -384,7 +385,7 @@ def _calcsize(self, fmt): def _col_size(self, k=None): """Calculate size of a data record.""" if len(self.col_sizes) == 0: - self.col_sizes = map(lambda x: self._calcsize(x), self.typlist) + self.col_sizes = lmap(lambda x: self._calcsize(x), self.typlist) if k is None: return self.col_sizes else: @@ -402,7 +403,7 @@ def _unpack(self, fmt, byt): return d def _null_terminate(self, s): - if py3compat.PY3: # have bytes not strings, so must decode + if compat.PY3: # have bytes not strings, so must decode null_byte = b"\0" try: s = s[:s.index(null_byte)] @@ -427,9 +428,9 @@ def _next(self): data[i] = self._unpack(typlist[i], self.path_or_buf.read(self._col_size(i))) return data else: - return map(lambda i: self._unpack(typlist[i], + return list(map(lambda i: self._unpack(typlist[i], self.path_or_buf.read(self._col_size(i))), - range(self.nvar)) + range(self.nvar))) def _dataset(self): """ @@ -538,18 +539,18 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None): data[col] = Series(data[col], data[col].index, self.dtyplist[i]) if convert_dates: - cols = np.where(map(lambda x: x in _date_formats, self.fmtlist))[0] + cols = np.where(lmap(lambda x: x in _date_formats, self.fmtlist))[0] for i in cols: col = data.columns[i] data[col] = data[col].apply(_stata_elapsed_date_to_datetime, args=(self.fmtlist[i],)) if 
convert_categoricals: - cols = np.where(map(lambda x: x in self.value_label_dict.iterkeys(), self.lbllist))[0] + cols = np.where(lmap(lambda x: x in compat.iterkeys(self.value_label_dict), self.lbllist))[0] for i in cols: col = data.columns[i] labeled_data = np.copy(data[col]) labeled_data = labeled_data.astype(object) - for k, v in self.value_label_dict[self.lbllist[i]].iteritems(): + for k, v in compat.iteritems(self.value_label_dict[self.lbllist[i]]): labeled_data[data[col] == k] = v data[col] = Categorical.from_array(labeled_data) @@ -750,7 +751,7 @@ def _write(self, to_write): """ Helper to call encode before writing to file for Python 3 compat. """ - if py3compat.PY3: + if compat.PY3: self._file.write(to_write.encode(self._encoding)) else: self._file.write(to_write) @@ -906,7 +907,7 @@ def _write_data_dates(self): def _null_terminate(self, s, as_string=False): null_byte = '\x00' - if py3compat.PY3 and not as_string: + if compat.PY3 and not as_string: s += null_byte return s.encode(self._encoding) else: diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index 1838e0907233c..f5d949e2cfc45 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -1,4 +1,7 @@ """ self-contained to write legacy pickle files """ +from __future__ import print_function + +from pandas.compat import zip, cPickle as pickle def _create_sp_series(): @@ -28,13 +31,13 @@ def _create_sp_frame(): 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10), 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - + dates = bdate_range('1/1/2011', periods=10) return SparseDataFrame(data, index=dates) def create_data(): """ create the pickle data """ - + import numpy as np import pandas from pandas import (Series,DataFrame,Panel, @@ -50,29 +53,29 @@ def create_data(): 'D': date_range('1/1/2009', periods=5), 'E' : [0., 1, Timestamp('20100101'),'foo',2.], } - - index = dict(int = Index(np.arange(10)), - date = date_range('20130101',periods=10)) - mi = dict(reg = MultiIndex.from_tuples(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]), + + index = dict(int = Index(np.arange(10)), + date = date_range('20130101',periods=10)) + mi = dict(reg = MultiIndex.from_tuples(list(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) series = dict(float = Series(data['A']), - int = Series(data['B']), + int = Series(data['B']), mixed = Series(data['E'])) - frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), - int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), + frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), + int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']]))) - panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1))) + panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1))) - - return dict( series = series, - frame = frame, - panel = panel, - index = index, - mi = mi, + + return dict( series = series, + frame = frame, + panel = panel, + index = index, + mi = mi, sp_series = dict(float = _create_sp_series()), - sp_frame = dict(float = _create_sp_frame()) + sp_frame = dict(float = _create_sp_frame()) ) def write_legacy_pickles(): @@ -86,15 +89,14 @@ def 
write_legacy_pickles(): import pandas import pandas.util.testing as tm import platform as pl - import cPickle as pickle print("This script generates a pickle file for the current arch, system, and python version") base_dir, _ = os.path.split(os.path.abspath(__file__)) base_dir = os.path.join(base_dir,'data/legacy_pickle') - + # could make this a parameter? - version = None + version = None if version is None: @@ -108,11 +110,11 @@ def write_legacy_pickles(): # construct a reasonable platform name f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ]) pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f)) - + fh = open(pth,'wb') pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL) fh.close() - + print("created pickle file: %s" % pth) if __name__ == '__main__': diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py index 9eadd16c207a9..12c696f7076a4 100644 --- a/pandas/io/tests/test_clipboard.py +++ b/pandas/io/tests/test_clipboard.py @@ -33,7 +33,7 @@ def setUpClass(cls): cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, 'b': np.arange(1, 6), 'c': list('abcde')}) - cls.data_types = cls.data.keys() + cls.data_types = list(cls.data.keys()) @classmethod def tearDownClass(cls): diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index 7fa8d06f48ea3..d5f62cf909513 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -2,8 +2,9 @@ C/Cython ascii file parser tests """ -from pandas.util.py3compat import StringIO, BytesIO +from pandas.compat import StringIO, BytesIO, map from datetime import datetime +from pandas import compat import csv import os import sys @@ -22,7 +23,7 @@ from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib -from pandas.util import py3compat +from pandas import compat from pandas.lib import Timestamp import pandas.util.testing as tm @@ -325,7 +326,7 @@ def test_empty_field_eof(self): def assert_array_dicts_equal(left, right): - for k, v in left.iteritems(): + for k, v in compat.iteritems(left): assert(np.array_equal(v, right[k])) if __name__ == '__main__': diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index e760ddff518f5..c85fd61e975e9 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from pandas import compat import unittest import warnings import nose @@ -16,7 +18,7 @@ def assert_n_failed_equals_n_null_columns(wngs, obj, cls=SymbolWarning): all_nan_cols = pd.Series(dict((k, pd.isnull(v).all()) for k, v in - obj.iteritems())) + compat.iteritems(obj))) n_all_nan_cols = all_nan_cols.sum() valid_warnings = pd.Series([wng for wng in wngs if isinstance(wng, cls)]) assert_equal(len(valid_warnings), n_all_nan_cols) @@ -33,7 +35,7 @@ def test_google(self): # an exception when DataReader can't get a 200 response from # google start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertEquals( web.DataReader("F", 'google', start, end)['Close'][-1], @@ -97,7 +99,7 @@ def test_yahoo(self): # an exception when DataReader can't get a 200 response from # yahoo start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertEquals( web.DataReader("F", 'yahoo', start, end)['Close'][-1], 13.68) @@ -105,7 +107,7 @@ def test_yahoo(self): @network def test_yahoo_fails(self): start = datetime(2010, 1, 1) - 
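The datetime(2013, 01, 27) fixes in these test hunks are pure syntax: Python 3 rejects integer literals with a leading zero and reserves an explicit 0o prefix for octal, so the zero-padded month becomes a plain 1. For example:

    month = 1        # PY2 also accepted the old `01` spelling; PY3 does not
    mode = 0o755     # octal now requires the 0o prefix on both 2.6+ and 3.x
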
end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER", 'yahoo', start, end) @@ -363,7 +365,7 @@ def test_fred(self): FRED. """ start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertEquals( web.DataReader("GDP", "fred", start, end)['GDP'].tail(1), @@ -375,14 +377,14 @@ def test_fred(self): @network def test_fred_nan(self): start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) df = web.DataReader("DFII5", "fred", start, end) assert pd.isnull(df.ix['2010-01-01']) @network def test_fred_parts(self): start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) df = web.get_data_fred("CPIAUCSL", start, end) self.assertEqual(df.ix['2010-05-01'], 217.23) diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 396912c0f5f54..8c1009b904857 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import StringIO, BytesIO +from pandas.compat import StringIO, BytesIO from datetime import date, datetime import csv import os @@ -19,7 +19,7 @@ from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib -from pandas.util import py3compat +from pandas import compat from pandas.lib import Timestamp import pandas.io.date_converters as conv diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index ebbb7292cb3d7..1ac4d4e31ed10 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1,6 +1,6 @@ # pylint: disable=E1101 -from pandas.util.py3compat import StringIO, BytesIO, PY3 +from pandas.compat import StringIO, BytesIO, PY3, u, range, map from datetime import datetime from os.path import split as psplit import csv @@ -27,7 +27,7 @@ import pandas as pd import pandas.lib as lib -from pandas.util import py3compat +from pandas import compat from pandas.lib import Timestamp from pandas.tseries.index import date_range import pandas.tseries.tools as tools @@ -707,7 +707,7 @@ def test_to_excel_unicode_filename(self): _skip_if_no_excelsuite() for ext in ['xls', 'xlsx']: - filename = u'\u0192u.' 
+ ext + filename = u('\u0192u.') + ext try: f = open(filename, 'wb') @@ -769,7 +769,7 @@ def test_to_excel_styleconverter(self): # def test_to_excel_header_styling_xls(self): # import StringIO - # s = StringIO.StringIO( + # s = StringIO( # """Date,ticker,type,value # 2001-01-01,x,close,12.2 # 2001-01-01,x,open ,12.1 @@ -816,7 +816,7 @@ def test_to_excel_styleconverter(self): # os.remove(filename) # def test_to_excel_header_styling_xlsx(self): # import StringIO - # s = StringIO.StringIO( + # s = StringIO( # """Date,ticker,type,value # 2001-01-01,x,close,12.2 # 2001-01-01,x,open ,12.1 diff --git a/pandas/io/tests/test_ga.py b/pandas/io/tests/test_ga.py index d2061a6d0b57a..e33b75c569fef 100644 --- a/pandas/io/tests/test_ga.py +++ b/pandas/io/tests/test_ga.py @@ -82,8 +82,8 @@ def test_iterator(self): dimensions='date', max_results=10, chunksize=5) - df1 = it.next() - df2 = it.next() + df1 = next(it) + df2 = next(it) for df in [df1, df2]: assert isinstance(df, DataFrame) diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 1d0c2a13302af..44e4b5cfda7b6 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -1,10 +1,10 @@ +from __future__ import print_function import os import re -from cStringIO import StringIO from unittest import TestCase import warnings from distutils.version import LooseVersion -import urllib2 +from pandas.io.common import URLError import nose from nose.tools import assert_raises @@ -12,6 +12,8 @@ import numpy as np from numpy.random import rand from numpy.testing.decorators import slow +from pandas.compat import map, zip, StringIO +import pandas.compat as compat try: from importlib import import_module @@ -42,7 +44,7 @@ def _skip_if_no(module_name): def _skip_if_none_of(module_names): - if isinstance(module_names, basestring): + if isinstance(module_names, compat.string_types): _skip_if_no(module_names) if module_names == 'bs4': import bs4 @@ -112,8 +114,8 @@ def test_to_html_compat(self): out = df.to_html() res = self.run_read_html(out, attrs={'class': 'dataframe'}, index_col=0)[0] - print (df.dtypes) - print (res.dtypes) + print(df.dtypes) + print(res.dtypes) assert_frame_equal(res, df) @network @@ -149,7 +151,7 @@ def test_spam(self): df2 = self.run_read_html(self.spam_data, 'Unit', infer_types=False) assert_framelist_equal(df1, df2) - print (df1[0]) + print(df1[0]) self.assertEqual(df1[0].ix[0, 0], 'Proximates') self.assertEqual(df1[0].columns[0], 'Nutrient') @@ -178,7 +180,7 @@ def test_skiprows_int(self): def test_skiprows_xrange(self): df1 = [self.run_read_html(self.spam_data, '.*Water.*').pop()[2:]] - df2 = self.run_read_html(self.spam_data, 'Unit', skiprows=xrange(2)) + df2 = self.run_read_html(self.spam_data, 'Unit', skiprows=range(2)) assert_framelist_equal(df1, df2) @@ -288,12 +290,12 @@ def test_file_like(self): @network def test_bad_url_protocol(self): - self.assertRaises(urllib2.URLError, self.run_read_html, + self.assertRaises(URLError, self.run_read_html, 'git://github.com', '.*Water.*') @network def test_invalid_url(self): - self.assertRaises(urllib2.URLError, self.run_read_html, + self.assertRaises(URLError, self.run_read_html, 'http://www.a23950sdfa908sd.com') @slow diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 21fae9a50c7dd..893243d148618 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -2,8 +2,9 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import 
datetime, timedelta -from StringIO import StringIO -import cPickle as pickle +from pandas.compat import range, lrange, StringIO, cPickle as pickle +from pandas import compat +from pandas.io.common import URLError import operator import os import unittest @@ -27,7 +28,7 @@ _frame = DataFrame(_seriesd) _frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A']) _intframe = DataFrame(dict((k, v.astype(np.int64)) - for k, v in _seriesd.iteritems())) + for k, v in compat.iteritems(_seriesd))) _tsframe = DataFrame(_tsd) @@ -91,9 +92,9 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_ try: unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes) - except (Exception), detail: + except (Exception) as detail: if raise_ok is not None: - if type(detail) == raise_ok: + if isinstance(detail, raise_ok): return raise @@ -320,7 +321,7 @@ def _check_all_orients(series, dtype=None): _check_all_orients(self.ts) # dtype - s = Series(range(6), index=['a','b','c','d','e','f']) + s = Series(lrange(6), index=['a','b','c','d','e','f']) _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64) _check_all_orients(Series(s, dtype=np.int), dtype=np.int) @@ -340,7 +341,7 @@ def test_frame_from_json_precise_float(self): def test_typ(self): - s = Series(range(6), index=['a','b','c','d','e','f'], dtype='int64') + s = Series(lrange(6), index=['a','b','c','d','e','f'], dtype='int64') result = read_json(s.to_json(),typ=None) assert_series_equal(result,s) @@ -439,7 +440,7 @@ def test_weird_nested_json(self): def test_doc_example(self): dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB')) dfj2['date'] = Timestamp('20130101') - dfj2['ints'] = range(5) + dfj2['ints'] = lrange(5) dfj2['bools'] = True dfj2.index = pd.date_range('20130101',periods=5) @@ -471,7 +472,6 @@ def test_round_trip_exception_(self): @network @slow def test_url(self): - import urllib2 try: url = 'https://api.github.com/repos/pydata/pandas/issues?per_page=5' @@ -482,5 +482,5 @@ def test_url(self): url = 'http://search.twitter.com/search.json?q=pandas%20python' result = read_json(url) - except urllib2.URLError: + except URLError: raise nose.SkipTest diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 86aeecf169b28..ff684e30b206d 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -1,7 +1,6 @@ import unittest from unittest import TestCase -import pandas.json as ujson try: import json except ImportError: @@ -13,12 +12,14 @@ import time import datetime import calendar -import StringIO import re import random import decimal from functools import partial -import pandas.util.py3compat as py3compat +from pandas.compat import range, zip, StringIO, u +from pandas import compat +import pandas.json as ujson +import pandas.compat as compat import numpy as np from pandas.util.testing import assert_almost_equal @@ -69,7 +70,7 @@ def helper(expected_output, **encode_kwargs): helper(html_encoded, ensure_ascii=False, encode_html_chars=True) def test_doubleLongIssue(self): - sut = {u'a': -4342969734183514} + sut = {u('a'): -4342969734183514} encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) @@ -78,7 +79,7 @@ def test_doubleLongIssue(self): self.assertEqual(sut, decoded) def test_doubleLongDecimalIssue(self): - sut = {u'a': -12345678901234.56789012} + sut = {u('a'): -12345678901234.56789012} encoded = json.dumps(sut) decoded = json.loads(encoded) 
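The import churn in these test modules tracks the stdlib reshuffle: the StringIO and cStringIO modules are replaced by io.StringIO (io.BytesIO for bytes), and urllib2's URLError now lives in urllib.error; pandas.compat and pandas.io.common re-export them. One plausible way such a fallback shim is written:

    try:                                    # Python 3
        from io import StringIO, BytesIO
        from urllib.error import URLError
    except ImportError:                     # Python 2
        from cStringIO import StringIO
        from io import BytesIO              # available on 2.6+ as well
        from urllib2 import URLError

    buf = StringIO('a,b\n1,2\n')
    print(buf.readline().strip())           # 'a,b'
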
self.assertEqual(sut, decoded) @@ -88,12 +89,12 @@ def test_doubleLongDecimalIssue(self): def test_encodeDecodeLongDecimal(self): - sut = {u'a': -528656961.4399388} + sut = {u('a'): -528656961.4399388} encoded = ujson.dumps(sut, double_precision=15) ujson.decode(encoded) def test_decimalDecodeTestPrecise(self): - sut = {u'a': 4.56} + sut = {u('a'): 4.56} encoded = ujson.encode(sut) decoded = ujson.decode(encoded, precise_float=True) self.assertEqual(sut, decoded) @@ -109,10 +110,16 @@ def test_encodeDoubleTinyExponential(self): self.assert_(np.allclose(num, ujson.decode(ujson.encode(num)))) def test_encodeDictWithUnicodeKeys(self): - input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" } + input = {u("key1"): u("value1"), u("key1"): + u("value1"), u("key1"): u("value1"), + u("key1"): u("value1"), u("key1"): + u("value1"), u("key1"): u("value1")} output = ujson.encode(input) - input = { u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1" } + input = {u("بن"): u("value1"), u("بن"): u("value1"), + u("بن"): u("value1"), u("بن"): u("value1"), + u("بن"): u("value1"), u("بن"): u("value1"), + u("بن"): u("value1")} output = ujson.encode(input) pass @@ -361,7 +368,7 @@ def test_encodeToUTF8(self): self.assertEquals(dec, json.loads(enc)) def test_decodeFromUnicode(self): - input = u"{\"obj\": 31337}" + input = u("{\"obj\": 31337}") dec1 = ujson.decode(input) dec2 = ujson.decode(str(input)) self.assertEquals(dec1, dec2) @@ -520,18 +527,18 @@ def test_decodeNullBroken(self): def test_decodeBrokenDictKeyTypeLeakTest(self): input = '{{1337:""}}' - for x in xrange(1000): + for x in range(1000): try: ujson.decode(input) assert False, "Expected exception!" - except(ValueError),e: + except ValueError as e: continue assert False, "Wrong exception" def test_decodeBrokenDictLeakTest(self): input = '{{"key":"}' - for x in xrange(1000): + for x in range(1000): try: ujson.decode(input) assert False, "Expected exception!" @@ -542,7 +549,7 @@ def test_decodeBrokenDictLeakTest(self): def test_decodeBrokenListLeakTest(self): input = '[[[true' - for x in xrange(1000): + for x in range(1000): try: ujson.decode(input) assert False, "Expected exception!" 
@@ -611,7 +618,7 @@ def test_encodeNullCharacter(self): self.assertEquals(output, json.dumps(input)) self.assertEquals(input, ujson.decode(output)) - self.assertEquals('" \\u0000\\r\\n "', ujson.dumps(u" \u0000\r\n ")) + self.assertEquals('" \\u0000\\r\\n "', ujson.dumps(u(" \u0000\r\n "))) pass def test_decodeNullCharacter(self): @@ -678,7 +685,7 @@ def test_decodeNumericIntExpeMinus(self): self.assertAlmostEqual(output, json.loads(input)) def test_dumpToFile(self): - f = StringIO.StringIO() + f = StringIO() ujson.dump([1, 2, 3], f) self.assertEquals("[1,2,3]", f.getvalue()) @@ -701,9 +708,9 @@ def test_dumpFileArgsError(self): assert False, 'expected TypeError' def test_loadFile(self): - f = StringIO.StringIO("[1,2,3,4]") + f = StringIO("[1,2,3,4]") self.assertEquals([1, 2, 3, 4], ujson.load(f)) - f = StringIO.StringIO("[1,2,3,4]") + f = StringIO("[1,2,3,4]") assert_array_equal(np.array([1, 2, 3, 4]), ujson.load(f, numpy=True)) def test_loadFileLikeObject(self): @@ -740,7 +747,7 @@ def test_encodeNumericOverflow(self): assert False, "expected OverflowError" def test_encodeNumericOverflowNested(self): - for n in xrange(0, 100): + for n in range(0, 100): class Nested: x = 12839128391289382193812939 @@ -769,8 +776,8 @@ def test_decodeNumberWith32bitSignBit(self): self.assertEqual(ujson.decode(doc)['id'], result) def test_encodeBigEscape(self): - for x in xrange(10): - if py3compat.PY3: + for x in range(10): + if compat.PY3: base = '\u00e5'.encode('utf-8') else: base = "\xc3\xa5" @@ -778,17 +785,17 @@ def test_encodeBigEscape(self): output = ujson.encode(input) def test_decodeBigEscape(self): - for x in xrange(10): - if py3compat.PY3: + for x in range(10): + if compat.PY3: base = '\u00e5'.encode('utf-8') else: base = "\xc3\xa5" - quote = py3compat.str_to_bytes("\"") + quote = compat.str_to_bytes("\"") input = quote + (base * 1024 * 1024 * 2) + quote output = ujson.decode(input) def test_toDict(self): - d = {u"key": 31337} + d = {u("key"): 31337} class DictTest: def toDict(self): @@ -1034,16 +1041,16 @@ def testArrayNumpyLabelled(self): output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) self.assertTrue((np.array([42]) == output[0]).all()) self.assertTrue(output[1] is None) - self.assertTrue((np.array([u'a']) == output[2]).all()) + self.assertTrue((np.array([u('a')]) == output[2]).all()) # py3 is non-determinstic on the ordering...... 
- if not py3compat.PY3: + if not compat.PY3: input = [{'a': 42, 'b':31}, {'a': 24, 'c': 99}, {'a': 2.4, 'b': 78}] output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) expectedvals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3,2)) self.assertTrue((expectedvals == output[0]).all()) self.assertTrue(output[1] is None) - self.assertTrue((np.array([u'a', 'b']) == output[2]).all()) + self.assertTrue((np.array([u('a'), 'b']) == output[2]).all()) input = {1: {'a': 42, 'b':31}, 2: {'a': 24, 'c': 99}, 3: {'a': 2.4, 'b': 78}} @@ -1331,7 +1338,7 @@ def test_decodeTooBigValue(self): try: input = "9223372036854775808" ujson.decode(input) - except ValueError, e: + except ValueError as e: pass else: assert False, "expected ValueError" @@ -1340,7 +1347,7 @@ def test_decodeTooSmallValue(self): try: input = "-90223372036854775809" ujson.decode(input) - except ValueError,e: + except ValueError as e: pass else: assert False, "expected ValueError" @@ -1418,7 +1425,7 @@ def test_decodeFloatingPointAdditionalTests(self): def test_encodeBigSet(self): s = set() - for x in xrange(0, 100000): + for x in range(0, 100000): s.add(x) ujson.encode(s) @@ -1462,7 +1469,7 @@ def test_decodeStringUTF8(self): """ def _clean_dict(d): - return dict((str(k), v) for k, v in d.iteritems()) + return dict((str(k), v) for k, v in compat.iteritems(d)) if __name__ == '__main__': # unittest.main() diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index b88b1ab776ab4..a46a3de60fe04 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -1,21 +1,21 @@ # pylint: disable=E1101 -from pandas.util.py3compat import StringIO, BytesIO, PY3 from datetime import datetime import csv import os import sys import re import unittest -from contextlib import closing -from urllib2 import urlopen - import nose from numpy import nan import numpy as np from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex +from pandas.compat import ( + StringIO, BytesIO, PY3, range, long, lrange, lmap, u, map +) +from pandas.io.common import urlopen, URLError import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, TextFileReader, TextParser) @@ -27,8 +27,9 @@ import pandas.util.testing as tm import pandas as pd +from pandas.compat import parse_date import pandas.lib as lib -from pandas.util import py3compat +from pandas import compat from pandas.lib import Timestamp from pandas.tseries.index import date_range import pandas.tseries.tools as tools @@ -108,12 +109,12 @@ def test_empty_string(self): tm.assert_frame_equal(xp.reindex(columns=df.columns), df) def test_read_csv(self): - if not py3compat.PY3: + if not compat.PY3: if 'win' in sys.platform: - prefix = u"file:///" + prefix = u("file:///") else: - prefix = u"file://" - fname = prefix + unicode(self.csv1) + prefix = u("file://") + fname = prefix + compat.text_type(self.csv1) # it works!
df1 = read_csv(fname, index_col=0, parse_dates=True) @@ -160,7 +161,7 @@ def test_squeeze(self): expected = Series([1, 2, 3], ['a', 'b', 'c']) result = self.read_table(StringIO(data), sep=',', index_col=0, header=None, squeeze=True) - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, expected) def test_inf_parsing(self): @@ -181,7 +182,6 @@ def test_inf_parsing(self): df = read_csv(StringIO(data), index_col=0) assert_almost_equal(df['A'].values, expected.values) df = read_csv(StringIO(data), index_col=0, na_filter=False) - print df['A'].values assert_almost_equal(df['A'].values, expected.values) def test_multiple_date_col(self): @@ -316,7 +316,7 @@ def test_multiple_date_cols_with_header(self): KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000""" df = self.read_csv(StringIO(data), parse_dates={'nominal': [1, 2]}) - self.assert_(not isinstance(df.nominal[0], basestring)) + self.assert_(not isinstance(df.nominal[0], compat.string_types)) ts_data = """\ ID,date,nominalTime,actualTime,A,B,C,D,E @@ -423,7 +423,7 @@ def test_malformed(self): df = self.read_table( StringIO(data), sep=',', header=1, comment='#') self.assert_(False) - except Exception, inst: + except Exception as inst: self.assert_('Expected 3 fields in line 4, saw 5' in str(inst)) # skip_footer @@ -440,7 +440,7 @@ def test_malformed(self): StringIO(data), sep=',', header=1, comment='#', skip_footer=1) self.assert_(False) - except Exception, inst: + except Exception as inst: self.assert_('Expected 3 fields in line 4, saw 5' in str(inst)) # first chunk @@ -458,7 +458,7 @@ def test_malformed(self): skiprows=[2]) df = it.read(5) self.assert_(False) - except Exception, inst: + except Exception as inst: self.assert_('Expected 3 fields in line 6, saw 5' in str(inst)) # middle chunk @@ -477,7 +477,7 @@ def test_malformed(self): df = it.read(1) it.read(2) self.assert_(False) - except Exception, inst: + except Exception as inst: self.assert_('Expected 3 fields in line 6, saw 5' in str(inst)) # last chunk @@ -496,7 +496,7 @@ def test_malformed(self): df = it.read(1) it.read() self.assert_(False) - except Exception, inst: + except Exception as inst: self.assert_('Expected 3 fields in line 6, saw 5' in str(inst)) def test_passing_dtype(self): @@ -610,7 +610,7 @@ def test_nat_parse(self): # GH 3062 df = DataFrame(dict({ - 'A' : np.asarray(range(10),dtype='float64'), + 'A' : np.asarray(lrange(10),dtype='float64'), 'B' : pd.Timestamp('20010101') })) df.iloc[3:6,:] = np.nan @@ -640,7 +640,7 @@ def test_skiprows_bug(self): 1/2/2000,4,5,6 1/3/2000,7,8,9 """ - data = self.read_csv(StringIO(text), skiprows=range(6), header=None, + data = self.read_csv(StringIO(text), skiprows=lrange(6), header=None, index_col=0, parse_dates=True) data2 = self.read_csv(StringIO(text), skiprows=6, header=None, @@ -793,20 +793,20 @@ def test_parse_dates_column_list(self): 15/01/2010;P;P;50;1;14/1/2011 01/05/2010;P;P;50;1;15/1/2011''' - expected = self.read_csv(StringIO(data), sep=";", index_col=range(4)) + expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4)) lev = expected.index.levels[0] expected.index.levels[0] = lev.to_datetime(dayfirst=True) expected['aux_date'] = to_datetime(expected['aux_date'], dayfirst=True) - expected['aux_date'] = map(Timestamp, expected['aux_date']) - self.assert_(isinstance(expected['aux_date'][0], datetime)) + expected['aux_date'] = lmap(Timestamp, expected['aux_date']) + tm.assert_isinstance(expected['aux_date'][0], datetime) - df = 
self.read_csv(StringIO(data), sep=";", index_col=range(4), + df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4), parse_dates=[0, 5], dayfirst=True) tm.assert_frame_equal(df, expected) - df = self.read_csv(StringIO(data), sep=";", index_col=range(4), + df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4), parse_dates=['date', 'aux_date'], dayfirst=True) tm.assert_frame_equal(df, expected) @@ -829,7 +829,7 @@ def test_no_header(self): self.assert_(np.array_equal(df_pref.columns, ['X0', 'X1', 'X2', 'X3', 'X4'])) - self.assert_(np.array_equal(df.columns, range(5))) + self.assert_(np.array_equal(df.columns, lrange(5))) self.assert_(np.array_equal(df2.columns, names)) @@ -870,9 +870,9 @@ def test_read_csv_no_index_name(self): tm.assert_frame_equal(df, df2) def test_read_table_unicode(self): - fin = BytesIO(u'\u0141aski, Jan;1'.encode('utf-8')) + fin = BytesIO(u('\u0141aski, Jan;1').encode('utf-8')) df1 = read_table(fin, sep=";", encoding="utf-8", header=None) - self.assert_(isinstance(df1[0].values[0], unicode)) + tm.assert_isinstance(df1[0].values[0], compat.text_type) def test_read_table_wrong_num_columns(self): # too few! @@ -1049,7 +1049,7 @@ def test_iterator(self): treader = self.read_table(StringIO(self.data1), sep=',', index_col=0, iterator=True) - self.assert_(isinstance(treader, TextFileReader)) + tm.assert_isinstance(treader, TextFileReader) # stopping iteration when on chunksize is specified, GH 3967 data = """A,B,C @@ -1255,15 +1255,15 @@ def test_converters(self): b,3,4,01/02/2009 c,4,5,01/03/2009 """ - from dateutil import parser + from pandas.compat import parse_date - result = self.read_csv(StringIO(data), converters={'D': parser.parse}) - result2 = self.read_csv(StringIO(data), converters={3: parser.parse}) + result = self.read_csv(StringIO(data), converters={'D': parse_date}) + result2 = self.read_csv(StringIO(data), converters={3: parse_date}) expected = self.read_csv(StringIO(data)) - expected['D'] = expected['D'].map(parser.parse) + expected['D'] = expected['D'].map(parse_date) - self.assert_(isinstance(result['D'][0], (datetime, Timestamp))) + tm.assert_isinstance(result['D'][0], (datetime, Timestamp)) tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) @@ -1328,13 +1328,12 @@ def test_read_csv_parse_simple_list(self): tm.assert_frame_equal(df, expected) def test_parse_dates_custom_euroformat(self): - from dateutil.parser import parse text = """foo,bar,baz 31/01/2010,1,2 01/02/2010,1,NA 02/02/2010,1,2 """ - parser = lambda d: parse(d, dayfirst=True) + parser = lambda d: parse_date(d, dayfirst=True) df = self.read_csv(StringIO(text), names=['time', 'Q', 'NTU'], header=0, index_col=0, parse_dates=True, @@ -1346,7 +1345,7 @@ def test_parse_dates_custom_euroformat(self): index=exp_index, columns=['Q', 'NTU']) tm.assert_frame_equal(df, expected) - parser = lambda d: parse(d, day_first=True) + parser = lambda d: parse_date(d, day_first=True) self.assertRaises(Exception, self.read_csv, StringIO(text), skiprows=[0], names=['time', 'Q', 'NTU'], index_col=0, @@ -1391,7 +1390,6 @@ def test_na_value_dict(self): @slow @network def test_url(self): - import urllib2 try: # HTTP(S) url = ('https://raw.github.com/pydata/pandas/master/' @@ -1403,18 +1401,17 @@ def test_url(self): tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing - except urllib2.URLError: + except URLError: try: - with closing(urlopen('http://www.google.com')) as resp: + with urlopen('http://www.google.com') as resp: pass - except urllib2.URLError: + except URLError: raise nose.SkipTest else: raise @slow
def test_file(self): - import urllib2 # FILE if sys.version_info[:2] < (2, 6): @@ -1425,7 +1422,7 @@ def test_file(self): try: url_table = self.read_table('file://localhost/' + localtable) - except urllib2.URLError: + except URLError: # fails on some systems raise nose.SkipTest @@ -1553,23 +1550,23 @@ def test_skipinitialspace(self): sfile = StringIO(s) # it's 33 columns - result = self.read_csv(sfile, names=range(33), na_values=['-9999.0'], + result = self.read_csv(sfile, names=lrange(33), na_values=['-9999.0'], header=None, skipinitialspace=True) self.assertTrue(pd.isnull(result.ix[0, 29])) def test_utf16_bom_skiprows(self): # #2298 - data = u"""skip this + data = u("""skip this skip this too A\tB\tC 1\t2\t3 -4\t5\t6""" +4\t5\t6""") - data2 = u"""skip this + data2 = u("""skip this skip this too A,B,C 1,2,3 -4,5,6""" +4,5,6""") path = '__%s__.csv' % tm.rands(10) @@ -1581,7 +1578,7 @@ def test_utf16_bom_skiprows(self): f.write(bytes) s = BytesIO(dat.encode('utf-8')) - if py3compat.PY3: + if compat.PY3: # somewhat False since the code never sees bytes from io import TextIOWrapper s = TextIOWrapper(s, encoding='utf-8') @@ -1600,7 +1597,7 @@ def test_utf16_example(self): result = self.read_table(path, encoding='utf-16') self.assertEquals(len(result), 50) - if not py3compat.PY3: + if not compat.PY3: buf = BytesIO(open(path, 'rb').read()) result = self.read_table(buf, encoding='utf-16') self.assertEquals(len(result), 50) @@ -1610,7 +1607,6 @@ def test_converters_corner_with_nas(self): if hash(np.int64(-1)) != -2: raise nose.SkipTest - import StringIO csv = """id,score,days 1,2,12 2,2-5, @@ -1646,20 +1642,20 @@ def convert_score(x): if not x: return np.nan if x.find('-') > 0: - valmin, valmax = map(int, x.split('-')) + valmin, valmax = lmap(int, x.split('-')) val = 0.5 * (valmin + valmax) else: val = float(x) return val - fh = StringIO.StringIO(csv) + fh = StringIO(csv) result = self.read_csv(fh, converters={'score': convert_score, 'days': convert_days}, na_values=['', None]) self.assert_(pd.isnull(result['days'][1])) - fh = StringIO.StringIO(csv) + fh = StringIO(csv) result2 = self.read_csv(fh, converters={'score': convert_score, 'days': convert_days_sentinel}, na_values=['', None]) @@ -1672,7 +1668,7 @@ def test_unicode_encoding(self): result = result.set_index(0) got = result[1][1632] - expected = u'\xc1 k\xf6ldum klaka (Cold Fever) (1994)' + expected = u('\xc1 k\xf6ldum klaka (Cold Fever) (1994)') self.assertEquals(got, expected) @@ -1800,16 +1796,16 @@ def test_sniff_delimiter(self): sep=None, skiprows=2) tm.assert_frame_equal(data, data3) - text = u"""ignore this + text = u("""ignore this ignore this too index|A|B|C foo|1|2|3 bar|4|5|6 baz|7|8|9 -""".encode('utf-8') +""").encode('utf-8') s = BytesIO(text) - if py3compat.PY3: + if compat.PY3: # somewhat False since the code never sees bytes from io import TextIOWrapper s = TextIOWrapper(s, encoding='utf-8') @@ -2325,9 +2321,9 @@ def test_parse_ragged_csv(self): data = "1,2\n3,4,5" result = self.read_csv(StringIO(data), header=None, - names=range(50)) + names=lrange(50)) expected = self.read_csv(StringIO(data), header=None, - names=range(3)).reindex(columns=range(50)) + names=lrange(3)).reindex(columns=lrange(50)) tm.assert_frame_equal(result, expected) @@ -2374,9 +2370,11 @@ def test_convert_sql_column_strings(self): assert_same_values_and_dtype(result, expected) def test_convert_sql_column_unicode(self): - arr = np.array([u'1.5', None, u'3', u'4.2'], dtype=object) + arr = np.array([u('1.5'), None, u('3'), u('4.2')], + dtype=object) result 
= lib.convert_sql_column(arr) - expected = np.array([u'1.5', np.nan, u'3', u'4.2'], dtype=object) + expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], + dtype=object) assert_same_values_and_dtype(result, expected) def test_convert_sql_column_ints(self): @@ -2394,12 +2392,12 @@ def test_convert_sql_column_ints(self): assert_same_values_and_dtype(result, expected) def test_convert_sql_column_longs(self): - arr = np.array([1L, 2L, 3L, 4L], dtype='O') + arr = np.array([long(1), long(2), long(3), long(4)], dtype='O') result = lib.convert_sql_column(arr) expected = np.array([1, 2, 3, 4], dtype='i8') assert_same_values_and_dtype(result, expected) - arr = np.array([1L, 2L, 3L, None, 4L], dtype='O') + arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O') result = lib.convert_sql_column(arr) expected = np.array([1, 2, 3, np.nan, 4], dtype='f8') assert_same_values_and_dtype(result, expected) diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 5c79c57c1e020..3c805e9fa260d 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import Index from pandas.sparse.tests import test_sparse -from pandas.util import py3compat +from pandas import compat from pandas.util.misc import is_little_endian class TestPickle(unittest.TestCase): @@ -27,7 +27,7 @@ def setUp(self): def compare(self, vf): # py3 compat when reading py2 pickle - + try: with open(vf,'rb') as fh: data = pickle.load(fh) @@ -36,7 +36,7 @@ def compare(self, vf): # we are trying to read a py3 pickle in py2..... return except: - if not py3compat.PY3: + if not compat.PY3: raise with open(vf,'rb') as fh: data = pickle.load(fh, encoding='latin1') diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 6518f9cb6097f..3c532ea287755 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from pandas.compat import range, lrange, u import nose import unittest import os @@ -17,7 +19,7 @@ from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal from pandas import concat, Timestamp -from pandas.util import py3compat +from pandas import compat from numpy.testing.decorators import slow @@ -127,7 +129,7 @@ def roundtrip(key, obj,**kwargs): tm.assert_panel_equal(o, roundtrip('panel',o)) # table - df = DataFrame(dict(A=range(5), B=range(5))) + df = DataFrame(dict(A=lrange(5), B=lrange(5))) df.to_hdf(self.path,'table',append=True) result = read_hdf(self.path, 'table', where = ['index>2']) assert_frame_equal(df[df.index>2],result) @@ -481,7 +483,7 @@ def test_encoding(self): raise nose.SkipTest('system byteorder is not little, skipping test_encoding!') with ensure_clean(self.path) as store: - df = DataFrame(dict(A='foo',B='bar'),index=range(5)) + df = DataFrame(dict(A='foo',B='bar'),index=lrange(5)) df.loc[2,'A'] = np.nan df.loc[3,'B'] = np.nan _maybe_remove(store, 'df') @@ -604,7 +606,7 @@ def test_append_with_different_block_ordering(self): for i in range(10): df = DataFrame(np.random.randn(10,2),columns=list('AB')) - df['index'] = range(10) + df['index'] = lrange(10) df['index'] += i*10 df['int64'] = Series([1]*len(df),dtype='int64') df['int16'] = Series([1]*len(df),dtype='int16') @@ -780,7 +782,7 @@ def check_col(key,name,size): def check_col(key,name,size): self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size) - df = DataFrame(dict(A = 
'foo', B = 'bar'),index=range(10)) + df = DataFrame(dict(A = 'foo', B = 'bar'),index=lrange(10)) # a min_itemsize that creates a data_column _maybe_remove(store, 'df') @@ -1015,8 +1017,9 @@ def test_big_table_frame(self): raise nose.SkipTest('no big table frame') # create and write a big table - df = DataFrame(np.random.randn(2000 * 100, 100), index=range( - 2000 * 100), columns=['E%03d' % i for i in xrange(100)]) + df = DataFrame(np.random.randn(2000 * 100, 100), + index=lrange(2000 * 100), + columns=['E%03d' % i for i in range(100)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x @@ -1027,7 +1030,7 @@ def test_big_table_frame(self): rows = store.root.df.table.nrows recons = store.select('df') - print ("\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x)) + print("\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x)) def test_big_table2_frame(self): # this is a really big table: 1m rows x 60 float columns, 20 string, 20 datetime @@ -1038,14 +1041,15 @@ def test_big_table2_frame(self): print ("\nbig_table2 start") import time start_time = time.time() - df = DataFrame(np.random.randn(1000 * 1000, 60), index=xrange(int( - 1000 * 1000)), columns=['E%03d' % i for i in xrange(60)]) - for x in xrange(20): + df = DataFrame(np.random.randn(1000 * 1000, 60), + index=lrange(int(1000 * 1000)), + columns=['E%03d' % i for i in range(60)]) + for x in range(20): df['String%03d' % x] = 'string%03d' % x - for x in xrange(20): + for x in range(20): df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0) - print ("\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" + print("\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)) def f(chunksize): @@ -1056,9 +1060,9 @@ def f(chunksize): for c in [10000, 50000, 250000]: start_time = time.time() - print ("big_table2 frame [chunk->%s]" % c) + print("big_table2 frame [chunk->%s]" % c) rows = f(c) - print ("big_table2 frame [rows->%s,chunk->%s] -> %5.2f" + print("big_table2 frame [rows->%s,chunk->%s] -> %5.2f" % (rows, c, time.time() - start_time)) def test_big_put_frame(self): @@ -1067,14 +1071,14 @@ def test_big_put_frame(self): print ("\nbig_put start") import time start_time = time.time() - df = DataFrame(np.random.randn(1000 * 1000, 60), index=xrange(int( - 1000 * 1000)), columns=['E%03d' % i for i in xrange(60)]) - for x in xrange(20): + df = DataFrame(np.random.randn(1000 * 1000, 60), index=lrange(int( + 1000 * 1000)), columns=['E%03d' % i for i in range(60)]) + for x in range(20): df['String%03d' % x] = 'string%03d' % x - for x in xrange(20): + for x in range(20): df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0) - print ("\nbig_put frame (creation of df) [rows->%s] -> %5.2f" + print("\nbig_put frame (creation of df) [rows->%s] -> %5.2f" % (len(df.index), time.time() - start_time)) with ensure_clean(self.path, mode='w') as store: @@ -1082,8 +1086,8 @@ def test_big_put_frame(self): store = HDFStore(fn, mode='w') store.put('df', df) - print (df.get_dtype_counts()) - print ("big_put frame [shape->%s] -> %5.2f" + print(df.get_dtype_counts()) + print("big_put frame [shape->%s] -> %5.2f" % (df.shape, time.time() - start_time)) def test_big_table_panel(self): @@ -1091,8 +1095,8 @@ def test_big_table_panel(self): # create and write a big table wp = Panel( - np.random.randn(20, 1000, 1000), items=['Item%03d' % i for i in xrange(20)], - major_axis=date_range('1/1/2000', periods=1000), minor_axis=['E%03d' % i for i in xrange(1000)]) + np.random.randn(20, 1000, 1000), 
items=['Item%03d' % i for i in range(20)], + major_axis=date_range('1/1/2000', periods=1000), minor_axis=['E%03d' % i for i in range(1000)]) wp.ix[:, 100:200, 300:400] = np.nan @@ -1108,7 +1112,7 @@ def test_big_table_panel(self): rows = store.root.wp.table.nrows recons = store.select('wp') - print ("\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x)) + print("\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x)) def test_append_diff_item_order(self): @@ -1327,8 +1331,8 @@ def test_unimplemented_dtypes_table_columns(self): l = [('date', datetime.date(2001, 1, 2))] # py3 ok for unicode - if not py3compat.PY3: - l.append(('unicode', u'\u03c3')) + if not compat.PY3: + l.append(('unicode', u('\u03c3'))) ### currently not supported dtypes #### for n, f in l: @@ -1377,14 +1381,14 @@ def compare(a,b): compare(store.select('df_tz',where=Term('A','>=',df.A[3])),df[df.A>=df.A[3]]) _maybe_remove(store, 'df_tz') - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=range(5)) + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=lrange(5)) store.append('df_tz',df) result = store['df_tz'] compare(result,df) assert_frame_equal(result,df) _maybe_remove(store, 'df_tz') - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=range(5)) + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=lrange(5)) self.assertRaises(TypeError, store.append, 'df_tz', df) # this is ok @@ -1395,14 +1399,14 @@ def compare(a,b): assert_frame_equal(result,df) # can't append with diff timezone - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=range(5)) + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=lrange(5)) self.assertRaises(ValueError, store.append, 'df_tz', df) # as index with ensure_clean(self.path) as store: # GH 4098 example - df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern')))) + df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern')))) _maybe_remove(store, 'df') store.put('df',df) @@ -1989,12 +1993,12 @@ def test_select(self): # selection on the non-indexable with a large number of columns wp = Panel( - np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)], - major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)]) + np.random.randn(100, 100, 100), items=['Item%03d' % i for i in range(100)], + major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in range(100)]) _maybe_remove(store, 'wp') store.append('wp', wp) - items = ['Item%03d' % i for i in xrange(80)] + items = ['Item%03d' % i for i in range(80)] result = store.select('wp', Term('items', items)) expected = wp.reindex(items=items) tm.assert_panel_equal(expected, result) @@ -2092,7 +2096,7 @@ def test_select_with_many_inputs(self): df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300), - B=range(300), + B=lrange(300), users = ['a']*50 + ['b']*50 + ['c']*100 + ['a%03d' % i for i in range(100)])) _maybe_remove(store, 'df') store.append('df', df, data_columns=['ts', 'A', 'B', 'users']) @@ -2108,12 +2112,12 @@ def test_select_with_many_inputs(self): tm.assert_frame_equal(expected, result) # big selector 
along the columns - selector = [ 'a','b','c' ] + [ 'a%03d' % i for i in xrange(60) ] + selector = [ 'a','b','c' ] + [ 'a%03d' % i for i in range(60) ] result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01')),Term('users',selector)]) expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector) ] tm.assert_frame_equal(expected, result) - selector = range(100,200) + selector = lrange(100,200) result = store.select('df', [Term('B', selector)]) expected = df[ df.B.isin(selector) ] tm.assert_frame_equal(expected, result) @@ -2211,7 +2215,7 @@ def test_select_iterator(self): def test_retain_index_attributes(self): # GH 3499, losing frequency info on index recreation - df = DataFrame(dict(A = Series(xrange(3), + df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H')))) with ensure_clean(self.path) as store: @@ -2228,7 +2232,7 @@ def test_retain_index_attributes(self): # try to append a table with a different frequency warnings.filterwarnings('ignore', category=AttributeConflictWarning) - df2 = DataFrame(dict(A = Series(xrange(3), + df2 = DataFrame(dict(A = Series(lrange(3), index=date_range('2002-1-1',periods=3,freq='D')))) store.append('data',df2) warnings.filterwarnings('always', category=AttributeConflictWarning) @@ -2237,10 +2241,10 @@ def test_retain_index_attributes(self): # this is ok _maybe_remove(store,'df2') - df2 = DataFrame(dict(A = Series(xrange(3), + df2 = DataFrame(dict(A = Series(lrange(3), index=[Timestamp('20010101'),Timestamp('20010102'),Timestamp('20020101')]))) store.append('df2',df2) - df3 = DataFrame(dict(A = Series(xrange(3),index=date_range('2002-1-1',periods=3,freq='D')))) + df3 = DataFrame(dict(A = Series(lrange(3),index=date_range('2002-1-1',periods=3,freq='D')))) store.append('df2',df3) def test_retain_index_attributes2(self): @@ -2249,20 +2253,20 @@ def test_retain_index_attributes2(self): warnings.filterwarnings('ignore', category=AttributeConflictWarning) - df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H')))) + df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H')))) df.to_hdf(path,'data',mode='w',append=True) - df2 = DataFrame(dict(A = Series(xrange(3), index=date_range('2002-1-1',periods=3,freq='D')))) + df2 = DataFrame(dict(A = Series(lrange(3), index=date_range('2002-1-1',periods=3,freq='D')))) df2.to_hdf(path,'data',append=True) idx = date_range('2000-1-1',periods=3,freq='H') idx.name = 'foo' - df = DataFrame(dict(A = Series(xrange(3), index=idx))) + df = DataFrame(dict(A = Series(lrange(3), index=idx))) df.to_hdf(path,'data',mode='w',append=True) self.assert_(read_hdf(path,'data').index.name == 'foo') idx2 = date_range('2001-1-1',periods=3,freq='H') idx2.name = 'bar' - df2 = DataFrame(dict(A = Series(xrange(3), index=idx2))) + df2 = DataFrame(dict(A = Series(lrange(3), index=idx2))) df2.to_hdf(path,'data',append=True) self.assert_(read_hdf(path,'data').index.name is None) @@ -2386,7 +2390,7 @@ def f(): # valid result = store.select_column('df', 'index') tm.assert_almost_equal(result.values, Series(df.index).values) - self.assert_(isinstance(result,Series)) + tm.assert_isinstance(result,Series) # not a data indexable column self.assertRaises( @@ -2422,7 +2426,7 @@ def test_coordinates(self): # get coordinates back & test vs frame _maybe_remove(store, 'df') - df = DataFrame(dict(A=range(5), B=range(5))) + df = DataFrame(dict(A=lrange(5), B=lrange(5))) store.append('df', df) c = store.select_as_coordinates('df', ['index<3']) 
assert((c.values == np.arange(3)).all() == True) @@ -2527,11 +2531,11 @@ def test_select_as_multiple(self): expected = concat([df1, df2], axis=1) expected = expected[5:] tm.assert_frame_equal(result, expected) - except (Exception), detail: - print ("error in select_as_multiple %s" % str(detail)) - print ("store: %s" % store) - print ("df1: %s" % df1) - print ("df2: %s" % df2) + except Exception as detail: + print("error in select_as_multiple %s" % str(detail)) + print("store: %s" % store) + print("df1: %s" % df1) + print("df2: %s" % df2) # test excpection for diff rows @@ -2555,7 +2559,7 @@ def test_start_stop(self): result = store.select( 'df', [Term("columns", "=", ["A"])], start=30, stop=40) assert(len(result) == 0) - assert(type(result) == DataFrame) + tm.assert_isinstance(result, DataFrame) def test_select_filter_corner(self): @@ -2696,7 +2700,7 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): # check keys if keys is None: - keys = store.keys() + keys = list(store.keys()) self.assert_(set(keys) == set(tstore.keys())) # check indicies & nrows @@ -2751,7 +2755,7 @@ def test_legacy_table_write(self): columns=['A', 'B', 'C']) store.append('mi', df) - df = DataFrame(dict(A = 'foo', B = 'bar'),index=range(10)) + df = DataFrame(dict(A = 'foo', B = 'bar'),index=lrange(10)) store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 }) store.close() @@ -2808,7 +2812,7 @@ def test_tseries_indices_frame(self): def test_unicode_index(self): - unicode_values = [u'\u03c3', u'\u03c3\u03c3'] + unicode_values = [u('\u03c3'), u('\u03c3\u03c3')] warnings.filterwarnings('ignore', category=PerformanceWarning) s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 5b23bf173ec4e..624f16b3207cd 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1,5 +1,4 @@ -from __future__ import with_statement -from pandas.util.py3compat import StringIO +from __future__ import print_function import unittest import sqlite3 import sys @@ -12,6 +11,8 @@ from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull +from pandas.compat import StringIO, range, lrange +import pandas.compat as compat import pandas.io.sql as sql import pandas.util.testing as tm @@ -22,7 +23,8 @@ datetime: lambda dt: "'%s'" % date_format(dt), str: lambda x: "'%s'" % x, np.str_: lambda x: "'%s'" % x, - unicode: lambda x: "'%s'" % x, + compat.text_type: lambda x: "'%s'" % x, + compat.binary_type: lambda x: "'%s'" % x, float: lambda x: "%.8f" % x, int: lambda x: "%s" % x, type(None): lambda x: "NULL", @@ -171,15 +173,15 @@ def _check_roundtrip(self, frame): frame['txt'] = ['a'] * len(frame) frame2 = frame.copy() - frame2['Idx'] = Index(range(len(frame2))) + 10 + frame2['Idx'] = Index(lrange(len(frame2))) + 10 sql.write_frame(frame2, name='test_table2', con=self.db) result = sql.read_frame("select * from test_table2", self.db, index_col='Idx') expected = frame.copy() - expected.index = Index(range(len(frame2))) + 10 + expected.index = Index(lrange(len(frame2))) + 10 expected.index.name = 'Idx' - print expected.index.names - print result.index.names + print(expected.index.names) + print(result.index.names) tm.assert_frame_equal(expected, result) def test_tquery(self): @@ -257,12 +259,12 @@ def setUp(self): return try: self.db = MySQLdb.connect(read_default_group='pandas') - except MySQLdb.ProgrammingError, e: +
except MySQLdb.ProgrammingError as e: raise nose.SkipTest( "Create a group of connection parameters under the heading " "[pandas] in your system's mysql default file, " "typically located at ~/.my.cnf or /etc/.my.cnf. ") - except MySQLdb.Error, e: + except MySQLdb.Error as e: raise nose.SkipTest( "Cannot connect to database. " "Create a group of connection parameters under the heading " @@ -408,7 +410,7 @@ def _check_roundtrip(self, frame): frame['txt'] = ['a'] * len(frame) frame2 = frame.copy() - index = Index(range(len(frame2))) + 10 + index = Index(lrange(len(frame2))) + 10 frame2['Idx'] = index drop_sql = "DROP TABLE IF EXISTS test_table2" cur = self.db.cursor() diff --git a/pandas/io/tests/test_wb.py b/pandas/io/tests/test_wb.py index 46eeabaf1e209..e85c63d7d5999 100644 --- a/pandas/io/tests/test_wb.py +++ b/pandas/io/tests/test_wb.py @@ -11,14 +11,16 @@ @network def test_wdi_search(): raise nose.SkipTest - expected = {u'id': {2634: u'GDPPCKD', - 4649: u'NY.GDP.PCAP.KD', - 4651: u'NY.GDP.PCAP.KN', - 4653: u'NY.GDP.PCAP.PP.KD'}, - u'name': {2634: u'GDP per Capita, constant US$, millions', - 4649: u'GDP per capita (constant 2000 US$)', - 4651: u'GDP per capita (constant LCU)', - 4653: u'GDP per capita, PPP (constant 2005 international $)'}} + expected = {u('id'): {2634: u('GDPPCKD'), + 4649: u('NY.GDP.PCAP.KD'), + 4651: u('NY.GDP.PCAP.KN'), + 4653: u('NY.GDP.PCAP.PP.KD')}, + u('name'): {2634: u('GDP per Capita, constant US$, ' + 'millions'), + 4649: u('GDP per capita (constant 2000 US$)'), + 4651: u('GDP per capita (constant LCU)'), + 4653: u('GDP per capita, PPP (constant 2005 ' + 'international $)')}} result = search('gdp.*capita.*constant').ix[:, :2] expected = pandas.DataFrame(expected) expected.index = result.index @@ -29,7 +31,7 @@ def test_wdi_search(): @network def test_wdi_download(): raise nose.SkipTest - expected = {'GDPPCKN': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'37857.1261134552', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'37081.4575704003', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'72720.0691255285', (u'Mexico', u'2004'): u'74751.6003347038', (u'Mexico', u'2005'): u'76200.2154469437', (u'Canada', u'2005'): u'38617.4563629611'}, 'GDPPCKD': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'34397.055116118', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'33692.2812368928', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'7608.43848670658', (u'Mexico', u'2004'): u'7820.99026814334', (u'Mexico', u'2005'): u'7972.55364129367', (u'Canada', u'2005'): u'35087.8925933298'}} + expected = {'GDPPCKN': {(u('United States'), u('2003')): u('40800.0735367688'), (u('Canada'), u('2004')): u('37857.1261134552'), (u('United States'), u('2005')): u('42714.8594790102'), (u('Canada'), u('2003')): u('37081.4575704003'), (u('United States'), u('2004')): u('41826.1728310667'), (u('Mexico'), u('2003')): u('72720.0691255285'), (u('Mexico'), u('2004')): u('74751.6003347038'), (u('Mexico'), u('2005')): u('76200.2154469437'), (u('Canada'), u('2005')): u('38617.4563629611')}, 'GDPPCKD': {(u('United States'), u('2003')): u('40800.0735367688'), (u('Canada'), u('2004')): u('34397.055116118'), (u('United States'), u('2005')): u('42714.8594790102'), (u('Canada'), u('2003')): u('33692.2812368928'), (u('United States'), u('2004')): u('41826.1728310667'), (u('Mexico'), u('2003')): u('7608.43848670658'), (u('Mexico'), u('2004')): 
u('7820.99026814334'), (u('Mexico'), u('2005')): u('7972.55364129367'), (u('Canada'), u('2005')): u('35087.8925933298')}} expected = pandas.DataFrame(expected) result = download(country=['CA', 'MX', 'US', 'junk'], indicator=['GDPPCKD', 'GDPPCKN', 'junk'], start=2003, end=2005) diff --git a/pandas/io/wb.py b/pandas/io/wb.py index f83ed296e360c..7c50c0b41e897 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,6 +1,8 @@ -from urllib2 import urlopen -import json -from contextlib import closing +from __future__ import print_function + +from pandas.compat import map, reduce, range, lrange +from pandas.io.common import urlopen +from pandas.io import json import pandas import numpy as np @@ -65,10 +67,10 @@ def download(country=['MX', 'CA', 'US'], indicator=['GDPPCKD', 'GDPPCKN'], bad_indicators.append(ind) # Warn if len(bad_indicators) > 0: - print ('Failed to obtain indicator(s): %s' % '; '.join(bad_indicators)) + print('Failed to obtain indicator(s): %s' % '; '.join(bad_indicators)) print ('The data may still be available for download at http://data.worldbank.org') if len(bad_countries) > 0: - print ('Invalid ISO-2 codes: %s' % ' '.join(bad_countries)) + print('Invalid ISO-2 codes: %s' % ' '.join(bad_countries)) # Merge WDI series if len(data) > 0: out = reduce(lambda x, y: x.merge(y, how='outer'), data) @@ -86,14 +88,14 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US', indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \ "&format=json" # Download - with closing(urlopen(url)) as response: + with urlopen(url) as response: data = response.read() # Parse JSON file data = json.loads(data)[1] - country = map(lambda x: x['country']['value'], data) - iso2c = map(lambda x: x['country']['id'], data) - year = map(lambda x: x['date'], data) - value = map(lambda x: x['value'], data) + country = [x['country']['value'] for x in data] + iso2c = [x['country']['id'] for x in data] + year = [x['date'] for x in data] + value = [x['value'] for x in data] # Prepare output out = pandas.DataFrame([country, iso2c, year, value]).T return out @@ -103,14 +105,14 @@ def get_countries(): '''Query information about countries ''' url = 'http://api.worldbank.org/countries/all?format=json' - with closing(urlopen(url)) as response: + with urlopen(url) as response: data = response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) - data.adminregion = map(lambda x: x['value'], data.adminregion) - data.incomeLevel = map(lambda x: x['value'], data.incomeLevel) - data.lendingType = map(lambda x: x['value'], data.lendingType) - data.region = map(lambda x: x['value'], data.region) + data.adminregion = [x['value'] for x in data.adminregion] + data.incomeLevel = [x['value'] for x in data.incomeLevel] + data.lendingType = [x['value'] for x in data.lendingType] + data.region = [x['value'] for x in data.region] data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'}) return data @@ -119,12 +121,12 @@ def get_indicators(): '''Download information about all World Bank data series ''' url = 'http://api.worldbank.org/indicators?per_page=50000&format=json' - with closing(urlopen(url)) as response: + with urlopen(url) as response: data = response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) # Clean fields - data.source = map(lambda x: x['value'], data.source) + data.source = [x['value'] for x in data.source] fun = lambda x: x.encode('ascii', 'ignore') data.sourceOrganization = data.sourceOrganization.apply(fun) # Clean topic field @@ -134,12 +136,12 @@ def 
get_value(x): return x['value'] except: return '' - fun = lambda x: map(lambda y: get_value(y), x) + fun = lambda x: [get_value(y) for y in x] data.topics = data.topics.apply(fun) data.topics = data.topics.apply(lambda x: ' ; '.join(x)) # Clean outpu data = data.sort(columns='id') - data.index = pandas.Index(range(data.shape[0])) + data.index = pandas.Index(lrange(data.shape[0])) return data diff --git a/pandas/rpy/__init__.py b/pandas/rpy/__init__.py index 3e77a0b0b0109..d5cf8a420b727 100644 --- a/pandas/rpy/__init__.py +++ b/pandas/rpy/__init__.py @@ -1,4 +1,4 @@ try: - from common import importr, r, load_data + from .common import importr, r, load_data except ImportError: pass diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index 92adee5bdae57..a640b43ab97e6 100644 --- a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -2,7 +2,9 @@ Utilities for making working with rpy2 more user- and developer-friendly. """ +from __future__ import print_function +from pandas.compat import zip, range import numpy as np import pandas as pd @@ -73,7 +75,7 @@ def _convert_array(obj): major_axis=name_list[0], minor_axis=name_list[1]) else: - print ('Cannot handle dim=%d' % len(dim)) + print('Cannot handle dim=%d' % len(dim)) else: return arr diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 48fa9caa0a05c..7710749a869f0 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -11,7 +11,7 @@ from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.util import py3compat +from pandas import compat from pandas._sparse import BlockIndex, IntIndex import pandas._sparse as splib @@ -216,7 +216,7 @@ def disable(self, other): __ipow__ = disable # Python 2 division operators - if not py3compat.PY3: + if not compat.PY3: __div__ = _sparse_op_wrap(operator.div, 'div') __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__') __idiv__ = disable diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index f5e57efdcb166..d108094036f64 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -6,6 +6,8 @@ # pylint: disable=E1101,E1103,W0231,E0202 from numpy import nan +from pandas.compat import range, lmap, map +from pandas import compat import numpy as np from pandas.core.common import _pickle_array, _unpickle_array, _try_sort @@ -148,12 +150,12 @@ def _init_dict(self, data, index, columns, dtype=None): # pre-filter out columns if we passed it if columns is not None: columns = _ensure_index(columns) - data = dict((k, v) for k, v in data.iteritems() if k in columns) + data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) else: - columns = Index(_try_sort(data.keys())) + columns = Index(_try_sort(list(data.keys()))) if index is None: - index = extract_index(data.values()) + index = extract_index(list(data.values())) sp_maker = lambda x: SparseSeries(x, index=index, kind=self.default_kind, @@ -161,7 +163,7 @@ def _init_dict(self, data, index, columns, dtype=None): copy=True) sdict = {} - for k, v in data.iteritems(): + for k, v in compat.iteritems(data): if isinstance(v, Series): # Force alignment, no copy necessary if not v.index.equals(index): @@ -211,7 +213,7 @@ def __array_wrap__(self, result): def __getstate__(self): series = dict((k, (v.sp_index, v.sp_values)) - for k, v in self.iteritems()) + for k, v in compat.iteritems(self)) columns = self.columns index = self.index @@ -232,7 +234,7 @@ def __setstate__(self, state): index = idx series_dict = {} - for col, (sp_index, sp_values) in series.iteritems(): + for col, 
(sp_index, sp_values) in compat.iteritems(series): series_dict[col] = SparseSeries(sp_values, sparse_index=sp_index, fill_value=fv) @@ -250,16 +252,16 @@ def to_dense(self): ------- df : DataFrame """ - data = dict((k, v.to_dense()) for k, v in self.iteritems()) + data = dict((k, v.to_dense()) for k, v in compat.iteritems(self)) return DataFrame(data, index=self.index) def get_dtype_counts(self): from collections import defaultdict d = defaultdict(int) - for k, v in self.iteritems(): + for k, v in compat.iteritems(self): d[v.dtype.name] += 1 return Series(d) - + def astype(self, dtype): raise NotImplementedError @@ -267,7 +269,7 @@ def copy(self, deep=True): """ Make a copy of this SparseDataFrame """ - series = dict((k, v.copy()) for k, v in self.iteritems()) + series = dict((k, v.copy()) for k, v in compat.iteritems(self)) return SparseDataFrame(series, index=self.index, columns=self.columns, default_fill_value=self.default_fill_value, default_kind=self.default_kind) @@ -279,7 +281,7 @@ def density(self): represented in the frame """ tot_nonsparse = sum([ser.sp_index.npoints - for _, ser in self.iteritems()]) + for _, ser in compat.iteritems(self)]) tot = len(self.index) * len(self.columns) return tot_nonsparse / float(tot) @@ -545,7 +547,7 @@ def _combine_match_index(self, other, func, fill_value=None): if other.index is not new_index: other = other.reindex(new_index) - for col, series in this.iteritems(): + for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) return self._constructor(new_data, index=new_index, @@ -576,7 +578,7 @@ def _combine_match_columns(self, other, func, fill_value): def _combine_const(self, other, func): new_data = {} - for col, series in self.iteritems(): + for col, series in compat.iteritems(self): new_data[col] = func(series, other) return self._constructor(data=new_data, index=self.index, @@ -602,7 +604,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, need_mask = mask.any() new_series = {} - for col, series in self.iteritems(): + for col, series in compat.iteritems(self): values = series.values new = values.take(indexer) @@ -626,7 +628,7 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None, raise NotImplementedError # TODO: fill value handling - sdict = dict((k, v) for k, v in self.iteritems() if k in columns) + sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns) return SparseDataFrame(sdict, index=self.index, columns=columns, default_fill_value=self.default_fill_value) @@ -649,7 +651,7 @@ def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer, def _rename_index_inplace(self, mapper): self.index = [mapper(x) for x in self.index] - + def _rename_columns_inplace(self, mapper): new_series = {} new_columns = [] @@ -797,11 +799,11 @@ def shift(self, periods, freq=None, **kwds): new_series = {} if offset is None: new_index = self.index - for col, s in self.iteritems(): + for col, s in compat.iteritems(self): new_series[col] = s.shift(periods) else: new_index = self.index.shift(periods, offset) - for col, s in self.iteritems(): + for col, s in compat.iteritems(self): new_series[col] = SparseSeries(s.sp_values, index=new_index, sparse_index=s.sp_index, fill_value=s.fill_value) @@ -833,7 +835,7 @@ def apply(self, func, axis=0, broadcast=False): if isinstance(func, np.ufunc): new_series = {} - for k, v in self.iteritems(): + for k, v in compat.iteritems(self): applied = func(v) applied.fill_value = func(applied.fill_value) new_series[k] = 
applied @@ -862,12 +864,12 @@ def applymap(self, func): ------- applied : DataFrame """ - return self.apply(lambda x: map(func, x)) + return self.apply(lambda x: lmap(func, x)) @Appender(DataFrame.fillna.__doc__) def fillna(self, value=None, method=None, inplace=False, limit=None): new_series = {} - for k, v in self.iterkv(): + for k, v in compat.iteritems(self): new_series[k] = v.fillna(value=value, method=method, limit=limit) if inplace: @@ -882,7 +884,7 @@ def stack_sparse_frame(frame): """ Only makes sense when fill_value is NaN """ - lengths = [s.sp_index.npoints for _, s in frame.iteritems()] + lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)] nobs = sum(lengths) # this is pretty fast @@ -893,7 +895,7 @@ def stack_sparse_frame(frame): # TODO: Figure out whether this can be reached. # I think this currently can't be reached because you can't build a SparseDataFrame # with a non-np.NaN fill value (fails earlier). - for _, series in frame.iteritems(): + for _, series in compat.iteritems(frame): if not np.isnan(series.fill_value): raise TypeError('This routine assumes NaN fill value') @@ -933,7 +935,7 @@ def homogenize(series_dict): need_reindex = False - for _, series in series_dict.iteritems(): + for _, series in compat.iteritems(series_dict): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') @@ -945,7 +947,7 @@ def homogenize(series_dict): if need_reindex: output = {} - for name, series in series_dict.iteritems(): + for name, series in compat.iteritems(series_dict): if not series.sp_index.equals(index): series = series.sparse_reindex(index) diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 246e6fa93918f..260d648243633 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -5,6 +5,8 @@ # pylint: disable=E1101,E1103,W0231 +from pandas.compat import range, lrange, zip +from pandas import compat import numpy as np from pandas.core.index import Index, MultiIndex, _ensure_index @@ -31,7 +33,7 @@ def __set__(self, obj, value): if isinstance(value, MultiIndex): raise NotImplementedError - for v in obj._frames.itervalues(): + for v in compat.itervalues(obj._frames): setattr(v, self.frame_attr, value) setattr(obj, self.cache_field, value) @@ -205,7 +207,7 @@ def set_value(self, item, major, minor, value): def __delitem__(self, key): loc = self.items.get_loc(key) - indices = range(loc) + range(loc + 1, len(self.items)) + indices = lrange(loc) + lrange(loc + 1, len(self.items)) del self._frames[key] self._items = self._items.take(indices) @@ -331,7 +333,7 @@ def reindex(self, major=None, items=None, minor=None, major_axis=None, new_frames = self._frames if copy: - new_frames = dict((k, v.copy()) for k, v in new_frames.iteritems()) + new_frames = dict((k, v.copy()) for k, v in compat.iteritems(new_frames)) return SparsePanel(new_frames, items=items, major_axis=major, @@ -346,7 +348,7 @@ def _combine(self, other, func, axis=0): return self._combinePanel(other, func) elif np.isscalar(other): new_frames = dict((k, func(v, other)) - for k, v in self.iterkv()) + for k, v in compat.iteritems(self)) return self._new_like(new_frames) def _combineFrame(self, other, func, axis=0): @@ -423,7 +425,7 @@ def major_xs(self, key): y : DataFrame index -> minor axis, columns -> items """ - slices = dict((k, v.xs(key)) for k, v in self.iterkv()) + slices = dict((k, v.xs(key)) for k, v in compat.iteritems(self)) return DataFrame(slices, index=self.minor_axis, columns=self.items) def minor_xs(self, key): @@ -440,7 +442,7 
@@ def minor_xs(self, key): y : SparseDataFrame index -> major axis, columns -> items """ - slices = dict((k, v[key]) for k, v in self.iterkv()) + slices = dict((k, v[key]) for k, v in compat.iteritems(self)) return SparseDataFrame(slices, index=self.major_axis, columns=self.items, default_fill_value=self.default_fill_value, @@ -452,7 +454,7 @@ def minor_xs(self, key): def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): from pandas.core.panel import _get_combined_index output = {} - for item, df in frames.iteritems(): + for item, df in compat.iteritems(frames): if not isinstance(df, SparseDataFrame): df = SparseDataFrame(df, default_kind=kind, default_fill_value=fill_value) @@ -469,7 +471,7 @@ def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): index = _ensure_index(index) columns = _ensure_index(columns) - for item, df in output.iteritems(): + for item, df in compat.iteritems(output): if not (df.index.equals(index) and df.columns.equals(columns)): output[item] = df.reindex(index=index, columns=columns) @@ -477,7 +479,7 @@ def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): def _stack_sparse_info(frame): - lengths = [s.sp_index.npoints for _, s in frame.iteritems()] + lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)] # this is pretty fast minor_labels = np.repeat(np.arange(len(frame.columns)), lengths) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 802808954c8f4..83adf135d47d3 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -17,7 +17,7 @@ import pandas.core.common as com import pandas.core.datetools as datetools -from pandas.util import py3compat +from pandas import compat from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray) from pandas._sparse import BlockIndex, IntIndex @@ -265,7 +265,7 @@ def __unicode__(self): __rpow__ = _sparse_op_wrap(lambda x, y: y ** x, '__rpow__') # Python 2 division operators - if not py3compat.PY3: + if not compat.PY3: __div__ = _sparse_op_wrap(operator.div, 'div') __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__') diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index a92170621f50d..bd5f99ef73fe8 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -1,3 +1,4 @@ +from pandas.compat import range import re from numpy import nan, ndarray import numpy as np @@ -10,6 +11,7 @@ from pandas.core.common import notnull from pandas.sparse.api import SparseArray from pandas.util.testing import assert_almost_equal, assertRaisesRegexp +import pandas.util.testing as tm def assert_sp_array_equal(left, right): @@ -128,19 +130,19 @@ def _check_op(op, first, second): res = op(first, second) exp = SparseArray(op(first.values, second.values), fill_value=first.fill_value) - self.assert_(isinstance(res, SparseArray)) + tm.assert_isinstance(res, SparseArray) assert_almost_equal(res.values, exp.values) res2 = op(first, second.values) - self.assert_(isinstance(res2, SparseArray)) + tm.assert_isinstance(res2, SparseArray) assert_sp_array_equal(res, res2) res3 = op(first.values, second) - self.assert_(isinstance(res3, SparseArray)) + tm.assert_isinstance(res3, SparseArray) assert_sp_array_equal(res, res3) res4 = op(first, 4) - self.assert_(isinstance(res4, SparseArray)) + tm.assert_isinstance(res4, SparseArray) exp = op(first.values, 4) exp_fv = op(first.fill_value, 4) assert_almost_equal(res4.fill_value, exp_fv) diff --git 
diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index d31f919e2e84b..f820142a6e71d 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -7,6 +7,7 @@ import numpy as np import operator from numpy.testing import assert_almost_equal, assert_equal +import pandas.util.testing as tm from pandas.core.sparse import SparseSeries from pandas import DataFrame @@ -288,7 +289,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): # see if survive the round trip xbindex = xindex.to_int_index().to_block_index() ybindex = yindex.to_int_index().to_block_index() - self.assert_(isinstance(xbindex, BlockIndex)) + tm.assert_isinstance(xbindex, BlockIndex) self.assert_(xbindex.equals(xindex)) self.assert_(ybindex.equals(yindex)) check_cases(_check_case) diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index a69385dd9a436..21241050e39dc 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -1,3 +1,4 @@ +from pandas.compat import range import unittest from numpy import nan @@ -6,7 +7,7 @@ from pandas.sparse.api import SparseList, SparseArray from pandas.util.testing import assert_almost_equal -from test_sparse import assert_sp_array_equal +from .test_sparse import assert_sp_array_equal def assert_sp_list_equal(left, right): diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 1382a6a642aa3..248c920b03838 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1,7 +1,6 @@ # pylint: disable-msg=E1101,W0612 from unittest import TestCase -import cPickle as pickle import operator from datetime import datetime @@ -23,6 +22,8 @@ import pandas.core.datetools as datetools from pandas.core.common import isnull import pandas.util.testing as tm +from pandas.compat import range, lrange, cPickle as pickle, StringIO +from pandas import compat import pandas.sparse.frame as spf @@ -34,9 +35,8 @@ import pandas.tests.test_frame as test_frame import pandas.tests.test_panel as test_panel import pandas.tests.test_series as test_series -from pandas.util.py3compat import StringIO -from test_array import assert_sp_array_equal +from .test_array import assert_sp_array_equal import warnings warnings.filterwarnings(action='ignore', category=FutureWarning) @@ -85,7 +85,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True): exact: Series SparseIndex objects must be exactly the same, otherwise just compare dense representations """ - for col, series in left.iteritems(): + for col, series in compat.iteritems(left): assert(col in right) # trade-off? @@ -105,7 +105,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True): def assert_sp_panel_equal(left, right, exact_indices=True): - for item, frame in left.iterkv(): + for item, frame in compat.iteritems(left): assert(item in right) # trade-off? 
assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices) @@ -204,9 +204,9 @@ def test_to_dense_preserve_name(self): def test_constructor(self): # test setup guys self.assert_(np.isnan(self.bseries.fill_value)) - self.assert_(isinstance(self.bseries.sp_index, BlockIndex)) + tm.assert_isinstance(self.bseries.sp_index, BlockIndex) self.assert_(np.isnan(self.iseries.fill_value)) - self.assert_(isinstance(self.iseries.sp_index, IntIndex)) + tm.assert_isinstance(self.iseries.sp_index, IntIndex) self.assertEquals(self.zbseries.fill_value, 0) assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0)) @@ -222,7 +222,7 @@ def test_constructor(self): # Sparse time series works date_index = bdate_range('1/1/2000', periods=len(self.bseries)) s5 = SparseSeries(self.bseries, index=date_index) - self.assert_(isinstance(s5, SparseTimeSeries)) + tm.assert_isinstance(s5, SparseTimeSeries) # pass Series bseries2 = SparseSeries(self.bseries.to_dense()) @@ -312,10 +312,10 @@ def _check_all(self, check_func): def test_getitem(self): def _check_getitem(sp, dense): - for idx, val in dense.iteritems(): + for idx, val in compat.iteritems(dense): assert_almost_equal(val, sp[idx]) - for i in xrange(len(dense)): + for i in range(len(dense)): assert_almost_equal(sp[i], dense[i]) # j = np.float64(i) # assert_almost_equal(sp[j], dense[j]) @@ -365,11 +365,11 @@ def test_set_value(self): def test_getitem_slice(self): idx = self.bseries.index res = self.bseries[::2] - self.assert_(isinstance(res, SparseSeries)) + tm.assert_isinstance(res, SparseSeries) assert_sp_series_equal(res, self.bseries.reindex(idx[::2])) res = self.bseries[:5] - self.assert_(isinstance(res, SparseSeries)) + tm.assert_isinstance(res, SparseSeries) assert_sp_series_equal(res, self.bseries.reindex(idx[:5])) res = self.bseries[5:] @@ -386,7 +386,7 @@ def _compare_with_dense(sp): def _compare(idx): dense_result = dense.take(idx).values sparse_result = sp.take(idx) - self.assert_(isinstance(sparse_result, SparseSeries)) + tm.assert_isinstance(sparse_result, SparseSeries) assert_almost_equal(dense_result, sparse_result.values) _compare([1., 2., 3., 4., 5., 0.]) @@ -624,7 +624,7 @@ def _check_matches(indices, expected): sparse_index=idx) homogenized = spf.homogenize(data) - for k, v in homogenized.iteritems(): + for k, v in compat.iteritems(homogenized): assert(v.sp_index.equals(expected)) indices1 = [BlockIndex(10, [2], [7]), @@ -680,13 +680,13 @@ def test_shift(self): def test_cumsum(self): result = self.bseries.cumsum() expected = self.bseries.to_dense().cumsum() - self.assert_(isinstance(result, SparseSeries)) + tm.assert_isinstance(result, SparseSeries) self.assertEquals(result.name, self.bseries.name) assert_series_equal(result.to_dense(), expected) result = self.zbseries.cumsum() expected = self.zbseries.to_dense().cumsum() - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, expected) def test_combine_first(self): @@ -751,15 +751,15 @@ def test_as_matrix(self): def test_copy(self): cp = self.frame.copy() - self.assert_(isinstance(cp, SparseDataFrame)) + tm.assert_isinstance(cp, SparseDataFrame) assert_sp_frame_equal(cp, self.frame) self.assert_(cp.index is self.frame.index) def test_constructor(self): - for col, series in self.frame.iteritems(): - self.assert_(isinstance(series, SparseSeries)) + for col, series in compat.iteritems(self.frame): + tm.assert_isinstance(series, SparseSeries) - self.assert_(isinstance(self.iframe['A'].sp_index, IntIndex)) + 
tm.assert_isinstance(self.iframe['A'].sp_index, IntIndex) # constructed zframe from matrix above self.assertEquals(self.zframe['A'].fill_value, 0) @@ -768,12 +768,12 @@ def test_constructor(self): # construct no data sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10)) - for col, series in sdf.iteritems(): - self.assert_(isinstance(series, SparseSeries)) + for col, series in compat.iteritems(sdf): + tm.assert_isinstance(series, SparseSeries) # construct from nested dict data = {} - for c, s in self.frame.iteritems(): + for c, s in compat.iteritems(self.frame): data[c] = s.to_dict() sdf = SparseDataFrame(data) @@ -826,7 +826,7 @@ def test_constructor_dataframe(self): def test_constructor_convert_index_once(self): arr = np.array([1.5, 2.5, 3.5]) - sdf = SparseDataFrame(columns=range(4), index=arr) + sdf = SparseDataFrame(columns=lrange(4), index=arr) self.assertTrue(sdf[0].index is sdf[1].index) def test_constructor_from_series(self): @@ -834,16 +834,16 @@ def test_constructor_from_series(self): # GH 2873 x = Series(np.random.randn(10000), name='a') x = x.to_sparse(fill_value=0) - self.assert_(isinstance(x,SparseSeries)) + tm.assert_isinstance(x,SparseSeries) df = SparseDataFrame(x) - self.assert_(isinstance(df,SparseDataFrame)) + tm.assert_isinstance(df,SparseDataFrame) x = Series(np.random.randn(10000), name ='a') y = Series(np.random.randn(10000), name ='b') x2 = x.astype(float) x2.ix[:9998] = np.NaN x_sparse = x2.to_sparse(fill_value=np.NaN) - + # Currently fails too with weird ufunc error # df1 = SparseDataFrame([x_sparse, y]) @@ -867,7 +867,7 @@ def test_str(self): sdf = df.to_sparse() str(sdf) - + def test_array_interface(self): res = np.sqrt(self.frame) dres = np.sqrt(self.frame.to_dense()) @@ -886,13 +886,13 @@ def test_dense_to_sparse(self): df = DataFrame({'A': [nan, nan, nan, 1, 2], 'B': [1, 2, nan, nan, nan]}) sdf = df.to_sparse() - self.assert_(isinstance(sdf, SparseDataFrame)) + tm.assert_isinstance(sdf, SparseDataFrame) self.assert_(np.isnan(sdf.default_fill_value)) - self.assert_(isinstance(sdf['A'].sp_index, BlockIndex)) + tm.assert_isinstance(sdf['A'].sp_index, BlockIndex) tm.assert_frame_equal(sdf.to_dense(), df) sdf = df.to_sparse(kind='integer') - self.assert_(isinstance(sdf['A'].sp_index, IntIndex)) + tm.assert_isinstance(sdf['A'].sp_index, IntIndex) df = DataFrame({'A': [0, 0, 0, 1, 2], 'B': [1, 2, 0, 0, 0]}, dtype=float) @@ -960,7 +960,7 @@ def _compare_to_dense(a, b, da, db, op): if isinstance(a, DataFrame) and isinstance(db, DataFrame): mixed_result = op(a, db) - self.assert_(isinstance(mixed_result, SparseDataFrame)) + tm.assert_isinstance(mixed_result, SparseDataFrame) assert_sp_frame_equal(mixed_result, sparse_result, exact_indices=False) @@ -1008,7 +1008,7 @@ def test_op_corners(self): self.assert_(empty.empty) foo = self.frame + self.empty - self.assert_(isinstance(foo.index, DatetimeIndex)) + tm.assert_isinstance(foo.index, DatetimeIndex) assert_frame_equal(foo, self.frame * np.nan) foo = self.empty + self.frame @@ -1083,7 +1083,7 @@ def _check_frame(frame): # insert SparseSeries frame['E'] = frame['A'] - self.assert_(isinstance(frame['E'], SparseSeries)) + tm.assert_isinstance(frame['E'], SparseSeries) assert_sp_series_equal(frame['E'], frame['A']) # insert SparseSeries differently-indexed @@ -1094,7 +1094,7 @@ def _check_frame(frame): # insert Series frame['F'] = frame['A'].to_dense() - self.assert_(isinstance(frame['F'], SparseSeries)) + tm.assert_isinstance(frame['F'], SparseSeries) assert_sp_series_equal(frame['F'], frame['A']) # insert 
Series differently-indexed @@ -1105,7 +1105,7 @@ def _check_frame(frame): # insert ndarray frame['H'] = np.random.randn(N) - self.assert_(isinstance(frame['H'], SparseSeries)) + tm.assert_isinstance(frame['H'], SparseSeries) to_sparsify = np.random.randn(N) to_sparsify[N // 2:] = frame.default_fill_value @@ -1176,7 +1176,7 @@ def test_append(self): def test_apply(self): applied = self.frame.apply(np.sqrt) - self.assert_(isinstance(applied, SparseDataFrame)) + tm.assert_isinstance(applied, SparseDataFrame) assert_almost_equal(applied.values, np.sqrt(self.frame.values)) applied = self.fill_frame.apply(np.sqrt) @@ -1188,7 +1188,7 @@ def test_apply(self): self.frame.to_dense().apply(np.sum)) broadcasted = self.frame.apply(np.sum, broadcast=True) - self.assert_(isinstance(broadcasted, SparseDataFrame)) + tm.assert_isinstance(broadcasted, SparseDataFrame) assert_frame_equal(broadcasted.to_dense(), self.frame.to_dense().apply(np.sum, broadcast=True)) @@ -1211,13 +1211,13 @@ def test_apply_nonuq(self): def test_applymap(self): # just test that it works result = self.frame.applymap(lambda x: x * 2) - self.assert_(isinstance(result, SparseDataFrame)) + tm.assert_isinstance(result, SparseDataFrame) def test_astype(self): self.assertRaises(Exception, self.frame.astype, np.int64) def test_fillna(self): - df = self.zframe.reindex(range(5)) + df = self.zframe.reindex(lrange(5)) result = df.fillna(0) expected = df.to_dense().fillna(0).to_sparse(fill_value=0) assert_sp_frame_equal(result, expected) @@ -1397,7 +1397,7 @@ def test_count(self): def test_cumsum(self): result = self.frame.cumsum() expected = self.frame.to_dense().cumsum() - self.assert_(isinstance(result, SparseDataFrame)) + tm.assert_isinstance(result, SparseDataFrame) assert_frame_equal(result.to_dense(), expected) def _check_all(self, check_func): @@ -1533,9 +1533,9 @@ def test_pickle(self): def _test_roundtrip(panel): pickled = pickle.dumps(panel, protocol=pickle.HIGHEST_PROTOCOL) unpickled = pickle.loads(pickled) - self.assert_(isinstance(unpickled.items, Index)) - self.assert_(isinstance(unpickled.major_axis, Index)) - self.assert_(isinstance(unpickled.minor_axis, Index)) + tm.assert_isinstance(unpickled.items, Index) + tm.assert_isinstance(unpickled.major_axis, Index) + tm.assert_isinstance(unpickled.minor_axis, Index) assert_sp_panel_equal(panel, unpickled) _test_roundtrip(self.panel) @@ -1543,7 +1543,7 @@ def _test_roundtrip(panel): def test_dense_to_sparse(self): wp = Panel.from_dict(self.data_dict) dwp = wp.to_sparse() - self.assert_(isinstance(dwp['ItemA']['A'], SparseSeries)) + tm.assert_isinstance(dwp['ItemA']['A'], SparseSeries) def test_to_dense(self): dwp = self.panel.to_dense() diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index 2d5873393de08..70b68eae7564a 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -1,5 +1,6 @@ +from __future__ import print_function +from pandas.compat import range, cStringIO as StringIO import os -from cStringIO import StringIO header = """ cimport numpy as np @@ -2290,21 +2291,21 @@ def generate_from_template(template, exclude=None): def generate_take_cython_file(path='generated.pyx'): with open(path, 'w') as f: - print >> f, header + print(header, file=f) - print >> f, generate_ensure_dtypes() + print(generate_ensure_dtypes(), file=f) for template in templates_1d: - print >> f, generate_from_template(template) + print(generate_from_template(template), file=f) for template in take_templates: - print >> f, generate_take_template(template) + 
print(generate_take_template(template), file=f) for template in put_2d: - print >> f, generate_put_template(template) + print(generate_put_template(template), file=f) for template in groupbys: - print >> f, generate_put_template(template, use_ints = False) + print(generate_put_template(template, use_ints = False), file=f) # for template in templates_1d_datetime: # print >> f, generate_from_template_datetime(template) @@ -2313,7 +2314,7 @@ def generate_take_cython_file(path='generated.pyx'): # print >> f, generate_from_template_datetime(template, ndim=2) for template in nobool_1d_templates: - print >> f, generate_from_template(template, exclude=['bool']) + print(generate_from_template(template, exclude=['bool']), file=f) if __name__ == '__main__': generate_take_cython_file() diff --git a/pandas/src/offsets.pyx b/pandas/src/offsets.pyx index 1823edeb0a4d9..096198c8a05fa 100644 --- a/pandas/src/offsets.pyx +++ b/pandas/src/offsets.pyx @@ -85,6 +85,10 @@ cdef class _Offset: cpdef next(self): pass + cpdef __next__(self): + """wrapper around next""" + return self.next() + cpdef prev(self): pass diff --git a/pandas/stats/common.py b/pandas/stats/common.py index 75ebc9284ca21..c30b3e7a4bf61 100644 --- a/pandas/stats/common.py +++ b/pandas/stats/common.py @@ -5,7 +5,7 @@ 2: 'expanding' } # also allow 'rolling' as key -_WINDOW_TYPES.update((v, v) for k,v in _WINDOW_TYPES.items()) +_WINDOW_TYPES.update((v, v) for k,v in list(_WINDOW_TYPES.items())) _ADDITIONAL_CLUSTER_TYPES = set(("entity", "time")) def _get_cluster_type(cluster_type): diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index 967199c0bcf69..38fb5894c94bb 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -1,5 +1,5 @@ from pandas.core.base import StringMixin -from pandas.util.py3compat import StringIO +from pandas.compat import StringIO, range import numpy as np @@ -173,7 +173,7 @@ def _calc_stats(self): start = self._window - 1 betas = self._beta_raw - for i in xrange(start, self._T): + for i in range(start, self._T): if self._is_rolling: begin = i - start else: @@ -213,7 +213,7 @@ def _calc_t_stat(beta, nw_lags_beta): C = np.dot(B.T, B) / N if nw_lags_beta is not None: - for i in xrange(nw_lags_beta + 1): + for i in range(nw_lags_beta + 1): cov = np.dot(B[i:].T, B[:(N - i)]) / N weight = i / (nw_lags_beta + 1) diff --git a/pandas/stats/math.py b/pandas/stats/math.py index 579d49edb8511..64548b90dade8 100644 --- a/pandas/stats/math.py +++ b/pandas/stats/math.py @@ -3,6 +3,7 @@ from __future__ import division +from pandas.compat import range import numpy as np import numpy.linalg as linalg @@ -70,7 +71,7 @@ def newey_west(m, max_lags, nobs, df, nw_overlap=False): Covariance Matrix, Econometrica, vol. 
55(3), 703-708 """ Xeps = np.dot(m.T, m) - for lag in xrange(1, max_lags + 1): + for lag in range(1, max_lags + 1): auto_cov = np.dot(m[:-lag].T, m[lag:]) weight = lag / (max_lags + 1) if nw_overlap: diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py index e81319cb79c94..c79bae34f20c4 100644 --- a/pandas/stats/misc.py +++ b/pandas/stats/misc.py @@ -1,8 +1,10 @@ from numpy import NaN +from pandas import compat import numpy as np from pandas.core.api import Series, DataFrame, isnull, notnull from pandas.core.series import remove_na +from pandas.compat import zip def zscore(series): @@ -21,7 +23,7 @@ def correl_ts(frame1, frame2): y : Series """ results = {} - for col, series in frame1.iteritems(): + for col, series in compat.iteritems(frame1): if col in frame2: other = frame2[col] @@ -82,15 +84,15 @@ def percentileRank(frame, column=None, kind='mean'): framet = frame.T if column is not None: if isinstance(column, Series): - for date, xs in frame.T.iteritems(): + for date, xs in compat.iteritems(frame.T): results[date] = fun(xs, column.get(date, NaN)) else: - for date, xs in frame.T.iteritems(): + for date, xs in compat.iteritems(frame.T): results[date] = fun(xs, xs[column]) results = Series(results) else: for column in frame.columns: - for date, xs in framet.iteritems(): + for date, xs in compat.iteritems(framet): results.setdefault(date, {})[column] = fun(xs, xs[column]) results = DataFrame(results).T return results diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index 742d832a923d8..2b8f6fc1601c8 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -4,9 +4,9 @@ # pylint: disable-msg=W0201 -from itertools import izip, starmap -from StringIO import StringIO - +from pandas.compat import zip, range, StringIO +from itertools import starmap +from pandas import compat import numpy as np from pandas.core.api import DataFrame, Series, isnull @@ -41,7 +41,7 @@ class OLS(StringMixin): Number of Newey-West lags. nw_overlap : boolean, default False Assume data is overlapping when computing Newey-West estimator - + """ _panel_model = False @@ -610,15 +610,15 @@ class MovingOLS(OLS): window : int size of window (for rolling/expanding OLS) min_periods : int - Threshold of non-null data points to require. - If None, defaults to size of window. + Threshold of non-null data points to require. + If None, defaults to size of window. intercept : bool True if you want an intercept. nw_lags : None or int Number of Newey-West lags. 
nw_overlap : boolean, default False Assume data is overlapping when computing Newey-West estimator - + """ def __init__(self, y, x, weights=None, window_type='expanding', window=None, min_periods=None, intercept=True, @@ -743,7 +743,7 @@ def var_beta(self): """Returns the covariance of beta.""" result = {} result_index = self._result_index - for i in xrange(len(self._var_beta_raw)): + for i in range(len(self._var_beta_raw)): dm = DataFrame(self._var_beta_raw[i], columns=self.beta.columns, index=self.beta.columns) result[result_index[i]] = dm @@ -803,7 +803,7 @@ def _calc_betas(self, x, y): cum_xx = self._cum_xx(x) cum_xy = self._cum_xy(x, y) - for i in xrange(N): + for i in range(N): if not valid[i] or not enough[i]: continue @@ -948,7 +948,7 @@ def get_result_simple(Fst, d): return Fst, (q, d), 1 - f.cdf(Fst, q, d) # Compute the P-value for each pair - result = starmap(get_result_simple, izip(F, df_resid)) + result = starmap(get_result_simple, zip(F, df_resid)) return list(result) @@ -968,7 +968,7 @@ def get_result(beta, vcov, n, d): return math.calc_F(R, r, beta, vcov, n, d) results = starmap(get_result, - izip(self._beta_raw, self._var_beta_raw, nobs, df)) + zip(self._beta_raw, self._var_beta_raw, nobs, df)) return list(results) @@ -978,7 +978,7 @@ def _p_value_raw(self): from scipy.stats import t result = [2 * t.sf(a, b) - for a, b in izip(np.fabs(self._t_stat_raw), + for a, b in zip(np.fabs(self._t_stat_raw), self._df_resid_raw)] return np.array(result) @@ -1062,7 +1062,7 @@ def _resid_raw(self): def _std_err_raw(self): """Returns the raw standard err values.""" results = [] - for i in xrange(len(self._var_beta_raw)): + for i in range(len(self._var_beta_raw)): results.append(np.sqrt(np.diag(self._var_beta_raw[i]))) return np.array(results) @@ -1251,7 +1251,7 @@ def _safe_update(d, other): """ Combine dictionaries with non-overlapping keys """ - for k, v in other.iteritems(): + for k, v in compat.iteritems(other): if k in d: raise Exception('Duplicate regressor: %s' % k) @@ -1317,7 +1317,7 @@ def _combine_rhs(rhs): elif isinstance(rhs, DataFrame): series = rhs.copy() elif isinstance(rhs, dict): - for name, value in rhs.iteritems(): + for name, value in compat.iteritems(rhs): if isinstance(value, Series): _safe_update(series, {name: value}) elif isinstance(value, (dict, DataFrame)): diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py index e8c413ec4739c..2c4e4c47c684a 100644 --- a/pandas/stats/plm.py +++ b/pandas/stats/plm.py @@ -6,6 +6,8 @@ # pylint: disable-msg=E1101,E1103 from __future__ import division +from pandas.compat import range +from pandas import compat import warnings import numpy as np @@ -261,7 +263,7 @@ def _add_categorical_dummies(self, panel, cat_mappings): val_map = cat_mappings.get(effect) if val_map: - val_map = dict((v, k) for k, v in val_map.iteritems()) + val_map = dict((v, k) for k, v in compat.iteritems(val_map)) if dropped_dummy or not self._use_all_dummies: if effect in self._dropped_dummies: @@ -670,7 +672,7 @@ def _enough_obs(self): def create_ols_dict(attr): def attr_getter(self): d = {} - for k, v in self.results.iteritems(): + for k, v in compat.iteritems(self.results): result = getattr(v, attr) d[k] = result diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index ef262cfaf44bb..dd2f196361226 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -1,7 +1,9 @@ from pandas import DataFrame, Panel from pandas.stats.api import fama_macbeth -from common import 
assert_almost_equal, BaseTest +from .common import assert_almost_equal, BaseTest +from pandas.compat import range +from pandas import compat import numpy as np @@ -28,7 +30,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds): index = result._index time = len(index) - for i in xrange(time - window + 1): + for i in range(time - window + 1): if window_type == 'rolling': start = index[i] else: @@ -37,7 +39,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds): end = index[i + window - 1] x2 = {} - for k, v in x.iterkv(): + for k, v in compat.iteritems(x): x2[k] = v.truncate(start, end) y2 = y.truncate(start, end) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 6312a28595935..24fc04d849c7f 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -11,10 +11,10 @@ from pandas.util.testing import ( assert_almost_equal, assert_series_equal, assert_frame_equal ) -from pandas.util.py3compat import PY3 import pandas.core.datetools as datetools import pandas.stats.moments as mom import pandas.util.testing as tm +from pandas.compat import range, zip, PY3, StringIO N, K = 100, 10 @@ -432,7 +432,7 @@ def _check_structures(self, func, static_comp, fill_value=None): series_result = func(self.series, 50) - self.assert_(isinstance(series_result, Series)) + tm.assert_isinstance(series_result, Series) frame_result = func(self.frame, 50) self.assertEquals(type(frame_result), DataFrame) @@ -487,7 +487,6 @@ def _check_structures(self, func, static_comp, assert_frame_equal(frame_xp, frame_rs) def test_legacy_time_rule_arg(self): - from StringIO import StringIO # suppress deprecation warnings sys.stderr = StringIO() @@ -566,7 +565,7 @@ def _check_ew_ndarray(self, func, preserve_nan=False): def _check_ew_structures(self, func): series_result = func(self.series, com=10) - self.assert_(isinstance(series_result, Series)) + tm.assert_isinstance(series_result, Series) frame_result = func(self.frame, com=10) self.assertEquals(type(frame_result), DataFrame) @@ -767,7 +766,7 @@ def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, def _check_expanding_structures(self, func): series_result = func(self.series) - self.assert_(isinstance(series_result, Series)) + tm.assert_isinstance(series_result, Series) frame_result = func(self.frame) self.assertEquals(type(frame_result), DataFrame) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index 88f9224e8975a..697425c8e0fcf 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -7,6 +7,7 @@ from __future__ import division from datetime import datetime +from pandas import compat import unittest import nose import numpy as np @@ -21,8 +22,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp) import pandas.util.testing as tm - -from common import BaseTest +from .common import BaseTest _have_statsmodels = True try: @@ -40,7 +40,7 @@ def _check_repr(obj): def _compare_ols_results(model1, model2): - assert(type(model1) == type(model2)) + tm.assert_isinstance(model1, type(model2)) if hasattr(model1, '_window_type'): _compare_moving_ols(model1, model2) @@ -196,7 +196,7 @@ def checkMovingOLS(self, window_type, x, y, weights=None, **kwds): date = index[i] x_iter = {} - for k, v in x.iteritems(): + for k, v in compat.iteritems(x): x_iter[k] = v.truncate(before=prior_date, after=date) y_iter = y.truncate(before=prior_date, after=date) @@ -367,7 +367,7 @@ def test_longpanel_series_combo(self): y = lp.pop('ItemA') model = ols(y=y, x=lp, entity_effects=True, window=20) self.assert_(notnull(model.beta.values).all()) - self.assert_(isinstance(model, PanelOLS)) + tm.assert_isinstance(model, PanelOLS) model.summary def test_series_rhs(self): @@ -388,7 +388,7 @@ def test_various_attributes(self): for attr in series_attrs: value = getattr(model, attr) - self.assert_(isinstance(value, Series)) + tm.assert_isinstance(value, Series) # works model._results @@ -529,7 +529,7 @@ def test_wls_panel(self): stack_y = y.stack() stack_x = DataFrame(dict((k, v.stack()) - for k, v in x.iterkv())) + for k, v in compat.iteritems(x))) weights = x.std('items') stack_weights = weights.stack() @@ -722,7 +722,7 @@ def checkMovingOLS(self, x, y, window_type='rolling', **kwds): date = index[i] x_iter = {} - for k, v in x.iteritems(): + for k, v in compat.iteritems(x): x_iter[k] = v.truncate(before=prior_date, after=date) y_iter = y.truncate(before=prior_date, after=date)
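A simple rule is visible throughout these hunks: loop headers and other one-pass consumers switch xrange/izip to the compat range/zip (lazy on both majors), while call sites that need a concrete sequence switch to the l-prefixed variants. A plausible sketch of those eager helpers (the actual pandas.compat definitions may differ in detail):

    # eager, list-returning twins of the lazy builtins; on Python 2
    # these could simply alias the builtins, which already return lists
    def lrange(*args, **kwargs):
        return list(range(*args, **kwargs))

    def lzip(*args, **kwargs):
        return list(zip(*args, **kwargs))

    def lmap(*args, **kwargs):
        return list(map(*args, **kwargs))

That split is why var.py below keeps plain range(...) in its for-loops but takes lrange(1, 1 + h) where a materialized sequence is wanted, for example as a DataFrame index.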
diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index cbaacd0e89b6e..ab5709d013fa9 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -1,7 +1,9 @@ +from __future__ import print_function from numpy.testing import run_module_suite, assert_equal, TestCase from pandas.util.testing import assert_almost_equal +from pandas.compat import range import nose import unittest @@ -124,10 +126,10 @@ def beta(self): return rpy.convert_robj(r.coef(self._estimate)) def summary(self, equation=None): - print (r.summary(self._estimate, equation=equation)) + print(r.summary(self._estimate, equation=equation)) def output(self): - print (self._estimate) + print(self._estimate) def estimate(self): self._estimate = r.VAR(self.rdata, p=self.p, type=self.type) @@ -144,7 +146,7 @@ def serial_test(self, lags_pt=16, type='PT.asymptotic'): return test def data_summary(self): - print (r.summary(self.rdata)) + print(r.summary(self.rdata)) class TestVAR(TestCase): diff --git a/pandas/stats/var.py b/pandas/stats/var.py index 8953f7badfefb..be55507f976cb 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,5 +1,7 @@ from __future__ import division +from pandas.compat import range, lrange, zip, reduce +from pandas import compat import numpy as np from pandas.core.base import StringMixin from pandas.util.decorators import cache_readonly @@ -59,7 +61,7 @@ def beta(self): DataFrame """ d = dict([(key, value.beta) - for (key, value) in self.ols_results.iteritems()]) + for (key, value) in compat.iteritems(self.ols_results)]) return DataFrame(d) def forecast(self, h): @@ -77,7 +79,7 @@ def forecast(self, h): DataFrame """ forecast = self._forecast_raw(h)[:, 0, :] - return DataFrame(forecast, index=xrange(1, 1 + h), + return DataFrame(forecast, index=lrange(1, 1 + h), columns=self._columns) def forecast_cov(self, h): @@ -100,7 +102,7 @@ def forecast_std_err(self, h): DataFrame """ return DataFrame(self._forecast_std_err_raw(h), - index=xrange(1, 1 + h), columns=self._columns) + index=lrange(1, 1 + h), columns=self._columns) @cache_readonly def granger_causality(self): @@ -128,17 +130,17 @@ def granger_causality(self): d = {} for col in self._columns: d[col] = {} - for i in xrange(1, 1 + self._p): + for i in range(1, 1 + self._p): lagged_data = self._lagged_data[i].filter( self._columns - [col]) - for key, value in lagged_data.iteritems(): + for key, value in compat.iteritems(lagged_data): d[col][_make_param_name(i, 
key)] = value f_stat_dict = {} p_value_dict = {} - for col, y in self._data.iteritems(): + for col, y in compat.iteritems(self._data): ssr_full = (self.resid[col] ** 2).sum() f_stats = [] @@ -190,12 +192,12 @@ def ols_results(self): from pandas.stats.api import ols d = {} - for i in xrange(1, 1 + self._p): - for col, series in self._lagged_data[i].iteritems(): + for i in range(1, 1 + self._p): + for col, series in compat.iteritems(self._lagged_data[i]): d[_make_param_name(i, col)] = series result = dict([(col, ols(y=y, x=d, intercept=self._intercept)) - for col, y in self._data.iteritems()]) + for col, y in compat.iteritems(self._data)]) return result @@ -211,7 +213,7 @@ def resid(self): DataFrame """ d = dict([(col, series.resid) - for (col, series) in self.ols_results.iteritems()]) + for (col, series) in compat.iteritems(self.ols_results)]) return DataFrame(d, index=self._index) @cache_readonly @@ -252,7 +254,7 @@ def _alpha(self): @cache_readonly def _beta_raw(self): - return np.array([self.beta[col].values() for col in self._columns]).T + return np.array([list(self.beta[col].values()) for col in self._columns]).T def _trans_B(self, h): """ @@ -278,7 +280,7 @@ def _trans_B(self, h): result.append(trans_B) - for i in xrange(2, h): + for i in range(2, h): result.append(np.dot(trans_B, result[i - 1])) return result @@ -286,8 +288,8 @@ def _trans_B(self, h): @cache_readonly def _x(self): values = np.array([ - self._lagged_data[i][col].values() - for i in xrange(1, 1 + self._p) + list(self._lagged_data[i][col].values()) + for i in range(1, 1 + self._p) for col in self._columns ]).T @@ -315,7 +317,7 @@ def _forecast_cov_raw(self, n): resid = self._forecast_cov_resid_raw(n) # beta = self._forecast_cov_beta_raw(n) - # return [a + b for a, b in izip(resid, beta)] + # return [a + b for a, b in zip(resid, beta)] # TODO: ignore the beta forecast std err until it's verified return resid @@ -332,7 +334,7 @@ def _forecast_cov_beta_raw(self, n): results = [] - for h in xrange(1, n + 1): + for h in range(1, n + 1): psi = self._psi(h) trans_B = self._trans_B(h) @@ -340,14 +342,14 @@ def _forecast_cov_beta_raw(self, n): cov_beta = self._cov_beta - for t in xrange(T + 1): + for t in range(T + 1): index = t + p - y = values.take(xrange(index, index - p, -1), axis=0).ravel() + y = values.take(lrange(index, index - p, -1), axis=0).ravel() trans_Z = np.hstack(([1], y)) trans_Z = trans_Z.reshape(1, len(trans_Z)) sum2 = 0 - for i in xrange(h): + for i in range(h): ZB = np.dot(trans_Z, trans_B[h - 1 - i]) prod = np.kron(ZB, psi[i]) @@ -367,7 +369,7 @@ def _forecast_cov_resid_raw(self, h): psi_values = self._psi(h) sum = 0 result = [] - for i in xrange(h): + for i in range(h): psi = psi_values[i] sum = sum + chain_dot(psi, self._sigma, psi.T) result.append(sum) @@ -380,9 +382,9 @@ def _forecast_raw(self, h): """ k = self._k result = [] - for i in xrange(h): + for i in range(h): sum = self._alpha.reshape(1, k) - for j in xrange(self._p): + for j in range(self._p): beta = self._lag_betas[j] idx = i - j if idx > 0: @@ -429,12 +431,12 @@ def _lag_betas(self): """ k = self._k b = self._beta_raw - return [b[k * i: k * (i + 1)].T for i in xrange(self._p)] + return [b[k * i: k * (i + 1)].T for i in range(self._p)] @cache_readonly def _lagged_data(self): return dict([(i, self._data.shift(i)) - for i in xrange(1, 1 + self._p)]) + for i in range(1, 1 + self._p)]) @cache_readonly def _nobs(self): @@ -448,10 +450,10 @@ def _psi(self, h): """ k = self._k result = [np.eye(k)] - for i in xrange(1, h): + for i in range(1, h): 
result.append(sum( [np.dot(result[i - j], self._lag_betas[j - 1]) - for j in xrange(1, 1 + i) + for j in range(1, 1 + i) if j <= self._p])) return result @@ -532,7 +534,7 @@ def forecast(self, h): Returns the forecasts at 1, 2, ..., n timesteps in the future. """ forecast = self._forecast_raw(h).T.swapaxes(1, 2) - index = xrange(1, 1 + h) + index = lrange(1, 1 + h) w = Panel(forecast, items=self._data.items, major_axis=index, minor_axis=self._data.minor_axis) return w @@ -549,7 +551,7 @@ def resid(self): DataFrame """ d = dict([(key, value.resid) - for (key, value) in self.ols_results.iteritems()]) + for (key, value) in compat.iteritems(self.ols_results)]) return Panel.fromDict(d) def _data_xs(self, i): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8706bb9cf7f4f..d0a050984a07f 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,3 +1,4 @@ +from pandas.compat import range import unittest import numpy as np @@ -36,17 +37,17 @@ def test_ints(self): arr = np.random.randint(0, 100, size=50) result = algos.unique(arr) - self.assert_(isinstance(result, np.ndarray)) + tm.assert_isinstance(result, np.ndarray) def test_objects(self): arr = np.random.randint(0, 100, size=50).astype('O') result = algos.unique(arr) - self.assert_(isinstance(result, np.ndarray)) + tm.assert_isinstance(result, np.ndarray) def test_object_refcount_bug(self): lst = ['A', 'B', 'C', 'D', 'E'] - for i in xrange(1000): + for i in range(1000): len(algos.unique(lst)) def test_on_index_object(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 48db7afa29aaa..29d104e9c465c 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,6 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime +from pandas.compat import range, lrange import unittest import nose @@ -94,7 +95,7 @@ def test_value_counts(self): arr = np.random.randn(4) factor = cut(arr, 4) - self.assert_(isinstance(factor, Categorical)) + tm.assert_isinstance(factor, Categorical) result = value_counts(factor) expected = value_counts(np.asarray(factor)) @@ -103,7 +104,7 @@ def test_value_counts(self): def test_na_flags_int_levels(self): # #1457 - levels = range(10) + levels = lrange(10) labels = np.random.randint(0, 10, 20) labels[::5] = -1 diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 3212105562446..ca119a8e263bf 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -6,6 +6,7 @@ import unittest from pandas import Series, DataFrame, date_range, DatetimeIndex, Timestamp +from pandas.compat import range, long, lrange, lmap, u, map from pandas.core.common import notnull, isnull import pandas.core.common as com import pandas.util.testing as tm @@ -14,7 +15,7 @@ import numpy as np from pandas.tslib import iNaT -from pandas.util import py3compat +from pandas import compat _multiprocess_can_split_ = True @@ -24,7 +25,7 @@ def test_is_sequence(): assert(is_seq((1, 2))) assert(is_seq([1, 2])) assert(not is_seq("abcd")) - assert(not is_seq(u"abcd")) + assert(not is_seq(u("abcd"))) assert(not is_seq(np.int64)) class A(object): @@ -94,7 +95,7 @@ def test_isnull_lists(): result = isnull(['foo', 'bar']) assert(not result.any()) - result = isnull([u'foo', u'bar']) + result = isnull([u('foo'), u('bar')]) assert(not result.any()) @@ -120,7 +121,7 @@ def test_datetimeindex_from_empty_datetime64_array(): def test_nan_to_nat_conversions(): df = DataFrame(dict({ - 'A' : 
np.asarray(range(10),dtype='float64'), + 'A' : np.asarray(lrange(10),dtype='float64'), 'B' : Timestamp('20010101') })) df.iloc[3:6,:] = np.nan result = df.loc[4,'B'].value @@ -176,7 +177,7 @@ def test_iterpairs(): def test_split_ranges(): def _bin(x, width): "return int(x) as a base2 string of given width" - return ''.join(str((x >> i) & 1) for i in xrange(width - 1, -1, -1)) + return ''.join(str((x >> i) & 1) for i in range(width - 1, -1, -1)) def test_locs(mask): nfalse = sum(np.array(mask) == 0) @@ -193,7 +194,7 @@ def test_locs(mask): # exhaustively test all possible mask sequences of length 8 ncols = 8 for i in range(2 ** ncols): - cols = map(int, list(_bin(i, ncols))) # count up in base2 + cols = lmap(int, list(_bin(i, ncols))) # count up in base2 mask = [cols[i] == 1 for i in range(len(cols))] test_locs(mask) @@ -311,7 +312,7 @@ def test_ensure_platform_int(): # On Python 2, if sys.stdin.encoding is None (IPython with zmq frontend) # common.console_encode should encode things as utf-8. # """ -# if py3compat.PY3: +# if compat.PY3: # raise nose.SkipTest # with tm.stdin_encoding(encoding=None): @@ -332,8 +333,8 @@ def test_is_re(): def test_is_recompilable(): - passes = (r'a', u'x', r'asdf', re.compile('adsf'), ur'\u2233\s*', - re.compile(r'')) + passes = (r'a', u('x'), r'asdf', re.compile('adsf'), + u(r'\u2233\s*'), re.compile(r'')) fails = 1, [], object() for p in passes: @@ -720,7 +721,7 @@ def test_2d_float32(self): def test_2d_datetime64(self): # 2005/01/01 - 2006/01/01 - arr = np.random.randint(11045376L, 11360736L, (5,3))*100000000000 + arr = np.random.randint(long(11045376), long(11360736), (5,3))*100000000000 arr = arr.view(dtype='datetime64[ns]') indexer = [0, 2, -1, 1, -1] diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py new file mode 100644 index 0000000000000..a8b9a88126861 --- /dev/null +++ b/pandas/tests/test_compat.py @@ -0,0 +1,70 @@ +""" +Testing that functions from compat work as expected +""" + +from pandas.compat import ( + range, zip, map, filter, + lrange, lzip, lmap, lfilter, + builtins +) +import unittest +import nose +import pandas.util.testing as tm + +class TestBuiltinIterators(unittest.TestCase): + def check_result(self, actual, expected, lengths): + for (iter_res, list_res), exp, length in zip(actual, expected, lengths): + self.assert_(not isinstance(iter_res, list)) + tm.assert_isinstance(list_res, list) + iter_res = list(iter_res) + self.assertEqual(len(list_res), length) + self.assertEqual(len(iter_res), length) + self.assertEqual(iter_res, exp) + self.assertEqual(list_res, exp) + + def test_range(self): + actual1 = range(10) + actual2 = lrange(10) + actual = [actual1, actual2], + expected = list(builtins.range(10)), + lengths = 10, + + actual1 = range(1, 10, 2) + actual2 = lrange(1, 10, 2) + actual += [actual1, actual2], + lengths += 5, + expected += list(builtins.range(1, 10, 2)), + self.check_result(actual, expected, lengths) + + def test_map(self): + func = lambda x, y, z: x + y + z + lst = [builtins.range(10), builtins.range(10), builtins.range(10)] + actual1 = map(func, *lst) + actual2 = lmap(func, *lst) + actual = [actual1, actual2], + expected = list(builtins.map(func, *lst)), + lengths = 10, + self.check_result(actual, expected, lengths) + + + def test_filter(self): + func = lambda x: x + lst = list(builtins.range(10)) + actual1 = filter(func, lst) + actual2 = lfilter(func, lst) + actual = [actual1, actual2], + lengths = 9, + expected = list(builtins.filter(func, lst)), + self.check_result(actual, expected, lengths) + + 
def test_zip(self): + lst = [builtins.range(10), builtins.range(10), builtins.range(10)] + actual = [zip(*lst), lzip(*lst)], + expected = list(builtins.zip(*lst)), + lengths = 10, + self.check_result(actual, expected, lengths) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + # '--with-coverage', '--cover-package=pandas.core'], + exit=False)
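The new test_compat.py above pins down the contract the rest of the series relies on: under pandas.compat the builtin names stay lazy iterators on both majors, while the l-prefixed forms always hand back lists. An illustrative usage, with names taken from the imports in that test file:

    from pandas.compat import map, lmap

    res = map(abs, [-1, -2, -3])
    assert not isinstance(res, list)              # lazy on Python 2 and 3 alike
    assert list(res) == [1, 2, 3]
    assert lmap(abs, [-1, -2, -3]) == [1, 2, 3]   # eager variant

The test_config.py, test_expressions.py and test_format.py hunks that follow then apply the remaining mechanical rules of the port: drop the obsolete "from __future__ import with_statement", add "from __future__ import print_function", and rewrite "print >>buf, obj" as the 2/3-portable "print(obj, file=buf)".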
diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index a2b1ea43717cf..ed6f641cbcb2c 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -1,6 +1,5 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from __future__ import with_statement # support python 2.5 import pandas as pd import unittest import warnings diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ba0a9926dfa78..ff76c7c070946 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -1,3 +1,4 @@ +from __future__ import print_function # pylint: disable-msg=W0612,E1101 import unittest @@ -16,7 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal) -from pandas.util import py3compat +from pandas import compat import pandas.util.testing as tm import pandas.lib as lib @@ -54,7 +55,7 @@ def tearDown(self): def run_arithmetic_test(self, df, assert_func, check_dtype=False): expr._MIN_ELEMENTS = 0 operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] - if not py3compat.PY3: + if not compat.PY3: operations.append('div') for arith in operations: op = getattr(operator, arith) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index bca38ba55e205..e7a52756089cc 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,10 +1,8 @@ +from __future__ import print_function # -*- coding: utf-8 -*- -try: - from StringIO import StringIO -except: - from io import StringIO - +from pandas.compat import range, zip, lrange, StringIO, PY3, lzip, u +import pandas.compat as compat import os import sys import unittest @@ -16,7 +14,6 @@ import numpy as np from pandas import DataFrame, Series, Index -from pandas.util.py3compat import lzip, PY3 import pandas.core.format as fmt import pandas.util.testing as tm @@ -86,7 +83,7 @@ def test_eng_float_formatter(self): def test_repr_tuples(self): buf = StringIO() - df = DataFrame({'tups': zip(range(10), range(10))}) + df = DataFrame({'tups': lzip(range(10), range(10))}) repr(df) df.to_string(col_space=10, buf=buf) @@ -101,7 +98,7 @@ def test_repr_truncation(self): _strlen = fmt._strlen_func() - for line, value in zip(r.split('\n'), df['B']): + for line, value in lzip(r.split('\n'), df['B']): if _strlen(value) + 1 > max_len: self.assert_('...' in line) else: @@ -132,10 +129,10 @@ def test_repr_obeys_max_seq_limit(self): #unlimited reset_option("display.max_seq_items") - self.assertTrue(len(com.pprint_thing(range(1000)))> 2000) + self.assertTrue(len(com.pprint_thing(lrange(1000)))> 2000) with option_context("display.max_seq_items",5): - self.assertTrue(len(com.pprint_thing(range(1000)))< 100) + self.assertTrue(len(com.pprint_thing(lrange(1000)))< 100) def test_repr_is_valid_construction_code(self): import pandas as pd @@ -154,8 +151,9 @@ def test_repr_should_return_str(self): data = [8, 5, 3, 5] - index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"] - cols = [u"\u03c8"] + index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), + u("\u03c6")] + cols = [u("\u03c8")] df = DataFrame(data, columns=cols, index=index1) self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 @@ -166,8 +164,8 @@ def test_repr_no_backslash(self): def test_expand_frame_repr(self): df_small = DataFrame('hello', [0], [0]) - df_wide = DataFrame('hello', [0], range(10)) - df_tall = DataFrame('hello', range(30), range(5)) + df_wide = DataFrame('hello', [0], lrange(10)) + df_tall = DataFrame('hello', lrange(30), lrange(5)) with option_context('mode.sim_interactive', True): with option_context('display.max_columns', 10, @@ -192,7 +190,7 @@ def test_expand_frame_repr(self): def test_repr_non_interactive(self): # in non interactive mode, there can be no dependency on the # result of terminal auto size detection - df = DataFrame('hello', range(1000), range(5)) + df = DataFrame('hello', lrange(1000), lrange(5)) with option_context('mode.sim_interactive', False, 'display.width', 0, @@ -247,7 +245,7 @@ def mkframe(n): def test_to_string_repr_unicode(self): buf = StringIO() - unicode_values = [u'\u03c3'] * 10 + unicode_values = [u('\u03c3')] * 10 unicode_values = np.array(unicode_values, dtype=object) df = DataFrame({'unicode': unicode_values}) df.to_string(col_space=10, buf=buf) @@ -255,7 +253,7 @@ def test_to_string_repr_unicode(self): # it works! 
repr(df) - idx = Index(['abc', u'\u03c3a', 'aegdvg']) + idx = Index(['abc', u('\u03c3a'), 'aegdvg']) ser = Series(np.random.randn(len(idx)), idx) rs = repr(ser).split('\n') line_len = len(rs[0]) @@ -276,7 +274,7 @@ def test_to_string_repr_unicode(self): sys.stdin = _stdin def test_to_string_unicode_columns(self): - df = DataFrame({u'\u03c3': np.arange(10.)}) + df = DataFrame({u('\u03c3'): np.arange(10.)}) buf = StringIO() df.to_string(buf=buf) @@ -287,17 +285,17 @@ def test_to_string_unicode_columns(self): buf.getvalue() result = self.frame.to_string() - self.assert_(isinstance(result, unicode)) + tm.assert_isinstance(result, compat.text_type) def test_to_string_utf8_columns(self): - n = u"\u05d0".encode('utf-8') + n = u("\u05d0").encode('utf-8') with option_context('display.max_rows', 1): df = pd.DataFrame([1, 2], columns=[n]) repr(df) def test_to_string_unicode_two(self): - dm = DataFrame({u'c/\u03c3': []}) + dm = DataFrame({u('c/\u03c3'): []}) buf = StringIO() dm.to_string(buf) @@ -324,21 +322,20 @@ def test_to_string_with_formatters(self): self.assertEqual(result, result2) def test_to_string_with_formatters_unicode(self): - df = DataFrame({u'c/\u03c3': [1, 2, 3]}) - result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x}) - self.assertEqual(result, (u' c/\u03c3\n' - '0 1\n' - '1 2\n' - '2 3')) + df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) + result = df.to_string(formatters={u('c/\u03c3'): + lambda x: '%s' % x}) + self.assertEqual(result, u(' c/\u03c3\n') + + '0 1\n1 2\n2 3') def test_to_string_buffer_all_unicode(self): buf = StringIO() - empty = DataFrame({u'c/\u03c3': Series()}) - nonempty = DataFrame({u'c/\u03c3': Series([1, 2, 3])}) + empty = DataFrame({u('c/\u03c3'): Series()}) + nonempty = DataFrame({u('c/\u03c3'): Series([1, 2, 3])}) - print >>buf, empty - print >>buf, nonempty + print(empty, file=buf) + print(nonempty, file=buf) # this should work buf.getvalue() @@ -376,9 +373,9 @@ def test_to_html_with_empty_string_label(self): def test_to_html_unicode(self): # it works! 
- df = DataFrame({u'\u03c3': np.arange(10.)}) + df = DataFrame({u('\u03c3'): np.arange(10.)}) df.to_html() - df = DataFrame({'A': [u'\u03c3']}) + df = DataFrame({'A': [u('\u03c3')]}) df.to_html() def test_to_html_escaped(self): @@ -657,7 +654,7 @@ def test_to_html_multiindex_sparsify(self): def test_to_html_index_formatter(self): df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=['foo', None], index=range(4)) + columns=['foo', None], index=lrange(4)) f = lambda x: 'abcd'[x] result = df.to_html(formatters={'__index__': f}) @@ -702,8 +699,8 @@ def test_nonunicode_nonascii_alignment(self): self.assert_(len(lines[1]) == len(lines[2])) def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({u'c/\u03c3': Series({'test': np.NaN})}) - unicode(dm.to_string()) + dm = DataFrame({u('c/\u03c3'): Series({'test': np.NaN})}) + compat.text_type(dm.to_string()) def test_string_repr_encoding(self): filepath = tm.get_data_path('unicode_series.csv') @@ -771,17 +768,24 @@ def test_pprint_thing(self): if PY3: raise nose.SkipTest() - self.assertEquals(pp_t('a') , u'a') - self.assertEquals(pp_t(u'a') , u'a') + self.assertEquals(pp_t('a') , u('a')) + self.assertEquals(pp_t(u('a')) , u('a')) self.assertEquals(pp_t(None) , 'None') - self.assertEquals(pp_t(u'\u05d0',quote_strings=True) , u"u'\u05d0'") - self.assertEquals(pp_t(u'\u05d0',quote_strings=False) , u'\u05d0') - self.assertEquals(pp_t((u'\u05d0', u'\u05d1'),quote_strings=True) , - u"(u'\u05d0', u'\u05d1')") - self.assertEquals(pp_t((u'\u05d0', (u'\u05d1', u'\u05d2')),quote_strings=True) , - u"(u'\u05d0', (u'\u05d1', u'\u05d2'))") - self.assertEquals(pp_t(('foo', u'\u05d0', (u'\u05d0', u'\u05d0')),quote_strings=True) - , u"(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))") + self.assertEquals(pp_t(u('\u05d0'), quote_strings=True), + u("u'\u05d0'")) + self.assertEquals(pp_t(u('\u05d0'), quote_strings=False), + u('\u05d0')) + self.assertEquals(pp_t((u('\u05d0'), + u('\u05d1')), quote_strings=True), + u("(u'\u05d0', u'\u05d1')")) + self.assertEquals(pp_t((u('\u05d0'), (u('\u05d1'), + u('\u05d2'))), + quote_strings=True), + u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) + self.assertEquals(pp_t(('foo', u('\u05d0'), (u('\u05d0'), + u('\u05d0'))), + quote_strings=True), + u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) # escape embedded tabs in string # GH #2038 @@ -789,7 +793,7 @@ def test_pprint_thing(self): def test_wide_repr(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] max_cols = get_option('display.max_columns') df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) set_option('display.expand_frame_repr', False) @@ -813,7 +817,7 @@ def test_wide_repr_wide_columns(self): def test_wide_repr_named(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] max_cols = get_option('display.max_columns') df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) df.index.name = 'DataFrame Index' @@ -835,7 +839,7 @@ def test_wide_repr_named(self): def test_wide_repr_multiindex(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)), np.array(col(10, 5))]) max_cols = get_option('display.max_columns') @@ -860,7 +864,7 @@ def test_wide_repr_multiindex(self): def 
test_wide_repr_multiindex_cols(self): with option_context('mode.sim_interactive', True): max_cols = get_option('display.max_columns') - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)), np.array(col(10, 5))]) mcols = pandas.MultiIndex.from_arrays([np.array(col(max_cols-1, 3)), @@ -882,7 +886,7 @@ def test_wide_repr_multiindex_cols(self): def test_wide_repr_unicode(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.randu(k) for _ in xrange(l)] + col = lambda l, k: [tm.randu(k) for _ in range(l)] max_cols = get_option('display.max_columns') df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) set_option('display.expand_frame_repr', False) @@ -908,7 +912,7 @@ def test_wide_repr_wide_long_columns(self): def test_long_series(self): n = 1000 - s = Series(np.random.randint(-50,50,n),index=['s%04d' % x for x in xrange(n)], dtype='int64') + s = Series(np.random.randint(-50,50,n),index=['s%04d' % x for x in range(n)], dtype='int64') import re str_rep = str(s) @@ -923,13 +927,13 @@ def test_index_with_nan(self): # multi-index y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = u' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64' + expected = u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') self.assert_(result == expected) # index y = df.set_index('id2') result = y.to_string() - expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64' + expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') self.assert_(result == expected) # all-nan in mi @@ -937,7 +941,7 @@ def test_index_with_nan(self): df2.ix[:,'id2'] = np.nan y = df2.set_index('id2') result = y.to_string() - expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64' + expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') self.assert_(result == expected) # partial nan in mi @@ -945,7 +949,7 @@ def test_index_with_nan(self): df2.ix[:,'id2'] = np.nan y = df2.set_index(['id2','id3']) result = y.to_string() - expected = u' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64' + expected = u(' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') self.assert_(result == expected) df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'}, @@ -953,7 +957,7 @@ def test_index_with_nan(self): y = df.set_index(['id1','id2','id3']) result = y.to_string() - expected = u' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64' + expected = u(' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') self.assert_(result == expected) def test_to_string(self): @@ -963,7 +967,7 @@ def test_to_string(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -974,7 +978,7 @@ def test_to_string(self): self.assert_(retval is None) self.assertEqual(buf.getvalue(), s) - self.assert_(isinstance(s, basestring)) + tm.assert_isinstance(s, compat.string_types) # print in right order result = biggie.to_string(columns=['B', 'A'], col_space=17, @@ -1101,7 +1105,7 @@ def test_to_string_small_float_values(self): def test_to_string_float_index(self): index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(range(5), index=index) + df = DataFrame(lrange(5), index=index) result = df.to_string() expected = (' 0\n' @@ -1114,8 +1118,8 @@ def test_to_string_float_index(self): def test_to_string_ascii_error(self): data = [('0 ', - u' 
.gitignore ', - u' 5 ', + u(' .gitignore '), + u(' 5 '), ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80' '\xa2\xe2\x80\xa2\xe2\x80\xa2')] df = DataFrame(data) @@ -1136,7 +1140,7 @@ def test_to_string_int_formatting(self): self.assertEqual(output, expected) def test_to_string_index_formatter(self): - df = DataFrame([range(5), range(5, 10), range(10, 15)]) + df = DataFrame([lrange(5), lrange(5, 10), lrange(10, 15)]) rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]}) @@ -1184,7 +1188,7 @@ def test_to_string_format_na(self): self.assertEqual(result, expected) def test_to_string_line_width(self): - df = pd.DataFrame(123, range(10, 15), range(30)) + df = pd.DataFrame(123, lrange(10, 15), lrange(30)) s = df.to_string(line_width=80) self.assertEqual(max(len(l) for l in s.split('\n')), 80) @@ -1192,7 +1196,7 @@ def test_to_html(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1203,7 +1207,7 @@ def test_to_html(self): self.assert_(retval is None) self.assertEqual(buf.getvalue(), s) - self.assert_(isinstance(s, basestring)) + tm.assert_isinstance(s, compat.string_types) biggie.to_html(columns=['B', 'A'], col_space=17) biggie.to_html(columns=['B', 'A'], @@ -1219,7 +1223,7 @@ def test_to_html(self): def test_to_html_filename(self): biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1246,8 +1250,8 @@ def test_to_html_columns_arg(self): self.assert_('B' not in result) def test_to_html_multiindex(self): - columns = pandas.MultiIndex.from_tuples(zip(np.arange(2).repeat(2), - np.mod(range(4), 2)), + columns = pandas.MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), + np.mod(lrange(4), 2))), names=['CL0', 'CL1']) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='left') @@ -1286,8 +1290,8 @@ def test_to_html_multiindex(self): self.assertEqual(result, expected) - columns = pandas.MultiIndex.from_tuples(zip(range(4), - np.mod(range(4), 2))) + columns = pandas.MultiIndex.from_tuples(list(zip(range(4), + np.mod(lrange(4), 2)))) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='right') @@ -1538,10 +1542,10 @@ def setUp(self): self.ts = tm.makeTimeSeries() def test_repr_unicode(self): - s = Series([u'\u03c3'] * 10) + s = Series([u('\u03c3')] * 10) repr(s) - a = Series([u"\u05d0"] * 1000) + a = Series([u("\u05d0")] * 1000) a.name = 'title1' repr(a) @@ -1585,26 +1589,26 @@ def test_freq_name_separation(self): def test_to_string_mixed(self): s = Series(['foo', np.nan, -1.23, 4.56]) result = s.to_string() - expected = (u'0 foo\n' - u'1 NaN\n' - u'2 -1.23\n' - u'3 4.56') + expected = (u('0 foo\n') + + u('1 NaN\n') + + u('2 -1.23\n') + + u('3 4.56')) self.assertEqual(result, expected) # but don't count NAs as floats s = Series(['foo', np.nan, 'bar', 'baz']) result = s.to_string() - expected = (u'0 foo\n' - '1 NaN\n' - '2 bar\n' + expected = (u('0 foo\n') + + '1 NaN\n' + + '2 bar\n' + '3 baz') self.assertEqual(result, expected) s = Series(['foo', 5, 'bar', 'baz']) result = s.to_string() - expected = (u'0 foo\n' - '1 5\n' - '2 bar\n' + expected = (u('0 foo\n') + + '1 5\n' + + '2 bar\n' + '3 baz') self.assertEqual(result, expected) @@ -1613,16 +1617,16 @@ def test_to_string_float_na_spacing(self): s[::2] = np.nan result = s.to_string() - expected = (u'0 NaN\n' - '1 1.5678\n' - '2 NaN\n' - 
'3 -3.0000\n' + expected = (u('0 NaN\n') + + '1 1.5678\n' + + '2 NaN\n' + + '3 -3.0000\n' + '4 NaN') self.assertEqual(result, expected) def test_unicode_name_in_footer(self): - s = Series([1, 2], name=u'\u05e2\u05d1\u05e8\u05d9\u05ea') - sf = fmt.SeriesFormatter(s, name=u'\u05e2\u05d1\u05e8\u05d9\u05ea') + s = Series([1, 2], name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) + sf = fmt.SeriesFormatter(s, name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) sf._get_footer() # should not raise exception def test_float_trim_zeros(self): @@ -1916,7 +1920,7 @@ def test_rounding(self): formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) result = formatter(0) - self.assertEqual(result, u' 0.000') + self.assertEqual(result, u(' 0.000')) def _three_digit_exp(): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 577cbfe9dc744..e08f3552382c2 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1,13 +1,18 @@ +from __future__ import print_function # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta, time -from StringIO import StringIO -import cPickle as pickle import operator import re import unittest import nose +from pandas.compat import( + map, zip, range, long, lrange, lmap, lzip, + OrderedDict, cPickle as pickle, u, StringIO +) +from pandas import compat + from numpy import random, nan from numpy.random import randn import numpy as np @@ -32,8 +37,6 @@ assertRaisesRegexp, makeCustomDataframe as mkdf, ensure_clean) -from pandas.util import py3compat -from pandas.util.compat import OrderedDict import pandas.util.testing as tm import pandas.lib as lib @@ -58,7 +61,7 @@ def _check_mixed_float(df, dtype = None): # float16 are most likely to be upcasted to float32 dtypes = dict(A = 'float32', B = 'float32', C = 'float16', D = 'float64') - if isinstance(dtype, basestring): + if isinstance(dtype, compat.string_types): dtypes = dict([ (k,dtype) for k, v in dtypes.items() ]) elif isinstance(dtype, dict): dtypes.update(dtype) @@ -73,7 +76,7 @@ def _check_mixed_float(df, dtype = None): def _check_mixed_int(df, dtype = None): dtypes = dict(A = 'int32', B = 'uint64', C = 'uint8', D = 'int64') - if isinstance(dtype, basestring): + if isinstance(dtype, compat.string_types): dtypes = dict([ (k,dtype) for k, v in dtypes.items() ]) elif isinstance(dtype, dict): dtypes.update(dtype) @@ -101,11 +104,11 @@ def test_getitem(self): # column access - for _, series in sl.iteritems(): + for _, series in compat.iteritems(sl): self.assertEqual(20, len(series.index)) self.assert_(tm.equalContents(series.index, sl.index)) - for key, _ in self.frame._series.iteritems(): + for key, _ in compat.iteritems(self.frame._series): self.assert_(self.frame[key] is not None) self.assert_('random' not in self.frame) @@ -172,7 +175,7 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A']) assert_series_equal(self.frame['A'], data['B']) - df = DataFrame(0, range(3), ['tt1', 'tt2'], dtype=np.int_) + df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_) df.ix[1, ['tt1', 'tt2']] = [1, 2] result = df.ix[1, ['tt1', 'tt2']] @@ -191,7 +194,7 @@ def test_setitem_list_not_dataframe(self): assert_almost_equal(self.frame[['A', 'B']].values, data) def test_setitem_list_of_tuples(self): - tuples = zip(self.frame['A'], self.frame['B']) + tuples = lzip(self.frame['A'], self.frame['B']) self.frame['tuples'] = tuples result = self.frame['tuples'] @@ -357,7 +360,7 @@ def test_getattr(self): 'NONEXISTENT_NAME') def test_setattr_column(self): - df 
= DataFrame({'foobar': 1}, index=range(10)) + df = DataFrame({'foobar': 1}, index=lrange(10)) df.foobar = 5 self.assert_((df.foobar == 5).all()) @@ -561,11 +564,11 @@ def test_setitem_ambig(self): from decimal import Decimal # created as float type - dm = DataFrame(index=range(3), columns=range(3)) + dm = DataFrame(index=lrange(3), columns=lrange(3)) coercable_series = Series([Decimal(1) for _ in range(3)], - index=range(3)) - uncoercable_series = Series(['foo', 'bzr', 'baz'], index=range(3)) + index=lrange(3)) + uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3)) dm[0] = np.ones(3) self.assertEqual(len(dm.columns), 3) @@ -663,7 +666,7 @@ def test_getitem_fancy_slice_integers_step(self): self.assert_(isnull(df.ix[:8:2]).values.all()) def test_getitem_setitem_integer_slice_keyerrors(self): - df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2)) + df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2)) # this is OK cp = df.copy() @@ -776,11 +779,12 @@ def test_setitem_fancy_2d(self): assert_frame_equal(frame, expected) # new corner case of boolean slicing / setting - frame = DataFrame(zip([2, 3, 9, 6, 7], [np.nan] * 5), + frame = DataFrame(lzip([2, 3, 9, 6, 7], [np.nan] * 5), columns=['a', 'b']) lst = [100] lst.extend([np.nan] * 4) - expected = DataFrame(zip([100, 3, 9, 6, 7], lst), columns=['a', 'b']) + expected = DataFrame(lzip([100, 3, 9, 6, 7], lst), + columns=['a', 'b']) frame[frame['a'] == 2] = 100 assert_frame_equal(frame, expected) @@ -1421,7 +1425,7 @@ def test_get_value(self): def test_iteritems(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): self.assertEqual(type(v), Series) def test_lookup(self): @@ -1486,7 +1490,7 @@ def test_set_value_resize(self): self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): - df = DataFrame(randn(3, 3), index=range(3), columns=list('ABC')) + df = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) res = df.set_value('C', 2, 1.0) self.assert_(list(res.index) == list(df.index) + ['C']) self.assert_(list(res.columns) == list(df.columns) + [2]) @@ -1494,7 +1498,7 @@ def test_set_value_with_index_dtype_change(self): def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) - df = DataFrame(index=index, columns=range(4)) + df = DataFrame(index=index, columns=lrange(4)) self.assertRaises(KeyError, df.get_value, 0, 1) # self.assertRaises(KeyError, df.set_value, 0, 1, 0) @@ -1507,7 +1511,7 @@ def test_single_element_ix_dont_upcast(self): self.assert_(com.is_integer(result)) def test_irow(self): - df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) result = df.irow(1) exp = df.ix[2] @@ -1534,7 +1538,7 @@ def test_irow(self): assert_frame_equal(result, expected) def test_icol(self): - df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2)) result = df.icol(1) exp = df.ix[:, 2] @@ -1564,13 +1568,13 @@ def test_irow_icol_duplicates(self): result = df.irow(0) result2 = df.ix[0] - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) result = df.T.icol(0) result2 = df.T.ix[:, 0] - self.assert_(isinstance(result, Series)) 
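# --- illustration, not a diff hunk ---
# A minimal sketch of the assertion swap made throughout this file,
# assuming pandas.util.testing's assert_isinstance as the surrounding
# hunks use it: it replaces self.assert_(isinstance(obj, klass)) and
# raises AssertionError on a mismatch.
import pandas.util.testing as tm
from pandas import Series

tm.assert_isinstance(Series([1, 2, 3]), Series)  # passes silently
# --- end illustration ---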
+ tm.assert_isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) @@ -1621,7 +1625,7 @@ def test_nested_exception(self): try: repr(df) - except Exception, e: + except Exception as e: self.assertNotEqual(type(e), UnboundLocalError) _seriesd = tm.getSeriesData() @@ -1630,7 +1634,7 @@ def test_nested_exception(self): _frame = DataFrame(_seriesd) _frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A']) _intframe = DataFrame(dict((k, v.astype(int)) - for k, v in _seriesd.iteritems())) + for k, v in compat.iteritems(_seriesd))) _tsframe = DataFrame(_tsd) @@ -1776,7 +1780,7 @@ def setUp(self): self.frame2 = _frame2.copy() # force these all to int64 to avoid platform testing issues - self.intframe = DataFrame(dict([ (c,s) for c,s in _intframe.iteritems() ]), dtype = np.int64) + self.intframe = DataFrame(dict([ (c,s) for c,s in compat.iteritems(_intframe) ]), dtype = np.int64) self.tsframe = _tsframe.copy() self.mixed_frame = _mixed_frame.copy() self.mixed_float = DataFrame({ 'A': _frame['A'].copy().astype('float32'), @@ -1972,7 +1976,7 @@ def test_set_index_cast_datetimeindex(self): 'B': np.random.randn(1000)}) idf = df.set_index('A') - self.assert_(isinstance(idf.index, DatetimeIndex)) + tm.assert_isinstance(idf.index, DatetimeIndex) def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)]) @@ -2066,8 +2070,8 @@ def test_constructor_list_frames(self): result = DataFrame([DataFrame([])]) self.assert_(result.shape == (1,0)) - result = DataFrame([DataFrame(dict(A = range(5)))]) - self.assert_(type(result.iloc[0,0]) == DataFrame) + result = DataFrame([DataFrame(dict(A = lrange(5)))]) + tm.assert_isinstance(result.iloc[0,0], DataFrame) def test_constructor_mixed_dtypes(self): @@ -2080,7 +2084,7 @@ def _make_mixed_dtypes_df(typ, ad = None): dtypes = MIXED_FLOAT_DTYPES arrays = [ np.array(np.random.randint(10, size=10), dtype = d) for d in dtypes ] - zipper = zip(dtypes,arrays) + zipper = lzip(dtypes,arrays) for d,a in zipper: assert(a.dtype == d) if ad is None: @@ -2141,8 +2145,8 @@ def test_constructor_overflow_int64(self): # #2355 data_scores = [(6311132704823138710, 273), (2685045978526272070, 23), - (8921811264899370420, 45), (17019687244989530680L, 270), - (9930107427299601010L, 273)] + (8921811264899370420, 45), (long(17019687244989530680), 270), + (long(9930107427299601010), 273)] dtype = [('uid', 'u8'), ('score', 'u8')] data = np.zeros((len(data_scores),), dtype=dtype) data[:] = data_scores @@ -2156,7 +2160,7 @@ def test_is_mixed_type(self): def test_constructor_ordereddict(self): import random nitems = 100 - nums = range(nitems) + nums = lrange(nitems) random.shuffle(nums) expected = ['A%d' % i for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) @@ -2251,14 +2255,14 @@ def testit(): def test_constructor_subclass_dict(self): # Test for passing dict subclass to constructor - data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in xrange(10)), - 'col2': tm.TestSubDict((x, 20.0 * x) for x in xrange(10))} + data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in range(10)), + 'col2': tm.TestSubDict((x, 20.0 * x) for x in range(10))} df = DataFrame(data) - refdf = DataFrame(dict((col, dict(val.iteritems())) - for col, val in data.iteritems())) + refdf = DataFrame(dict((col, dict(compat.iteritems(val))) + for col, val in compat.iteritems(data))) assert_frame_equal(refdf, df) - data = tm.TestSubDict(data.iteritems()) + data = tm.TestSubDict(compat.iteritems(data)) df 
= DataFrame(data) assert_frame_equal(refdf, df) @@ -2266,7 +2270,7 @@ def test_constructor_subclass_dict(self): from collections import defaultdict data = {} self.frame['B'][:10] = np.nan - for k, v in self.frame.iterkv(): + for k, v in compat.iteritems(self.frame): dct = defaultdict(dict) dct.update(v.to_dict()) data[k] = dct @@ -2308,17 +2312,17 @@ def test_constructor_dict_cast(self): def test_constructor_dict_dont_upcast(self): d = {'Col1': {'Row1': 'A String', 'Row2': np.nan}} df = DataFrame(d) - self.assert_(isinstance(df['Col1']['Row2'], float)) + tm.assert_isinstance(df['Col1']['Row2'], float) dm = DataFrame([[1, 2], ['a', 'b']], index=[1, 2], columns=[1, 2]) - self.assert_(isinstance(dm[1][1], int)) + tm.assert_isinstance(dm[1][1], int) def test_constructor_dict_of_tuples(self): # GH #1491 data = {'a': (1, 2, 3), 'b': (4, 5, 6)} result = DataFrame(data) - expected = DataFrame(dict((k, list(v)) for k, v in data.iteritems())) + expected = DataFrame(dict((k, list(v)) for k, v in compat.iteritems(data))) assert_frame_equal(result, expected, check_dtype=False) def test_constructor_ndarray(self): @@ -2356,14 +2360,14 @@ def test_constructor_ndarray(self): # automatic labeling frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, range(2))) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.index, lrange(2))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, range(2))) + self.assert_(np.array_equal(frame.index, lrange(2))) # 0-length axis frame = DataFrame(np.empty((0, 3))) @@ -2414,14 +2418,14 @@ def test_constructor_maskedarray(self): # automatic labeling frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, range(2))) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.index, lrange(2))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, range(2))) + self.assert_(np.array_equal(frame.index, lrange(2))) # 0-length axis frame = DataFrame(ma.masked_all((0, 3))) @@ -2502,11 +2506,11 @@ def test_constructor_corner(self): self.assertEqual(df.values.shape, (0, 0)) # empty but with specified dtype - df = DataFrame(index=range(10), columns=['a', 'b'], dtype=object) + df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=object) self.assert_(df.values.dtype == np.object_) # does not error but ends up float - df = DataFrame(index=range(10), columns=['a', 'b'], dtype=int) + df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=int) self.assert_(df.values.dtype == np.object_) # #1783 empty dtype object @@ -2680,7 +2684,7 @@ def test_constructor_ragged(self): self.assertRaises(Exception, DataFrame, data) def test_constructor_scalar(self): - idx = Index(range(3)) + idx = Index(lrange(3)) df = DataFrame({"a": 0}, index=idx) expected = DataFrame({"a": [0, 0, 0]}, index=idx) assert_frame_equal(df, expected, check_dtype=False) @@ -2723,7 +2727,7 @@ def test_constructor_orient(self): a = {'hi': [32, 3, 3], 'there': [3, 5, 3]} rs = DataFrame.from_dict(a, orient='index') - xp = 
DataFrame.from_dict(a).T.reindex(a.keys()) + xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) assert_frame_equal(rs, xp) def test_constructor_Series_named(self): @@ -2799,7 +2803,7 @@ def test_constructor_from_items(self): columns=self.mixed_frame.columns, orient='index') assert_frame_equal(recons, self.mixed_frame) - self.assert_(isinstance(recons['foo'][0], tuple)) + tm.assert_isinstance(recons['foo'][0], tuple) rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], orient='index', columns=['one', 'two', 'three']) @@ -2849,7 +2853,7 @@ def check(result, expected=None): # assignment # GH 3687 arr = np.random.randn(3, 2) - idx = range(2) + idx = lrange(2) df = DataFrame(arr, columns=['A', 'A']) df.columns = idx expected = DataFrame(arr,columns=idx) @@ -2950,11 +2954,11 @@ def test_insert_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 K = 5 - df = DataFrame(index=range(N)) + df = DataFrame(index=lrange(N)) new_col = np.random.randn(N) for i in range(K): df[i] = new_col - expected = DataFrame(np.repeat(new_col,K).reshape(N,K),index=range(N)) + expected = DataFrame(np.repeat(new_col,K).reshape(N,K),index=lrange(N)) assert_frame_equal(df,expected) def test_constructor_single_value(self): @@ -3090,12 +3094,12 @@ def test_constructor_for_list_with_dtypes(self): expected = Series({'float64' : 1}) assert_series_equal(result, expected) - df = DataFrame({'a' : 1 }, index=range(3)) + df = DataFrame({'a' : 1 }, index=lrange(3)) result = df.get_dtype_counts() expected = Series({'int64': 1}) assert_series_equal(result, expected) - df = DataFrame({'a' : 1. }, index=range(3)) + df = DataFrame({'a' : 1. }, index=lrange(3)) result = df.get_dtype_counts() expected = Series({'float64': 1 }) assert_series_equal(result, expected) @@ -3200,7 +3204,7 @@ def test_operators_timedelta64(self): def test__slice_consolidate_invalidate_item_cache(self): # #3970 - df = DataFrame({ "aa":range(5), "bb":[2.2]*5}) + df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) # Creates a second float block df["cc"] = 0.0 @@ -3244,7 +3248,7 @@ def test_astype(self): # mixed casting def _check_cast(df, v): - self.assert_(list(set([ s.dtype.name for _, s in df.iteritems() ]))[0] == v) + self.assert_(list(set([ s.dtype.name for _, s in compat.iteritems(df) ]))[0] == v) mn = self.all_mixed._get_numeric_data().copy() mn['little_float'] = np.array(12345.,dtype='float16') @@ -3323,7 +3327,7 @@ def test_astype_cast_nan_int(self): def test_array_interface(self): result = np.sqrt(self.frame) - self.assert_(type(result) is type(self.frame)) + tm.assert_isinstance(result, type(self.frame)) self.assert_(result.index is self.frame.index) self.assert_(result.columns is self.frame.columns) @@ -3347,20 +3351,20 @@ def test_to_dict(self): } recons_data = DataFrame(test_data).to_dict() - for k, v in test_data.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][k2]) recons_data = DataFrame(test_data).to_dict("l") - for k, v in test_data.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][int(k2) - 1]) recons_data = DataFrame(test_data).to_dict("s") - for k, v in test_data.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][k2]) def test_to_records_dt64(self): @@ -3573,7 +3577,7 @@ def 
test_join_str_datetime(self): str_dates = ['20120209', '20120222'] dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - A = DataFrame(str_dates, index=range(2), columns=['aa']) + A = DataFrame(str_dates, index=lrange(2), columns=['aa']) C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) tst = A.join(C, on='aa') @@ -3595,12 +3599,12 @@ def test_from_records_sequencelike(self): tuples = [] columns = [] dtypes = [] - for dtype, b in blocks.iteritems(): + for dtype, b in compat.iteritems(blocks): columns.extend(b.columns) dtypes.extend([ (c,np.dtype(dtype).descr[0][1]) for c in b.columns ]) - for i in xrange(len(df.index)): + for i in range(len(df.index)): tup = [] - for _, b in blocks.iteritems(): + for _, b in compat.iteritems(blocks): tup.extend(b.irow(i).values) tuples.append(tuple(tup)) @@ -3625,12 +3629,12 @@ def test_from_records_sequencelike(self): # tuples is in the order of the columns result = DataFrame.from_records(tuples) - self.assert_(np.array_equal(result.columns, range(8))) + self.assert_(np.array_equal(result.columns, lrange(8))) # test exclude parameter & we are casting the results here (as we don't have dtype info to recover) columns_to_test = [ columns.index('C'), columns.index('E1') ] - exclude = list(set(xrange(8))-set(columns_to_test)) + exclude = list(set(range(8))-set(columns_to_test)) result = DataFrame.from_records(tuples, exclude=exclude) result.columns = [ columns[i] for i in sorted(columns_to_test) ] assert_series_equal(result['C'], df['C']) @@ -3659,11 +3663,11 @@ def test_from_records_dictlike(self): # columns is in a different order here than the actual items iterated from the dict columns = [] - for dtype, b in df.blocks.iteritems(): + for dtype, b in compat.iteritems(df.blocks): columns.extend(b.columns) - asdict = dict((x, y) for x, y in df.iteritems()) - asdict2 = dict((x, y.values) for x, y in df.iteritems()) + asdict = dict((x, y) for x, y in compat.iteritems(df)) + asdict2 = dict((x, y.values) for x, y in compat.iteritems(df)) # dict of series & dict of ndarrays (have dtype info) results = [] @@ -3708,7 +3712,7 @@ def __iter__(self): return iter(self.args) recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)] - tups = map(tuple, recs) + tups = lmap(tuple, recs) result = DataFrame.from_records(recs) expected = DataFrame.from_records(tups) @@ -3767,7 +3771,7 @@ def test_repr_mixed_big(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -3803,8 +3807,8 @@ def test_repr_big(self): buf = StringIO() # big one - biggie = DataFrame(np.zeros((200, 4)), columns=range(4), - index=range(200)) + biggie = DataFrame(np.zeros((200, 4)), columns=lrange(4), + index=lrange(200)) foo = repr(biggie) def test_repr_unsortable(self): @@ -3837,7 +3841,7 @@ def test_repr_unsortable(self): warnings.filters = warn_filters def test_repr_unicode(self): - uval = u'\u03c3\u03c3\u03c3\u03c3' + uval = u('\u03c3\u03c3\u03c3\u03c3') bval = uval.encode('utf-8') df = DataFrame({'A': [uval, uval]}) @@ -3850,23 +3854,23 @@ def test_repr_unicode(self): self.assertEqual(result.split('\n')[0].rstrip(), ex_top) def test_unicode_string_with_unicode(self): - df = DataFrame({'A': [u"\u05d0"]}) + df = DataFrame({'A': [u("\u05d0")]}) - if py3compat.PY3: + if compat.PY3: str(df) else: - unicode(df) + compat.text_type(df) def test_bytestring_with_unicode(self): - df = DataFrame({'A': [u"\u05d0"]}) - if py3compat.PY3: + df = DataFrame({'A': 
[u("\u05d0")]}) + if compat.PY3: bytes(df) else: str(df) def test_very_wide_info_repr(self): df = DataFrame(np.random.randn(10, 20), - columns=[tm.rands(10) for _ in xrange(20)]) + columns=[tm.rands(10) for _ in range(20)]) repr(df) def test_repr_column_name_unicode_truncation_bug(self): @@ -3971,10 +3975,10 @@ def test_itertuples(self): assert_series_equal(s, expected) df = DataFrame({'floats': np.random.randn(5), - 'ints': range(5)}, columns=['floats', 'ints']) + 'ints': lrange(5)}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): - self.assert_(isinstance(tup[1], np.integer)) + tm.assert_isinstance(tup[1], np.integer) df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] @@ -3990,16 +3994,16 @@ def test_operators(self): idSum = self.frame + self.frame seriesSum = self.frame + colSeries - for col, series in idSum.iteritems(): - for idx, val in series.iteritems(): + for col, series in compat.iteritems(idSum): + for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] * 2 if not np.isnan(val): self.assertEqual(val, origVal) else: self.assert_(np.isnan(origVal)) - for col, series in seriesSum.iteritems(): - for idx, val in series.iteritems(): + for col, series in compat.iteritems(seriesSum): + for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] + colSeries[col] if not np.isnan(val): self.assertEqual(val, origVal) @@ -4138,7 +4142,7 @@ def _check_unary_op(op): _check_unary_op(operator.neg) def test_logical_typeerror(self): - if py3compat.PY3: + if compat.PY3: pass else: self.assertRaises(TypeError, self.frame.__eq__, 'foo') @@ -4518,7 +4522,7 @@ def test_combineSeries(self): added = self.frame + series - for key, s in added.iteritems(): + for key, s in compat.iteritems(added): assert_series_equal(s, self.frame[key] + series[key]) larger_series = series.to_dict() @@ -4526,7 +4530,7 @@ def test_combineSeries(self): larger_series = Series(larger_series) larger_added = self.frame + larger_series - for key, s in self.frame.iteritems(): + for key, s in compat.iteritems(self.frame): assert_series_equal(larger_added[key], s + series[key]) self.assert_('E' in larger_added) self.assert_(np.isnan(larger_added['E']).all()) @@ -4557,7 +4561,7 @@ def test_combineSeries(self): ts = self.tsframe['A'] added = self.tsframe + ts - for key, col in self.tsframe.iteritems(): + for key, col in compat.iteritems(self.tsframe): assert_series_equal(added[key], col + ts) smaller_frame = self.tsframe[:-5] @@ -4589,7 +4593,7 @@ def test_combineFunc(self): # vs mix result = self.mixed_float * 2 - for c, s in result.iteritems(): + for c, s in compat.iteritems(result): self.assert_(np.array_equal(s.values, self.mixed_float[c].values * 2)) _check_mixed_float(result, dtype = dict(C = None)) @@ -4636,7 +4640,7 @@ def test_string_comparison(self): assert_frame_equal(df[-mask_b], df.ix[1:1, :]) def test_float_none_comparison(self): - df = DataFrame(np.random.randn(8, 3), index=range(8), + df = DataFrame(np.random.randn(8, 3), index=lrange(8), columns=['A', 'B', 'C']) self.assertRaises(TypeError, df.__eq__, None) @@ -4679,8 +4683,8 @@ def test_to_csv_from_csv(self): assert_almost_equal(self.tsframe.values, recons.values) # corner case - dm = DataFrame({'s1': Series(range(3), range(3)), - 's2': Series(range(2), range(2))}) + dm = DataFrame({'s1': Series(lrange(3), lrange(3)), + 's2': Series(lrange(2), lrange(2))}) dm.to_csv(path) recons = DataFrame.from_csv(path) assert_frame_equal(dm, recons) @@ -4723,8 +4727,8 @@ def test_to_csv_from_csv(self): 
df2.to_csv(path,mode='a',header=False) xp = pd.concat([df1,df2]) rs = pd.read_csv(path,index_col=0) - rs.columns = map(int,rs.columns) - xp.columns = map(int,xp.columns) + rs.columns = lmap(int,rs.columns) + xp.columns = lmap(int,xp.columns) assert_frame_equal(xp,rs) def test_to_csv_cols_reordering(self): @@ -4807,17 +4811,17 @@ def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None, dupe_col=False): if cnlvl: - header = range(cnlvl) + header = lrange(cnlvl) with ensure_clean(path) as path: df.to_csv(path,encoding='utf8',chunksize=chunksize,tupleize_cols=False) - recons = DataFrame.from_csv(path,header=range(cnlvl),tupleize_cols=False,parse_dates=False) + recons = DataFrame.from_csv(path,header=lrange(cnlvl),tupleize_cols=False,parse_dates=False) else: with ensure_clean(path) as path: df.to_csv(path,encoding='utf8',chunksize=chunksize) recons = DataFrame.from_csv(path,header=0,parse_dates=False) def _to_uni(x): - if not isinstance(x,unicode): + if not isinstance(x, compat.text_type): return x.decode('utf8') return x if dupe_col: @@ -4834,19 +4838,22 @@ def _to_uni(x): if r_dtype: if r_dtype == 'u': # unicode r_dtype='O' - recons.index = np.array(map(_to_uni,recons.index), - dtype=r_dtype ) - df.index = np.array(map(_to_uni,df.index),dtype=r_dtype ) + recons.index = np.array(lmap(_to_uni,recons.index), + dtype=r_dtype) + df.index = np.array(lmap(_to_uni,df.index),dtype=r_dtype) if r_dtype == 'dt': # unicode r_dtype='O' - recons.index = np.array(map(Timestamp,recons.index), - dtype=r_dtype ) - df.index = np.array(map(Timestamp,df.index),dtype=r_dtype ) + recons.index = np.array(lmap(Timestamp,recons.index), + dtype=r_dtype) + df.index = np.array(lmap(Timestamp,df.index),dtype=r_dtype) elif r_dtype == 'p': r_dtype='O' - recons.index = np.array(map(Timestamp,recons.index.to_datetime()), - dtype=r_dtype ) - df.index = np.array(map(Timestamp,df.index.to_datetime()),dtype=r_dtype ) + recons.index = np.array(list(map(Timestamp, + recons.index.to_datetime())), + dtype=r_dtype) + df.index = np.array(list(map(Timestamp, + df.index.to_datetime())), + dtype=r_dtype) else: r_dtype= type_map.get(r_dtype) recons.index = np.array(recons.index,dtype=r_dtype ) @@ -4854,19 +4861,19 @@ def _to_uni(x): if c_dtype: if c_dtype == 'u': c_dtype='O' - recons.columns = np.array(map(_to_uni,recons.columns), - dtype=c_dtype ) - df.columns = np.array(map(_to_uni,df.columns),dtype=c_dtype ) + recons.columns = np.array(lmap(_to_uni,recons.columns), + dtype=c_dtype) + df.columns = np.array(lmap(_to_uni,df.columns),dtype=c_dtype ) elif c_dtype == 'dt': c_dtype='O' - recons.columns = np.array(map(Timestamp,recons.columns), + recons.columns = np.array(lmap(Timestamp,recons.columns), dtype=c_dtype ) - df.columns = np.array(map(Timestamp,df.columns),dtype=c_dtype ) + df.columns = np.array(lmap(Timestamp,df.columns),dtype=c_dtype) elif c_dtype == 'p': c_dtype='O' - recons.columns = np.array(map(Timestamp,recons.columns.to_datetime()), - dtype=c_dtype ) - df.columns = np.array(map(Timestamp,df.columns.to_datetime()),dtype=c_dtype ) + recons.columns = np.array(lmap(Timestamp,recons.columns.to_datetime()), + dtype=c_dtype) + df.columns = np.array(lmap(Timestamp,df.columns.to_datetime()),dtype=c_dtype ) else: c_dtype= type_map.get(c_dtype) recons.columns = np.array(recons.columns,dtype=c_dtype ) @@ -4947,7 +4954,7 @@ def make_dtnat_arr(n,nnat=None): _do_test(df,path,dupe_col=True) - _do_test(DataFrame(index=range(10)),path) + _do_test(DataFrame(index=lrange(10)),path) _do_test(mkdf(chunksize//2+1, 
2,r_idx_nlevels=2),path,rnlvl=2) for ncols in [2,3,4]: base = int(chunksize//ncols) @@ -5123,15 +5130,15 @@ def _make_frame(names=None): # catch invalid headers def testit(): - read_csv(path,tupleize_cols=False,header=range(3),index_col=0) + read_csv(path,tupleize_cols=False,header=lrange(3),index_col=0) assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns', testit) def testit(): - read_csv(path,tupleize_cols=False,header=range(7),index_col=0) + read_csv(path,tupleize_cols=False,header=lrange(7),index_col=0) assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file', testit) for i in [3,4,5,6,7]: - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=range(i), index_col=0) + self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=lrange(i), index_col=0) self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0) # write with cols @@ -5171,7 +5178,7 @@ def test_to_csv_withcommas(self): def test_to_csv_mixed(self): def create_cols(name): - return [ "%s%03d" % (name,i) for i in xrange(5) ] + return [ "%s%03d" % (name,i) for i in range(5) ] df_float = DataFrame(np.random.randn(100, 5),dtype='float64',columns=create_cols('float')) df_int = DataFrame(np.random.randn(100, 5),dtype='int64',columns=create_cols('int')) @@ -5200,7 +5207,7 @@ def create_cols(name): def test_to_csv_dups_cols(self): - df = DataFrame(np.random.randn(1000, 30),columns=range(15)+range(15),dtype='float64') + df = DataFrame(np.random.randn(1000, 30),columns=lrange(15)+lrange(15),dtype='float64') with ensure_clean() as filename: df.to_csv(filename) # single dtype, fine @@ -5210,9 +5217,9 @@ def test_to_csv_dups_cols(self): df_float = DataFrame(np.random.randn(1000, 3),dtype='float64') df_int = DataFrame(np.random.randn(1000, 3),dtype='int64') - df_bool = DataFrame(True,index=df_float.index,columns=range(3)) - df_object = DataFrame('foo',index=df_float.index,columns=range(3)) - df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=range(3)) + df_bool = DataFrame(True,index=df_float.index,columns=lrange(3)) + df_object = DataFrame('foo',index=df_float.index,columns=lrange(3)) + df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=lrange(3)) df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1, ignore_index=True) cols = [] @@ -5249,7 +5256,7 @@ def test_to_csv_dups_cols(self): def test_to_csv_chunking(self): - aa=DataFrame({'A':range(100000)}) + aa=DataFrame({'A':lrange(100000)}) aa['B'] = aa.A + 1.0 aa['C'] = aa.A + 2.0 aa['D'] = aa.A + 3.0 @@ -5273,7 +5280,7 @@ def test_to_csv_bug(self): def test_to_csv_unicode(self): - df = DataFrame({u'c/\u03c3': [1, 2, 3]}) + df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) with ensure_clean() as path: df.to_csv(path, encoding='UTF-8') @@ -5287,10 +5294,10 @@ def test_to_csv_unicode(self): def test_to_csv_unicode_index_col(self): buf = StringIO('') df = DataFrame( - [[u"\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], - columns=[u"\u05d0", - u"\u05d1", u"\u05d2", u"\u05d3"], - index=[u"\u05d0", u"\u05d1"]) + [[u("\u05d0"), "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=[u("\u05d0"), + u("\u05d1"), u("\u05d2"), u("\u05d3")], + index=[u("\u05d0"), u("\u05d1")]) df.to_csv(buf, encoding='UTF-8') buf.seek(0) @@ -5439,7 +5446,7 @@ def test_dtypes(self): self.mixed_frame['bool'] = self.mixed_frame['A'] > 0 result = self.mixed_frame.dtypes expected = Series(dict((k, v.dtype) 
- for k, v in self.mixed_frame.iteritems()), + for k, v in compat.iteritems(self.mixed_frame)), index=result.index) assert_series_equal(result, expected) @@ -5586,13 +5593,13 @@ def test_asfreq(self): def test_asfreq_datetimeindex(self): df = DataFrame({'A': [1, 2, 3]}, - index=[datetime(2011, 11, 01), datetime(2011, 11, 2), + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)]) df = df.asfreq('B') - self.assert_(isinstance(df.index, DatetimeIndex)) + tm.assert_isinstance(df.index, DatetimeIndex) ts = df['A'].asfreq('B') - self.assert_(isinstance(ts.index, DatetimeIndex)) + tm.assert_isinstance(ts.index, DatetimeIndex) def test_at_time_between_time_datetimeindex(self): index = pan.date_range("2012-01-01", "2012-01-05", freq='30min') @@ -5690,7 +5697,7 @@ def test_deepcopy(self): cp = deepcopy(self.frame) series = cp['A'] series[:] = 10 - for idx, value in series.iteritems(): + for idx, value in compat.iteritems(series): self.assertNotEqual(self.frame['A'][idx], value) def test_copy(self): @@ -5929,7 +5936,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) dropped = df.dropna(axis=0) - expected = df.ix[range(2, 6)] + expected = df.ix[lrange(2, 6)] assert_frame_equal(dropped, expected) # threshold @@ -5938,7 +5945,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) dropped = df.dropna(axis=0, thresh=4) - expected = df.ix[range(2, 6)] + expected = df.ix[lrange(2, 6)] assert_frame_equal(dropped, expected) dropped = df.dropna(axis=1, thresh=4) @@ -5984,7 +5991,7 @@ def test_drop_duplicates(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': range(8)}) + 'D': lrange(8)}) # single column result = df.drop_duplicates('AAA') @@ -6024,7 +6031,7 @@ def test_drop_duplicates_tuple(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': range(8)}) + 'D': lrange(8)}) # single column result = df.drop_duplicates(('AA', 'AB')) @@ -6047,7 +6054,7 @@ def test_drop_duplicates_NA(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], - 'D': range(8)}) + 'D': lrange(8)}) # single column result = df.drop_duplicates('A') @@ -6073,7 +6080,7 @@ def test_drop_duplicates_NA(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], - 'D': range(8)}) + 'D': lrange(8)}) # single column result = df.drop_duplicates('C') @@ -6099,7 +6106,7 @@ def test_drop_duplicates_inplace(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': range(8)}) + 'D': lrange(8)}) # single column df = orig.copy() @@ -6148,8 +6155,7 @@ def test_drop_col_still_multiindex(self): ['', '', '', 'OD'], ['', '', '', 'wx']] - tuples = zip(*arrays) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(3, 4), columns=index) @@ -6271,7 +6277,7 @@ def test_fillna_columns(self): def test_fillna_invalid_method(self): try: self.frame.fillna(method='ffil') - except ValueError, inst: + except ValueError as inst: self.assert_('ffil' in str(inst)) def test_fillna_invalid_value(self): @@ -6305,7 +6311,7 @@ def test_replace_inplace(self): def test_regex_replace_scalar(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) ### simplest cases @@ -6371,7 
+6377,7 @@ def test_regex_replace_scalar(self): def test_regex_replace_scalar_inplace(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) ### simplest cases @@ -6579,14 +6585,14 @@ def test_regex_replace_list_obj_inplace(self): def test_regex_replace_list_mixed(self): ## mixed frame to make sure this doesn't break things - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) ## lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'a'] values = [nan, 'crap'] - mix2 = {'a': range(4), 'b': list('ab..'), 'c': list('halo')} + mix2 = {'a': lrange(4), 'b': list('ab..'), 'c': list('halo')} dfmix2 = DataFrame(mix2) res = dfmix2.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': mix2['a'], 'b': ['crap', 'b', nan, nan], @@ -6617,7 +6623,7 @@ def test_regex_replace_list_mixed(self): assert_frame_equal(res, expec) def test_regex_replace_list_mixed_inplace(self): - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) # the same inplace ## lists of regexes and values @@ -6656,7 +6662,7 @@ def test_regex_replace_list_mixed_inplace(self): assert_frame_equal(res, expec) def test_regex_replace_dict_mixed(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) ## dicts @@ -6713,7 +6719,7 @@ def test_regex_replace_dict_mixed(self): def test_regex_replace_dict_nested(self): # nested dicts will not work until this is implemented for Series - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) res = dfmix.replace({'b': {r'\s*\.\s*': nan}}, regex=True) res2 = dfmix.copy() @@ -6734,7 +6740,7 @@ def test_regex_replace_dict_nested_gh4115(self): assert_frame_equal(df.replace({'Type': {'Q':0,'T':1}}), expected) def test_regex_replace_list_to_scalar(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'a|b'], nan, regex=True) res2 = df.copy() @@ -6749,7 +6755,7 @@ def test_regex_replace_list_to_scalar(self): def test_regex_replace_str_to_numeric(self): # what happens when you try to replace a numeric value with a regex? 
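# --- illustration, not a diff hunk ---
# A minimal sketch of the replace(..., regex=True) behaviour the tests
# below exercise, mirroring their list('ab..') fixture: a cell is
# replaced wholesale once the pattern matches anywhere in it.
import numpy as np
from pandas import DataFrame

df = DataFrame({'b': list('ab..')})
res = df.replace(r'\s*\.\s*', np.nan, regex=True)
assert res['b'].tolist()[:2] == ['a', 'b']
assert res['b'].isnull().tolist() == [False, False, True, True]
# --- end illustration ---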
- mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(r'\s*\.\s*', 0, regex=True) res2 = df.copy() @@ -6763,7 +6769,7 @@ def test_regex_replace_str_to_numeric(self): assert_frame_equal(res3, expec) def test_regex_replace_regex_list_to_numeric(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'b'], 0, regex=True) res2 = df.copy() @@ -6778,7 +6784,7 @@ def test_regex_replace_regex_list_to_numeric(self): assert_frame_equal(res3, expec) def test_regex_replace_series_of_regexes(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) s1 = Series({'b': r'\s*\.\s*'}) s2 = Series({'b': nan}) @@ -6794,7 +6800,7 @@ def test_regex_replace_series_of_regexes(self): assert_frame_equal(res3, expec) def test_regex_replace_numeric_to_object_conversion(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(0, 'a') expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) @@ -7008,7 +7014,7 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) filled = df.replace(to_rep, values) expected = {} - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): expected[k] = v.replace(to_rep[k], values[k]) assert_frame_equal(filled, DataFrame(expected)) @@ -7020,7 +7026,7 @@ def test_replace_input_formats(self): # dict to scalar filled = df.replace(to_rep, 0) expected = {} - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): expected[k] = v.replace(to_rep[k], 0) assert_frame_equal(filled, DataFrame(expected)) @@ -7032,7 +7038,7 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) filled = df.replace(np.nan, values) expected = {} - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): expected[k] = v.replace(np.nan, values[k]) assert_frame_equal(filled, DataFrame(expected)) @@ -7118,7 +7124,7 @@ def test_truncate_copy(self): def test_xs(self): idx = self.frame.index[5] xs = self.frame.xs(idx) - for item, value in xs.iteritems(): + for item, value in compat.iteritems(xs): if np.isnan(value): self.assert_(np.isnan(self.frame[item][idx])) else: @@ -7234,7 +7240,7 @@ def test_reindex(self): newFrame = self.frame.reindex(self.ts1.index) for col in newFrame.columns: - for idx, val in newFrame[col].iteritems(): + for idx, val in compat.iteritems(newFrame[col]): if idx in self.frame.index: if np.isnan(val): self.assert_(np.isnan(self.frame[col][idx])) @@ -7243,7 +7249,7 @@ def test_reindex(self): else: self.assert_(np.isnan(val)) - for col, series in newFrame.iteritems(): + for col, series in compat.iteritems(newFrame): self.assert_(tm.equalContents(series.index, newFrame.index)) emptyFrame = self.frame.reindex(Index([])) self.assert_(len(emptyFrame.index) == 0) @@ -7252,7 +7258,7 @@ def test_reindex(self): nonContigFrame = self.frame.reindex(self.ts1.index[::2]) for col in nonContigFrame.columns: - for idx, val in nonContigFrame[col].iteritems(): + for idx, val in compat.iteritems(nonContigFrame[col]): if idx in self.frame.index: if np.isnan(val): self.assert_(np.isnan(self.frame[col][idx])) @@ -7261,7 +7267,7 @@ def test_reindex(self): else: 
self.assert_(np.isnan(val)) - for col, series in nonContigFrame.iteritems(): + for col, series in compat.iteritems(nonContigFrame): self.assert_(tm.equalContents(series.index, nonContigFrame.index)) @@ -7335,42 +7341,42 @@ def test_reindex_fill_value(self): df = DataFrame(np.random.randn(10, 4)) # axis=0 - result = df.reindex(range(15)) + result = df.reindex(lrange(15)) self.assert_(np.isnan(result.values[-5:]).all()) - result = df.reindex(range(15), fill_value=0) - expected = df.reindex(range(15)).fillna(0) + result = df.reindex(lrange(15), fill_value=0) + expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) # axis=1 - result = df.reindex(columns=range(5), fill_value=0.) + result = df.reindex(columns=lrange(5), fill_value=0.) expected = df.copy() expected[4] = 0. assert_frame_equal(result, expected) - result = df.reindex(columns=range(5), fill_value=0) + result = df.reindex(columns=lrange(5), fill_value=0) expected = df.copy() expected[4] = 0 assert_frame_equal(result, expected) - result = df.reindex(columns=range(5), fill_value='foo') + result = df.reindex(columns=lrange(5), fill_value='foo') expected = df.copy() expected[4] = 'foo' assert_frame_equal(result, expected) # reindex_axis - result = df.reindex_axis(range(15), fill_value=0., axis=0) - expected = df.reindex(range(15)).fillna(0) + result = df.reindex_axis(lrange(15), fill_value=0., axis=0) + expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) - result = df.reindex_axis(range(5), fill_value=0., axis=1) - expected = df.reindex(columns=range(5)).fillna(0) + result = df.reindex_axis(lrange(5), fill_value=0., axis=1) + expected = df.reindex(columns=lrange(5)).fillna(0) assert_frame_equal(result, expected) # other dtypes df['foo'] = 'foo' - result = df.reindex(range(15), fill_value=0) - expected = df.reindex(range(15)).fillna(0) + result = df.reindex(lrange(15), fill_value=0) + expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) def test_align(self): @@ -7542,13 +7548,13 @@ def _safe_add(df): # only add to the numeric items def is_ok(s): return issubclass(s.dtype.type, (np.integer,np.floating)) and s.dtype != 'uint8' - return DataFrame(dict([ (c,s+1) if is_ok(s) else (c,s) for c, s in df.iteritems() ])) + return DataFrame(dict([ (c,s+1) if is_ok(s) else (c,s) for c, s in compat.iteritems(df) ])) def _check_get(df, cond, check_dtypes = True): other1 = _safe_add(df) rs = df.where(cond, other1) rs2 = df.where(cond.values, other1) - for k, v in rs.iteritems(): + for k, v in compat.iteritems(rs): assert_series_equal(v, np.where(cond[k], df[k], other1[k])) assert_frame_equal(rs, rs2) @@ -7642,7 +7648,7 @@ def _check_set(df, cond, check_dtypes = True): # dtypes (and confirm upcasts)x if check_dtypes: - for k, v in df.dtypes.iteritems(): + for k, v in compat.iteritems(df.dtypes): if issubclass(v.type,np.integer) and not cond[k].all(): v = np.dtype('float64') self.assert_(dfi[k].dtype == v) @@ -7716,8 +7722,8 @@ def test_mask_edge_case_1xN_frame(self): def test_transpose(self): frame = self.frame dft = frame.T - for idx, series in dft.iteritems(): - for col, value in series.iteritems(): + for idx, series in compat.iteritems(dft): + for col, value in compat.iteritems(series): if np.isnan(value): self.assert_(np.isnan(frame[col][idx])) else: @@ -7728,7 +7734,7 @@ def test_transpose(self): mixed = DataFrame(data, index=index) mixed_T = mixed.T - for col, s in mixed_T.iteritems(): + for col, s in compat.iteritems(mixed_T): self.assert_(s.dtype == np.object_) def 
test_transpose_get_view(self): @@ -8035,7 +8041,7 @@ def test_apply_broadcast(self): broadcasted = self.frame.apply(np.mean, broadcast=True) agged = self.frame.apply(np.mean) - for col, ts in broadcasted.iteritems(): + for col, ts in compat.iteritems(broadcasted): self.assert_((ts == agged[col]).all()) broadcasted = self.frame.apply(np.mean, axis=1, broadcast=True) @@ -8092,10 +8098,10 @@ def _checkit(axis=0, raw=False): res = df.apply(f, axis=axis, raw=raw) if is_reduction: agg_axis = df._get_agg_axis(axis) - self.assert_(isinstance(res, Series)) + tm.assert_isinstance(res, Series) self.assert_(res.index is agg_axis) else: - self.assert_(isinstance(res, DataFrame)) + tm.assert_isinstance(res, DataFrame) _checkit() _checkit(axis=1) @@ -8108,7 +8114,7 @@ def _checkit(axis=0, raw=False): _check(no_index, lambda x: x.mean()) result = no_cols.apply(lambda x: x.mean(), broadcast=True) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) def test_apply_with_args_kwds(self): def add_some(x, howmuch=0): @@ -8147,13 +8153,13 @@ def test_apply_differently_indexed(self): result0 = df.apply(Series.describe, axis=0) expected0 = DataFrame(dict((i, v.describe()) - for i, v in df.iteritems()), + for i, v in compat.iteritems(df)), columns=df.columns) assert_frame_equal(result0, expected0) result1 = df.apply(Series.describe, axis=1) expected1 = DataFrame(dict((i, v.describe()) - for i, v in df.T.iteritems()), + for i, v in compat.iteritems(df.T)), columns=df.index).T assert_frame_equal(result1, expected1) @@ -8186,7 +8192,7 @@ def transform2(row): try: transformed = data.apply(transform, axis=1) - except Exception, e: + except Exception as e: self.assertEqual(len(e.args), 2) self.assertEqual(e.args[1], 'occurred at index 4') @@ -8244,7 +8250,7 @@ def test_apply_multi_index(self): s.index = MultiIndex.from_arrays([['a','a','b'], ['c','d','d']]) s.columns = ['col1','col2'] res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1) - self.assert_(isinstance(res.index, MultiIndex)) + tm.assert_isinstance(res.index, MultiIndex) def test_applymap(self): applied = self.frame.applymap(lambda x: x * 2) @@ -8253,7 +8259,7 @@ def test_applymap(self): # GH #465, function returning tuples result = self.frame.applymap(lambda x: (x, x)) - self.assert_(isinstance(result['A'][0], tuple)) + tm.assert_isinstance(result['A'][0], tuple) # GH 2909, object conversion to float in constructor? df = DataFrame(data=[1,'a']) @@ -8303,7 +8309,7 @@ def test_filter(self): self.assert_('foo' in filtered) # unicode columns, won't ascii-encode - df = self.frame.rename(columns={'B': u'\u2202'}) + df = self.frame.rename(columns={'B': u('\u2202')}) filtered = df.filter(like='C') self.assertTrue('C' in filtered) @@ -8505,12 +8511,12 @@ def test_sort_index_duplicates(self): try: df.sort_index(by='a') - except Exception, e: + except Exception as e: self.assertTrue('duplicate' in str(e)) try: df.sort_index(by=['a']) - except Exception, e: + except Exception as e: self.assertTrue('duplicate' in str(e)) def test_sort_datetimes(self): @@ -8540,7 +8546,7 @@ def test_frame_column_inplace_sort_exception(self): self.assertRaises(Exception, s.sort) cp = s.copy() - cp.sort() # it works! + cp.sort() # it works! 
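# --- illustration, not a diff hunk ---
# A minimal sketch of the remaining compat shims used in the hunks
# around here, assuming the pandas.compat module this patch imports
# from: lrange materialises a real list (python 3's range is lazy),
# u() stands in for the py2-only u'' string prefix, and
# compat.iteritems bridges dict.iteritems()/dict.items().
from pandas import compat
from pandas.compat import lrange, u

idx = lrange(3)                 # [0, 1, 2] as a list on both majors
label = u('\u03c3')             # unicode text without a u'' literal
for key, val in compat.iteritems({'a': idx}):
    assert key == 'a' and val == [0, 1, 2]
# --- end illustration ---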
def test_combine_first(self): # disjoint @@ -8950,18 +8956,18 @@ def test_count(self): # corner case frame = DataFrame() ct1 = frame.count(1) - self.assert_(isinstance(ct1, Series)) + tm.assert_isinstance(ct1, Series) ct2 = frame.count(0) - self.assert_(isinstance(ct2, Series)) + tm.assert_isinstance(ct2, Series) # GH #423 - df = DataFrame(index=range(10)) + df = DataFrame(index=lrange(10)) result = df.count(1) expected = Series(0, index=df.index) assert_series_equal(result, expected) - df = DataFrame(columns=range(10)) + df = DataFrame(columns=lrange(10)) result = df.count(0) expected = Series(0, index=df.columns) assert_series_equal(result, expected) @@ -9144,7 +9150,7 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, print (df) self.assertFalse(len(_f())) - df['a'] = range(len(df)) + df['a'] = lrange(len(df)) self.assert_(len(getattr(df, name)())) if has_skipna: @@ -9205,8 +9211,8 @@ def wrapper(x): def test_sum_corner(self): axis0 = self.empty.sum(0) axis1 = self.empty.sum(1) - self.assert_(isinstance(axis0, Series)) - self.assert_(isinstance(axis1, Series)) + tm.assert_isinstance(axis0, Series) + tm.assert_isinstance(axis1, Series) self.assertEquals(len(axis0), 0) self.assertEquals(len(axis1), 0) @@ -9482,7 +9488,7 @@ def test_describe_no_numeric(self): 'B': ['a', 'b', 'c', 'd'] * 6}) desc = df.describe() expected = DataFrame(dict((k, v.describe()) - for k, v in df.iteritems()), + for k, v in compat.iteritems(df)), columns=df.columns) assert_frame_equal(desc, expected) @@ -9523,12 +9529,12 @@ def test_axis_aliases(self): assert_series_equal(result, expected) def test_combine_first_mixed(self): - a = Series(['a', 'b'], index=range(2)) - b = Series(range(2), index=range(2)) + a = Series(['a', 'b'], index=lrange(2)) + b = Series(lrange(2), index=lrange(2)) f = DataFrame({'A': a, 'B': b}) - a = Series(['a', 'b'], index=range(5, 7)) - b = Series(range(2), index=range(5, 7)) + a = Series(['a', 'b'], index=lrange(5, 7)) + b = Series(lrange(2), index=lrange(5, 7)) g = DataFrame({'A': a, 'B': b}) combined = f.combine_first(g) @@ -9546,7 +9552,7 @@ def test_reindex_boolean(self): self.assert_(reindexed.values.dtype == np.object_) self.assert_(isnull(reindexed[0][1])) - reindexed = frame.reindex(columns=range(3)) + reindexed = frame.reindex(columns=lrange(3)) self.assert_(reindexed.values.dtype == np.object_) self.assert_(isnull(reindexed[1]).all()) @@ -9606,22 +9612,22 @@ def test_reindex_with_nans(self): def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) - result = df.reindex(range(4), range(4)) - expected = df.reindex(range(4)).reindex(columns=range(4)) + result = df.reindex(lrange(4), lrange(4)) + expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(range(4), range(4)) - expected = df.reindex(range(4)).reindex(columns=range(4)) + result = df.reindex(lrange(4), lrange(4)) + expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(range(2), range(2)) - expected = df.reindex(range(2)).reindex(columns=range(2)) + result = df.reindex(lrange(2), lrange(2)) + expected = df.reindex(lrange(2)).reindex(columns=lrange(2)) assert_frame_equal(result, expected) @@ -9657,7 +9663,7 @@ def test_count_objects(self): def test_cumsum_corner(self): dm = DataFrame(np.arange(20).reshape(4, 5), - index=range(4), columns=range(5)) + index=lrange(4), 
columns=lrange(5)) result = dm.cumsum() #---------------------------------------------------------------------- @@ -9711,7 +9717,7 @@ def test_unstack_to_series(self): # check composability of unstack old_data = data.copy() - for _ in xrange(4): + for _ in range(4): data = data.unstack() assert_frame_equal(old_data, data) @@ -9867,13 +9873,13 @@ def test_reset_index_multiindex_col(self): assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill=None) - xp = DataFrame(full, Index(range(3), name='d'), + xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill='blah', col_level=1) - xp = DataFrame(full, Index(range(3), name='d'), + xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) @@ -10148,7 +10154,7 @@ def test_boolean_set_uncons(self): def test_xs_view(self): dm = DataFrame(np.arange(20.).reshape(4, 5), - index=range(4), columns=range(5)) + index=lrange(4), columns=lrange(5)) dm.xs(2, copy=False)[:] = 5 self.assert_((dm.xs(2) == 5).all()) @@ -10166,7 +10172,7 @@ def test_xs_view(self): self.assert_((dm.xs(3) == 10).all()) def test_boolean_indexing(self): - idx = range(3) + idx = lrange(3) cols = ['A','B','C'] df1 = DataFrame(index=idx, columns=cols, data=np.array([[0.0, 0.5, 1.0], @@ -10186,15 +10192,15 @@ def test_boolean_indexing(self): def test_boolean_indexing_mixed(self): df = DataFrame( - {0L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, - 1L: {35: np.nan, + {long(0): {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + long(1): {35: np.nan, 40: 0.32632316859446198, 43: np.nan, 49: 0.32632316859446198, 50: 0.39114724480578139}, - 2L: {35: np.nan, 40: np.nan, 43: 0.29012581014105987, 49: np.nan, 50: np.nan}, - 3L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, - 4L: {35: 0.34215328467153283, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + long(2): {35: np.nan, 40: np.nan, 43: 0.29012581014105987, 49: np.nan, 50: np.nan}, + long(3): {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + long(4): {35: 0.34215328467153283, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, 'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}}) # mixed int/float ok @@ -10212,15 +10218,15 @@ def test_boolean_indexing_mixed(self): self.assertRaises(ValueError, df.__setitem__, df>0.3, 1) def test_sum_bools(self): - df = DataFrame(index=range(1), columns=range(10)) + df = DataFrame(index=lrange(1), columns=lrange(10)) bools = isnull(df) self.assert_(bools.sum(axis=1)[0] == 10) def test_fillna_col_reordering(self): - idx = range(20) + idx = lrange(20) cols = ["COL." 
+ str(i) for i in range(5, 0, -1)] data = np.random.rand(20, 5) - df = DataFrame(index=range(20), columns=cols, data=data) + df = DataFrame(index=lrange(20), columns=cols, data=data) filled = df.fillna(method='ffill') self.assert_(df.columns.tolist() == filled.columns.tolist()) @@ -10299,13 +10305,17 @@ def test_take(self): expected = df.ix[:, ['B', 'C', 'A', 'D']] assert_frame_equal(result, expected) + def test_iterkv_deprecation(self): + with tm.assert_produces_warning(DeprecationWarning): + self.mixed_float.iterkv() + def test_iterkv_names(self): - for k, v in self.mixed_frame.iterkv(): + for k, v in compat.iteritems(self.mixed_frame): self.assertEqual(v.name, k) def test_series_put_names(self): series = self.mixed_frame._series - for k, v in series.iteritems(): + for k, v in compat.iteritems(series): self.assertEqual(v.name, k) def test_dot(self): @@ -10347,8 +10357,8 @@ def test_dot(self): result = A.dot(b) # unaligned - df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=range(4)) - df2 = DataFrame(randn(5, 3), index=range(5), columns=[1, 2, 3]) + df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) + df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) self.assertRaises(ValueError, df.dot, df2) @@ -10554,7 +10564,7 @@ def test_strange_column_corruption_issue(self): # df[col] = nan for i, dt in enumerate(df.index): - for col in xrange(100, 200): + for col in range(100, 200): if not col in wasCol: wasCol[col] = 1 df[col] = nan @@ -10675,12 +10685,12 @@ def test_isin_dict(self): # without using iloc result = df.isin(d) - assert_frame_equal(result, expected) + assert_frame_equal(result, expected) # using iloc result = df.isin(d, iloc=True) expected.iloc[0, 0] = True - assert_frame_equal(result, expected) + assert_frame_equal(result, expected) if __name__ == '__main__': diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 08b42d7cf8975..f017acce0419b 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -6,6 +6,7 @@ from datetime import datetime, date from pandas import Series, DataFrame, MultiIndex, PeriodIndex, date_range +from pandas.compat import range, lrange, StringIO, lmap, lzip, u, map, zip import pandas.util.testing as tm from pandas.util.testing import ensure_clean from pandas.core.config import set_option @@ -115,7 +116,7 @@ def test_bar_colors(self): rects = ax.patches - rgba_colors = map(cm.jet, np.linspace(0, 1, 5)) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5)) for i, rect in enumerate(rects[::5]): xp = rgba_colors[i] rs = rect.get_facecolor() @@ -128,7 +129,7 @@ def test_bar_colors(self): rects = ax.patches - rgba_colors = map(cm.jet, np.linspace(0, 1, 5)) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5)) for i, rect in enumerate(rects[::5]): xp = rgba_colors[i] rs = rect.get_facecolor() @@ -271,7 +272,7 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): - s = Series(range(10), dtype=object) + s = Series(lrange(10), dtype=object) kinds = 'line', 'bar', 'barh', 'kde', 'density' for kind in kinds: @@ -327,27 +328,27 @@ def test_plot(self): _check_plot_works(df.plot, subplots=True, title='blah') _check_plot_works(df.plot, title='blah') - tuples = zip(list(string.ascii_letters[:10]), range(10)) + tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) _check_plot_works(df.plot, use_index=True) # unicode - index = MultiIndex.from_tuples([(u'\u03b1', 0), - (u'\u03b1', 1), - (u'\u03b2', 2), - 
(u'\u03b2', 3), - (u'\u03b3', 4), - (u'\u03b3', 5), - (u'\u03b4', 6), - (u'\u03b4', 7)], names=['i0', 'i1']) - columns = MultiIndex.from_tuples([('bar', u'\u0394'), - ('bar', u'\u0395')], names=['c0', + index = MultiIndex.from_tuples([(u('\u03b1'), 0), + (u('\u03b1'), 1), + (u('\u03b2'), 2), + (u('\u03b2'), 3), + (u('\u03b3'), 4), + (u('\u03b3'), 5), + (u('\u03b4'), 6), + (u('\u03b4'), 7)], names=['i0', 'i1']) + columns = MultiIndex.from_tuples([('bar', u('\u0394')), + ('bar', u('\u0395'))], names=['c0', 'c1']) df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) - _check_plot_works(df.plot, title=u'\u03A3') + _check_plot_works(df.plot, title=u('\u03A3')) def test_nonnumeric_exclude(self): import matplotlib.pyplot as plt @@ -384,7 +385,7 @@ def test_plot_xy(self): self._check_data(df.plot(y='B'), df.B.plot()) # columns.inferred_type == 'integer' - df.columns = range(1, len(df.columns) + 1) + df.columns = lrange(1, len(df.columns) + 1) self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) self._check_data(df.plot(x=1), df.set_index(1).plot()) @@ -421,7 +422,7 @@ def test_xcompat(self): pd.plot_params['x_compat'] = False ax = df.plot() lines = ax.get_lines() - self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex) plt.close('all') # useful if you're plotting a bunch together @@ -433,7 +434,7 @@ def test_xcompat(self): plt.close('all') ax = df.plot() lines = ax.get_lines() - self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex) def test_unsorted_index(self): df = DataFrame({'y': np.arange(100)}, @@ -497,7 +498,7 @@ def test_plot_bar(self): df = DataFrame(np.random.randn(10, 15), index=list(string.ascii_letters[:10]), - columns=range(15)) + columns=lrange(15)) _check_plot_works(df.plot, kind='bar') df = DataFrame({'a': [0, 1], 'b': [1, 0]}) @@ -505,13 +506,13 @@ def test_plot_bar(self): def test_bar_stacked_center(self): # GH2157 - df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) + df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) ax = df.plot(kind='bar', stacked='True', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width() / 2) def test_bar_center(self): - df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) + df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) ax = df.plot(kind='bar', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width()) @@ -521,7 +522,7 @@ def test_bar_log(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 # regressions in 1.2.1 - df = DataFrame({'A': [3] * 5, 'B': range(1, 6)}, index=range(5)) + df = DataFrame({'A': [3] * 5, 'B': lrange(1, 6)}, index=lrange(5)) ax = df.plot(kind='bar', grid=True, log=True) self.assertEqual(ax.yaxis.get_ticklocs()[0], 1.0) @@ -765,7 +766,6 @@ def test_style_by_column(self): def test_line_colors(self): import matplotlib.pyplot as plt import sys - from StringIO import StringIO from matplotlib import cm custom_colors = 'rgcby' @@ -796,7 +796,7 @@ def test_line_colors(self): ax = df.plot(colormap='jet') - rgba_colors = map(cm.jet, np.linspace(0, 1, len(df))) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df))) lines = ax.get_lines() for i, l in enumerate(lines): @@ -808,7 +808,7 @@ def test_line_colors(self): ax = df.plot(colormap=cm.jet) - rgba_colors = map(cm.jet, np.linspace(0, 1, len(df))) + rgba_colors = lmap(cm.jet, 
np.linspace(0, 1, len(df))) lines = ax.get_lines() for i, l in enumerate(lines): @@ -887,7 +887,7 @@ def test_boxplot(self): _check_plot_works(grouped.boxplot) _check_plot_works(grouped.boxplot, subplots=False) - tuples = zip(list(string.ascii_letters[:10]), range(10)) + tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) grouped = df.groupby(level=1) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 6af287b77cbac..19f15e44dc096 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1,3 +1,4 @@ +from __future__ import print_function import nose import unittest @@ -12,6 +13,10 @@ from pandas.core.series import Series from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal) +from pandas.compat import( + range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict +) +from pandas import compat from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict @@ -27,11 +32,11 @@ def commonSetUp(self): self.dateRange = bdate_range('1/1/2005', periods=250) - self.stringIndex = Index([rands(8).upper() for x in xrange(250)]) + self.stringIndex = Index([rands(8).upper() for x in range(250)]) self.groupId = Series([x[0] for x in self.stringIndex], index=self.stringIndex) - self.groupDict = dict((k, v) for k, v in self.groupId.iteritems()) + self.groupDict = dict((k, v) for k, v in compat.iteritems(self.groupId)) self.columnIndex = Index(['A', 'B', 'C', 'D', 'E']) @@ -189,9 +194,9 @@ def test_first_last_nth_dtypes(self): assert_frame_equal(nth, expected, check_names=False) # GH 2763, first/last shifting dtypes - idx = range(10) + idx = lrange(10) idx.append(9) - s = Series(data=range(11), index=idx, name='IntCol') + s = Series(data=lrange(11), index=idx, name='IntCol') self.assert_(s.dtype == 'int64') f = s.groupby(level=0).first() self.assert_(f.dtype == 'int64') @@ -263,7 +268,7 @@ def test_groupby_nonobject_dtype(self): # GH 3911, mixed frame non-conversion df = self.df_mixed_floats.copy() - df['value'] = range(len(df)) + df['value'] = lrange(len(df)) def max_value(group): return group.ix[group['value'].idxmax()] @@ -278,27 +283,27 @@ def max_value(group): def test_groupby_return_type(self): # GH2893, return a reduced type - df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, + df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":2, "val2": 27}, {"val1":2, "val2": 12}]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() result = df1.groupby("val1", squeeze=True).apply(func) - self.assert_(isinstance(result,Series)) + tm.assert_isinstance(result,Series) - df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, + df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":1, "val2": 27}, {"val1":1, "val2": 12}]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() result = df2.groupby("val1", squeeze=True).apply(func) - self.assert_(isinstance(result,Series)) + tm.assert_isinstance(result,Series) # GH3596, return a consistent type (regression in 0.11 from 0.10.1) df = DataFrame([[1,1],[1,1]],columns=['X','Y']) result = df.groupby('X',squeeze=False).count() - self.assert_(isinstance(result,DataFrame)) + tm.assert_isinstance(result,DataFrame) def test_agg_regression1(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) @@ -335,7 +340,7 @@ def 
test_agg_period_index(self): prng = period_range('2012-1-1', freq='M', periods=3) df = DataFrame(np.random.randn(3, 2), index=prng) rs = df.groupby(level=0).sum() - self.assert_(isinstance(rs.index, PeriodIndex)) + tm.assert_isinstance(rs.index, PeriodIndex) # GH 3579 index = period_range(start='1999-01', periods=5, freq='M') @@ -428,18 +433,17 @@ def test_groups(self): groups = grouped.groups self.assert_(groups is grouped.groups) # caching works - for k, v in grouped.groups.iteritems(): + for k, v in compat.iteritems(grouped.groups): self.assert_((self.df.ix[v]['A'] == k).all()) grouped = self.df.groupby(['A', 'B']) groups = grouped.groups self.assert_(groups is grouped.groups) # caching works - for k, v in grouped.groups.iteritems(): + for k, v in compat.iteritems(grouped.groups): self.assert_((self.df.ix[v]['A'] == k[0]).all()) self.assert_((self.df.ix[v]['B'] == k[1]).all()) def test_aggregate_str_func(self): - from pandas.util.compat import OrderedDict def _check_results(grouped): # single series @@ -490,7 +494,7 @@ def test_aggregate_item_by_item(self): def aggfun(ser): return ser.size result = DataFrame().groupby(self.df.A).agg(aggfun) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) self.assertEqual(len(result), 0) def test_agg_item_by_item_raise_typeerror(self): @@ -500,7 +504,7 @@ def test_agg_item_by_item_raise_typeerror(self): def raiseException(df): print ('----------------------------------------') - print (df.to_string()) + print(df.to_string()) raise TypeError self.assertRaises(TypeError, df.groupby(0).agg, @@ -508,11 +512,11 @@ def raiseException(df): def test_basic_regression(self): # regression - T = [1.0 * x for x in range(1, 10) * 10][:1095] - result = Series(T, range(0, len(T))) + T = [1.0 * x for x in lrange(1, 10) * 10][:1095] + result = Series(T, lrange(0, len(T))) groupings = np.random.random((1100,)) - groupings = Series(groupings, range(0, len(groupings))) * 10. + groupings = Series(groupings, lrange(0, len(groupings))) * 10. 
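The lrange/lzip/lmap conversions in these hunks rely on the list-forcing helpers imported from pandas.compat at the top of each file. A minimal sketch of how such shims can be defined — the names match the imports used in these tests, but the bodies are illustrative rather than the module's exact source:

import sys

if sys.version_info[0] >= 3:
    # Python 3's range/zip/map return lazy objects; the l* variants
    # force lists so code that slices, repeats, or appends still works.
    def lrange(*args, **kwargs):
        return list(range(*args, **kwargs))

    def lzip(*args, **kwargs):
        return list(zip(*args, **kwargs))

    def lmap(*args, **kwargs):
        return list(map(*args, **kwargs))
else:
    # Python 2's builtins already return lists, so plain aliases suffice.
    lrange, lzip, lmap = range, zip, map

This is why expressions in the surrounding hunks such as lrange(1, 10) * 10 and idx.append(9) remain valid on Python 3, where a bare range object supports neither repetition nor append.
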
grouped = result.groupby(groupings) grouped.mean() @@ -707,12 +711,12 @@ def f3(x): return y df = DataFrame({'a':[1,2,2,2], - 'b':range(4), - 'c':range(5,9)}) + 'b':lrange(4), + 'c':lrange(5,9)}) df2 = DataFrame({'a':[3,2,2,2], - 'b':range(4), - 'c':range(5,9)}) + 'b':lrange(4), + 'c':lrange(5,9)}) # correct result @@ -850,7 +854,7 @@ def test_frame_groupby(self): groups = grouped.groups indices = grouped.indices - for k, v in groups.iteritems(): + for k, v in compat.iteritems(groups): samething = self.tsframe.index.take(indices[k]) self.assertTrue((samething == v).all()) @@ -1041,7 +1045,7 @@ def _check_op(op): for n1, gp1 in data.groupby('A'): for n2, gp2 in gp1.groupby('B'): expected[n1][n2] = op(gp2.ix[:, ['C', 'D']]) - expected = dict((k, DataFrame(v)) for k, v in expected.iteritems()) + expected = dict((k, DataFrame(v)) for k, v in compat.iteritems(expected)) expected = Panel.fromDict(expected).swapaxes(0, 1) expected.major_axis.name, expected.minor_axis.name = 'A', 'B' @@ -1064,7 +1068,6 @@ def _check_op(op): assert_series_equal(result, expected) def test_groupby_as_index_agg(self): - from pandas.util.compat import OrderedDict grouped = self.df.groupby('A', as_index=False) # single-key @@ -1115,22 +1118,22 @@ def test_as_index_series_return_frame(self): result = grouped['C'].agg(np.sum) expected = grouped.agg(np.sum).ix[:, ['A', 'C']] - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) assert_frame_equal(result, expected) result2 = grouped2['C'].agg(np.sum) expected2 = grouped2.agg(np.sum).ix[:, ['A', 'B', 'C']] - self.assert_(isinstance(result2, DataFrame)) + tm.assert_isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) result = grouped['C'].sum() expected = grouped.sum().ix[:, ['A', 'C']] - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) assert_frame_equal(result, expected) result2 = grouped2['C'].sum() expected2 = grouped2.sum().ix[:, ['A', 'B', 'C']] - self.assert_(isinstance(result2, DataFrame)) + tm.assert_isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) # corner case @@ -1153,7 +1156,7 @@ def test_groupby_as_index_cython(self): result = grouped.mean() expected = data.groupby(['A', 'B']).mean() - arrays = zip(*expected.index._tuple_index) + arrays = lzip(*expected.index._tuple_index) expected.insert(0, 'A', arrays[0]) expected.insert(1, 'B', arrays[1]) expected.index = np.arange(len(expected)) @@ -1367,7 +1370,7 @@ def test_wrap_aggregated_output_multindex(self): keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] agged = df.groupby(keys).agg(np.mean) - self.assert_(isinstance(agged.columns, MultiIndex)) + tm.assert_isinstance(agged.columns, MultiIndex) def aggfun(ser): if ser.name == ('foo', 'one'): @@ -1416,7 +1419,7 @@ def test_groupby_level(self): def test_groupby_level_index_names(self): ## GH4014 this used to raise ValueError since 'exp'>1 (in py2) - df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : range(6),}).set_index('exp') + df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : lrange(6),}).set_index('exp') df.groupby(level='exp') self.assertRaises(ValueError, df.groupby, level='foo') @@ -1511,7 +1514,7 @@ def f(piece): grouped = ts.groupby(lambda x: x.month) result = grouped.apply(f) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) self.assert_(result.index.equals(ts.index)) def test_apply_series_yield_constant(self): @@ -1565,7 +1568,7 @@ def test_mutate_groups(self): mydf = DataFrame({ 'cat1' : ['a'] * 8 + ['b'] * 
6, 'cat2' : ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + ['d'] * 2 + ['e'] * 2, - 'cat3' : map(lambda x: 'g%s' % x, range(1,15)), + 'cat3' : lmap(lambda x: 'g%s' % x, lrange(1,15)), 'val' : np.random.randint(100, size=14), }) @@ -1585,7 +1588,7 @@ def f_no_copy(x): def test_apply_chunk_view(self): # Low level tinkering could be unsafe, make sure not df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'value': range(9)}) + 'value': lrange(9)}) # return view f = lambda x: x[:2] @@ -1597,7 +1600,7 @@ def test_apply_chunk_view(self): def test_apply_no_name_column_conflict(self): df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], 'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], - 'value': range(10)[::-1]}) + 'value': lrange(10)[::-1]}) # it works! #2605 grouped = df.groupby(['name', 'name2']) @@ -1615,10 +1618,10 @@ def test_groupby_series_indexed_differently(self): assert_series_equal(agged, exp) def test_groupby_with_hier_columns(self): - tuples = zip(*[['bar', 'bar', 'baz', 'baz', + tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', - 'one', 'two', 'one', 'two']]) + 'one', 'two', 'one', 'two']])) index = MultiIndex.from_tuples(tuples) columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), ('B', 'cat'), ('A', 'dog')]) @@ -1810,7 +1813,6 @@ def f(group): def test_groupby_wrong_multi_labels(self): from pandas import read_csv - from pandas.util.py3compat import StringIO data = """index,foo,bar,baz,spam,data 0,foo1,bar1,baz1,spam2,20 1,foo1,bar2,baz1,spam3,30 @@ -1849,14 +1851,14 @@ def test_groupby_nonstring_columns(self): def test_cython_grouper_series_bug_noncontig(self): arr = np.empty((100, 100)) arr.fill(np.nan) - obj = Series(arr[:, 0], index=range(100)) - inds = np.tile(range(10), 10) + obj = Series(arr[:, 0], index=lrange(100)) + inds = np.tile(lrange(10), 10) result = obj.groupby(inds).agg(Series.median) self.assert_(result.isnull().all()) def test_series_grouper_noncontig_index(self): - index = Index([tm.rands(10) for _ in xrange(100)]) + index = Index([tm.rands(10) for _ in range(100)]) values = Series(np.random.randn(50), index=index[::2]) labels = np.random.randint(0, 5, 50) @@ -1872,7 +1874,7 @@ def test_convert_objects_leave_decimal_alone(self): from decimal import Decimal - s = Series(range(5)) + s = Series(lrange(5)) labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O') def convert_fast(x): @@ -1887,11 +1889,11 @@ def convert_force_pure(x): result = grouped.agg(convert_fast) self.assert_(result.dtype == np.object_) - self.assert_(isinstance(result[0], Decimal)) + tm.assert_isinstance(result[0], Decimal) result = grouped.agg(convert_force_pure) self.assert_(result.dtype == np.object_) - self.assert_(isinstance(result[0], Decimal)) + tm.assert_isinstance(result[0], Decimal) def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 @@ -1987,7 +1989,7 @@ def test_numpy_groupby(self): assert_almost_equal(result, expected) def test_groupby_2d_malformed(self): - d = DataFrame(index=range(2)) + d = DataFrame(index=lrange(2)) d['group'] = ['g1', 'g2'] d['zeros'] = [0, 0] d['ones'] = [1, 1] @@ -2031,12 +2033,12 @@ def test_int64_overflow(self): exp_index, _ = right.index.sortlevel(0) self.assert_(right.index.equals(exp_index)) - tups = map(tuple, df[['A', 'B', 'C', 'D', - 'E', 'F', 'G', 'H']].values) + tups = list(map(tuple, df[['A', 'B', 'C', 'D', + 'E', 'F', 'G', 'H']].values)) tups = com._asarray_tuplesafe(tups) expected = df.groupby(tups).sum()['values'] - for k, v in 
expected.iteritems(): + for k, v in compat.iteritems(expected): self.assert_(left[k] == right[k[::-1]] == v) self.assert_(len(left) == len(right)) @@ -2046,18 +2048,18 @@ def test_groupby_sort_multi(self): 'c': [0, 1, 2], 'd': np.random.randn(3)}) - tups = map(tuple, df[['a', 'b', 'c']].values) + tups = lmap(tuple, df[['a', 'b', 'c']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['a', 'b', 'c'], sort=True).sum() self.assert_(np.array_equal(result.index.values, tups[[1, 2, 0]])) - tups = map(tuple, df[['c', 'a', 'b']].values) + tups = lmap(tuple, df[['c', 'a', 'b']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['c', 'a', 'b'], sort=True).sum() self.assert_(np.array_equal(result.index.values, tups)) - tups = map(tuple, df[['b', 'c', 'a']].values) + tups = lmap(tuple, df[['b', 'c', 'a']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['b', 'c', 'a'], sort=True).sum() self.assert_(np.array_equal(result.index.values, @@ -2071,12 +2073,11 @@ def test_groupby_sort_multi(self): _check_groupby(df, result, ['a', 'b'], 'd') def test_intercept_builtin_sum(self): - import __builtin__ s = Series([1., 2., np.nan, 3.]) grouped = s.groupby([0, 1, 2, 2]) - result = grouped.agg(__builtin__.sum) - result2 = grouped.apply(__builtin__.sum) + result = grouped.agg(builtins.sum) + result2 = grouped.apply(builtins.sum) expected = grouped.sum() assert_series_equal(result, expected) assert_series_equal(result2, expected) @@ -2092,8 +2093,8 @@ def test_column_select_via_attr(self): assert_frame_equal(result, expected) def test_rank_apply(self): - lev1 = np.array([rands(10) for _ in xrange(100)], dtype=object) - lev2 = np.array([rands(10) for _ in xrange(130)], dtype=object) + lev1 = np.array([rands(10) for _ in range(100)], dtype=object) + lev2 = np.array([rands(10) for _ in range(130)], dtype=object) lab1 = np.random.randint(0, 100, size=500) lab2 = np.random.randint(0, 130, size=500) @@ -2184,7 +2185,7 @@ def g(group): result = self.df.groupby('A')['C'].apply(f) expected = self.df.groupby('A')['C'].apply(g) - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, expected) def test_getitem_list_of_columns(self): @@ -2236,7 +2237,6 @@ def test_agg_multiple_functions_too_many_lambdas(self): def test_more_flexible_frame_multi_function(self): from pandas import concat - from pandas.util.compat import OrderedDict grouped = self.df.groupby('A') @@ -2275,7 +2275,6 @@ def bar(x): def test_multi_function_flexible_mix(self): # GH #1268 - from pandas.util.compat import OrderedDict grouped = self.df.groupby('A') d = OrderedDict([['C', OrderedDict([['foo', 'mean'], @@ -2373,7 +2372,7 @@ def test_groupby_groups_datetimeindex(self): # it works! 
groups = grouped.groups - self.assert_(isinstance(groups.keys()[0], datetime)) + tm.assert_isinstance(list(groups.keys())[0], datetime) def test_groupby_reindex_inside_function(self): from pandas.tseries.api import DatetimeIndex @@ -2410,7 +2409,7 @@ def test_multiindex_columns_empty_level(self): l = [['count', 'values'], ['to filter', '']] midx = MultiIndex.from_tuples(l) - df = DataFrame([[1L, 'A']], columns=midx) + df = DataFrame([[long(1), 'A']], columns=midx) grouped = df.groupby('to filter').groups self.assert_(np.array_equal(grouped['A'], [0])) @@ -2418,13 +2417,13 @@ def test_multiindex_columns_empty_level(self): grouped = df.groupby([('to filter', '')]).groups self.assert_(np.array_equal(grouped['A'], [0])) - df = DataFrame([[1L, 'A'], [2L, 'B']], columns=midx) + df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) expected = df.groupby('to filter').groups result = df.groupby([('to filter', '')]).groups self.assertEquals(result, expected) - df = DataFrame([[1L, 'A'], [2L, 'A']], columns=midx) + df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) expected = df.groupby('to filter').groups result = df.groupby([('to filter', '')]).groups @@ -2553,7 +2552,7 @@ def test_filter_single_column_df(self): grouped.filter(lambda x: x.mean() < 10, dropna=False), expected_odd.reindex(df.index)) assert_frame_equal( - grouped.filter(lambda x: x.mean() > 10, dropna=False), + grouped.filter(lambda x: x.mean() > 10, dropna=False), expected_even.reindex(df.index)) def test_filter_multi_column_df(self): @@ -2570,7 +2569,7 @@ def test_filter_mixed_df(self): df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) grouper = df['A'].apply(lambda x: x % 2) grouped = df.groupby(grouper) - expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, + expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2]) assert_frame_equal( grouped.filter(lambda x: x['A'].sum() > 10), expected) @@ -2613,7 +2612,7 @@ def raise_if_sum_is_zero(x): s = pd.Series([-1,0,1,2]) grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) - self.assertRaises(ValueError, + self.assertRaises(ValueError, lambda: grouped.filter(raise_if_sum_is_zero)) def test_filter_against_workaround(self): @@ -2673,10 +2672,10 @@ def assert_fp_equal(a, b): def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = map(tuple, df[keys].values) + tups = lmap(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) expected = f(df.groupby(tups)[field]) - for k, v in expected.iteritems(): + for k, v in compat.iteritems(expected): assert(result[k] == v) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 250728dc59481..200bc5d6611f9 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1,6 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta +from pandas.compat import range, lrange, lzip, u, zip import operator import pickle import unittest @@ -12,7 +13,7 @@ from pandas.core.index import Index, Int64Index, MultiIndex from pandas.util.testing import assert_almost_equal -from pandas.util import py3compat +from pandas import compat import pandas.util.testing as tm import pandas.core.config as cf @@ -34,7 +35,7 @@ def setUp(self): self.intIndex = tm.makeIntIndex(100) self.floatIndex = tm.makeFloatIndex(100) self.empty = Index([]) - self.tuples = Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])) + self.tuples = Index(lzip(['foo', 'bar', 'baz'], [1, 2, 3])) def test_hash_error(self): self.assertRaises(TypeError, hash, 
self.strIndex) @@ -42,7 +43,7 @@ def test_hash_error(self): def test_new_axis(self): new_index = self.dateIndex[None, :] self.assert_(new_index.ndim == 2) - self.assert_(type(new_index) == np.ndarray) + tm.assert_isinstance(new_index, np.ndarray) def test_deepcopy(self): from copy import deepcopy @@ -74,7 +75,7 @@ def test_constructor(self): # copy arr = np.array(self.strIndex) index = Index(arr, copy=True, name='name') - self.assert_(isinstance(index, Index)) + tm.assert_isinstance(index, Index) self.assert_(index.name == 'name') assert_array_equal(arr, index) @@ -91,7 +92,7 @@ def test_index_ctor_infer_periodindex(self): xp = period_range('2012-1-1', freq='M', periods=3) rs = Index(xp) assert_array_equal(rs, xp) - self.assert_(isinstance(rs, PeriodIndex)) + tm.assert_isinstance(rs, PeriodIndex) def test_copy(self): i = Index([], name='Foo') @@ -139,7 +140,7 @@ def test_asof(self): self.assert_(self.dateIndex.asof(d + timedelta(1)) == d) d = self.dateIndex[0].to_datetime() - self.assert_(isinstance(self.dateIndex.asof(d), Timestamp)) + tm.assert_isinstance(self.dateIndex.asof(d), Timestamp) def test_argsort(self): result = self.strIndex.argsort() @@ -157,7 +158,7 @@ def _check(op): arr_result = op(arr, element) index_result = op(index, element) - self.assert_(isinstance(index_result, np.ndarray)) + tm.assert_isinstance(index_result, np.ndarray) self.assert_(not isinstance(index_result, Index)) self.assert_(np.array_equal(arr_result, index_result)) @@ -331,7 +332,7 @@ def testit(index): pickled = pickle.dumps(index) unpickled = pickle.loads(pickled) - self.assert_(isinstance(unpickled, Index)) + tm.assert_isinstance(unpickled, Index) self.assert_(np.array_equal(unpickled, index)) self.assertEquals(unpickled.name, index.name) @@ -368,13 +369,13 @@ def test_format(self): # 2845 index = Index([1, 2.0+3.0j, np.nan]) formatted = index.format() - expected = [str(index[0]), str(index[1]), u'NaN'] + expected = [str(index[0]), str(index[1]), u('NaN')] self.assertEquals(formatted, expected) # is this really allowed? 
index = Index([1, 2.0+3.0j, None]) formatted = index.format() - expected = [str(index[0]), str(index[1]), u'NaN'] + expected = [str(index[0]), str(index[1]), u('NaN')] self.assertEquals(formatted, expected) self.strIndex[:0].format() @@ -467,8 +468,8 @@ def test_slice_locs_dup(self): def test_drop(self): n = len(self.strIndex) - dropped = self.strIndex.drop(self.strIndex[range(5, 10)]) - expected = self.strIndex[range(5) + range(10, n)] + dropped = self.strIndex.drop(self.strIndex[lrange(5, 10)]) + expected = self.strIndex[lrange(5) + lrange(10, n)] self.assert_(dropped.equals(expected)) self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) @@ -597,11 +598,11 @@ def test_view(self): def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) - self.assert_(type(arr) == Int64Index) + tm.assert_isinstance(arr, Int64Index) # but not if explicit dtype passed arr = Index([1, 2, 3, 4], dtype=object) - self.assert_(type(arr) == Index) + tm.assert_isinstance(arr, Index) def test_dtype(self): self.assert_(self.index.dtype == np.int64) @@ -652,7 +653,7 @@ def test_join_outer(self): eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -665,7 +666,7 @@ def test_join_outer(self): eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -688,7 +689,7 @@ def test_join_inner(self): elidx = np.array([1, 6]) eridx = np.array([4, 1]) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -701,7 +702,7 @@ def test_join_inner(self): self.assert_(res.equals(res2)) eridx = np.array([1, 4]) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -717,7 +718,7 @@ def test_join_left(self): eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(lidx is None) self.assert_(np.array_equal(ridx, eridx)) @@ -727,7 +728,7 @@ def test_join_left(self): return_indexers=True) eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(lidx is None) self.assert_(np.array_equal(ridx, eridx)) @@ -756,7 +757,7 @@ def test_join_right(self): elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int64) - self.assert_(isinstance(other, Int64Index)) + tm.assert_isinstance(other, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(ridx is None) @@ -767,7 +768,7 @@ def test_join_right(self): eres = other_mono elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.int64) - self.assert_(isinstance(other, Int64Index)) + tm.assert_isinstance(other, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(ridx is None) 
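The u(...) wrappers and compat.PY3/compat.text_type checks introduced throughout test_index.py come from the same compatibility layer. A sketch, in the style of the six library's helpers, of what these amount to — treat the exact bodies as an approximation:

import sys

PY3 = sys.version_info[0] >= 3

if PY3:
    text_type = str

    def u(s):
        # u'...' literals are a syntax error on Python 3.2, so a no-op
        # function replaces the prefix.
        return s
else:
    text_type = unicode  # the Python 2 builtin

    def u(s):
        # decode escape sequences the way a u'...' literal would
        return unicode(s.replace(r'\\', r'\\\\'), 'unicode_escape')

Calling str(idx) under PY3 and compat.text_type(idx) otherwise, as the updated test_unicode_string_with_unicode does, exercises the same text-rendering path on both interpreters.
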
@@ -857,7 +858,7 @@ def test_union_noncomparable(self): from datetime import datetime, timedelta # corner case, non-Int64Index now = datetime.now() - other = Index([now + timedelta(i) for i in xrange(4)], dtype=object) + other = Index([now + timedelta(i) for i in range(4)], dtype=object) result = self.index.union(other) expected = np.concatenate((self.index, other)) self.assert_(np.array_equal(result, expected)) @@ -890,14 +891,14 @@ def test_take_preserve_name(self): def test_int_name_format(self): from pandas import Series, DataFrame index = Index(['a', 'b', 'c'], name=0) - s = Series(range(3), index) - df = DataFrame(range(3), index=index) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) repr(s) repr(df) def test_print_unicode_columns(self): df = pd.DataFrame( - {u"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError def test_repr_summary(self): @@ -907,16 +908,16 @@ def test_repr_summary(self): self.assertTrue("..." in r) def test_unicode_string_with_unicode(self): - idx = Index(range(1000)) + idx = Index(lrange(1000)) - if py3compat.PY3: + if compat.PY3: str(idx) else: - unicode(idx) + compat.text_type(idx) def test_bytestring_with_unicode(self): - idx = Index(range(1000)) - if py3compat.PY3: + idx = Index(lrange(1000)) + if compat.PY3: bytes(idx) else: str(idx) @@ -944,7 +945,7 @@ def test_constructor_single_level(self): single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) - self.assert_(isinstance(single_level, Index)) + tm.assert_isinstance(single_level, Index) self.assert_(not isinstance(single_level, MultiIndex)) self.assert_(single_level.name == 'first') @@ -1062,7 +1063,7 @@ def test_pickle(self): self.assert_(self.index.equals(unpickled)) def test_legacy_pickle(self): - if py3compat.PY3: + if compat.PY3: raise nose.SkipTest def curpath(): @@ -1151,9 +1152,9 @@ def test_get_loc(self): self.assertRaises(KeyError, self.index.get_loc, 'quux') # 3 levels - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1173,9 +1174,9 @@ def test_get_loc_duplicates(self): assert(rs == xp) def test_get_loc_level(self): - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1193,7 +1194,7 @@ def test_get_loc_level(self): self.assertRaises(KeyError, index.get_loc_level, (2, 2)) - index = MultiIndex(levels=[[2000], range(4)], + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) @@ -1219,9 +1220,9 @@ def test_slice_locs(self): tm.assert_almost_equal(sliced.values, expected.values) def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1276,11 
+1277,11 @@ def test_slice_locs_not_contained(self): def test_consistency(self): # need to construct an overflow - major_axis = range(70000) - minor_axis = range(10) + major_axis = lrange(70000) + minor_axis = lrange(10) major_labels = np.arange(70000) - minor_labels = np.repeat(range(10), 7000) + minor_labels = np.repeat(lrange(10), 7000) # the fact that is works means it's consistent index = MultiIndex(levels=[major_axis, minor_axis], @@ -1295,8 +1296,8 @@ def test_consistency(self): self.assert_(not index.is_unique) def test_truncate(self): - major_axis = Index(range(4)) - minor_axis = Index(range(2)) + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) major_labels = np.array([0, 0, 1, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 1, 0, 1]) @@ -1319,8 +1320,8 @@ def test_truncate(self): self.assertRaises(ValueError, index.truncate, 3, 1) def test_get_indexer(self): - major_axis = Index(range(4)) - minor_axis = Index(range(2)) + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) major_labels = np.array([0, 0, 1, 2, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 0, 1, 0, 1]) @@ -1353,8 +1354,6 @@ def test_get_indexer(self): r1 = idx1.get_indexer([1, 2, 3]) self.assert_((r1 == [-1, -1, -1]).all()) - # self.assertRaises(Exception, idx1.get_indexer, - # list(list(zip(*idx2._tuple_index))[0])) def test_format(self): self.index.format() @@ -1404,9 +1403,9 @@ def test_equals(self): self.assert_(self.index.equals(self.index._tuple_index)) # different number of levels - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1417,8 +1416,8 @@ def test_equals(self): self.assert_(not index.equal_levels(index2)) # levels are different - major_axis = Index(range(4)) - minor_axis = Index(range(2)) + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) major_labels = np.array([0, 0, 1, 2, 2, 3]) minor_labels = np.array([0, 1, 0, 0, 1, 0]) @@ -1503,7 +1502,7 @@ def test_diff(self): sortorder=0, names=self.index.names) - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) self.assert_(result.equals(expected)) self.assertEqual(result.names, self.index.names) @@ -1637,9 +1636,9 @@ def test_droplevel_with_names(self): dropped = index.droplevel(0) self.assertEqual(dropped.name, 'second') - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], @@ -1652,9 +1651,9 @@ def test_droplevel_with_names(self): self.assert_(dropped.equals(expected)) def test_droplevel_multiple(self): - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], @@ -1724,16 +1723,16 @@ def _check_all(other): # some corner cases idx = Index(['three', 'one', 'two']) result = idx.join(self.index, level='second') - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) self.assertRaises(Exception, self.index.join, self.index, level=1) def test_reindex(self): 
result, indexer = self.index.reindex(list(self.index[:4])) - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) result, indexer = self.index.reindex(list(self.index)) - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) self.assert_(indexer is None) def test_reindex_level(self): @@ -1774,24 +1773,24 @@ def test_tolist(self): self.assertEqual(result, exp) def test_repr_with_unicode_data(self): - d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped def test_unicode_string_with_unicode(self): - d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index - if py3compat.PY3: + if compat.PY3: str(idx) else: - unicode(idx) + compat.text_type(idx) def test_bytestring_with_unicode(self): - d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index - if py3compat.PY3: + if compat.PY3: bytes(idx) else: str(idx) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f0ace52f2c2b5..f6a6bd1587a04 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -2,8 +2,8 @@ import unittest import nose import itertools -from StringIO import StringIO +from pandas.compat import range, lrange, StringIO, lmap, map from numpy import random, nan from numpy.random import randn import numpy as np @@ -15,7 +15,7 @@ MultiIndex, DatetimeIndex, Timestamp) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal) -from pandas.util import py3compat +from pandas import compat import pandas.util.testing as tm import pandas.lib as lib @@ -36,7 +36,7 @@ def _generate_indices(f, values=False): axes = f.axes if values: - axes = [ range(len(a)) for a in axes ] + axes = [ lrange(len(a)) for a in axes ] return itertools.product(*axes) @@ -94,9 +94,9 @@ def setUp(self): import warnings warnings.filterwarnings(action='ignore', category=FutureWarning) - self.series_ints = Series(np.random.rand(4), index=range(0,8,2)) - self.frame_ints = DataFrame(np.random.randn(4, 4), index=range(0, 8, 2), columns=range(0,12,3)) - self.panel_ints = Panel(np.random.rand(4,4,4), items=range(0,8,2),major_axis=range(0,12,3),minor_axis=range(0,16,4)) + self.series_ints = Series(np.random.rand(4), index=lrange(0,8,2)) + self.frame_ints = DataFrame(np.random.randn(4, 4), index=lrange(0, 8, 2), columns=lrange(0,12,3)) + self.panel_ints = Panel(np.random.rand(4,4,4), items=lrange(0,8,2),major_axis=lrange(0,12,3),minor_axis=lrange(0,16,4)) self.series_labels = Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) @@ -201,15 +201,15 @@ def _print(result, error = None): _print(result) - except (AssertionError): + except AssertionError: raise - except (TypeError): + except TypeError: raise AssertionError(_print('type error')) - except (Exception), detail: + except Exception as detail: # if we are in fails, the ok, otherwise raise it if fails is not None: - if fails == type(detail): + if isinstance(detail, fails): result = 'ok (%s)' % type(detail).__name__ _print(result) return @@ 
-342,7 +342,7 @@ def test_iloc_getitem_dups(self): def test_iloc_getitem_array(self): # array like - s = Series(index=range(1,4)) + s = Series(index=lrange(1,4)) self.check_result('array like', 'iloc', s.index, 'ix', { 0 : [2,4,6], 1 : [3,6,9], 2: [4,8,12] }, typs = ['ints']) def test_iloc_getitem_bool(self): @@ -547,7 +547,7 @@ def test_loc_setitem_frame(self): def test_iloc_getitem_frame(self): """ originally from test_frame.py""" - df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0,8,2)) + df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), columns=lrange(0,8,2)) result = df.iloc[2] exp = df.ix[4] @@ -586,7 +586,7 @@ def test_iloc_getitem_frame(self): assert_frame_equal(result, expected) # with index-like - s = Series(index=range(1,5)) + s = Series(index=lrange(1,5)) result = df.iloc[s.index] expected = df.ix[[2,4,6,8]] assert_frame_equal(result, expected) @@ -633,7 +633,7 @@ def test_iloc_setitem_series(self): assert_frame_equal(result, expected) def test_iloc_setitem_series(self): - s = Series(np.random.randn(10), index=range(0,20,2)) + s = Series(np.random.randn(10), index=lrange(0,20,2)) s.iloc[1] = 1 result = s.iloc[1] @@ -796,7 +796,7 @@ def test_dups_fancy_indexing(self): # GH 3561, dups not in selected order ind = ['A', 'A', 'B', 'C'] - df = DataFrame({'test':range(len(ind))}, index=ind) + df = DataFrame({'test':lrange(len(ind))}, index=ind) rows = ['C', 'B'] res = df.ix[rows] self.assert_(rows == list(res.index)) @@ -878,8 +878,8 @@ def test_multi_assign(self): # GH 3626, an assignment of a sub-df to a df df = DataFrame({'FC':['a','b','a','b','a','b'], 'PF':[0,0,0,0,1,1], - 'col1':range(6), - 'col2':range(6,12)}) + 'col1':lrange(6), + 'col2':lrange(6,12)}) df.ix[1,0]=np.nan df2 = df.copy() @@ -918,7 +918,7 @@ def test_ix_assign_column_mixed(self): assert_series_equal(df.B, orig + 1) # GH 3668, mixed frame with series value - df = DataFrame({'x':range(10), 'y':range(10,20),'z' : 'bar'}) + df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'}) expected = df.copy() expected.ix[0, 'y'] = 1000 expected.ix[2, 'y'] = 1200 @@ -932,10 +932,10 @@ def test_ix_assign_column_mixed(self): def test_iloc_mask(self): # GH 3631, iloc with a mask (of a series) should raise - df = DataFrame(range(5), list('ABCDE'), columns=['a']) + df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) mask = (df.a%2 == 0) self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) - mask.index = range(len(mask)) + mask.index = lrange(len(mask)) self.assertRaises(NotImplementedError, df.iloc.__getitem__, tuple([mask])) # ndarray ok @@ -945,7 +945,7 @@ def test_iloc_mask(self): # the possibilities locs = np.arange(4) nums = 2**locs - reps = map(bin, nums) + reps = lmap(bin, nums) df = DataFrame({'locs':locs, 'nums':nums}, reps) expected = { @@ -974,7 +974,7 @@ def test_iloc_mask(self): else: accessor = df ans = str(bin(accessor[mask]['nums'].sum())) - except Exception, e: + except Exception as e: ans = str(e) key = tuple([idx,method]) @@ -1042,7 +1042,7 @@ def test_iloc_non_unique_indexing(self): #GH 4017, non-unique indexing (on the axis) df = DataFrame({'A' : [0.1] * 3000, 'B' : [1] * 3000}) - idx = np.array(range(30)) * 99 + idx = np.array(lrange(30)) * 99 expected = df.iloc[idx] df3 = pd.concat([df, 2*df, 3*df]) @@ -1109,7 +1109,7 @@ def test_non_unique_loc_memory_error(self): columns = list('ABCDEFG') def gen_test(l,l2): - return pd.concat([ 
DataFrame(randn(l,len(columns)),index=lrange(l),columns=columns), DataFrame(np.ones((l2,len(columns))),index=[0]*l2,columns=columns) ]) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 0f3b8c1634416..6f13678339425 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -11,6 +11,7 @@ from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, randn) +from pandas.compat import zip, u def assert_block_equal(left, right): @@ -199,7 +200,7 @@ def test_unicode_repr(self): mat = np.empty((N, 2), dtype=object) mat[:, 0] = 'foo' mat[:, 1] = 'bar' - cols = ['b', u"\u05d0"] + cols = ['b', u("\u05d0")] str_repr = repr(make_block(mat.T, cols, TEST_COLS)) def test_get(self): @@ -385,7 +386,7 @@ def test_astype(self): self.assert_(tmgr.as_matrix().dtype == np.dtype(t)) def test_convert(self): - + def _compare(old_mgr, new_mgr): """ compare the blocks, numeric compare ==, object don't """ old_blocks = set(old_mgr.blocks) @@ -440,7 +441,7 @@ def _check(new_mgr,block_type, citems): _check(new_mgr,FloatBlock,['b','g']) _check(new_mgr,IntBlock,['a','f']) - mgr = create_blockmanager([b, get_int_ex(['f'],np.int32), get_bool_ex(['bool']), get_dt_ex(['dt']), + mgr = create_blockmanager([b, get_int_ex(['f'],np.int32), get_bool_ex(['bool']), get_dt_ex(['dt']), get_int_ex(['i'],np.int64), get_float_ex(['g'],np.float64), get_float_ex(['h'],np.float16)]) new_mgr = mgr.convert(convert_numeric = True) @@ -535,7 +536,7 @@ def test_get_numeric_data(self): def test_missing_unicode_key(self): df = DataFrame({"a": [1]}) try: - df.ix[:, u"\u05d0"] # should not raise UnicodeEncodeError + df.ix[:, u("\u05d0")] # should not raise UnicodeEncodeError except KeyError: pass # this is the expected exception diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d852bad215f77..d54fc32b6efa6 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1,5 +1,4 @@ # pylint: disable-msg=W0612,E1101,W0141 -from pandas.util.py3compat import StringIO import nose import unittest @@ -14,7 +13,8 @@ assert_frame_equal) import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.compat import product as cart_product +from pandas.compat import (range, lrange, StringIO, lzip, u, cPickle, + product as cart_product, zip) import pandas as pd import pandas.index as _index @@ -43,7 +43,7 @@ def setUp(self): # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = zip(*arrays) + tuples = lzip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN @@ -72,26 +72,26 @@ def test_dataframe_constructor(self): multi = DataFrame(np.random.randn(4, 4), index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) self.assert_(not isinstance(multi.columns, MultiIndex)) multi = DataFrame(np.random.randn(4, 4), columns=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - self.assert_(isinstance(multi.columns, MultiIndex)) + tm.assert_isinstance(multi.columns, MultiIndex) def test_series_constructor(self): multi = Series(1., index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) multi = Series(1., index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 
'y']]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) - multi = Series(range(4), index=[['a', 'a', 'b', 'b'], + multi = Series(lrange(4), index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) def test_reindex_level(self): # axis=0 @@ -136,7 +136,6 @@ def _check_op(opname): _check_op('div') def test_pickle(self): - import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) @@ -349,8 +348,8 @@ def test_frame_setitem_multi_column(self): def test_getitem_tuple_plus_slice(self): # GH #671 - df = DataFrame({'a': range(10), - 'b': range(10), + df = DataFrame({'a': lrange(10), + 'b': lrange(10), 'c': np.random.randn(10), 'd': np.random.randn(10)}) @@ -429,7 +428,6 @@ def test_xs_level(self): def test_xs_level_multiple(self): from pandas import read_table - from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -443,7 +441,7 @@ def test_xs_level_multiple(self): assert_frame_equal(result, expected) # GH2107 - dates = range(20111201, 20111205) + dates = lrange(20111201, 20111205) ids = 'abcde' idx = MultiIndex.from_tuples([x for x in cart_product(dates, ids)]) idx.names = ['date', 'secid'] @@ -454,7 +452,6 @@ def test_xs_level_multiple(self): def test_xs_level0(self): from pandas import read_table - from StringIO import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -577,7 +574,7 @@ def test_setitem_change_dtype(self): s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() assert_series_equal(dft['foo', 'two'], s > s.median()) - self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) + tm.assert_isinstance(dft._data.blocks[1].items, MultiIndex) reindexed = dft.reindex(columns=[('foo', 'two')]) assert_series_equal(reindexed['foo', 'two'], s > s.median()) @@ -588,7 +585,7 @@ def test_frame_setitem_ix(self): # with integer labels df = self.frame.copy() - df.columns = range(3) + df.columns = lrange(3) df.ix[('bar', 'two'), 1] = 7 self.assertEquals(df.ix[('bar', 'two'), 1], 7) @@ -673,12 +670,12 @@ def test_reset_index_with_drop(self): self.assertEquals(len(deleveled.columns), len(self.ymd.columns)) deleveled = self.series.reset_index() - self.assert_(isinstance(deleveled, DataFrame)) + tm.assert_isinstance(deleveled, DataFrame) self.assert_( len(deleveled.columns) == len(self.series.index.levels) + 1) deleveled = self.series.reset_index(drop=True) - self.assert_(isinstance(deleveled, Series)) + tm.assert_isinstance(deleveled, Series) def test_sortlevel_by_name(self): self.frame.index.names = ['first', 'second'] @@ -950,8 +947,8 @@ def test_stack_multiple_bug(self): def test_stack_dropna(self): # GH #3997 - df = pd.DataFrame({'A': ['a1', 'a2'], - 'B': ['b1', 'b2'], + df = pd.DataFrame({'A': ['a1', 'a2'], + 'B': ['b1', 'b2'], 'C': [1, 1]}) df = df.set_index(['A', 'B']) @@ -1092,7 +1089,7 @@ def test_reorder_levels(self): def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] - self.assert_(isinstance(df.columns, MultiIndex)) + tm.assert_isinstance(df.columns, MultiIndex) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): @@ -1167,7 +1164,7 @@ def test_frame_getitem_not_sorted(self): def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - 
tuples = zip(*arrays) + tuples = lzip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) @@ -1211,7 +1208,7 @@ def test_count(self): def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, - range(2), + lrange(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) @@ -1225,7 +1222,7 @@ def test_frame_group_ops(self): self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - range(2), range(2), + lrange(2), lrange(2), [False, True]): if axis == 0: frame = self.frame @@ -1496,8 +1493,7 @@ def test_mixed_depth_get(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1516,8 +1512,7 @@ def test_mixed_depth_insert(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1532,8 +1527,7 @@ def test_mixed_depth_drop(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1584,8 +1578,7 @@ def test_mixed_depth_pop(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1677,7 +1670,7 @@ def test_drop_preserve_names(self): self.assert_(result.index.names == ['one', 'two']) def test_unicode_repr_issues(self): - levels = [Index([u'a/\u03c3', u'b/\u03c3', u'c/\u03c3']), + levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]), Index([0, 1])] labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] index = MultiIndex(levels=levels, labels=labels) @@ -1689,9 +1682,9 @@ def test_unicode_repr_issues(self): def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], - names=[u'\u0394', 'i1']) + names=[u('\u0394'), 'i1']) - s = Series(range(2), index=index) + s = Series(lrange(2), index=index) df = DataFrame(np.random.randn(2, 4), index=index) repr(s) repr(df) @@ -1747,7 +1740,7 @@ def test_indexing_ambiguity_bug_1678(self): result = frame.ix[:, 1] exp = frame.icol(1) - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, exp) def test_nonunique_assignment_1750(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 5d1053289b49e..d04192772ce7d 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,6 +1,8 @@ # pylint: disable=W0612,E1101 from datetime import datetime +from pandas.compat import range, lrange, StringIO, cPickle, OrderedDict +from pandas import compat import operator import unittest import nose @@ -13,7 +15,7 @@ from pandas.core.panel import Panel from pandas.core.series import remove_na import pandas.core.common as com -from pandas.util import py3compat +from pandas import compat from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -38,7 +40,6 @@ class PanelTests(object): panel = None def test_pickle(self): - import 
cPickle pickled = cPickle.dumps(self.panel) unpickled = cPickle.loads(pickled) assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) @@ -266,15 +267,15 @@ _test_op(panel, op): assert_frame_equal(result['ItemA'], op(panel['ItemA'], 1)) def test_keys(self): - tm.equalContents(self.panel.keys(), self.panel.items) + tm.equalContents(list(self.panel.keys()), self.panel.items) def test_iteritems(self): - # Test panel.iteritems(), aka panel.iterkv() + # Test panel.iteritems() # just test that it works - for k, v in self.panel.iterkv(): + for k, v in compat.iteritems(self.panel): pass - self.assertEqual(len(list(self.panel.iterkv())), + self.assertEqual(len(list(compat.iteritems(self.panel))), len(self.panel.items)) def test_combineFrame(self): @@ -309,7 +310,7 @@ def check_op(op, name): check_op(operator.add, 'add') check_op(operator.sub, 'subtract') check_op(operator.mul, 'multiply') - if py3compat.PY3: + if compat.PY3: check_op(operator.truediv, 'divide') else: check_op(operator.div, 'divide') @@ -390,7 +391,7 @@ def test_delitem_and_pop(self): values[1] = 1 values[2] = 2 - panel = Panel(values, range(3), range(3), range(3)) + panel = Panel(values, lrange(3), lrange(3), lrange(3)) # did we delete the right row? @@ -729,7 +730,7 @@ def test_set_value(self): # resize res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) - self.assert_(isinstance(res, Panel)) + tm.assert_isinstance(res, Panel) self.assert_(res is not self.panel) self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) @@ -811,8 +812,8 @@ def test_constructor_empty_panel(self): def test_constructor_observe_dtype(self): # GH #411 - panel = Panel(items=range(3), major_axis=range(3), - minor_axis=range(3), dtype='O') + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') self.assert_(panel.values.dtype == np.object_) def test_constructor_dtypes(self): @@ -824,19 +825,19 @@ def _check_dtype(panel, dtype): # only nan holding types allowed here for dtype in ['float64','float32','object']: - panel = Panel(items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.random.randn(2,10,5),items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(np.random.randn(2,10,5),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) def test_consolidate(self): @@ -880,19 +881,19 @@ def test_ctor_dict(self): # cast dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) - for k, v in 
dcasted.iteritems())) + for k, v in compat.iteritems(dcasted))) assert_panel_equal(result, expected) result = Panel(dcasted, dtype=np.int32) expected = Panel(dict((k, v.astype(np.int32)) - for k, v in dcasted.iteritems())) + for k, v in compat.iteritems(dcasted))) assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel.iterkv()) + data = dict((k, v.values) for k, v in compat.iteritems(self.panel)) result = Panel(data) exp_major = Index(np.arange(len(self.panel.major_axis))) self.assert_(result.major_axis.equals(exp_major)) @@ -914,7 +915,6 @@ def test_constructor_dict_mixed(self): self.assertRaises(Exception, Panel, data) def test_ctor_orderedDict(self): - from pandas.util.compat import OrderedDict keys = list(set(np.random.randint(0,5000,100)))[:50] # unique random int keys d = OrderedDict([(k,mkdf(10,5)) for k in keys]) p = Panel(d) @@ -961,15 +961,15 @@ def test_from_dict_mixed_orient(self): def test_constructor_error_msgs(self): def testit(): - Panel(np.random.randn(3,4,5), range(4), range(5), range(5)) + Panel(np.random.randn(3,4,5), lrange(4), lrange(5), lrange(5)) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(4, 5, 5\)", testit) def testit(): - Panel(np.random.randn(3,4,5), range(5), range(4), range(5)) + Panel(np.random.randn(3,4,5), lrange(5), lrange(4), lrange(5)) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 4, 5\)", testit) def testit(): - Panel(np.random.randn(3,4,5), range(5), range(5), range(4)) + Panel(np.random.randn(3,4,5), lrange(5), lrange(5), lrange(4)) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 5, 4\)", testit) def test_conform(self): @@ -1282,7 +1282,7 @@ def test_shift(self): # negative numbers, #2164 result = self.panel.shift(-1) expected = Panel(dict((i, f.shift(-1)[:-1]) - for i, f in self.panel.iterkv())) + for i, f in compat.iteritems(self.panel))) assert_panel_equal(result, expected) def test_multiindex_get(self): @@ -1381,7 +1381,7 @@ def test_to_excel(self): except ImportError: raise nose.SkipTest - for item, df in self.panel.iterkv(): + for item, df in compat.iteritems(self.panel): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) @@ -1615,8 +1615,6 @@ def is_sorted(arr): self.assert_(is_sorted(sorted_major.index.labels[0])) def test_to_string(self): - from pandas.util.py3compat import StringIO - buf = StringIO() self.panel.to_string(buf) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 9c3a66c32c501..3c6ab18126e8f 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -1,4 +1,5 @@ from datetime import datetime +from pandas.compat import range, lrange import os import operator import unittest @@ -14,7 +15,7 @@ from pandas.core.series import remove_na import pandas.core.common as com import pandas.core.panel as panelmod -from pandas.util import py3compat +from pandas import compat from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, @@ -22,6 +23,7 @@ assert_series_equal, assert_almost_equal) import pandas.util.testing as tm +import pandas.compat as compat def add_nans(panel4d): @@ -215,15 +217,12 @@ def _test_op(panel4d, op): assert_panel_equal(result['l1'], op(panel4d['l1'], 1)) def test_keys(self): - tm.equalContents(self.panel4d.keys(), self.panel4d.labels) + tm.equalContents(list(self.panel4d.keys()), self.panel4d.labels) def test_iteritems(self): - 
"""Test panel4d.iteritems(), aka panel4d.iterkv()""" - # just test that it works - for k, v in self.panel4d.iterkv(): - pass + """Test panel4d.iteritems()""" - self.assertEqual(len(list(self.panel4d.iterkv())), + self.assertEqual(len(list(compat.iteritems(self.panel4d))), len(self.panel4d.labels)) def test_combinePanel4d(self): @@ -308,7 +307,7 @@ def test_delitem_and_pop(self): values[2] = 2 values[3] = 3 - panel4d = Panel4D(values, range(4), range(4), range(4), range(4)) + panel4d = Panel4D(values, lrange(4), lrange(4), lrange(4), lrange(4)) # did we delete the right row? @@ -536,7 +535,7 @@ def test_set_value(self): # resize res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5) - self.assert_(isinstance(res, Panel4D)) + tm.assert_isinstance(res, Panel4D) self.assert_(res is not self.panel4d) self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5) @@ -610,8 +609,8 @@ def test_constructor_empty_panel(self): def test_constructor_observe_dtype(self): # GH #411 - panel = Panel(items=range(3), major_axis=range(3), - minor_axis=range(3), dtype='O') + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') self.assert_(panel.values.dtype == np.object_) def test_consolidate(self): @@ -658,7 +657,7 @@ def test_ctor_dict(self): # assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel4d.iterkv()) + data = dict((k, v.values) for k, v in compat.iteritems(self.panel4d)) result = Panel4D(data) exp_major = Index(np.arange(len(self.panel4d.major_axis))) self.assert_(result.major_axis.equals(exp_major)) @@ -721,7 +720,7 @@ def test_from_dict_mixed_orient(self): def test_values(self): self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5), - range(5), range(5), range(4)) + lrange(5), lrange(5), lrange(4)) def test_conform(self): p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 5675cfec58678..e195839242f55 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -9,7 +9,7 @@ from pandas.core import panelnd from pandas.core.panel import Panel import pandas.core.common as com -from pandas.util import py3compat +from pandas import compat from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index b24e097238a70..0c6c34ff4dc29 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -1,8 +1,6 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from StringIO import StringIO -import cPickle as pickle import operator import os import unittest @@ -17,6 +15,7 @@ from pandas.core.reshape import melt, convert_dummies, lreshape import pandas.util.testing as tm +from pandas.compat import StringIO, cPickle, range _multiprocess_can_split_ = True @@ -56,9 +55,9 @@ def test_value_vars(self): 'id2': self.df['id2'].tolist() * 2, 'variable': ['A']*10 + ['B']*10, 'value': self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', 'variable', 'value']) + columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4) - + def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) @@ -79,7 +78,7 @@ def test_custom_var_name(self): 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A']*10 + ['B']*10, 'value': 
self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', self.var_name, 'value']) + columns=['id1', 'id2', self.var_name, 'value']) tm.assert_frame_equal(result9, expected9) def test_custom_value_name(self): @@ -97,12 +96,12 @@ def test_custom_value_name(self): self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) result14 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], value_name=self.value_name) + value_vars=['A', 'B'], value_name=self.value_name) expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A']*10 + ['B']*10, self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', 'variable', self.value_name]) + columns=['id1', 'id2', 'variable', self.value_name]) tm.assert_frame_equal(result14, expected14) def test_custom_var_and_value_name(self): @@ -122,12 +121,12 @@ def test_custom_var_and_value_name(self): self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) result19 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) + value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, var_name: ['A']*10 + ['B']*10, value_name: self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', self.var_name, self.value_name]) + columns=['id1', 'id2', self.var_name, self.value_name]) tm.assert_frame_equal(result19, expected19) def test_custom_var_and_value_name(self): diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 0f429bf715688..e7faa8f25deb3 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,5 +1,7 @@ +from pandas.compat import range import unittest import pandas.tools.rplot as rplot +import pandas.util.testing as tm from pandas import read_csv import os @@ -50,7 +52,7 @@ def test_make_aes1(self): self.assertTrue(aes['colour'] is None) self.assertTrue(aes['shape'] is None) self.assertTrue(aes['alpha'] is None) - self.assertTrue(type(aes) is dict) + self.assertTrue(isinstance(aes, dict)) def test_make_aes2(self): self.assertRaises(ValueError, rplot.make_aes, @@ -67,7 +69,7 @@ def test_dictionary_union(self): dict2 = {1 : 1, 2 : 2, 4 : 4} union = rplot.dictionary_union(dict1, dict2) self.assertEqual(len(union), 4) - keys = union.keys() + keys = list(union.keys()) self.assertTrue(1 in keys) self.assertTrue(2 in keys) self.assertTrue(3 in keys) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index cbf7fb070e97f..151a97a281ad3 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -23,8 +23,8 @@ import pandas.core.datetools as datetools import pandas.core.nanops as nanops -from pandas.util.py3compat import StringIO -from pandas.util import py3compat +from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict +from pandas import compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, ensure_clean) @@ -128,8 +128,8 @@ def test_getitem_setitem_ellipsis(self): self.assert_((result == 5).all()) def test_getitem_negative_out_of_bounds(self): - s = Series([tm.rands(5) for _ in xrange(10)], - index=[tm.rands(10) for _ in xrange(10)]) + s = Series([tm.rands(5) for _ in range(10)], + index=[tm.rands(10) for _ in range(10)]) self.assertRaises(IndexError, s.__getitem__, -11) self.assertRaises(IndexError, s.__setitem__, -11, 'foo') 
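These test hunks all apply the same handful of substitutions: `xrange`/bare `range` become `pandas.compat.range` (lazy) or `lrange` (a real list), `u'...'` literals become `u('...')` calls (the `u''` prefix is a syntax error before Python 3.3, hence the wrapper), and `d.iteritems()` becomes `compat.iteritems(d)`. A minimal sketch of what such shims look like — illustrative only, assuming the obvious two-branch layout, not the exact `pandas.compat` source:

    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        def u(s):
            # py3 str is already unicode
            return s

        def lrange(*args):
            # py3 range is a lazy sequence; force a real list
            return list(range(*args))

        def iteritems(obj):
            # dict.iteritems() no longer exists on py3
            return iter(obj.items())
    else:
        def u(s):
            # interpret \uXXXX escapes, mimicking a u'...' literal
            return unicode(s, 'unicode_escape')

        lrange = range  # py2 range already returns a list

        def iteritems(obj):
            return obj.iteritems()

Keeping the call sites identical on both interpreters is what lets a patch this size stay almost purely mechanical.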
@@ -140,7 +140,7 @@ def test_multilevel_name_print(self): labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) - s = Series(range(0, len(index)), index=index, name='sth') + s = Series(lrange(0, len(index)), index=index, name='sth') expected = ["first second", "foo one 0", " two 1", @@ -177,7 +177,7 @@ def test_name_printing(self): s.name = None self.assert_(not "Name:" in repr(s)) # test big series (diff code path) - s = Series(range(0, 1000)) + s = Series(lrange(0, 1000)) s.name = "test" self.assert_("Name: test" in repr(s)) s.name = None @@ -231,7 +231,7 @@ def test_comparisons(self): def test_none_comparison(self): # bug brought up by #1079 - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) self.assertRaises(TypeError, s.__eq__, None) def test_sum_zero(self): @@ -281,11 +281,11 @@ def setUp(self): def test_constructor(self): # Recognize TimeSeries - self.assert_(isinstance(self.ts, TimeSeries)) + tm.assert_isinstance(self.ts, TimeSeries) # Pass in Series derived = Series(self.ts) - self.assert_(isinstance(derived, TimeSeries)) + tm.assert_isinstance(derived, TimeSeries) self.assert_(tm.equalContents(derived.index, self.ts.index)) # Ensure new index is not created @@ -293,7 +293,7 @@ def test_constructor(self): # Pass in scalar scalar = Series(0.5) - self.assert_(isinstance(scalar, float)) + tm.assert_isinstance(scalar, float) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) @@ -320,8 +320,8 @@ def test_constructor_empty(self): empty2 = Series([]) assert_series_equal(empty, empty2) - empty = Series(index=range(10)) - empty2 = Series(np.nan, index=range(10)) + empty = Series(index=lrange(10)) + empty2 = Series(np.nan, index=lrange(10)) assert_series_equal(empty, empty2) def test_constructor_series(self): @@ -336,12 +336,12 @@ def test_constructor_generator(self): gen = (i for i in range(10)) result = Series(gen) - exp = Series(range(10)) + exp = Series(lrange(10)) assert_series_equal(result, exp) gen = (i for i in range(10)) - result = Series(gen, index=range(10, 20)) - exp.index = range(10, 20) + result = Series(gen, index=lrange(10, 20)) + exp.index = lrange(10, 20) assert_series_equal(result, exp) def test_constructor_maskedarray(self): @@ -424,7 +424,7 @@ def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] s = Series(objs, index=[0, 1]) - self.assert_(isinstance(s, Series)) + tm.assert_isinstance(s, Series) def test_constructor_sanitize(self): s = Series(np.array([1., 1., 8.]), dtype='i8') @@ -434,10 +434,10 @@ def test_constructor_sanitize(self): self.assertEquals(s.dtype, np.dtype('f8')) def test_constructor_pass_none(self): - s = Series(None, index=range(5)) + s = Series(None, index=lrange(5)) self.assert_(s.dtype == np.float64) - s = Series(None, index=range(5), dtype=object) + s = Series(None, index=lrange(5), dtype=object) self.assert_(s.dtype == np.object_) def test_constructor_cast(self): @@ -455,15 +455,15 @@ def test_constructor_dtype_nocast(self): def test_constructor_dtype_datetime64(self): import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) self.assert_(isnull(s).all() == True) #### in theory this should be all nulls, but since #### we are not specifying a dtype is ambiguous - s = Series(tslib.iNaT, index=range(5)) + s = Series(tslib.iNaT, index=lrange(5)) self.assert_(isnull(s).all() == False) - s = Series(nan, dtype='M8[ns]', 
index=range(5)) + s = Series(nan, dtype='M8[ns]', index=lrange(5)) self.assert_(isnull(s).all() == True) s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]') @@ -510,28 +510,26 @@ def test_constructor_dict(self): assert_series_equal(result, expected) def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in xrange(10)) + data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) series = Series(data) - refseries = Series(dict(data.iteritems())) + refseries = Series(dict(compat.iteritems(data))) assert_series_equal(refseries, series) def test_orderedDict_ctor(self): # GH3283 - from pandas.util.compat import OrderedDict import pandas, random data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) - self.assertTrue(all(s.values == data.values())) + self.assertTrue(all(s.values == list(data.values()))) def test_orderedDict_subclass_ctor(self): # GH3283 - from pandas.util.compat import OrderedDict import pandas, random class A(OrderedDict): pass data = A([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) - self.assertTrue(all(s.values == data.values())) + self.assertTrue(all(s.values == list(data.values()))) def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] @@ -579,7 +577,7 @@ def test_setindex(self): # works series = self.series.copy() series.index = np.arange(len(series)) - self.assert_(isinstance(series.index, Index)) + tm.assert_isinstance(series.index, Index) def test_array_finalize(self): pass @@ -639,7 +637,7 @@ def test_getitem_get(self): self.assertRaises(KeyError, self.ts.__getitem__, d) def test_iget(self): - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) for i in range(len(s)): result = s.iget(i) exp = s[s.index[i]] @@ -664,12 +662,12 @@ def test_iget_nonunique(self): self.assertEqual(s.iget(2), 2) def test_getitem_regression(self): - s = Series(range(5), index=range(5)) - result = s[range(5)] + s = Series(lrange(5), index=lrange(5)) + result = s[lrange(5)] assert_series_equal(result, s) def test_getitem_setitem_slice_bug(self): - s = Series(range(10), range(10)) + s = Series(lrange(10), lrange(10)) result = s[-12:] assert_series_equal(result, s) @@ -679,7 +677,7 @@ def test_getitem_setitem_slice_bug(self): result = s[:-12] assert_series_equal(result, s[:0]) - s = Series(range(10), range(10)) + s = Series(lrange(10), lrange(10)) s[-12:] = 0 self.assert_((s == 0).all()) @@ -776,15 +774,15 @@ def test_getitem_setitem_integers(self): def test_getitem_box_float64(self): value = self.ts[5] - self.assert_(isinstance(value, np.float64)) + tm.assert_isinstance(value, np.float64) def test_getitem_ambiguous_keyerror(self): - s = Series(range(10), index=range(0, 20, 2)) + s = Series(lrange(10), index=lrange(0, 20, 2)) self.assertRaises(KeyError, s.__getitem__, 1) self.assertRaises(KeyError, s.ix.__getitem__, 1) def test_getitem_unordered_dup(self): - obj = Series(range(5), index=['c', 'a', 'a', 'b', 'b']) + obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) self.assert_(np.isscalar(obj['c'])) self.assert_(obj['c'] == 0) @@ -798,7 +796,7 @@ def test_getitem_dups_with_missing(self): assert_series_equal(result,expected) def test_setitem_ambiguous_keyerror(self): - s = Series(range(10), index=range(0, 20, 2)) + s = Series(lrange(10), index=lrange(0, 20, 2)) self.assertRaises(KeyError, s.__setitem__, 1, 5) self.assertRaises(KeyError, s.ix.__setitem__, 1, 5) @@ -971,7 +969,7 @@ def 
test_basic_getitem_with_labels(self): assert_series_equal(result, expected) # integer indexes, be careful - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) result = s[inds] @@ -998,7 +996,7 @@ def test_basic_setitem_with_labels(self): assert_series_equal(cp, exp) # integer indexes, be careful - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) inds = [0, 4, 6] arr_inds = np.array([0, 4, 6]) @@ -1047,7 +1045,7 @@ def test_ix_getitem_not_monotonic(self): self.assertRaises(KeyError, ts2.ix.__setitem__, slice(d1, d2), 0) def test_ix_getitem_setitem_integer_slice_keyerrors(self): - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) # this is OK cp = s.copy() @@ -1111,8 +1109,8 @@ def test_where(self): for dtype in [ np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64 ]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 - s[mask] = range(2,7) - expected = Series(range(2,7) + range(5,10), dtype=dtype) + s[mask] = lrange(2,7) + expected = Series(lrange(2,7) + lrange(5,10), dtype=dtype) assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1122,7 +1120,7 @@ def test_where(self): mask = s < 5 values = [2.5,3.5,4.5,5.5,6.5] s[mask] = values - expected = Series(values + range(5,10), dtype='float64') + expected = Series(values + lrange(5,10), dtype='float64') assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1136,8 +1134,8 @@ def test_where(self): # GH3235 s = Series(np.arange(10),dtype='int64') mask = s < 5 - s[mask] = range(2,7) - expected = Series(range(2,7) + range(5,10),dtype='int64') + s[mask] = lrange(2,7) + expected = Series(lrange(2,7) + lrange(5,10),dtype='int64') assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1286,13 +1284,13 @@ def test_repr(self): repr(ots) # various names - for name in ['', 1, 1.2, 'foo', u'\u03B1\u03B2\u03B3', + for name in ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'), 'loooooooooooooooooooooooooooooooooooooooooooooooooooong', ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3), - (u'\u03B1', u'\u03B2', u'\u03B3'), - (u'\u03B1', 'bar')]: + (u('\u03B1'), u('\u03B2'), u('\u03B3')), + (u('\u03B1'), 'bar')]: self.series.name = name repr(self.series) @@ -1316,7 +1314,7 @@ def test_repr(self): self.assertFalse("a\n" in repr(ser)) def test_tidy_repr(self): - a = Series([u"\u05d0"] * 1000) + a = Series([u("\u05d0")] * 1000) a.name = 'title1' repr(a) # should not raise exception @@ -1341,7 +1339,7 @@ def test_repr_name_iterable_indexable(self): # it works! 
repr(s) - s.name = (u"\u05d0",) * 2 + s.name = (u("\u05d0"),) * 2 repr(s) def test_repr_should_return_str(self): @@ -1354,20 +1352,20 @@ def test_repr_should_return_str(self): """ data = [8, 5, 3, 5] - index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"] + index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] df = Series(data, index=index1) self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 def test_unicode_string_with_unicode(self): - df = Series([u"\u05d0"], name=u"\u05d1") - if py3compat.PY3: + df = Series([u("\u05d0")], name=u("\u05d1")) + if compat.PY3: str(df) else: - unicode(df) + compat.text_type(df) def test_bytestring_with_unicode(self): - df = Series([u"\u05d0"], name=u"\u05d1") - if py3compat.PY3: + df = Series([u("\u05d0")], name=u("\u05d1")) + if compat.PY3: bytes(df) else: str(df) @@ -1411,10 +1409,10 @@ def test_values(self): self.assert_(np.array_equal(self.ts, self.ts.values)) def test_iteritems(self): - for idx, val in self.series.iteritems(): + for idx, val in compat.iteritems(self.series): self.assertEqual(val, self.series[idx]) - for idx, val in self.ts.iteritems(): + for idx, val in compat.iteritems(self.ts): self.assertEqual(val, self.ts[idx]) def test_sum(self): @@ -1447,7 +1445,7 @@ def test_median(self): self._check_stat_op('median', np.median) # test with integers, test failure - int_ts = TimeSeries(np.ones(10, dtype=int), index=range(10)) + int_ts = TimeSeries(np.ones(10, dtype=int), index=lrange(10)) self.assertAlmostEqual(np.median(int_ts), int_ts.median()) def test_prod(self): @@ -1508,11 +1506,11 @@ def test_argsort(self): self.assert_(isnull(shifted[4]) == True) result = s.argsort() - expected = Series(range(5),dtype='int64') + expected = Series(lrange(5),dtype='int64') assert_series_equal(result,expected) result = shifted.argsort() - expected = Series(range(4) + [-1],dtype='int64') + expected = Series(lrange(4) + [-1],dtype='int64') assert_series_equal(result,expected) def test_argsort_stable(self): @@ -1591,7 +1589,7 @@ def testit(): # 2888 l = [0] - l.extend(list(range(2**40,2**40+1000))) + l.extend(lrange(2**40,2**40+1000)) s = Series(l, dtype='int64') assert_almost_equal(float(f(s)), float(alternate(s.values))) @@ -1634,7 +1632,7 @@ def test_round(self): self.assertEqual(result.name, self.ts.name) def test_prod_numpy16_bug(self): - s = Series([1., 1., 1.], index=range(3)) + s = Series([1., 1., 1.], index=lrange(3)) result = s.prod() self.assert_(not isinstance(result, Series)) @@ -1699,7 +1697,7 @@ def test_describe_none(self): def test_append(self): appendedSeries = self.series.append(self.objSeries) - for idx, value in appendedSeries.iteritems(): + for idx, value in compat.iteritems(appendedSeries): if idx in self.series.index: self.assertEqual(value, self.series[idx]) elif idx in self.objSeries.index: @@ -1788,7 +1786,7 @@ def test_div(self): p = DataFrame({ 'first' : [3,4,5,8], 'second' : [1,1,1,1] }) result = p['first'] / p['second'] - if py3compat.PY3: + if compat.PY3: assert_series_equal(result,p['first'].astype('float64')) else: assert_series_equal(result,p['first']) @@ -1903,7 +1901,7 @@ def test_operators_timedelta64(self): # scalar Timestamp on rhs maxa = df['A'].max() - self.assert_(isinstance(maxa,Timestamp)) + tm.assert_isinstance(maxa,Timestamp) resultb = df['A']- df['A'].max() self.assert_(resultb.dtype=='timedelta64[ns]') @@ -2034,7 +2032,7 @@ def test_timedelta64_functions(self): def test_sub_of_datetime_from_TimeSeries(self): from pandas.core import common as com from datetime import datetime - a = 
Timestamp(datetime(1993,01,07,13,30,00)) + a = Timestamp(datetime(1993, 1, 7, 13, 30, 0)) b = datetime(1993, 6, 22, 13, 30) a = Series([a]) result = com._possibly_cast_to_timedelta(np.abs(a - b)) @@ -2343,7 +2341,7 @@ def test_series_frame_radd_bug(self): import operator # GH 353 - vals = Series([rands(5) for _ in xrange(10)]) + vals = Series([rands(5) for _ in range(10)]) result = 'foo_' + vals expected = vals.map(lambda x: 'foo_' + x) assert_series_equal(result, expected) @@ -2404,7 +2402,7 @@ def _check_fill(meth, op, a, b, fill_value=0): ops = [Series.add, Series.sub, Series.mul, Series.div] equivs = [operator.add, operator.sub, operator.mul] - if py3compat.PY3: + if compat.PY3: equivs.append(operator.truediv) else: equivs.append(operator.div) @@ -2620,9 +2618,8 @@ def test_value_counts_nunique(self): assert_series_equal(hist, expected) # GH 3002, datetime64[ns] - import StringIO import pandas as pd - f = StringIO.StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") + f = StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") df = pd.read_fwf(f, widths=[6,8,3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = df.dt.copy() result = s.value_counts() @@ -2671,7 +2668,7 @@ def test_unique(self): self.assert_(np.array_equal(result, expected)) # test string arrays for coverage - strings = np.tile(np.array([tm.rands(10) for _ in xrange(10)]), 10) + strings = np.tile(np.array([tm.rands(10) for _ in range(10)]), 10) result = np.sort(nanops.unique1d(strings)) expected = np.unique(strings) self.assert_(np.array_equal(result, expected)) @@ -2819,7 +2816,7 @@ def test_to_csv(self): def test_to_csv_unicode_index(self): buf = StringIO() - s = Series([u"\u05d0", "d2"], index=[u"\u05d0", u"\u05d1"]) + s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) s.to_csv(buf, encoding='UTF-8') buf.seek(0) @@ -2871,7 +2868,7 @@ def test_clip(self): result = self.ts.clip(-0.5, 0.5) expected = np.clip(self.ts, -0.5, 0.5) assert_series_equal(result, expected) - self.assert_(isinstance(expected, Series)) + tm.assert_isinstance(expected, Series) def test_clip_types_and_nulls(self): @@ -3343,7 +3340,7 @@ def test_astype_cast_object_int(self): def test_astype_datetimes(self): import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) s = s.astype('O') self.assert_(s.dtype == np.object_) @@ -3365,13 +3362,13 @@ def test_map(self): merged = target.map(source) - for k, v in merged.iteritems(): + for k, v in compat.iteritems(merged): self.assertEqual(v, source[target[k]]) # input could be a dict merged = target.map(source.to_dict()) - for k, v in merged.iteritems(): + for k, v in compat.iteritems(merged): self.assertEqual(v, source[target[k]]) # function @@ -3391,7 +3388,7 @@ def test_map_int(self): self.assert_(not isnull(merged['c'])) def test_map_type_inference(self): - s = Series(range(3)) + s = Series(lrange(3)) s2 = s.map(lambda x: np.where(x == 0, 0, 1)) self.assert_(issubclass(s2.dtype.type, np.integer)) @@ -3400,7 +3397,7 @@ def test_map_decimal(self): result = self.series.map(lambda x: Decimal(str(x))) self.assert_(result.dtype == np.object_) - self.assert_(isinstance(result[0], Decimal)) + tm.assert_isinstance(result[0], Decimal) def test_map_na_exclusion(self): s = Series([1.5, np.nan, 3, np.nan, 5]) @@ -3651,13 +3648,13 @@ def test_reindex(self): subIndex = self.series.index[10:20] subSeries = self.series.reindex(subIndex) - for idx, val
in subSeries.iteritems(): + for idx, val in compat.iteritems(subSeries): self.assertEqual(val, self.series[idx]) subIndex2 = self.ts.index[10:20] subTS = self.ts.reindex(subIndex2) - for idx, val in subTS.iteritems(): + for idx, val in compat.iteritems(subTS): self.assertEqual(val, self.ts[idx]) stuffSeries = self.ts.reindex(subIndex) @@ -3666,7 +3663,7 @@ def test_reindex(self): # This is extremely important for the Cython code to not screw up nonContigIndex = self.ts.index[::2] subNonContig = self.ts.reindex(nonContigIndex) - for idx, val in subNonContig.iteritems(): + for idx, val in compat.iteritems(subNonContig): self.assertEqual(val, self.ts[idx]) self.assertRaises(ValueError, self.ts.reindex) @@ -3938,7 +3935,7 @@ def test_fillna_inplace(self): def test_fillna_invalid_method(self): try: self.ts.fillna(method='ffil') - except ValueError, inst: + except ValueError as inst: self.assert_('ffil' in str(inst)) def test_ffill(self): @@ -4024,7 +4021,7 @@ def test_replace(self): # malformed self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) - self.assertRaises(ValueError, ser.replace, xrange(1, 3), [np.nan, 0]) + self.assertRaises(ValueError, ser.replace, range(1, 3), [np.nan, 0]) ser = Series([0, 1, 2, 3, 4]) result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) @@ -4297,12 +4294,12 @@ def test_reset_index(self): rs = s.reset_index(level=[0, 2], drop=True) self.assert_(rs.index.equals(Index(index.get_level_values(1)))) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) - s = Series(range(10)) + s = Series(lrange(10)) s.index = idx self.assertTrue(isinstance(s, TimeSeries)) @@ -4310,8 +4307,8 @@ def test_set_index_makes_timeseries(self): def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - self.assert_(isinstance(ser, TimeSeries)) - self.assert_(isinstance(ser.index, DatetimeIndex)) + tm.assert_isinstance(ser, TimeSeries) + tm.assert_isinstance(ser.index, DatetimeIndex) def test_replace(self): N = 100 diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index 0432d11aaa254..e3533afc71e95 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -1,3 +1,4 @@ +from pandas import compat import nose import unittest @@ -6,7 +7,7 @@ from pandas import Series, DataFrame -from pandas.util.compat import product +from pandas.compat import product from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal) @@ -106,7 +107,7 @@ def _check2d(df, expected, method='average', axis=0): def test_rank_int(self): s = self.s.dropna().astype('i8') - for method, res in self.results.iteritems(): + for method, res in compat.iteritems(self.results): result = s.rank(method=method) expected = Series(res).dropna() expected.index = result.index diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d057dc5304277..4170f34c13095 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -13,6 +13,8 @@ from numpy.testing import assert_array_equal from numpy.random import randint +from pandas.compat import range, lrange, u +import pandas.compat as compat from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull, bdate_range, date_range) import pandas.core.common as com @@ -34,15 +36,15 @@ def test_iter(self): for s in ds.str: # iter must yield a Series - self.assert_(isinstance(s, Series)) + tm.assert_isinstance(s, 
Series) # indices of each yielded Series should be equal to the index of # the original Series assert_array_equal(s.index, ds.index) for el in s: - # each element of the series is either a basestring or nan - self.assert_(isinstance(el, basestring) or isnull(el)) + # each element of the series is either a basestring/str or nan + self.assert_(isinstance(el, compat.string_types) or isnull(el)) # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in @@ -73,7 +75,7 @@ def test_iter_single_element(self): def test_iter_numeric_try_string(self): # behavior identical to empty series - dsi = Series(range(4)) + dsi = Series(lrange(4)) i, s = 100, 'h' @@ -93,7 +95,7 @@ def test_iter_numeric_try_string(self): def test_iter_object_try_string(self): ds = Series([slice(None, randint(10), randint(10, 20)) - for _ in xrange(4)]) + for _ in range(4)]) i, s = 100, 'h' @@ -140,7 +142,7 @@ def test_count(self): tm.assert_almost_equal(result, exp) result = Series(values).str.count('f[o]+') - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) tm.assert_almost_equal(result, exp) # mixed @@ -150,18 +152,18 @@ def test_count(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.count('a') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = [u'foo', u'foofoo', NA, u'foooofooofommmfoo'] + values = [u('foo'), u('foofoo'), NA, u('foooofooofommmfoo')] result = strings.str_count(values, 'f[o]+') exp = [1, 2, NA, 4] tm.assert_almost_equal(result, exp) result = Series(values).str.count('f[o]+') - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) tm.assert_almost_equal(result, exp) def test_contains(self): @@ -185,11 +187,11 @@ def test_contains(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.contains('o') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = [u'foo', NA, u'fooommm__foo', u'mmm_'] + values = [u('foo'), NA, u('fooommm__foo'), u('mmm_')] pat = 'mmm[_]+' result = strings.str_contains(values, pat) @@ -225,12 +227,12 @@ def test_startswith(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.startswith('f') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'om', NA, u'foo_nom', u'nom', u'bar_foo', NA, - u'foo']) + values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, + u('foo')]) result = values.str.startswith('foo') exp = Series([False, NA, True, False, False, NA, True]) @@ -253,12 +255,12 @@ def test_endswith(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.endswith('f') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'om', NA, u'foo_nom', u'nom', u'bar_foo', NA, - u'foo']) + values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, + u('foo')]) result = values.str.endswith('foo') exp = Series([False, NA, False, False, True, NA, True]) @@ -282,10 +284,10 @@ def test_title(self): tm.assert_almost_equal(mixed, exp) # unicode - values = Series([u"FOO", NA, u"bar", u"Blurg"]) + values = Series([u("FOO"), NA, u("bar"), u("Blurg")]) results = values.str.title() - exp = Series([u"Foo", NA, u"Bar", u"Blurg"]) + exp = Series([u("Foo"), NA, u("Bar"), u("Blurg")]) tm.assert_series_equal(results, 
exp) @@ -305,14 +307,14 @@ def test_lower_upper(self): mixed = mixed.str.upper() rs = Series(mixed).str.lower() xp = ['a', NA, 'b', NA, NA, 'foo', NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'om', NA, u'nom', u'nom']) + values = Series([u('om'), NA, u('nom'), u('nom')]) result = values.str.upper() - exp = Series([u'OM', NA, u'NOM', u'NOM']) + exp = Series([u('OM'), NA, u('NOM'), u('NOM')]) tm.assert_series_equal(result, exp) result = result.str.lower() @@ -335,18 +337,18 @@ def test_replace(self): rs = Series(mixed).str.replace('BAD[_]*', '') xp = ['a', NA, 'b', NA, NA, 'foo', NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'fooBAD__barBAD', NA]) + values = Series([u('fooBAD__barBAD'), NA]) result = values.str.replace('BAD[_]*', '') - exp = Series([u'foobar', NA]) + exp = Series([u('foobar'), NA]) tm.assert_series_equal(result, exp) result = values.str.replace('BAD[_]*', '', n=1) - exp = Series([u'foobarBAD', NA]) + exp = Series([u('foobarBAD'), NA]) tm.assert_series_equal(result, exp) #flags + unicode @@ -373,18 +375,21 @@ def test_repeat(self): rs = Series(mixed).str.repeat(3) xp = ['aaa', NA, 'bbb', NA, NA, 'foofoofoo', NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a', u'b', NA, u'c', NA, u'd']) + values = Series([u('a'), u('b'), NA, u('c'), NA, + u('d')]) result = values.str.repeat(3) - exp = Series([u'aaa', u'bbb', NA, u'ccc', NA, u'ddd']) + exp = Series([u('aaa'), u('bbb'), NA, u('ccc'), NA, + u('ddd')]) tm.assert_series_equal(result, exp) result = values.str.repeat([1, 2, 3, 4, 5, 6]) - exp = Series([u'a', u'bb', NA, u'cccc', NA, u'dddddd']) + exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, + u('dddddd')]) tm.assert_series_equal(result, exp) def test_match(self): @@ -400,14 +405,14 @@ def test_match(self): rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') xp = [('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), NA, NA, [], NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'fooBAD__barBAD', NA, u'foo']) + values = Series([u('fooBAD__barBAD'), NA, u('foo')]) result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([(u'BAD__', u'BAD'), NA, []]) + exp = Series([(u('BAD__'), u('BAD')), NA, []]) tm.assert_series_equal(result, exp) def test_join(self): @@ -422,11 +427,12 @@ def test_join(self): rs = Series(mixed).str.split('_').str.join('_') xp = Series(['a_b', NA, 'asdf_cas_asdf', NA, NA, 'foo', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a_b_c', u'c_d_e', np.nan, u'f_g_h']) + values = Series([u('a_b_c'), u('c_d_e'), np.nan, + u('f_g_h')]) result = values.str.split('_').str.join('_') tm.assert_series_equal(values, result) @@ -444,11 +450,12 @@ def test_len(self): rs = Series(mixed).str.len() xp = Series([3, NA, 13, NA, NA, 3, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'foo', u'fooo', u'fooooo', np.nan, u'fooooooo']) + values = Series([u('foo'), u('fooo'), u('fooooo'), np.nan, + u('fooooooo')]) result = values.str.len() exp = values.map(lambda x: len(x) if com.notnull(x) else NA) @@ -468,14 
+475,15 @@ def test_findall(self): rs = Series(mixed).str.findall('BAD[_]*') xp = Series([['BAD__', 'BAD'], NA, [], NA, NA, ['BAD'], NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'fooBAD__barBAD', NA, u'foo', u'BAD']) + values = Series([u('fooBAD__barBAD'), NA, u('foo'), + u('BAD')]) result = values.str.findall('BAD[_]*') - exp = Series([[u'BAD__', u'BAD'], NA, [], [u'BAD']]) + exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]]) tm.assert_almost_equal(result, exp) def test_pad(self): @@ -500,7 +508,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='left') xp = Series([' a', NA, ' b', NA, NA, ' ee', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) mixed = Series(['a', NA, 'b', True, datetime.today(), @@ -509,7 +517,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='right') xp = Series(['a ', NA, 'b ', NA, NA, 'ee ', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) mixed = Series(['a', NA, 'b', True, datetime.today(), @@ -518,22 +526,26 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='both') xp = Series([' a ', NA, ' b ', NA, NA, ' ee ', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a', u'b', NA, u'c', NA, u'eeeeee']) + values = Series([u('a'), u('b'), NA, u('c'), NA, + u('eeeeee')]) result = values.str.pad(5, side='left') - exp = Series([u' a', u' b', NA, u' c', NA, u'eeeeee']) + exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='right') - exp = Series([u'a ', u'b ', NA, u'c ', NA, u'eeeeee']) + exp = Series([u('a '), u('b '), NA, u('c '), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='both') - exp = Series([u' a ', u' b ', NA, u' c ', NA, u'eeeeee']) + exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_center(self): @@ -551,14 +563,16 @@ def test_center(self): xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' eee ', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a', u'b', NA, u'c', NA, u'eeeeee']) + values = Series([u('a'), u('b'), NA, u('c'), NA, + u('eeeeee')]) result = values.str.center(5) - exp = Series([u' a ', u' b ', NA, u' c ', NA, u'eeeeee']) + exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_split(self): @@ -581,15 +595,16 @@ def test_split(self): xp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a_b_c', u'c_d_e', NA, u'f_g_h']) + values = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')]) result = values.str.split('_') - exp = Series([[u'a', u'b', u'c'], [u'c', u'd', u'e'], NA, - [u'f', u'g', u'h']]) + exp = Series([[u('a'), u('b'), u('c')], + [u('c'), u('d'), u('e')], NA, + [u('f'), u('g'), u('h')]]) tm.assert_series_equal(result, exp) def test_split_noargs(self): @@ -646,14 +661,15 @@ def test_slice(self): xp = Series(['foo', NA, 'bar', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + 
tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'aafootwo', u'aabartwo', NA, u'aabazqux']) + values = Series([u('aafootwo'), u('aabartwo'), NA, + u('aabazqux')]) result = values.str.slice(2, 5) - exp = Series([u'foo', u'bar', NA, u'baz']) + exp = Series([u('foo'), u('bar'), NA, u('baz')]) tm.assert_series_equal(result, exp) def test_slice_replace(self): @@ -683,37 +699,38 @@ def test_strip_lstrip_rstrip_mixed(self): xp = Series(['aa', NA, 'bb', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.lstrip() xp = Series(['aa ', NA, 'bb \t\n', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rstrip() xp = Series([' aa', NA, ' bb', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) def test_strip_lstrip_rstrip_unicode(self): # unicode - values = Series([u' aa ', u' bb \n', NA, u'cc ']) + values = Series([u(' aa '), u(' bb \n'), NA, + u('cc ')]) result = values.str.strip() - exp = Series([u'aa', u'bb', NA, u'cc']) + exp = Series([u('aa'), u('bb'), NA, u('cc')]) tm.assert_series_equal(result, exp) result = values.str.lstrip() - exp = Series([u'aa ', u'bb \n', NA, u'cc ']) + exp = Series([u('aa '), u('bb \n'), NA, u('cc ')]) tm.assert_series_equal(result, exp) result = values.str.rstrip() - exp = Series([u' aa', u' bb', NA, u'cc']) + exp = Series([u(' aa'), u(' bb'), NA, u('cc')]) tm.assert_series_equal(result, exp) def test_strip_lstrip_rstrip_args(self): @@ -732,17 +749,18 @@ def test_strip_lstrip_rstrip_args(self): assert_series_equal(rs, xp) def test_strip_lstrip_rstrip_args_unicode(self): - values = Series([u'xxABCxx', u'xx BNSD', u'LDFJH xx']) + values = Series([u('xxABCxx'), u('xx BNSD'), + u('LDFJH xx')]) - rs = values.str.strip(u'x') + rs = values.str.strip(u('x')) xp = Series(['ABC', ' BNSD', 'LDFJH ']) assert_series_equal(rs, xp) - rs = values.str.lstrip(u'x') + rs = values.str.lstrip(u('x')) xp = Series(['ABCxx', ' BNSD', 'LDFJH xx']) assert_series_equal(rs, xp) - rs = values.str.rstrip(u'x') + rs = values.str.rstrip(u('x')) xp = Series(['xxABC', 'xx BNSD', 'LDFJH ']) assert_series_equal(rs, xp) @@ -764,14 +782,15 @@ def test_get(self): xp = Series(['b', NA, 'd', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a_b_c', u'c_d_e', np.nan, u'f_g_h']) + values = Series([u('a_b_c'), u('c_d_e'), np.nan, + u('f_g_h')]) result = values.str.split('_').str.get(1) - expected = Series([u'b', u'd', np.nan, u'g']) + expected = Series([u('b'), u('d'), np.nan, u('g')]) tm.assert_series_equal(result, expected) def test_more_contains(self): @@ -872,7 +891,7 @@ def test_match_findall_flags(self): self.assertEquals(result[0], True) def test_encode_decode(self): - base = Series([u'a', u'b', u'a\xe4']) + base = Series([u('a'), u('b'), u('a\xe4')]) series = base.str.encode('utf-8') f = lambda x: x.decode('utf-8') @@ -882,7 +901,7 @@ def test_encode_decode(self): tm.assert_series_equal(result, exp) def test_encode_decode_errors(self): - encodeBase = Series([u'a', u'b', u'a\x9d']) + encodeBase = Series([u('a'), u('b'), u('a\x9d')]) self.assertRaises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252') diff --git a/pandas/tests/test_tests.py b/pandas/tests/test_tests.py index 
89238187ce434..b52ab61f7be6b 100644 --- a/pandas/tests/test_tests.py +++ b/pandas/tests/test_tests.py @@ -1,6 +1,5 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from __future__ import with_statement # support python 2.5 import pandas as pd import unittest import warnings diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 54c00e798f08a..1ed6dd4469f4d 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -5,6 +5,7 @@ from pandas import Index, isnull, Timestamp from pandas.util.testing import assert_almost_equal import pandas.util.testing as common +from pandas.compat import range, lrange, zip import pandas.lib as lib import pandas.algos as algos from datetime import datetime @@ -30,7 +31,7 @@ def test_groupby_withnull(self): def test_backfill(self): old = Index([1, 5, 10]) - new = Index(range(12)) + new = Index(lrange(12)) filler = algos.backfill_int64(old, new) @@ -39,7 +40,7 @@ def test_backfill(self): # corner case old = Index([1, 4]) - new = Index(range(5, 10)) + new = Index(lrange(5, 10)) filler = algos.backfill_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] @@ -47,7 +48,7 @@ def test_backfill(self): def test_pad(self): old = Index([1, 5, 10]) - new = Index(range(12)) + new = Index(lrange(12)) filler = algos.pad_int64(old, new) @@ -56,7 +57,7 @@ def test_pad(self): # corner case old = Index([5, 10]) - new = Index(range(5)) + new = Index(lrange(5)) filler = algos.pad_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] self.assert_(np.array_equal(filler, expect_filler)) @@ -526,7 +527,7 @@ def _check(dtype): bins = np.array([6, 12], dtype=np.int64) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) - + func = getattr(algos,'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], bins) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index f96f3b98a0383..7133782fa66d3 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1,10 +1,11 @@ """ SQL-style merge routines """ +import types -import itertools import numpy as np -import types +from pandas.compat import range, long, lrange, lzip, zip +import pandas.compat as compat from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame, _merge_doc from pandas.core.generic import NDFrame @@ -441,7 +442,7 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'): right_labels.append(rlab) group_sizes.append(count) - max_groups = 1L + max_groups = long(1) for x in group_sizes: max_groups *= long(x) @@ -892,7 +893,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, raise AssertionError('first argument must be a list-like of pandas ' 'objects, you passed an object of type ' '"{0}"'.format(type(objs).__name__)) - + if join == 'outer': self.intersect = False elif join == 'inner': @@ -959,7 +960,7 @@ def get_result(self): name = com._consensus_name_attr(self.objs) return Series(new_data, index=self.new_axes[0], name=name) elif self._is_series: - data = dict(itertools.izip(xrange(len(self.objs)), self.objs)) + data = dict(zip(range(len(self.objs)), self.objs)) index, columns = self.new_axes tmpdf = DataFrame(data, index=index) if columns is not None: @@ -1057,7 +1058,7 @@ def _concat_blocks(self, blocks): concat_items = indexer else: concat_items = self.new_axes[0].take(indexer) - + if self.ignore_index: ref_items = self._get_fresh_axis() return make_block(concat_values, concat_items, ref_items) @@ -1134,7 +1135,7 @@ def _get_new_axes(self): raise AssertionError() # ufff... 
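The next change is the textbook case for `lrange` rather than the lazy `range`: the result is mutated in place, and a Python 3 `range` object has no `remove()` method. A quick illustration of the failure mode (hypothetical snippet, not part of the patch):

    indices = range(3)        # py3: a lazy range object, not a list
    # indices.remove(0)       # AttributeError on py3
    indices = list(range(3))  # what lrange(3) returns on py3
    indices.remove(0)         # fine: indices == [1, 2]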
- indices = range(ndim) + indices = lrange(ndim) indices.remove(self.axis) for i, ax in zip(indices, self.join_axes): @@ -1199,7 +1200,7 @@ def _concat_indexes(indexes): def _make_concat_multiindex(indexes, keys, levels=None, names=None): if ((levels is None and isinstance(keys[0], tuple)) or (levels is not None and len(levels) > 1)): - zipped = zip(*keys) + zipped = lzip(*keys) if names is None: names = [None] * len(zipped) @@ -1297,7 +1298,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): def _should_fill(lname, rname): - if not isinstance(lname, basestring) or not isinstance(rname, basestring): + if not isinstance(lname, compat.string_types) or not isinstance(rname, compat.string_types): return True return lname == rname diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 945f7fb4ab437..effcc3ff7695f 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -5,6 +5,8 @@ from pandas.core.reshape import _unstack_multiple from pandas.tools.merge import concat from pandas.tools.util import cartesian_product +from pandas.compat import range, lrange, zip +from pandas import compat import pandas.core.common as com import numpy as np @@ -149,9 +151,9 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean', def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean): grand_margin = {} - for k, v in data[values].iteritems(): + for k, v in compat.iteritems(data[values]): try: - if isinstance(aggfunc, basestring): + if isinstance(aggfunc, compat.string_types): grand_margin[k] = getattr(v, aggfunc)() else: grand_margin[k] = aggfunc(v) @@ -196,7 +198,7 @@ def _all_key(key): row_margin = row_margin.stack() # slight hack - new_order = [len(cols)] + range(len(cols)) + new_order = [len(cols)] + lrange(len(cols)) row_margin.index = row_margin.index.reorder_levels(new_order) else: row_margin = Series(np.nan, index=result.columns) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 1ffdf83b02763..3e3fff32a654a 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -15,6 +15,8 @@ from pandas.tseries.period import PeriodIndex, Period from pandas.tseries.frequencies import get_period_alias, get_base_alias from pandas.tseries.offsets import DateOffset +from pandas.compat import range, lrange, lmap, map, zip +import pandas.compat as compat try: # mpl optional import pandas.tseries.converter as conv @@ -96,13 +98,13 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', import matplotlib.pyplot as plt if color is None and colormap is not None: - if isinstance(colormap, basestring): + if isinstance(colormap, compat.string_types): import matplotlib.cm as cm cmap = colormap colormap = cm.get_cmap(colormap) if colormap is None: raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = map(colormap, np.linspace(0, 1, num=num_colors)) + colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) elif color is not None: if colormap is not None: warnings.warn("'color' and 'colormap' cannot be used " @@ -111,7 +113,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', else: if color_type == 'default': colors = plt.rcParams.get('axes.color_cycle', list('bgrcmyk')) - if isinstance(colors, basestring): + if isinstance(colors, compat.string_types): colors = list(colors) elif color_type == 'random': import random @@ -119,7 +121,7 @@ def random_color(column): random.seed(column) return [random.random() for _ in range(3)] - colors = 
map(random_color, range(num_colors)) + colors = lmap(random_color, lrange(num_colors)) else: raise NotImplementedError @@ -240,8 +242,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, marker = _get_marker_compat(marker) - for i, a in zip(range(n), df.columns): - for j, b in zip(range(n), df.columns): + for i, a in zip(lrange(n), df.columns): + for j, b in zip(lrange(n), df.columns): ax = axes[i, j] if i == j: @@ -500,7 +502,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): for sampling in samplings]) if fig is None: fig = plt.figure() - x = range(samples) + x = lrange(samples) axes = [] ax1 = fig.add_subplot(2, 3, 1) ax1.set_xlabel("Sample") @@ -598,7 +600,7 @@ def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None, raise ValueError('Length of xticks must match number of columns') x = xticks else: - x = range(ncols) + x = lrange(ncols) if ax is None: ax = plt.gca() @@ -681,7 +683,7 @@ def autocorrelation_plot(series, ax=None): def r(h): return ((data[:n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0 x = np.arange(n) + 1 - y = map(r, x) + y = lmap(r, x) z95 = 1.959963984540054 z99 = 2.5758293035489004 ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') @@ -1035,9 +1037,9 @@ def _get_xticks(self, convert_period=False): x = self.data.index._mpl_repr() else: self._need_to_set_index = True - x = range(len(index)) + x = lrange(len(index)) else: - x = range(len(index)) + x = lrange(len(index)) return x @@ -1711,7 +1713,7 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, if ax.get_yaxis().get_ticks_position().strip().lower() == 'right': fig = _gcf() axes = fig.get_axes() - for i in range(len(axes))[::-1]: + for i in reversed(range(len(axes))): ax = axes[i] ypos = ax.get_yaxis().get_ticks_position().strip().lower() if ypos == 'left': diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 43cbb9344b714..5928472df1c22 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,7 +1,8 @@ -import numpy as np import random from copy import deepcopy +import numpy as np +from pandas.compat import range, zip # # TODO: # * Make sure legends work properly @@ -600,7 +601,7 @@ def trellis(self, layers): grouped = data.groupby(self.by[0]) else: grouped = data.groupby(self.by) - groups = grouped.groups.keys() + groups = list(grouped.groups.keys()) if self.by[0] == '.' or self.by[1] == '.': shingle1 = set([g for g in groups]) else: @@ -644,8 +645,8 @@ def dictionary_union(dict1, dict2): A union of the dictionaries. It assumes that values with the same keys are identical. 
""" - keys1 = dict1.keys() - keys2 = dict2.keys() + keys1 = list(dict1.keys()) + keys2 = list(dict2.keys()) result = {} for key1 in keys1: result[key1] = dict1[key1] @@ -771,13 +772,13 @@ def adjust_subplots(fig, axes, trellis, layers): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - if len(legend.keys()) == 0: + if len(list(legend.keys())) == 0: key_function = lambda tup: tup - elif len(legend.keys()[0]) == 2: + elif len(list(legend.keys())[0]) == 2: key_function = lambda tup: (tup[1]) else: key_function = lambda tup: (tup[1], tup[3]) - for key in sorted(legend.keys(), key=key_function): + for key in sorted(list(legend.keys()), key=key_function): value = legend[key] patches.append(value) if len(key) == 2: @@ -844,13 +845,13 @@ def render(self, fig=None): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - if len(legend.keys()) == 0: + if len(list(legend.keys())) == 0: key_function = lambda tup: tup - elif len(legend.keys()[0]) == 2: + elif len(list(legend.keys())[0]) == 2: key_function = lambda tup: (tup[1]) else: key_function = lambda tup: (tup[1], tup[3]) - for key in sorted(legend.keys(), key=key_function): + for key in sorted(list(legend.keys()), key=key_function): value = legend[key] patches.append(value) if len(key) == 2: diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index b0261077fc767..1008e23c3ebcd 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -9,12 +9,14 @@ import numpy as np import random -from pandas import * +from pandas.compat import range, lrange, lzip, zip +from pandas import compat from pandas.tseries.index import DatetimeIndex from pandas.tools.merge import merge, concat, ordered_merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal, rands, makeCustomDataframe as mkdf) +from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range import pandas.algos as algos import pandas.util.testing as tm @@ -26,7 +28,7 @@ def get_test_data(ngroups=NGROUPS, n=N): - unique_groups = range(ngroups) + unique_groups = lrange(ngroups) arr = np.asarray(np.tile(unique_groups, n // ngroups)) if len(arr) < n: @@ -555,8 +557,8 @@ def test_merge_different_column_key_names(self): assert_almost_equal(merged['value_y'], [6, np.nan, 5, 8, 5, 8, 7]) def test_merge_nocopy(self): - left = DataFrame({'a': 0, 'b': 1}, index=range(10)) - right = DataFrame({'c': 'foo', 'd': 'bar'}, index=range(10)) + left = DataFrame({'a': 0, 'b': 1}, index=lrange(10)) + right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10)) merged = merge(left, right, left_index=True, right_index=True, copy=False) @@ -582,15 +584,15 @@ def test_join_sort(self): # smoke test joined = left.join(right, on='key', sort=False) - self.assert_(np.array_equal(joined.index, range(4))) + self.assert_(np.array_equal(joined.index, lrange(4))) def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame left = DataFrame({'key': [1, 1, 2, 2, 3], - 'value': range(5)}, columns=['value', 'key']) + 'value': lrange(5)}, columns=['value', 'key']) right = DataFrame({'key': [1, 1, 2, 3, 4, 5], - 'rvalue': range(6)}) + 'rvalue': lrange(6)}) joined = merge(left, right, on='key', how='outer') expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5.], @@ -604,8 +606,8 @@ def test_intelligently_handle_join_key(self): def test_handle_join_key_pass_array(self): left = DataFrame({'key': [1, 1, 2, 2, 3], - 
'value': range(5)}, columns=['value', 'key']) - right = DataFrame({'rvalue': range(6)}) + 'value': lrange(5)}, columns=['value', 'key']) + right = DataFrame({'rvalue': lrange(6)}) key = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on='key', right_on=key, how='outer') @@ -615,8 +617,8 @@ def test_handle_join_key_pass_array(self): self.assert_(merged['key'].notnull().all()) self.assert_(merged2['key'].notnull().all()) - left = DataFrame({'value': range(5)}, columns=['value']) - right = DataFrame({'rvalue': range(6)}) + left = DataFrame({'value': lrange(5)}, columns=['value']) + right = DataFrame({'rvalue': lrange(6)}) lkey = np.array([1, 1, 2, 2, 3]) rkey = np.array([1, 1, 2, 3, 4, 5]) @@ -624,8 +626,8 @@ def test_handle_join_key_pass_array(self): self.assert_(np.array_equal(merged['key_0'], np.array([1, 1, 1, 1, 2, 2, 3, 4, 5]))) - left = DataFrame({'value': range(3)}) - right = DataFrame({'rvalue': range(6)}) + left = DataFrame({'value': lrange(3)}) + right = DataFrame({'rvalue': lrange(6)}) key = np.array([0, 1, 1, 2, 2, 3]) merged = merge(left, right, left_index=True, right_on=key, how='outer') @@ -787,7 +789,7 @@ def setUp(self): def test_merge_on_multikey(self): joined = self.data.join(self.to_join, on=['key1', 'key2']) - join_key = Index(zip(self.data['key1'], self.data['key2'])) + join_key = Index(lzip(self.data['key1'], self.data['key2'])) indexer = self.to_join.index.get_indexer(join_key) ex_values = self.to_join.values.take(indexer, axis=0) ex_values[indexer == -1] = np.nan @@ -809,7 +811,7 @@ def test_merge_right_vs_left(self): def test_compress_group_combinations(self): # ~ 40000000 possible unique groups - key1 = np.array([rands(10) for _ in xrange(10000)], dtype='O') + key1 = np.array([rands(10) for _ in range(10000)], dtype='O') key1 = np.tile(key1, 2) key2 = key1[::-1] @@ -1022,7 +1024,7 @@ def _join_by_hand(a, b, how='left'): result_columns = a.columns.append(b.columns) - for col, s in b_re.iteritems(): + for col, s in compat.iteritems(b_re): a_re[col] = s return a_re.reindex(columns=result_columns) @@ -1469,7 +1471,7 @@ def test_panel_join_many(self): data_dict = {} for p in panels: - data_dict.update(p.iterkv()) + data_dict.update(compat.iteritems(p)) joined = panels[0].join(panels[1:], how='inner') expected = Panel.from_dict(data_dict, intersect=True) @@ -1613,7 +1615,7 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal(result.columns, range(2))) + self.assertTrue(np.array_equal(result.columns, lrange(2))) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A') @@ -1763,6 +1765,5 @@ def test_multigroup(self): self.assert_(result['group'].notnull().all()) if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index a603118c2ad16..57e7d2f7f6ae9 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1,11 +1,14 @@ +import datetime import unittest import numpy as np from numpy.testing import assert_equal +import pandas from pandas import DataFrame, Series, Index, MultiIndex from pandas.tools.merge import concat from pandas.tools.pivot import pivot_table, crosstab +from pandas.compat import range, u, product import pandas.util.testing as tm @@ -72,9 +75,18 @@ def test_pivot_table_dropna(self): pv_col = df.pivot_table('quantity', 'month', ['customer', 'product'], dropna=False) pv_ind = 
df.pivot_table('quantity', ['customer', 'product'], 'month', dropna=False) - m = MultiIndex.from_tuples([(u'A', u'a'), (u'A', u'b'), (u'A', u'c'), (u'A', u'd'), - (u'B', u'a'), (u'B', u'b'), (u'B', u'c'), (u'B', u'd'), - (u'C', u'a'), (u'C', u'b'), (u'C', u'c'), (u'C', u'd')]) + m = MultiIndex.from_tuples([(u('A'), u('a')), + (u('A'), u('b')), + (u('A'), u('c')), + (u('A'), u('d')), + (u('B'), u('a')), + (u('B'), u('b')), + (u('B'), u('c')), + (u('B'), u('d')), + (u('C'), u('a')), + (u('C'), u('b')), + (u('C'), u('c')), + (u('C'), u('d'))]) assert_equal(pv_col.columns.values, m.values) assert_equal(pv_ind.index.values, m.values) @@ -151,7 +163,7 @@ def test_pivot_index_with_nan(self): nan = np.nan df = DataFrame({"a":['R1', 'R2', nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, nan , 20]}) result = df.pivot('a','b','c') - expected = DataFrame([[nan,nan,nan,nan],[nan,10,nan,nan], + expected = DataFrame([[nan,nan,nan,nan],[nan,10,nan,nan], [nan,nan,nan,nan],[nan,nan,15,20]], index = Index(['R1','R2',nan,'R4'],name='a'), columns = Index(['C1','C2','C3','C4'],name='b')) @@ -199,20 +211,17 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']): # no rows rtable = self.data.pivot_table(cols=['AA', 'BB'], margins=True, aggfunc=np.mean) - self.assert_(isinstance(rtable, Series)) + tm.assert_isinstance(rtable, Series) for item in ['DD', 'EE', 'FF']: gmarg = table[item]['All', ''] self.assertEqual(gmarg, self.data[item].mean()) def test_pivot_integer_columns(self): # caused by upstream bug in unstack - from pandas.util.compat import product - import datetime - import pandas d = datetime.date.min data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'], - [d + datetime.timedelta(i) for i in xrange(20)], [1.0])) + [d + datetime.timedelta(i) for i in range(20)], [1.0])) df = pandas.DataFrame(data) table = df.pivot_table(values=4, rows=[0, 1, 3], cols=[2]) @@ -236,9 +245,6 @@ def test_pivot_no_level_overlap(self): tm.assert_frame_equal(table, expected) def test_pivot_columns_lexsorted(self): - import datetime - import numpy as np - import pandas n = 10000 diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index 7da9a3bb5a95a..53258864b1ab8 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -3,6 +3,7 @@ import unittest import numpy as np +from pandas.compat import zip from pandas import DataFrame, Series, unique import pandas.util.testing as tm diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index ffed6cafc1047..aa64b046c6891 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -8,6 +8,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.nanops as nanops +from pandas.compat import zip import numpy as np diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 1f2905b86f7d0..7de8c25379258 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -8,7 +8,7 @@ def match(needles, haystack): def cartesian_product(X): ''' - Numpy version of itertools.product or pandas.util.compat.product. + Numpy version of itertools.product or pandas.compat.product. Sometimes faster (for large inputs)... 
Examples diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index d0ec942cec307..54c2a4a2a3056 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -10,6 +10,8 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular +from pandas.compat import range, lrange +import pandas.compat as compat import pandas.lib as lib import pandas.core.common as com from pandas.core.index import Index @@ -36,7 +38,7 @@ def _to_ordinalf(tm): def time2num(d): - if isinstance(d, basestring): + if isinstance(d, compat.string_types): parsed = tools.to_datetime(d) if not isinstance(parsed, datetime): raise ValueError('Could not parse time %s' % d) @@ -161,7 +163,7 @@ def try_parse(values): return dates.date2num(values) elif (com.is_integer(values) or com.is_float(values)): return values - elif isinstance(values, basestring): + elif isinstance(values, compat.string_types): return try_parse(values) elif isinstance(values, (list, tuple, np.ndarray)): if not isinstance(values, np.ndarray): @@ -330,7 +332,7 @@ def __call__(self): if len(all_dates) > 0: locs = self.raise_if_exceeds(dates.date2num(all_dates)) return locs - except Exception, e: # pragma: no cover + except Exception as e: # pragma: no cover pass lims = dates.date2num([dmin, dmax]) @@ -808,7 +810,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): - if isinstance(freq, basestring): + if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) fgroup = frequencies.get_freq_group(freq) @@ -845,7 +847,7 @@ class TimeSeries_DateLocator(Locator): def __init__(self, freq, minor_locator=False, dynamic_mode=True, base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, basestring): + if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) self.freq = freq self.base = base @@ -884,7 +886,7 @@ def __call__(self): base = self.base (d, m) = divmod(vmin, base) vmin = (d + 1) * base - locs = range(vmin, vmax + 1, base) + locs = lrange(vmin, vmax + 1, base) return locs def autoscale(self): @@ -924,7 +926,7 @@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): - if isinstance(freq, basestring): + if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) self.format = None self.freq = freq diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 51b8e5d042ca9..2c4fc0d1b9c78 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,4 +1,6 @@ from datetime import datetime +from pandas.compat import range, long, zip +from pandas import compat import re import numpy as np @@ -54,14 +56,14 @@ def get_to_timestamp_base(base): def get_freq_group(freq): - if isinstance(freq, basestring): + if isinstance(freq, compat.string_types): base, mult = get_freq_code(freq) freq = base return (freq // 1000) * 1000 def get_freq(freq): - if isinstance(freq, basestring): + if isinstance(freq, compat.string_types): base, mult = get_freq_code(freq) freq = base return freq @@ -364,7 +366,7 @@ def get_period_alias(offset_str): } for _i, _weekday in enumerate(['MON', 'TUE', 'WED', 'THU', 'FRI']): - for _iweek in xrange(4): + for _iweek in range(4): _name = 'WOM-%d%s' % (_iweek + 1, _weekday) _offset_map[_name] = offsets.WeekOfMonth(week=_iweek, weekday=_i) _rule_aliases[_name.replace('-', '@')] = _name @@ -372,12 +374,12 @@ def get_period_alias(offset_str): # Note that _rule_aliases is not 1:1 
(d[BA]==d[A@DEC]), and so traversal # order matters when constructing an inverse. we pick one. #2331 _legacy_reverse_map = dict((v, k) for k, v in - reversed(sorted(_rule_aliases.iteritems()))) + reversed(sorted(compat.iteritems(_rule_aliases)))) # for helping out with pretty-printing and name-lookups _offset_names = {} -for name, offset in _offset_map.iteritems(): +for name, offset in compat.iteritems(_offset_map): if offset is None: continue offset.name = name @@ -416,7 +418,7 @@ def to_offset(freqstr): if isinstance(freqstr, tuple): name = freqstr[0] stride = freqstr[1] - if isinstance(stride, basestring): + if isinstance(stride, compat.string_types): name, stride = stride, name name, _ = _base_and_stride(name) delta = get_offset(name) * stride @@ -610,7 +612,7 @@ def get_standard_freq(freq): } _reverse_period_code_map = {} -for _k, _v in _period_code_map.iteritems(): +for _k, _v in compat.iteritems(_period_code_map): _reverse_period_code_map[_v] = _k # Additional aliases @@ -770,7 +772,7 @@ def infer_freq(index, warn=True): inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() -_ONE_MICRO = 1000L +_ONE_MICRO = long(1000) _ONE_MILLI = _ONE_MICRO * 1000 _ONE_SECOND = _ONE_MILLI * 1000 _ONE_MINUTE = 60 * _ONE_SECOND diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9983f12bb29f0..17d357370c078 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -8,6 +8,8 @@ from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE from pandas.core.index import Index, Int64Index +import pandas.compat as compat +from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, Resolution, get_reso_string) @@ -70,7 +72,7 @@ def wrapper(self, other): other = _to_m8(other, tz=self.tz) elif isinstance(other, list): other = DatetimeIndex(other) - elif isinstance(other, basestring): + elif isinstance(other, compat.string_types): other = _to_m8(other, tz=self.tz) elif not isinstance(other, np.ndarray): other = _ensure_datetime64(other) @@ -207,7 +209,7 @@ def __new__(cls, data=None, return data - if issubclass(data.dtype.type, basestring): + if issubclass(data.dtype.type, compat.string_types): data = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst) @@ -581,21 +583,23 @@ def __contains__(self, key): def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=u'NaT', **kwargs): + def _format_native_types(self, na_rep=u('NaT'), **kwargs): data = list(self) # tz formatter or time formatter zero_time = time(0, 0) for d in data: if d.time() != zero_time or d.tzinfo is not None: - return [u'%s' % x for x in data ] + return [u('%s') % x for x in data] values = np.array(data,dtype=object) mask = isnull(self.values) values[mask] = na_rep imask = -mask - values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ]) + values[imask] = np.array([u('%d-%.2d-%.2d') % ( + dt.year, dt.month, dt.day) + for dt in values[imask] ]) return values.tolist() def isin(self, values): @@ -766,7 +770,7 @@ def shift(self, n, freq=None): shifted : DatetimeIndex """ if freq is not None and freq != self.offset: - if isinstance(freq, basestring): + if isinstance(freq, compat.string_types): freq = to_offset(freq) result = Index.shift(self, n, freq) result.tz = self.tz @@ -1230,7 +1234,7 @@ def slice_locs(self, start=None, end=None): """ Index.slice_locs, customized to handle partial ISO-8601 string slicing 
""" - if isinstance(start, basestring) or isinstance(end, basestring): + if isinstance(start, compat.string_types) or isinstance(end, compat.string_types): if self.is_monotonic: try: @@ -1543,7 +1547,7 @@ def indexer_at_time(self, time, asof=False): if asof: raise NotImplementedError - if isinstance(time, basestring): + if isinstance(time, compat.string_types): time = parse(time).time() if time.tzinfo: @@ -1573,10 +1577,10 @@ def indexer_between_time(self, start_time, end_time, include_start=True, """ from dateutil.parser import parse - if isinstance(start_time, basestring): + if isinstance(start_time, compat.string_types): start_time = parse(start_time).time() - if isinstance(end_time, basestring): + if isinstance(end_time, compat.string_types): end_time = parse(end_time).time() if start_time.tzinfo or end_time.tzinfo: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index fc57f96239636..b78fa52f0be03 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,4 +1,6 @@ from datetime import date, datetime, timedelta +from pandas.compat import range +from pandas import compat import numpy as np from pandas.tseries.tools import to_datetime @@ -80,10 +82,10 @@ def __init__(self, n=1, **kwds): def apply(self, other): if len(self.kwds) > 0: if self.n > 0: - for i in xrange(self.n): + for i in range(self.n): other = other + self._offset else: - for i in xrange(-self.n): + for i in range(-self.n): other = other - self._offset return other else: @@ -99,10 +101,10 @@ def _should_cache(self): return self.isAnchored() and self._cacheable def _params(self): - attrs = [(k, v) for k, v in vars(self).iteritems() + attrs = [(k, v) for k, v in compat.iteritems(vars(self)) if k not in ['kwds', '_offset', 'name', 'normalize', 'busdaycalendar']] - attrs.extend(self.kwds.items()) + attrs.extend(list(self.kwds.items())) attrs = sorted(set(attrs)) params = tuple([str(self.__class__)] + attrs) @@ -137,7 +139,7 @@ def __eq__(self, other): if other is None: return False - if isinstance(other, basestring): + if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -428,7 +430,7 @@ def rule_code(self): @staticmethod def _to_dt64(dt, dtype='datetime64'): - if isinstance(dt, (datetime, basestring)): + if isinstance(dt, (datetime, compat.string_types)): dt = np.datetime64(dt, dtype=dtype) if isinstance(dt, np.datetime64): dt = dt.astype(dtype) @@ -622,14 +624,14 @@ def apply(self, other): if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) k = k - 1 - for i in xrange(k): + for i in range(k): other = other + self._inc else: k = self.n otherDay = other.weekday() if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) - for i in xrange(-k): + for i in range(-k): other = other - self._inc return other @@ -713,7 +715,7 @@ def getOffsetOfMonth(self, dt): d = w.rollforward(d) - for i in xrange(self.week): + for i in range(self.week): d = w.apply(d) return d @@ -1166,7 +1168,7 @@ def __add__(self, other): return self.apply(other) def __eq__(self, other): - if isinstance(other, basestring): + if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -1181,7 +1183,7 @@ def __hash__(self): return hash(self._params()) def __ne__(self, other): - if isinstance(other, basestring): + if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -1315,7 
+1317,7 @@ def generate_range(start=None, end=None, periods=None, end : datetime (default None) periods : int, optional time_rule : (legacy) name of DateOffset object to be used, optional - Corresponds with names expected by tseries.frequencies.get_offset + Corresponds with names expected by tseries.frequencies.get_offset Note ---- diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 4fec590dddd14..bf1199dc2690f 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -14,12 +14,13 @@ import pandas.core.common as com from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE -from pandas.util import py3compat +from pandas import compat from pandas.lib import Timestamp import pandas.lib as lib import pandas.tslib as tslib import pandas.algos as _algos +from pandas.compat import map, zip, u #--------------- @@ -47,7 +48,7 @@ class Period(PandasObject): Parameters ---------- - value : Period or basestring, default None + value : Period or compat.string_types, default None The time period represented (e.g., '4Q2005') freq : str, default None e.g., 'B' for businessday, ('T', 5) or '5T' for 5 minutes @@ -99,7 +100,7 @@ def __init__(self, value=None, freq=None, ordinal=None, converted = other.asfreq(freq) self.ordinal = converted.ordinal - elif isinstance(value, basestring) or com.is_integer(value): + elif isinstance(value, compat.string_types) or com.is_integer(value): if com.is_integer(value): value = str(value) @@ -267,7 +268,7 @@ def __repr__(self): formatted = tslib.period_format(self.ordinal, base) freqstr = _freq_mod._reverse_period_code_map[base] - if not py3compat.PY3: + if not compat.PY3: encoding = com.get_option("display.encoding") formatted = formatted.encode(encoding) @@ -666,7 +667,7 @@ def _from_arraylike(cls, data, freq, tz): def __contains__(self, key): if not isinstance(key, Period) or key.freq != self.freq: - if isinstance(key, basestring): + if isinstance(key, compat.string_types): try: self.get_loc(key) return True @@ -946,7 +947,7 @@ def slice_locs(self, start=None, end=None): """ Index.slice_locs, customized to handle partial ISO-8601 string slicing """ - if isinstance(start, basestring) or isinstance(end, basestring): + if isinstance(start, compat.string_types) or isinstance(end, compat.string_types): try: if start: start_loc = self._get_string_slice(start).start @@ -1057,14 +1058,14 @@ def __getitem__(self, key): def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=u'NaT', **kwargs): + def _format_native_types(self, na_rep=u('NaT'), **kwargs): values = np.array(list(self),dtype=object) mask = isnull(self.values) values[mask] = na_rep imask = -mask - values[imask] = np.array([ u'%s' % dt for dt in values[imask] ]) + values[imask] = np.array([u('%s') % dt for dt in values[imask]]) return values.tolist() def __array_finalize__(self, obj): @@ -1084,8 +1085,8 @@ def __repr__(self): def __unicode__(self): output = self.__class__.__name__ - output += u'(' - prefix = '' if py3compat.PY3 else 'u' + output += u('(') + prefix = '' if compat.PY3 else 'u' mapper = "{0}'{{0}}'".format(prefix) output += '[{0}]'.format(', '.join(map(mapper.format, self))) output += ", freq='{0}'".format(self.freq) @@ -1097,7 +1098,7 @@ def __bytes__(self): return self.__unicode__().encode(encoding, 'replace') def __str__(self): - if py3compat.PY3: + if compat.PY3: return self.__unicode__() return self.__bytes__() diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py 
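The basestring -> compat.string_types and py3compat.PY3 -> compat.PY3 substitutions that dominate the hunks above all lean on one small shim module. A minimal sketch of such a shim, assuming only the standard library (illustrative only, not the actual pandas.compat source):

    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        string_types = (str,)           # all text is unicode on Python 3
        text_type = str
    else:
        string_types = (basestring,)    # covers both str and unicode on Python 2
        text_type = unicode

    def u(s):
        # stand-in for the removed u'' literal prefix (invalid syntax on
        # Python 3.0-3.2); simplified here -- a six-style helper would
        # also escape literal backslashes before decoding
        return s if PY3 else unicode(s, 'unicode_escape')

Call sites then read isinstance(freq, string_types) and u('NaT') -- exactly the shape of the replacements above -- and keep working unchanged on either interpreter.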
index 9c22ad66d4f2b..be0c5dfad9071 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -9,6 +9,7 @@ from pandas.tseries.period import PeriodIndex, period_range import pandas.tseries.tools as tools import pandas.core.common as com +import pandas.compat as compat from pandas.lib import Timestamp import pandas.lib as lib @@ -230,7 +231,7 @@ def _resample_timestamps(self, obj): limit=self.limit) loffset = self.loffset - if isinstance(loffset, basestring): + if isinstance(loffset, compat.string_types): loffset = to_offset(self.loffset) if isinstance(loffset, (DateOffset, timedelta)): @@ -291,7 +292,7 @@ def _take_new_index(obj, indexer, new_index, axis=0): def _get_range_edges(axis, offset, closed='left', base=0): - if isinstance(offset, basestring): + if isinstance(offset, compat.string_types): offset = to_offset(offset) if isinstance(offset, Tick): diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index dc5d5cf67995b..c3bb7d82dfb6d 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -6,6 +6,7 @@ import nose import numpy as np +from pandas.compat import u try: import pandas.tseries.converter as converter @@ -14,7 +15,7 @@ def test_timtetonum_accepts_unicode(): - assert(converter.time2num("00:01") == converter.time2num(u"00:01")) + assert(converter.time2num("00:01") == converter.time2num(u("00:01"))) class TestDateTimeConverter(unittest.TestCase): @@ -25,7 +26,7 @@ def setUp(self): def test_convert_accepts_unicode(self): r1 = self.dtc.convert("12:22", None, None) - r2 = self.dtc.convert(u"12:22", None, None) + r2 = self.dtc.convert(u("12:22"), None, None) assert(r1 == r2), "DatetimeConverter.convert should accept unicode" def test_conversion(self): diff --git a/pandas/tseries/tests/test_cursor.py b/pandas/tseries/tests/test_cursor.py index ffada187620a4..fc02a83cbe639 100644 --- a/pandas/tseries/tests/test_cursor.py +++ b/pandas/tseries/tests/test_cursor.py @@ -11,7 +11,7 @@ def test_yearoffset(self): self.assert_(t.day == 1) self.assert_(t.month == 1) self.assert_(t.year == 2002 + i) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -27,7 +27,7 @@ def test_yearoffset(self): self.assert_(t.month == 12) self.assert_(t.day == 31) self.assert_(t.year == 2001 + i) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -47,7 +47,7 @@ def test_yearoffset(self): self.assert_(t.day == 31 or t.day == 30 or t.day == 29) self.assert_(t.year == 2001 + i) self.assert_(t.weekday() < 5) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -66,7 +66,7 @@ def test_monthoffset(self): self.assert_(t.day == 1) self.assert_(t.month == 1 + i) self.assert_(t.year == 2002) - off.next() + next(off) for i in range(11, -1, -1): off.prev() @@ -82,7 +82,7 @@ def test_monthoffset(self): self.assert_(t.day >= 28) self.assert_(t.month == (12 if i == 0 else i)) self.assert_(t.year == 2001 + (i != 0)) - off.next() + next(off) for i in range(11, -1, -1): off.prev() @@ -103,7 +103,7 @@ def test_monthoffset(self): else: self.assert_(t.day >= 26) self.assert_(t.weekday() < 5) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -124,8 +124,8 @@ def test_monthoffset(self): for k in range(500): self.assert_(off1.ts == off2.ts) - off1.next() - off2.next() + next(off1) + next(off2) for k in range(500): self.assert_(off1.ts == off2.ts) @@ -139,7 +139,7 @@ def test_dayoffset(self): t0 = lib.Timestamp(off.ts) for i in range(500): - off.next() + next(off) t1 = 
lib.Timestamp(off.ts) self.assert_(t1.value - t0.value == us_in_day) t0 = t1 @@ -155,7 +155,7 @@ def test_dayoffset(self): t0 = lib.Timestamp(off.ts) for i in range(500): - off.next() + next(off) t1 = lib.Timestamp(off.ts) self.assert_(t1.weekday() < 5) self.assert_(t1.value - t0.value == us_in_day or @@ -184,7 +184,7 @@ def test_dayofmonthoffset(self): t = lib.Timestamp(off.ts) stack.append(t) self.assert_(t.weekday() == day) - off.next() + next(off) for i in range(499, -1, -1): off.prev() diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 4c46dcccbce1c..536d718d72eba 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -1,4 +1,5 @@ from datetime import datetime +from pandas.compat import range import pickle import unittest import nose @@ -15,6 +16,7 @@ import pandas.core.datetools as datetools from pandas.util.testing import assertRaisesRegexp +import pandas.util.testing as tm def _skip_if_no_pytz(): @@ -146,7 +148,7 @@ def test_getitem(self): fancy_indexed = self.rng[[4, 3, 2, 1, 0]] self.assertEquals(len(fancy_indexed), 5) - self.assert_(isinstance(fancy_indexed, DatetimeIndex)) + tm.assert_isinstance(fancy_indexed, DatetimeIndex) self.assert_(fancy_indexed.freq is None) # 32-bit vs. 64-bit platforms @@ -186,21 +188,21 @@ def test_union(self): right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_union = left.union(right) - self.assert_(isinstance(the_union, Index)) + tm.assert_isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # order does not matter self.assert_(np.array_equal(right.union(left), the_union)) @@ -209,7 +211,7 @@ def test_union(self): rng = date_range(START, END, freq=datetools.bmonthEnd) the_union = self.rng.union(rng) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) def test_outer_join(self): # should just behave as union @@ -219,14 +221,14 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) # non-overlapping, no gap @@ -234,13 +236,13 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # overlapping, but different offset rng = date_range(START, END, freq=datetools.bmonthEnd) the_join = self.rng.join(rng, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) def test_union_not_cacheable(self): @@ -263,7 +265,7 @@ def test_intersection(self): the_int = rng1.intersection(rng2) expected = rng[10:25] self.assert_(the_int.equals(expected)) - self.assert_(isinstance(the_int, DatetimeIndex)) + tm.assert_isinstance(the_int, DatetimeIndex) 
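The wholesale move from self.assert_(isinstance(x, T)) to tm.assert_isinstance(x, T) throughout these tests buys a readable failure message and centralizes type assertions in one helper. A sketch of what such a helper needs to do, assuming it mirrors the intent (not the exact source) of pandas.util.testing.assert_isinstance:

    def assert_isinstance(obj, klass):
        # report the offending type instead of a bare AssertionError
        if not isinstance(obj, klass):
            raise AssertionError('expected an instance of %r, got %r'
                                 % (klass, type(obj)))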
self.assert_(the_int.offset == rng.offset) the_int = rng1.intersection(rng2.view(DatetimeIndex)) @@ -321,7 +323,7 @@ def test_daterange_bug_456(self): rng2.offset = datetools.BDay() result = rng1.union(rng2) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', @@ -366,13 +368,13 @@ def test_month_range_union_tz(self): early_start = datetime(2011, 1, 1) early_end = datetime(2011, 3, 1) - + late_start = datetime(2011, 3, 1) late_end = datetime(2011, 5, 1) early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=datetools.monthEnd) late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=datetools.monthEnd) - + early_dr.union(late_dr) @@ -434,7 +436,7 @@ def test_getitem(self): fancy_indexed = self.rng[[4, 3, 2, 1, 0]] self.assertEquals(len(fancy_indexed), 5) - self.assert_(isinstance(fancy_indexed, DatetimeIndex)) + tm.assert_isinstance(fancy_indexed, DatetimeIndex) self.assert_(fancy_indexed.freq is None) # 32-bit vs. 64-bit platforms @@ -474,21 +476,21 @@ def test_union(self): right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_union = left.union(right) - self.assert_(isinstance(the_union, Index)) + tm.assert_isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # order does not matter self.assert_(np.array_equal(right.union(left), the_union)) @@ -497,7 +499,7 @@ def test_union(self): rng = date_range(START, END, freq=datetools.bmonthEnd) the_union = self.rng.union(rng) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) def test_outer_join(self): # should just behave as union @@ -507,14 +509,14 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) # non-overlapping, no gap @@ -522,13 +524,13 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # overlapping, but different offset rng = date_range(START, END, freq=datetools.bmonthEnd) the_join = self.rng.join(rng, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) def test_intersection_bug(self): @@ -578,7 +580,7 @@ def test_daterange_bug_456(self): rng2.offset = datetools.CDay() result = rng1.union(rng2) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) def test_cdaterange(self): rng = cdate_range('2013-05-01', periods=3) diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index aad831ae48a64..6386f61a24a85 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ 
b/pandas/tseries/tests/test_frequencies.py @@ -1,4 +1,5 @@ from datetime import datetime, time, timedelta +from pandas.compat import range import sys import os import unittest diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 487a3091fd83b..7d026a46dde15 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -1,4 +1,6 @@ from datetime import date, datetime, timedelta +from pandas.compat import range +from pandas import compat import unittest import nose from nose.tools import assert_raises @@ -22,6 +24,7 @@ from pandas.tslib import monthrange from pandas.lib import Timestamp from pandas.util.testing import assertRaisesRegexp +import pandas.util.testing as tm _multiprocess_can_split_ = True @@ -75,7 +78,7 @@ def test_normalize_date(): def test_to_m8(): valb = datetime(2007, 10, 1) valu = _to_m8(valb) - assert type(valu) == np.datetime64 + tm.assert_isinstance(valu, np.datetime64) # assert valu == np.datetime64(datetime(2007,10,1)) # def test_datetime64_box(): @@ -270,7 +273,7 @@ def test_apply(self): datetime(2008, 1, 7): datetime(2008, 1, 7)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_apply_large_n(self): @@ -445,7 +448,7 @@ def test_apply(self): datetime(2008, 1, 7): datetime(2008, 1, 7)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_apply_large_n(self): @@ -562,7 +565,7 @@ def test_offset(self): datetime(2010, 4, 5): datetime(2010, 3, 23)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -701,7 +704,7 @@ def test_offset(self): datetime(2007, 1, 1): datetime(2006, 12, 1)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -758,7 +761,7 @@ def test_offset(self): datetime(2007, 1, 1): datetime(2006, 12, 29)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_normalize(self): @@ -819,7 +822,7 @@ def test_offset(self): datetime(2006, 1, 2): datetime(2006, 1, 1)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) @@ -860,7 +863,7 @@ def test_offset(self): datetime(2007, 1, 1): datetime(2006, 12, 31)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # def test_day_of_month(self): @@ -967,7 +970,7 @@ def test_offset(self): datetime(2008, 4, 30): datetime(2008, 10, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1035,7 +1038,7 @@ def test_offset(self): datetime(2008, 4, 30): datetime(2008, 10, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1139,7 +1142,7 @@ def test_offset(self): datetime(2008, 4, 1): datetime(2008, 10, 1), })) for offset, cases in 
tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1208,7 +1211,7 @@ def test_offset(self): datetime(2008, 4, 30): datetime(2008, 10, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1322,7 +1325,7 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2007, 1, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) @@ -1382,7 +1385,7 @@ def test_offset(self): datetime(2012, 1, 31): datetime(2011, 4, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1418,7 +1421,7 @@ def test_offset(self): )) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): self.assertEqual(base + offset, expected) def test_roll(self): @@ -1471,7 +1474,7 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2006, 12, 29), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1522,7 +1525,7 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2006, 12, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1571,7 +1574,7 @@ def test_offset(self): datetime(2008, 3, 31): datetime(2006, 3, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1651,7 +1654,7 @@ def test_compare_ticks(): three = kls(3) four = kls(4) - for _ in xrange(10): + for _ in range(10): assert(three < kls(4)) assert(kls(3) < four) assert(four > kls(3)) @@ -1731,7 +1734,7 @@ def setUp(self): def test_alias_equality(self): from pandas.tseries.frequencies import _offset_map - for k, v in _offset_map.iteritems(): + for k, v in compat.iteritems(_offset_map): if v is None: continue self.assertEqual(k, v.copy()) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 9fd5e6bf5f3e9..03b1d89714f68 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -22,12 +22,13 @@ import pandas.core.datetools as datetools import pandas as pd import numpy as np +from pandas.compat import range, lrange, lmap, map, zip randn = np.random.randn from pandas import Series, TimeSeries, DataFrame from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm -from pandas.util import py3compat +from pandas import compat from numpy.testing import assert_array_equal @@ -209,8 +210,8 @@ def test_repr(self): def test_strftime(self): p = Period('2000-1-1 12:34:12', freq='S') res = p.strftime('%Y-%m-%d %H:%M:%S') - self.assert_( res == '2000-01-01 12:34:12') - self.assert_( isinstance(res,unicode)) # GH3363 + self.assertEqual(res, '2000-01-01 12:34:12') + tm.assert_isinstance(res, compat.text_type) # GH3363 def test_sub_delta(self): left, right = Period('2011', freq='A'), Period('2007', freq='A') @@ -1061,7 +1062,7 @@ def setUp(self): def 
test_make_time_series(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index) - self.assert_(isinstance(series, TimeSeries)) + tm.assert_isinstance(series, TimeSeries) def test_astype(self): idx = period_range('1990', '2009', freq='A') @@ -1115,7 +1116,7 @@ def test_constructor_U(self): def test_constructor_arrays_negative_year(self): years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(range(1, 5), 40) + quarters = np.tile(lrange(1, 5), 40) pindex = PeriodIndex(year=years, quarter=quarters) @@ -1123,8 +1124,8 @@ def test_constructor_arrays_negative_year(self): self.assert_(np.array_equal(pindex.quarter, quarters)) def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=range(2000, 2004), - quarter=range(4), freq='Q-DEC') + self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), + quarter=lrange(4), freq='Q-DEC') def test_constructor_corner(self): self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') @@ -1178,7 +1179,7 @@ def test_getitem_ndim2(self): result = idx[:, None] # MPL kludge - self.assert_(type(result) == PeriodIndex) + tm.assert_isinstance(result, PeriodIndex) def test_getitem_partial(self): rng = period_range('2007-01', periods=50, freq='M') @@ -1213,7 +1214,7 @@ def test_getitem_partial(self): def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(range(len(rng)), index=rng) + ts = Series(lrange(len(rng)), index=rng) dt1 = datetime(2011, 10, 2) dt4 = datetime(2012, 4, 20) @@ -1235,7 +1236,7 @@ def test_periods_number_check(self): def test_tolist(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') rs = index.tolist() - [self.assert_(isinstance(x, Period)) for x in rs] + [tm.assert_isinstance(x, Period) for x in rs] recon = PeriodIndex(rs) self.assert_(index.equals(recon)) @@ -1285,7 +1286,7 @@ def _get_with_delta(delta, freq='A-DEC'): def test_to_timestamp_quarterly_bug(self): years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(range(1, 5), 40) + quarters = np.tile(lrange(1, 5), 40) pindex = PeriodIndex(year=years, quarter=quarters) @@ -1332,7 +1333,7 @@ def test_frame_setitem(self): self.assert_(rs.equals(rng)) rs = df.reset_index().set_index('index') - self.assert_(isinstance(rs.index, PeriodIndex)) + tm.assert_isinstance(rs.index, PeriodIndex) self.assert_(rs.index.equals(rng)) def test_nested_dict_frame_constructor(self): @@ -1622,45 +1623,45 @@ def test_ts_repr(self): def test_period_index_unicode(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 9) - assert_equal(pi, eval(unicode(pi))) + assert_equal(pi, eval(compat.text_type(pi))) pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 4 * 9) - assert_equal(pi, eval(unicode(pi))) + assert_equal(pi, eval(compat.text_type(pi))) pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 12 * 9) - assert_equal(pi, eval(unicode(pi))) + assert_equal(pi, eval(compat.text_type(pi))) start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) assert_equal(len(i1), 20) assert_equal(i1.freq, start.freq) assert_equal(i1[0], start) - assert_equal(i1, eval(unicode(i1))) + assert_equal(i1, eval(compat.text_type(i1))) end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), 10) assert_equal(i1.freq, end_intv.freq) assert_equal(i1[-1], end_intv) - assert_equal(i1, eval(unicode(i1))) 
+ assert_equal(i1, eval(compat.text_type(i1))) end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), len(i2)) self.assert_((i1 == i2).all()) assert_equal(i1.freq, i2.freq) - assert_equal(i1, eval(unicode(i1))) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i1, eval(compat.text_type(i1))) + assert_equal(i2, eval(compat.text_type(i2))) end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), len(i2)) self.assert_((i1 == i2).all()) assert_equal(i1.freq, i2.freq) - assert_equal(i1, eval(unicode(i1))) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i1, eval(compat.text_type(i1))) + assert_equal(i2, eval(compat.text_type(i2))) try: PeriodIndex(start=start, end=end_intv) @@ -1670,7 +1671,7 @@ def test_period_index_unicode(self): end_intv = Period('2005-05-01', 'B') i1 = PeriodIndex(start=start, end=end_intv) - assert_equal(i1, eval(unicode(i1))) + assert_equal(i1, eval(compat.text_type(i1))) try: PeriodIndex(start=start) @@ -1683,12 +1684,12 @@ def test_period_index_unicode(self): i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) assert_equal(len(i2), 2) assert_equal(i2[0], end_intv) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i2, eval(compat.text_type(i2))) i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) assert_equal(len(i2), 2) assert_equal(i2[0], end_intv) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i2, eval(compat.text_type(i2))) # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] @@ -1832,7 +1833,7 @@ def test_iteration(self): index = PeriodIndex(start='1/1/10', periods=4, freq='B') result = list(index) - self.assert_(isinstance(result[0], Period)) + tm.assert_isinstance(result[0], Period) self.assert_(result[0].freq == index.freq) def test_take(self): @@ -1840,9 +1841,9 @@ def test_take(self): taken = index.take([5, 6, 8, 12]) taken2 = index[[5, 6, 8, 12]] - self.assert_(isinstance(taken, PeriodIndex)) + tm.assert_isinstance(taken, PeriodIndex) self.assert_(taken.freq == index.freq) - self.assert_(isinstance(taken2, PeriodIndex)) + tm.assert_isinstance(taken2, PeriodIndex) self.assert_(taken2.freq == index.freq) def test_joins(self): @@ -1851,7 +1852,7 @@ def test_joins(self): for kind in ['inner', 'outer', 'left', 'right']: joined = index.join(index[:-5], how=kind) - self.assert_(isinstance(joined, PeriodIndex)) + tm.assert_isinstance(joined, PeriodIndex) self.assert_(joined.freq == index.freq) def test_align_series(self): @@ -1997,15 +1998,17 @@ def test_map_with_string_constructor(self): raw = [2005, 2007, 2009] index = PeriodIndex(raw, freq='A') types = str, - if not py3compat.PY3: - types += unicode, + + if compat.PY3: + # unicode + types += compat.text_type, for t in types: - expected = np.array(map(t, raw), dtype=object) + expected = np.array(lmap(t, raw), dtype=object) res = index.map(t) # should return an array - self.assert_(isinstance(res, np.ndarray)) + tm.assert_isinstance(res, np.ndarray) # preserve element types self.assert_(all(isinstance(resi, t) for resi in res)) @@ -2021,7 +2024,7 @@ def test_convert_array_of_periods(self): periods = list(rng) result = pd.Index(periods) - self.assert_(isinstance(result, PeriodIndex)) + tm.assert_isinstance(result, PeriodIndex) def test_with_multi_index(self): # #1705 @@ -2030,9 +2033,9 @@ def test_with_multi_index(self): s = Series([0, 1, 2, 3], index_as_arrays) - self.assert_(isinstance(s.index.levels[0], PeriodIndex)) + tm.assert_isinstance(s.index.levels[0], 
PeriodIndex) - self.assert_(isinstance(s.index.values[0][0], Period)) + tm.assert_isinstance(s.index.values[0][0], Period) def test_to_datetime_1703(self): index = period_range('1/1/2012', periods=4, freq='D') @@ -2063,7 +2066,7 @@ def test_append_concat(self): # drops index result = pd.concat([s1, s2]) - self.assert_(isinstance(result.index, PeriodIndex)) + tm.assert_isinstance(result.index, PeriodIndex) self.assertEquals(result.index[0], s1.index[0]) def test_pickle_freq(self): diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index f1602bbd3f020..717e7bfe5da96 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -3,6 +3,7 @@ import unittest import nose +from pandas.compat import range, lrange, zip import numpy as np from numpy.testing.decorators import slow @@ -186,7 +187,7 @@ def test_fake_inferred_business(self): plt.clf() fig.add_subplot(111) rng = date_range('2001-1-1', '2001-1-10') - ts = Series(range(len(rng)), rng) + ts = Series(lrange(len(rng)), rng) ts = ts[:3].append(ts[5:]) ax = ts.plot() self.assert_(not hasattr(ax, 'freq')) @@ -482,7 +483,7 @@ def test_gaps(self): self.assert_(len(lines) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[5:25, 1].all()) @@ -496,7 +497,7 @@ def test_gaps(self): self.assert_(len(lines) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[2:5, 1].all()) @@ -510,7 +511,7 @@ def test_gaps(self): self.assert_(len(lines) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[2:5, 1].all()) @@ -530,7 +531,7 @@ def test_gap_upsample(self): self.assert_(len(ax.right_ax.get_lines()) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[5:25, 1].all()) @@ -942,7 +943,7 @@ def test_format_date_axis(self): def test_ax_plot(self): x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') - y = range(len(x)) + y = lrange(len(x)) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 02a3030f69519..1b75961cb2721 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2,6 +2,7 @@ from datetime import datetime, timedelta +from pandas.compat import range, lrange, zip, product import numpy as np from pandas import Series, TimeSeries, DataFrame, Panel, isnull, notnull, Timestamp @@ -266,7 +267,7 @@ def test_resample_reresample(self): bs = s.resample('B', closed='right', label='right') result = bs.resample('8H') self.assertEquals(len(result), 22) - self.assert_(isinstance(result.index.freq, offsets.DateOffset)) + tm.assert_isinstance(result.index.freq, offsets.DateOffset) self.assert_(result.index.freq == offsets.Hour(8)) def test_resample_timestamp_to_period(self): @@ -535,7 +536,7 @@ def test_upsample_apply_functions(self): ts = Series(np.random.randn(len(rng)), index=rng) result = ts.resample('20min', how=['mean', 'sum']) - self.assert_(isinstance(result, DataFrame)) + 
tm.assert_isinstance(result, DataFrame) def test_resample_not_monotonic(self): rng = pd.date_range('2012-06-12', periods=200, freq='h') @@ -603,7 +604,6 @@ def _simple_pts(start, end, freq='D'): from pandas.tseries.frequencies import MONTHS, DAYS -from pandas.util.compat import product class TestResamplePeriodIndex(unittest.TestCase): @@ -860,7 +860,7 @@ def test_resample_weekly_all_na(self): def test_resample_tz_localized(self): dr = date_range(start='2012-4-13', end='2012-5-1') - ts = Series(range(len(dr)), dr) + ts = Series(lrange(len(dr)), dr) ts_utc = ts.tz_localize('UTC') ts_local = ts_utc.tz_convert('America/Los_Angeles') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index f41d31d2afbd0..0fcdcf344ca38 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1,5 +1,4 @@ # pylint: disable-msg=E1101,W0612 -import pandas.util.compat as itertools from datetime import datetime, time, timedelta import sys import os @@ -23,21 +22,21 @@ from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm -from pandas.util.py3compat import StringIO - from pandas.tslib import NaT, iNaT import pandas.lib as lib import pandas.tslib as tslib import pandas.index as _index -import cPickle as pickle +from pandas.compat import( + range, long, StringIO, lrange, lmap, map, zip, cPickle as pickle, product +) from pandas import read_pickle import pandas.core.datetools as dt from numpy.random import rand from numpy.testing import assert_array_equal from pandas.util.testing import assert_frame_equal -import pandas.util.py3compat as py3compat +import pandas.compat as compat from pandas.core.datetools import BDay import pandas.core.common as com from pandas import concat @@ -65,8 +64,8 @@ def setUp(self): self.dups = Series(np.random.randn(len(dates)), index=dates) def test_constructor(self): - self.assert_(isinstance(self.dups, TimeSeries)) - self.assert_(isinstance(self.dups.index, DatetimeIndex)) + tm.assert_isinstance(self.dups, TimeSeries) + tm.assert_isinstance(self.dups.index, DatetimeIndex) def test_is_unique_monotonic(self): self.assert_(not self.dups.index.is_unique) @@ -239,17 +238,17 @@ def test_indexing(self): # GH3546 (not including times on the last day) idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', freq='H') - ts = Series(range(len(idx)), index=idx) + ts = Series(lrange(len(idx)), index=idx) expected = ts['2013-05'] assert_series_equal(expected,ts) idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', freq='S') - ts = Series(range(len(idx)), index=idx) + ts = Series(lrange(len(idx)), index=idx) expected = ts['2013-05'] assert_series_equal(expected,ts) idx = [ Timestamp('2013-05-31 00:00'), Timestamp(datetime(2013,5,31,23,59,59,999999))] - ts = Series(range(len(idx)), index=idx) + ts = Series(lrange(len(idx)), index=idx) expected = ts['2013'] assert_series_equal(expected,ts) @@ -325,13 +324,13 @@ def test_series_box_timestamp(self): rng = date_range('20090415', '20090519', freq='B') s = Series(rng) - self.assert_(isinstance(s[5], Timestamp)) + tm.assert_isinstance(s[5], Timestamp) rng = date_range('20090415', '20090519', freq='B') s = Series(rng, index=rng) - self.assert_(isinstance(s[5], Timestamp)) + tm.assert_isinstance(s[5], Timestamp) - self.assert_(isinstance(s.iget_value(5), Timestamp)) + tm.assert_isinstance(s.iget_value(5), Timestamp) def test_date_range_ambiguous_arguments(self): # #2538 @@ -356,9 +355,9 @@ def 
test_index_convert_to_datetime_array(self): def _check_rng(rng): converted = rng.to_pydatetime() - self.assert_(isinstance(converted, np.ndarray)) + tm.assert_isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): - self.assert_(type(x) is datetime) + tm.assert_isinstance(x, datetime) self.assertEquals(x, stamp.to_pydatetime()) self.assertEquals(x.tzinfo, stamp.tzinfo) @@ -453,7 +452,7 @@ def test_frame_setitem_timestamp(self): # 2155 columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=datetools.bday) - index = range(10) + index = lrange(10) data = DataFrame(columns=columns, index=index) t = datetime(2012, 11, 1) ts = Timestamp(t) @@ -657,14 +656,14 @@ def test_index_astype_datetime64(self): casted = idx.astype(np.dtype('M8[D]')) expected = DatetimeIndex(idx.values) - self.assert_(isinstance(casted, DatetimeIndex)) + tm.assert_isinstance(casted, DatetimeIndex) self.assert_(casted.equals(expected)) def test_reindex_series_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') series = Series(rng) - result = series.reindex(range(15)) + result = series.reindex(lrange(15)) self.assert_(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) mask = result.isnull() @@ -675,7 +674,7 @@ def test_reindex_frame_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) - result = df.reindex(range(15)) + result = df.reindex(lrange(15)) self.assert_(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) mask = com.isnull(result)['B'] @@ -733,7 +732,7 @@ def test_fillna_nat(self): def test_string_na_nat_conversion(self): # GH #999, #858 - from dateutil.parser import parse + from pandas.compat import parse_date strings = np.array(['1/1/2000', '1/2/2000', np.nan, '1/4/2000, 12:34:56'], dtype=object) @@ -743,13 +742,13 @@ def test_string_na_nat_conversion(self): if com.isnull(val): expected[i] = iNaT else: - expected[i] = parse(val) + expected[i] = parse_date(val) result = tslib.array_to_datetime(strings) assert_almost_equal(result, expected) result2 = to_datetime(strings) - self.assert_(isinstance(result2, DatetimeIndex)) + tm.assert_isinstance(result2, DatetimeIndex) assert_almost_equal(result, result2) malformed = np.array(['1/100/2000', np.nan], dtype=object) @@ -890,7 +889,7 @@ def test_to_datetime_types(self): ### array = ['2012','20120101','20120101 12:01:01'] array = ['20120101','20120101 12:01:01'] expected = list(to_datetime(array)) - result = map(Timestamp,array) + result = lmap(Timestamp,array) tm.assert_almost_equal(result,expected) ### currently fails ### @@ -954,7 +953,7 @@ def test_reasonable_keyerror(self): index = DatetimeIndex(['1/3/2000']) try: index.get_loc('1/1/2000') - except KeyError, e: + except KeyError as e: self.assert_('2000' in str(e)) def test_reindex_with_datetimes(self): @@ -1153,7 +1152,7 @@ def test_between_time(self): stime = time(0, 0) etime = time(1, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 @@ -1185,7 +1184,7 @@ def test_between_time(self): stime = time(22, 0) etime = time(9, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 @@ -1213,7 +1212,7 @@ def 
test_between_time_frame(self): stime = time(0, 0) etime = time(1, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 @@ -1245,7 +1244,7 @@ def test_between_time_frame(self): stime = time(22, 0) etime = time(9, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 @@ -1513,11 +1512,11 @@ def test_groupby_count_dateparseerror(self): dr = date_range(start='1/1/2012', freq='5min', periods=10) # BAD Example, datetimes first - s = Series(np.arange(10), index=[dr, range(10)]) + s = Series(np.arange(10), index=[dr, lrange(10)]) grouped = s.groupby(lambda x: x[1] % 2 == 0) result = grouped.count() - s = Series(np.arange(10), index=[range(10), dr]) + s = Series(np.arange(10), index=[lrange(10), dr]) grouped = s.groupby(lambda x: x[0] % 2 == 0) expected = grouped.count() @@ -1668,7 +1667,7 @@ def test_concat_datetime_datetime64_frame(self): df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) ind = date_range(start="2000/1/1", freq="D", periods=10) - df1 = DataFrame({'date': ind, 'test':range(10)}) + df1 = DataFrame({'date': ind, 'test':lrange(10)}) # it works! pd.concat([df1, df2_obj]) @@ -1687,7 +1686,7 @@ def test_stringified_slice_with_tz(self): import datetime start=datetime.datetime.now() idx=DatetimeIndex(start=start,freq="1d",periods=10) - df=DataFrame(range(10),index=idx) + df=DataFrame(lrange(10),index=idx) df["2013-01-14 23:44:34.437768-05:00":] # no exception here def test_append_join_nondatetimeindex(self): @@ -1695,7 +1694,7 @@ def test_append_join_nondatetimeindex(self): idx = Index(['a', 'b', 'c', 'd']) result = rng.append(idx) - self.assert_(isinstance(result[0], Timestamp)) + tm.assert_isinstance(result[0], Timestamp) # it works rng.join(idx, how='outer') @@ -1790,7 +1789,7 @@ def test_add_union(self): def test_misc_coverage(self): rng = date_range('1/1/2000', periods=5) result = rng.groupby(rng.day) - self.assert_(isinstance(result.values()[0][0], Timestamp)) + tm.assert_isinstance(list(result.values())[0][0], Timestamp) idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) self.assert_(idx.equals(list(idx))) @@ -1898,7 +1897,7 @@ def test_groupby_function_tuple_1677(self): monthly_group = df.groupby(lambda x: (x.year, x.month)) result = monthly_group.mean() - self.assert_(isinstance(result.index[0], tuple)) + tm.assert_isinstance(result.index[0], tuple) def test_append_numpy_bug_1681(self): # another datetime64 bug @@ -1967,7 +1966,7 @@ class TestLegacySupport(unittest.TestCase): @classmethod def setUpClass(cls): - if py3compat.PY3: + if compat.PY3: raise nose.SkipTest pth, _ = os.path.split(os.path.abspath(__file__)) @@ -1981,7 +1980,6 @@ def setUpClass(cls): cls.series = pickle.load(f) def test_pass_offset_warn(self): - from StringIO import StringIO buf = StringIO() sys.stderr = buf @@ -2022,7 +2020,7 @@ def test_unpickle_legacy_len0_daterange(self): ex_index = DatetimeIndex([], freq='B') self.assert_(result.index.equals(ex_index)) - self.assert_(isinstance(result.index.freq, offsets.BDay)) + tm.assert_isinstance(result.index.freq, offsets.BDay) self.assert_(len(result) == 0) def test_arithmetic_interaction(self): @@ -2034,12 +2032,12 @@ def test_arithmetic_interaction(self): 
result = dseries + oseries expected = dseries * 2 - self.assert_(isinstance(result.index, DatetimeIndex)) + tm.assert_isinstance(result.index, DatetimeIndex) assert_series_equal(result, expected) result = dseries + oseries[:5] expected = dseries + dseries[:5] - self.assert_(isinstance(result.index, DatetimeIndex)) + tm.assert_isinstance(result.index, DatetimeIndex) assert_series_equal(result, expected) def test_join_interaction(self): @@ -2051,7 +2049,7 @@ def _check_join(left, right, how='inner'): ea, eb, ec = left.join(DatetimeIndex(right), how=how, return_indexers=True) - self.assert_(isinstance(ra, DatetimeIndex)) + tm.assert_isinstance(ra, DatetimeIndex) self.assert_(ra.equals(ea)) assert_almost_equal(rb, eb) @@ -2075,8 +2073,8 @@ def test_unpickle_daterange(self): filepath = os.path.join(pth, 'data', 'daterange_073.pickle') rng = read_pickle(filepath) - self.assert_(type(rng[0]) == datetime) - self.assert_(isinstance(rng.offset, offsets.BDay)) + tm.assert_isinstance(rng[0], datetime) + tm.assert_isinstance(rng.offset, offsets.BDay) self.assert_(rng.values.dtype == object) def test_setops(self): @@ -2085,17 +2083,17 @@ def test_setops(self): result = index[:5].union(obj_index[5:]) expected = index - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.equals(expected)) result = index[:10].intersection(obj_index[5:]) expected = index[5:10] - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.equals(expected)) result = index[:10] - obj_index[5:] expected = index[:5] - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.equals(expected)) def test_index_conversion(self): @@ -2111,7 +2109,7 @@ def test_tolist(self): rng = date_range('1/1/2000', periods=10) result = rng.tolist() - self.assert_(isinstance(result[0], Timestamp)) + tm.assert_isinstance(result[0], Timestamp) def test_object_convert_fail(self): idx = DatetimeIndex([NaT]) @@ -2336,8 +2334,8 @@ def test_min_max(self): the_min = rng2.min() the_max = rng2.max() - self.assert_(isinstance(the_min, Timestamp)) - self.assert_(isinstance(the_max, Timestamp)) + tm.assert_isinstance(the_min, Timestamp) + tm.assert_isinstance(the_max, Timestamp) self.assertEqual(the_min, rng[0]) self.assertEqual(the_max, rng[-1]) @@ -2402,7 +2400,6 @@ def test_frame_apply_dont_convert_datetime64(self): class TestLegacyCompat(unittest.TestCase): def setUp(self): - from StringIO import StringIO # suppress deprecation warnings sys.stderr = StringIO() @@ -2623,11 +2620,11 @@ def test_datetimeindex_union_join_empty(self): empty = Index([]) result = dti.union(empty) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result is result) result = dti.join(empty) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) def test_series_set_value(self): # #1561 @@ -2650,7 +2647,7 @@ def test_series_set_value(self): def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] - s = Series(range(100000), times) + s = Series(lrange(100000), times) s.ix[datetime(1900, 1, 1):datetime(2100, 1, 1)] @@ -2813,26 +2810,26 @@ def check(val,unit=None,h=1,s=1,us=0): days = (ts - Timestamp('1970-01-01')).days check(val) - check(val/1000L,unit='us') - check(val/1000000L,unit='ms') - check(val/1000000000L,unit='s') + check(val/long(1000),unit='us') + 
check(val/long(1000000),unit='ms') + check(val/long(1000000000),unit='s') check(days,unit='D',h=0) # using truediv, so these are like floats - if py3compat.PY3: - check((val+500000)/1000000000L,unit='s',us=500) - check((val+500000000)/1000000000L,unit='s',us=500000) - check((val+500000)/1000000L,unit='ms',us=500) + if compat.PY3: + check((val+500000)/long(1000000000),unit='s',us=500) + check((val+500000000)/long(1000000000),unit='s',us=500000) + check((val+500000)/long(1000000),unit='ms',us=500) # get chopped in py2 else: - check((val+500000)/1000000000L,unit='s') - check((val+500000000)/1000000000L,unit='s') - check((val+500000)/1000000L,unit='ms') + check((val+500000)/long(1000000000),unit='s') + check((val+500000000)/long(1000000000),unit='s') + check((val+500000)/long(1000000),unit='ms') # ok - check((val+500000)/1000L,unit='us',us=500) - check((val+500000000)/1000000L,unit='ms',us=500000) + check((val+500000)/long(1000),unit='us',us=500) + check((val+500000000)/long(1000000),unit='ms',us=500000) # floats check(val/1000.0 + 5,unit='us',us=5) @@ -2857,7 +2854,7 @@ def check(val,unit=None,h=1,s=1,us=0): def test_comparison(self): # 5-18-2012 00:00:00.000 - stamp = 1337299200000000000L + stamp = long(1337299200000000000) val = Timestamp(stamp) @@ -2908,7 +2905,7 @@ def test_cant_compare_tz_naive_w_aware(self): self.assertFalse(a.to_pydatetime() == b) def test_delta_preserve_nanos(self): - val = Timestamp(1337299200000000123L) + val = Timestamp(long(1337299200000000123)) result = val + timedelta(1) self.assert_(result.nanosecond == val.nanosecond) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 09224d0133e3d..883025bee1ba1 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -24,11 +24,11 @@ import pandas.util.testing as tm import pandas.lib as lib -import cPickle as pickle import pandas.core.datetools as dt from numpy.random import rand from pandas.util.testing import assert_frame_equal -import pandas.util.py3compat as py3compat +import pandas.compat as compat +from pandas.compat import range, lrange, zip, cPickle as pickle from pandas.core.datetools import BDay import pandas.core.common as com @@ -180,7 +180,7 @@ def test_astimezone(self): expected = utc.tz_convert('US/Eastern') result = utc.astimezone('US/Eastern') self.assertEquals(expected, result) - self.assert_(isinstance(result, Timestamp)) + tm.assert_isinstance(result, Timestamp) def test_create_with_tz(self): stamp = Timestamp('3/11/2012 05:00', tz='US/Eastern') @@ -393,7 +393,7 @@ def test_take_dont_lose_meta(self): _skip_if_no_pytz() rng = date_range('1/1/2000', periods=20, tz='US/Eastern') - result = rng.take(range(5)) + result = rng.take(lrange(5)) self.assert_(result.tz == rng.tz) self.assert_(result.freq == rng.freq) @@ -620,7 +620,7 @@ def test_getitem_pydatetime_tz(self): tz='Europe/Berlin') ts = Series(index=index, data=index.hour) time_pandas = Timestamp('2012-12-24 17:00', tz='Europe/Berlin') - time_datetime = datetime(2012, 12, 24, 17, 00, + time_datetime = datetime(2012, 12, 24, 17, 0, tzinfo=pytz.timezone('Europe/Berlin')) self.assertEqual(ts[time_pandas], ts[time_datetime]) @@ -635,14 +635,14 @@ def test_datetimeindex_tz(self): """ Test different DatetimeIndex constructions with timezone Follow-up of #4229 """ - + arr = ['11/10/2005 08:00:00', '11/10/2005 09:00:00'] - + idx1 = to_datetime(arr).tz_localize('US/Eastern') idx2 = DatetimeIndex(start="2005-11-10 08:00:00", freq='H', periods=2, tz='US/Eastern') idx3 = 
DatetimeIndex(arr, tz='US/Eastern') idx4 = DatetimeIndex(np.array(arr), tz='US/Eastern') - + for other in [idx2, idx3, idx4]: self.assert_(idx1.equals(other)) @@ -724,11 +724,11 @@ def test_join_utc_convert(self): for how in ['inner', 'outer', 'left', 'right']: result = left.join(left[:-5], how=how) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.tz == left.tz) result = left.join(right[:-5], how=how) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.tz.zone == 'UTC') def test_join_aware(self): @@ -746,7 +746,7 @@ def test_join_aware(self): test2 = DataFrame(np.zeros((3, 3)), index=date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"), - columns=range(3, 6)) + columns=lrange(3, 6)) result = test1.join(test2, how='outer') ex_index = test1.index.union(test2.index) @@ -815,7 +815,7 @@ def test_append_aware_naive(self): # mixed rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = range(100) + rng2 = lrange(100) ts1 = Series(np.random.randn(len(rng1)), index=rng1) ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 09dad264b7ae0..8bf448118561d 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -1,3 +1,4 @@ +from pandas.compat import range import nose import unittest diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d914a8fa570d4..3087d54396691 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -7,7 +7,8 @@ import pandas.lib as lib import pandas.tslib as tslib import pandas.core.common as com -from pandas.util.py3compat import StringIO +from pandas.compat import StringIO, callable +import pandas.compat as compat try: import dateutil @@ -40,7 +41,7 @@ def _infer(a, b): def _maybe_get_tz(tz): - if isinstance(tz, basestring): + if isinstance(tz, compat.string_types): import pytz tz = pytz.timezone(tz) if com.is_integer(tz): @@ -91,7 +92,7 @@ def _convert_listlike(arg, box): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz='utc' if utc else None) - except ValueError, e: + except ValueError as e: values, tz = tslib.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, None, tz=tz) @@ -109,7 +110,7 @@ def _convert_listlike(arg, box): result = DatetimeIndex(result, tz='utc' if utc else None) return result - except ValueError, e: + except ValueError as e: try: values, tz = tslib.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, None, tz=tz) @@ -148,7 +149,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): Parameters ---------- - arg : basestring + arg : compat.string_types freq : str or DateOffset, default None Helps with interpreting time string if supplied dayfirst : bool, default None @@ -165,7 +166,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, _get_freq_str) - if not isinstance(arg, basestring): + if not isinstance(arg, compat.string_types): return arg arg = arg.upper() @@ -236,7 +237,8 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): try: parsed, reso = dateutil_parse(arg, default, dayfirst=dayfirst, yearfirst=yearfirst) - except Exception, e: + except Exception as e: + # TODO: allow raise of errors within instead raise 
DateParseError(e) if parsed is None: @@ -251,19 +253,25 @@ def dateutil_parse(timestr, default, """ lifted from dateutil to get resolution""" from dateutil import tz import time + fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(StringIO(timestr), **kwargs) + res = DEFAULTPARSER._parse(fobj, **kwargs) if res is None: raise ValueError("unknown string format") repl = {} + reso = None for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: value = getattr(res, attr) if value is not None: repl[attr] = value reso = attr + + if reso is None: + raise ValueError("Cannot parse date.") + if reso == 'microsecond' and repl['microsecond'] == 0: reso = 'second' @@ -278,7 +286,7 @@ def dateutil_parse(timestr, default, tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata - elif isinstance(tzdata, basestring): + elif isinstance(tzdata, compat.string_types): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tz.tzoffset(res.tzname, tzdata) diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index eb80746cf0c25..664a42543822d 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,3 +1,4 @@ +from pandas.compat import range, lrange import numpy as np import pandas as pd @@ -53,12 +54,12 @@ def pivot_annual(series, freq=None): # adjust for leap year offset[(-isleapyear(year)) & (offset >= 59)] += 1 - columns = range(1, 367) + columns = lrange(1, 367) # todo: strings like 1/1, 1/25, etc.? elif freq in ('M', 'BM'): width = 12 offset = index.month - 1 - columns = range(1, 13) + columns = lrange(1, 13) elif freq == 'H': width = 8784 grouped = series.groupby(series.index.year) @@ -66,7 +67,7 @@ def pivot_annual(series, freq=None): defaulted.index = defaulted.index.droplevel(0) offset = np.asarray(defaulted.index) offset[-isleapyear(year) & (offset >= 1416)] += 24 - columns = range(1, 8785) + columns = lrange(1, 8785) else: raise NotImplementedError(freq) diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 3439e6bb37eb7..1c12b627f0690 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -28,7 +28,7 @@ cimport cython from datetime import timedelta, datetime from datetime import time as datetime_time -from dateutil.parser import parse as parse_date +from pandas.compat import parse_date cdef extern from "Python.h": int PySlice_Check(object) @@ -852,8 +852,6 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, _TSObject _ts int64_t m = cast_from_unit(unit,None) - from dateutil.parser import parse - try: result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -917,7 +915,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, elif raise_: raise try: - result[i] = parse(val, dayfirst=dayfirst) + result[i] = parse_date(val, dayfirst=dayfirst) except Exception: if coerce: iresult[i] = iNaT @@ -946,7 +944,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, oresult[i] = 'NaT' continue try: - oresult[i] = parse(val, dayfirst=dayfirst) + oresult[i] = parse_date(val, dayfirst=dayfirst) except Exception: if raise_: raise diff --git a/pandas/util/compat.py b/pandas/util/compat.py deleted file mode 100644 index c18044fc6c492..0000000000000 --- a/pandas/util/compat.py +++ /dev/null @@ -1,502 +0,0 @@ -# itertools.product not in Python 2.5 - -try: - from itertools import product -except ImportError: # python 2.5 - def product(*args, **kwds): - # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy - # product(range(2), repeat=3) --> 
000 001 010 011 100 101 110 111 - pools = map(tuple, args) * kwds.get('repeat', 1) - result = [[]] - for pool in pools: - result = [x + [y] for x in result for y in pool] - for prod in result: - yield tuple(prod) - - -# OrderedDict Shim from Raymond Hettinger, python core dev -# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ -# here to support versions before 2.6 -import sys -try: - from thread import get_ident as _get_ident -except ImportError: - from dummy_thread import get_ident as _get_ident - -try: - from _abcoll import KeysView, ValuesView, ItemsView -except ImportError: - pass - - -class _OrderedDict(dict): - 'Dictionary that remembers insertion order' - # An inherited dict maps keys to values. - # The inherited dict provides __getitem__, __len__, __contains__, and get. - # The remaining methods are order-aware. - # Big-O running times for all methods are the same as for regular - # dictionaries. - - # The internal self.__map dictionary maps keys to links in a doubly linked list. - # The circular doubly linked list starts and ends with a sentinel element. - # The sentinel element never gets deleted (this simplifies the algorithm). - # Each link is stored as a list of length three: [PREV, NEXT, KEY]. - - def __init__(self, *args, **kwds): - '''Initialize an ordered dictionary. Signature is the same as for - regular dictionaries, but keyword arguments are not recommended - because their insertion order is arbitrary. - - ''' - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__root - except AttributeError: - self.__root = root = [] # sentinel node - root[:] = [root, root, None] - self.__map = {} - self.__update(*args, **kwds) - - def __setitem__(self, key, value, dict_setitem=dict.__setitem__): - 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link which goes at the end of the linked - # list, and the inherited dictionary is updated with the new key/value - # pair. - if key not in self: - root = self.__root - last = root[0] - last[1] = root[0] = self.__map[key] = [last, root, key] - dict_setitem(self, key, value) - - def __delitem__(self, key, dict_delitem=dict.__delitem__): - 'od.__delitem__(y) <==> del od[y]' - # Deleting an existing item uses self.__map to find the link which is - # then removed by updating the links in the predecessor and successor - # nodes. - dict_delitem(self, key) - link_prev, link_next, key = self.__map.pop(key) - link_prev[1] = link_next - link_next[0] = link_prev - - def __iter__(self): - 'od.__iter__() <==> iter(od)' - root = self.__root - curr = root[1] - while curr is not root: - yield curr[2] - curr = curr[1] - - def __reversed__(self): - 'od.__reversed__() <==> reversed(od)' - root = self.__root - curr = root[0] - while curr is not root: - yield curr[2] - curr = curr[0] - - def clear(self): - 'od.clear() -> None. Remove all items from od.' - try: - for node in self.__map.itervalues(): - del node[:] - root = self.__root - root[:] = [root, root, None] - self.__map.clear() - except AttributeError: - pass - dict.clear(self) - - def popitem(self, last=True): - '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if false. 
- - ''' - if not self: - raise KeyError('dictionary is empty') - root = self.__root - if last: - link = root[0] - link_prev = link[0] - link_prev[1] = root - root[0] = link_prev - else: - link = root[1] - link_next = link[1] - root[1] = link_next - link_next[0] = root - key = link[2] - del self.__map[key] - value = dict.pop(self, key) - return key, value - - # -- the following methods do not depend on the internal structure -- - - def keys(self): - 'od.keys() -> list of keys in od' - return list(self) - - def values(self): - 'od.values() -> list of values in od' - return [self[key] for key in self] - - def items(self): - 'od.items() -> list of (key, value) pairs in od' - return [(key, self[key]) for key in self] - - def iterkeys(self): - 'od.iterkeys() -> an iterator over the keys in od' - return iter(self) - - def itervalues(self): - 'od.itervalues -> an iterator over the values in od' - for k in self: - yield self[k] - - def iteritems(self): - 'od.iteritems -> an iterator over the (key, value) items in od' - for k in self: - yield (k, self[k]) - - def update(*args, **kwds): - '''od.update(E, **F) -> None. Update od from dict/iterable E and F. - - If E is a dict instance, does: for k in E: od[k] = E[k] - If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] - Or if E is an iterable of items, does: for k, v in E: od[k] = v - In either case, this is followed by: for k, v in F.items(): od[k] = v - - ''' - if len(args) > 2: - raise TypeError('update() takes at most 2 positional ' - 'arguments (%d given)' % (len(args),)) - elif not args: - raise TypeError('update() takes at least 1 argument (0 given)') - self = args[0] - # Make progressively weaker assumptions about "other" - other = () - if len(args) == 2: - other = args[1] - if isinstance(other, dict): - for key in other: - self[key] = other[key] - elif hasattr(other, 'keys'): - for key in other.keys(): - self[key] = other[key] - else: - for key, value in other: - self[key] = value - for key, value in kwds.items(): - self[key] = value - - __update = update # let subclasses override update without breaking __init__ - - __marker = object() - - def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - - ''' - if key in self: - result = self[key] - del self[key] - return result - if default is self.__marker: - raise KeyError(key) - return default - - def setdefault(self, key, default=None): - 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' - if key in self: - return self[key] - self[key] = default - return default - - def __repr__(self, _repr_running={}): - 'od.__repr__() <==> repr(od)' - call_key = id(self), _get_ident() - if call_key in _repr_running: - return '...' 
- _repr_running[call_key] = 1 - try: - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) - finally: - del _repr_running[call_key] - - def __reduce__(self): - 'Return state information for pickling' - items = [[k, self[k]] for k in self] - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def copy(self): - 'od.copy() -> a shallow copy of od' - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S - and values equal to v (which defaults to None). - - ''' - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive - while comparison to a regular mapping is order-insensitive. - - ''' - if isinstance(other, OrderedDict): - return len(self) == len(other) and self.items() == other.items() - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other - - # -- the following methods are only used in Python 2.7 -- - - def viewkeys(self): - "od.viewkeys() -> a set-like object providing a view on od's keys" - return KeysView(self) - - def viewvalues(self): - "od.viewvalues() -> an object providing a view on od's values" - return ValuesView(self) - - def viewitems(self): - "od.viewitems() -> a set-like object providing a view on od's items" - return ItemsView(self) - - -## {{{ http://code.activestate.com/recipes/576611/ (r11) - -try: - from operator import itemgetter - from heapq import nlargest - from itertools import repeat, ifilter -except ImportError: - pass - - -class _Counter(dict): - '''Dict subclass for counting hashable objects. Sometimes called a bag - or multiset. Elements are stored as dictionary keys and their counts - are stored as dictionary values. - - >>> Counter('zyzygy') - Counter({'y': 3, 'z': 2, 'g': 1}) - - ''' - - def __init__(self, iterable=None, **kwds): - '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from another mapping - of elements to their counts. - - >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args - - ''' - self.update(iterable, **kwds) - - def __missing__(self, key): - return 0 - - def most_common(self, n=None): - '''List the n most common elements and their counts from the most - common to the least. If n is None, then list all element counts. - - >>> Counter('abracadabra').most_common(3) - [('a', 5), ('r', 2), ('b', 2)] - - ''' - if n is None: - return sorted(self.iteritems(), key=itemgetter(1), reverse=True) - return nlargest(n, self.iteritems(), key=itemgetter(1)) - - def elements(self): - '''Iterator over elements repeating each as many times as its count. - - >>> c = Counter('ABCABC') - >>> sorted(c.elements()) - ['A', 'A', 'B', 'B', 'C', 'C'] - - If an element's count has been set to zero or is a negative number, - elements() will ignore it. - - ''' - for elem, count in self.iteritems(): - for _ in repeat(None, count): - yield elem - - # Override dict methods where the meaning changes for Counter objects. 
- - @classmethod - def fromkeys(cls, iterable, v=None): - raise NotImplementedError( - 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - - def update(self, iterable=None, **kwds): - '''Like dict.update() but add counts instead of replacing them. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.update('witch') # add elements from another iterable - >>> d = Counter('watch') - >>> c.update(d) # add elements from another counter - >>> c['h'] # four 'h' in which, witch, and watch - 4 - - ''' - if iterable is not None: - if hasattr(iterable, 'iteritems'): - if self: - self_get = self.get - for elem, count in iterable.iteritems(): - self[elem] = self_get(elem, 0) + count - else: - dict.update( - self, iterable) # fast path when counter is empty - else: - self_get = self.get - for elem in iterable: - self[elem] = self_get(elem, 0) + 1 - if kwds: - self.update(kwds) - - def copy(self): - 'Like dict.copy() but returns a Counter instance instead of a dict.' - return Counter(self) - - def __delitem__(self, elem): - 'Like dict.__delitem__() but does not raise KeyError for missing values.' - if elem in self: - dict.__delitem__(self, elem) - - def __repr__(self): - if not self: - return '%s()' % self.__class__.__name__ - items = ', '.join(map('%r: %r'.__mod__, self.most_common())) - return '%s({%s})' % (self.__class__.__name__, items) - - # Multiset-style mathematical operations discussed in: - # Knuth TAOCP Volume II section 4.6.3 exercise 19 - # and at http://en.wikipedia.org/wiki/Multiset - # - # Outputs guaranteed to only include positive counts. - # - # To strip negative and zero counts, add-in an empty counter: - # c += Counter() - - def __add__(self, other): - '''Add counts from two counters. - - >>> Counter('abbb') + Counter('bcc') - Counter({'b': 4, 'c': 2, 'a': 1}) - - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem in set(self) | set(other): - newcount = self[elem] + other[elem] - if newcount > 0: - result[elem] = newcount - return result - - def __sub__(self, other): - ''' Subtract count, but keep only results with positive counts. - - >>> Counter('abbbc') - Counter('bccd') - Counter({'b': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem in set(self) | set(other): - newcount = self[elem] - other[elem] - if newcount > 0: - result[elem] = newcount - return result - - def __or__(self, other): - '''Union is the maximum of value in either of the input counters. - - >>> Counter('abbb') | Counter('bcc') - Counter({'b': 3, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - _max = max - result = Counter() - for elem in set(self) | set(other): - newcount = _max(self[elem], other[elem]) - if newcount > 0: - result[elem] = newcount - return result - - def __and__(self, other): - ''' Intersection is the minimum of corresponding counts. 
- - >>> Counter('abbb') & Counter('bcc') - Counter({'b': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - _min = min - result = Counter() - if len(self) < len(other): - self, other = other, self - for elem in ifilter(self.__contains__, other): - newcount = _min(self[elem], other[elem]) - if newcount > 0: - result[elem] = newcount - return result - -if sys.version_info[:2] < (2, 7): - OrderedDict = _OrderedDict - Counter = _Counter -else: - from collections import OrderedDict, Counter - -# http://stackoverflow.com/questions/4126348 -# Thanks to @martineau at SO - -class OrderedDefaultdict(OrderedDict): - def __init__(self, *args, **kwargs): - newdefault = None - newargs = () - if args: - newdefault = args[0] - if not (newdefault is None or callable(newdefault)): - raise TypeError('first argument must be callable or None') - newargs = args[1:] - self.default_factory = newdefault - super(self.__class__, self).__init__(*newargs, **kwargs) - - def __missing__ (self, key): - if self.default_factory is None: - raise KeyError(key) - self[key] = value = self.default_factory() - return value - - def __reduce__(self): # optional, for pickle support - args = self.default_factory if self.default_factory else tuple() - return type(self), args, None, None, self.items() diff --git a/pandas/util/counter.py b/pandas/util/counter.py index 29e8906fdee38..75f7b214ce6a5 100644 --- a/pandas/util/counter.py +++ b/pandas/util/counter.py @@ -1,9 +1,11 @@ # This is copied from collections in Python 2.7, for compatibility with older # versions of Python. It can be dropped when we depend on Python 2.7/3.1 +from pandas import compat import heapq as _heapq from itertools import repeat as _repeat, chain as _chain, starmap as _starmap from operator import itemgetter as _itemgetter +from pandas.compat import map try: from collections import Mapping @@ -92,8 +94,8 @@ def most_common(self, n=None): ''' # Emulate Bag.sortedByCount from Smalltalk if n is None: - return sorted(self.iteritems(), key=_itemgetter(1), reverse=True) - return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1)) + return sorted(compat.iteritems(self), key=_itemgetter(1), reverse=True) + return _heapq.nlargest(n, compat.iteritems(self), key=_itemgetter(1)) def elements(self): '''Iterator over elements repeating each as many times as its count. @@ -115,7 +117,7 @@ def elements(self): ''' # Emulate Bag.do from Smalltalk and Multiset.begin from C++. - return _chain.from_iterable(_starmap(_repeat, self.iteritems())) + return _chain.from_iterable(_starmap(_repeat, compat.iteritems(self))) # Override dict methods where necessary @@ -150,7 +152,7 @@ def update(self, iterable=None, **kwds): if isinstance(iterable, Mapping): if self: self_get = self.get - for elem, count in iterable.iteritems(): + for elem, count in compat.iteritems(iterable): self[elem] = self_get(elem, 0) + count else: # fast path when counter is empty diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 97b2ee3353fa3..8c6744cbf2963 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,11 +1,11 @@ -from pandas.util.py3compat import StringIO +from pandas.compat import StringIO, callable from pandas.lib import cache_readonly import sys import warnings def deprecate(name, alternative): - alt_name = alternative.func_name + alt_name = alternative.__name__ def wrapper(*args, **kwargs): warnings.warn("%s is deprecated. 
Use %s instead" % (name, alt_name), @@ -107,7 +107,7 @@ def __call__(self, func): def indent(text, indents=1): - if not text or type(text) != str: + if not text or not isinstance(text, str): return '' jointext = ''.join(['\n'] + [' '] * indents) return jointext.join(text.split('\n')) diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py deleted file mode 100644 index dcc877b094dda..0000000000000 --- a/pandas/util/py3compat.py +++ /dev/null @@ -1,40 +0,0 @@ -import sys - -PY3 = (sys.version_info[0] >= 3) - -if PY3: - def isidentifier(s): - return s.isidentifier() - - def str_to_bytes(s, encoding='ascii'): - return s.encode(encoding) - - def bytes_to_str(b, encoding='utf-8'): - return b.decode(encoding) - - lzip = lambda *args: list(zip(*args)) -else: - # Python 2 - import re - _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") - - def isidentifier(s, dotted=False): - return bool(_name_re.match(s)) - - def str_to_bytes(s, encoding='ascii'): - return s - - def bytes_to_str(b, encoding='ascii'): - return b - - lzip = zip - -try: - from cStringIO import StringIO -except: - from io import StringIO - -try: - from io import BytesIO -except: - from cStringIO import StringIO as BytesIO diff --git a/pandas/util/terminal.py b/pandas/util/terminal.py index 3b5f893d1a0b3..fc985855d2682 100644 --- a/pandas/util/terminal.py +++ b/pandas/util/terminal.py @@ -11,6 +11,7 @@ It is mentioned in the stackoverflow response that this code works on linux, os x, windows and cygwin (windows). """ +from __future__ import print_function import os @@ -117,4 +118,4 @@ def ioctl_GWINSZ(fd): if __name__ == "__main__": sizex, sizey = get_terminal_size() - print ('width = %s height = %s' % (sizex, sizey)) + print('width = %s height = %s' % (sizex, sizey)) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 7b2960ef498e1..0628d6705c769 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -13,8 +13,6 @@ from datetime import datetime from functools import wraps from contextlib import contextmanager -from httplib import HTTPException -from urllib2 import urlopen from distutils.version import LooseVersion from numpy.random import randn @@ -26,11 +24,17 @@ import pandas.core.frame as frame import pandas.core.panel as panel import pandas.core.panel4d as panel4d +import pandas.compat as compat +from pandas.compat import( + map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter +) from pandas import bdate_range from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex +from pandas.io.common import urlopen, HTTPException + Index = index.Index MultiIndex = index.MultiIndex Series = series.Series @@ -45,12 +49,13 @@ def rands(n): choices = string.ascii_letters + string.digits - return ''.join(random.choice(choices) for _ in xrange(n)) + return ''.join(random.choice(choices) for _ in range(n)) def randu(n): - choices = u"".join(map(unichr, range(1488, 1488 + 26))) + string.digits - return ''.join([random.choice(choices) for _ in xrange(n)]) + choices = u("").join(map(unichr, lrange(1488, 1488 + 26))) + choices += string.digits + return ''.join([random.choice(choices) for _ in range(n)]) #------------------------------------------------------------------------------ # Console debugging tools @@ -115,16 +120,29 @@ def equalContents(arr1, arr2): return frozenset(arr1) == frozenset(arr2) +def assert_isinstance(obj, class_type_or_tuple): + """asserts that obj is an instance of class_type_or_tuple""" + assert isinstance(obj, class_type_or_tuple), ( + 
"Expected object to be of type %r, found %r instead" % ( + type(obj), class_type_or_tuple)) + + def isiterable(obj): return hasattr(obj, '__iter__') +def assert_isinstance(obj, class_type_or_tuple): + """asserts that obj is an instance of class_type_or_tuple""" + assert isinstance(obj, class_type_or_tuple), ( + "Expected object to be of type %r, found %r instead" % (type(obj), class_type_or_tuple)) + + def assert_almost_equal(a, b, check_less_precise = False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) - if isinstance(a, basestring): - assert a == b, "%r != %r" % (a, b) + if isinstance(a, compat.string_types): + assert a == b, "%s != %s" % (a, b) return True if isiterable(a): @@ -135,7 +153,7 @@ def assert_almost_equal(a, b, check_less_precise = False): if np.array_equal(a, b): return True else: - for i in xrange(na): + for i in range(na): assert_almost_equal(a[i], b[i], check_less_precise) return True @@ -191,7 +209,7 @@ def assert_series_equal(left, right, check_dtype=True, check_series_type=False, check_less_precise=False): if check_series_type: - assert(type(left) == type(right)) + assert_isinstance(left, type(right)) assert_almost_equal(left.values, right.values, check_less_precise) if check_dtype: assert(left.dtype == right.dtype) @@ -200,7 +218,7 @@ def assert_series_equal(left, right, check_dtype=True, else: assert(left.index.equals(right.index)) if check_index_type: - assert(type(left.index) == type(right.index)) + assert_isinstance(left.index, type(right.index)) assert(left.index.dtype == right.index.dtype) assert(left.index.inferred_type == right.index.inferred_type) if check_index_freq: @@ -215,9 +233,9 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=False, check_names=True): if check_frame_type: - assert(type(left) == type(right)) - assert(isinstance(left, DataFrame)) - assert(isinstance(right, DataFrame)) + assert_isinstance(left, type(right)) + assert_isinstance(left, DataFrame) + assert_isinstance(right, DataFrame) if check_less_precise: assert_almost_equal(left.columns,right.columns) @@ -236,11 +254,11 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise) if check_index_type: - assert(type(left.index) == type(right.index)) + assert_isinstance(left.index, type(right.index)) assert(left.index.dtype == right.index.dtype) assert(left.index.inferred_type == right.index.inferred_type) if check_column_type: - assert(type(left.columns) == type(right.columns)) + assert_isinstance(left.columns, type(right.columns)) assert(left.columns.dtype == right.columns.dtype) assert(left.columns.inferred_type == right.columns.inferred_type) if check_names: @@ -252,13 +270,13 @@ def assert_panel_equal(left, right, check_panel_type=False, check_less_precise=False): if check_panel_type: - assert(type(left) == type(right)) + assert_isinstance(left, type(right)) assert(left.items.equals(right.items)) assert(left.major_axis.equals(right.major_axis)) assert(left.minor_axis.equals(right.minor_axis)) - for col, series in left.iterkv(): + for col, series in compat.iteritems(left): assert(col in right) assert_frame_equal(series, right[col], check_less_precise=check_less_precise, check_names=False) # TODO strangely check_names fails in py3 ? 
@@ -273,7 +291,7 @@ def assert_panel4d_equal(left, right, assert(left.major_axis.equals(right.major_axis)) assert(left.minor_axis.equals(right.minor_axis)) - for col, series in left.iterkv(): + for col, series in compat.iteritems(left): assert(col in right) assert_panel_equal(series, right[col], check_less_precise=check_less_precise) @@ -291,15 +309,15 @@ def getCols(k): def makeStringIndex(k): - return Index([rands(10) for _ in xrange(k)]) + return Index([rands(10) for _ in range(k)]) def makeUnicodeIndex(k): - return Index([randu(10) for _ in xrange(k)]) + return Index([randu(10) for _ in range(k)]) def makeIntIndex(k): - return Index(range(k)) + return Index(lrange(k)) def makeFloatIndex(k): @@ -427,7 +445,6 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, if unspecified, string labels will be generated. """ - from pandas.util.compat import Counter if ndupe_l is None: ndupe_l = [1] * nlevels assert (_is_sequence(ndupe_l) and len(ndupe_l) <= nlevels) @@ -444,7 +461,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, names = None # make singleton case uniform - if isinstance(names, basestring) and nlevels == 1: + if isinstance(names, compat.string_types) and nlevels == 1: names = [names] # specific 1D index type requested? @@ -471,7 +488,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, def keyfunc(x): import re numeric_tuple = re.sub("[^\d_]_?","",x).split("_") - return map(int,numeric_tuple) + return lmap(int,numeric_tuple) # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 @@ -483,7 +500,7 @@ def keyfunc(x): result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] tuples.append(result) - tuples = lzip(*tuples) + tuples = lzip(*tuples) # convert tuples to index if nentries == 1: @@ -725,11 +742,12 @@ def network(t, raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, A test can be decorated as requiring network like this:: >>> from pandas.util.testing import network - >>> import urllib2 + >>> from pandas.io.common import urlopen >>> import nose >>> @network ... def test_network(): - ... urllib2.urlopen("rabbit://bonanza.com") + ... with urlopen("rabbit://bonanza.com") as f: + ... pass ... >>> try: ... test_network() @@ -743,7 +761,8 @@ def network(t, raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, >>> @network(raise_on_error=True) ... def test_network(): - ... urllib2.urlopen("complaint://deadparrot.com") + ... with urlopen("complaint://deadparrot.com") as f: + ... pass ... >>> test_network() Traceback (most recent call last): @@ -831,7 +850,7 @@ def with_connectivity_check(t, url="http://www.google.com", t : callable The test requiring network connectivity. url : path - The url to test via ``urllib2.urlopen`` to check for connectivity. + The url to test via ``pandas.io.common.urlopen`` to check for connectivity. Defaults to 'http://www.google.com'. raise_on_error : bool If True, never catches errors.
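[note: the script and vb_suite diffs below all lean on the list-returning shims exposed by pandas.compat (lrange, lmap, lzip) and the u() literal helper. A rough sketch of that shim pattern, assuming only the standard library; the real pandas.compat exports many more names:

    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        # py3 builtins return iterators/views, so provide list-returning variants
        def lrange(*args, **kwargs):
            return list(range(*args, **kwargs))

        def lmap(*args, **kwargs):
            return list(map(*args, **kwargs))

        def lzip(*args, **kwargs):
            return list(zip(*args, **kwargs))

        def u(s):
            return s  # every py3 str literal is already unicode
    else:
        # py2 builtins already return lists
        lrange, lmap, lzip = range, map, zip

        def u(s):
            return unicode(s, "unicode_escape")

Call sites that genuinely need a list, e.g. columns=lrange(K) or Index(lmap(f, dates)) in the diffs below, import these names, while plain loop counters can keep using the lazy range.]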
diff --git a/scripts/bench_join.py b/scripts/bench_join.py index be24dac810aee..5e50e8da61fdb 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -1,3 +1,4 @@ +from pandas.compat import range, lrange import numpy as np import pandas.lib as lib from pandas import * @@ -27,8 +28,8 @@ a_series = Series(av, index=a) b_series = Series(bv, index=b) -a_frame = DataFrame(avf, index=a, columns=range(K)) -b_frame = DataFrame(bvf, index=b, columns=range(K, 2 * K)) +a_frame = DataFrame(avf, index=a, columns=lrange(K)) +b_frame = DataFrame(bvf, index=b, columns=lrange(K, 2 * K)) def do_left_join(a, b, av, bv): @@ -77,7 +78,7 @@ def do_left_join_python(a, b, av, bv): def _take_multi(data, indexer, out): if not data.flags.c_contiguous: data = data.copy() - for i in xrange(data.shape[0]): + for i in range(data.shape[0]): data[i].take(indexer, out=out[i]) @@ -162,8 +163,8 @@ def bench_python(n=100000, pct_overlap=0.20, K=1): avf = np.random.randn(n, K) bvf = np.random.randn(n, K) - a_frame = DataFrame(avf, index=a, columns=range(K)) - b_frame = DataFrame(bvf, index=b, columns=range(K, 2 * K)) + a_frame = DataFrame(avf, index=a, columns=lrange(K)) + b_frame = DataFrame(bvf, index=b, columns=lrange(K, 2 * K)) all_results[logn] = result = {} diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index cdac37f289bb8..7b93112b7f869 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -1,26 +1,26 @@ from pandas import * import numpy as np -from itertools import izip +from pandas.compat import zip, range, lzip from pandas.util.testing import rands import pandas.lib as lib N = 100000 -key1 = [rands(10) for _ in xrange(N)] -key2 = [rands(10) for _ in xrange(N)] +key1 = [rands(10) for _ in range(N)] +key2 = [rands(10) for _ in range(N)] -zipped = izip(key1, key2) +zipped = lzip(key1, key2) def _zip(*args): arr = np.empty(N, dtype=object) - arr[:] = zip(*args) + arr[:] = lzip(*args) return arr def _zip2(*args): - return lib.list_to_object_array(zip(*args)) + return lib.list_to_object_array(lzip(*args)) index = MultiIndex.from_arrays([key1, key2]) to_join = DataFrame({'j1': np.random.randn(100000)}, index=index) diff --git a/scripts/bench_refactor.py b/scripts/bench_refactor.py index 3d0c7e40ced7d..dafba371e995a 100644 --- a/scripts/bench_refactor.py +++ b/scripts/bench_refactor.py @@ -1,4 +1,5 @@ from pandas import * +from pandas.compat import range try: import pandas.core.internals as internals reload(internals) @@ -17,7 +18,7 @@ def horribly_unconsolidated(): df = DataMatrix(index=index) - for i in xrange(K): + for i in range(K): df[i] = float(K) return df @@ -25,13 +26,13 @@ def horribly_unconsolidated(): def bench_reindex_index(df, it=100): new_idx = np.arange(0, N, 2) - for i in xrange(it): + for i in range(it): df.reindex(new_idx) def bench_reindex_columns(df, it=100): new_cols = np.arange(0, K, 2) - for i in xrange(it): + for i in range(it): df.reindex(columns=new_cols) @@ -39,7 +40,7 @@ def bench_join_index(df, it=10): left = df.reindex(index=np.arange(0, N, 2), columns=np.arange(K // 2)) right = df.reindex(columns=np.arange(K // 2 + 1, K)) - for i in xrange(it): + for i in range(it): joined = left.join(right) if __name__ == '__main__': diff --git a/scripts/file_sizes.py b/scripts/file_sizes.py index 8720730d2bb10..de03c72ffbd09 100644 --- a/scripts/file_sizes.py +++ b/scripts/file_sizes.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os import sys @@ -6,6 +7,7 @@ from pandas import DataFrame from pandas.util.testing import set_trace 
+from pandas import compat dirs = [] names = [] @@ -154,13 +156,13 @@ def x(): def doit(): for directory, _, files in walked: - print directory + print(directory) for path in files: if not _should_count_file(path): continue full_path = os.path.join(directory, path) - print full_path + print(full_path) lines = len(open(full_path).readlines()) dirs.append(directory) @@ -174,7 +176,7 @@ def doit(): def doit2(): counts = {} for directory, _, files in walked: - print directory + print(directory) for path in files: if not _should_count_file(path) or path.startswith('test_'): continue @@ -189,7 +191,7 @@ def doit2(): # counts = _get_file_function_lengths('pandas/tests/test_series.py') all_counts = [] -for k, v in counts.iteritems(): +for k, v in compat.iteritems(counts): all_counts.extend(v) all_counts = np.array(all_counts) diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index d23889ec80d05..e4c24b8c3bcbb 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -4,6 +4,8 @@ # copyright 2013, y-p @ github from __future__ import print_function +from pandas.compat import range, lrange, map +import pandas.compat as compat """Search the git history for all commits touching a named method @@ -15,7 +16,7 @@ import re import os from collections import namedtuple -from dateutil import parser +from pandas.compat import parse_date try: import sh @@ -93,11 +94,11 @@ def get_hits(defname,files=()): def get_commit_info(c,fmt,sep='\t'): r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) - return unicode(r).split(sep) + return compat.text_type(r).split(sep) def get_commit_vitals(c,hlen=HASH_LEN): h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") - return h[:hlen],s,parser.parse(d) + return h[:hlen],s,parse_date(d) def file_filter(state,dirname,fnames): if args.dir_masks and not any([re.search(x,dirname) for x in args.dir_masks]): @@ -159,7 +160,7 @@ def sorter(i): print("\nThese commits touched the %s method in these files on these dates:\n" \ % args.funcname) - for i in sorted(range(len(hits)),key=sorter): + for i in sorted(lrange(len(hits)),key=sorter): hit = hits[i] h,s,d=get_commit_vitals(hit.commit) p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] @@ -182,11 +183,11 @@ def main(): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
""") return - if isinstance(args.file_masks,basestring): + if isinstance(args.file_masks,compat.string_types): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks,basestring): + if isinstance(args.path_masks,compat.string_types): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks,basestring): + if isinstance(args.dir_masks,compat.string_types): args.dir_masks = args.dir_masks.split(',') logger.setLevel(getattr(logging,args.debug_level)) diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py index 4a4099afc9a2a..f6bcd43185fa6 100755 --- a/scripts/find_undoc_args.py +++ b/scripts/find_undoc_args.py @@ -41,18 +41,18 @@ def entry_gen(root_ns,module_name): seen.add(cand.__name__) q.insert(0,cand) elif (isinstance(cand,(types.MethodType,types.FunctionType)) and - cand not in seen and cand.func_doc): + cand not in seen and cand.__doc__): seen.add(cand) yield cand def cmp_docstring_sig(f): def build_loc(f): - path=f.func_code.co_filename.split(args.path,1)[-1][1:] - return dict(path=path,lnum=f.func_code.co_firstlineno) + path=f.__code__.co_filename.split(args.path,1)[-1][1:] + return dict(path=path,lnum=f.__code__.co_firstlineno) import inspect sig_names=set(inspect.getargspec(f).args) - doc = f.func_doc.lower() + doc = f.__doc__.lower() doc = re.split("^\s*parameters\s*",doc,1,re.M)[-1] doc = re.split("^\s*returns*",doc,1,re.M)[0] doc_names={x.split(":")[0].strip() for x in doc.split("\n") diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py index c64b33d71ea2a..02ba4f57c189d 100644 --- a/scripts/gen_release_notes.py +++ b/scripts/gen_release_notes.py @@ -1,7 +1,7 @@ +from __future__ import print_function import sys -import urllib2 import json -from contextlib import closing +from pandas.io.common import urlopen from datetime import datetime @@ -48,8 +48,7 @@ def get_issues(): def _get_page(page_number): gh_url = ('https://api.github.com/repos/pydata/pandas/issues?' 
'milestone=*&state=closed&assignee=*&page=%d') % page_number - req = urllib2.Request(gh_url) - with closing(urllib2.urlopen(req)) as resp: + with urlopen(gh_url) as resp: rs = resp.readlines()[0] jsondata = json.loads(rs) issues = [Issue(x['title'], x['labels'], x['number'], @@ -93,4 +92,4 @@ def release_notes(milestone): if __name__ == '__main__': rs = release_notes(sys.argv[1]) - print rs + print(rs) diff --git a/scripts/git_code_churn.py b/scripts/git_code_churn.py index 3e999aec1ad33..18c9b244a6ba0 100644 --- a/scripts/git_code_churn.py +++ b/scripts/git_code_churn.py @@ -1,4 +1,3 @@ -from dateutil import parser import subprocess import os import re diff --git a/scripts/groupby_sample.py b/scripts/groupby_sample.py index 8685b2bbe8ff7..42008858d3cad 100644 --- a/scripts/groupby_sample.py +++ b/scripts/groupby_sample.py @@ -1,6 +1,7 @@ from pandas import * import numpy as np import string +import pandas.compat as compat g1 = np.array(list(string.letters))[:-1] g2 = np.arange(510) @@ -30,7 +31,7 @@ def random_sample_v2(): grouped = df.groupby(['group1', 'group2'])['value'] from random import choice choose = lambda group: choice(group.index) - indices = [choice(v) for k, v in grouped.groups.iteritems()] + indices = [choice(v) for k, v in compat.iteritems(grouped.groups)] return df.reindex(indices) @@ -43,7 +44,7 @@ def do_shuffle(arr): def shuffle_uri(df, grouped): perm = np.r_[tuple([np.random.permutation( - idxs) for idxs in grouped.groups.itervalues()])] + idxs) for idxs in compat.itervalues(grouped.groups)])] df['state_permuted'] = np.asarray(df.ix[perm]['value']) df2 = df.copy() diff --git a/scripts/groupby_speed.py b/scripts/groupby_speed.py index a25b00206733d..4e60c34556968 100644 --- a/scripts/groupby_speed.py +++ b/scripts/groupby_speed.py @@ -1,3 +1,4 @@ +from __future__ import print_function from pandas import * rng = DateRange('1/3/2011', '11/30/2011', offset=datetools.Minute()) @@ -23,12 +24,12 @@ def get2(dt): def f(): for i, date in enumerate(df.index): if i % 10000 == 0: - print i + print(i) get1(date) def g(): for i, date in enumerate(df.index): if i % 10000 == 0: - print i + print(i) get2(date) diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py index 76c9cb0cb3bc5..3425f0cd98723 100644 --- a/scripts/groupby_test.py +++ b/scripts/groupby_test.py @@ -8,6 +8,7 @@ import pandas.lib as tseries import pandas.core.groupby as gp import pandas.util.testing as tm +from pandas.compat import range reload(gp) """ diff --git a/scripts/hdfstore_panel_perf.py b/scripts/hdfstore_panel_perf.py index d344fc80943ca..06c2a15bdc7c2 100644 --- a/scripts/hdfstore_panel_perf.py +++ b/scripts/hdfstore_panel_perf.py @@ -1,13 +1,14 @@ from pandas import * from pandas.util.testing import rands +from pandas.compat import range i, j, k = 7, 771, 5532 panel = Panel(np.random.randn(i, j, k), - items=[rands(10) for _ in xrange(i)], + items=[rands(10) for _ in range(i)], major_axis=DateRange('1/1/2000', periods=j, offset=datetools.Minute()), - minor_axis=[rands(10) for _ in xrange(k)]) + minor_axis=[rands(10) for _ in range(k)]) store = HDFStore('test.h5') diff --git a/scripts/json_manip.py b/scripts/json_manip.py index e76a99cca344a..72d0bbb34d6b6 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -65,15 +65,17 @@ themselves. 
""" +from __future__ import print_function -from collections import Counter, namedtuple +from collections import namedtuple import csv import itertools from itertools import product from operator import attrgetter as aget, itemgetter as iget import operator import sys - +from pandas.compat import map, u, callable, Counter +import pandas.compat as compat ## note 'url' appears multiple places and not all extensions have same struct @@ -89,77 +91,77 @@ } ## much longer example -ex2 = {u'metadata': {u'accessibilities': [{u'name': u'accessibility.tabfocus', - u'value': 7}, - {u'name': u'accessibility.mouse_focuses_formcontrol', u'value': False}, - {u'name': u'accessibility.browsewithcaret', u'value': False}, - {u'name': u'accessibility.win32.force_disabled', u'value': False}, - {u'name': u'accessibility.typeaheadfind.startlinksonly', u'value': False}, - {u'name': u'accessibility.usebrailledisplay', u'value': u''}, - {u'name': u'accessibility.typeaheadfind.timeout', u'value': 5000}, - {u'name': u'accessibility.typeaheadfind.enabletimeout', u'value': True}, - {u'name': u'accessibility.tabfocus_applies_to_xul', u'value': False}, - {u'name': u'accessibility.typeaheadfind.flashBar', u'value': 1}, - {u'name': u'accessibility.typeaheadfind.autostart', u'value': True}, - {u'name': u'accessibility.blockautorefresh', u'value': False}, - {u'name': u'accessibility.browsewithcaret_shortcut.enabled', - u'value': True}, - {u'name': u'accessibility.typeaheadfind.enablesound', u'value': True}, - {u'name': u'accessibility.typeaheadfind.prefillwithselection', - u'value': True}, - {u'name': u'accessibility.typeaheadfind.soundURL', u'value': u'beep'}, - {u'name': u'accessibility.typeaheadfind', u'value': False}, - {u'name': u'accessibility.typeaheadfind.casesensitive', u'value': 0}, - {u'name': u'accessibility.warn_on_browsewithcaret', u'value': True}, - {u'name': u'accessibility.usetexttospeech', u'value': u''}, - {u'name': u'accessibility.accesskeycausesactivation', u'value': True}, - {u'name': u'accessibility.typeaheadfind.linksonly', u'value': False}, - {u'name': u'isInstantiated', u'value': True}], - u'extensions': [{u'id': u'216ee7f7f4a5b8175374cd62150664efe2433a31', - u'isEnabled': True}, - {u'id': u'1aa53d3b720800c43c4ced5740a6e82bb0b3813e', u'isEnabled': False}, - {u'id': u'01ecfac5a7bd8c9e27b7c5499e71c2d285084b37', u'isEnabled': True}, - {u'id': u'1c01f5b22371b70b312ace94785f7b0b87c3dfb2', u'isEnabled': True}, - {u'id': u'fb723781a2385055f7d024788b75e959ad8ea8c3', u'isEnabled': True}], - u'fxVersion': u'9.0', - u'location': u'zh-CN', - u'operatingSystem': u'WINNT Windows NT 5.1', - u'surveyAnswers': u'', - u'task_guid': u'd69fbd15-2517-45b5-8a17-bb7354122a75', - u'tpVersion': u'1.2', - u'updateChannel': u'beta'}, - u'survey_data': { - u'extensions': [{u'appDisabled': False, - u'id': u'testpilot?labs.mozilla.com', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'Test Pilot'}, - {u'appDisabled': True, - u'id': u'dict?www.youdao.com', - u'isCompatible': False, - u'isEnabled': False, - u'isPlatformCompatible': True, - u'name': u'Youdao Word Capturer'}, - {u'appDisabled': False, - u'id': u'jqs?sun.com', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'Java Quick Starter'}, - {u'appDisabled': False, - u'id': u'?20a82645-c095-46ed-80e3-08825760534b?', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'Microsoft .NET Framework Assistant'}, - {u'appDisabled': False, - u'id': 
u'?a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7?', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'WOT'}], - u'version_number': 1}} +ex2 = {u('metadata'): {u('accessibilities'): [{u('name'): u('accessibility.tabfocus'), + u('value'): 7}, + {u('name'): u('accessibility.mouse_focuses_formcontrol'), u('value'): False}, + {u('name'): u('accessibility.browsewithcaret'), u('value'): False}, + {u('name'): u('accessibility.win32.force_disabled'), u('value'): False}, + {u('name'): u('accessibility.typeaheadfind.startlinksonly'), u('value'): False}, + {u('name'): u('accessibility.usebrailledisplay'), u('value'): u('')}, + {u('name'): u('accessibility.typeaheadfind.timeout'), u('value'): 5000}, + {u('name'): u('accessibility.typeaheadfind.enabletimeout'), u('value'): True}, + {u('name'): u('accessibility.tabfocus_applies_to_xul'), u('value'): False}, + {u('name'): u('accessibility.typeaheadfind.flashBar'), u('value'): 1}, + {u('name'): u('accessibility.typeaheadfind.autostart'), u('value'): True}, + {u('name'): u('accessibility.blockautorefresh'), u('value'): False}, + {u('name'): u('accessibility.browsewithcaret_shortcut.enabled'), + u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.enablesound'), u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.prefillwithselection'), + u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.soundURL'), u('value'): u('beep')}, + {u('name'): u('accessibility.typeaheadfind'), u('value'): False}, + {u('name'): u('accessibility.typeaheadfind.casesensitive'), u('value'): 0}, + {u('name'): u('accessibility.warn_on_browsewithcaret'), u('value'): True}, + {u('name'): u('accessibility.usetexttospeech'), u('value'): u('')}, + {u('name'): u('accessibility.accesskeycausesactivation'), u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.linksonly'), u('value'): False}, + {u('name'): u('isInstantiated'), u('value'): True}], + u('extensions'): [{u('id'): u('216ee7f7f4a5b8175374cd62150664efe2433a31'), + u('isEnabled'): True}, + {u('id'): u('1aa53d3b720800c43c4ced5740a6e82bb0b3813e'), u('isEnabled'): False}, + {u('id'): u('01ecfac5a7bd8c9e27b7c5499e71c2d285084b37'), u('isEnabled'): True}, + {u('id'): u('1c01f5b22371b70b312ace94785f7b0b87c3dfb2'), u('isEnabled'): True}, + {u('id'): u('fb723781a2385055f7d024788b75e959ad8ea8c3'), u('isEnabled'): True}], + u('fxVersion'): u('9.0'), + u('location'): u('zh-CN'), + u('operatingSystem'): u('WINNT Windows NT 5.1'), + u('surveyAnswers'): u(''), + u('task_guid'): u('d69fbd15-2517-45b5-8a17-bb7354122a75'), + u('tpVersion'): u('1.2'), + u('updateChannel'): u('beta')}, + u('survey_data'): { + u('extensions'): [{u('appDisabled'): False, + u('id'): u('testpilot?labs.mozilla.com'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('Test Pilot')}, + {u('appDisabled'): True, + u('id'): u('dict?www.youdao.com'), + u('isCompatible'): False, + u('isEnabled'): False, + u('isPlatformCompatible'): True, + u('name'): u('Youdao Word Capturer')}, + {u('appDisabled'): False, + u('id'): u('jqs?sun.com'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('Java Quick Starter')}, + {u('appDisabled'): False, + u('id'): u('?20a82645-c095-46ed-80e3-08825760534b?'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('Microsoft .NET Framework Assistant')}, + {u('appDisabled'): False, + u('id'): 
u('?a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7?'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('WOT')}], + u('version_number'): 1}} # class SurveyResult(object): @@ -208,7 +210,7 @@ def _denorm(queries,thing): #print "-- result: ", r if not r: r = [default] - if type(r[0]) is type({}): + if isinstance(r[0], type({})): fields.append(sorted(r[0].keys())) # dicty answers else: fields.append([q]) # stringy answer @@ -224,7 +226,7 @@ def _denorm(queries,thing): U = dict() for (ii,thing) in enumerate(p): #print ii,thing - if type(thing) is type({}): + if isinstance(thing, type({})): U.update(thing) else: U[fields[ii][0]] = thing @@ -267,7 +269,7 @@ def flatten(*stack): """ stack = list(stack) while stack: - try: x = stack[0].next() + try: x = next(stack[0]) except StopIteration: stack.pop(0) continue @@ -281,11 +283,11 @@ def flatten(*stack): def _Q(filter_, thing): """ underlying machinery for Q function recursion """ T = type(thing) - if T is type({}): - for k,v in thing.iteritems(): + if isinstance({}, T): + for k,v in compat.iteritems(thing): #print k,v if filter_ == k: - if type(v) is type([]): + if isinstance(v, type([])): yield iter(v) else: yield v @@ -293,7 +295,7 @@ def _Q(filter_, thing): if type(v) in (type({}),type([])): yield Q(filter_,v) - elif T is type([]): + elif isinstance([], T): for k in thing: #print k yield Q(filter_,k) @@ -315,10 +317,10 @@ def Q(filter_,thing): [3] returns a generator. Use ``Ql`` if you want a list. """ - if type(filter_) is type([]): + if isinstance(filter_, type([])): return flatten(*[_Q(x,thing) for x in filter_]) - elif type(filter_) is type({}): - d = dict.fromkeys(filter_.keys()) + elif isinstance(filter_, type({})): + d = dict.fromkeys(list(filter_.keys())) #print d for k in d: #print flatten(Q(k,thing)) @@ -343,7 +345,7 @@ def Ql(filter_,thing): """ same as Q, but returns a list, not a generator """ res = Q(filter_,thing) - if type(filter_) is type({}): + if isinstance(filter_, type({})): for k in res: res[k] = list(res[k]) return res @@ -386,34 +388,34 @@ def printout(queries,things,default=None, f=sys.stdout, **kwargs): def test_run(): - print "\n>>> print list(Q('url',ex1))" - print list(Q('url',ex1)) + print("\n>>> print list(Q('url',ex1))") + print(list(Q('url',ex1))) assert list(Q('url',ex1)) == ['url1','url2','url3'] assert Ql('url',ex1) == ['url1','url2','url3'] - print "\n>>> print list(Q(['name','id'],ex1))" - print list(Q(['name','id'],ex1)) + print("\n>>> print list(Q(['name','id'],ex1))") + print(list(Q(['name','id'],ex1))) assert Ql(['name','id'],ex1) == ['Gregg','hello','gbye'] - print "\n>>> print Ql('more url',ex1)" - print Ql('more url',ex1) + print("\n>>> print Ql('more url',ex1)") + print(Ql('more url',ex1)) - print "\n>>> list(Q('extensions',ex1))" - print list(Q('extensions',ex1)) + print("\n>>> list(Q('extensions',ex1))") + print(list(Q('extensions',ex1))) - print "\n>>> print Ql('extensions',ex1)" - print Ql('extensions',ex1) + print("\n>>> print Ql('extensions',ex1)") + print(Ql('extensions',ex1)) - print "\n>>> printout(['name','extensions'],[ex1,], extrasaction='ignore')" + print("\n>>> printout(['name','extensions'],[ex1,], extrasaction='ignore')") printout(['name','extensions'],[ex1,], extrasaction='ignore') - print "\n\n" + print("\n\n") from pprint import pprint as pp - print "-- note that the extension fields are also flattened! (and N/A) -- " + print("-- note that the extension fields are also flattened! 
diff --git a/scripts/leak.py b/scripts/leak.py
index 3d704af4f9945..47f74bf020597 100644
--- a/scripts/leak.py
+++ b/scripts/leak.py
@@ -1,4 +1,5 @@
 from pandas import *
+from pandas.compat import range
 import numpy as np
 import pandas.util.testing as tm
 import os
diff --git a/scripts/parser_magic.py b/scripts/parser_magic.py
index c35611350988c..72fef39d8db65 100644
--- a/scripts/parser_magic.py
+++ b/scripts/parser_magic.py
@@ -1,5 +1,6 @@
 from pandas.util.testing import set_trace
 import pandas.util.testing as tm
+import pandas.compat as compat
 from pandas import *
 import ast
@@ -45,7 +46,7 @@ def _format_call(call):
     if args:
         content += ', '.join(args)
     if kwds:
-        fmt_kwds = ['%s=%s' % item for item in kwds.iteritems()]
+        fmt_kwds = ['%s=%s' % item for item in compat.iteritems(kwds)]
         joined_kwds = ', '.join(fmt_kwds)
         if args:
             content = content + ', ' + joined_kwds
diff --git a/scripts/pypistats.py b/scripts/pypistats.py
index e64be63551fde..41343f6d30c76 100644
--- a/scripts/pypistats.py
+++ b/scripts/pypistats.py
@@ -93,7 +93,7 @@ def get_downloads(self):
         result = pd.DataFrame({'downloads': totals,
                                'release_date': first_upload})
         result = result.sort('release_date')
-        result = result.drop(to_omit + rollup.keys())
+        result = result.drop(to_omit + list(rollup.keys()))
         result.index.name = 'release'

         by_date = result.reset_index().set_index('release_date').downloads
diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py
index 6441a69f3a8bf..07161cc6499bf 100644
--- a/scripts/roll_median_leak.py
+++ b/scripts/roll_median_leak.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
 from pandas import *

 import numpy as np
@@ -5,6 +6,7 @@
 from vbench.api import Benchmark
 from pandas.util.testing import rands
+from pandas.compat import range
 import pandas.lib as lib
 import pandas._sandbox as sbx
 import time
@@ -18,7 +20,7 @@
 lst.append([5] * 10000)
 lst.append(np.repeat(np.nan, 1000000))

-for _ in xrange(10000):
-    print proc.get_memory_info()
+for _ in range(10000):
+    print(proc.get_memory_info())
 sdf = SparseDataFrame({'A': lst.to_array()})
 chunk = sdf[sdf['A'] == 5]
diff --git a/scripts/runtests.py b/scripts/runtests.py
index b995db65ac591..e14752b43116b 100644
--- a/scripts/runtests.py
+++ b/scripts/runtests.py
@@ -1,4 +1,5 @@
+from __future__ import print_function
 import os
-print os.getpid()
+print(os.getpid())
 import nose
 nose.main('pandas.core')
diff --git a/scripts/testmed.py b/scripts/testmed.py
index ed0f76cd2f3fb..dd3b952d58c60 100644
--- a/scripts/testmed.py
+++ b/scripts/testmed.py
@@ -2,6 +2,9 @@
 from random import random
 from math import log, ceil
+from pandas.compat import range
+from numpy.random import randn
+from pandas.lib.skiplist import rolling_median

 class Node(object):
@@ -138,8 +141,6 @@ def _test(arr, k):
 _test(arr, K)

-from numpy.random import randn
-from pandas.lib.skiplist import rolling_median

 def test2():
diff --git a/setup.py b/setup.py
index d66ac345aa61a..a99ba88322796 100755
--- a/setup.py
+++ b/setup.py
@@ -40,14 +40,12 @@
 if sys.version_info[1] >= 3:  # 3.3 needs numpy 1.7+
     min_numpy_ver = "1.7.0b2"
-    setuptools_kwargs = {'use_2to3': True,
+    setuptools_kwargs = {
                          'zip_safe': False,
                          'install_requires': ['python-dateutil >= 2',
                                               'pytz >= 2011k',
                                               'numpy >= %s' % min_numpy_ver],
                          'setup_requires': ['numpy >= %s' % min_numpy_ver],
-                         'use_2to3_exclude_fixers': ['lib2to3.fixes.fix_next',
-                                                     ],
                          }
     if not _have_setuptools:
         sys.exit("need setuptools/distribute for Py3k"
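
With use_2to3 gone from setup.py, one source tree has to satisfy both interpreters, which is the job of the pandas.compat names threaded through these scripts. A sketch of the range/lrange split as this patch appears to intend it (an assumption about the era's compat module, not a documented contract):

    from pandas.compat import range, lrange

    lazy = range(10 ** 6)    # xrange on Python 2, builtin range on Python 3
    concrete = lrange(5)     # always a real list: [0, 1, 2, 3, 4]

    # iterate without materializing a million-element list on either version
    total = sum(1 for _ in lazy)
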
diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py
index f38f42c89f5de..ded6a064eebd3 100644
--- a/vb_suite/groupby.py
+++ b/vb_suite/groupby.py
@@ -1,5 +1,6 @@
 from vbench.api import Benchmark
 from datetime import datetime
+from pandas.compat import lmap, map

 common_setup = """from pandas_vb_common import *
 """
@@ -284,12 +285,12 @@ def f(g):
 share_na = 0.1

 dates = date_range('1997-12-31', periods=n_dates, freq='B')
-dates = Index(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))
+dates = Index(lmap(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))

 secid_min = int('10000000', 16)
 secid_max = int('F0000000', 16)
 step = (secid_max - secid_min) // (n_securities - 1)
-security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))
+security_ids = lmap(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))

 data_index = MultiIndex(levels=[dates.values, security_ids],
                         labels=[[i for i in xrange(n_dates) for _ in xrange(n_securities)],
                                 range(n_securities) * n_dates],
diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py
index 1264ae053ffca..a87c95f54c9d5 100644
--- a/vb_suite/indexing.py
+++ b/vb_suite/indexing.py
@@ -106,6 +106,7 @@
                    start_date=datetime(2012, 1, 1))

 setup = common_setup + """
+from pandas.compat import range
 import pandas.core.expressions as expr
 df = DataFrame(np.random.randn(50000, 100))
 df2 = DataFrame(np.random.randn(50000, 100))
diff --git a/vb_suite/make.py b/vb_suite/make.py
index 5a8a8215db9a4..1bea9ae1abaea 100755
--- a/vb_suite/make.py
+++ b/vb_suite/make.py
@@ -71,7 +71,7 @@ def auto_update():
         html()
         upload()
         sendmail()
-    except (Exception, SystemExit), inst:
+    except (Exception, SystemExit) as inst:
         msg += str(inst) + '\n'
         sendmail(msg)
@@ -159,7 +159,7 @@ def _get_config():
         func = funcd.get(arg)
         if func is None:
             raise SystemExit('Do not know how to handle %s; valid args are %s' % (
-                arg, funcd.keys()))
+                arg, list(funcd.keys())))
         func()
     else:
         small_docs = False
diff --git a/vb_suite/measure_memory_consumption.py b/vb_suite/measure_memory_consumption.py
index bb73cf5da4302..8d15b78069b9c 100755
--- a/vb_suite/measure_memory_consumption.py
+++ b/vb_suite/measure_memory_consumption.py
@@ -45,7 +45,7 @@ def main():
         s = Series(results)
         s.sort()

-        print((s))
+        print(s)

     finally:
         shutil.rmtree(TMP_DIR)
diff --git a/vb_suite/parser.py b/vb_suite/parser.py
index 50d37f37708e7..fb9fbc436eaa4 100644
--- a/vb_suite/parser.py
+++ b/vb_suite/parser.py
@@ -44,7 +44,7 @@
                               start_date=datetime(2011, 11, 1))

 setup = common_setup + """
-from cStringIO import StringIO
+from pandas.compat import cStringIO as StringIO
 import os
 N = 10000
 K = 8
@@ -63,7 +63,7 @@
 read_table_multiple_date = Benchmark(cmd, setup,
                                      start_date=sdate)

 setup = common_setup + """
-from cStringIO import StringIO
+from pandas.compat import cStringIO as StringIO
 import os
 N = 10000
 K = 8
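
Both parser.py hunks replace the Python-2-only cStringIO module with a compat alias, presumably resolving to cStringIO.StringIO on 2 and io.StringIO on 3. A hedged usage sketch mirroring what the benchmark setup strings do:

    from pandas.compat import cStringIO as StringIO
    import pandas as pd

    # an in-memory "file" keeps the parser benchmarks off the disk
    buf = StringIO("a,b,c\n1,2,3\n4,5,6\n")
    df = pd.read_csv(buf)
    print(df.shape)   # (2, 3)
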
diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py
index c14a1795f01e0..95aa8893918e8 100755
--- a/vb_suite/perf_HEAD.py
+++ b/vb_suite/perf_HEAD.py
@@ -7,12 +7,11 @@
 """

-import urllib2
-from contextlib import closing
-from urllib2 import urlopen
+from pandas.io.common import urlopen
 import json

 import pandas as pd
+import pandas.compat as compat

 WEB_TIMEOUT = 10
@@ -25,7 +24,7 @@ def get_travis_data():
     if not jobid:
         return None, None

-    with closing(urlopen("https://api.travis-ci.org/workers/")) as resp:
+    with urlopen("https://api.travis-ci.org/workers/") as resp:
         workers = json.loads(resp.read())

     host = njobs = None
@@ -72,7 +71,7 @@ def dump_as_gist(data, desc="The Commit", njobs=None):
     print("\n\n" + "-" * 80)

     gist = json.loads(r.read())
-    file_raw_url = gist['files'].items()[0][1]['raw_url']
+    file_raw_url = list(gist['files'].items())[0][1]['raw_url']
     print("[vbench-gist-raw_url] %s" % file_raw_url)
     print("[vbench-html-url] %s" % gist['html_url'])
     print("[vbench-api-url] %s" % gist['url'])
@@ -104,7 +103,7 @@ def main():
     except Exception as e:
         exit_code = 1
-        if (type(e) == KeyboardInterrupt or
+        if (isinstance(e, KeyboardInterrupt) or
                 'KeyboardInterrupt' in str(d)):
             raise KeyboardInterrupt()
@@ -114,7 +113,7 @@ def main():
         if d['succeeded']:
             print("\nException:\n%s\n" % str(e))
         else:
-            for k, v in sorted(d.iteritems()):
+            for k, v in sorted(compat.iteritems(d)):
                 print("{k}: {v}".format(k=k, v=v))
             print("------->\n")
@@ -133,7 +132,7 @@ def get_vbench_log(build_url):
-    with closing(urllib2.urlopen(build_url)) as r:
+    with urlopen(build_url) as r:
         if not (200 <= r.getcode() < 300):
             return
@@ -144,7 +143,7 @@ def get_vbench_log(build_url):
     if not s:
         return
     id = s[0]['id']  # should be just one for now
-    with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2:
+    with urlopen("https://api.travis-ci.org/jobs/%s" % id) as r2:
         if not 200 <= r.getcode() < 300:
             return
         s2 = json.loads(r2.read())
@@ -172,7 +171,7 @@ def convert_json_to_df(results_url):
     df contains timings for all successful vbenchmarks
     """

-    with closing(urlopen(results_url)) as resp:
+    with urlopen(results_url) as resp:
         res = json.loads(resp.read())
     timings = res.get("timings")
     if not timings:
@@ -216,7 +215,7 @@ def get_results_from_builds(builds):
     dfs = OrderedDict()

     while True:
-        with closing(urlopen(url)) as r:
+        with urlopen(url) as r:
             if not (200 <= r.getcode() < 300):
                 break
             builds = json.loads(r.read())
@@ -238,6 +237,6 @@ def mk_unique(df):
     dfs = get_all_results(repo_id)
     for k in dfs:
         dfs[k] = mk_unique(dfs[k])
-    ss = [pd.Series(v.timing, name=k) for k, v in dfs.iteritems()]
+    ss = [pd.Series(v.timing, name=k) for k, v in compat.iteritems(dfs)]
     results = pd.concat(reversed(ss), 1)
     return results
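
Every closing(urllib2.urlopen(...)) pair in perf_HEAD.py collapses into pandas.io.common.urlopen, which therefore has to behave as a context manager even on Python 2, where urllib2 handles are not. One plausible shape for such a helper, sketched under that assumption (the real pandas implementation may differ):

    from contextlib import closing, contextmanager

    try:                                         # Python 3
        from urllib.request import urlopen as _urlopen
    except ImportError:                          # Python 2
        from urllib2 import urlopen as _urlopen

    @contextmanager
    def urlopen(*args, **kwargs):
        # closing() supplies __enter__/__exit__ and guarantees .close(),
        # even where the raw handle is not a context manager itself
        with closing(_urlopen(*args, **kwargs)) as f:
            yield f
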
diff --git a/vb_suite/source/conf.py b/vb_suite/source/conf.py
index d83448fd97d09..735a800fb9c02 100644
--- a/vb_suite/source/conf.py
+++ b/vb_suite/source/conf.py
@@ -13,6 +13,8 @@
 import sys
 import os

+from pandas.compat import u
+
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
@@ -49,8 +51,8 @@
 master_doc = 'index'

 # General information about the project.
-project = u'pandas'
-copyright = u'2008-2011, the pandas development team'
+project = u('pandas')
+copyright = u('2008-2011, the pandas development team')

 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
@@ -197,8 +199,8 @@
 # (source start file, target name, title, author, documentclass [howto/manual]).
 latex_documents = [
   ('index', 'performance.tex',
-   u'pandas vbench Performance Benchmarks',
-   u'Wes McKinney', 'manual'),
+   u('pandas vbench Performance Benchmarks'),
+   u('Wes McKinney'), 'manual'),
 ]

 # The name of an image file (relative to this directory) to place at the top of
diff --git a/vb_suite/suite.py b/vb_suite/suite.py
index 905c4371837cc..76fafb87b05b6 100644
--- a/vb_suite/suite.py
+++ b/vb_suite/suite.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
 from vbench.api import Benchmark, GitRepo
 from datetime import datetime
@@ -90,15 +91,15 @@ def generate_rst_files(benchmarks):
     fig_base_path = os.path.join(vb_path, 'figures')

     if not os.path.exists(vb_path):
-        print 'creating %s' % vb_path
+        print('creating %s' % vb_path)
         os.makedirs(vb_path)

     if not os.path.exists(fig_base_path):
-        print 'creating %s' % fig_base_path
+        print('creating %s' % fig_base_path)
         os.makedirs(fig_base_path)

     for bmk in benchmarks:
-        print 'Generating rst file for %s' % bmk.name
+        print('Generating rst file for %s' % bmk.name)
         rst_path = os.path.join(RST_BASE, 'vbench/%s.txt' % bmk.name)

         fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name)
@@ -120,7 +121,7 @@ def generate_rst_files(benchmarks):
             f.write(rst_text)

     with open(os.path.join(RST_BASE, 'index.rst'), 'w') as f:
-        print >> f, """
+        print("""
 Performance Benchmarks
 ======================
@@ -141,15 +142,15 @@ def generate_rst_files(benchmarks):
 .. toctree::
     :hidden:
     :maxdepth: 3
-"""
+""", file=f)
         for modname, mod_bmks in sorted(by_module.items()):
-            print >> f, '   vb_%s' % modname
+            print('   vb_%s' % modname, file=f)
             modpath = os.path.join(RST_BASE, 'vb_%s.rst' % modname)
             with open(modpath, 'w') as mh:
                 header = '%s\n%s\n\n' % (modname, '=' * len(modname))
-                print >> mh, header
+                print(header, file=mh)

                 for bmk in mod_bmks:
-                    print >> mh, bmk.name
-                    print >> mh, '-' * len(bmk.name)
-                    print >> mh, '.. include:: vbench/%s.txt\n' % bmk.name
+                    print(bmk.name, file=mh)
+                    print('-' * len(bmk.name), file=mh)
+                    print('.. include:: vbench/%s.txt\n' % bmk.name, file=mh)
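
The suite.py hunks retire the `print >> fh` redirection statement, which does not parse on Python 3; with the __future__ import in place, the same redirection is spelled as a file= keyword. A before/after sketch (the filename is illustrative only):

    from __future__ import print_function

    # Python 2 only:  print >> fh, 'Performance Benchmarks'
    # both versions:
    with open('index.rst', 'w') as fh:
        print('Performance Benchmarks', file=fh)
        print('=' * 22, file=fh)
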
diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py
index ca98b94e4fbbd..9eca76a5f3226 100755
--- a/vb_suite/test_perf.py
+++ b/vb_suite/test_perf.py
@@ -25,7 +25,9 @@
 5) print the results to the log file and to stdout.

 """
+from __future__ import print_function

+from pandas.compat import range, lmap
 import shutil
 import os
 import sys
@@ -137,11 +139,11 @@ def get_results_df(db, rev):
     """Takes a git commit hash and returns a Dataframe of benchmark results
     """
     bench = DataFrame(db.get_benchmarks())
-    results = DataFrame(map(list,db.get_rev_results(rev).values()))
+    results = DataFrame(lmap(list,db.get_rev_results(rev).values()))

     # Sinch vbench.db._reg_rev_results returns an unlabeled dict,
     # we have to break encapsulation a bit.
-    results.columns = db._results.c.keys()
+    results.columns = list(db._results.c.keys())
     results = results.join(bench['name'], on='checksum').set_index("checksum")
     return results
@@ -275,7 +277,8 @@ def profile_head_single(benchmark):
             err = str(e)
         except:
             pass
-        print("%s died with:\n%s\nSkipping...\n" % (benchmark.name, err))
+        print("%s died with:\n%s\nSkipping...\n" % (benchmark.name,
+                                                    err))

         results.append(d.get('timing',np.nan))
         gc.enable()
@@ -296,7 +299,8 @@ def profile_head_single(benchmark):
 #     return df.set_index("name")[HEAD_COL]

 def profile_head(benchmarks):
-    print( "Performing %d benchmarks (%d runs each)" % ( len(benchmarks), args.hrepeats))
+    print("Performing %d benchmarks (%d runs each)" % (len(benchmarks),
+                                                       args.hrepeats))

     ss= [profile_head_single(b) for b in benchmarks]
     print("\n")
@@ -462,7 +466,7 @@ def main():

 def _parse_commit_log(this,repo_path,base_commit=None):
     from vbench.git import _convert_timezones
     from pandas import Series
-    from dateutil import parser as dparser
+    from pandas.compat import parse_date

     git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path)
     githist = git_cmd + ('log --graph --pretty=format:'+
@@ -484,7 +488,7 @@ def _parse_commit_log(this,repo_path,base_commit=None):
     _, sha, stamp, message, author = line.split('::', 4)

     # parse timestamp into datetime object
-    stamp = dparser.parse(stamp)
+    stamp = parse_date(stamp)
     shas.append(sha)
     timestamps.append(stamp)
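
Finally, test_perf.py trades a direct dateutil import for pandas.compat.parse_date. The sketch below assumes that name simply re-exports dateutil.parser.parse, which is consistent with how the git-log timestamps are fed to it above:

    from pandas.compat import parse_date

    # the kind of stamp emitted by `git log --pretty=format:...` and parsed
    # in _parse_commit_log
    stamp = parse_date('2013-05-01 12:34:56 -0400')
    print(stamp.year, stamp.month, stamp.day)   # 2013 5 1
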