From 6c304c78d6a0559525abe16f03465d275c516455 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Fri, 26 Jul 2013 21:07:57 -0400 Subject: [PATCH 01/11] CLN: Newer syntax, unicode, iterator range, zip, etc Use new syntax ('except as', print as a function, new raise syntax, the next() function rather than the .next() method, next and __next__ defined throughout, switch out xrange, etc.). range is now always equivalent to 2.x xrange throughout (but it must be imported from py3compat to be used). Also remove the range fixer from setup.py, and add compatible long and string types, etc. --- bench/alignment.py | 5 +- bench/bench_get_put_value.py | 3 +- bench/bench_groupby.py | 3 +- bench/bench_join_panel.py | 2 +- bench/bench_khash_dict.py | 10 +- bench/bench_merge.py | 13 +- bench/bench_merge_sqlite.py | 8 +- bench/bench_sparse.py | 3 +- bench/bench_take_indexing.py | 6 +- bench/bench_unique.py | 26 +- bench/better_unique.py | 8 +- bench/io_roundtrip.py | 8 +- bench/serialize.py | 12 +- bench/test.py | 2 + doc/make.py | 5 +- doc/plots/stats/moment_plots.py | 1 + doc/source/conf.py | 9 +- doc/source/io.rst | 2 +- doc/sphinxext/__init__.py | 2 +- doc/sphinxext/comment_eater.py | 6 +- doc/sphinxext/compiler_unparse.py | 52 ++-- doc/sphinxext/docscrape.py | 14 +- doc/sphinxext/docscrape_sphinx.py | 5 +- doc/sphinxext/ipython_directive.py | 38 ++- doc/sphinxext/numpydoc.py | 37 +-- doc/sphinxext/phantom_import.py | 5 +- doc/sphinxext/plot_directive.py | 21 +- doc/sphinxext/tests/test_docscrape.py | 6 +- doc/sphinxext/traitsdoc.py | 10 +- examples/finance.py | 2 +- ez_setup.py | 24 +- pandas/compat/scipy.py | 9 +- pandas/core/algorithms.py | 3 +- pandas/core/array.py | 4 +- pandas/core/common.py | 51 +-- pandas/core/config.py | 22 +- pandas/core/expressions.py | 8 +- pandas/core/format.py | 62 ++-- pandas/core/frame.py | 63 ++-- pandas/core/generic.py | 7 +- pandas/core/groupby.py | 69 +++-- pandas/core/index.py | 32 +- pandas/core/indexing.py | 11 +- pandas/core/internals.py | 21 +- pandas/core/nanops.py | 6 +- pandas/core/panel.py | 26 +- pandas/core/panelnd.py | 4 +- pandas/core/reshape.py | 14 +- pandas/core/series.py | 44 +-- pandas/core/strings.py | 8 +- pandas/io/auth.py | 5 +- pandas/io/clipboard.py | 2 +- pandas/io/common.py | 2 +- pandas/io/data.py | 28 +- pandas/io/date_converters.py | 3 +- pandas/io/excel.py | 16 +- pandas/io/ga.py | 25 +- pandas/io/html.py | 26 +- pandas/io/json.py | 25 +- pandas/io/parsers.py | 53 ++-- pandas/io/pytables.py | 259 ++++++++-------- pandas/io/sql.py | 18 +- pandas/io/stata.py | 14 +- pandas/io/tests/generate_legacy_pickles.py | 46 +-- pandas/io/tests/test_cparser.py | 5 +- pandas/io/tests/test_data.py | 17 +- pandas/io/tests/test_excel.py | 9 +- pandas/io/tests/test_ga.py | 4 +- pandas/io/tests/test_html.py | 15 +- pandas/io/tests/test_json/test_pandas.py | 12 +- pandas/io/tests/test_json/test_ujson.py | 61 ++-- pandas/io/tests/test_parsers.py | 76 ++--- pandas/io/tests/test_pickle.py | 2 +- pandas/io/tests/test_pytables.py | 105 ++++--- pandas/io/tests/test_sql.py | 16 +- pandas/io/tests/test_wb.py | 21 +- pandas/io/wb.py | 29 +- pandas/rpy/__init__.py | 2 +- pandas/rpy/common.py | 5 +- pandas/sparse/frame.py | 13 +- pandas/sparse/panel.py | 11 +- pandas/sparse/tests/test_array.py | 1 + pandas/sparse/tests/test_list.py | 3 +- pandas/sparse/tests/test_sparse.py | 16 +- pandas/src/generate_code.py | 18 +- pandas/src/offsets.pyx | 4 + pandas/stats/fama_macbeth.py | 5 +- pandas/stats/math.py | 3 +- pandas/stats/misc.py | 11 +- pandas/stats/ols.py | 27 +- pandas/stats/plm.py | 2 + 
pandas/stats/tests/test_fama_macbeth.py | 7 +- pandas/stats/tests/test_moments.py | 4 +- pandas/stats/tests/test_ols.py | 8 +- pandas/stats/tests/test_var.py | 8 +- pandas/stats/var.py | 41 +-- pandas/tests/test_algos.py | 3 +- pandas/tests/test_categorical.py | 3 +- pandas/tests/test_common.py | 19 +- pandas/tests/test_expressions.py | 1 + pandas/tests/test_format.py | 183 +++++------ pandas/tests/test_frame.py | 344 +++++++++++---------- pandas/tests/test_graphics.py | 52 ++-- pandas/tests/test_groupby.py | 85 ++--- pandas/tests/test_index.py | 93 +++--- pandas/tests/test_indexing.py | 46 +-- pandas/tests/test_internals.py | 10 +- pandas/tests/test_multilevel.py | 43 +-- pandas/tests/test_panel.py | 34 +- pandas/tests/test_panel4d.py | 18 +- pandas/tests/test_reshape.py | 3 +- pandas/tests/test_rplot.py | 1 + pandas/tests/test_series.py | 114 +++---- pandas/tests/test_stats.py | 4 +- pandas/tests/test_strings.py | 117 ++++--- pandas/tests/test_tseries.py | 12 +- pandas/tools/merge.py | 18 +- pandas/tools/pivot.py | 8 +- pandas/tools/plotting.py | 29 +- pandas/tools/rplot.py | 2 + pandas/tools/tests/test_merge.py | 35 ++- pandas/tools/tests/test_pivot.py | 19 +- pandas/tools/tests/test_tile.py | 1 + pandas/tools/tile.py | 1 + pandas/tseries/converter.py | 16 +- pandas/tseries/frequencies.py | 14 +- pandas/tseries/index.py | 23 +- pandas/tseries/offsets.py | 23 +- pandas/tseries/period.py | 16 +- pandas/tseries/resample.py | 5 +- pandas/tseries/tests/test_converter.py | 5 +- pandas/tseries/tests/test_cursor.py | 22 +- pandas/tseries/tests/test_daterange.py | 1 + pandas/tseries/tests/test_frequencies.py | 1 + pandas/tseries/tests/test_offsets.py | 4 +- pandas/tseries/tests/test_period.py | 43 +-- pandas/tseries/tests/test_plotting.py | 6 +- pandas/tseries/tests/test_resample.py | 4 +- pandas/tseries/tests/test_timeseries.py | 70 ++--- pandas/tseries/tests/test_timezones.py | 16 +- pandas/tseries/tests/test_util.py | 1 + pandas/tseries/tools.py | 15 +- pandas/tseries/util.py | 7 +- pandas/util/compat.py | 24 +- pandas/util/counter.py | 11 +- pandas/util/decorators.py | 4 +- pandas/util/py3compat.py | 13 +- pandas/util/terminal.py | 3 +- pandas/util/testing.py | 36 ++- scripts/bench_join.py | 11 +- scripts/bench_join_multi.py | 13 +- scripts/bench_refactor.py | 9 +- scripts/file_sizes.py | 10 +- scripts/find_commits_touching_func.py | 13 +- scripts/find_undoc_args.py | 8 +- scripts/gen_release_notes.py | 3 +- scripts/groupby_sample.py | 6 +- scripts/groupby_speed.py | 5 +- scripts/groupby_test.py | 1 + scripts/hdfstore_panel_perf.py | 5 +- scripts/json_manip.py | 177 +++++------ scripts/leak.py | 1 + scripts/parser_magic.py | 3 +- scripts/roll_median_leak.py | 6 +- scripts/runtests.py | 3 +- scripts/testmed.py | 5 +- setup.py | 4 +- vb_suite/groupby.py | 5 +- vb_suite/indexing.py | 1 + vb_suite/make.py | 2 +- vb_suite/measure_memory_consumption.py | 2 +- vb_suite/pandas_vb_common.py | 7 + vb_suite/parser.py | 4 +- vb_suite/perf_HEAD.py | 5 +- vb_suite/source/conf.py | 9 +- vb_suite/suite.py | 21 +- vb_suite/test_perf.py | 9 +- 177 files changed, 2159 insertions(+), 1765 deletions(-) diff --git a/bench/alignment.py b/bench/alignment.py index bf5d5604d913e..a5ffe96140117 100644 --- a/bench/alignment.py +++ b/bench/alignment.py @@ -1,4 +1,5 @@ # Setup +from pandas.util.py3compat import range import numpy as np import pandas import la @@ -6,8 +7,8 @@ K = 50 arr1 = np.random.randn(N, K) arr2 = np.random.randn(N, K) -idx1 = range(N) -idx2 = range(K) +idx1 = list(range(N)) +idx2 = list(range(K)) # 
pandas dma1 = pandas.DataFrame(arr1, idx1, idx2) diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py index 419e8f603e5ae..cf1b827e133ac 100644 --- a/bench/bench_get_put_value.py +++ b/bench/bench_get_put_value.py @@ -1,12 +1,13 @@ from pandas import * from pandas.util.testing import rands +from pandas.util.py3compat import range N = 1000 K = 50 def _random_index(howmany): - return Index([rands(10) for _ in xrange(howmany)]) + return Index([rands(10) for _ in range(howmany)]) df = DataFrame(np.random.randn(N, K), index=_random_index(N), columns=_random_index(K)) diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py index 807d3449e1fcb..aa337acf9308e 100644 --- a/bench/bench_groupby.py +++ b/bench/bench_groupby.py @@ -1,5 +1,6 @@ from pandas import * from pandas.util.testing import rands +from pandas.util.py3compat import range import string import random @@ -7,7 +8,7 @@ k = 20000 n = 10 -foo = np.tile(np.array([rands(10) for _ in xrange(k)], dtype='O'), n) +foo = np.tile(np.array([rands(10) for _ in range(k)], dtype='O'), n) foo2 = list(foo) random.shuffle(foo) random.shuffle(foo2) diff --git a/bench/bench_join_panel.py b/bench/bench_join_panel.py index 0e484fb496036..f3c3f8ba15f70 100644 --- a/bench/bench_join_panel.py +++ b/bench/bench_join_panel.py @@ -35,7 +35,7 @@ def reindex_on_axis(panels, axis, axis_reindex): # concatenate values try: values = np.concatenate([p.values for p in panels], axis=1) - except (Exception), detail: + except Exception as detail: raise Exception("cannot append values that dont' match dimensions! -> [%s] %s" % (','.join(["%s" % p for p in panels]), str(detail))) # pm('append - create_panel') diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py index fce3288e3294d..784704cbb809a 100644 --- a/bench/bench_khash_dict.py +++ b/bench/bench_khash_dict.py @@ -1,12 +1,14 @@ """ Some comparisons of khash.h to Python dict """ +from __future__ import print_function import numpy as np import os from vbench.api import Benchmark from pandas.util.testing import rands +from pandas.util.py3compat import range import pandas._tseries as lib import pandas._sandbox as sbx import time @@ -22,7 +24,7 @@ def object_test_data(n): def string_test_data(n): - return np.array([rands(10) for _ in xrange(n)], dtype='O') + return np.array([rands(10) for _ in range(n)], dtype='O') def int_test_data(n): @@ -50,7 +52,7 @@ def f(): def _timeit(f, iterations=10): start = time.time() - for _ in xrange(iterations): + for _ in range(iterations): foo = f() elapsed = time.time() - start return elapsed @@ -73,8 +75,8 @@ def lookup_khash(values): def leak(values): - for _ in xrange(100): - print proc.get_memory_info() + for _ in range(100): + print(proc.get_memory_info()) table = lookup_khash(values) # table.destroy() diff --git a/bench/bench_merge.py b/bench/bench_merge.py index 11f8c29a2897b..7820c7792afc4 100644 --- a/bench/bench_merge.py +++ b/bench/bench_merge.py @@ -1,5 +1,6 @@ from pandas import * from pandas.util.testing import rands +from pandas.util.py3compat import range import random N = 10000 @@ -7,7 +8,7 @@ def get_test_data(ngroups=100, n=N): - unique_groups = range(ngroups) + unique_groups = list(range(ngroups)) arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) if len(arr) < n: @@ -34,8 +35,8 @@ def get_test_data(ngroups=100, n=N): from pandas.util.testing import rands N = 10000 -indices = np.array([rands(10) for _ in xrange(N)], dtype='O') -indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O') +indices = 
np.array([rands(10) for _ in range(N)], dtype='O') +indices2 = np.array([rands(10) for _ in range(N)], dtype='O') key = np.tile(indices[:8000], 10) key2 = np.tile(indices2[:8000], 10) @@ -55,7 +56,7 @@ def get_test_data(ngroups=100, n=N): f = lambda: merge(left, right, how=join_method, sort=sort) gc.disable() start = time.time() - for _ in xrange(niter): + for _ in range(niter): f() elapsed = (time.time() - start) / niter gc.enable() @@ -65,7 +66,7 @@ def get_test_data(ngroups=100, n=N): # R results -from StringIO import StringIO +from pandas.util.py3compat import StringIO # many to one r_results = read_table(StringIO(""" base::merge plyr data.table inner 0.2475 0.1183 0.1100 @@ -93,7 +94,7 @@ def get_test_data(ngroups=100, n=N): # many to many -from StringIO import StringIO +from pandas.util.py3compat import StringIO # many to one r_results = read_table(StringIO("""base::merge plyr data.table inner 0.4610 0.1276 0.1269 diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py index d13b296698b97..e15a482f39c50 100644 --- a/bench/bench_merge_sqlite.py +++ b/bench/bench_merge_sqlite.py @@ -4,12 +4,14 @@ import time from pandas import DataFrame from pandas.util.testing import rands +from pandas.util.py3compat import range +from six.moves import zip import random N = 10000 -indices = np.array([rands(10) for _ in xrange(N)], dtype='O') -indices2 = np.array([rands(10) for _ in xrange(N)], dtype='O') +indices = np.array([rands(10) for _ in range(N)], dtype='O') +indices2 = np.array([rands(10) for _ in range(N)], dtype='O') key = np.tile(indices[:8000], 10) key2 = np.tile(indices2[:8000], 10) @@ -67,7 +69,7 @@ g = lambda: conn.execute(sql) # list fetches results gc.disable() start = time.time() - # for _ in xrange(niter): + # for _ in range(niter): g() elapsed = (time.time() - start) / niter gc.enable() diff --git a/bench/bench_sparse.py b/bench/bench_sparse.py index 600b3d05c5f78..beb3e84c3e42b 100644 --- a/bench/bench_sparse.py +++ b/bench/bench_sparse.py @@ -3,6 +3,7 @@ from pandas import * import pandas.core.sparse as spm +import pandas.util.compat as compat reload(spm) from pandas.core.sparse import * @@ -41,7 +42,7 @@ def new_data_like(sdf): new_data = {} - for col, series in sdf.iteritems(): + for col, series in compat.iteritems(sdf): new_data[col] = SparseSeries(np.random.randn(len(series.sp_values)), index=sdf.index, sparse_index=series.sp_index, diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py index 3ddd647a35bf6..b6a7b04eb8adf 100644 --- a/bench/bench_take_indexing.py +++ b/bench/bench_take_indexing.py @@ -1,3 +1,4 @@ +from __future__ import print_function import numpy as np from pandas import * @@ -5,6 +6,7 @@ from pandas import DataFrame import timeit +from six.moves import zip setup = """ from pandas import Series @@ -35,7 +37,7 @@ def _timeit(stmt, size, k=5, iters=1000): return timer.timeit(n) / n for sz, its in zip(sizes, iters): - print sz + print(sz) fancy_2d.append(_timeit('arr[indexer]', sz, iters=its)) take_2d.append(_timeit('arr.take(indexer, axis=0)', sz, iters=its)) cython_2d.append(_timeit('lib.take_axis0(arr, indexer)', sz, iters=its)) @@ -44,7 +46,7 @@ def _timeit(stmt, size, k=5, iters=1000): 'take': take_2d, 'cython': cython_2d}) -print df +print(df) from pandas.rpy.common import r r('mat <- matrix(rnorm(50000), nrow=10000, ncol=5)') diff --git a/bench/bench_unique.py b/bench/bench_unique.py index 392d3b326bf09..8a24630632684 100644 --- a/bench/bench_unique.py +++ b/bench/bench_unique.py @@ -1,5 +1,8 @@ +from __future__ import 
print_function from pandas import * from pandas.util.testing import rands +from pandas.util.py3compat import range +from six.moves import zip import pandas._tseries as lib import numpy as np import matplotlib.pyplot as plt @@ -7,8 +10,8 @@ N = 50000 K = 10000 -groups = np.array([rands(10) for _ in xrange(K)], dtype='O') -groups2 = np.array([rands(10) for _ in xrange(K)], dtype='O') +groups = np.array([rands(10) for _ in range(K)], dtype='O') +groups2 = np.array([rands(10) for _ in range(K)], dtype='O') labels = np.tile(groups, N // K) labels2 = np.tile(groups2, N // K) @@ -20,7 +23,7 @@ def timeit(f, niter): import time gc.disable() start = time.time() - for _ in xrange(niter): + for _ in range(niter): f() elapsed = (time.time() - start) / niter gc.enable() @@ -75,9 +78,8 @@ def algo3_sort(): def f(): - from itertools import izip # groupby sum - for k, v in izip(x, data): + for k, v in zip(x, data): try: counts[k] += v except KeyError: @@ -128,7 +130,7 @@ def algo4(): # N = 10000000 # K = 500000 -# groups = np.array([rands(10) for _ in xrange(K)], dtype='O') +# groups = np.array([rands(10) for _ in range(K)], dtype='O') # labels = np.tile(groups, N // K) data = np.random.randn(N) @@ -232,11 +234,11 @@ def hash_bench(): khash_hint = [] khash_nohint = [] for K in Ks: - print K - # groups = np.array([rands(10) for _ in xrange(K)]) + print(K) + # groups = np.array([rands(10) for _ in range(K)]) # labels = np.tile(groups, N // K).astype('O') - groups = np.random.randint(0, 100000000000L, size=K) + groups = np.random.randint(0, long(100000000000), size=K) labels = np.tile(groups, N // K) dict_based.append(timeit(lambda: dict_unique(labels, K), 20)) khash_nohint.append(timeit(lambda: khash_unique_int64(labels, K), 20)) @@ -245,11 +247,11 @@ def hash_bench(): # memory, hard to get # dict_based.append(np.mean([dict_unique(labels, K, memory=True) - # for _ in xrange(10)])) + # for _ in range(10)])) # khash_nohint.append(np.mean([khash_unique(labels, K, memory=True) - # for _ in xrange(10)])) + # for _ in range(10)])) # khash_hint.append(np.mean([khash_unique(labels, K, size_hint=True, memory=True) - # for _ in xrange(10)])) + # for _ in range(10)])) # dict_based_sort.append(timeit(lambda: dict_unique(labels, K, # sort=True), 10)) diff --git a/bench/better_unique.py b/bench/better_unique.py index 982dd88e879da..f8881ecd7b6bc 100644 --- a/bench/better_unique.py +++ b/bench/better_unique.py @@ -1,9 +1,13 @@ +from __future__ import print_function from pandas import DataFrame +from pandas.util.py3compat import range +from six.moves import zip import timeit setup = """ from pandas import Series import pandas._tseries as _tseries +from pandas.util.py3compat import range import random import numpy as np @@ -48,11 +52,11 @@ def get_test_data(ngroups=100, n=tot): numpy_timer = timeit.Timer(stmt='np.unique(arr)', setup=setup % sz) - print n + print(n) numpy_result = numpy_timer.timeit(number=n) / n wes_result = wes_timer.timeit(number=n) / n - print 'Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result) + print('Groups: %d, NumPy: %s, Wes: %s' % (sz, numpy_result, wes_result)) wes.append(wes_result) numpy.append(numpy_result) diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py index a9711dbb83b8a..a033ef0c72857 100644 --- a/bench/io_roundtrip.py +++ b/bench/io_roundtrip.py @@ -1,16 +1,18 @@ +from __future__ import print_function import time import os import numpy as np import la import pandas +from pandas.util.py3compat import range from pandas import datetools, DateRange def timeit(f, 
iterations): start = time.clock() - for i in xrange(iterations): + for i in range(iterations): f() return time.clock() - start @@ -54,11 +56,11 @@ def rountrip_archive(N, K=50, iterations=10): pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pandas_time = timeit(pandas_f, iterations) / iterations - print 'pandas (HDF5) %7.4f seconds' % pandas_time + print('pandas (HDF5) %7.4f seconds' % pandas_time) pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pickle_time = timeit(pickle_f, iterations) / iterations - print 'pandas (pickle) %7.4f seconds' % pickle_time + print('pandas (pickle) %7.4f seconds' % pickle_time) # print 'Numpy (npz) %7.4f seconds' % numpy_time # print 'larry (HDF5) %7.4f seconds' % larry_time diff --git a/bench/serialize.py b/bench/serialize.py index 63f885a4efa88..9c0ba84209c49 100644 --- a/bench/serialize.py +++ b/bench/serialize.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from pandas.util.py3compat import range import time import os import numpy as np @@ -9,7 +11,7 @@ def timeit(f, iterations): start = time.clock() - for i in xrange(iterations): + for i in range(iterations): f() return time.clock() - start @@ -20,7 +22,7 @@ def roundtrip_archive(N, iterations=10): # Create data arr = np.random.randn(N, N) lar = la.larry(arr) - dma = pandas.DataFrame(arr, range(N), range(N)) + dma = pandas.DataFrame(arr, list(range(N)), list(range(N))) # filenames filename_numpy = '/Users/wesm/tmp/numpy.npz' @@ -51,9 +53,9 @@ def roundtrip_archive(N, iterations=10): pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma) pandas_time = timeit(pandas_f, iterations) / iterations - print 'Numpy (npz) %7.4f seconds' % numpy_time - print 'larry (HDF5) %7.4f seconds' % larry_time - print 'pandas (HDF5) %7.4f seconds' % pandas_time + print('Numpy (npz) %7.4f seconds' % numpy_time) + print('larry (HDF5) %7.4f seconds' % larry_time) + print('pandas (HDF5) %7.4f seconds' % pandas_time) def numpy_roundtrip(filename, arr1, arr2): diff --git a/bench/test.py b/bench/test.py index 2ac91468d7b73..9d47c091b932d 100644 --- a/bench/test.py +++ b/bench/test.py @@ -1,7 +1,9 @@ +from pandas.util.py3compat import range import numpy as np import itertools import collections import scipy.ndimage as ndi +from six.moves import zip N = 10000 diff --git a/doc/make.py b/doc/make.py index adf34920b9ede..12b60a4f1098b 100755 --- a/doc/make.py +++ b/doc/make.py @@ -14,6 +14,7 @@ python make.py clean python make.py html """ +from __future__ import print_function import glob import os @@ -60,7 +61,7 @@ def upload_prev(ver, doc_root='./'): remote_dir = '/usr/share/nginx/pandas/pandas-docs/version/%s/' % ver cmd = 'cd %s; rsync -avz . 
pandas@pandas.pydata.org:%s -essh' cmd = cmd % (local_dir, remote_dir) - print cmd + print(cmd) if os.system(cmd): raise SystemExit( 'Upload to %s from %s failed' % (remote_dir, local_dir)) @@ -154,7 +155,7 @@ def auto_dev_build(debug=False): upload_dev_pdf() if not debug: sendmail(step) - except (Exception, SystemExit), inst: + except (Exception, SystemExit) as inst: msg = str(inst) + '\n' sendmail(step, '[ERROR] ' + msg) diff --git a/doc/plots/stats/moment_plots.py b/doc/plots/stats/moment_plots.py index 9e3a902592c6b..a078651d2fe89 100644 --- a/doc/plots/stats/moment_plots.py +++ b/doc/plots/stats/moment_plots.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import numpy as np import matplotlib.pyplot as plt diff --git a/doc/source/conf.py b/doc/source/conf.py index 99d1703b9ca34..128e4ade9220d 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -12,6 +12,7 @@ import sys import os +import six # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -63,8 +64,8 @@ master_doc = 'index' # General information about the project. -project = u'pandas' -copyright = u'2008-2012, the pandas development team' +project = six.u('pandas') +copyright = six.u('2008-2012, the pandas development team') # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -211,8 +212,8 @@ # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'pandas.tex', - u'pandas: powerful Python data analysis toolkit', - u'Wes McKinney\n\& PyData Development Team', 'manual'), + six.u('pandas: powerful Python data analysis toolkit'), + six.u('Wes McKinney\n\& PyData Development Team'), 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/doc/source/io.rst b/doc/source/io.rst index 7290e499c6cbf..ee6c35187808f 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1184,7 +1184,7 @@ You can even pass in an instance of ``StringIO`` if you so desire .. ipython:: python - from cStringIO import StringIO + from six.moves import cStringIO as StringIO with open(file_path, 'r') as f: sio = StringIO(f.read()) diff --git a/doc/sphinxext/__init__.py b/doc/sphinxext/__init__.py index ae9073bc4115f..68dbbb00a7cfb 100755 --- a/doc/sphinxext/__init__.py +++ b/doc/sphinxext/__init__.py @@ -1 +1 @@ -from numpydoc import setup +from .numpydoc import setup diff --git a/doc/sphinxext/comment_eater.py b/doc/sphinxext/comment_eater.py index e11eea9021073..3b15bd178eade 100755 --- a/doc/sphinxext/comment_eater.py +++ b/doc/sphinxext/comment_eater.py @@ -1,10 +1,10 @@ -from cStringIO import StringIO +from six.moves import cStringIO import compiler import inspect import textwrap import tokenize -from compiler_unparse import unparse +from .compiler_unparse import unparse class Comment(object): @@ -95,7 +95,7 @@ def new_noncomment(self, start_lineno, end_lineno): def new_comment(self, string, start, end, line): """ Possibly add a new comment. - + Only adds a new comment if this comment is the only thing on the line. Otherwise, it extends the noncomment block. 
""" diff --git a/doc/sphinxext/compiler_unparse.py b/doc/sphinxext/compiler_unparse.py index ffcf51b353a10..240dd17243ef6 100755 --- a/doc/sphinxext/compiler_unparse.py +++ b/doc/sphinxext/compiler_unparse.py @@ -12,11 +12,11 @@ """ import sys -import cStringIO +from six.moves import cStringIO as StringIO from compiler.ast import Const, Name, Tuple, Div, Mul, Sub, Add def unparse(ast, single_line_functions=False): - s = cStringIO.StringIO() + s = StringIO() UnparseCompilerAst(ast, s, single_line_functions) return s.getvalue().lstrip() @@ -101,13 +101,13 @@ def _And(self, t): if i != len(t.nodes)-1: self._write(") and (") self._write(")") - + def _AssAttr(self, t): """ Handle assigning an attribute of an object """ self._dispatch(t.expr) self._write('.'+t.attrname) - + def _Assign(self, t): """ Expression Assignment such as "a = 1". @@ -145,36 +145,36 @@ def _AssTuple(self, t): def _AugAssign(self, t): """ +=,-=,*=,/=,**=, etc. operations """ - + self._fill() self._dispatch(t.node) self._write(' '+t.op+' ') self._dispatch(t.expr) if not self._do_indent: self._write(';') - + def _Bitand(self, t): """ Bit and operation. """ - + for i, node in enumerate(t.nodes): self._write("(") self._dispatch(node) self._write(")") if i != len(t.nodes)-1: self._write(" & ") - + def _Bitor(self, t): """ Bit or operation """ - + for i, node in enumerate(t.nodes): self._write("(") self._dispatch(node) self._write(")") if i != len(t.nodes)-1: self._write(" | ") - + def _CallFunc(self, t): """ Function call. """ @@ -249,7 +249,7 @@ def _From(self, t): self._write(name) if asname is not None: self._write(" as "+asname) - + def _Function(self, t): """ Handle function definitions """ @@ -282,12 +282,12 @@ def _Getattr(self, t): self._write(')') else: self._dispatch(t.expr) - + self._write('.'+t.attrname) - + def _If(self, t): self._fill() - + for i, (compare,code) in enumerate(t.tests): if i == 0: self._write("if ") @@ -307,7 +307,7 @@ def _If(self, t): self._dispatch(t.else_) self._leave() self._write("\n") - + def _IfExp(self, t): self._dispatch(t.then) self._write(" if ") @@ -322,7 +322,7 @@ def _Import(self, t): """ Handle "import xyz.foo". 
""" self._fill("import ") - + for i, (name,asname) in enumerate(t.names): if i != 0: self._write(", ") @@ -336,7 +336,7 @@ def _Keyword(self, t): self._write(t.name) self._write("=") self._dispatch(t.expr) - + def _List(self, t): self._write("[") for i,node in enumerate(t.nodes): @@ -358,12 +358,12 @@ def _Name(self, t): def _NoneType(self, t): self._write("None") - + def _Not(self, t): self._write('not (') self._dispatch(t.expr) self._write(')') - + def _Or(self, t): self._write(" (") for i, node in enumerate(t.nodes): @@ -371,7 +371,7 @@ def _Or(self, t): if i != len(t.nodes)-1: self._write(") or (") self._write(")") - + def _Pass(self, t): self._write("pass\n") @@ -452,7 +452,7 @@ def _TryExcept(self, t): self._enter() self._dispatch(handler[2]) self._leave() - + if t.else_: self._fill("else") self._enter() @@ -477,14 +477,14 @@ def _Tuple(self, t): self._dispatch(last_element) self._write(")") - + def _UnaryAdd(self, t): self._write("+") self._dispatch(t.expr) - + def _UnarySub(self, t): self._write("-") - self._dispatch(t.expr) + self._dispatch(t.expr) def _With(self, t): self._fill('with ') @@ -496,7 +496,7 @@ def _With(self, t): self._dispatch(t.body) self._leave() self._write('\n') - + def _int(self, t): self._write(repr(t)) @@ -533,7 +533,7 @@ def _float(self, t): def _str(self, t): self._write(repr(t)) - + def _tuple(self, t): self._write(str(t)) diff --git a/doc/sphinxext/docscrape.py b/doc/sphinxext/docscrape.py index 63fec42adaa41..384a6db2c1dec 100755 --- a/doc/sphinxext/docscrape.py +++ b/doc/sphinxext/docscrape.py @@ -1,13 +1,15 @@ """Extract reference documentation from the NumPy source tree. """ +from __future__ import print_function import inspect import textwrap import re import pydoc -from StringIO import StringIO from warnings import warn +from six import StringIO +import six class Reader(object): """A line-based string reader. @@ -113,7 +115,7 @@ def __getitem__(self,key): return self._parsed_data[key] def __setitem__(self,key,val): - if not self._parsed_data.has_key(key): + if key not in self._parsed_data: warn("Unknown section %s" % key) else: self._parsed_data[key] = val @@ -370,7 +372,7 @@ def _str_index(self): idx = self['index'] out = [] out += ['.. index:: %s' % idx.get('default','')] - for section, references in idx.iteritems(): + for section, references in six.iteritems(idx): if section == 'default': continue out += [' :%s: %s' % (section, ', '.join(references))] @@ -427,7 +429,7 @@ def __init__(self, func, role='func', doc=None, config={}): argspec = inspect.formatargspec(*argspec) argspec = argspec.replace('*','\*') signature = '%s%s' % (func_name, argspec) - except TypeError, e: + except TypeError as e: signature = '%s()' % func_name self['Signature'] = signature @@ -449,8 +451,8 @@ def __str__(self): 'meth': 'method'} if self._role: - if not roles.has_key(self._role): - print "Warning: invalid role %s" % self._role + if self._role not in roles: + print("Warning: invalid role %s" % self._role) out += '.. 
%s:: %s\n \n\n' % (roles.get(self._role,''), func_name) diff --git a/doc/sphinxext/docscrape_sphinx.py b/doc/sphinxext/docscrape_sphinx.py index 9f4350d4601ad..a5b53eb09ccd8 100755 --- a/doc/sphinxext/docscrape_sphinx.py +++ b/doc/sphinxext/docscrape_sphinx.py @@ -1,6 +1,7 @@ import re, inspect, textwrap, pydoc import sphinx -from docscrape import NumpyDocString, FunctionDoc, ClassDoc +from .docscrape import NumpyDocString, FunctionDoc, ClassDoc +import six class SphinxDocString(NumpyDocString): def __init__(self, docstring, config={}): @@ -127,7 +128,7 @@ def _str_index(self): return out out += ['.. index:: %s' % idx.get('default','')] - for section, references in idx.iteritems(): + for section, references in six.iteritems(idx): if section == 'default': continue elif section == 'refguide': diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py index 0c28e397a0005..b74808f0e73d1 100644 --- a/doc/sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_directive.py @@ -51,14 +51,16 @@ - VáclavŠmilauer : Prompt generalizations. - Skipper Seabold, refactoring, cleanups, pure python addition """ +from __future__ import print_function #----------------------------------------------------------------------------- # Imports #----------------------------------------------------------------------------- # Stdlib +from pandas.util.py3compat import range +from six.moves import map, cStringIO as StringIO import ast -import cStringIO import os import re import sys @@ -69,6 +71,8 @@ from docutils.parsers.rst import directives from docutils import nodes from sphinx.util.compat import Directive +import six +from six.moves import zip matplotlib.use('Agg') @@ -114,7 +118,7 @@ def block_parser(part, rgxin, rgxout, fmtin, fmtout): N = len(lines) i = 0 decorator = None - while 1: + while True: if i==N: # nothing left to parse -- the last line @@ -186,7 +190,7 @@ class EmbeddedSphinxShell(object): def __init__(self): - self.cout = cStringIO.StringIO() + self.cout = StringIO() # Create config object for IPython config = Config() @@ -299,7 +303,7 @@ def process_input(self, data, input_prompt, lineno): def _remove_first_space_if_any(line): return line[1:] if line.startswith(' ') else line - input_lines = map(_remove_first_space_if_any, input.split('\n')) + input_lines = list(map(_remove_first_space_if_any, input.split('\n'))) self.datacontent = data @@ -489,7 +493,7 @@ def process_pure_python(self, content): multiline = True cont_len = len(str(lineno)) + 2 line_to_process = line.strip('\\') - output.extend([u"%s %s" % (fmtin%lineno,line)]) + output.extend([six.u("%s %s") % (fmtin%lineno,line)]) continue else: # no we're still not line_to_process = line.strip('\\') @@ -497,12 +501,12 @@ def process_pure_python(self, content): line_to_process += line.strip('\\') if line_stripped.endswith('\\'): # and we still are continuation = '.' 
* cont_len - output.extend([(u' %s: '+line_stripped) % continuation]) + output.extend([(six.u(' %s: ')+line_stripped) % continuation]) continue # else go ahead and run this multiline then carry on # get output of line - self.process_input_line(unicode(line_to_process.strip()), + self.process_input_line(six.text_type(line_to_process.strip()), store_history=False) out_line = self.cout.getvalue() self.clear_cout() @@ -516,15 +520,15 @@ def process_pure_python(self, content): # line numbers don't actually matter, they're replaced later if not multiline: - in_line = u"%s %s" % (fmtin%lineno,line) + in_line = six.u("%s %s") % (fmtin%lineno,line) output.extend([in_line]) else: - output.extend([(u' %s: '+line_stripped) % continuation]) + output.extend([(six.u(' %s: ')+line_stripped) % continuation]) multiline = False if len(out_line): output.extend([out_line]) - output.extend([u'']) + output.extend([six.u('')]) return output @@ -566,19 +570,19 @@ def process_pure_python2(self, content): output.extend([line]) continue - continuation = u' %s:'% ''.join(['.']*(len(str(ct))+2)) + continuation = six.u(' %s:')% ''.join(['.']*(len(str(ct))+2)) if not multiline: - modified = u"%s %s" % (fmtin % ct, line_stripped) + modified = six.u("%s %s") % (fmtin % ct, line_stripped) output.append(modified) ct += 1 try: ast.parse(line_stripped) - output.append(u'') + output.append(six.u('')) except Exception: multiline = True multiline_start = lineno else: - modified = u'%s %s' % (continuation, line) + modified = six.u('%s %s') % (continuation, line) output.append(modified) try: @@ -590,7 +594,7 @@ def process_pure_python2(self, content): continue - output.extend([continuation, u'']) + output.extend([continuation, six.u('')]) multiline = False except Exception: pass @@ -732,7 +736,7 @@ def run(self): #print lines if len(lines)>2: if debug: - print '\n'.join(lines) + print('\n'.join(lines)) else: #NOTE: this raises some errors, what's it for? #print 'INSERTING %d lines'%len(lines) self.state_machine.insert_input( @@ -910,4 +914,4 @@ def test(): if not os.path.isdir('_static'): os.mkdir('_static') test() - print 'All OK? Check figures in _static/' + print('All OK? 
Check figures in _static/') diff --git a/doc/sphinxext/numpydoc.py b/doc/sphinxext/numpydoc.py index 43c67336b5c03..f32d778b6b79f 100755 --- a/doc/sphinxext/numpydoc.py +++ b/doc/sphinxext/numpydoc.py @@ -17,12 +17,13 @@ """ import sphinx +import six if sphinx.__version__ < '1.0.1': raise RuntimeError("Sphinx 1.0.1 or newer is required") import os, re, pydoc -from docscrape_sphinx import get_doc_object, SphinxDocString +from .docscrape_sphinx import get_doc_object, SphinxDocString from sphinx.util.compat import Directive import inspect @@ -34,28 +35,28 @@ def mangle_docstrings(app, what, name, obj, options, lines, if what == 'module': # Strip top title - title_re = re.compile(ur'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*', + title_re = re.compile(six.u(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*'), re.I|re.S) - lines[:] = title_re.sub(u'', u"\n".join(lines)).split(u"\n") + lines[:] = title_re.sub(six.u(''), six.u("\n").join(lines)).split(six.u("\n")) else: - doc = get_doc_object(obj, what, u"\n".join(lines), config=cfg) - lines[:] = unicode(doc).split(u"\n") + doc = get_doc_object(obj, what, six.u("\n").join(lines), config=cfg) + lines[:] = six.text_type(doc).split(six.u("\n")) if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ obj.__name__: if hasattr(obj, '__module__'): - v = dict(full_name=u"%s.%s" % (obj.__module__, obj.__name__)) + v = dict(full_name=six.u("%s.%s") % (obj.__module__, obj.__name__)) else: v = dict(full_name=obj.__name__) - lines += [u'', u'.. htmlonly::', ''] - lines += [u' %s' % x for x in + lines += [six.u(''), six.u('.. htmlonly::'), ''] + lines += [six.u(' %s') % x for x in (app.config.numpydoc_edit_link % v).split("\n")] # replace reference numbers so that there are no duplicates references = [] for line in lines: line = line.strip() - m = re.match(ur'^.. \[([a-z0-9_.-])\]', line, re.I) + m = re.match(six.u(r'^.. \[([a-z0-9_.-])\]'), line, re.I) if m: references.append(m.group(1)) @@ -64,14 +65,14 @@ def mangle_docstrings(app, what, name, obj, options, lines, if references: for i, line in enumerate(lines): for r in references: - if re.match(ur'^\d+$', r): - new_r = u"R%d" % (reference_offset[0] + int(r)) + if re.match(six.u(r'^\d+$'), r): + new_r = six.u("R%d") % (reference_offset[0] + int(r)) else: - new_r = u"%s%d" % (r, reference_offset[0]) - lines[i] = lines[i].replace(u'[%s]_' % r, - u'[%s]_' % new_r) - lines[i] = lines[i].replace(u'.. [%s]' % r, - u'.. [%s]' % new_r) + new_r = six.u("%s%d") % (r, reference_offset[0]) + lines[i] = lines[i].replace(six.u('[%s]_') % r, + six.u('[%s]_') % new_r) + lines[i] = lines[i].replace(six.u('.. [%s]') % r, + six.u('.. [%s]') % new_r) reference_offset[0] += len(references) @@ -87,8 +88,8 @@ def mangle_signature(app, what, name, obj, options, sig, retann): doc = SphinxDocString(pydoc.getdoc(obj)) if doc['Signature']: - sig = re.sub(u"^[^(]*", u"", doc['Signature']) - return sig, u'' + sig = re.sub(six.u("^[^(]*"), six.u(""), doc['Signature']) + return sig, six.u('') def setup(app, get_doc_object_=get_doc_object): global get_doc_object diff --git a/doc/sphinxext/phantom_import.py b/doc/sphinxext/phantom_import.py index c77eeb544e78b..a92eb96e589c8 100755 --- a/doc/sphinxext/phantom_import.py +++ b/doc/sphinxext/phantom_import.py @@ -14,6 +14,7 @@ .. 
[1] http://code.google.com/p/pydocweb """ +from __future__ import print_function import imp, sys, compiler, types, os, inspect, re def setup(app): @@ -23,7 +24,7 @@ def setup(app): def initialize(app): fn = app.config.phantom_import_file if (fn and os.path.isfile(fn)): - print "[numpydoc] Phantom importing modules from", fn, "..." + print("[numpydoc] Phantom importing modules from", fn, "...") import_phantom_module(fn) #------------------------------------------------------------------------------ @@ -129,7 +130,7 @@ def base_cmp(a, b): doc = "%s%s\n\n%s" % (funcname, argspec, doc) obj = lambda: 0 obj.__argspec_is_invalid_ = True - obj.func_name = funcname + obj.__name__ = funcname obj.__name__ = name obj.__doc__ = doc if inspect.isclass(object_cache[parent]): diff --git a/doc/sphinxext/plot_directive.py b/doc/sphinxext/plot_directive.py index cacd53dbc2699..795410380f335 100755 --- a/doc/sphinxext/plot_directive.py +++ b/doc/sphinxext/plot_directive.py @@ -75,10 +75,13 @@ """ -import sys, os, glob, shutil, imp, warnings, cStringIO, re, textwrap, traceback +from pandas.util.py3compat import range +import sys, os, glob, shutil, imp, warnings, re, textwrap, traceback +from six.moves import cStringIO as StringIO import sphinx import warnings +from six.moves import map warnings.warn("A plot_directive module is also available under " "matplotlib.sphinxext; expect this numpydoc.plot_directive " "module to be deprecated after relevant features have been " @@ -257,7 +260,7 @@ def run(arguments, content, options, state_machine, state, lineno): # is it in doctest format? is_doctest = contains_doctest(code) - if options.has_key('format'): + if 'format' in options: if options['format'] == 'python': is_doctest = False else: @@ -291,7 +294,7 @@ def run(arguments, content, options, state_machine, state, lineno): results = makefig(code, source_file_name, build_dir, output_base, config) errors = [] - except PlotError, err: + except PlotError as err: reporter = state.memo.reporter sm = reporter.system_message( 2, "Exception occurred in plotting %s: %s" % (output_base, err), @@ -448,7 +451,7 @@ def run_code(code, code_path, ns=None): # Redirect stdout stdout = sys.stdout - sys.stdout = cStringIO.StringIO() + sys.stdout = StringIO() # Reset sys.argv old_sys_argv = sys.argv @@ -460,9 +463,9 @@ def run_code(code, code_path, ns=None): if ns is None: ns = {} if not ns: - exec setup.config.plot_pre_code in ns - exec code in ns - except (Exception, SystemExit), err: + exec(setup.config.plot_pre_code, ns) + exec(code, ns) + except (Exception, SystemExit) as err: raise PlotError(traceback.format_exc()) finally: os.chdir(pwd) @@ -524,7 +527,7 @@ def makefig(code, code_path, output_dir, output_base, config): all_exists = True for i, code_piece in enumerate(code_pieces): images = [] - for j in xrange(1000): + for j in range(1000): img = ImageFile('%s_%02d_%02d' % (output_base, i, j), output_dir) for format, dpi in formats: if out_of_date(code_path, img.filename(format)): @@ -570,7 +573,7 @@ def makefig(code, code_path, output_dir, output_base, config): try: figman.canvas.figure.savefig(img.filename(format), dpi=dpi, bbox_inches='tight') - except exceptions.BaseException, err: + except exceptions.BaseException as err: raise PlotError(traceback.format_exc()) img.formats.append(format) diff --git a/doc/sphinxext/tests/test_docscrape.py b/doc/sphinxext/tests/test_docscrape.py index 1d775e99e4f4f..1abf11b77dc56 100755 --- a/doc/sphinxext/tests/test_docscrape.py +++ b/doc/sphinxext/tests/test_docscrape.py @@ -1,6 +1,8 @@ 
+from __future__ import print_function # -*- encoding:utf-8 -*- import sys, os +import six sys.path.append(os.path.join(os.path.dirname(__file__), '..')) from docscrape import NumpyDocString, FunctionDoc, ClassDoc @@ -143,7 +145,7 @@ def test_examples(): def test_index(): assert_equal(doc['index']['default'], 'random') - print doc['index'] + print(doc['index']) assert_equal(len(doc['index']), 2) assert_equal(len(doc['index']['refguide']), 2) @@ -491,7 +493,7 @@ def test_unicode(): äää """) - assert doc['Summary'][0] == u'öäöäöäöäöåååå'.encode('utf-8') + assert doc['Summary'][0] == six.u('öäöäöäöäöåååå').encode('utf-8') def test_plot_examples(): cfg = dict(use_plots=True) diff --git a/doc/sphinxext/traitsdoc.py b/doc/sphinxext/traitsdoc.py index 0fcf2c1cd38c9..952206c442752 100755 --- a/doc/sphinxext/traitsdoc.py +++ b/doc/sphinxext/traitsdoc.py @@ -18,13 +18,13 @@ import os import pydoc -import docscrape -import docscrape_sphinx -from docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString +from . import docscrape +from . import docscrape_sphinx +from .docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString -import numpydoc +from . import numpydoc -import comment_eater +from . import comment_eater class SphinxTraitsDoc(SphinxClassDoc): def __init__(self, cls, modulename='', func_doc=SphinxFunctionDoc): diff --git a/examples/finance.py b/examples/finance.py index 24aa337a84024..069f299d585bb 100644 --- a/examples/finance.py +++ b/examples/finance.py @@ -19,7 +19,7 @@ def getQuotes(symbol, start, end): quotes = fin.quotes_historical_yahoo(symbol, start, end) - dates, open, close, high, low, volume = zip(*quotes) + dates, open, close, high, low, volume = list(zip(*quotes)) data = { 'open': open, diff --git a/ez_setup.py b/ez_setup.py index de65d3c1f0375..6f63b856f06c9 100644 --- a/ez_setup.py +++ b/ez_setup.py @@ -13,6 +13,7 @@ This file can also be run as a script to install or upgrade setuptools. """ +from __future__ import print_function import sys DEFAULT_VERSION = "0.6c11" DEFAULT_URL = "http://pypi.python.org/packages/%s/s/setuptools/" % sys.version[ @@ -75,10 +76,10 @@ def _validate_md5(egg_name, data): if egg_name in md5_data: digest = md5(data).hexdigest() if digest != md5_data[egg_name]: - print >>sys.stderr, ( + print(( "md5 validation of %s failed! (Possible download problem?)" % egg_name - ) + ), file=sys.stderr) sys.exit(2) return data @@ -113,14 +114,14 @@ def do_download(): try: pkg_resources.require("setuptools>=" + version) return - except pkg_resources.VersionConflict, e: + except pkg_resources.VersionConflict as e: if was_imported: - print >>sys.stderr, ( + print(( "The required version of setuptools (>=%s) is not available, and\n" "can't be installed while this script is running. Please install\n" " a more recent version first, using 'easy_install -U setuptools'." "\n\n(Currently using %r)" - ) % (version, e.args[0]) + ) % (version, e.args[0]), file=sys.stderr) sys.exit(2) else: del pkg_resources, sys.modules['pkg_resources'] # reload ok @@ -199,10 +200,10 @@ def main(argv, version=DEFAULT_VERSION): os.unlink(egg) else: if setuptools.__version__ == '0.0.1': - print >>sys.stderr, ( + print(( "You have an obsolete version of setuptools installed. Please\n" "remove it from your system entirely before rerunning this script." 
- ) + ), file=sys.stderr) sys.exit(2) req = "setuptools>=" + version @@ -221,8 +222,8 @@ def main(argv, version=DEFAULT_VERSION): from setuptools.command.easy_install import main main(argv) else: - print "Setuptools version", version, "or greater has been installed." - print '(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)' + print("Setuptools version", version, "or greater has been installed.") + print('(Run "ez_setup.py -U setuptools" to reinstall or upgrade.)') def update_md5(filenames): @@ -236,8 +237,7 @@ def update_md5(filenames): md5_data[base] = md5(f.read()).hexdigest() f.close() - data = [" %r: %r,\n" % it for it in md5_data.items()] - data.sort() + data = sorted([" %r: %r,\n" % it for it in md5_data.items()]) repl = "".join(data) import inspect @@ -248,7 +248,7 @@ def update_md5(filenames): match = re.search("\nmd5_data = {\n([^}]+)}", src) if not match: - print >>sys.stderr, "Internal error!" + print("Internal error!", file=sys.stderr) sys.exit(2) src = src[:match.start(1)] + repl + src[match.end(1):] diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py index 59a9bbdfbdb9e..26a70963d5bcc 100644 --- a/pandas/compat/scipy.py +++ b/pandas/compat/scipy.py @@ -2,6 +2,7 @@ Shipping functions from SciPy to reduce dependency on having SciPy installed """ +from pandas.util.py3compat import range import numpy as np @@ -118,12 +119,12 @@ def rankdata(a): sumranks = 0 dupcount = 0 newarray = np.zeros(n, float) - for i in xrange(n): + for i in range(n): sumranks += i dupcount += 1 if i == n - 1 or svec[i] != svec[i + 1]: averank = sumranks / float(dupcount) + 1 - for j in xrange(i - dupcount + 1, i + 1): + for j in range(i - dupcount + 1, i + 1): newarray[ivec[j]] = averank sumranks = 0 dupcount = 0 @@ -223,9 +224,9 @@ def percentileofscore(a, score, kind='rank'): if kind == 'rank': if not(np.any(a == score)): a = np.append(a, score) - a_len = np.array(range(len(a))) + a_len = np.array(list(range(len(a)))) else: - a_len = np.array(range(len(a))) + 1.0 + a_len = np.array(list(range(len(a)))) + 1.0 a = np.sort(a) idx = [a == score] diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 4bb990a57cb4d..21b6f32892cf2 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -4,6 +4,7 @@ """ import numpy as np +import six import pandas.core.common as com import pandas.algos as algos @@ -31,7 +32,7 @@ def match(to_match, values, na_sentinel=-1): match : ndarray of integers """ values = com._asarray_tuplesafe(values) - if issubclass(values.dtype.type, basestring): + if issubclass(values.dtype.type, six.string_types): values = np.array(values, dtype='O') f = lambda htype, caster: _match_generic(to_match, values, htype, caster) diff --git a/pandas/core/array.py b/pandas/core/array.py index 0026dfcecc445..d1d29649d4dc7 100644 --- a/pandas/core/array.py +++ b/pandas/core/array.py @@ -2,7 +2,9 @@ Isolate pandas's exposure to NumPy """ +from pandas.util import compat import numpy as np +import six Array = np.ndarray @@ -16,7 +18,7 @@ _lift_types = [] -for _k, _v in _dtypes.iteritems(): +for _k, _v in compat.iteritems(_dtypes): for _i in _v: _lift_types.append(_k + str(_i)) diff --git a/pandas/core/common.py b/pandas/core/common.py index eba0379a2c824..25353fe33ccf4 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2,6 +2,7 @@ Misc tools for implementing data structures """ +from pandas.util.py3compat import range, long import itertools import re from datetime import datetime @@ -21,6 +22,8 @@ from pandas.core.config import get_option 
from pandas.core import array as pa +import six +from six.moves import map # XXX: HACK for NumPy 1.5.1 to suppress warnings try: @@ -688,7 +691,7 @@ def _infer_dtype_from_scalar(val): dtype = val.dtype val = val.item() - elif isinstance(val, basestring): + elif isinstance(val, six.string_types): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry @@ -781,7 +784,7 @@ def _maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ # in case we have a string that looked like a number - if issubclass(np.dtype(dtype).type, basestring): + if issubclass(np.dtype(dtype).type, six.string_types): dtype = np.object_ return dtype, fill_value @@ -1168,7 +1171,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ if dtype is not None: - if isinstance(dtype, basestring): + if isinstance(dtype, six.string_types): dtype = np.dtype(dtype) is_datetime64 = is_datetime64_dtype(dtype) @@ -1338,7 +1341,7 @@ def _join_unicode(lines, sep=''): try: return sep.join(lines) except UnicodeDecodeError: - sep = unicode(sep) + sep = six.text_type(sep) return sep.join([x.decode('utf-8') if isinstance(x, str) else x for x in lines]) @@ -1398,7 +1401,7 @@ def banner(message): return '%s\n%s\n%s' % (bar, message, bar) def _long_prod(vals): - result = 1L + result = long(1) for x in vals: result *= x return result @@ -1478,7 +1481,7 @@ def _asarray_tuplesafe(values, dtype=None): result = np.asarray(values, dtype=dtype) - if issubclass(result.dtype.type, basestring): + if issubclass(result.dtype.type, six.string_types): result = np.asarray(values, dtype=object) if result.ndim == 2: @@ -1494,7 +1497,7 @@ def _asarray_tuplesafe(values, dtype=None): def _index_labels_to_array(labels): - if isinstance(labels, (basestring, tuple)): + if isinstance(labels, (six.string_types, tuple)): labels = [labels] if not isinstance(labels, (list, np.ndarray)): @@ -1609,13 +1612,13 @@ def is_re_compilable(obj): def is_list_like(arg): - return hasattr(arg, '__iter__') and not isinstance(arg, basestring) + return hasattr(arg, '__iter__') and not isinstance(arg, six.string_types) def _is_sequence(x): try: iter(x) len(x) # it has a length - return not isinstance(x, basestring) and True + return not isinstance(x, six.string_types) and True except Exception: return False @@ -1703,7 +1706,10 @@ def readline(self): return self.reader.readline().encode('utf-8') def next(self): - return self.reader.next().encode("utf-8") + return next(self.reader).encode("utf-8") + + # Python 3 iterator + __next__ = next def _get_handle(path, mode, encoding=None, compression=None): @@ -1752,8 +1758,11 @@ def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): self.reader = csv.reader(f, dialect=dialect, **kwds) def next(self): - row = self.reader.next() - return [unicode(s, "utf-8") for s in row] + row = next(self.reader) + return [six.text_type(s, "utf-8") for s in row] + + # python 3 iterator + __next__ = next def __iter__(self): # pragma: no cover return self @@ -1951,9 +1960,9 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq,set): - fmt = u"set([%s])" + fmt = six.u("set([%s])") else: - fmt = u"[%s]" if hasattr(seq, '__setitem__') else u"(%s)" + fmt = six.u("[%s]") if hasattr(seq, '__setitem__') else six.u("(%s)") nitems = get_option("max_seq_items") or len(seq) @@ -1976,10 +1985,10 @@ def _pprint_dict(seq, 
_nest_lvl=0,**kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. """ - fmt = u"{%s}" + fmt = six.u("{%s}") pairs = [] - pfmt = u"%s: %s" + pfmt = six.u("%s: %s") nitems = get_option("max_seq_items") or len(seq) @@ -2025,7 +2034,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): #should deal with it himself. try: - result = unicode(thing) # we should try this first + result = six.text_type(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") @@ -2045,11 +2054,11 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): for c in escape_chars: result = result.replace(c, translate[c]) - return unicode(result) + return six.text_type(result) if (py3compat.PY3 and hasattr(thing, '__next__')) or \ hasattr(thing, 'next'): - return unicode(thing) + return six.text_type(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl,quote_strings=True) @@ -2057,7 +2066,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): get_option("display.pprint_nest_depth"): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings) - elif isinstance(thing,basestring) and quote_strings: + elif isinstance(thing,six.string_types) and quote_strings: if py3compat.PY3: fmt = "'%s'" else: @@ -2066,7 +2075,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): else: result = as_escaped_unicode(thing) - return unicode(result) # always unicode + return six.text_type(result) # always unicode def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): diff --git a/pandas/core/config.py b/pandas/core/config.py index ae7c71d082a89..c66911c122e26 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -1,9 +1,7 @@ """ The config module holds package-wide configurables and provides a uniform API for working with them. -""" -""" Overview ======== @@ -54,6 +52,8 @@ from collections import namedtuple import warnings +import six +from six.moves import map DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple( @@ -149,7 +149,7 @@ def _describe_option(pat='', _print_desc=True): if len(keys) == 0: raise KeyError('No such keys(s)') - s = u'' + s = six.u('') for k in keys: # filter by pat s += _build_option_description(k) @@ -588,9 +588,9 @@ def _build_option_description(k): o = _get_registered_option(k) d = _get_deprecated_option(k) - s = u'%s: ' % k + s = six.u('%s: ') % k if o: - s += u'[default: %s] [currently: %s]' % (o.defval, _get_option(k, True)) + s += six.u('[default: %s] [currently: %s]') % (o.defval, _get_option(k, True)) if o.doc: s += '\n' + '\n '.join(o.doc.strip().split('\n')) @@ -598,9 +598,9 @@ def _build_option_description(k): s += 'No description available.\n' if d: - s += u'\n\t(Deprecated' - s += (u', use `%s` instead.' 
% d.rkey if d.rkey else '') - s += u')\n' + s += six.u('\n\t(Deprecated') + s += (six.u(', use `%s` instead.') % d.rkey if d.rkey else '') + s += six.u(')\n') s += '\n' return s @@ -734,7 +734,7 @@ def inner(x): if isinstance(_type,(tuple,list)) : if not any([isinstance(x,t) for t in _type]): from pandas.core.common import pprint_thing as pp - pp_values = map(pp, _type) + pp_values = list(map(pp, _type)) raise ValueError("Value must be an instance of %s" % pp("|".join(pp_values))) elif not isinstance(x, _type): raise ValueError("Value must be an instance of '%s'" % str(_type)) @@ -745,7 +745,7 @@ def is_one_of_factory(legal_values): def inner(x): from pandas.core.common import pprint_thing as pp if not x in legal_values: - pp_values = map(pp, legal_values) + pp_values = list(map(pp, legal_values)) raise ValueError("Value must be one of %s" % pp("|".join(pp_values))) return inner @@ -756,5 +756,5 @@ def inner(x): is_bool = is_type_factory(bool) is_float = is_type_factory(float) is_str = is_type_factory(str) -is_unicode = is_type_factory(unicode) +is_unicode = is_type_factory(six.text_type) is_text = is_instance_factory(basestring) diff --git a/pandas/core/expressions.py b/pandas/core/expressions.py index abe891b82410c..27c06e23b5a9e 100644 --- a/pandas/core/expressions.py +++ b/pandas/core/expressions.py @@ -93,10 +93,10 @@ def _evaluate_numexpr(op, op_str, a, b, raise_on_error = False, **eval_kwargs): local_dict={ 'a_value' : a_value, 'b_value' : b_value }, casting='safe', **eval_kwargs) - except (ValueError), detail: + except (ValueError) as detail: if 'unknown type object' in str(detail): pass - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError(str(detail)) @@ -126,10 +126,10 @@ def _where_numexpr(cond, a, b, raise_on_error = False): 'a_value' : a_value, 'b_value' : b_value }, casting='safe') - except (ValueError), detail: + except (ValueError) as detail: if 'unknown type object' in str(detail): pass - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError(str(detail)) diff --git a/pandas/core/format.py b/pandas/core/format.py index c9beb729b2436..1b78b501b58a3 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,13 +1,13 @@ +from __future__ import print_function # pylint: disable=W0141 -from itertools import izip +from pandas.util.py3compat import range +from pandas.util import compat import sys +import six +from six.moves import map, zip, reduce -try: - from StringIO import StringIO -except: - from io import StringIO - +from pandas.util.py3compat import StringIO from pandas.core.common import adjoin, isnull, notnull from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.util import py3compat @@ -71,7 +71,7 @@ class SeriesFormatter(object): def __init__(self, series, buf=None, header=True, length=True, na_rep='NaN', name=False, float_format=None, dtype=True): self.series = series - self.buf = buf if buf is not None else StringIO(u"") + self.buf = buf if buf is not None else StringIO() self.name = name self.na_rep = na_rep self.length = length @@ -83,7 +83,7 @@ def __init__(self, series, buf=None, header=True, length=True, self.dtype = dtype def _get_footer(self): - footer = u'' + footer = six.u('') if self.name: if getattr(self.series.index, 'freq', None): @@ -108,7 +108,7 @@ def _get_footer(self): footer += ', ' footer += 'dtype: %s' % com.pprint_thing(self.series.dtype.name) - return unicode(footer) + return six.text_type(footer) def _get_formatted_index(self): 
index = self.series.index @@ -131,7 +131,7 @@ def to_string(self): series = self.series if len(series) == 0: - return u'' + return six.u('') fmt_index, have_header = self._get_formatted_index() fmt_values = self._get_formatted_values() @@ -140,7 +140,7 @@ def to_string(self): pad_space = min(maxlen, 60) result = ['%s %s'] * len(fmt_values) - for i, (k, v) in enumerate(izip(fmt_index[1:], fmt_values)): + for i, (k, v) in enumerate(zip(fmt_index[1:], fmt_values)): idx = k.ljust(pad_space) result[i] = result[i] % (idx, v) @@ -151,7 +151,7 @@ def to_string(self): if footer: result.append(footer) - return unicode(u'\n'.join(result)) + return six.text_type(six.u('\n').join(result)) def _strlen_func(): if py3compat.PY3: # pragma: no cover @@ -285,7 +285,7 @@ def to_string(self, force_unicode=None): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + info_line = (six.u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, com.pprint_thing(frame.columns), com.pprint_thing(frame.index))) @@ -347,7 +347,7 @@ def get_col_type(dtype): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (u'Empty %s\nColumns: %s\nIndex: %s' + info_line = (six.u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, frame.columns, frame.index)) strcols = [[info_line]] @@ -360,7 +360,7 @@ def get_col_type(dtype): column_format = 'l%s' % ''.join(map(get_col_type, dtypes)) else: column_format = '%s' % ''.join(map(get_col_type, dtypes)) - elif not isinstance(column_format, basestring): + elif not isinstance(column_format, six.string_types): raise AssertionError(('column_format must be str or unicode, not %s' % type(column_format))) @@ -369,7 +369,7 @@ def write(buf, frame, column_format, strcols): buf.write('\\toprule\n') nlevels = frame.index.nlevels - for i, row in enumerate(izip(*strcols)): + for i, row in enumerate(zip(*strcols)): if i == nlevels: buf.write('\\midrule\n') # End of header crow = [(x.replace('_', '\\_') @@ -383,7 +383,7 @@ def write(buf, frame, column_format, strcols): if hasattr(self.buf, 'write'): write(self.buf, frame, column_format, strcols) - elif isinstance(self.buf, basestring): + elif isinstance(self.buf, six.string_types): with open(self.buf, 'w') as f: write(f, frame, column_format, strcols) else: @@ -404,7 +404,7 @@ def to_html(self, classes=None): html_renderer = HTMLFormatter(self, classes=classes) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) - elif isinstance(self.buf, basestring): + elif isinstance(self.buf, six.string_types): with open(self.buf, 'w') as f: html_renderer.write_result(f) else: @@ -419,13 +419,13 @@ def is_numeric_dtype(dtype): if isinstance(self.columns, MultiIndex): fmt_columns = self.columns.format(sparsify=False, adjoin=False) - fmt_columns = zip(*fmt_columns) + fmt_columns = list(zip(*fmt_columns)) dtypes = self.frame.dtypes.values need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) - str_columns = zip(*[[' ' + y + str_columns = list(zip(*[[' ' + y if y not in self.formatters and need_leadsp[x] else y for y in x] - for x in fmt_columns]) + for x in fmt_columns])) if self.sparsify: str_columns = _sparsify(str_columns) @@ -718,7 +718,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) - idx_values = zip(*idx_values) + idx_values = list(zip(*idx_values)) if self.fmt.sparsify: @@ -749,9 +749,9 @@ def 
_write_hierarchical_rows(self, fmt_values, indent): nindex_levels=len(levels) - sparse_offset) else: for i in range(len(frame)): - idx_values = zip(*frame.index.format(sparsify=False, + idx_values = list(zip(*frame.index.format(sparsify=False, adjoin=False, - names=False)) + names=False))) row = [] row.extend(idx_values[i]) row.extend(fmt_values[j][i] for j in range(ncols)) @@ -1069,7 +1069,7 @@ def _save(self): chunksize = self.chunksize chunks = int(nrows / chunksize)+1 - for i in xrange(chunks): + for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, nrows) if start_i >= end_i: @@ -1304,7 +1304,7 @@ def _format_hierarchical_rows(self): index_labels = self.index_label # if index labels are not empty go ahead and dump - if (filter(lambda x: x is not None, index_labels) + if (any(x is not None for x in index_labels) and self.header is not False): # if isinstance(self.df.columns, MultiIndex): # self.rowcounter += 1 @@ -1836,9 +1836,9 @@ def __call__(self, num): mant = sign * dnum / (10 ** pow10) if self.accuracy is None: # pragma: no cover - format_str = u"% g%s" + format_str = six.u("% g%s") else: - format_str = (u"%% .%if%%s" % self.accuracy) + format_str = (six.u("%% .%if%%s") % self.accuracy) formatted = format_str % (mant, prefix) @@ -1864,8 +1864,8 @@ def set_eng_float_format(precision=None, accuracy=3, use_eng_prefix=False): def _put_lines(buf, lines): - if any(isinstance(x, unicode) for x in lines): - lines = [unicode(x) for x in lines] + if any(isinstance(x, six.text_type) for x in lines): + lines = [six.text_type(x) for x in lines] buf.write('\n'.join(lines)) @@ -1900,4 +1900,4 @@ def _binify(cols, line_width): 1134250., 1219550., 855736.85, 1042615.4286, 722621.3043, 698167.1818, 803750.]) fmt = FloatArrayFormatter(arr, digits=7) - print (fmt.get_result()) + print(fmt.get_result()) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 22dc27ff977d9..94b36ffed3b9f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,12 +12,13 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from itertools import izip -from StringIO import StringIO +from six.moves import zip +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range +from pandas.util import compat import operator import sys import collections -import itertools from numpy import nan as NA import numpy as np @@ -57,6 +58,8 @@ import pandas.algos as _algos from pandas.core.config import get_option, set_option +import six +from six.moves import map #---------------------------------------------------------------------- # Docstring templates @@ -440,7 +443,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, 'incompatible data and dtype') if arr.ndim == 0 and index is not None and columns is not None: - if isinstance(data, basestring) and dtype is None: + if isinstance(data, six.string_types) and dtype is None: dtype = np.object_ if dtype is None: dtype, data = _infer_dtype_from_scalar(data) @@ -656,7 +659,7 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
""" - buf = StringIO(u"") + buf = StringIO(six.u("")) fits_vertical = self._repr_fits_vertical_() fits_horizontal = False if fits_vertical: @@ -683,7 +686,7 @@ def __unicode__(self): self.info(buf=buf, verbose=verbose) value = buf.getvalue() - if not type(value) == unicode: + if not isinstance(value, six.text_type): raise AssertionError() return value @@ -715,7 +718,7 @@ def _repr_html_(self): 'max-width:1500px;overflow:auto;">\n' + self.to_html() + '\n') else: - buf = StringIO(u"") + buf = StringIO(six.u("")) max_info_rows = get_option('display.max_info_rows') verbose = (max_info_rows is None or self.shape[0] <= max_info_rows) @@ -769,7 +772,7 @@ def iterrows(self): A generator that iterates over the rows of the frame. """ columns = self.columns - for k, v in izip(self.index, self.values): + for k, v in zip(self.index, self.values): s = v.view(Series) s.index = columns s.name = k @@ -785,8 +788,8 @@ def itertuples(self, index=True): arrays.append(self.index) # use integer indexing because of possible duplicate column names - arrays.extend(self.iloc[:, k] for k in xrange(len(self.columns))) - return izip(*arrays) + arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) + return zip(*arrays) iterkv = iteritems if py3compat.PY3: # pragma: no cover @@ -1031,7 +1034,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, if py3compat.PY3: first_row = next(data) else: - first_row = data.next() + first_row = next(data) except StopIteration: return cls(index=index, columns=columns) @@ -1093,7 +1096,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, result_index = None if index is not None: - if (isinstance(index, basestring) or + if (isinstance(index, six.string_types) or not hasattr(index, "__iter__")): i = columns.get_loc(index) exclude.add(index) @@ -1148,7 +1151,7 @@ def to_records(self, index=True, convert_datetime64=True): else: if isinstance(self.index, MultiIndex): # array of tuples to numpy cols. 
copy copy copy - ix_vals = map(np.array,zip(*self.index.values)) + ix_vals = list(map(np.array,zip(*self.index.values))) else: ix_vals = [self.index.values] @@ -1194,7 +1197,7 @@ def from_items(cls, items, columns=None, orient='columns'): ------- frame : DataFrame """ - keys, values = zip(*items) + keys, values = list(zip(*items)) if orient == 'columns': if columns is not None: @@ -1452,7 +1455,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', """ from pandas.io.excel import ExcelWriter need_save = False - if isinstance(excel_writer, basestring): + if isinstance(excel_writer, six.string_types): excel_writer = ExcelWriter(excel_writer) need_save = True @@ -2419,8 +2422,6 @@ def lookup(self, row_labels, col_labels): The found values """ - from itertools import izip - n = len(row_labels) if n != len(col_labels): raise AssertionError('Row labels must have same size as ' @@ -2439,7 +2440,7 @@ def lookup(self, row_labels, col_labels): result = values.flat[flat_index] else: result = np.empty(n, dtype='O') - for i, (r, c) in enumerate(izip(row_labels, col_labels)): + for i, (r, c) in enumerate(zip(row_labels, col_labels)): result[i] = self.get_value(r, c) if result.dtype == 'O': @@ -2910,7 +2911,7 @@ def _maybe_cast(values, labels=None): if not drop: names = self.index.names - zipped = zip(self.index.levels, self.index.labels) + zipped = list(zip(self.index.levels, self.index.labels)) multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(zipped))): @@ -3030,7 +3031,7 @@ def filter(self, items=None, like=None, regex=None): if items is not None: return self.reindex(columns=[r for r in items if r in self]) elif like: - matchf = lambda x: (like in x if isinstance(x, basestring) + matchf = lambda x: (like in x if isinstance(x, six.string_types) else like in str(x)) return self.select(matchf, axis=1) elif regex: @@ -3152,7 +3153,7 @@ def _m8_to_i8(x): if cols is None: values = list(_m8_to_i8(self.values.T)) else: - if np.iterable(cols) and not isinstance(cols, basestring): + if np.iterable(cols) and not isinstance(cols, six.string_types): if isinstance(cols, tuple): if cols in self.columns: values = [self[cols]] @@ -3600,7 +3601,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, regex = True items = to_replace.items() - keys, values = itertools.izip(*items) + keys, values = zip(*items) are_mappings = [isinstance(v, (dict, Series)) for v in values] @@ -4315,7 +4316,7 @@ def shift(self, periods=1, freq=None, **kwds): offset = _resolve_offset(freq, kwds) - if isinstance(offset, basestring): + if isinstance(offset, six.string_types): offset = datetools.to_offset(offset) if offset is None: @@ -4456,7 +4457,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): values = self.values series_gen = (Series.from_array(arr, index=res_columns, name=name) for i, (arr, name) in - enumerate(izip(values, res_index))) + enumerate(zip(values, res_index))) else: raise ValueError('Axis must be 0 or 1, got %s' % str(axis)) @@ -4479,7 +4480,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): for i, v in enumerate(series_gen): results[i] = func(v) keys.append(v.name) - except Exception, e: + except Exception as e: try: if hasattr(e, 'args'): k = res_index[i] @@ -4535,7 +4536,7 @@ def _apply_broadcast(self, func, axis): def applymap(self, func): """ Apply a function to a DataFrame that is intended to operate - elementwise, i.e. like doing map(func, series) for each series in the + elementwise, i.e. 
like doing list(map(func, series)) for each series in the DataFrame Parameters @@ -4888,7 +4889,7 @@ def pretty_name(x): series.min(), series.quantile(lb), series.median(), series.quantile(ub), series.max()]) - return self._constructor(map(list, zip(*destat)), index=destat_columns, + return self._constructor(list(map(list, zip(*destat))), index=destat_columns, columns=numdata.columns) #---------------------------------------------------------------------- @@ -4947,7 +4948,7 @@ def _count_level(self, level, axis=0, numeric_only=False): # python 2.5 mask = notnull(frame.values).view(np.uint8) - if isinstance(level, basestring): + if isinstance(level, six.string_types): level = self.index._get_level_number(level) level_index = frame.index.levels[level] @@ -5849,7 +5850,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): return arrays, columns else: # last ditch effort - data = map(tuple, data) + data = list(map(tuple, data)) return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) @@ -5923,7 +5924,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def _get_names_from_index(data): - index = range(len(data)) + index = list(range(len(data))) has_some_name = any([s.name is not None for s in data]) if not has_some_name: return index @@ -5996,7 +5997,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrame) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.columns - if isinstance(c, basestring) and py3compat.isidentifier(c)] + if isinstance(c, six.string_types) and py3compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6be5f456b50e6..2dce7430c144c 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,6 @@ # pylint: disable=W0231,E1101 +from pandas.util import compat import numpy as np import pandas.lib as lib from pandas.core.base import PandasObject @@ -9,6 +10,8 @@ from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com +import six +from six.moves import map, zip class PandasError(Exception): @@ -23,7 +26,7 @@ class PandasContainer(PandasObject): } _AXIS_ALIASES = {} - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _AXIS_NAMES = dict((v, k) for k, v in compat.iteritems(_AXIS_NUMBERS)) def to_pickle(self, path): """ @@ -77,7 +80,7 @@ def _get_axis_number(self, axis): def _get_axis_name(self, axis): axis = self._AXIS_ALIASES.get(axis, axis) - if isinstance(axis, basestring): + if isinstance(axis, six.string_types): if axis in self._AXIS_NUMBERS: return axis else: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index cc0a2b7589bb6..528d7baca330c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,6 +1,9 @@ -from itertools import izip import types import numpy as np +from pandas.util.py3compat import range, long +import six +from six.moves import zip +from pandas.util import compat from pandas.core.base import PandasObject from pandas.core.categorical import Categorical @@ -588,7 +591,7 @@ def get_iterator(self, data, axis=0, keep_internal=True): splitter = self._get_splitter(data, axis=axis, keep_internal=keep_internal) keys = self._get_group_keys() - for key, (i, group) in izip(keys, splitter): + for key, (i, group) in zip(keys, splitter): yield key, group def _get_splitter(self, data, 
axis=0, keep_internal=True): @@ -616,13 +619,13 @@ def apply(self, f, data, axis=0, keep_internal=False): try: values, mutated = splitter.fast_apply(f, group_keys) return group_keys, values, mutated - except (Exception), detail: + except (Exception) as detail: # we detect a mutatation of some kind # so take slow path pass result_values = [] - for key, (i, group) in izip(group_keys, splitter): + for key, (i, group) in zip(group_keys, splitter): object.__setattr__(group, 'name', key) # group might be modified @@ -671,7 +674,7 @@ def groups(self): if len(self.groupings) == 1: return self.groupings[0].groups else: - to_groupby = zip(*(ping.grouper for ping in self.groupings)) + to_groupby = list(zip(*(ping.grouper for ping in self.groupings))) to_groupby = Index(to_groupby) return self.axis.groupby(to_groupby) @@ -727,12 +730,12 @@ def get_group_levels(self): return [self.groupings[0].group_index] if self._overflow_possible: - recons_labels = [np.array(x) for x in izip(*obs_ids)] + recons_labels = [np.array(x) for x in zip(*obs_ids)] else: recons_labels = decons_group_index(obs_ids, self.shape) name_list = [] - for ping, labels in izip(self.groupings, recons_labels): + for ping, labels in zip(self.groupings, recons_labels): labels = com._ensure_platform_int(labels) name_list.append(ping.group_index.take(labels)) @@ -1004,7 +1007,7 @@ def get_iterator(self, data, axis=0): """ if axis == 0: start = 0 - for edge, label in izip(self.bins, self.binlabels): + for edge, label in zip(self.bins, self.binlabels): yield label, data[start:edge] start = edge @@ -1012,14 +1015,14 @@ def get_iterator(self, data, axis=0): yield self.binlabels[-1], data[start:] else: start = 0 - for edge, label in izip(self.bins, self.binlabels): - inds = range(start, edge) + for edge, label in zip(self.bins, self.binlabels): + inds = list(range(start, edge)) yield label, data.take(inds, axis=axis) start = edge n = len(data.axes[axis]) if start < n: - inds = range(start, n) + inds = list(range(start, n)) yield self.binlabels[-1], data.take(inds, axis=axis) def apply(self, f, data, axis=0, keep_internal=False): @@ -1257,12 +1260,12 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): if level is not None: if not isinstance(group_axis, MultiIndex): - if isinstance(level, basestring): + if isinstance(level, six.string_types): if obj.index.name != level: raise ValueError('level name %s is not the name of the index' % level) elif level > 0: raise ValueError('level > 0 only valid with MultiIndex') - + level = None key = group_axis @@ -1305,7 +1308,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): groupings = [] exclusions = [] - for i, (gpr, level) in enumerate(izip(keys, levels)): + for i, (gpr, level) in enumerate(zip(keys, levels)): name = None try: obj._data.items.get_loc(gpr) @@ -1334,7 +1337,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): def _is_label_like(val): - return isinstance(val, basestring) or np.isscalar(val) + return isinstance(val, six.string_types) or np.isscalar(val) def _convert_grouper(axis, grouper): @@ -1406,7 +1409,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): ------- Series or DataFrame """ - if isinstance(func_or_funcs, basestring): + if isinstance(func_or_funcs, six.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) if hasattr(func_or_funcs, '__iter__'): @@ -1446,11 +1449,11 @@ def _aggregate_multiple_funcs(self, arg): # list of functions / function names columns = [] for f in arg: - if isinstance(f, basestring): + if 
isinstance(f, six.string_types): columns.append(f) else: columns.append(f.__name__) - arg = zip(columns, arg) + arg = list(zip(columns, arg)) results = {} @@ -1534,7 +1537,7 @@ def transform(self, func, *args, **kwargs): result = result.values dtype = result.dtype - if isinstance(func, basestring): + if isinstance(func, six.string_types): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) @@ -1576,7 +1579,7 @@ def filter(self, func, dropna=True, *args, **kwargs): ------- filtered : Series """ - if isinstance(func, basestring): + if isinstance(func, six.string_types): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) @@ -1690,7 +1693,7 @@ def _obj_with_exclusions(self): @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): - if isinstance(arg, basestring): + if isinstance(arg, six.string_types): return getattr(self, arg)(*args, **kwargs) result = OrderedDict() @@ -1905,7 +1908,7 @@ def _wrap_applied_output(self, keys, values, not_indexed_same=False): if not all_indexed_same: return self._concat_objects(keys, values, not_indexed_same=not_indexed_same) - + try: if self.axis == 0: @@ -1998,13 +2001,13 @@ def transform(self, func, *args, **kwargs): return concatenated def _define_paths(self, func, *args, **kwargs): - if isinstance(func, basestring): + if isinstance(func, six.string_types): fast_path = lambda group: getattr(group, func)(*args, **kwargs) slow_path = lambda group: group.apply(lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis) else: fast_path = lambda group: func(group, *args, **kwargs) slow_path = lambda group: group.apply(lambda x: func(x, *args, **kwargs), axis=self.axis) - return fast_path, slow_path + return fast_path, slow_path def _choose_path(self, fast_path, slow_path, group): path = slow_path @@ -2249,7 +2252,7 @@ def aggregate(self, arg, *args, **kwargs): ------- aggregated : Panel """ - if isinstance(arg, basestring): + if isinstance(arg, six.string_types): return getattr(self, arg)(*args, **kwargs) return self._aggregate_generic(arg, *args, **kwargs) @@ -2332,7 +2335,7 @@ def __iter__(self): starts, ends = lib.generate_slices(self.slabels, self.ngroups) - for i, (start, end) in enumerate(izip(starts, ends)): + for i, (start, end) in enumerate(zip(starts, ends)): # Since I'm now compressing the group ids, it's now not "possible" # to produce empty slices because such groups would not be observed # in the data @@ -2436,7 +2439,7 @@ def get_group_index(label_list, shape): n = len(label_list[0]) group_index = np.zeros(n, dtype=np.int64) mask = np.zeros(n, dtype=bool) - for i in xrange(len(shape)): + for i in range(len(shape)): stride = np.prod([x for x in shape[i + 1:]], dtype=np.int64) group_index += com._ensure_int64(label_list[i]) * stride mask |= label_list[i] < 0 @@ -2448,7 +2451,7 @@ def get_group_index(label_list, shape): def _int64_overflow_possible(shape): - the_prod = 1L + the_prod = long(1) for x in shape: the_prod *= long(x) @@ -2461,7 +2464,7 @@ def decons_group_index(comp_labels, shape): factor = 1 y = 0 x = comp_labels - for i in reversed(xrange(len(shape))): + for i in reversed(range(len(shape))): labels = (x - y) % (factor * shape[i]) // factor np.putmask(labels, comp_labels < 0, -1) label_list.append(labels) @@ -2503,7 +2506,7 @@ def _lexsort_indexer(keys, orders=None): elif orders is None: orders = [True] * len(keys) - for key, order in izip(keys, orders): + for key, order in zip(keys, orders): rizer = _hash.Factorizer(len(key)) 
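# NOTE (editor, inferred): Factorizer maps each distinct value of this key
# to an integer code; those codes feed the lexsort indexer built below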
if not key.dtype == np.object_: @@ -2537,12 +2540,12 @@ def __init__(self, comp_ids, ngroups, labels, levels): self._populate_tables() def _populate_tables(self): - for labs, table in izip(self.labels, self.tables): + for labs, table in zip(self.labels, self.tables): table.map(self.comp_ids, labs.astype(np.int64)) def get_key(self, comp_id): return tuple(level[table.get_item(comp_id)] - for table, level in izip(self.tables, self.levels)) + for table, level in zip(self.tables, self.levels)) def _get_indices_dict(label_list, keys): @@ -2664,7 +2667,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrameGroupBy) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.obj.columns - if isinstance(c, basestring) and py3compat.isidentifier(c)] + if isinstance(c, six.string_types) and py3compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/index.py b/pandas/core/index.py index 3eb804d3a70e6..7cff2e51ad399 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,9 @@ # pylint: disable=E1101,E1103,W0232 -from itertools import izip - +from pandas.util.py3compat import range +from six.moves import zip +import six +from pandas.util import compat import numpy as np import pandas.tslib as tslib @@ -722,7 +724,7 @@ def get_value(self, series, key): """ try: return self._engine.get_value(series, key) - except KeyError, e1: + except KeyError as e1: if len(self) > 0 and self.inferred_type == 'integer': raise @@ -1349,7 +1351,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None): data = list(data) data = np.asarray(data) - if issubclass(data.dtype.type, basestring): + if issubclass(data.dtype.type, six.string_types): raise TypeError('String dtype not supported, you may need ' 'to explicitly cast to int') elif issubclass(data.dtype.type, np.integer): @@ -1593,7 +1595,7 @@ def has_duplicates(self): # has duplicates shape = [len(lev) for lev in self.levels] group_index = np.zeros(len(self), dtype='i8') - for i in xrange(len(shape)): + for i in range(len(shape)): stride = np.prod([x for x in shape[i + 1:]], dtype='i8') group_index += self.labels[i] * stride @@ -1610,7 +1612,7 @@ def get_value(self, series, key): # Label-based try: return self._engine.get_value(series, key) - except KeyError, e1: + except KeyError as e1: try: # TODO: what if a level contains tuples?? 
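# NOTE (editor): the engine lookup above raised KeyError, so fall back to
# resolving the label with get_loc() and indexing the values positionally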
loc = self.get_loc(key) @@ -1800,7 +1802,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrays = zip(*tuples) + arrays = list(zip(*tuples)) return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) @@ -1940,7 +1942,7 @@ def drop(self, labels, level=None): if isinstance(loc, int): inds.append(loc) else: - inds.extend(range(loc.start, loc.stop)) + inds.extend(list(range(loc.start, loc.stop))) return self.delete(inds) @@ -2236,7 +2238,7 @@ def _partial_tup_index(self, tup, side='left'): n = len(tup) start, end = 0, len(self) - zipped = izip(tup, self.levels, self.labels) + zipped = zip(tup, self.levels, self.labels) for k, (lab, lev, labs) in enumerate(zipped): section = labs[start:end] @@ -2445,7 +2447,7 @@ def equals(self, other): if len(self) != len(other): return False - for i in xrange(self.nlevels): + for i in range(self.nlevels): svalues = com.take_nd(self.levels[i].values, self.labels[i], allow_fill=False) ovalues = com.take_nd(other.levels[i].values, other.labels[i], @@ -2463,7 +2465,7 @@ def equal_levels(self, other): if self.nlevels != other.nlevels: return False - for i in xrange(self.nlevels): + for i in range(self.nlevels): if not self.levels[i].equals(other.levels[i]): return False return True @@ -2488,7 +2490,7 @@ def union(self, other): result_names = self.names if self.names == other.names else None uniq_tuples = lib.fast_unique_multiple([self.values, other.values]) - return MultiIndex.from_arrays(zip(*uniq_tuples), sortorder=0, + return MultiIndex.from_arrays(list(zip(*uniq_tuples)), sortorder=0, names=result_names) def intersection(self, other): @@ -2518,7 +2520,7 @@ def intersection(self, other): labels=[[]] * self.nlevels, names=result_names) else: - return MultiIndex.from_arrays(zip(*uniq_tuples), sortorder=0, + return MultiIndex.from_arrays(list(zip(*uniq_tuples)), sortorder=0, names=result_names) def diff(self, other): @@ -2635,7 +2637,7 @@ def _wrap_joined_index(self, joined, other): # For utility purposes def _sparsify(label_list, start=0,sentinal=''): - pivoted = zip(*label_list) + pivoted = list(zip(*label_list)) k = len(label_list) result = pivoted[:start + 1] @@ -2659,7 +2661,7 @@ def _sparsify(label_list, start=0,sentinal=''): prev = cur - return zip(*result) + return list(zip(*result)) def _ensure_index(index_like): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 0237cfde3b561..cb841169d88ab 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -3,7 +3,10 @@ from datetime import datetime from pandas.core.common import _asarray_tuplesafe from pandas.core.index import Index, MultiIndex, _ensure_index +from pandas.util.py3compat import range +from six.moves import zip import pandas.core.common as com +import six import pandas.lib as lib import numpy as np @@ -340,7 +343,7 @@ def _getitem_lowerdim(self, tup): except TypeError: # slices are unhashable pass - except Exception, e1: + except Exception as e1: if isinstance(tup[0], (slice, Index)): raise IndexingError @@ -707,7 +710,7 @@ def _getbool_axis(self, key, axis=0): inds, = key.nonzero() try: return self.obj.take(inds, axis=axis, convert=False) - except (Exception), detail: + except (Exception) as detail: raise self._exception(detail) def _get_slice_axis(self, slice_obj, axis=0): """ this is pretty simple as we just have to deal with labels """ @@ -920,7 +923,7 @@ def _convert_to_index_sliceable(obj, key): indexer = obj.ix._convert_to_indexer(key, axis=0) 
return indexer - elif isinstance(key, basestring): + elif isinstance(key, six.string_types): # we are an actual column if key in obj._data.items: @@ -1077,7 +1080,7 @@ def _is_label_like(key): def _is_list_like(obj): # Consider namedtuples to be not list like as they are useful as indices return (np.iterable(obj) - and not isinstance(obj, basestring) + and not isinstance(obj, six.string_types) and not (isinstance(obj, tuple) and type(obj) is not tuple)) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index f23a89635aaf2..0ff462ce21073 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,5 +1,6 @@ import itertools import re +import six from datetime import datetime from numpy import nan @@ -18,6 +19,8 @@ from pandas.tslib import Timestamp from pandas.util import py3compat +from pandas.util.py3compat import range +from six.moves import map, zip class Block(PandasObject): @@ -471,7 +474,7 @@ def eval(self, func, other, raise_on_error = True, try_cast = False): args = [ values, other ] try: result = self._try_coerce_result(func(*args)) - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values [%s]' % (repr(other),str(detail))) @@ -546,7 +549,7 @@ def func(c,v,o): v, o = self._try_coerce_args(v, o) try: return self._try_coerce_result(expressions.where(c, v, o, raise_on_error=True)) - except (Exception), detail: + except (Exception) as detail: if raise_on_error: raise TypeError('Could not operate [%s] with block values [%s]' % (repr(o),str(detail))) @@ -576,7 +579,7 @@ def func(c,v,o): # might need to separate out blocks axis = cond.ndim - 1 cond = cond.swapaxes(axis, 0) - mask = np.array([cond[i].all() for i in xrange(cond.shape[0])], + mask = np.array([cond[i].all() for i in range(cond.shape[0])], dtype=bool) result_blocks = [] @@ -686,7 +689,7 @@ class ObjectBlock(Block): _can_hold_na = True def __init__(self, values, items, ref_items, ndim=2, fastpath=False, placement=None): - if issubclass(values.dtype.type, basestring): + if issubclass(values.dtype.type, six.string_types): values = np.array(values, dtype=object) super(ObjectBlock, self).__init__(values, items, ref_items, @@ -757,7 +760,7 @@ def replace(self, to_replace, value, inplace=False, filter=None, inplace=inplace, filter=filter, regex=regex) elif both_lists: - for to_rep, v in itertools.izip(to_replace, value): + for to_rep, v in zip(to_replace, value): blk[0], = blk[0]._replace_single(to_rep, v, inplace=inplace, filter=filter, regex=regex) elif to_rep_is_list and regex: @@ -812,7 +815,7 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) - if isnull(value) or not isinstance(value, basestring): + if isnull(value) or not isinstance(value, six.string_types): def re_replacer(s): try: return value if rx.search(s) is not None else s @@ -830,7 +833,7 @@ def re_replacer(s): f = np.vectorize(re_replacer, otypes=[self.dtype]) try: - filt = map(self.items.get_loc, filter) + filt = list(map(self.items.get_loc, filter)) except TypeError: filt = slice(None) @@ -1922,7 +1925,7 @@ def _add_new_block(self, item, value, loc=None): # need to shift elements to the right if self._ref_locs[loc] is not None: - for i in reversed(range(loc+1,len(self._ref_locs))): + for i in reversed(list(range(loc+1,len(self._ref_locs)))): self._ref_locs[i] = self._ref_locs[i-1] self._ref_locs[loc] = 
(new_block, 0) @@ -2532,5 +2535,5 @@ def _possibly_convert_to_indexer(loc): if com._is_bool_indexer(loc): loc = [i for i, v in enumerate(loc) if v] elif isinstance(loc,slice): - loc = range(loc.start,loc.stop) + loc = list(range(loc.start,loc.stop)) return loc diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index b2ff366daa826..72ba4364cfedd 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,3 +1,4 @@ +from pandas.util import compat import sys import itertools import functools @@ -10,6 +11,7 @@ import pandas.algos as algos import pandas.hashtable as _hash import pandas.tslib as tslib +import six try: import bottleneck as bn @@ -30,7 +32,7 @@ def check(self, obj): def __call__(self, f): @functools.wraps(f) def _f(*args, **kwargs): - obj_iter = itertools.chain(args, kwargs.itervalues()) + obj_iter = itertools.chain(args, six.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): raise TypeError('reduction operation {0!r} not allowed for ' 'this dtype'.format(f.__name__.replace('nan', @@ -55,7 +57,7 @@ def __call__(self, alt): @functools.wraps(alt) def f(values, axis=None, skipna=True, **kwds): if len(self.kwargs) > 0: - for k, v in self.kwargs.iteritems(): + for k, v in compat.iteritems(self.kwargs): if k not in kwds: kwds[k] = v try: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index d33f7144c27b0..739ffc6f31e9c 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -3,6 +3,8 @@ """ # pylint: disable=E1103,W0231,W0212,W0621 +from pandas.util.py3compat import range +from pandas.util import compat import operator import sys import numpy as np @@ -25,6 +27,8 @@ import pandas.core.common as com import pandas.core.nanops as nanops import pandas.lib as lib +import six +from six.moves import map, zip def _ensure_like_indices(time, panels): @@ -473,17 +477,17 @@ def __unicode__(self): class_name = str(self.__class__) shape = self.shape - dims = u'Dimensions: %s' % ' x '.join( + dims = six.u('Dimensions: %s') % ' x '.join( ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)]) def axis_pretty(a): v = getattr(self, a) if len(v) > 0: - return u'%s axis: %s to %s' % (a.capitalize(), + return six.u('%s axis: %s to %s') % (a.capitalize(), com.pprint_thing(v[0]), com.pprint_thing(v[-1])) else: - return u'%s axis: None' % a.capitalize() + return six.u('%s axis: None') % a.capitalize() output = '\n'.join( [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) @@ -540,7 +544,7 @@ def to_sparse(self, fill_value=None, kind='block'): y : SparseDataFrame """ from pandas.core.sparse import SparsePanel - frames = dict(self.iterkv()) + frames = dict(self.iteritems()) return SparsePanel(frames, items=self.items, major_axis=self.major_axis, minor_axis=self.minor_axis, @@ -804,13 +808,13 @@ def _reindex_multi(self, items, major, minor): new_minor, indexer2 = self.minor_axis.reindex(minor) if indexer0 is None: - indexer0 = range(len(new_items)) + indexer0 = list(range(len(new_items))) if indexer1 is None: - indexer1 = range(len(new_major)) + indexer1 = list(range(len(new_major))) if indexer2 is None: - indexer2 = range(len(new_minor)) + indexer2 = list(range(len(new_minor))) for i, ind in enumerate(indexer0): com.take_2d_multi(values[ind], (indexer1, indexer2), @@ -976,7 +980,7 @@ def fillna(self, value=None, method=None): if method is None: raise ValueError('must specify a fill method or value') result = {} - for col, s in self.iterkv(): + for col, s in self.iteritems(): result[col] = s.fillna(method=method, value=value) return 
self._constructor.from_dict(result) @@ -1137,7 +1141,7 @@ def transpose(self, *args, **kwargs): for a in self._AXIS_ORDERS: if not a in kwargs: - where = map(a.startswith, aliases) + where = list(map(a.startswith, aliases)) if any(where): if sum(where) != 1: @@ -1483,7 +1487,7 @@ def _prep_ndarray(self, values, copy=True): if not isinstance(values, np.ndarray): values = np.asarray(values) # NumPy strings are a pain, convert to object - if issubclass(values.dtype.type, basestring): + if issubclass(values.dtype.type, six.string_types): values = np.array(values, dtype=object, copy=True) else: if copy: @@ -1711,7 +1715,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(Panel) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.keys() - if isinstance(c, basestring) + if isinstance(c, six.string_types) and py3compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 08ff3b70dcb13..3981850d9f861 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -1,6 +1,8 @@ """ Factory methods to create N-D panels """ import pandas.lib as lib +from six.moves import zip +import six def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_aliases=None, stat_axis=2,ns=None): @@ -27,7 +29,7 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a """ # if slicer is a name, get the object - if isinstance(slicer, basestring): + if isinstance(slicer, six.string_types): import pandas try: slicer = getattr(pandas, slicer) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index cb34d0bad5df7..436c2298164ff 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1,6 +1,10 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 +from pandas.util.py3compat import range +from pandas.util import compat +from six.moves import zip +import six import itertools import numpy as np @@ -187,7 +191,7 @@ def get_new_values(self): new_mask = np.zeros(result_shape, dtype=bool) # is there a simpler / faster way of doing this? - for i in xrange(values.shape[1]): + for i in range(values.shape[1]): chunk = new_values[:, i * width: (i + 1) * width] mask_chunk = new_mask[:, i * width: (i + 1) * width] @@ -397,7 +401,7 @@ def _slow_pivot(index, columns, values): Could benefit from some Cython here. 
""" tree = {} - for i, (idx, col) in enumerate(itertools.izip(index, columns)): + for i, (idx, col) in enumerate(zip(index, columns)): if col not in tree: tree[col] = {} branch = tree[col] @@ -685,11 +689,11 @@ def melt(frame, id_vars=None, value_vars=None, var_name = frame.columns.names else: var_name = ['variable_%s' % i for i in - xrange(len(frame.columns.names))] + range(len(frame.columns.names))] else: var_name = [frame.columns.name if frame.columns.name is not None else 'variable'] - if isinstance(var_name, basestring): + if isinstance(var_name, six.string_types): var_name = [var_name] N, K = frame.shape @@ -898,7 +902,7 @@ def block2d_to_blocknd(values, items, shape, labels, ref_items=None): pvalues.fill(fill_value) values = values - for i in xrange(len(items)): + for i in range(len(items)): pvalues[i].flat[mask] = values[:, i] if ref_items is None: diff --git a/pandas/core/series.py b/pandas/core/series.py index b77dfbfd9618c..c8075e223df4f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,7 +5,8 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from itertools import izip +from pandas.util import compat +from six.moves import zip import operator from distutils.version import LooseVersion import types @@ -43,6 +44,7 @@ from pandas.compat.scipy import scoreatpercentile as _quantile from pandas.core.config import get_option +import six __all__ = ['Series', 'TimeSeries'] @@ -425,7 +427,7 @@ class Series(generic.PandasContainer, pa.Array): 'index': 0 } - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _AXIS_NAMES = dict((v, k) for k, v in compat.iteritems(_AXIS_NUMBERS)) def __new__(cls, data=None, index=None, dtype=None, name=None, copy=False): @@ -829,7 +831,7 @@ def __setitem__(self, key, value): return raise KeyError('%s not in this series!' 
% str(key)) - except TypeError, e: + except TypeError as e: # python 3 type errors should be raised if 'unorderable' in str(e): # pragma: no cover raise IndexError(key) @@ -1116,9 +1118,9 @@ def __unicode__(self): name=True, dtype=True) else: - result = u'Series([], dtype: %s)' % self.dtype + result = six.u('Series([], dtype: %s)') % self.dtype - if not ( type(result) == unicode): + if not (isinstance(result, six.text_type)): raise AssertionError() return result @@ -1137,12 +1139,12 @@ def _tidy_repr(self, max_vals=20): result = head + '\n...\n' + tail result = '%s\n%s' % (result, self._repr_footer()) - return unicode(result) + return six.text_type(result) def _repr_footer(self): - namestr = u"Name: %s, " % com.pprint_thing( + namestr = six.u("Name: %s, ") % com.pprint_thing( self.name) if self.name is not None else "" - return u'%sLength: %d, dtype: %s' % (namestr, len(self), + return six.u('%sLength: %d, dtype: %s') % (namestr, len(self), str(self.dtype.name)) def to_string(self, buf=None, na_rep='NaN', float_format=None, @@ -1180,7 +1182,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, length=length, dtype=dtype, name=name) # catch contract violations - if not type(the_repr) == unicode: + if not isinstance(the_repr, six.text_type): raise AssertionError("expected unicode string") if buf is None: @@ -1203,7 +1205,7 @@ def _get_repr(self, name=False, print_header=False, length=True, dtype=True, length=length, dtype=dtype, na_rep=na_rep, float_format=float_format) result = formatter.to_string() - if not ( type(result) == unicode): + if not (isinstance(result, six.text_type)): raise AssertionError() return result @@ -1217,7 +1219,7 @@ def iteritems(self): """ Lazily iterate over (index, value) tuples """ - return izip(iter(self.index), iter(self)) + return list(zip(iter(self.index), iter(self))) iterkv = iteritems if py3compat.PY3: # pragma: no cover @@ -1333,7 +1335,7 @@ def to_dict(self): ------- value_dict : dict """ - return dict(self.iteritems()) + return dict(compat.iteritems(self)) def to_sparse(self, kind='block', fill_value=None): """ @@ -1384,7 +1386,7 @@ def count(self, level=None): if level is not None: mask = notnull(self.values) - if isinstance(level, basestring): + if isinstance(level, six.string_types): level = self.index._get_level_number(level) level_index = self.index.levels[level] @@ -2817,20 +2819,20 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} all_src = set() dd = {} # group by unique destination value - for s, d in to_rep.iteritems(): + for s, d in compat.iteritems(to_rep): dd.setdefault(d, []).append(s) all_src.add(s) if any(d in all_src for d in dd.keys()): # don't clobber each other at the cost of temporaries masks = {} - for d, sset in dd.iteritems(): # now replace by each dest + for d, sset in compat.iteritems(dd): # now replace by each dest masks[d] = com.mask_missing(rs.values, sset) - for d, m in masks.iteritems(): + for d, m in compat.iteritems(masks): com._maybe_upcast_putmask(rs.values,m,d,change=change) else: # if no risk of clobbering then simple - for d, sset in dd.iteritems(): + for d, sset in compat.iteritems(dd): _rep_one(rs, sset, d) if np.isscalar(to_replace): @@ -3046,7 +3048,7 @@ def shift(self, periods=1, freq=None, copy=True, **kwds): offset = _resolve_offset(freq, kwds) - if isinstance(offset, basestring): + if isinstance(offset, six.string_types): offset = datetools.to_offset(offset) def _get_values(): @@ -3099,7 +3101,7 @@ def asof(self, where): ------- value or NaN """ - if isinstance(where, basestring): + if 
isinstance(where, six.string_types): where = datetools.to_datetime(where) values = self.values @@ -3407,7 +3409,7 @@ def _try_cast(arr, take_fast_path): # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. - if issubclass(subarr.dtype.type, basestring): + if issubclass(subarr.dtype.type, six.string_types): subarr = pa.array(data, dtype=object, copy=copy) return subarr @@ -3430,7 +3432,7 @@ def _resolve_offset(freq, kwds): if 'timeRule' in kwds or 'offset' in kwds: offset = kwds.get('offset', None) offset = kwds.get('timeRule', offset) - if isinstance(offset, basestring): + if isinstance(offset, six.string_types): offset = datetools.getOffset(offset) warn = True else: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 1aa7fe87903d7..e717f5a2b0f0f 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,6 +1,6 @@ import numpy as np -from itertools import izip +from six.moves import zip from pandas.core.common import isnull from pandas.core.series import Series import re @@ -50,7 +50,7 @@ def str_cat(arr, others=None, sep=None, na_rep=None): notmask = -na_mask - tuples = izip(*[x[notmask] for x in arrays]) + tuples = zip(*[x[notmask] for x in arrays]) cats = [sep.join(tup) for tup in tuples] result[notmask] = cats @@ -284,14 +284,14 @@ def rep(x): try: return str.__mul__(x, repeats) except TypeError: - return unicode.__mul__(x, repeats) + return six.text_type.__mul__(x, repeats) return _na_map(rep, arr) else: def rep(x, r): try: return str.__mul__(x, r) except TypeError: - return unicode.__mul__(x, r) + return six.text_type.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) result = lib.vec_binop(arr, repeats, rep) return result diff --git a/pandas/io/auth.py b/pandas/io/auth.py index 6da497687cf25..15e3eb70d91b2 100644 --- a/pandas/io/auth.py +++ b/pandas/io/auth.py @@ -1,3 +1,4 @@ +from __future__ import print_function # see LICENSES directory for copyright and license import os import sys @@ -54,8 +55,8 @@ def process_flags(flags=[]): # Let the gflags module process the command-line arguments. try: FLAGS(flags) - except gflags.FlagsError, e: - print ('%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS)) + except gflags.FlagsError as e: + print('%s\nUsage: %s ARGS\n%s' % (e, str(flags), FLAGS)) sys.exit(1) # Set the logging according to the command-line flag. 
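[editor's note: illustrative sketch, not part of the patch. The hunks in this
series keep applying the same small set of 2.x/3.x compatibility idioms; the
snippet below demonstrates them, assuming only `six` and the standard library.
The names `detail`, `label`, `lengths` and `pairs` are hypothetical.]

    from __future__ import print_function  # print is a function on py2 as well
    import six
    from six.moves import map, zip  # iterator-returning builtins on both majors

    # py2-only 'except ValueError, detail:' becomes the form below,
    # which is valid on 2.6+ and 3.x alike
    try:
        int('not a number')
    except ValueError as detail:
        print('parse failed: %s' % detail)

    # u'...' literals are rejected by py3.0-3.2, so unicode goes through
    # six.u(); six.text_type is unicode on py2 and str on py3
    label = six.u('%s: %s') % ('key', 'value')
    assert isinstance(label, six.text_type)

    # six.moves' zip/map are lazy iterators; wrap them in list() wherever the
    # result is indexed, len()-ed, or traversed more than once -- hence the
    # many list(zip(...)) / list(map(...)) changes throughout this patch
    pairs = list(zip(['a', 'b'], [1, 2]))
    assert pairs[0] == ('a', 1)
    lengths = list(map(len, ['spam', 'eggs']))  # -> [4, 4]

[end editor's note]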
diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py index 08837474c11b4..fa3e384597ee6 100644 --- a/pandas/io/clipboard.py +++ b/pandas/io/clipboard.py @@ -1,5 +1,5 @@ """ io on the clipboard """ -from StringIO import StringIO +from pandas.util.py3compat import StringIO def read_clipboard(**kwargs): # pragma: no cover """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 33958ade2bcd6..3ad181c3dffcb 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -5,7 +5,7 @@ import urllib2 import zipfile from contextlib import contextmanager, closing -from StringIO import StringIO +from pandas.util.py3compat import StringIO from pandas.util import py3compat diff --git a/pandas/io/data.py b/pandas/io/data.py index 1b51ae5ec8a02..74268241db2c7 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -3,9 +3,9 @@ """ +from pandas.util.py3compat import range import warnings import tempfile -import itertools import datetime as dt import urllib import time @@ -20,6 +20,8 @@ from pandas.io.parsers import TextParser from pandas.io.common import urlopen, ZipFile from pandas.util.testing import _network_error_classes +import six +from six.moves import map, zip class SymbolWarning(UserWarning): @@ -95,7 +97,7 @@ def _in_chunks(seq, size): """ Return sequence in 'chunks' of size defined by size """ - return (seq[pos:pos + size] for pos in xrange(0, len(seq), size)) + return (seq[pos:pos + size] for pos in range(0, len(seq), size)) _yahoo_codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r', @@ -107,13 +109,13 @@ def get_quote_yahoo(symbols): Returns a DataFrame """ - if isinstance(symbols, basestring): + if isinstance(symbols, six.string_types): sym_list = symbols else: sym_list = '+'.join(symbols) # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm - request = ''.join(_yahoo_codes.itervalues()) # code request string + request = ''.join(six.itervalues(_yahoo_codes)) # code request string header = _yahoo_codes.keys() data = defaultdict(list) @@ -147,7 +149,7 @@ def get_quote_google(symbols): def _retry_read_url(url, retry_count, pause, name): - for _ in xrange(retry_count): + for _ in range(retry_count): time.sleep(pause) # kludge to close the socket ASAP @@ -332,7 +334,7 @@ def _get_data_from(symbols, start, end, retry_count, pause, adjust_price, src_fn = _source_functions[source] # If a single symbol, (e.g., 'GOOG') - if isinstance(symbols, (basestring, int)): + if isinstance(symbols, (six.string_types, int)): hist_data = src_fn(symbols, start, end, retry_count, pause) # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) elif isinstance(symbols, DataFrame): @@ -465,15 +467,15 @@ def get_data_famafrench(name): with ZipFile(tmpf, 'r') as zf: data = zf.open(name + '.txt').readlines() - line_lengths = np.array(map(len, data)) + line_lengths = np.array(list(map(len, data))) file_edges = np.where(line_lengths == 2)[0] datasets = {} - edges = itertools.izip(file_edges + 1, file_edges[1:]) + edges = zip(file_edges + 1, file_edges[1:]) for i, (left_edge, right_edge) in enumerate(edges): dataset = [d.split() for d in data[left_edge:right_edge]] if len(dataset) > 10: - ncol_raw = np.array(map(len, dataset)) + ncol_raw = np.array(list(map(len, dataset))) ncol = np.median(ncol_raw) header_index = np.where(ncol_raw == ncol - 1)[0][-1] header = dataset[header_index] @@ -809,18 +811,18 @@ def get_forward_data(self, months, call=True, put=False, near=False, data : dict of str, DataFrame """ warnings.warn("get_forward_data() is deprecated", FutureWarning) - in_months = 
xrange(CUR_MONTH, CUR_MONTH + months + 1) + in_months = range(CUR_MONTH, CUR_MONTH + months + 1) in_years = [CUR_YEAR] * (months + 1) # Figure out how many items in in_months go past 12 to_change = 0 - for i in xrange(months): + for i in range(months): if in_months[i] > 12: in_months[i] -= 12 to_change += 1 # Change the corresponding items in the in_years list. - for i in xrange(1, to_change + 1): + for i in range(1, to_change + 1): in_years[-i] += 1 to_ret = Series({'calls': call, 'puts': put}) @@ -830,7 +832,7 @@ def get_forward_data(self, months, call=True, put=False, near=False, for name in to_ret: all_data = DataFrame() - for mon in xrange(months): + for mon in range(months): m2 = in_months[mon] y2 = in_years[mon] diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index c7a60d13f1778..c0e9b4da8f52a 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,4 +1,5 @@ """This module is designed for community supported date conversion functions""" +from pandas.util.py3compat import range import numpy as np import pandas.lib as lib @@ -32,7 +33,7 @@ def generic_parser(parse_func, *cols): N = _check_columns(cols) results = np.empty(N, dtype=object) - for i in xrange(N): + for i in range(N): args = [c[i] for c in cols] results[i] = parse_func(*args) diff --git a/pandas/io/excel.py b/pandas/io/excel.py index b3b48382faae0..65d0b6f017f23 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -5,13 +5,15 @@ #---------------------------------------------------------------------- # ExcelFile class +from pandas.util.py3compat import range import datetime -from itertools import izip import numpy as np from pandas.io.parsers import TextParser from pandas.tseries.period import Period from pandas import json +from six.moves import map, zip, reduce +import six def read_excel(path_or_buf, sheetname, kind=None, **kwds): """Read an Excel table into a pandas DataFrame @@ -73,7 +75,7 @@ def __init__(self, path_or_buf, kind=None, **kwds): self.path_or_buf = path_or_buf self.tmpfile = None - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, six.string_types): self.book = xlrd.open_workbook(path_or_buf) else: data = path_or_buf.read() @@ -153,14 +155,14 @@ def _excel2num(x): for rng in areas.split(','): if ':' in rng: rng = rng.split(':') - cols += range(_excel2num(rng[0]), _excel2num(rng[1]) + 1) + cols += list(range(_excel2num(rng[0]), _excel2num(rng[1]) + 1)) else: cols.append(_excel2num(rng)) return cols if isinstance(parse_cols, int): return i <= parse_cols - elif isinstance(parse_cols, basestring): + elif isinstance(parse_cols, six.string_types): return i in _range2cols(parse_cols) else: return i in parse_cols @@ -173,16 +175,16 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, XL_CELL_ERROR, XL_CELL_BOOLEAN) datemode = self.book.datemode - if isinstance(sheetname, basestring): + if isinstance(sheetname, six.string_types): sheet = self.book.sheet_by_name(sheetname) else: # assume an integer if not a string sheet = self.book.sheet_by_index(sheetname) data = [] should_parse = {} - for i in xrange(sheet.nrows): + for i in range(sheet.nrows): row = [] - for j, (value, typ) in enumerate(izip(sheet.row_values(i), + for j, (value, typ) in enumerate(zip(sheet.row_values(i), sheet.row_types(i))): if parse_cols is not None and j not in should_parse: should_parse[j] = self._should_parse(j, parse_cols) diff --git a/pandas/io/ga.py b/pandas/io/ga.py index 7d6277e2d45f9..d71de9da4d2b7 100644 --- a/pandas/io/ga.py +++ 
b/pandas/io/ga.py @@ -5,6 +5,7 @@ 4. Download JSON secret file and move into same directory as this file """ from datetime import datetime +from pandas.util import compat import numpy as np from pandas import DataFrame import pandas as pd @@ -16,8 +17,10 @@ from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError +import six +from six.moves import zip -TYPE_MAP = {u'INTEGER': int, u'FLOAT': float, u'TIME': int} +TYPE_MAP = {six.u('INTEGER'): int, six.u('FLOAT'): float, six.u('TIME'): int} NO_CALLBACK = auth.OOB_CALLBACK_URN DOC_URL = auth.DOC_URL @@ -261,7 +264,7 @@ def get_data(self, metrics, start_date, end_date=None, profile_id = profile.get('id') if index_col is None and dimensions is not None: - if isinstance(dimensions, basestring): + if isinstance(dimensions, six.string_types): dimensions = [dimensions] index_col = _clean_index(list(dimensions), parse_dates) @@ -283,7 +286,7 @@ def _read(start, result_size): dayfirst=dayfirst, na_values=na_values, converters=converters, sort=sort) - except HttpError, inst: + except HttpError as inst: raise ValueError('Google API error %s: %s' % (inst.resp.status, inst._get_reason())) @@ -312,7 +315,7 @@ def _parse_data(self, rows, col_info, index_col, parse_dates=True, if isinstance(sort, bool) and sort: return df.sort_index() - elif isinstance(sort, (basestring, list, tuple, np.ndarray)): + elif isinstance(sort, (six.string_types, list, tuple, np.ndarray)): return df.sort_index(by=sort) return df @@ -330,14 +333,14 @@ def create_query(self, profile_id, metrics, start_date, end_date=None, max_results=max_results, **kwargs) try: return self.service.data().ga().get(**qry) - except TypeError, error: + except TypeError as error: raise ValueError('Error making query: %s' % error) def format_query(ids, metrics, start_date, end_date=None, dimensions=None, segment=None, filters=None, sort=None, start_index=None, max_results=10000, **kwargs): - if isinstance(metrics, basestring): + if isinstance(metrics, six.string_types): metrics = [metrics] met = ','.join(['ga:%s' % x for x in metrics]) @@ -356,7 +359,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, lst = [dimensions, filters, sort] [_maybe_add_arg(qry, n, d) for n, d in zip(names, lst)] - if isinstance(segment, basestring): + if isinstance(segment, six.string_types): _maybe_add_arg(qry, 'segment', segment, 'dynamic::ga') elif isinstance(segment, int): _maybe_add_arg(qry, 'segment', segment, 'gaid:') @@ -374,7 +377,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, def _maybe_add_arg(query, field, data, prefix='ga'): if data is not None: - if isinstance(data, (basestring, int)): + if isinstance(data, (six.string_types, int)): data = [data] data = ','.join(['%s:%s' % (prefix, x) for x in data]) query[field] = data @@ -412,7 +415,7 @@ def _clean_index(index_dims, parse_dates): to_add.append('_'.join(lst)) to_remove.extend(lst) elif isinstance(parse_dates, dict): - for name, lst in parse_dates.iteritems(): + for name, lst in compat.iteritems(parse_dates): if isinstance(lst, (list, tuple, np.ndarray)): if _should_add(lst): to_add.append(name) @@ -435,12 +438,12 @@ def _get_column_types(header_info): def _get_dim_names(header_info): return [x['name'][3:] for x in header_info - if x['columnType'] == u'DIMENSION'] + if x['columnType'] == six.u('DIMENSION')] def _get_met_names(header_info): return [x['name'][3:] for x in header_info - if x['columnType'] == u'METRIC'] + if x['columnType'] == six.u('METRIC')] def 
_get_data_types(header_info): diff --git a/pandas/io/html.py b/pandas/io/html.py index 651a3eb507618..bcecc624434cc 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -3,6 +3,8 @@ """ +from pandas.util.py3compat import range +from pandas.util import compat import os import re import numbers @@ -16,6 +18,8 @@ from pandas import DataFrame, MultiIndex, isnull from pandas.io.common import _is_url, urlopen +import six +from six.moves import map try: @@ -91,9 +95,9 @@ def _get_skiprows_iter(skiprows): A proper iterator to use to skip rows of a DataFrame. """ if isinstance(skiprows, slice): - return range(skiprows.start or 0, skiprows.stop, skiprows.step or 1) + return list(range(skiprows.start or 0, skiprows.stop, skiprows.step or 1)) elif isinstance(skiprows, numbers.Integral): - return range(skiprows) + return list(range(skiprows)) elif isinstance(skiprows, collections.Container): return skiprows else: @@ -120,7 +124,7 @@ def _read(io): elif os.path.isfile(io): with open(io) as f: raw_text = f.read() - elif isinstance(io, basestring): + elif isinstance(io, six.string_types): raw_text = io else: raise TypeError("Cannot read object of type " @@ -343,14 +347,14 @@ def _parse_raw_thead(self, table): thead = self._parse_thead(table) res = [] if thead: - res = map(self._text_getter, self._parse_th(thead[0])) + res = list(map(self._text_getter, self._parse_th(thead[0]))) return np.array(res).squeeze() if res and len(res) == 1 else res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) res = [] if tfoot: - res = map(self._text_getter, self._parse_td(tfoot[0])) + res = list(map(self._text_getter, self._parse_td(tfoot[0]))) return np.array(res).squeeze() if res and len(res) == 1 else res def _parse_raw_tbody(self, table): @@ -450,8 +454,8 @@ def _build_node_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = (u"@{k}='{v}'".format(k=k, v=v) for k, v in attrs.iteritems()) - return u'[{0}]'.format(' and '.join(s)) + s = (six.u("@{k}='{v}'").format(k=k, v=v) for k, v in attrs.iteritems()) + return six.u('[{0}]').format(' and '.join(s)) _re_namespace = {'re': 'http://exslt.org/regular-expressions'} @@ -492,9 +496,9 @@ def _parse_tables(self, doc, match, kwargs): pattern = match.pattern # check all descendants for the given pattern - check_all_expr = u'//*' + check_all_expr = six.u('//*') if pattern: - check_all_expr += u"[re:test(text(), '{0}')]".format(pattern) + check_all_expr += six.u("[re:test(text(), '{0}')]").format(pattern) # go up the tree until we find a table check_table_expr = '/ancestor::table' @@ -733,10 +737,10 @@ def _parser_dispatch(flavor): def _validate_parser_flavor(flavor): if flavor is None: flavor = ['lxml', 'bs4'] - elif isinstance(flavor, basestring): + elif isinstance(flavor, six.string_types): flavor = [flavor] elif isinstance(flavor, collections.Iterable): - if not all(isinstance(flav, basestring) for flav in flavor): + if not all(isinstance(flav, six.string_types) for flav in flavor): raise TypeError('{0} is not an iterable of strings'.format(flavor)) else: raise TypeError('{0} is not a valid "flavor"'.format(flavor)) diff --git a/pandas/io/json.py b/pandas/io/json.py index d3bea36b57e77..ef53d0b9e93e7 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -1,11 +1,14 @@ # pylint: disable-msg=E1101,W0613,W0603 -from StringIO import StringIO +from pandas.util.py3compat import StringIO +from pandas.util import compat +from pandas.util.py3compat import long import os from pandas import Series, DataFrame, to_datetime from 
pandas.io.common import get_filepath_or_buffer import pandas.json as _json +import six loads = _json.loads dumps = _json.dumps @@ -26,7 +29,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision else: raise NotImplementedError - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, six.string_types): with open(path_or_buf,'w') as fh: fh.write(s) elif path_or_buf is None: @@ -182,7 +185,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, """ filepath_or_buffer,_ = get_filepath_or_buffer(path_or_buf) - if isinstance(filepath_or_buffer, basestring): + if isinstance(filepath_or_buffer, six.string_types): if os.path.exists(filepath_or_buffer): with open(filepath_or_buffer,'r') as fh: json = fh.read() @@ -342,7 +345,7 @@ def _try_convert_to_date(self, data): # ignore numbers that are out of range if issubclass(new_data.dtype.type,np.number): - if not ((new_data == iNaT) | (new_data > 31536000000000000L)).all(): + if not ((new_data == iNaT) | (new_data > long(31536000000000000))).all(): return data, False try: @@ -369,9 +372,9 @@ def _parse_no_numpy(self): orient = self.orient if orient == "split": decoded = dict((str(k), v) - for k, v in loads( + for k, v in compat.iteritems(loads( json, - precise_float=self.precise_float).iteritems()) + precise_float=self.precise_float))) self.obj = Series(dtype=None, **decoded) else: self.obj = Series( @@ -384,7 +387,7 @@ def _parse_numpy(self): if orient == "split": decoded = loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - decoded = dict((str(k), v) for k, v in decoded.iteritems()) + decoded = dict((str(k), v) for k, v in compat.iteritems(decoded)) self.obj = Series(**decoded) elif orient == "columns" or orient == "index": self.obj = Series(*loads(json, dtype=None, numpy=True, @@ -417,7 +420,7 @@ def _parse_numpy(self): elif orient == "split": decoded = loads(json, dtype=None, numpy=True, precise_float=self.precise_float) - decoded = dict((str(k), v) for k, v in decoded.iteritems()) + decoded = dict((str(k), v) for k, v in compat.iteritems(decoded)) self.obj = DataFrame(**decoded) elif orient == "values": self.obj = DataFrame(loads(json, dtype=None, numpy=True, @@ -436,9 +439,9 @@ def _parse_no_numpy(self): loads(json, precise_float=self.precise_float), dtype=None) elif orient == "split": decoded = dict((str(k), v) - for k, v in loads( + for k, v in compat.iteritems(loads( json, - precise_float=self.precise_float).iteritems()) + precise_float=self.precise_float))) self.obj = DataFrame(dtype=None, **decoded) elif orient == "index": self.obj = DataFrame( @@ -467,7 +470,7 @@ def _try_convert_dates(self): def is_ok(col): """ return if this col is ok to try for a date parse """ - if not isinstance(col, basestring): return False + if not isinstance(col, six.string_types): return False if (col.endswith('_at') or col.endswith('_time') or diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 3bcfb66d32092..57f1daa623a5a 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -1,9 +1,11 @@ """ Module contains tools for processing files into DataFrames or other objects """ -from StringIO import StringIO +from __future__ import print_function +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range +from pandas.util import compat import re -from itertools import izip import csv from warnings import warn @@ -23,6 +25,8 @@ import pandas.tslib as tslib import pandas.parser as _parser from pandas.tseries.period import Period +import six 
+from six.moves import zip _parser_params = """Also supports optionally iterating or breaking of the file into chunks. @@ -558,7 +562,7 @@ def _clean_options(self, options, engine): na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) if com.is_integer(skiprows): - skiprows = range(skiprows) + skiprows = list(range(skiprows)) skiprows = set() if skiprows is None else set(skiprows) # put stuff back @@ -727,7 +731,7 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_ field_count = len(header[0]) def extract(r): return tuple([ r[i] for i in range(field_count) if i not in sic ]) - columns = zip(*[ extract(r) for r in header ]) + columns = list(zip(*[ extract(r) for r in header ])) names = ic + columns # if we find 'Unnamed' all of a single level, then our header was too long @@ -784,7 +788,7 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): def _get_simple_index(self, data, columns): def ix(col): - if not isinstance(col, basestring): + if not isinstance(col, six.string_types): return col raise ValueError('Index %s invalid' % col) index = None @@ -807,7 +811,7 @@ def ix(col): def _get_complex_date_index(self, data, col_names): def _get_name(icol): - if isinstance(icol, basestring): + if isinstance(icol, six.string_types): return icol if col_names is None: @@ -851,7 +855,7 @@ def _agg_index(self, index, try_parse_dates=True): col_na_values, col_na_fvalues = _get_na_values(col_name, self.na_values, self.na_fvalues) - + arr, _ = self._convert_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) @@ -874,7 +878,7 @@ def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, coerce_type) result[c] = cvals if verbose and na_count: - print ('Filled %d NA values in column %s' % (na_count, str(c))) + print('Filled %d NA values in column %s' % (na_count, str(c))) return result def _convert_types(self, values, na_values, try_num_bool=True): @@ -928,7 +932,7 @@ def _exclude_implicit_index(self, alldata): offset += 1 data[col] = alldata[i + offset] else: - data = dict((k, v) for k, v in izip(self.orig_names, alldata)) + data = dict((k, v) for k, v in zip(self.orig_names, alldata)) return data @@ -946,7 +950,7 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) if 'utf-16' in (kwds.get('encoding') or ''): - if isinstance(src, basestring): + if isinstance(src, six.string_types): src = open(src, 'rb') src = com.UTF8Recoder(src, kwds['encoding']) kwds['encoding'] = 'utf-8' @@ -976,7 +980,7 @@ def __init__(self, src, **kwds): self.names = ['X%d' % i for i in range(self._reader.table_width)] else: - self.names = range(self._reader.table_width) + self.names = list(range(self._reader.table_width)) # XXX self._set_noconvert_columns() @@ -1227,7 +1231,7 @@ def __init__(self, f, **kwds): self.comment = kwds['comment'] self._comment_lines = [] - if isinstance(f, basestring): + if isinstance(f, six.string_types): f = com._get_handle(f, 'r', encoding=self.encoding, compression=self.compression) elif self.compression: @@ -1450,7 +1454,7 @@ def _infer_columns(self): if self.prefix: columns = [ ['X%d' % i for i in range(ncols)] ] else: - columns = [ range(ncols) ] + columns = [ list(range(ncols)) ] else: columns = [ names ] @@ -1487,7 +1491,7 @@ def _check_comments(self, lines): for l in lines: rl = [] for x in l: - if (not isinstance(x, basestring) or + if (not isinstance(x, six.string_types) or self.comment not in x): rl.append(x) else: @@ -1506,7 +1510,7 @@ def _check_thousands(self, lines): for l in lines: rl = [] 
for x in l: - if (not isinstance(x, basestring) or + if (not isinstance(x, six.string_types) or self.thousands not in x or nonnum.search(x.strip())): rl.append(x) @@ -1548,7 +1552,7 @@ def _get_index_name(self, columns): # column and index names on diff rows implicit_first_cols = 0 - self.index_col = range(len(line)) + self.index_col = list(range(len(line))) self.buf = self.buf[1:] for c in reversed(line): @@ -1559,7 +1563,7 @@ def _get_index_name(self, columns): if implicit_first_cols > 0: self._implicit_index = True if self.index_col is None: - self.index_col = range(implicit_first_cols) + self.index_col = list(range(implicit_first_cols)) index_name = None else: @@ -1629,7 +1633,7 @@ def _get_lines(self, rows=None): new_rows = [] try: if rows is not None: - for _ in xrange(rows): + for _ in range(rows): new_rows.append(next(source)) lines.extend(new_rows) else: @@ -1638,7 +1642,7 @@ def _get_lines(self, rows=None): try: new_rows.append(next(source)) rows += 1 - except csv.Error, inst: + except csv.Error as inst: if 'newline inside string' in str(inst): row_num = str(self.pos + rows) msg = ('EOF inside string starting with line ' @@ -1806,7 +1810,7 @@ def _clean_index_names(columns, index_col): index_col = list(index_col) for i, c in enumerate(index_col): - if isinstance(c, basestring): + if isinstance(c, six.string_types): index_names.append(c) for j, name in enumerate(cp_cols): if name == c: @@ -1819,7 +1823,7 @@ def _clean_index_names(columns, index_col): index_names.append(name) # hack - if isinstance(index_names[0], basestring) and 'Unnamed' in index_names[0]: + if isinstance(index_names[0], six.string_types) and 'Unnamed' in index_names[0]: index_names[0] = None return index_names, columns, index_col @@ -1901,13 +1905,12 @@ def _get_col_names(colspec, columns): def _concat_date_cols(date_cols): if len(date_cols) == 1: if py3compat.PY3: - return np.array([unicode(x) for x in date_cols[0]], dtype=object) + return np.array([six.text_type(x) for x in date_cols[0]], dtype=object) else: - return np.array([str(x) if not isinstance(x, basestring) else x + return np.array([str(x) if not isinstance(x, six.string_types) else x for x in date_cols[0]], dtype=object) - # stripped = [map(str.strip, x) for x in date_cols] - rs = np.array([' '.join([unicode(y) for y in x]) + rs = np.array([' '.join([six.text_type(y) for y in x]) for x in zip(*date_cols)], dtype=object) return rs diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index a5a8355567e23..52cc7dc24ffde 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2,9 +2,12 @@ High level interface to PyTables for reading and writing pandas data structures to disk """ +from __future__ import print_function # pylint: disable-msg=E1101,W0613,W0603 from datetime import datetime, date +from pandas.util.py3compat import range +from pandas.util import compat import time import re import copy @@ -35,6 +38,8 @@ import pandas.tslib as tslib from contextlib import contextmanager +import six +from six.moves import map, zip # versioning attribute _version = '0.10.1' @@ -87,40 +92,40 @@ class AttributeConflictWarning(Warning): # map object types _TYPE_MAP = { - Series : u'series', - SparseSeries : u'sparse_series', - TimeSeries : u'series', - DataFrame : u'frame', - SparseDataFrame : u'sparse_frame', - Panel : u'wide', - Panel4D : u'ndim', - SparsePanel : u'sparse_panel' + Series: six.u('series'), + SparseSeries: six.u('sparse_series'), + TimeSeries: six.u('series'), + DataFrame: six.u('frame'), + SparseDataFrame: six.u('sparse_frame'), + 
Panel: six.u('wide'), + Panel4D: six.u('ndim'), + SparsePanel: six.u('sparse_panel') } # storer class map _STORER_MAP = { - u'TimeSeries' : 'LegacySeriesStorer', - u'Series' : 'LegacySeriesStorer', - u'DataFrame' : 'LegacyFrameStorer', - u'DataMatrix' : 'LegacyFrameStorer', - u'series' : 'SeriesStorer', - u'sparse_series' : 'SparseSeriesStorer', - u'frame' : 'FrameStorer', - u'sparse_frame' : 'SparseFrameStorer', - u'wide' : 'PanelStorer', - u'sparse_panel' : 'SparsePanelStorer', + six.u('TimeSeries') : 'LegacySeriesStorer', + six.u('Series') : 'LegacySeriesStorer', + six.u('DataFrame') : 'LegacyFrameStorer', + six.u('DataMatrix') : 'LegacyFrameStorer', + six.u('series') : 'SeriesStorer', + six.u('sparse_series') : 'SparseSeriesStorer', + six.u('frame') : 'FrameStorer', + six.u('sparse_frame') : 'SparseFrameStorer', + six.u('wide') : 'PanelStorer', + six.u('sparse_panel') : 'SparsePanelStorer', } # table class map _TABLE_MAP = { - u'generic_table' : 'GenericTable', - u'appendable_frame' : 'AppendableFrameTable', - u'appendable_multiframe' : 'AppendableMultiFrameTable', - u'appendable_panel' : 'AppendablePanelTable', - u'appendable_ndim' : 'AppendableNDimTable', - u'worm' : 'WORMTable', - u'legacy_frame' : 'LegacyFrameTable', - u'legacy_panel' : 'LegacyPanelTable', + six.u('generic_table') : 'GenericTable', + six.u('appendable_frame') : 'AppendableFrameTable', + six.u('appendable_multiframe') : 'AppendableMultiFrameTable', + six.u('appendable_panel') : 'AppendablePanelTable', + six.u('appendable_ndim') : 'AppendableNDimTable', + six.u('worm') : 'WORMTable', + six.u('legacy_frame') : 'LegacyFrameTable', + six.u('legacy_panel') : 'LegacyPanelTable', } # axes map @@ -189,7 +194,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, app else: f = lambda store: store.put(key, value, **kwargs) - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, six.string_types): with get_store(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: f(store) else: @@ -199,7 +204,7 @@ def read_hdf(path_or_buf, key, **kwargs): """ read from the store, close it if we opened it """ f = lambda store, auto_close: store.select(key, auto_close=auto_close, **kwargs) - if isinstance(path_or_buf, basestring): + if isinstance(path_or_buf, six.string_types): # can't auto open/close if we are using an iterator # so delegate to the iterator @@ -385,9 +390,9 @@ def open(self, mode='a', warn=True): try: self._handle = h5_open(self._path, self._mode) - except IOError, e: # pragma: no cover + except IOError as e: # pragma: no cover if 'can not be written' in str(e): - print ('Opening %s in read-only mode' % self._path) + print('Opening %s in read-only mode' % self._path) self._handle = h5_open(self._path, 'r') else: raise @@ -513,7 +518,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star # default to single select if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] - if isinstance(keys, basestring): + if isinstance(keys, six.string_types): return self.select(key=keys, where=where, columns=columns, start=start, stop=stop, iterator=iterator, chunksize=chunksize, **kwargs) if not isinstance(keys, (list, tuple)): @@ -545,7 +550,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star try: c = self.select_as_coordinates(selector, where, start=start, stop=stop) nrows = len(c) - except (Exception), detail: + except (Exception) as detail: raise ValueError("invalid selector [%s]" % selector) def func(_start, _stop): @@ -744,7 +749,7 @@ def groups(self): """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """ _tables() return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr( - g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u'table') ] + g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != six.u('table')) ] def get_node(self, key): """ return the node with the key or None if it does not exist """ @@ -823,8 +828,8 @@ def error(t): _tables() if getattr(group,'table',None) or isinstance(group,_table_mod.table.Table): - pt = u'frame_table' - tt = u'generic_table' + pt = six.u('frame_table') + tt = six.u('generic_table') else: raise TypeError("cannot create a storer if the object is not existing nor a value are passed") else: @@ -836,10 +841,10 @@ def error(t): # we are actually a table if table or append: - pt += u'_table' + pt += six.u('_table') # a storer node - if u'table' not in pt: + if six.u('table') not in pt: try: return globals()[_STORER_MAP[pt]](self, group, **kwargs) except: @@ -851,26 +856,26 @@ def error(t): # if we are a writer, determine the tt if value is not None: - if pt == u'frame_table': + if pt == six.u('frame_table'): index = getattr(value,'index',None) if index is not None: if index.nlevels == 1: - tt = u'appendable_frame' + tt = six.u('appendable_frame') elif index.nlevels > 1: - tt = u'appendable_multiframe' - elif pt == u'wide_table': - tt = u'appendable_panel' - elif pt == u'ndim_table': - tt = u'appendable_ndim' + tt = six.u('appendable_multiframe') + elif pt == six.u('wide_table'): + tt = six.u('appendable_panel') + elif pt == six.u('ndim_table'): + tt = six.u('appendable_ndim') else: # distinguish between a frame/table - tt = u'legacy_panel' + tt = six.u('legacy_panel') try: fields = group.table._v_attrs.fields - if len(fields) == 1 and fields[0] == u'value': - tt = u'legacy_frame' + if len(fields) == 1 and fields[0] == six.u('value'): + tt = six.u('legacy_frame') except: pass @@ -1140,7 +1145,7 @@ def __iter__(self): def maybe_set_size(self, min_itemsize=None, **kwargs): """ maybe set a string col itemsize: min_itemsize can be an integer or a dict with this column's name with an integer size """ - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == six.u('string'): if isinstance(min_itemsize, dict): min_itemsize = min_itemsize.get(self.name) @@ -1160,7 +1165,7 @@ def validate_col(self, itemsize=None): # validate this column for string truncation (or reset to the max size) dtype = getattr(self, 'dtype', None) - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == six.u('string'): c = self.col if c is not None: @@ -1290,7 +1295,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, data=None, bloc super(DataCol, self).__init__( values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None - self.dtype_attr = u"%s_dtype" % self.name + self.dtype_attr = six.u("%s_dtype") % self.name self.set_data(data) def __unicode__(self): @@ -1319,15 +1324,15 @@ def set_kind(self): # set my kind if we can if self.dtype is not None: dtype = _ensure_decoded(self.dtype) - if dtype.startswith(u'string') or dtype.startswith(u'bytes'): + if dtype.startswith(six.u('string')) or dtype.startswith(six.u('bytes')): self.kind = 'string' - elif dtype.startswith(u'float'): + elif dtype.startswith(six.u('float')): self.kind = 'float' - elif dtype.startswith(u'int') or 
dtype.startswith(u'uint'): + elif dtype.startswith(six.u('int')) or dtype.startswith(six.u('uint')): self.kind = 'integer' - elif dtype.startswith(u'date'): + elif dtype.startswith(six.u('date')): self.kind = 'datetime' - elif dtype.startswith(u'bool'): + elif dtype.startswith(six.u('bool')): self.kind = 'bool' else: raise AssertionError("cannot interpret dtype of [%s] in [%s]" % (dtype,self)) @@ -1501,7 +1506,7 @@ def convert(self, values, nan_rep, encoding): dtype = _ensure_decoded(self.dtype) # reverse converts - if dtype == u'datetime64': + if dtype == six.u('datetime64'): # recreate the timezone if self.tz is not None: @@ -1514,10 +1519,10 @@ def convert(self, values, nan_rep, encoding): else: self.data = np.asarray(self.data, dtype='M8[ns]') - elif dtype == u'date': + elif dtype == six.u('date'): self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) - elif dtype == u'datetime': + elif dtype == six.u('datetime'): self.data = np.array( [datetime.fromtimestamp(v) for v in self.data], dtype=object) @@ -1529,7 +1534,7 @@ def convert(self, values, nan_rep, encoding): self.data = self.data.astype('O') # convert nans / decode - if _ensure_decoded(self.kind) == u'string': + if _ensure_decoded(self.kind) == six.u('string'): self.data = _unconvert_string_array(self.data, nan_rep=nan_rep, encoding=encoding) return self @@ -1553,7 +1558,7 @@ class DataIndexableCol(DataCol): @property def is_searchable(self): - return _ensure_decoded(self.kind) == u'string' + return _ensure_decoded(self.kind) == six.u('string') def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize) @@ -1790,7 +1795,7 @@ def read_array(self, key): else: ret = data - if dtype == u'datetime64': + if dtype == six.u('datetime64'): ret = np.array(ret, dtype='M8[ns]') if transposed: @@ -1801,13 +1806,13 @@ def read_array(self, key): def read_index(self, key): variety = _ensure_decoded(getattr(self.attrs, '%s_variety' % key)) - if variety == u'multi': + if variety == six.u('multi'): return self.read_multi_index(key) - elif variety == u'block': + elif variety == six.u('block'): return self.read_block_index(key) - elif variety == u'sparseint': + elif variety == six.u('sparseint'): return self.read_sparse_intindex(key) - elif variety == u'regular': + elif variety == six.u('regular'): _, index = self.read_index_node(getattr(self.group, key)) return index else: # pragma: no cover @@ -1916,13 +1921,13 @@ def read_index_node(self, node): factory = self._get_index_factory(index_class) kwargs = {} - if u'freq' in node._v_attrs: + if six.u('freq') in node._v_attrs: kwargs['freq'] = node._v_attrs['freq'] - if u'tz' in node._v_attrs: + if six.u('tz') in node._v_attrs: kwargs['tz'] = node._v_attrs['tz'] - if kind in (u'date', u'datetime'): + if kind in (six.u('date'), six.u('datetime')): index = factory(_unconvert_index(data, kind, encoding=self.encoding), dtype=object, **kwargs) else: @@ -2031,7 +2036,7 @@ def read(self, **kwargs): return DataFrame(values, index=index, columns=columns) class SeriesStorer(GenericStorer): - pandas_kind = u'series' + pandas_kind = six.u('series') attributes = ['name'] @property @@ -2058,7 +2063,7 @@ def write(self, obj, **kwargs): self.attrs.name = obj.name class SparseSeriesStorer(GenericStorer): - pandas_kind = u'sparse_series' + pandas_kind = six.u('sparse_series') attributes = ['name','fill_value','kind'] def read(self, **kwargs): @@ -2067,7 +2072,7 @@ def read(self, **kwargs): sp_values = self.read_array('sp_values') sp_index = self.read_index('sp_index') 
return SparseSeries(sp_values, index=index, sparse_index=sp_index, - kind=self.kind or u'block', fill_value=self.fill_value, + kind=self.kind or six.u('block'), fill_value=self.fill_value, name=self.name) def write(self, obj, **kwargs): @@ -2080,7 +2085,7 @@ def write(self, obj, **kwargs): self.attrs.kind = obj.kind class SparseFrameStorer(GenericStorer): - pandas_kind = u'sparse_frame' + pandas_kind = six.u('sparse_frame') attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2112,7 +2117,7 @@ def write(self, obj, **kwargs): self.write_index('columns', obj.columns) class SparsePanelStorer(GenericStorer): - pandas_kind = u'sparse_panel' + pandas_kind = six.u('sparse_panel') attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2135,7 +2140,7 @@ def write(self, obj, **kwargs): self.attrs.default_kind = obj.default_kind self.write_index('items', obj.items) - for name, sdf in obj.iterkv(): + for name, sdf in obj.iteritems(): key = 'sparse_frame_%s' % name if key not in self.group._v_children: node = self._handle.createGroup(self.group, key) @@ -2183,7 +2188,7 @@ def read(self, **kwargs): self.validate_read(kwargs) axes = [] - for i in xrange(self.ndim): + for i in range(self.ndim): ax = self.read_index('axis%d' % i) axes.append(ax) @@ -2216,11 +2221,11 @@ def write(self, obj, **kwargs): self.write_index('block%d_items' % i, blk.items) class FrameStorer(BlockManagerStorer): - pandas_kind = u'frame' + pandas_kind = six.u('frame') obj_type = DataFrame class PanelStorer(BlockManagerStorer): - pandas_kind = u'wide' + pandas_kind = six.u('wide') obj_type = Panel is_shape_reversed = True @@ -2245,7 +2250,7 @@ class Table(Storer): levels : the names of levels """ - pandas_kind = u'wide_table' + pandas_kind = six.u('wide_table') table_type = None levels = 1 is_table = True @@ -2319,7 +2324,7 @@ def nrows_expected(self): @property def is_exists(self): """ has this table been created """ - return u'table' in self.group + return six.u('table') in self.group @property def storable(self): @@ -2713,9 +2718,9 @@ def create_axes(self, axes, obj, validate=True, nan_rep=None, data_columns=None, col.set_pos(j) self.values_axes.append(col) - except (NotImplementedError, ValueError, TypeError), e: + except (NotImplementedError, ValueError, TypeError) as e: raise e - except (Exception), detail: + except (Exception) as detail: raise Exception("cannot find the correct atom type -> [dtype->%s,items->%s] %s" % (b.dtype.name, b.items, str(detail))) j += 1 @@ -2838,7 +2843,7 @@ class WORMTable(Table): table. 
Writing is a one-time operation; the data are stored in a format that allows for searching the data on disk """ - table_type = u'worm' + table_type = six.u('worm') def read(self, **kwargs): """ read the indices and the indexing array, calculate offset rows and @@ -2863,7 +2868,7 @@ class LegacyTable(Table): IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'), DataCol(name='fields', cname='values', kind_attr='fields', pos=2)] - table_type = u'legacy' + table_type = six.u('legacy') ndim = 3 def write(self, **kwargs): @@ -2953,8 +2958,8 @@ def read(self, where=None, columns=None, **kwargs): class LegacyFrameTable(LegacyTable): """ support the legacy frame table """ - pandas_kind = u'frame_table' - table_type = u'legacy_frame' + pandas_kind = six.u('frame_table') + table_type = six.u('legacy_frame') obj_type = Panel def read(self, *args, **kwargs): @@ -2963,14 +2968,14 @@ def read(self, *args, **kwargs): class LegacyPanelTable(LegacyTable): """ support the legacy panel table """ - table_type = u'legacy_panel' + table_type = six.u('legacy_panel') obj_type = Panel class AppendableTable(LegacyTable): """ support the new appendable table formats """ _indexables = None - table_type = u'appendable' + table_type = six.u('appendable') def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=None, @@ -3043,7 +3048,7 @@ def write_data(self, chunksize): rows = self.nrows_expected chunks = int(rows / chunksize) + 1 - for i in xrange(chunks): + for i in range(chunks): start_i = i * chunksize end_i = min((i + 1) * chunksize, rows) if start_i >= end_i: @@ -3068,14 +3073,14 @@ def write_data_chunk(self, indexes, mask, search, values): args = list(indexes) args.extend([self.dtype, mask, search, values]) rows = func(*args) - except (Exception), detail: + except Exception as detail: raise Exception("cannot create row-data -> %s" % str(detail)) try: if len(rows): self.table.append(rows) self.table.flush() - except (Exception), detail: + except Exception as detail: raise Exception("tables cannot write this data -> %s" % str(detail)) def delete(self, where=None, **kwargs): @@ -3120,7 +3125,7 @@ def delete(self, where=None, **kwargs): # we must remove in reverse order! pg = groups.pop() for g in reversed(groups): - rows = l.take(range(g, pg)) + rows = l.take(list(range(g, pg))) table.removeRows(start=rows[rows.index[0] ], stop=rows[rows.index[-1]] + 1) pg = g @@ -3133,8 +3138,8 @@ def delete(self, where=None, **kwargs): class AppendableFrameTable(AppendableTable): """ support the new appendable table formats """ - pandas_kind = u'frame_table' - table_type = u'appendable_frame' + pandas_kind = six.u('frame_table') + table_type = six.u('appendable_frame') ndim = 2 obj_type = DataFrame @@ -3188,8 +3193,8 @@ def read(self, where=None, columns=None, **kwargs): class GenericTable(AppendableFrameTable): """ a table that read/writes the generic pytables table format """ - pandas_kind = u'frame_table' - table_type = u'generic_table' + pandas_kind = six.u('frame_table') + table_type = six.u('generic_table') ndim = 2 obj_type = DataFrame @@ -3233,13 +3238,13 @@ def write(self, **kwargs): class AppendableMultiFrameTable(AppendableFrameTable): """ a frame with a multi-index """ - table_type = u'appendable_multiframe' + table_type = six.u('appendable_multiframe') obj_type = DataFrame ndim = 2 @property def table_type_short(self): - return u'appendable_multi' + return six.u('appendable_multi') def write(self, obj, data_columns=None, **kwargs): if data_columns is None: @@ -3264,7 +3269,7 @@ def read(self, columns=None, **kwargs): class AppendablePanelTable(AppendableTable): """ support the new appendable table formats """ - table_type = u'appendable_panel' + table_type = six.u('appendable_panel') ndim = 3 obj_type = Panel @@ -3281,7 +3286,7 @@ def is_transposed(self): class AppendableNDimTable(AppendablePanelTable): """ support the new appendable table formats """ - table_type = u'appendable_ndim' + table_type = six.u('appendable_ndim') ndim = 4 obj_type = Panel4D @@ -3349,18 +3354,18 @@ def _convert_index(index, encoding=None): def _unconvert_index(data, kind, encoding=None): kind = _ensure_decoded(kind) - if kind == u'datetime64': + if kind == six.u('datetime64'): index = DatetimeIndex(data) - elif kind == u'datetime': + elif kind == six.u('datetime'): index = np.array([datetime.fromtimestamp(v) for v in data], dtype=object) - elif kind == u'date': + elif kind == six.u('date'): index = np.array([date.fromtimestamp(v) for v in data], dtype=object) - elif kind in (u'integer', u'float'): + elif kind in (six.u('integer'), six.u('float')): index = np.array(data) - elif kind in (u'string'): + elif kind in (six.u('string')): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) - elif kind == u'object': + elif kind == six.u('object'): index = np.array(data[0]) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) @@ -3368,11 +3373,11 @@ def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): kind = _ensure_decoded(kind) - if kind == u'datetime': + if kind == six.u('datetime'): index = lib.time64_to_datetime(data) - elif kind in (u'integer'): + elif kind in (six.u('integer')): index = np.array(data, dtype=object) - elif kind in (u'string'): + elif kind in (six.u('string')): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) @@ -3430,7 +3435,7 @@ def _get_converter(kind, encoding): def _need_convert(kind): kind = _ensure_decoded(kind) - if kind in (u'datetime', u'datetime64', u'string'): + if kind in (six.u('datetime'), six.u('datetime64'), six.u('string')): return True return False @@ -3496,7 +3501,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.value = field.value # a string expression (or just the field) - elif isinstance(field, basestring): + elif isinstance(field, six.string_types): # is a term passed? s = self._search.match(field) @@ -3509,7 +3514,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.field = field # is an op passed? - if isinstance(op, basestring) and op in self._ops: + if isinstance(op, six.string_types) and op in self._ops: self.op = op self.value = value else: @@ -3530,7 +3535,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): # we have valid conditions if self.op in ['>', '>=', '<', '<=']: - if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,basestring): + if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,six.string_types): raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self)) if not is_list_like(self.value): @@ -3540,7 +3545,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.eval() def __unicode__(self): - attrs = map(pprint_thing, (self.field, self.op, self.value)) + attrs = list(map(pprint_thing, (self.field, self.op, self.value))) return "field->%s,op->%s,value->%s" % tuple(attrs) @property @@ -3620,32 +3625,36 @@ def stringify(value): return value kind = _ensure_decoded(self.kind) - if kind == u'datetime64' or kind == u'datetime' : + if kind == six.u('datetime64') or kind == six.u('datetime'): v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v,v.value,kind) - elif isinstance(v, datetime) or hasattr(v, 'timetuple') or kind == u'date': + elif (isinstance(v, datetime) or hasattr(v, 'timetuple') + or kind == six.u('date')): v = time.mktime(v.timetuple()) return TermValue(v,Timestamp(v),kind) - elif kind == u'integer': + elif kind == six.u('integer'): v = int(float(v)) return TermValue(v,v,kind) - elif kind == u'float': + elif kind == six.u('float'): v = float(v) return TermValue(v,v,kind) - elif kind == u'bool': - if isinstance(v, basestring): - v = not v.strip().lower() in [u'false', u'f', u'no', u'n', u'none', u'0', u'[]', u'{}', u''] + elif kind == six.u('bool'): + if isinstance(v, six.string_types): + poss_vals = [six.u('false'), six.u('f'), six.u('no'), + six.u('n'), six.u('none'), six.u('0'), + six.u('[]'), six.u('{}'), six.u('')] + v = not v.strip().lower() in poss_vals else: v = bool(v) return TermValue(v,v,kind) - elif not isinstance(v, basestring): + elif not isinstance(v, six.string_types): v = stringify(v) - return TermValue(v,stringify(v),u'string') + return TermValue(v,stringify(v),six.u('string')) # string quoting - return TermValue(v,stringify(v),u'string') + return TermValue(v,stringify(v),six.u('string')) class TermValue(object): """ hold a term value that we use to construct a condition/filter """ @@ -3658,7 +3667,7 @@ def __init__(self, value, converted, kind): def tostring(self, encoding): """ quote the string if not encoded else encode and return """ - if self.kind == u'string': + if self.kind == six.u('string'): if encoding is not None: return self.converted return '"%s"' % self.converted @@ -3733,7 +3742,7 @@ def generate(self, where): # operands inside any terms if not any([isinstance(w, (list, tuple, Term)) for w in where]): - if not 
any([isinstance(w, six.string_types) and Term._search.match(w) for w in where]): where = [where] queryables = self.table.queryables() diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 11b139b620175..16ccafcd14dd9 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -2,13 +2,17 @@ Collection of query wrappers / abstractions to both facilitate data retrieval and to reduce dependency on DB-specific API. """ +from __future__ import print_function from datetime import datetime, date +from pandas.util.py3compat import range import numpy as np import traceback from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull +from six.moves import map, zip +import six #------------------------------------------------------------------------------ # Helper execution function @@ -51,7 +55,7 @@ def execute(sql, con, retry=True, cur=None, params=None): except Exception: # pragma: no cover pass - print ('Error on sql %s' % sql) + print('Error on sql %s' % sql) raise @@ -61,7 +65,7 @@ def _safe_fetch(cur): if not isinstance(result, list): result = list(result) return result - except Exception, e: # pragma: no cover + except Exception as e: # pragma: no cover excName = e.__class__.__name__ if excName == 'OperationalError': return [] @@ -91,7 +95,7 @@ def tquery(sql, con=None, cur=None, retry=True): try: cur.close() con.commit() - except Exception, e: + except Exception as e: excName = e.__class__.__name__ if excName == 'OperationalError': # pragma: no cover print ('Failed to commit, may need to restart interpreter') @@ -121,7 +125,7 @@ def uquery(sql, con=None, cur=None, retry=True, params=None): result = cur.rowcount try: con.commit() - except Exception, e: + except Exception as e: excName = e.__class__.__name__ if excName != 'OperationalError': raise @@ -198,7 +202,7 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): if_exists='fail' exists = table_exists(name, con, flavor) if if_exists == 'fail' and exists: - raise ValueError, "Table '%s' already exists." % name + raise ValueError("Table '%s' already exists." % name) #create or drop-recreate if necessary create = None @@ -289,7 +293,7 @@ def get_schema(frame, name, flavor, keys=None): lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) # Replace spaces in DataFrame column names with _. 
safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] - column_types = zip(safe_columns, map(lookup_type, frame.dtypes)) + column_types = list(zip(safe_columns, map(lookup_type, frame.dtypes))) if flavor == 'sqlite': columns = ',\n '.join('[%s] %s' % x for x in column_types) else: @@ -297,7 +301,7 @@ def get_schema(frame, name, flavor, keys=None): keystr = '' if keys is not None: - if isinstance(keys, basestring): + if isinstance(keys, six.string_types): keys = (keys,) keystr = ', PRIMARY KEY (%s)' % ','.join(keys) template = """CREATE TABLE %(name)s ( diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 9257338cd4913..50b3d63cb459d 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -9,8 +9,9 @@ You can find more information on http://presbrey.mit.edu/PyDTA and http://statsmodels.sourceforge.net/devel/ """ - +# TODO: Fix this module so it can use cross-compatible zip, map, and range from StringIO import StringIO +from pandas.util import compat import numpy as np import sys @@ -21,6 +22,7 @@ from pandas.core.categorical import Categorical import datetime from pandas.util import py3compat +from pandas.util.py3compat import long from pandas import isnull from pandas.io.parsers import _parser_params, Appender from pandas.io.common import get_filepath_or_buffer @@ -225,7 +227,7 @@ def __init__(self, encoding): # we're going to drop the label and cast to int self.DTYPE_MAP = \ dict( - zip(range(1, 245), ['a' + str(i) for i in range(1, 245)]) + + list(zip(range(1, 245), ['a' + str(i) for i in range(1, 245)])) + [ (251, np.int16), (252, np.int32), @@ -234,7 +236,7 @@ def __init__(self, encoding): (255, np.float64) ] ) - self.TYPE_MAP = range(251) + list('bhlfd') + self.TYPE_MAP = list(range(251)) + list('bhlfd') #NOTE: technically, some of these are wrong. there are more numbers # that can be represented. 
it's the 27 ABOVE and BELOW the max listed # numeric data type in [U] 12.2.2 of the 11.2 manual @@ -384,7 +386,7 @@ def _calcsize(self, fmt): def _col_size(self, k=None): """Calculate size of a data record.""" if len(self.col_sizes) == 0: - self.col_sizes = map(lambda x: self._calcsize(x), self.typlist) + self.col_sizes = list(map(lambda x: self._calcsize(x), self.typlist)) if k is None: return self.col_sizes else: @@ -427,9 +429,9 @@ def _next(self): data[i] = self._unpack(typlist[i], self.path_or_buf.read(self._col_size(i))) return data else: - return map(lambda i: self._unpack(typlist[i], + return list(map(lambda i: self._unpack(typlist[i], self.path_or_buf.read(self._col_size(i))), - range(self.nvar)) + range(self.nvar))) def _dataset(self): """ diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index 1838e0907233c..49a7b90b2e1e8 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -1,4 +1,6 @@ """ self-contained to write legacy pickle files """ +from __future__ import print_function +from six.moves import zip def _create_sp_series(): @@ -28,13 +30,13 @@ def _create_sp_frame(): 'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6], 'C': np.arange(10), 'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]} - + dates = bdate_range('1/1/2011', periods=10) return SparseDataFrame(data, index=dates) def create_data(): """ create the pickle data """ - + import numpy as np import pandas from pandas import (Series,DataFrame,Panel, @@ -50,29 +52,29 @@ def create_data(): 'D': date_range('1/1/2009', periods=5), 'E' : [0., 1, Timestamp('20100101'),'foo',2.], } - - index = dict(int = Index(np.arange(10)), - date = date_range('20130101',periods=10)) - mi = dict(reg = MultiIndex.from_tuples(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], - ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]), + + index = dict(int = Index(np.arange(10)), + date = date_range('20130101',periods=10)) + mi = dict(reg = MultiIndex.from_tuples(list(zip([['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']])), names=['first', 'second'])) series = dict(float = Series(data['A']), - int = Series(data['B']), + int = Series(data['B']), mixed = Series(data['E'])) - frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), - int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), + frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)), + int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)), mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']]))) - panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1))) + panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1))) + - - return dict( series = series, - frame = frame, - panel = panel, - index = index, - mi = mi, + return dict( series = series, + frame = frame, + panel = panel, + index = index, + mi = mi, sp_series = dict(float = _create_sp_series()), - sp_frame = dict(float = _create_sp_frame()) + sp_frame = dict(float = _create_sp_frame()) ) def write_legacy_pickles(): @@ -92,9 +94,9 @@ def write_legacy_pickles(): base_dir, _ = os.path.split(os.path.abspath(__file__)) base_dir = os.path.join(base_dir,'data/legacy_pickle') - + # could make this a parameter? 
- version = None + version = None if version is None: @@ -108,11 +110,11 @@ def write_legacy_pickles(): # construct a reasonable platform name f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ]) pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f)) - + fh = open(pth,'wb') pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL) fh.close() - + print("created pickle file: %s" % pth) if __name__ == '__main__': diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index 7fa8d06f48ea3..2063b34c95e57 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -4,6 +4,7 @@ from pandas.util.py3compat import StringIO, BytesIO from datetime import datetime +from pandas.util import compat import csv import os import sys @@ -29,6 +30,8 @@ from pandas.parser import TextReader import pandas.parser as parser +import six +from six.moves import map class TestCParser(unittest.TestCase): @@ -325,7 +328,7 @@ def test_empty_field_eof(self): def assert_array_dicts_equal(left, right): - for k, v in left.iteritems(): + for k, v in compat.iteritems(left): assert(np.array_equal(v, right[k])) if __name__ == '__main__': diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index e760ddff518f5..a6ccc56fb6050 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -1,3 +1,5 @@ +from __future__ import print_function +from pandas.util import compat import unittest import warnings import nose @@ -12,11 +14,12 @@ from pandas.util.testing import (assert_series_equal, assert_produces_warning, network, assert_frame_equal) from numpy.testing import assert_array_equal +import six def assert_n_failed_equals_n_null_columns(wngs, obj, cls=SymbolWarning): all_nan_cols = pd.Series(dict((k, pd.isnull(v).all()) for k, v in - obj.iteritems())) + compat.iteritems(obj))) n_all_nan_cols = all_nan_cols.sum() valid_warnings = pd.Series([wng for wng in wngs if isinstance(wng, cls)]) assert_equal(len(valid_warnings), n_all_nan_cols) @@ -33,7 +36,7 @@ def test_google(self): # an exception when DataReader can't get a 200 response from # google start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertEquals( web.DataReader("F", 'google', start, end)['Close'][-1], @@ -97,7 +100,7 @@ def test_yahoo(self): # an exception when DataReader can't get a 200 response from # yahoo start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertEquals( web.DataReader("F", 'yahoo', start, end)['Close'][-1], 13.68) @@ -105,7 +108,7 @@ def test_yahoo(self): @network def test_yahoo_fails(self): start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertRaises(Exception, web.DataReader, "NON EXISTENT TICKER", 'yahoo', start, end) @@ -363,7 +366,7 @@ def test_fred(self): FRED. 
""" start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) self.assertEquals( web.DataReader("GDP", "fred", start, end)['GDP'].tail(1), @@ -375,14 +378,14 @@ def test_fred(self): @network def test_fred_nan(self): start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) df = web.DataReader("DFII5", "fred", start, end) assert pd.isnull(df.ix['2010-01-01']) @network def test_fred_parts(self): start = datetime(2010, 1, 1) - end = datetime(2013, 01, 27) + end = datetime(2013, 1, 27) df = web.get_data_fred("CPIAUCSL", start, end) self.assertEqual(df.ix['2010-05-01'], 217.23) diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index ebbb7292cb3d7..251a32cc3a933 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -3,6 +3,7 @@ from pandas.util.py3compat import StringIO, BytesIO, PY3 from datetime import datetime from os.path import split as psplit +from pandas.util.py3compat import range import csv import os import sys @@ -35,6 +36,8 @@ from numpy.testing.decorators import slow from pandas.parser import OverflowError +import six +from six.moves import map def _skip_if_no_xlrd(): try: @@ -707,7 +710,7 @@ def test_to_excel_unicode_filename(self): _skip_if_no_excelsuite() for ext in ['xls', 'xlsx']: - filename = u'\u0192u.' + ext + filename = six.u('\u0192u.') + ext try: f = open(filename, 'wb') @@ -769,7 +772,7 @@ def test_to_excel_styleconverter(self): # def test_to_excel_header_styling_xls(self): # import StringIO - # s = StringIO.StringIO( + # s = StringIO( # """Date,ticker,type,value # 2001-01-01,x,close,12.2 # 2001-01-01,x,open ,12.1 @@ -816,7 +819,7 @@ def test_to_excel_styleconverter(self): # os.remove(filename) # def test_to_excel_header_styling_xlsx(self): # import StringIO - # s = StringIO.StringIO( + # s = StringIO( # """Date,ticker,type,value # 2001-01-01,x,close,12.2 # 2001-01-01,x,open ,12.1 diff --git a/pandas/io/tests/test_ga.py b/pandas/io/tests/test_ga.py index d2061a6d0b57a..e33b75c569fef 100644 --- a/pandas/io/tests/test_ga.py +++ b/pandas/io/tests/test_ga.py @@ -82,8 +82,8 @@ def test_iterator(self): dimensions='date', max_results=10, chunksize=5) - df1 = it.next() - df2 = it.next() + df1 = next(it) + df2 = next(it) for df in [df1, df2]: assert isinstance(df, DataFrame) diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 1d0c2a13302af..2f7c6092d9bcf 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -1,8 +1,10 @@ +from __future__ import print_function import os import re -from cStringIO import StringIO +from pandas.util.py3compat import StringIO from unittest import TestCase import warnings +import six from distutils.version import LooseVersion import urllib2 @@ -12,6 +14,7 @@ import numpy as np from numpy.random import rand from numpy.testing.decorators import slow +from six.moves import map, zip try: from importlib import import_module @@ -42,7 +45,7 @@ def _skip_if_no(module_name): def _skip_if_none_of(module_names): - if isinstance(module_names, basestring): + if isinstance(module_names, six.string_types): _skip_if_no(module_names) if module_names == 'bs4': import bs4 @@ -112,8 +115,8 @@ def test_to_html_compat(self): out = df.to_html() res = self.run_read_html(out, attrs={'class': 'dataframe'}, index_col=0)[0] - print (df.dtypes) - print (res.dtypes) + print(df.dtypes) + print(res.dtypes) assert_frame_equal(res, df) @network @@ -149,7 +152,7 @@ def test_spam(self): df2 = 
self.run_read_html(self.spam_data, 'Unit', infer_types=False) assert_framelist_equal(df1, df2) - print (df1[0]) + print(df1[0]) self.assertEqual(df1[0].ix[0, 0], 'Proximates') self.assertEqual(df1[0].columns[0], 'Nutrient') @@ -178,7 +181,7 @@ def test_skiprows_int(self): def test_skiprows_xrange(self): df1 = [self.run_read_html(self.spam_data, '.*Water.*').pop()[2:]] - df2 = self.run_read_html(self.spam_data, 'Unit', skiprows=xrange(2)) + df2 = self.run_read_html(self.spam_data, 'Unit', skiprows=range(2)) assert_framelist_equal(df1, df2) diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 21fae9a50c7dd..2aaffe40474f1 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -2,7 +2,9 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from StringIO import StringIO +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range +from pandas.util import compat import cPickle as pickle import operator import os @@ -91,7 +93,7 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_ try: unser = read_json(dfjson, orient=orient, dtype=dtype, numpy=numpy, convert_axes=convert_axes) - except (Exception), detail: + except (Exception) as detail: if raise_ok is not None: if type(detail) == raise_ok: return @@ -320,7 +322,7 @@ def _check_all_orients(series, dtype=None): _check_all_orients(self.ts) # dtype - s = Series(range(6), index=['a','b','c','d','e','f']) + s = Series(list(range(6)), index=['a','b','c','d','e','f']) _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64) _check_all_orients(Series(s, dtype=np.int), dtype=np.int) @@ -340,7 +342,7 @@ def test_frame_from_json_precise_float(self): def test_typ(self): - s = Series(range(6), index=['a','b','c','d','e','f'], dtype='int64') + s = Series(list(range(6)), index=['a','b','c','d','e','f'], dtype='int64') result = read_json(s.to_json(),typ=None) assert_series_equal(result,s) @@ -439,7 +441,7 @@ def test_weird_nested_json(self): def test_doc_example(self): dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB')) dfj2['date'] = Timestamp('20130101') - dfj2['ints'] = range(5) + dfj2['ints'] = list(range(5)) dfj2['bools'] = True dfj2.index = pd.date_range('20130101',periods=5) diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 86aeecf169b28..a8f6ddffe8e68 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -1,7 +1,6 @@ import unittest from unittest import TestCase -import pandas.json as ujson try: import json except ImportError: @@ -13,11 +12,15 @@ import time import datetime import calendar -import StringIO import re import random import decimal from functools import partial +from pandas.util.py3compat import range, StringIO +from pandas.util import compat +import pandas.json as ujson +import six +from six.moves import zip import pandas.util.py3compat as py3compat import numpy as np @@ -69,7 +72,7 @@ def helper(expected_output, **encode_kwargs): helper(html_encoded, ensure_ascii=False, encode_html_chars=True) def test_doubleLongIssue(self): - sut = {u'a': -4342969734183514} + sut = {six.u('a'): -4342969734183514} encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) @@ -78,7 +81,7 @@ def test_doubleLongIssue(self): self.assertEqual(sut, decoded) def test_doubleLongDecimalIssue(self): - sut = {u'a': 
-12345678901234.56789012} + sut = {six.u('a'): -12345678901234.56789012} encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) @@ -88,12 +91,12 @@ def test_doubleLongDecimalIssue(self): def test_encodeDecodeLongDecimal(self): - sut = {u'a': -528656961.4399388} + sut = {six.u('a'): -528656961.4399388} encoded = ujson.dumps(sut, double_precision=15) ujson.decode(encoded) def test_decimalDecodeTestPrecise(self): - sut = {u'a': 4.56} + sut = {six.u('a'): 4.56} encoded = ujson.encode(sut) decoded = ujson.decode(encoded, precise_float=True) self.assertEqual(sut, decoded) @@ -109,10 +112,16 @@ def test_encodeDoubleTinyExponential(self): self.assert_(np.allclose(num, ujson.decode(ujson.encode(num)))) def test_encodeDictWithUnicodeKeys(self): - input = { u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1", u"key1": u"value1" } + input = {six.u("key1"): six.u("value1"), six.u("key1"): + six.u("value1"), six.u("key1"): six.u("value1"), + six.u("key1"): six.u("value1"), six.u("key1"): + six.u("value1"), six.u("key1"): six.u("value1")} output = ujson.encode(input) - input = { u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1", u"بن": u"value1" } + input = {six.u("بن"): six.u("value1"), six.u("بن"): six.u("value1"), + six.u("بن"): six.u("value1"), six.u("بن"): six.u("value1"), + six.u("بن"): six.u("value1"), six.u("بن"): six.u("value1"), + six.u("بن"): six.u("value1")} output = ujson.encode(input) pass @@ -361,7 +370,7 @@ def test_encodeToUTF8(self): self.assertEquals(dec, json.loads(enc)) def test_decodeFromUnicode(self): - input = u"{\"obj\": 31337}" + input = six.u("{\"obj\": 31337}") dec1 = ujson.decode(input) dec2 = ujson.decode(str(input)) self.assertEquals(dec1, dec2) @@ -520,18 +529,18 @@ def test_decodeNullBroken(self): def test_decodeBrokenDictKeyTypeLeakTest(self): input = '{{1337:""}}' - for x in xrange(1000): + for x in range(1000): try: ujson.decode(input) assert False, "Expected exception!" - except(ValueError),e: + except ValueError as e: continue assert False, "Wrong exception" def test_decodeBrokenDictLeakTest(self): input = '{{"key":"}' - for x in xrange(1000): + for x in range(1000): try: ujson.decode(input) assert False, "Expected exception!" @@ -542,7 +551,7 @@ def test_decodeBrokenDictLeakTest(self): def test_decodeBrokenListLeakTest(self): input = '[[[true' - for x in xrange(1000): + for x in range(1000): try: ujson.decode(input) assert False, "Expected exception!" 
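The test_ujson.py hunks above and below exercise two of the purely mechanical rewrites this patch repeats everywhere: "except ValueError, e:" becomes "except ValueError as e:" (the only spelling Python 3 accepts, and valid since Python 2.6), and xrange becomes the range re-exported by pandas.util.py3compat (the builtin on 3.x, an alias for xrange on 2.x). The sketch below shows the same two idioms in a self-contained form; the compat_range shim and the helper name are illustrative stand-ins, not the actual py3compat implementation:

import sys
import json

# Stand-in for the patch's `from pandas.util.py3compat import range`:
# the ordinary builtin on Python 3, the lazy xrange on Python 2.
if sys.version_info[0] >= 3:
    compat_range = range
else:
    compat_range = xrange  # noqa: F821 -- name only exists on Python 2

def first_decode_errors(payloads, attempts=1000):
    """Try to decode each payload `attempts` times (as the leak tests above
    do) and record each malformed payload once, using the py2/py3-portable
    'except ... as ...' spelling."""
    bad = []
    for i in compat_range(len(payloads)):
        for _ in compat_range(attempts):
            try:
                json.loads(payloads[i])
            except ValueError as e:  # 'except ValueError, e:' is a SyntaxError on py3
                if not bad or bad[-1][0] != i:
                    bad.append((i, str(e)))
    return bad

print(first_decode_errors(['[1, 2, 3]', '{{"key":"}', '[[[true'], attempts=3))
# -> payloads 1 and 2 are reported malformed, mirroring the
#    decodeBroken*LeakTest cases above
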
@@ -611,7 +620,7 @@ def test_encodeNullCharacter(self): self.assertEquals(output, json.dumps(input)) self.assertEquals(input, ujson.decode(output)) - self.assertEquals('" \\u0000\\r\\n "', ujson.dumps(u" \u0000\r\n ")) + self.assertEquals('" \\u0000\\r\\n "', ujson.dumps(six.u(" \u0000\r\n "))) pass def test_decodeNullCharacter(self): @@ -678,7 +687,7 @@ def test_decodeNumericIntExpeMinus(self): self.assertAlmostEqual(output, json.loads(input)) def test_dumpToFile(self): - f = StringIO.StringIO() + f = StringIO() ujson.dump([1, 2, 3], f) self.assertEquals("[1,2,3]", f.getvalue()) @@ -701,9 +710,9 @@ def test_dumpFileArgsError(self): assert False, 'expected TypeError' def test_loadFile(self): - f = StringIO.StringIO("[1,2,3,4]") + f = StringIO("[1,2,3,4]") self.assertEquals([1, 2, 3, 4], ujson.load(f)) - f = StringIO.StringIO("[1,2,3,4]") + f = StringIO("[1,2,3,4]") assert_array_equal(np.array([1, 2, 3, 4]), ujson.load(f, numpy=True)) def test_loadFileLikeObject(self): @@ -740,7 +749,7 @@ def test_encodeNumericOverflow(self): assert False, "expected OverflowError" def test_encodeNumericOverflowNested(self): - for n in xrange(0, 100): + for n in range(0, 100): class Nested: x = 12839128391289382193812939 @@ -769,7 +778,7 @@ def test_decodeNumberWith32bitSignBit(self): self.assertEqual(ujson.decode(doc)['id'], result) def test_encodeBigEscape(self): - for x in xrange(10): + for x in range(10): if py3compat.PY3: base = '\u00e5'.encode('utf-8') else: @@ -778,7 +787,7 @@ def test_encodeBigEscape(self): output = ujson.encode(input) def test_decodeBigEscape(self): - for x in xrange(10): + for x in range(10): if py3compat.PY3: base = '\u00e5'.encode('utf-8') else: @@ -788,7 +797,7 @@ def test_decodeBigEscape(self): output = ujson.decode(input) def test_toDict(self): - d = {u"key": 31337} + d = {six.u("key"): 31337} class DictTest: def toDict(self): @@ -1034,7 +1043,7 @@ def testArrayNumpyLabelled(self): output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) self.assertTrue((np.array([42]) == output[0]).all()) self.assertTrue(output[1] is None) - self.assertTrue((np.array([u'a']) == output[2]).all()) + self.assertTrue((np.array([six.u('a')]) == output[2]).all()) # py3 is non-deterministic on the ordering...
if not py3compat.PY3: @@ -1043,7 +1052,7 @@ def testArrayNumpyLabelled(self): expectedvals = np.array([42, 31, 24, 99, 2.4, 78], dtype=int).reshape((3,2)) self.assertTrue((expectedvals == output[0]).all()) self.assertTrue(output[1] is None) - self.assertTrue((np.array([u'a', 'b']) == output[2]).all()) + self.assertTrue((np.array([six.u('a'), 'b']) == output[2]).all()) input = {1: {'a': 42, 'b':31}, 2: {'a': 24, 'c': 99}, 3: {'a': 2.4, 'b': 78}} @@ -1331,7 +1340,7 @@ def test_decodeTooBigValue(self): try: input = "9223372036854775808" ujson.decode(input) - except ValueError, e: + except ValueError as e: pass else: assert False, "expected ValueError" @@ -1340,7 +1349,7 @@ def test_decodeTooSmallValue(self): try: input = "-90223372036854775809" ujson.decode(input) - except ValueError,e: + except ValueError as e: pass else: assert False, "expected ValueError" @@ -1418,7 +1427,7 @@ def test_decodeFloatingPointAdditionalTests(self): def test_encodeBigSet(self): s = set() - for x in xrange(0, 100000): + for x in range(0, 100000): s.add(x) ujson.encode(s) diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index b88b1ab776ab4..198de5d0fb212 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -2,6 +2,7 @@ from pandas.util.py3compat import StringIO, BytesIO, PY3 from datetime import datetime +from pandas.util.py3compat import range, long import csv import os import sys @@ -36,6 +37,8 @@ from numpy.testing.decorators import slow from pandas.parser import OverflowError +import six +from six.moves import map class ParserTests(object): @@ -110,10 +113,10 @@ def test_empty_string(self): def test_read_csv(self): if not py3compat.PY3: if 'win' in sys.platform: - prefix = u"file:///" + prefix = six.u("file:///") else: - prefix = u"file://" - fname = prefix + unicode(self.csv1) + prefix = six.u("file://") + fname = prefix + six.text_type(self.csv1) # it works! 
             df1 = read_csv(fname, index_col=0, parse_dates=True)
@@ -181,7 +184,6 @@ def test_inf_parsing(self):
         df = read_csv(StringIO(data), index_col=0)
         assert_almost_equal(df['A'].values, expected.values)
         df = read_csv(StringIO(data), index_col=0, na_filter=False)
-        print df['A'].values
         assert_almost_equal(df['A'].values, expected.values)
 
     def test_multiple_date_col(self):
@@ -316,7 +318,7 @@ def test_multiple_date_cols_with_header(self):
 KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000"""
 
         df = self.read_csv(StringIO(data), parse_dates={'nominal': [1, 2]})
-        self.assert_(not isinstance(df.nominal[0], basestring))
+        self.assert_(not isinstance(df.nominal[0], six.string_types))
 
         ts_data = """\
 ID,date,nominalTime,actualTime,A,B,C,D,E
@@ -423,7 +425,7 @@ def test_malformed(self):
             df = self.read_table(
                 StringIO(data), sep=',', header=1, comment='#')
             self.assert_(False)
-        except Exception, inst:
+        except Exception as inst:
             self.assert_('Expected 3 fields in line 4, saw 5' in str(inst))
 
         # skip_footer
@@ -440,7 +442,7 @@ def test_malformed(self):
                 StringIO(data), sep=',', header=1, comment='#',
                 skip_footer=1)
             self.assert_(False)
-        except Exception, inst:
+        except Exception as inst:
             self.assert_('Expected 3 fields in line 4, saw 5' in str(inst))
 
         # first chunk
@@ -458,7 +460,7 @@ def test_malformed(self):
                 skiprows=[2])
             df = it.read(5)
             self.assert_(False)
-        except Exception, inst:
+        except Exception as inst:
             self.assert_('Expected 3 fields in line 6, saw 5' in str(inst))
 
         # middle chunk
@@ -477,7 +479,7 @@ def test_malformed(self):
             df = it.read(1)
             it.read(2)
             self.assert_(False)
-        except Exception, inst:
+        except Exception as inst:
             self.assert_('Expected 3 fields in line 6, saw 5' in str(inst))
 
         # last chunk
@@ -496,7 +498,7 @@ def test_malformed(self):
             df = it.read(1)
             it.read()
             self.assert_(False)
-        except Exception, inst:
+        except Exception as inst:
             self.assert_('Expected 3 fields in line 6, saw 5' in str(inst))
 
     def test_passing_dtype(self):
@@ -610,7 +612,7 @@ def test_nat_parse(self):
 
         # GH 3062
         df = DataFrame(dict({
-            'A' : np.asarray(range(10),dtype='float64'),
+            'A' : np.asarray(list(range(10)),dtype='float64'),
             'B' : pd.Timestamp('20010101') }))
         df.iloc[3:6,:] = np.nan
@@ -640,7 +642,7 @@ def test_skiprows_bug(self):
 1/2/2000,4,5,6
 1/3/2000,7,8,9
 """
-        data = self.read_csv(StringIO(text), skiprows=range(6), header=None,
+        data = self.read_csv(StringIO(text), skiprows=list(range(6)), header=None,
                              index_col=0, parse_dates=True)
 
         data2 = self.read_csv(StringIO(text), skiprows=6, header=None,
@@ -793,20 +795,20 @@ def test_parse_dates_column_list(self):
 15/01/2010;P;P;50;1;14/1/2011
 01/05/2010;P;P;50;1;15/1/2011'''
 
-        expected = self.read_csv(StringIO(data), sep=";", index_col=range(4))
+        expected = self.read_csv(StringIO(data), sep=";", index_col=list(range(4)))
 
         lev = expected.index.levels[0]
         expected.index.levels[0] = lev.to_datetime(dayfirst=True)
         expected['aux_date'] = to_datetime(expected['aux_date'], dayfirst=True)
-        expected['aux_date'] = map(Timestamp, expected['aux_date'])
+        expected['aux_date'] = list(map(Timestamp, expected['aux_date']))
         self.assert_(isinstance(expected['aux_date'][0], datetime))
 
-        df = self.read_csv(StringIO(data), sep=";", index_col=range(4),
+        df = self.read_csv(StringIO(data), sep=";", index_col=list(range(4)),
                            parse_dates=[0, 5], dayfirst=True)
         tm.assert_frame_equal(df, expected)
 
-        df = self.read_csv(StringIO(data), sep=";", index_col=range(4),
+        df = self.read_csv(StringIO(data), sep=";", index_col=list(range(4)),
                            parse_dates=['date', 'aux_date'],
                            dayfirst=True)
         tm.assert_frame_equal(df, expected)
 
@@ -829,7 +831,7 @@ def test_no_header(self):
         self.assert_(np.array_equal(df_pref.columns,
                                     ['X0', 'X1', 'X2', 'X3', 'X4']))
 
-        self.assert_(np.array_equal(df.columns, range(5)))
+        self.assert_(np.array_equal(df.columns, list(range(5))))
         self.assert_(np.array_equal(df2.columns, names))
 
@@ -870,7 +872,7 @@ def test_read_csv_no_index_name(self):
         tm.assert_frame_equal(df, df2)
 
     def test_read_table_unicode(self):
-        fin = BytesIO(u'\u0141aski, Jan;1'.encode('utf-8'))
+        fin = BytesIO(six.u('\u0141aski, Jan;1').encode('utf-8'))
         df1 = read_table(fin, sep=";", encoding="utf-8", header=None)
         self.assert_(isinstance(df1[0].values[0], unicode))
 
@@ -1553,23 +1555,23 @@ def test_skipinitialspace(self):
         sfile = StringIO(s)
 
         # it's 33 columns
-        result = self.read_csv(sfile, names=range(33), na_values=['-9999.0'],
+        result = self.read_csv(sfile, names=list(range(33)), na_values=['-9999.0'],
                                header=None, skipinitialspace=True)
         self.assertTrue(pd.isnull(result.ix[0, 29]))
 
     def test_utf16_bom_skiprows(self):
         # #2298
-        data = u"""skip this
+        data = six.u("""skip this
 skip this too
 A\tB\tC
 1\t2\t3
-4\t5\t6"""
+4\t5\t6""")
 
-        data2 = u"""skip this
+        data2 = six.u("""skip this
 skip this too
 A,B,C
 1,2,3
-4,5,6"""
+4,5,6""")
 
         path = '__%s__.csv' % tm.rands(10)
 
@@ -1610,7 +1612,7 @@ def test_converters_corner_with_nas(self):
         if hash(np.int64(-1)) != -2:
             raise nose.SkipTest
 
-        import StringIO
+        from pandas.util.py3compat import StringIO
         csv = """id,score,days
 1,2,12
 2,2-5,
@@ -1646,20 +1648,20 @@ def convert_score(x):
             if not x:
                 return np.nan
             if x.find('-') > 0:
-                valmin, valmax = map(int, x.split('-'))
+                valmin, valmax = list(map(int, x.split('-')))
                 val = 0.5 * (valmin + valmax)
             else:
                 val = float(x)
             return val
 
-        fh = StringIO.StringIO(csv)
+        fh = StringIO(csv)
         result = self.read_csv(fh, converters={'score': convert_score,
                                                'days': convert_days},
                                na_values=['', None])
         self.assert_(pd.isnull(result['days'][1]))
 
-        fh = StringIO.StringIO(csv)
+        fh = StringIO(csv)
         result2 = self.read_csv(fh, converters={'score': convert_score,
                                                 'days': convert_days_sentinel},
                                 na_values=['', None])
@@ -1672,7 +1674,7 @@ def test_unicode_encoding(self):
         result = result.set_index(0)
 
         got = result[1][1632]
-        expected = u'\xc1 k\xf6ldum klaka (Cold Fever) (1994)'
+        expected = six.u('\xc1 k\xf6ldum klaka (Cold Fever) (1994)')
 
         self.assertEquals(got, expected)
 
@@ -1800,13 +1802,13 @@ def test_sniff_delimiter(self):
                                sep=None, skiprows=2)
         tm.assert_frame_equal(data, data3)
 
-        text = u"""ignore this
+        text = six.u("""ignore this
 ignore this too
 index|A|B|C
 foo|1|2|3
 bar|4|5|6
 baz|7|8|9
-""".encode('utf-8')
+""").encode('utf-8')
 
         s = BytesIO(text)
         if py3compat.PY3:
@@ -2325,9 +2327,9 @@ def test_parse_ragged_csv(self):
         data = "1,2\n3,4,5"
 
         result = self.read_csv(StringIO(data), header=None,
-                               names=range(50))
+                               names=list(range(50)))
         expected = self.read_csv(StringIO(data), header=None,
-                                 names=range(3)).reindex(columns=range(50))
+                                 names=list(range(3))).reindex(columns=list(range(50)))
 
         tm.assert_frame_equal(result, expected)
 
@@ -2374,9 +2376,11 @@ def test_convert_sql_column_strings(self):
         assert_same_values_and_dtype(result, expected)
 
     def test_convert_sql_column_unicode(self):
-        arr = np.array([u'1.5', None, u'3', u'4.2'], dtype=object)
+        arr = np.array([six.u('1.5'), None, six.u('3'), six.u('4.2')],
+                       dtype=object)
         result = lib.convert_sql_column(arr)
-        expected = np.array([u'1.5', np.nan, u'3', u'4.2'], dtype=object)
+        expected = np.array([six.u('1.5'), np.nan, six.u('3'), six.u('4.2')],
+                            dtype=object)
         assert_same_values_and_dtype(result, expected)
 
     def test_convert_sql_column_ints(self):
@@ -2394,12 +2398,12 @@ def test_convert_sql_column_ints(self):
         assert_same_values_and_dtype(result, expected)
 
     def test_convert_sql_column_longs(self):
-        arr = np.array([1L, 2L, 3L, 4L], dtype='O')
+        arr = np.array([long(1), long(2), long(3), long(4)], dtype='O')
         result = lib.convert_sql_column(arr)
         expected = np.array([1, 2, 3, 4], dtype='i8')
         assert_same_values_and_dtype(result, expected)
 
-        arr = np.array([1L, 2L, 3L, None, 4L], dtype='O')
+        arr = np.array([long(1), long(2), long(3), None, long(4)], dtype='O')
         result = lib.convert_sql_column(arr)
         expected = np.array([1, 2, 3, np.nan, 4], dtype='f8')
         assert_same_values_and_dtype(result, expected)
diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py
index 5c79c57c1e020..69a52f4489725 100644
--- a/pandas/io/tests/test_pickle.py
+++ b/pandas/io/tests/test_pickle.py
@@ -27,7 +27,7 @@ def setUp(self):
 
     def compare(self, vf):
 
         # py3 compat when reading py2 pickle
-        
+
         try:
             with open(vf,'rb') as fh:
                 data = pickle.load(fh)
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 6518f9cb6097f..aff43cc913203 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -1,3 +1,5 @@
+from __future__ import print_function
+from pandas.util.py3compat import range
 import nose
 import unittest
 import os
@@ -20,6 +22,7 @@
 from pandas.util import py3compat
 
 from numpy.testing.decorators import slow
+import six
 
 try:
     import tables
@@ -127,7 +130,7 @@ def roundtrip(key, obj,**kwargs):
             tm.assert_panel_equal(o, roundtrip('panel',o))
 
         # table
-        df = DataFrame(dict(A=range(5), B=range(5)))
+        df = DataFrame(dict(A=list(range(5)), B=list(range(5))))
         df.to_hdf(self.path,'table',append=True)
         result = read_hdf(self.path, 'table', where = ['index>2'])
         assert_frame_equal(df[df.index>2],result)
@@ -481,7 +484,7 @@ def test_encoding(self):
             raise nose.SkipTest('system byteorder is not little, skipping test_encoding!')
 
         with ensure_clean(self.path) as store:
-            df = DataFrame(dict(A='foo',B='bar'),index=range(5))
+            df = DataFrame(dict(A='foo',B='bar'),index=list(range(5)))
            df.loc[2,'A'] = np.nan
            df.loc[3,'B'] = np.nan
            _maybe_remove(store, 'df')
@@ -604,7 +607,7 @@ def test_append_with_different_block_ordering(self):
         for i in range(10):
 
             df = DataFrame(np.random.randn(10,2),columns=list('AB'))
-            df['index'] = range(10)
+            df['index'] = list(range(10))
             df['index'] += i*10
             df['int64'] = Series([1]*len(df),dtype='int64')
             df['int16'] = Series([1]*len(df),dtype='int16')
@@ -780,7 +783,7 @@ def check_col(key,name,size):
         def check_col(key,name,size):
             self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size)
 
-        df = DataFrame(dict(A = 'foo', B = 'bar'),index=range(10))
+        df = DataFrame(dict(A = 'foo', B = 'bar'),index=list(range(10)))
 
         # a min_itemsize that creates a data_column
         _maybe_remove(store, 'df')
@@ -1015,8 +1018,8 @@ def test_big_table_frame(self):
             raise nose.SkipTest('no big table frame')
 
         # create and write a big table
-        df = DataFrame(np.random.randn(2000 * 100, 100), index=range(
-            2000 * 100), columns=['E%03d' % i for i in xrange(100)])
+        df = DataFrame(np.random.randn(2000 * 100, 100), index=list(range(
+            2000 * 100)), columns=['E%03d' % i for i in range(100)])
         for x in range(20):
             df['String%03d' % x] = 'string%03d' % x
 
@@ -1027,7 +1030,7 @@ def test_big_table_frame(self):
             rows = store.root.df.table.nrows
             recons = store.select('df')
 
-            print ("\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x))
+            print("\nbig_table frame [%s] -> %5.2f" % (rows, time.time() - x))
 
     def test_big_table2_frame(self):
         # this is a really big table: 1m rows x 60 float columns, 20 string, 20 datetime
@@ -1038,14 +1041,14 @@ def test_big_table2_frame(self):
         print ("\nbig_table2 start")
         import time
         start_time = time.time()
-        df = DataFrame(np.random.randn(1000 * 1000, 60), index=xrange(int(
-            1000 * 1000)), columns=['E%03d' % i for i in xrange(60)])
-        for x in xrange(20):
+        df = DataFrame(np.random.randn(1000 * 1000, 60), index=range(int(
+            1000 * 1000)), columns=['E%03d' % i for i in range(60)])
+        for x in range(20):
             df['String%03d' % x] = 'string%03d' % x
-        for x in xrange(20):
+        for x in range(20):
             df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)
 
-        print ("\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f"
+        print("\nbig_table2 frame (creation of df) [rows->%s] -> %5.2f"
               % (len(df.index), time.time() - start_time))
 
         def f(chunksize):
@@ -1056,9 +1059,9 @@ def f(chunksize):
 
         for c in [10000, 50000, 250000]:
             start_time = time.time()
-            print ("big_table2 frame [chunk->%s]" % c)
+            print("big_table2 frame [chunk->%s]" % c)
             rows = f(c)
-            print ("big_table2 frame [rows->%s,chunk->%s] -> %5.2f"
+            print("big_table2 frame [rows->%s,chunk->%s] -> %5.2f"
                   % (rows, c, time.time() - start_time))
 
     def test_big_put_frame(self):
@@ -1067,14 +1070,14 @@ def test_big_put_frame(self):
         print ("\nbig_put start")
         import time
         start_time = time.time()
-        df = DataFrame(np.random.randn(1000 * 1000, 60), index=xrange(int(
-            1000 * 1000)), columns=['E%03d' % i for i in xrange(60)])
-        for x in xrange(20):
+        df = DataFrame(np.random.randn(1000 * 1000, 60), index=range(int(
+            1000 * 1000)), columns=['E%03d' % i for i in range(60)])
+        for x in range(20):
             df['String%03d' % x] = 'string%03d' % x
-        for x in xrange(20):
+        for x in range(20):
             df['datetime%03d' % x] = datetime.datetime(2001, 1, 2, 0, 0)
 
-        print ("\nbig_put frame (creation of df) [rows->%s] -> %5.2f"
+        print("\nbig_put frame (creation of df) [rows->%s] -> %5.2f"
               % (len(df.index), time.time() - start_time))
 
         with ensure_clean(self.path, mode='w') as store:
@@ -1082,8 +1085,8 @@ def test_big_put_frame(self):
             store = HDFStore(fn, mode='w')
             store.put('df', df)
 
-            print (df.get_dtype_counts())
-            print ("big_put frame [shape->%s] -> %5.2f"
+            print(df.get_dtype_counts())
+            print("big_put frame [shape->%s] -> %5.2f"
                   % (df.shape, time.time() - start_time))
 
     def test_big_table_panel(self):
@@ -1091,8 +1094,8 @@
 
         # create and write a big table
         wp = Panel(
-            np.random.randn(20, 1000, 1000), items=['Item%03d' % i for i in xrange(20)],
-            major_axis=date_range('1/1/2000', periods=1000), minor_axis=['E%03d' % i for i in xrange(1000)])
+            np.random.randn(20, 1000, 1000), items=['Item%03d' % i for i in range(20)],
+            major_axis=date_range('1/1/2000', periods=1000), minor_axis=['E%03d' % i for i in range(1000)])
 
         wp.ix[:, 100:200, 300:400] = np.nan
 
@@ -1108,7 +1111,7 @@ def test_big_table_panel(self):
             rows = store.root.wp.table.nrows
             recons = store.select('wp')
 
-            print ("\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x))
+            print("\nbig_table panel [%s] -> %5.2f" % (rows, time.time() - x))
 
     def test_append_diff_item_order(self):
@@ -1328,7 +1331,7 @@ def test_unimplemented_dtypes_table_columns(self):
 
         # py3 ok for unicode
         if not py3compat.PY3:
-            l.append(('unicode', u'\u03c3'))
+            l.append(('unicode', six.u('\u03c3')))
 
         ### currently not supported dtypes ####
         for n, f in l:
@@ -1377,14 +1380,14 @@ def compare(a,b):
             compare(store.select('df_tz',where=Term('A','>=',df.A[3])),df[df.A>=df.A[3]])
 
             _maybe_remove(store, 'df_tz')
-            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=range(5))
+            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=list(range(5)))
             store.append('df_tz',df)
             result = store['df_tz']
             compare(result,df)
             assert_frame_equal(result,df)
 
             _maybe_remove(store, 'df_tz')
-            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=range(5))
+            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=list(range(5)))
             self.assertRaises(TypeError, store.append, 'df_tz', df)
 
             # this is ok
@@ -1395,14 +1398,14 @@ def compare(a,b):
             assert_frame_equal(result,df)
 
             # can't append with diff timezone
-            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=range(5))
+            df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=list(range(5)))
             self.assertRaises(ValueError, store.append, 'df_tz', df)
 
         # as index
         with ensure_clean(self.path) as store:
 
             # GH 4098 example
-            df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern'))))
+            df = DataFrame(dict(A = Series(range(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern'))))
 
             _maybe_remove(store, 'df')
             store.put('df',df)
@@ -1989,12 +1992,12 @@ def test_select(self):
 
             # selection on the non-indexable with a large number of columns
             wp = Panel(
-                np.random.randn(100, 100, 100), items=['Item%03d' % i for i in xrange(100)],
-                major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in xrange(100)])
+                np.random.randn(100, 100, 100), items=['Item%03d' % i for i in range(100)],
+                major_axis=date_range('1/1/2000', periods=100), minor_axis=['E%03d' % i for i in range(100)])
 
             _maybe_remove(store, 'wp')
             store.append('wp', wp)
 
-            items = ['Item%03d' % i for i in xrange(80)]
+            items = ['Item%03d' % i for i in range(80)]
             result = store.select('wp', Term('items', items))
             expected = wp.reindex(items=items)
             tm.assert_panel_equal(expected, result)
@@ -2092,7 +2095,7 @@ def test_select_with_many_inputs(self):
 
             df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300),
                                 A=np.random.randn(300),
-                                B=range(300),
+                                B=list(range(300)),
                                 users = ['a']*50 + ['b']*50 + ['c']*100 + ['a%03d' % i for i in range(100)]))
             _maybe_remove(store, 'df')
             store.append('df', df, data_columns=['ts', 'A', 'B', 'users'])
@@ -2108,12 +2111,12 @@ def test_select_with_many_inputs(self):
             tm.assert_frame_equal(expected, result)
 
             # big selector along the columns
-            selector = [ 'a','b','c' ] + [ 'a%03d' % i for i in xrange(60) ]
+            selector = [ 'a','b','c' ] + [ 'a%03d' % i for i in range(60) ]
             result = store.select('df', [Term('ts', '>=', Timestamp('2012-02-01')),Term('users',selector)])
             expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector) ]
             tm.assert_frame_equal(expected, result)
 
-            selector = range(100,200)
+            selector = list(range(100,200))
             result = store.select('df', [Term('B', selector)])
             expected = df[ df.B.isin(selector) ]
             tm.assert_frame_equal(expected, result)
@@ -2211,7 +2214,7 @@ def test_select_iterator(self):
 
     def test_retain_index_attributes(self):
 
         # GH 3499, losing frequency info on index recreation
-        df = DataFrame(dict(A = Series(xrange(3),
+        df = DataFrame(dict(A = Series(range(3),
                                        index=date_range('2000-1-1',periods=3,freq='H'))))
 
         with ensure_clean(self.path) as store:
@@ -2228,7 +2231,7 @@ def test_retain_index_attributes(self):
 
             # try to append a table with a different frequency
             warnings.filterwarnings('ignore', category=AttributeConflictWarning)
-            df2 = DataFrame(dict(A = Series(xrange(3),
+            df2 = DataFrame(dict(A = Series(range(3),
                                             index=date_range('2002-1-1',periods=3,freq='D'))))
             store.append('data',df2)
             warnings.filterwarnings('always', category=AttributeConflictWarning)
@@ -2237,10 +2240,10 @@
 
             # this is ok
             _maybe_remove(store,'df2')
-            df2 = DataFrame(dict(A = Series(xrange(3),
+            df2 = DataFrame(dict(A = Series(range(3),
                                             index=[Timestamp('20010101'),Timestamp('20010102'),Timestamp('20020101')])))
             store.append('df2',df2)
-            df3 = DataFrame(dict(A = Series(xrange(3),index=date_range('2002-1-1',periods=3,freq='D'))))
+            df3 = DataFrame(dict(A = Series(range(3),index=date_range('2002-1-1',periods=3,freq='D'))))
             store.append('df2',df3)
 
     def test_retain_index_attributes2(self):
@@ -2249,20 +2252,20 @@ def test_retain_index_attributes2(self):
 
             warnings.filterwarnings('ignore', category=AttributeConflictWarning)
 
-            df = DataFrame(dict(A = Series(xrange(3), index=date_range('2000-1-1',periods=3,freq='H'))))
+            df = DataFrame(dict(A = Series(range(3), index=date_range('2000-1-1',periods=3,freq='H'))))
             df.to_hdf(path,'data',mode='w',append=True)
-            df2 = DataFrame(dict(A = Series(xrange(3), index=date_range('2002-1-1',periods=3,freq='D'))))
+            df2 = DataFrame(dict(A = Series(range(3), index=date_range('2002-1-1',periods=3,freq='D'))))
             df2.to_hdf(path,'data',append=True)
 
             idx = date_range('2000-1-1',periods=3,freq='H')
             idx.name = 'foo'
-            df = DataFrame(dict(A = Series(xrange(3), index=idx)))
+            df = DataFrame(dict(A = Series(range(3), index=idx)))
             df.to_hdf(path,'data',mode='w',append=True)
             self.assert_(read_hdf(path,'data').index.name == 'foo')
 
             idx2 = date_range('2001-1-1',periods=3,freq='H')
             idx2.name = 'bar'
-            df2 = DataFrame(dict(A = Series(xrange(3), index=idx2)))
+            df2 = DataFrame(dict(A = Series(range(3), index=idx2)))
             df2.to_hdf(path,'data',append=True)
             self.assert_(read_hdf(path,'data').index.name is None)
 
@@ -2422,7 +2425,7 @@ def test_coordinates(self):
 
             # get coordinates back & test vs frame
             _maybe_remove(store, 'df')
 
-            df = DataFrame(dict(A=range(5), B=range(5)))
+            df = DataFrame(dict(A=list(range(5)), B=list(range(5))))
             store.append('df', df)
             c = store.select_as_coordinates('df', ['index<3'])
             assert((c.values == np.arange(3)).all() == True)
@@ -2527,11 +2530,11 @@ def test_select_as_multiple(self):
                 expected = concat([df1, df2], axis=1)
                 expected = expected[5:]
                 tm.assert_frame_equal(result, expected)
-            except (Exception), detail:
-                print ("error in select_as_multiple %s" % str(detail))
-                print ("store: %s" % store)
-                print ("df1: %s" % df1)
-                print ("df2: %s" % df2)
+            except (Exception) as detail:
+                print("error in select_as_multiple %s" % str(detail))
+                print("store: %s" % store)
+                print("df1: %s" % df1)
+                print("df2: %s" % df2)
 
         # test exception for diff rows
 
@@ -2751,7 +2754,7 @@ def test_legacy_table_write(self):
                        columns=['A', 'B', 'C'])
         store.append('mi', df)
 
-        df = DataFrame(dict(A = 'foo', B = 'bar'),index=range(10))
+        df = DataFrame(dict(A = 'foo', B = 'bar'),index=list(range(10)))
         store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 })
         store.close()
@@ -2808,7 +2811,7 @@ def test_tseries_indices_frame(self):
 
     def test_unicode_index(self):
 
-        unicode_values = [u'\u03c3', u'\u03c3\u03c3']
+        unicode_values = [six.u('\u03c3'), six.u('\u03c3\u03c3')]
         warnings.filterwarnings('ignore', category=PerformanceWarning)
         s = Series(np.random.randn(len(unicode_values)), unicode_values)
         self._check_roundtrip(s, tm.assert_series_equal)
diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py
index 5b23bf173ec4e..614b401ceaa73 100644
--- a/pandas/io/tests/test_sql.py
+++ b/pandas/io/tests/test_sql.py
@@ -1,5 +1,7 @@
+from __future__ import print_function
 from __future__ import with_statement
 from pandas.util.py3compat import StringIO
+from pandas.util.py3compat import range
 import unittest
 import sqlite3
 import sys
@@ -171,15 +173,15 @@ def _check_roundtrip(self, frame):
         frame['txt'] = ['a'] * len(frame)
 
         frame2 = frame.copy()
-        frame2['Idx'] = Index(range(len(frame2))) + 10
+        frame2['Idx'] = Index(list(range(len(frame2)))) + 10
         sql.write_frame(frame2, name='test_table2', con=self.db)
         result = sql.read_frame("select * from test_table2", self.db,
                                 index_col='Idx')
         expected = frame.copy()
-        expected.index = Index(range(len(frame2))) + 10
+        expected.index = Index(list(range(len(frame2)))) + 10
         expected.index.name = 'Idx'
-        print expected.index.names
-        print result.index.names
+        print(expected.index.names)
+        print(result.index.names)
         tm.assert_frame_equal(expected, result)
 
     def test_tquery(self):
@@ -257,12 +259,12 @@ def setUp(self):
             return
         try:
             self.db = MySQLdb.connect(read_default_group='pandas')
-        except MySQLdb.ProgrammingError, e:
+        except MySQLdb.ProgrammingError as e:
             raise nose.SkipTest(
                 "Create a group of connection parameters under the heading "
                 "[pandas] in your system's mysql default file, "
                 "typically located at ~/.my.cnf or /etc/.my.cnf. ")
-        except MySQLdb.Error, e:
+        except MySQLdb.Error as e:
             raise nose.SkipTest(
                 "Cannot connect to database. "
" "Create a group of connection parameters under the heading " @@ -408,7 +410,7 @@ def _check_roundtrip(self, frame): frame['txt'] = ['a'] * len(frame) frame2 = frame.copy() - index = Index(range(len(frame2))) + 10 + index = Index(list(range(len(frame2)))) + 10 frame2['Idx'] = index drop_sql = "DROP TABLE IF EXISTS test_table2" cur = self.db.cursor() diff --git a/pandas/io/tests/test_wb.py b/pandas/io/tests/test_wb.py index 46eeabaf1e209..e1492c13c151d 100644 --- a/pandas/io/tests/test_wb.py +++ b/pandas/io/tests/test_wb.py @@ -5,20 +5,23 @@ from pandas.util.testing import assert_frame_equal from numpy.testing.decorators import slow from pandas.io.wb import search, download +import six @slow @network def test_wdi_search(): raise nose.SkipTest - expected = {u'id': {2634: u'GDPPCKD', - 4649: u'NY.GDP.PCAP.KD', - 4651: u'NY.GDP.PCAP.KN', - 4653: u'NY.GDP.PCAP.PP.KD'}, - u'name': {2634: u'GDP per Capita, constant US$, millions', - 4649: u'GDP per capita (constant 2000 US$)', - 4651: u'GDP per capita (constant LCU)', - 4653: u'GDP per capita, PPP (constant 2005 international $)'}} + expected = {six.u('id'): {2634: six.u('GDPPCKD'), + 4649: six.u('NY.GDP.PCAP.KD'), + 4651: six.u('NY.GDP.PCAP.KN'), + 4653: six.u('NY.GDP.PCAP.PP.KD')}, + six.u('name'): {2634: six.u('GDP per Capita, constant US$, ' + 'millions'), + 4649: six.u('GDP per capita (constant 2000 US$)'), + 4651: six.u('GDP per capita (constant LCU)'), + 4653: six.u('GDP per capita, PPP (constant 2005 ' + 'international $)')}} result = search('gdp.*capita.*constant').ix[:, :2] expected = pandas.DataFrame(expected) expected.index = result.index @@ -29,7 +32,7 @@ def test_wdi_search(): @network def test_wdi_download(): raise nose.SkipTest - expected = {'GDPPCKN': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'37857.1261134552', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'37081.4575704003', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'72720.0691255285', (u'Mexico', u'2004'): u'74751.6003347038', (u'Mexico', u'2005'): u'76200.2154469437', (u'Canada', u'2005'): u'38617.4563629611'}, 'GDPPCKD': {(u'United States', u'2003'): u'40800.0735367688', (u'Canada', u'2004'): u'34397.055116118', (u'United States', u'2005'): u'42714.8594790102', (u'Canada', u'2003'): u'33692.2812368928', (u'United States', u'2004'): u'41826.1728310667', (u'Mexico', u'2003'): u'7608.43848670658', (u'Mexico', u'2004'): u'7820.99026814334', (u'Mexico', u'2005'): u'7972.55364129367', (u'Canada', u'2005'): u'35087.8925933298'}} + expected = {'GDPPCKN': {(six.u('United States'), six.u('2003')): six.u('40800.0735367688'), (six.u('Canada'), six.u('2004')): six.u('37857.1261134552'), (six.u('United States'), six.u('2005')): six.u('42714.8594790102'), (six.u('Canada'), six.u('2003')): six.u('37081.4575704003'), (six.u('United States'), six.u('2004')): six.u('41826.1728310667'), (six.u('Mexico'), six.u('2003')): six.u('72720.0691255285'), (six.u('Mexico'), six.u('2004')): six.u('74751.6003347038'), (six.u('Mexico'), six.u('2005')): six.u('76200.2154469437'), (six.u('Canada'), six.u('2005')): six.u('38617.4563629611')}, 'GDPPCKD': {(six.u('United States'), six.u('2003')): six.u('40800.0735367688'), (six.u('Canada'), six.u('2004')): six.u('34397.055116118'), (six.u('United States'), six.u('2005')): six.u('42714.8594790102'), (six.u('Canada'), six.u('2003')): six.u('33692.2812368928'), (six.u('United States'), six.u('2004')): six.u('41826.1728310667'), (six.u('Mexico'), six.u('2003')): 
     expected = pandas.DataFrame(expected)
     result = download(country=['CA', 'MX', 'US', 'junk'], indicator=['GDPPCKD',
                       'GDPPCKN', 'junk'], start=2003, end=2005)
diff --git a/pandas/io/wb.py b/pandas/io/wb.py
index f83ed296e360c..5048551cf09b0 100644
--- a/pandas/io/wb.py
+++ b/pandas/io/wb.py
@@ -1,8 +1,11 @@
+from __future__ import print_function
 from urllib2 import urlopen
+from pandas.util.py3compat import range
 import json
 from contextlib import closing
 import pandas
 import numpy as np
+from six.moves import map, reduce
 
 def download(country=['MX', 'CA', 'US'], indicator=['GDPPCKD', 'GDPPCKN'],
@@ -65,10 +68,10 @@ def download(country=['MX', 'CA', 'US'], indicator=['GDPPCKD', 'GDPPCKN'],
                 bad_indicators.append(ind)
     # Warn
     if len(bad_indicators) > 0:
-        print ('Failed to obtain indicator(s): %s' % '; '.join(bad_indicators))
+        print('Failed to obtain indicator(s): %s' % '; '.join(bad_indicators))
         print ('The data may still be available for download at http://data.worldbank.org')
     if len(bad_countries) > 0:
-        print ('Invalid ISO-2 codes: %s' % ' '.join(bad_countries))
+        print('Invalid ISO-2 codes: %s' % ' '.join(bad_countries))
     # Merge WDI series
     if len(data) > 0:
         out = reduce(lambda x, y: x.merge(y, how='outer'), data)
@@ -90,10 +93,10 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US',
         data = response.read()
     # Parse JSON file
     data = json.loads(data)[1]
-    country = map(lambda x: x['country']['value'], data)
-    iso2c = map(lambda x: x['country']['id'], data)
-    year = map(lambda x: x['date'], data)
-    value = map(lambda x: x['value'], data)
+    country = list(map(lambda x: x['country']['value'], data))
+    iso2c = list(map(lambda x: x['country']['id'], data))
+    year = list(map(lambda x: x['date'], data))
+    value = list(map(lambda x: x['value'], data))
     # Prepare output
     out = pandas.DataFrame([country, iso2c, year, value]).T
     return out
@@ -107,10 +110,10 @@ def get_countries():
         data = response.read()
     data = json.loads(data)[1]
     data = pandas.DataFrame(data)
-    data.adminregion = map(lambda x: x['value'], data.adminregion)
-    data.incomeLevel = map(lambda x: x['value'], data.incomeLevel)
-    data.lendingType = map(lambda x: x['value'], data.lendingType)
-    data.region = map(lambda x: x['value'], data.region)
+    data.adminregion = list(map(lambda x: x['value'], data.adminregion))
+    data.incomeLevel = list(map(lambda x: x['value'], data.incomeLevel))
+    data.lendingType = list(map(lambda x: x['value'], data.lendingType))
+    data.region = list(map(lambda x: x['value'], data.region))
    data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'})
     return data
 
@@ -124,7 +127,7 @@ def get_indicators():
     data = json.loads(data)[1]
     data = pandas.DataFrame(data)
     # Clean fields
-    data.source = map(lambda x: x['value'], data.source)
+    data.source = list(map(lambda x: x['value'], data.source))
     fun = lambda x: x.encode('ascii', 'ignore')
     data.sourceOrganization = data.sourceOrganization.apply(fun)
     # Clean topic field
@@ -134,12 +137,12 @@ def get_value(x):
             return x['value']
         except:
             return ''
-    fun = lambda x: map(lambda y: get_value(y), x)
+    fun = lambda x: list(map(lambda y: get_value(y), x))
     data.topics = data.topics.apply(fun)
     data.topics = data.topics.apply(lambda x: ' ; '.join(x))
     # Clean output
     data = data.sort(columns='id')
-    data.index = pandas.Index(range(data.shape[0]))
+    data.index = pandas.Index(list(range(data.shape[0])))
     return data
diff --git a/pandas/rpy/__init__.py b/pandas/rpy/__init__.py
index 3e77a0b0b0109..d5cf8a420b727 100644
--- a/pandas/rpy/__init__.py
+++ b/pandas/rpy/__init__.py
@@ -1,4 +1,4 @@
 try:
-    from common import importr, r, load_data
+    from .common import importr, r, load_data
 except ImportError:
     pass
diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py
index 92adee5bdae57..75065a19da0f4 100644
--- a/pandas/rpy/common.py
+++ b/pandas/rpy/common.py
@@ -2,7 +2,10 @@
 Utilities for making working with rpy2 more user- and developer-friendly.
 """
 
+from __future__ import print_function
+from six.moves import zip
+from pandas.util.py3compat import range
 import numpy as np
 
 import pandas as pd
@@ -73,7 +76,7 @@ def _convert_array(obj):
                       major_axis=name_list[0],
                       minor_axis=name_list[1])
         else:
-            print ('Cannot handle dim=%d' % len(dim))
+            print('Cannot handle dim=%d' % len(dim))
     else:
         return arr
diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py
index f5e57efdcb166..7bc6f818c663b 100644
--- a/pandas/sparse/frame.py
+++ b/pandas/sparse/frame.py
@@ -6,6 +6,8 @@
 # pylint: disable=E1101,E1103,W0231,E0202
 
 from numpy import nan
+from pandas.util.py3compat import range
+from pandas.util import compat
 import numpy as np
 
 from pandas.core.common import _pickle_array, _unpickle_array, _try_sort
@@ -21,6 +23,7 @@
 from pandas.sparse.series import SparseSeries
 from pandas.util.decorators import Appender
 import pandas.lib as lib
+from six.moves import map
 
 class _SparseMockBlockManager(object):
@@ -259,7 +262,7 @@ def get_dtype_counts(self):
         for k, v in self.iteritems():
             d[v.dtype.name] += 1
         return Series(d)
-    
+
     def astype(self, dtype):
         raise NotImplementedError
 
@@ -649,7 +652,7 @@ def _reindex_with_indexers(self, index, row_indexer, columns, col_indexer,
 
     def _rename_index_inplace(self, mapper):
         self.index = [mapper(x) for x in self.index]
-    
+
     def _rename_columns_inplace(self, mapper):
         new_series = {}
         new_columns = []
@@ -850,7 +853,7 @@ def apply(self, func, axis=0, broadcast=False):
 
     def applymap(self, func):
         """
         Apply a function to a DataFrame that is intended to operate
-        elementwise, i.e. like doing map(func, series) for each series in the
+        elementwise, i.e. like doing list(map(func, series)) for each series in the
         DataFrame
 
         Parameters
@@ -862,12 +865,12 @@ def applymap(self, func):
         -------
         applied : DataFrame
         """
-        return self.apply(lambda x: map(func, x))
+        return self.apply(lambda x: list(map(func, x)))
 
     @Appender(DataFrame.fillna.__doc__)
     def fillna(self, value=None, method=None, inplace=False, limit=None):
         new_series = {}
-        for k, v in self.iterkv():
+        for k, v in self.iteritems():
             new_series[k] = v.fillna(value=value, method=method, limit=limit)
 
         if inplace:
diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
index 246e6fa93918f..746b91a898a9c 100644
--- a/pandas/sparse/panel.py
+++ b/pandas/sparse/panel.py
@@ -5,6 +5,9 @@
 
 # pylint: disable=E1101,E1103,W0231
 
+from pandas.util.py3compat import range
+from six.moves import zip
+from pandas.util import compat
 import numpy as np
 
 from pandas.core.index import Index, MultiIndex, _ensure_index
@@ -205,7 +208,7 @@ def set_value(self, item, major, minor, value):
 
     def __delitem__(self, key):
         loc = self.items.get_loc(key)
-        indices = range(loc) + range(loc + 1, len(self.items))
+        indices = list(range(loc)) + list(range(loc + 1, len(self.items)))
         del self._frames[key]
         self._items = self._items.take(indices)
@@ -346,7 +349,7 @@ def _combine(self, other, func, axis=0):
             return self._combinePanel(other, func)
         elif np.isscalar(other):
             new_frames = dict((k, func(v, other))
-                              for k, v in self.iterkv())
+                              for k, v in self.iteritems())
             return self._new_like(new_frames)
 
     def _combineFrame(self, other, func, axis=0):
@@ -423,7 +426,7 @@ def major_xs(self, key):
         y : DataFrame
             index -> minor axis, columns -> items
         """
-        slices = dict((k, v.xs(key)) for k, v in self.iterkv())
+        slices = dict((k, v.xs(key)) for k, v in self.iteritems())
         return DataFrame(slices, index=self.minor_axis, columns=self.items)
 
     def minor_xs(self, key):
@@ -440,7 +443,7 @@ def minor_xs(self, key):
         y : SparseDataFrame
             index -> major axis, columns -> items
         """
-        slices = dict((k, v[key]) for k, v in self.iterkv())
+        slices = dict((k, v[key]) for k, v in self.iteritems())
         return SparseDataFrame(slices, index=self.major_axis,
                                columns=self.items,
                                default_fill_value=self.default_fill_value,
diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py
index a92170621f50d..96edc71d1fe5f 100644
--- a/pandas/sparse/tests/test_array.py
+++ b/pandas/sparse/tests/test_array.py
@@ -1,3 +1,4 @@
+from pandas.util.py3compat import range
 import re
 from numpy import nan, ndarray
 import numpy as np
diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py
index a69385dd9a436..47ad7b0c18d30 100644
--- a/pandas/sparse/tests/test_list.py
+++ b/pandas/sparse/tests/test_list.py
@@ -1,3 +1,4 @@
+from pandas.util.py3compat import range
 import unittest
 
 from numpy import nan
@@ -6,7 +7,7 @@
 from pandas.sparse.api import SparseList, SparseArray
 from pandas.util.testing import assert_almost_equal
 
-from test_sparse import assert_sp_array_equal
+from .test_sparse import assert_sp_array_equal
 
 def assert_sp_list_equal(left, right):
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
index 1382a6a642aa3..75d58f483a1da 100644
--- a/pandas/sparse/tests/test_sparse.py
+++ b/pandas/sparse/tests/test_sparse.py
@@ -1,6 +1,8 @@
 # pylint: disable-msg=E1101,W0612
 
 from unittest import TestCase
+from pandas.util.py3compat import range
+from pandas.util import compat
 import cPickle as pickle
 import operator
 from datetime import datetime
@@ -36,7 +38,7 @@
 import pandas.tests.test_series as test_series
 from pandas.util.py3compat import StringIO
-from test_array import assert_sp_array_equal
+from .test_array import assert_sp_array_equal
 
 import warnings
 warnings.filterwarnings(action='ignore', category=FutureWarning)
@@ -105,7 +107,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True):
 
 def assert_sp_panel_equal(left, right, exact_indices=True):
-    for item, frame in left.iterkv():
+    for item, frame in left.iteritems():
         assert(item in right)
 
         # trade-off?
         assert_sp_frame_equal(frame, right[item],
                               exact_indices=exact_indices)
@@ -315,7 +317,7 @@ def _check_getitem(sp, dense):
             for idx, val in dense.iteritems():
                 assert_almost_equal(val, sp[idx])
 
-            for i in xrange(len(dense)):
+            for i in range(len(dense)):
                 assert_almost_equal(sp[i], dense[i])
                 # j = np.float64(i)
                 # assert_almost_equal(sp[j], dense[j])
@@ -826,7 +828,7 @@ def test_constructor_dataframe(self):
 
     def test_constructor_convert_index_once(self):
         arr = np.array([1.5, 2.5, 3.5])
-        sdf = SparseDataFrame(columns=range(4), index=arr)
+        sdf = SparseDataFrame(columns=list(range(4)), index=arr)
         self.assertTrue(sdf[0].index is sdf[1].index)
 
     def test_constructor_from_series(self):
@@ -843,7 +845,7 @@ def test_constructor_from_series(self):
         x2 = x.astype(float)
         x2.ix[:9998] = np.NaN
         x_sparse = x2.to_sparse(fill_value=np.NaN)
-        
+
         # Currently fails too with weird ufunc error
         # df1 = SparseDataFrame([x_sparse, y])
@@ -867,7 +869,7 @@ def test_str(self):
         sdf = df.to_sparse()
         str(sdf)
-        
+
     def test_array_interface(self):
         res = np.sqrt(self.frame)
         dres = np.sqrt(self.frame.to_dense())
@@ -1217,7 +1219,7 @@ def test_astype(self):
         self.assertRaises(Exception, self.frame.astype, np.int64)
 
     def test_fillna(self):
-        df = self.zframe.reindex(range(5))
+        df = self.zframe.reindex(list(range(5)))
         result = df.fillna(0)
         expected = df.to_dense().fillna(0).to_sparse(fill_value=0)
         assert_sp_frame_equal(result, expected)
diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py
index 2d5873393de08..040e12922d26c 100644
--- a/pandas/src/generate_code.py
+++ b/pandas/src/generate_code.py
@@ -1,5 +1,7 @@
+from __future__ import print_function
+from pandas.util.py3compat import range
 import os
-from cStringIO import StringIO
+from pandas.util.py3compat import cStringIO as StringIO
 
 header = """
 cimport numpy as np
@@ -2290,21 +2292,21 @@ def generate_from_template(template, exclude=None):
 
 def generate_take_cython_file(path='generated.pyx'):
     with open(path, 'w') as f:
-        print >> f, header
+        print(header, file=f)
 
-        print >> f, generate_ensure_dtypes()
+        print(generate_ensure_dtypes(), file=f)
 
         for template in templates_1d:
-            print >> f, generate_from_template(template)
+            print(generate_from_template(template), file=f)
 
         for template in take_templates:
-            print >> f, generate_take_template(template)
+            print(generate_take_template(template), file=f)
 
         for template in put_2d:
-            print >> f, generate_put_template(template)
+            print(generate_put_template(template), file=f)
 
         for template in groupbys:
-            print >> f, generate_put_template(template, use_ints = False)
+            print(generate_put_template(template, use_ints = False), file=f)
 
         # for template in templates_1d_datetime:
         #    print >> f, generate_from_template_datetime(template)
 
         # for template in templates_2d_datetime:
         #    print >> f, generate_from_template_datetime(template, ndim=2)
 
         for template in nobool_1d_templates:
-            print >> f, generate_from_template(template, exclude=['bool'])
+            print(generate_from_template(template, exclude=['bool']), file=f)
 
 if __name__ == '__main__':
     generate_take_cython_file()
diff --git a/pandas/src/offsets.pyx b/pandas/src/offsets.pyx
index 1823edeb0a4d9..096198c8a05fa 100644
--- a/pandas/src/offsets.pyx
+++ b/pandas/src/offsets.pyx
@@ -85,6 +85,10 @@ cdef class _Offset:
     cpdef next(self):
         pass
 
+    cpdef __next__(self):
+        """wrapper around next"""
+        return self.next()
+
     cpdef prev(self):
         pass
diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py
index 967199c0bcf69..9e4e62a07d46d 100644
--- a/pandas/stats/fama_macbeth.py
+++ b/pandas/stats/fama_macbeth.py
@@ -1,6 +1,7 @@
 from pandas.core.base import StringMixin
 from pandas.util.py3compat import StringIO
+from pandas.util.py3compat import range
 import numpy as np
 
 from pandas.core.api import Series, DataFrame
@@ -173,7 +174,7 @@ def _calc_stats(self):
     start = self._window - 1
     betas = self._beta_raw
-    for i in xrange(start, self._T):
+    for i in range(start, self._T):
         if self._is_rolling:
             begin = i - start
         else:
@@ -213,7 +214,7 @@ def _calc_t_stat(beta, nw_lags_beta):
     C = np.dot(B.T, B) / N
 
     if nw_lags_beta is not None:
-        for i in xrange(nw_lags_beta + 1):
+        for i in range(nw_lags_beta + 1):
             cov = np.dot(B[i:].T, B[:(N - i)]) / N
 
             weight = i / (nw_lags_beta + 1)
diff --git a/pandas/stats/math.py b/pandas/stats/math.py
index 579d49edb8511..7a36654a4395f 100644
--- a/pandas/stats/math.py
+++ b/pandas/stats/math.py
@@ -3,6 +3,7 @@
 
 from __future__ import division
 
+from pandas.util.py3compat import range
 import numpy as np
 import numpy.linalg as linalg
 
@@ -70,7 +71,7 @@ def newey_west(m, max_lags, nobs, df, nw_overlap=False):
        Covariance Matrix, Econometrica, vol. 55(3), 703-708
     """
     Xeps = np.dot(m.T, m)
-    for lag in xrange(1, max_lags + 1):
+    for lag in range(1, max_lags + 1):
         auto_cov = np.dot(m[:-lag].T, m[lag:])
         weight = lag / (max_lags + 1)
         if nw_overlap:
diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py
index e81319cb79c94..3e5db98d8335e 100644
--- a/pandas/stats/misc.py
+++ b/pandas/stats/misc.py
@@ -1,8 +1,11 @@
 from numpy import NaN
+from pandas.util import compat
 import numpy as np
 
 from pandas.core.api import Series, DataFrame, isnull, notnull
 from pandas.core.series import remove_na
+import six
+from six.moves import zip
 
 def zscore(series):
@@ -21,7 +24,7 @@ def correl_ts(frame1, frame2):
     y : Series
     """
     results = {}
-    for col, series in frame1.iteritems():
+    for col, series in compat.iteritems(frame1):
         if col in frame2:
             other = frame2[col]
 
@@ -82,15 +85,15 @@ def percentileRank(frame, column=None, kind='mean'):
     framet = frame.T
     if column is not None:
         if isinstance(column, Series):
-            for date, xs in frame.T.iteritems():
+            for date, xs in compat.iteritems(frame.T):
                 results[date] = fun(xs, column.get(date, NaN))
         else:
-            for date, xs in frame.T.iteritems():
+            for date, xs in compat.iteritems(frame.T):
                 results[date] = fun(xs, xs[column])
         results = Series(results)
     else:
         for column in frame.columns:
-            for date, xs in framet.iteritems():
+            for date, xs in compat.iteritems(framet):
                 results.setdefault(date, {})[column] = fun(xs, xs[column])
         results = DataFrame(results).T
     return results
diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py
index 742d832a923d8..e9563dcd16f72 100644
--- a/pandas/stats/ols.py
+++ b/pandas/stats/ols.py
@@ -4,9 +4,12 @@
 
 # pylint: disable-msg=W0201
 
-from itertools import izip, starmap
-from StringIO import StringIO
+from six.moves import zip
+from itertools import starmap
+from pandas.util.py3compat import StringIO
 
+from pandas.util.py3compat import range
+from pandas.util import compat
 import numpy as np
 
 from pandas.core.api import DataFrame, Series, isnull
@@ -41,7 +44,7 @@ class OLS(StringMixin):
         Number of Newey-West lags.
     nw_overlap : boolean, default False
         Assume data is overlapping when computing Newey-West estimator
-    
+
     """
     _panel_model = False
@@ -610,15 +613,15 @@ class MovingOLS(OLS):
     window : int
         size of window (for rolling/expanding OLS)
     min_periods : int
-        Threshold of non-null data points to require. 
-        If None, defaults to size of window. 
+        Threshold of non-null data points to require.
+        If None, defaults to size of window.
     intercept : bool
         True if you want an intercept.
     nw_lags : None or int
         Number of Newey-West lags.
     nw_overlap : boolean, default False
         Assume data is overlapping when computing Newey-West estimator
-    
+
     """
     def __init__(self, y, x, weights=None, window_type='expanding',
                  window=None, min_periods=None, intercept=True,
@@ -743,7 +746,7 @@ def var_beta(self):
         """Returns the covariance of beta."""
         result = {}
         result_index = self._result_index
-        for i in xrange(len(self._var_beta_raw)):
+        for i in range(len(self._var_beta_raw)):
             dm = DataFrame(self._var_beta_raw[i], columns=self.beta.columns,
                            index=self.beta.columns)
             result[result_index[i]] = dm
@@ -803,7 +806,7 @@ def _calc_betas(self, x, y):
         cum_xx = self._cum_xx(x)
         cum_xy = self._cum_xy(x, y)
 
-        for i in xrange(N):
+        for i in range(N):
             if not valid[i] or not enough[i]:
                 continue
 
@@ -948,7 +951,7 @@ def get_result_simple(Fst, d):
             return Fst, (q, d), 1 - f.cdf(Fst, q, d)
 
         # Compute the P-value for each pair
-        result = starmap(get_result_simple, izip(F, df_resid))
+        result = starmap(get_result_simple, zip(F, df_resid))
 
         return list(result)
 
@@ -968,7 +971,7 @@ def get_result(beta, vcov, n, d):
             return math.calc_F(R, r, beta, vcov, n, d)
 
         results = starmap(get_result,
-                          izip(self._beta_raw, self._var_beta_raw, nobs, df))
+                          zip(self._beta_raw, self._var_beta_raw, nobs, df))
 
         return list(results)
 
@@ -978,7 +981,7 @@ def _p_value_raw(self):
         from scipy.stats import t
 
         result = [2 * t.sf(a, b)
-                  for a, b in izip(np.fabs(self._t_stat_raw),
+                  for a, b in zip(np.fabs(self._t_stat_raw),
                                   self._df_resid_raw)]
 
         return np.array(result)
@@ -1062,7 +1065,7 @@ def _resid_raw(self):
     def _std_err_raw(self):
         """Returns the raw standard err values."""
         results = []
-        for i in xrange(len(self._var_beta_raw)):
+        for i in range(len(self._var_beta_raw)):
             results.append(np.sqrt(np.diag(self._var_beta_raw[i])))
 
         return np.array(results)
diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py
index e8c413ec4739c..44f0dcf2bc16a 100644
--- a/pandas/stats/plm.py
+++ b/pandas/stats/plm.py
@@ -6,6 +6,8 @@
 # pylint: disable-msg=E1101,E1103
 
 from __future__ import division
+from pandas.util.py3compat import range
+from pandas.util import compat
 import warnings
 import numpy as np
diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py
index ef262cfaf44bb..593d6ab5e2945 100644
--- a/pandas/stats/tests/test_fama_macbeth.py
+++ b/pandas/stats/tests/test_fama_macbeth.py
@@ -1,7 +1,8 @@
 from pandas import DataFrame, Panel
 from pandas.stats.api import fama_macbeth
-from common import assert_almost_equal, BaseTest
+from .common import assert_almost_equal, BaseTest
 
+from pandas.util.py3compat import range
 import numpy as np
 
@@ -28,7 +29,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds):
         index = result._index
         time = len(index)
 
-        for i in xrange(time - window + 1):
+        for i in range(time - window + 1):
             if window_type == 'rolling':
                 start = index[i]
             else:
@@ -37,7 +38,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds):
 
             end = index[i + window - 1]
 
             x2 = {}
-            for k, v in x.iterkv():
+            for k, v in x.iteritems():
                 x2[k] = v.truncate(start, end)
             y2 = y.truncate(start, end)
diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py
index 6312a28595935..c948d2abaca29 100644
--- a/pandas/stats/tests/test_moments.py
+++ b/pandas/stats/tests/test_moments.py
@@ -1,3 +1,5 @@
+from pandas.util.py3compat import range
+from six.moves import zip
 import unittest
 import nose
 import sys
@@ -487,7 +489,7 @@ def _check_structures(self, func, static_comp,
             assert_frame_equal(frame_xp, frame_rs)
 
     def test_legacy_time_rule_arg(self):
-        from StringIO import StringIO
+        from pandas.util.py3compat import StringIO
         # suppress deprecation warnings
         sys.stderr = StringIO()
diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py
index 88f9224e8975a..cbfbc0ad11e57 100644
--- a/pandas/stats/tests/test_ols.py
+++ b/pandas/stats/tests/test_ols.py
@@ -7,6 +7,7 @@
 from __future__ import division
 
 from datetime import datetime
+from pandas.util import compat
 import unittest
 import nose
 import numpy as np
@@ -23,6 +24,7 @@
 import pandas.util.testing as tm
 
 from common import BaseTest
+import six
 
 _have_statsmodels = True
 try:
@@ -196,7 +198,7 @@ def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
             date = index[i]
 
             x_iter = {}
-            for k, v in x.iteritems():
+            for k, v in compat.iteritems(x):
                 x_iter[k] = v.truncate(before=prior_date, after=date)
             y_iter = y.truncate(before=prior_date, after=date)
 
@@ -529,7 +531,7 @@ def test_wls_panel(self):
 
         stack_y = y.stack()
         stack_x = DataFrame(dict((k, v.stack())
-                                 for k, v in x.iterkv()))
+                                 for k, v in x.iteritems()))
 
         weights = x.std('items')
         stack_weights = weights.stack()
@@ -722,7 +724,7 @@ def checkMovingOLS(self, x, y, window_type='rolling', **kwds):
             date = index[i]
 
             x_iter = {}
-            for k, v in x.iteritems():
+            for k, v in compat.iteritems(x):
                 x_iter[k] = v.truncate(before=prior_date, after=date)
             y_iter = y.truncate(before=prior_date, after=date)
diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py
index cbaacd0e89b6e..99ee9f3bf93a1 100644
--- a/pandas/stats/tests/test_var.py
+++ b/pandas/stats/tests/test_var.py
@@ -1,7 +1,9 @@
+from __future__ import print_function
 from numpy.testing import run_module_suite, assert_equal, TestCase
 
 from pandas.util.testing import assert_almost_equal
+from pandas.util.py3compat import range
 import nose
 import unittest
 
@@ -124,10 +126,10 @@ def beta(self):
         return rpy.convert_robj(r.coef(self._estimate))
 
     def summary(self, equation=None):
-        print (r.summary(self._estimate, equation=equation))
+        print(r.summary(self._estimate, equation=equation))
 
     def output(self):
-        print (self._estimate)
+        print(self._estimate)
 
     def estimate(self):
         self._estimate = r.VAR(self.rdata, p=self.p, type=self.type)
@@ -144,7 +146,7 @@ def serial_test(self, lags_pt=16, type='PT.asymptotic'):
         return test
 
     def data_summary(self):
-        print (r.summary(self.rdata))
+        print(r.summary(self.rdata))
 
 class TestVAR(TestCase):
diff --git a/pandas/stats/var.py b/pandas/stats/var.py
index 8953f7badfefb..b10d6b9fa3eb8 100644
--- a/pandas/stats/var.py
+++ b/pandas/stats/var.py
@@ -1,5 +1,8 @@
 from __future__ import division
 
+from pandas.util.py3compat import range
+from six.moves import zip, reduce
+from pandas.util import compat
 import numpy as np
 from pandas.core.base import StringMixin
 from pandas.util.decorators import cache_readonly
@@ -77,7 +80,7 @@ def forecast(self, h):
         DataFrame
         """
         forecast = self._forecast_raw(h)[:, 0, :]
-        return DataFrame(forecast, index=xrange(1, 1 + h),
+        return DataFrame(forecast, index=range(1, 1 + h),
                          columns=self._columns)
 
     def forecast_cov(self, h):
@@ -100,7 +103,7 @@ def forecast_std_err(self, h):
         DataFrame
         """
         return DataFrame(self._forecast_std_err_raw(h),
-                         index=xrange(1, 1 + h), columns=self._columns)
+                         index=range(1, 1 + h), columns=self._columns)
 
     @cache_readonly
     def granger_causality(self):
@@ -128,7 +131,7 @@ def granger_causality(self):
         d = {}
         for col in self._columns:
             d[col] = {}
-            for i in xrange(1, 1 + self._p):
+            for i in range(1, 1 + self._p):
                 lagged_data = self._lagged_data[i].filter(
                     self._columns - [col])
 
@@ -190,7 +193,7 @@ def ols_results(self):
         from pandas.stats.api import ols
 
         d = {}
-        for i in xrange(1, 1 + self._p):
+        for i in range(1, 1 + self._p):
             for col, series in self._lagged_data[i].iteritems():
                 d[_make_param_name(i, col)] = series
 
@@ -278,7 +281,7 @@ def _trans_B(self, h):
 
         result.append(trans_B)
 
-        for i in xrange(2, h):
+        for i in range(2, h):
             result.append(np.dot(trans_B, result[i - 1]))
 
         return result
@@ -287,7 +290,7 @@ def _trans_B(self, h):
     def _x(self):
         values = np.array([
             self._lagged_data[i][col].values()
-            for i in xrange(1, 1 + self._p)
+            for i in range(1, 1 + self._p)
             for col in self._columns
         ]).T
 
@@ -315,7 +318,7 @@ def _forecast_cov_raw(self, n):
         resid = self._forecast_cov_resid_raw(n)
         # beta = self._forecast_cov_beta_raw(n)
 
-        # return [a + b for a, b in izip(resid, beta)]
+        # return [a + b for a, b in zip(resid, beta)]
         # TODO: ignore the beta forecast std err until it's verified
 
         return resid
@@ -332,7 +335,7 @@ def _forecast_cov_beta_raw(self, n):
 
         results = []
 
-        for h in xrange(1, n + 1):
+        for h in range(1, n + 1):
             psi = self._psi(h)
             trans_B = self._trans_B(h)
 
@@ -340,14 +343,14 @@ def _forecast_cov_beta_raw(self, n):
 
             cov_beta = self._cov_beta
 
-            for t in xrange(T + 1):
+            for t in range(T + 1):
                 index = t + p
-                y = values.take(xrange(index, index - p, -1), axis=0).ravel()
+                y = values.take(range(index, index - p, -1), axis=0).ravel()
                 trans_Z = np.hstack(([1], y))
                 trans_Z = trans_Z.reshape(1, len(trans_Z))
 
                 sum2 = 0
-                for i in xrange(h):
+                for i in range(h):
                     ZB = np.dot(trans_Z, trans_B[h - 1 - i])
 
                     prod = np.kron(ZB, psi[i])
@@ -367,7 +370,7 @@ def _forecast_cov_resid_raw(self, h):
         psi_values = self._psi(h)
         sum = 0
         result = []
-        for i in xrange(h):
+        for i in range(h):
             psi = psi_values[i]
             sum = sum + chain_dot(psi, self._sigma, psi.T)
             result.append(sum)
@@ -380,9 +383,9 @@ def _forecast_raw(self, h):
         """
         k = self._k
         result = []
-        for i in xrange(h):
+        for i in range(h):
             sum = self._alpha.reshape(1, k)
-            for j in xrange(self._p):
+            for j in range(self._p):
                 beta = self._lag_betas[j]
                 idx = i - j
                 if idx > 0:
@@ -429,12 +432,12 @@ def _lag_betas(self):
         """
         k = self._k
         b = self._beta_raw
-        return [b[k * i: k * (i + 1)].T for i in xrange(self._p)]
+        return [b[k * i: k * (i + 1)].T for i in range(self._p)]
 
     @cache_readonly
     def _lagged_data(self):
         return dict([(i, self._data.shift(i))
-                     for i in xrange(1, 1 + self._p)])
+                     for i in range(1, 1 + self._p)])
 
     @cache_readonly
     def _nobs(self):
@@ -448,10 +451,10 @@ def _psi(self, h):
         """
         k = self._k
         result = [np.eye(k)]
-        for i in xrange(1, h):
+        for i in range(1, h):
             result.append(sum(
                 [np.dot(result[i - j], self._lag_betas[j - 1])
-                 for j in xrange(1, 1 + i)
+                 for j in range(1, 1 + i)
                  if j <= self._p]))
 
         return result
@@ -532,7 +535,7 @@ def forecast(self, h):
         Returns the forecasts at 1, 2, ..., n timesteps in the future.
""" forecast = self._forecast_raw(h).T.swapaxes(1, 2) - index = xrange(1, 1 + h) + index = range(1, 1 + h) w = Panel(forecast, items=self._data.items, major_axis=index, minor_axis=self._data.minor_axis) return w diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 8706bb9cf7f4f..1e04403b398a5 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import unittest import numpy as np @@ -46,7 +47,7 @@ def test_objects(self): def test_object_refcount_bug(self): lst = ['A', 'B', 'C', 'D', 'E'] - for i in xrange(1000): + for i in range(1000): len(algos.unique(lst)) def test_on_index_object(self): diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 48db7afa29aaa..2b70fbcff8499 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,6 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime +from pandas.util.py3compat import range import unittest import nose @@ -103,7 +104,7 @@ def test_value_counts(self): def test_na_flags_int_levels(self): # #1457 - levels = range(10) + levels = list(range(10)) labels = np.random.randint(0, 10, 20) labels[::5] = -1 diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 3212105562446..048b4c6f15b28 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,4 +1,5 @@ from datetime import datetime +from pandas.util.py3compat import range, long import sys import re @@ -15,6 +16,8 @@ from pandas.tslib import iNaT from pandas.util import py3compat +import six +from six.moves import map _multiprocess_can_split_ = True @@ -24,7 +27,7 @@ def test_is_sequence(): assert(is_seq((1, 2))) assert(is_seq([1, 2])) assert(not is_seq("abcd")) - assert(not is_seq(u"abcd")) + assert(not is_seq(six.u("abcd"))) assert(not is_seq(np.int64)) class A(object): @@ -94,7 +97,7 @@ def test_isnull_lists(): result = isnull(['foo', 'bar']) assert(not result.any()) - result = isnull([u'foo', u'bar']) + result = isnull([six.u('foo'), six.u('bar')]) assert(not result.any()) @@ -120,7 +123,7 @@ def test_datetimeindex_from_empty_datetime64_array(): def test_nan_to_nat_conversions(): df = DataFrame(dict({ - 'A' : np.asarray(range(10),dtype='float64'), + 'A' : np.asarray(list(range(10)),dtype='float64'), 'B' : Timestamp('20010101') })) df.iloc[3:6,:] = np.nan result = df.loc[4,'B'].value @@ -176,7 +179,7 @@ def test_iterpairs(): def test_split_ranges(): def _bin(x, width): "return int(x) as a base2 string of given width" - return ''.join(str((x >> i) & 1) for i in xrange(width - 1, -1, -1)) + return ''.join(str((x >> i) & 1) for i in range(width - 1, -1, -1)) def test_locs(mask): nfalse = sum(np.array(mask) == 0) @@ -193,7 +196,7 @@ def test_locs(mask): # exhaustively test all possible mask sequences of length 8 ncols = 8 for i in range(2 ** ncols): - cols = map(int, list(_bin(i, ncols))) # count up in base2 + cols = list(map(int, list(_bin(i, ncols)))) # count up in base2 mask = [cols[i] == 1 for i in range(len(cols))] test_locs(mask) @@ -332,8 +335,8 @@ def test_is_re(): def test_is_recompilable(): - passes = (r'a', u'x', r'asdf', re.compile('adsf'), ur'\u2233\s*', - re.compile(r'')) + passes = (r'a', six.u('x'), r'asdf', re.compile('adsf'), + six.u(r'\u2233\s*'), re.compile(r'')) fails = 1, [], object() for p in passes: @@ -720,7 +723,7 @@ def test_2d_float32(self): def test_2d_datetime64(self): # 2005/01/01 - 2006/01/01 - arr = np.random.randint(11045376L, 11360736L, 
(5,3))*100000000000 + arr = np.random.randint(long(11045376), long(11360736), (5,3))*100000000000 arr = arr.view(dtype='datetime64[ns]') indexer = [0, 2, -1, 1, -1] diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index ba0a9926dfa78..018440dd09f87 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -1,3 +1,4 @@ +from __future__ import print_function # pylint: disable-msg=W0612,E1101 import unittest diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index bca38ba55e205..4b7ca2c701611 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,10 +1,13 @@ +from __future__ import print_function # -*- coding: utf-8 -*- try: - from StringIO import StringIO + from pandas.util.py3compat import StringIO except: from io import StringIO +from pandas.util.py3compat import range +from six.moves import zip import os import sys import unittest @@ -16,7 +19,7 @@ import numpy as np from pandas import DataFrame, Series, Index -from pandas.util.py3compat import lzip, PY3 +from pandas.util.py3compat import PY3 import pandas.core.format as fmt import pandas.util.testing as tm @@ -25,6 +28,7 @@ import pandas as pd from pandas.core.config import (set_option, get_option, option_context, reset_option) +import six _frame = DataFrame(tm.getSeriesData()) @@ -86,7 +90,7 @@ def test_eng_float_formatter(self): def test_repr_tuples(self): buf = StringIO() - df = DataFrame({'tups': zip(range(10), range(10))}) + df = DataFrame({'tups': list(zip(range(10), range(10)))}) repr(df) df.to_string(col_space=10, buf=buf) @@ -101,7 +105,7 @@ def test_repr_truncation(self): _strlen = fmt._strlen_func() - for line, value in zip(r.split('\n'), df['B']): + for line, value in list(zip(r.split('\n'), df['B'])): if _strlen(value) + 1 > max_len: self.assert_('...' 
in line) else: @@ -132,10 +136,10 @@ def test_repr_obeys_max_seq_limit(self): #unlimited reset_option("display.max_seq_items") - self.assertTrue(len(com.pprint_thing(range(1000)))> 2000) + self.assertTrue(len(com.pprint_thing(list(range(1000))))> 2000) with option_context("display.max_seq_items",5): - self.assertTrue(len(com.pprint_thing(range(1000)))< 100) + self.assertTrue(len(com.pprint_thing(list(range(1000))))< 100) def test_repr_is_valid_construction_code(self): import pandas as pd @@ -154,8 +158,9 @@ def test_repr_should_return_str(self): data = [8, 5, 3, 5] - index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"] - cols = [u"\u03c8"] + index1 = [six.u("\u03c3"), six.u("\u03c4"), six.u("\u03c5"), + six.u("\u03c6")] + cols = [six.u("\u03c8")] df = DataFrame(data, columns=cols, index=index1) self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 @@ -166,8 +171,8 @@ def test_repr_no_backslash(self): def test_expand_frame_repr(self): df_small = DataFrame('hello', [0], [0]) - df_wide = DataFrame('hello', [0], range(10)) - df_tall = DataFrame('hello', range(30), range(5)) + df_wide = DataFrame('hello', [0], list(range(10))) + df_tall = DataFrame('hello', list(range(30)), list(range(5))) with option_context('mode.sim_interactive', True): with option_context('display.max_columns', 10, @@ -192,7 +197,7 @@ def test_expand_frame_repr(self): def test_repr_non_interactive(self): # in non interactive mode, there can be no dependency on the # result of terminal auto size detection - df = DataFrame('hello', range(1000), range(5)) + df = DataFrame('hello', list(range(1000)), list(range(5))) with option_context('mode.sim_interactive', False, 'display.width', 0, @@ -247,7 +252,7 @@ def mkframe(n): def test_to_string_repr_unicode(self): buf = StringIO() - unicode_values = [u'\u03c3'] * 10 + unicode_values = [six.u('\u03c3')] * 10 unicode_values = np.array(unicode_values, dtype=object) df = DataFrame({'unicode': unicode_values}) df.to_string(col_space=10, buf=buf) @@ -255,7 +260,7 @@ def test_to_string_repr_unicode(self): # it works! 
repr(df) - idx = Index(['abc', u'\u03c3a', 'aegdvg']) + idx = Index(['abc', six.u('\u03c3a'), 'aegdvg']) ser = Series(np.random.randn(len(idx)), idx) rs = repr(ser).split('\n') line_len = len(rs[0]) @@ -276,7 +281,7 @@ def test_to_string_repr_unicode(self): sys.stdin = _stdin def test_to_string_unicode_columns(self): - df = DataFrame({u'\u03c3': np.arange(10.)}) + df = DataFrame({six.u('\u03c3'): np.arange(10.)}) buf = StringIO() df.to_string(buf=buf) @@ -290,14 +295,14 @@ def test_to_string_unicode_columns(self): self.assert_(isinstance(result, unicode)) def test_to_string_utf8_columns(self): - n = u"\u05d0".encode('utf-8') + n = six.u("\u05d0").encode('utf-8') with option_context('display.max_rows', 1): df = pd.DataFrame([1, 2], columns=[n]) repr(df) def test_to_string_unicode_two(self): - dm = DataFrame({u'c/\u03c3': []}) + dm = DataFrame({six.u('c/\u03c3'): []}) buf = StringIO() dm.to_string(buf) @@ -316,7 +321,7 @@ def test_to_string_with_formatters(self): ('float', lambda x: '[% 4.1f]' % x), ('object', lambda x: '-%s-' % str(x))] result = df.to_string(formatters=dict(formatters)) - result2 = df.to_string(formatters=lzip(*formatters)[1]) + result2 = df.to_string(formatters=list(zip(*formatters))[1]) self.assertEqual(result, (' int float object\n' '0 0x1 [ 1.0] -(1, 2)-\n' '1 0x2 [ 2.0] -True-\n' @@ -324,21 +329,20 @@ def test_to_string_with_formatters(self): self.assertEqual(result, result2) def test_to_string_with_formatters_unicode(self): - df = DataFrame({u'c/\u03c3': [1, 2, 3]}) - result = df.to_string(formatters={u'c/\u03c3': lambda x: '%s' % x}) - self.assertEqual(result, (u' c/\u03c3\n' - '0 1\n' - '1 2\n' - '2 3')) + df = DataFrame({six.u('c/\u03c3'): [1, 2, 3]}) + result = df.to_string(formatters={six.u('c/\u03c3'): + lambda x: '%s' % x}) + self.assertEqual(result, six.u(' c/\u03c3\n') + + '0 1\n1 2\n2 3') def test_to_string_buffer_all_unicode(self): buf = StringIO() - empty = DataFrame({u'c/\u03c3': Series()}) - nonempty = DataFrame({u'c/\u03c3': Series([1, 2, 3])}) + empty = DataFrame({six.u('c/\u03c3'): Series()}) + nonempty = DataFrame({six.u('c/\u03c3'): Series([1, 2, 3])}) - print >>buf, empty - print >>buf, nonempty + print(empty, file=buf) + print(nonempty, file=buf) # this should work buf.getvalue() @@ -376,9 +380,9 @@ def test_to_html_with_empty_string_label(self): def test_to_html_unicode(self): # it works! 
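The unicode(...) -> six.text_type(...) swap in the hunk just above works because six.text_type is nothing more than an alias for the interpreter's native text class. A one-line sketch, assuming six's documented behavior (str on Python 3, unicode on Python 2); this is an illustration, not the patch's own code:

    import sys

    # Assumption: mirrors six.text_type as documented, not copied from six itself.
    # Only the chosen branch of the conditional is evaluated, so the bare
    # 'unicode' name is never touched on Python 3.
    text_type = str if sys.version_info[0] >= 3 else unicode

    text_type('abc')   # unicode('abc') on 2.x, str('abc') on 3.x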
- df = DataFrame({u'\u03c3': np.arange(10.)}) + df = DataFrame({six.u('\u03c3'): np.arange(10.)}) df.to_html() - df = DataFrame({'A': [u'\u03c3']}) + df = DataFrame({'A': [six.u('\u03c3')]}) df.to_html() def test_to_html_escaped(self): @@ -657,7 +661,7 @@ def test_to_html_multiindex_sparsify(self): def test_to_html_index_formatter(self): df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=['foo', None], index=range(4)) + columns=['foo', None], index=list(range(4))) f = lambda x: 'abcd'[x] result = df.to_html(formatters={'__index__': f}) @@ -702,8 +706,8 @@ def test_nonunicode_nonascii_alignment(self): self.assert_(len(lines[1]) == len(lines[2])) def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({u'c/\u03c3': Series({'test': np.NaN})}) - unicode(dm.to_string()) + dm = DataFrame({six.u('c/\u03c3'): Series({'test': np.NaN})}) + six.text_type(dm.to_string()) def test_string_repr_encoding(self): filepath = tm.get_data_path('unicode_series.csv') @@ -771,17 +775,24 @@ def test_pprint_thing(self): if PY3: raise nose.SkipTest() - self.assertEquals(pp_t('a') , u'a') - self.assertEquals(pp_t(u'a') , u'a') + self.assertEquals(pp_t('a') , six.u('a')) + self.assertEquals(pp_t(six.u('a')) , six.u('a')) self.assertEquals(pp_t(None) , 'None') - self.assertEquals(pp_t(u'\u05d0',quote_strings=True) , u"u'\u05d0'") - self.assertEquals(pp_t(u'\u05d0',quote_strings=False) , u'\u05d0') - self.assertEquals(pp_t((u'\u05d0', u'\u05d1'),quote_strings=True) , - u"(u'\u05d0', u'\u05d1')") - self.assertEquals(pp_t((u'\u05d0', (u'\u05d1', u'\u05d2')),quote_strings=True) , - u"(u'\u05d0', (u'\u05d1', u'\u05d2'))") - self.assertEquals(pp_t(('foo', u'\u05d0', (u'\u05d0', u'\u05d0')),quote_strings=True) - , u"(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))") + self.assertEquals(pp_t(six.u('\u05d0'), quote_strings=True), + six.u("u'\u05d0'")) + self.assertEquals(pp_t(six.u('\u05d0'), quote_strings=False), + six.u('\u05d0')) + self.assertEquals(pp_t((six.u('\u05d0'), + six.u('\u05d1')), quote_strings=True), + six.u("(u'\u05d0', u'\u05d1')")) + self.assertEquals(pp_t((six.u('\u05d0'), (six.u('\u05d1'), + six.u('\u05d2'))), + quote_strings=True), + six.u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) + self.assertEquals(pp_t(('foo', six.u('\u05d0'), (six.u('\u05d0'), + six.u('\u05d0'))), + quote_strings=True), + six.u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) # escape embedded tabs in string # GH #2038 @@ -789,7 +800,7 @@ def test_pprint_thing(self): def test_wide_repr(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] max_cols = get_option('display.max_columns') df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) set_option('display.expand_frame_repr', False) @@ -813,7 +824,7 @@ def test_wide_repr_wide_columns(self): def test_wide_repr_named(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] max_cols = get_option('display.max_columns') df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) df.index.name = 'DataFrame Index' @@ -835,7 +846,7 @@ def test_wide_repr_named(self): def test_wide_repr_multiindex(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)), np.array(col(10, 5))]) max_cols = 
get_option('display.max_columns') @@ -860,7 +871,7 @@ def test_wide_repr_multiindex(self): def test_wide_repr_multiindex_cols(self): with option_context('mode.sim_interactive', True): max_cols = get_option('display.max_columns') - col = lambda l, k: [tm.rands(k) for _ in xrange(l)] + col = lambda l, k: [tm.rands(k) for _ in range(l)] midx = pandas.MultiIndex.from_arrays([np.array(col(10, 5)), np.array(col(10, 5))]) mcols = pandas.MultiIndex.from_arrays([np.array(col(max_cols-1, 3)), @@ -882,7 +893,7 @@ def test_wide_repr_multiindex_cols(self): def test_wide_repr_unicode(self): with option_context('mode.sim_interactive', True): - col = lambda l, k: [tm.randu(k) for _ in xrange(l)] + col = lambda l, k: [tm.randu(k) for _ in range(l)] max_cols = get_option('display.max_columns') df = DataFrame([col(max_cols-1, 25) for _ in range(10)]) set_option('display.expand_frame_repr', False) @@ -908,7 +919,7 @@ def test_wide_repr_wide_long_columns(self): def test_long_series(self): n = 1000 - s = Series(np.random.randint(-50,50,n),index=['s%04d' % x for x in xrange(n)], dtype='int64') + s = Series(np.random.randint(-50,50,n),index=['s%04d' % x for x in range(n)], dtype='int64') import re str_rep = str(s) @@ -923,13 +934,13 @@ def test_index_with_nan(self): # multi-index y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = u' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64' + expected = six.u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') self.assert_(result == expected) # index y = df.set_index('id2') result = y.to_string() - expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64' + expected = six.u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') self.assert_(result == expected) # all-nan in mi @@ -937,7 +948,7 @@ def test_index_with_nan(self): df2.ix[:,'id2'] = np.nan y = df2.set_index('id2') result = y.to_string() - expected = u' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64' + expected = six.u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') self.assert_(result == expected) # partial nan in mi @@ -945,7 +956,7 @@ def test_index_with_nan(self): df2.ix[:,'id2'] = np.nan y = df2.set_index(['id2','id3']) result = y.to_string() - expected = u' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64' + expected = six.u(' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') self.assert_(result == expected) df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'}, @@ -953,7 +964,7 @@ def test_index_with_nan(self): y = df.set_index(['id1','id2','id3']) result = y.to_string() - expected = u' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64' + expected = six.u(' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') self.assert_(result == expected) def test_to_string(self): @@ -963,7 +974,7 @@ def test_to_string(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=list(range(200))) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -974,7 +985,7 @@ def test_to_string(self): self.assert_(retval is None) self.assertEqual(buf.getvalue(), s) - self.assert_(isinstance(s, basestring)) + tm.assert_isinstance(s, six.string_types) # print in right order result = biggie.to_string(columns=['B', 'A'], col_space=17, @@ -1101,7 +1112,7 @@ def test_to_string_small_float_values(self): def test_to_string_float_index(self): index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(range(5), index=index) + df = DataFrame(list(range(5)), index=index) result = df.to_string() expected = (' 0\n' 
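These to_string and wide-repr hunks spell every unicode literal as six.u('...') rather than u'...' because the u'' prefix is a syntax error on Python 3.0-3.2; PEP 414 restored it only in 3.3. A minimal sketch of such a helper, in the spirit of six.u (illustration only; the shipped six implementation differs in detail):

    import sys

    if sys.version_info[0] >= 3:
        def u(s):
            # Python 3: every str literal is already text.
            return s
    else:
        def u(s):
            # Python 2: the source literal '\u03c3' is a byte string that
            # still holds a backslash-u escape; decoding it with the
            # unicode-escape codec turns it into the real code point.
            # Doubling backslashes first protects intentional '\\'.
            return unicode(s.replace(r'\\', r'\\\\'), 'unicode_escape')

    sigma = u('\u03c3')   # one spelling usable on 2.6/2.7 and 3.0+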
@@ -1114,8 +1125,8 @@ def test_to_string_float_index(self): def test_to_string_ascii_error(self): data = [('0 ', - u' .gitignore ', - u' 5 ', + six.u(' .gitignore '), + six.u(' 5 '), ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80' '\xa2\xe2\x80\xa2\xe2\x80\xa2')] df = DataFrame(data) @@ -1136,7 +1147,7 @@ def test_to_string_int_formatting(self): self.assertEqual(output, expected) def test_to_string_index_formatter(self): - df = DataFrame([range(5), range(5, 10), range(10, 15)]) + df = DataFrame([list(range(5)), list(range(5, 10)), list(range(10, 15))]) rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]}) @@ -1184,7 +1195,7 @@ def test_to_string_format_na(self): self.assertEqual(result, expected) def test_to_string_line_width(self): - df = pd.DataFrame(123, range(10, 15), range(30)) + df = pd.DataFrame(123, list(range(10, 15)), list(range(30))) s = df.to_string(line_width=80) self.assertEqual(max(len(l) for l in s.split('\n')), 80) @@ -1192,7 +1203,7 @@ def test_to_html(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=list(range(200))) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1203,7 +1214,7 @@ def test_to_html(self): self.assert_(retval is None) self.assertEqual(buf.getvalue(), s) - self.assert_(isinstance(s, basestring)) + tm.assert_isinstance(s, six.string_types) biggie.to_html(columns=['B', 'A'], col_space=17) biggie.to_html(columns=['B', 'A'], @@ -1219,7 +1230,7 @@ def test_to_html(self): def test_to_html_filename(self): biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + index=list(range(200))) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1246,8 +1257,8 @@ def test_to_html_columns_arg(self): self.assert_('B' not in result) def test_to_html_multiindex(self): - columns = pandas.MultiIndex.from_tuples(zip(np.arange(2).repeat(2), - np.mod(range(4), 2)), + columns = pandas.MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), + np.mod(list(range(4)), 2))), names=['CL0', 'CL1']) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='left') @@ -1286,8 +1297,8 @@ def test_to_html_multiindex(self): self.assertEqual(result, expected) - columns = pandas.MultiIndex.from_tuples(zip(range(4), - np.mod(range(4), 2))) + columns = pandas.MultiIndex.from_tuples(list(zip(range(4), + np.mod(list(range(4)), 2)))) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='right') @@ -1538,10 +1549,10 @@ def setUp(self): self.ts = tm.makeTimeSeries() def test_repr_unicode(self): - s = Series([u'\u03c3'] * 10) + s = Series([six.u('\u03c3')] * 10) repr(s) - a = Series([u"\u05d0"] * 1000) + a = Series([six.u("\u05d0")] * 1000) a.name = 'title1' repr(a) @@ -1585,26 +1596,26 @@ def test_freq_name_separation(self): def test_to_string_mixed(self): s = Series(['foo', np.nan, -1.23, 4.56]) result = s.to_string() - expected = (u'0 foo\n' - u'1 NaN\n' - u'2 -1.23\n' - u'3 4.56') + expected = (six.u('0 foo\n') + + six.u('1 NaN\n') + + six.u('2 -1.23\n') + + six.u('3 4.56')) self.assertEqual(result, expected) # but don't count NAs as floats s = Series(['foo', np.nan, 'bar', 'baz']) result = s.to_string() - expected = (u'0 foo\n' - '1 NaN\n' - '2 bar\n' + expected = (six.u('0 foo\n') + + '1 NaN\n' + + '2 bar\n' + '3 baz') self.assertEqual(result, expected) s = Series(['foo', 5, 'bar', 'baz']) result = s.to_string() - expected = (u'0 foo\n' - '1 5\n' - '2 bar\n' + expected = (six.u('0 foo\n') + + '1 5\n' + + '2 bar\n' + '3 
baz') self.assertEqual(result, expected) @@ -1613,16 +1624,16 @@ def test_to_string_float_na_spacing(self): s[::2] = np.nan result = s.to_string() - expected = (u'0 NaN\n' - '1 1.5678\n' - '2 NaN\n' - '3 -3.0000\n' + expected = (six.u('0 NaN\n') + + '1 1.5678\n' + + '2 NaN\n' + + '3 -3.0000\n' + '4 NaN') self.assertEqual(result, expected) def test_unicode_name_in_footer(self): - s = Series([1, 2], name=u'\u05e2\u05d1\u05e8\u05d9\u05ea') - sf = fmt.SeriesFormatter(s, name=u'\u05e2\u05d1\u05e8\u05d9\u05ea') + s = Series([1, 2], name=six.u('\u05e2\u05d1\u05e8\u05d9\u05ea')) + sf = fmt.SeriesFormatter(s, name=six.u('\u05e2\u05d1\u05e8\u05d9\u05ea')) sf._get_footer() # should not raise exception def test_float_trim_zeros(self): @@ -1916,7 +1927,7 @@ def test_rounding(self): formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) result = formatter(0) - self.assertEqual(result, u' 0.000') + self.assertEqual(result, six.u(' 0.000')) def _three_digit_exp(): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 577cbfe9dc744..a3a799279f4fb 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1,7 +1,10 @@ +from __future__ import print_function # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta, time -from StringIO import StringIO +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range, long +from pandas.util import compat import cPickle as pickle import operator import re @@ -39,6 +42,9 @@ import pandas.lib as lib from numpy.testing.decorators import slow +import six +from six.moves import map +from six.moves import zip def _skip_if_no_scipy(): try: @@ -58,7 +64,7 @@ def _check_mixed_float(df, dtype = None): # float16 are most likely to be upcasted to float32 dtypes = dict(A = 'float32', B = 'float32', C = 'float16', D = 'float64') - if isinstance(dtype, basestring): + if isinstance(dtype, six.string_types): dtypes = dict([ (k,dtype) for k, v in dtypes.items() ]) elif isinstance(dtype, dict): dtypes.update(dtype) @@ -73,7 +79,7 @@ def _check_mixed_float(df, dtype = None): def _check_mixed_int(df, dtype = None): dtypes = dict(A = 'int32', B = 'uint64', C = 'uint8', D = 'int64') - if isinstance(dtype, basestring): + if isinstance(dtype, six.string_types): dtypes = dict([ (k,dtype) for k, v in dtypes.items() ]) elif isinstance(dtype, dict): dtypes.update(dtype) @@ -172,7 +178,7 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A']) assert_series_equal(self.frame['A'], data['B']) - df = DataFrame(0, range(3), ['tt1', 'tt2'], dtype=np.int_) + df = DataFrame(0, list(range(3)), ['tt1', 'tt2'], dtype=np.int_) df.ix[1, ['tt1', 'tt2']] = [1, 2] result = df.ix[1, ['tt1', 'tt2']] @@ -191,7 +197,7 @@ def test_setitem_list_not_dataframe(self): assert_almost_equal(self.frame[['A', 'B']].values, data) def test_setitem_list_of_tuples(self): - tuples = zip(self.frame['A'], self.frame['B']) + tuples = list(zip(self.frame['A'], self.frame['B'])) self.frame['tuples'] = tuples result = self.frame['tuples'] @@ -357,7 +363,7 @@ def test_getattr(self): 'NONEXISTENT_NAME') def test_setattr_column(self): - df = DataFrame({'foobar': 1}, index=range(10)) + df = DataFrame({'foobar': 1}, index=list(range(10))) df.foobar = 5 self.assert_((df.foobar == 5).all()) @@ -561,11 +567,11 @@ def test_setitem_ambig(self): from decimal import Decimal # created as float type - dm = DataFrame(index=range(3), columns=range(3)) + dm = DataFrame(index=list(range(3)), columns=list(range(3))) 
coercable_series = Series([Decimal(1) for _ in range(3)], - index=range(3)) - uncoercable_series = Series(['foo', 'bzr', 'baz'], index=range(3)) + index=list(range(3))) + uncoercable_series = Series(['foo', 'bzr', 'baz'], index=list(range(3))) dm[0] = np.ones(3) self.assertEqual(len(dm.columns), 3) @@ -663,7 +669,7 @@ def test_getitem_fancy_slice_integers_step(self): self.assert_(isnull(df.ix[:8:2]).values.all()) def test_getitem_setitem_integer_slice_keyerrors(self): - df = DataFrame(np.random.randn(10, 5), index=range(0, 20, 2)) + df = DataFrame(np.random.randn(10, 5), index=list(range(0, 20, 2))) # this is OK cp = df.copy() @@ -776,11 +782,12 @@ def test_setitem_fancy_2d(self): assert_frame_equal(frame, expected) # new corner case of boolean slicing / setting - frame = DataFrame(zip([2, 3, 9, 6, 7], [np.nan] * 5), + frame = DataFrame(list(zip([2, 3, 9, 6, 7], [np.nan] * 5)), columns=['a', 'b']) lst = [100] lst.extend([np.nan] * 4) - expected = DataFrame(zip([100, 3, 9, 6, 7], lst), columns=['a', 'b']) + expected = DataFrame(list(zip([100, 3, 9, 6, 7], lst)), + columns=['a', 'b']) frame[frame['a'] == 2] = 100 assert_frame_equal(frame, expected) @@ -1486,7 +1493,7 @@ def test_set_value_resize(self): self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): - df = DataFrame(randn(3, 3), index=range(3), columns=list('ABC')) + df = DataFrame(randn(3, 3), index=list(range(3)), columns=list('ABC')) res = df.set_value('C', 2, 1.0) self.assert_(list(res.index) == list(df.index) + ['C']) self.assert_(list(res.columns) == list(df.columns) + [2]) @@ -1494,7 +1501,7 @@ def test_set_value_with_index_dtype_change(self): def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) - df = DataFrame(index=index, columns=range(4)) + df = DataFrame(index=index, columns=list(range(4))) self.assertRaises(KeyError, df.get_value, 0, 1) # self.assertRaises(KeyError, df.set_value, 0, 1, 0) @@ -1507,7 +1514,7 @@ def test_single_element_ix_dont_upcast(self): self.assert_(com.is_integer(result)) def test_irow(self): - df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2)) + df = DataFrame(np.random.randn(10, 4), index=list(range(0, 20, 2))) result = df.irow(1) exp = df.ix[2] @@ -1534,7 +1541,7 @@ def test_irow(self): assert_frame_equal(result, expected) def test_icol(self): - df = DataFrame(np.random.randn(4, 10), columns=range(0, 20, 2)) + df = DataFrame(np.random.randn(4, 10), columns=list(range(0, 20, 2))) result = df.icol(1) exp = df.ix[:, 2] @@ -1621,7 +1628,7 @@ def test_nested_exception(self): try: repr(df) - except Exception, e: + except Exception as e: self.assertNotEqual(type(e), UnboundLocalError) _seriesd = tm.getSeriesData() @@ -2066,7 +2073,7 @@ def test_constructor_list_frames(self): result = DataFrame([DataFrame([])]) self.assert_(result.shape == (1,0)) - result = DataFrame([DataFrame(dict(A = range(5)))]) + result = DataFrame([DataFrame(dict(A = list(range(5))))]) self.assert_(type(result.iloc[0,0]) == DataFrame) def test_constructor_mixed_dtypes(self): @@ -2080,7 +2087,7 @@ def _make_mixed_dtypes_df(typ, ad = None): dtypes = MIXED_FLOAT_DTYPES arrays = [ np.array(np.random.randint(10, size=10), dtype = d) for d in dtypes ] - zipper = zip(dtypes,arrays) + zipper = list(zip(dtypes,arrays)) for d,a in zipper: assert(a.dtype == d) if ad is None: @@ -2141,8 +2148,8 @@ def test_constructor_overflow_int64(self): # #2355 data_scores = 
[(6311132704823138710, 273), (2685045978526272070, 23), - (8921811264899370420, 45), (17019687244989530680L, 270), - (9930107427299601010L, 273)] + (8921811264899370420, 45), (long(17019687244989530680), 270), + (long(9930107427299601010), 273)] dtype = [('uid', 'u8'), ('score', 'u8')] data = np.zeros((len(data_scores),), dtype=dtype) data[:] = data_scores @@ -2156,7 +2163,7 @@ def test_is_mixed_type(self): def test_constructor_ordereddict(self): import random nitems = 100 - nums = range(nitems) + nums = list(range(nitems)) random.shuffle(nums) expected = ['A%d' % i for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) @@ -2251,8 +2258,8 @@ def testit(): def test_constructor_subclass_dict(self): # Test for passing dict subclass to constructor - data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in xrange(10)), - 'col2': tm.TestSubDict((x, 20.0 * x) for x in xrange(10))} + data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in range(10)), + 'col2': tm.TestSubDict((x, 20.0 * x) for x in range(10))} df = DataFrame(data) refdf = DataFrame(dict((col, dict(val.iteritems())) for col, val in data.iteritems())) @@ -2266,7 +2273,7 @@ def test_constructor_subclass_dict(self): from collections import defaultdict data = {} self.frame['B'][:10] = np.nan - for k, v in self.frame.iterkv(): + for k, v in self.frame.iteritems(): dct = defaultdict(dict) dct.update(v.to_dict()) data[k] = dct @@ -2356,14 +2363,14 @@ def test_constructor_ndarray(self): # automatic labeling frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, range(2))) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.index, list(range(2)))) + self.assert_(np.array_equal(frame.columns, list(range(3)))) frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.columns, list(range(3)))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, range(2))) + self.assert_(np.array_equal(frame.index, list(range(2)))) # 0-length axis frame = DataFrame(np.empty((0, 3))) @@ -2414,14 +2421,14 @@ def test_constructor_maskedarray(self): # automatic labeling frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, range(2))) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.index, list(range(2)))) + self.assert_(np.array_equal(frame.columns, list(range(3)))) frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, range(3))) + self.assert_(np.array_equal(frame.columns, list(range(3)))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, range(2))) + self.assert_(np.array_equal(frame.index, list(range(2)))) # 0-length axis frame = DataFrame(ma.masked_all((0, 3))) @@ -2502,11 +2509,11 @@ def test_constructor_corner(self): self.assertEqual(df.values.shape, (0, 0)) # empty but with specified dtype - df = DataFrame(index=range(10), columns=['a', 'b'], dtype=object) + df = DataFrame(index=list(range(10)), columns=['a', 'b'], dtype=object) self.assert_(df.values.dtype == np.object_) # does not error but ends up float - df = DataFrame(index=range(10), columns=['a', 'b'], dtype=int) + df = DataFrame(index=list(range(10)), columns=['a', 'b'], dtype=int) self.assert_(df.values.dtype == np.object_) # #1783 empty dtype object @@ -2680,7 +2687,7 @@ def test_constructor_ragged(self): self.assertRaises(Exception, DataFrame, data) def test_constructor_scalar(self): - idx = 
Index(range(3)) + idx = Index(list(range(3))) df = DataFrame({"a": 0}, index=idx) expected = DataFrame({"a": [0, 0, 0]}, index=idx) assert_frame_equal(df, expected, check_dtype=False) @@ -2849,7 +2856,7 @@ def check(result, expected=None): # assignment # GH 3687 arr = np.random.randn(3, 2) - idx = range(2) + idx = list(range(2)) df = DataFrame(arr, columns=['A', 'A']) df.columns = idx expected = DataFrame(arr,columns=idx) @@ -2950,11 +2957,11 @@ def test_insert_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 K = 5 - df = DataFrame(index=range(N)) + df = DataFrame(index=list(range(N))) new_col = np.random.randn(N) for i in range(K): df[i] = new_col - expected = DataFrame(np.repeat(new_col,K).reshape(N,K),index=range(N)) + expected = DataFrame(np.repeat(new_col,K).reshape(N,K),index=list(range(N))) assert_frame_equal(df,expected) def test_constructor_single_value(self): @@ -3090,12 +3097,12 @@ def test_constructor_for_list_with_dtypes(self): expected = Series({'float64' : 1}) assert_series_equal(result, expected) - df = DataFrame({'a' : 1 }, index=range(3)) + df = DataFrame({'a' : 1 }, index=list(range(3))) result = df.get_dtype_counts() expected = Series({'int64': 1}) assert_series_equal(result, expected) - df = DataFrame({'a' : 1. }, index=range(3)) + df = DataFrame({'a' : 1. }, index=list(range(3))) result = df.get_dtype_counts() expected = Series({'float64': 1 }) assert_series_equal(result, expected) @@ -3200,7 +3207,7 @@ def test_operators_timedelta64(self): def test__slice_consolidate_invalidate_item_cache(self): # #3970 - df = DataFrame({ "aa":range(5), "bb":[2.2]*5}) + df = DataFrame({ "aa":list(range(5)), "bb":[2.2]*5}) # Creates a second float block df["cc"] = 0.0 @@ -3573,7 +3580,7 @@ def test_join_str_datetime(self): str_dates = ['20120209', '20120222'] dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - A = DataFrame(str_dates, index=range(2), columns=['aa']) + A = DataFrame(str_dates, index=list(range(2)), columns=['aa']) C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) tst = A.join(C, on='aa') @@ -3598,7 +3605,7 @@ def test_from_records_sequencelike(self): for dtype, b in blocks.iteritems(): columns.extend(b.columns) dtypes.extend([ (c,np.dtype(dtype).descr[0][1]) for c in b.columns ]) - for i in xrange(len(df.index)): + for i in range(len(df.index)): tup = [] for _, b in blocks.iteritems(): tup.extend(b.irow(i).values) @@ -3625,12 +3632,12 @@ def test_from_records_sequencelike(self): # tuples is in the order of the columns result = DataFrame.from_records(tuples) - self.assert_(np.array_equal(result.columns, range(8))) + self.assert_(np.array_equal(result.columns, list(range(8)))) # test exclude parameter & we are casting the results here (as we don't have dtype info to recover) columns_to_test = [ columns.index('C'), columns.index('E1') ] - exclude = list(set(xrange(8))-set(columns_to_test)) + exclude = list(set(range(8))-set(columns_to_test)) result = DataFrame.from_records(tuples, exclude=exclude) result.columns = [ columns[i] for i in sorted(columns_to_test) ] assert_series_equal(result['C'], df['C']) @@ -3708,7 +3715,7 @@ def __iter__(self): return iter(self.args) recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)] - tups = map(tuple, recs) + tups = list(map(tuple, recs)) result = DataFrame.from_records(recs) expected = DataFrame.from_records(tups) @@ -3767,7 +3774,7 @@ def test_repr_mixed_big(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=range(200)) + 
index=list(range(200))) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -3803,8 +3810,8 @@ def test_repr_big(self): buf = StringIO() # big one - biggie = DataFrame(np.zeros((200, 4)), columns=range(4), - index=range(200)) + biggie = DataFrame(np.zeros((200, 4)), columns=list(range(4)), + index=list(range(200))) foo = repr(biggie) def test_repr_unsortable(self): @@ -3837,7 +3844,7 @@ def test_repr_unsortable(self): warnings.filters = warn_filters def test_repr_unicode(self): - uval = u'\u03c3\u03c3\u03c3\u03c3' + uval = six.u('\u03c3\u03c3\u03c3\u03c3') bval = uval.encode('utf-8') df = DataFrame({'A': [uval, uval]}) @@ -3850,15 +3857,15 @@ def test_repr_unicode(self): self.assertEqual(result.split('\n')[0].rstrip(), ex_top) def test_unicode_string_with_unicode(self): - df = DataFrame({'A': [u"\u05d0"]}) + df = DataFrame({'A': [six.u("\u05d0")]}) if py3compat.PY3: str(df) else: - unicode(df) + six.text_type(df) def test_bytestring_with_unicode(self): - df = DataFrame({'A': [u"\u05d0"]}) + df = DataFrame({'A': [six.u("\u05d0")]}) if py3compat.PY3: bytes(df) else: @@ -3866,7 +3873,7 @@ def test_bytestring_with_unicode(self): def test_very_wide_info_repr(self): df = DataFrame(np.random.randn(10, 20), - columns=[tm.rands(10) for _ in xrange(20)]) + columns=[tm.rands(10) for _ in range(20)]) repr(df) def test_repr_column_name_unicode_truncation_bug(self): @@ -3971,7 +3978,7 @@ def test_itertuples(self): assert_series_equal(s, expected) df = DataFrame({'floats': np.random.randn(5), - 'ints': range(5)}, columns=['floats', 'ints']) + 'ints': list(range(5))}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): self.assert_(isinstance(tup[1], np.integer)) @@ -4636,7 +4643,7 @@ def test_string_comparison(self): assert_frame_equal(df[-mask_b], df.ix[1:1, :]) def test_float_none_comparison(self): - df = DataFrame(np.random.randn(8, 3), index=range(8), + df = DataFrame(np.random.randn(8, 3), index=list(range(8)), columns=['A', 'B', 'C']) self.assertRaises(TypeError, df.__eq__, None) @@ -4679,8 +4686,8 @@ def test_to_csv_from_csv(self): assert_almost_equal(self.tsframe.values, recons.values) # corner case - dm = DataFrame({'s1': Series(range(3), range(3)), - 's2': Series(range(2), range(2))}) + dm = DataFrame({'s1': Series(list(range(3)), list(range(3))), + 's2': Series(list(range(2)), list(range(2)))}) dm.to_csv(path) recons = DataFrame.from_csv(path) assert_frame_equal(dm, recons) @@ -4723,8 +4730,8 @@ def test_to_csv_from_csv(self): df2.to_csv(path,mode='a',header=False) xp = pd.concat([df1,df2]) rs = pd.read_csv(path,index_col=0) - rs.columns = map(int,rs.columns) - xp.columns = map(int,xp.columns) + rs.columns = list(map(int,rs.columns)) + xp.columns = list(map(int,xp.columns)) assert_frame_equal(xp,rs) def test_to_csv_cols_reordering(self): @@ -4807,10 +4814,10 @@ def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None, dupe_col=False): if cnlvl: - header = range(cnlvl) + header = list(range(cnlvl)) with ensure_clean(path) as path: df.to_csv(path,encoding='utf8',chunksize=chunksize,tupleize_cols=False) - recons = DataFrame.from_csv(path,header=range(cnlvl),tupleize_cols=False,parse_dates=False) + recons = DataFrame.from_csv(path,header=list(range(cnlvl)),tupleize_cols=False,parse_dates=False) else: with ensure_clean(path) as path: df.to_csv(path,encoding='utf8',chunksize=chunksize) @@ -4834,19 +4841,22 @@ def _to_uni(x): if r_dtype: if r_dtype == 'u': # unicode r_dtype='O' - recons.index = np.array(map(_to_uni,recons.index), - dtype=r_dtype ) - df.index = 
np.array(map(_to_uni,df.index),dtype=r_dtype ) + recons.index = np.array(list(map(_to_uni,recons.index)), + dtype=r_dtype) + df.index = np.array(list(map(_to_uni,df.index)),dtype=r_dtype) if r_dtype == 'dt': # unicode r_dtype='O' - recons.index = np.array(map(Timestamp,recons.index), - dtype=r_dtype ) - df.index = np.array(map(Timestamp,df.index),dtype=r_dtype ) + recons.index = np.array(list(map(Timestamp,recons.index)), + dtype=r_dtype) + df.index = np.array(list(map(Timestamp,df.index)),dtype=r_dtype) elif r_dtype == 'p': r_dtype='O' - recons.index = np.array(map(Timestamp,recons.index.to_datetime()), - dtype=r_dtype ) - df.index = np.array(map(Timestamp,df.index.to_datetime()),dtype=r_dtype ) + recons.index = np.array(list(map(Timestamp, + recons.index.to_datetime())), + dtype=r_dtype) + df.index = np.array(list(map(Timestamp, + df.index.to_datetime())), + dtype=r_dtype) else: r_dtype= type_map.get(r_dtype) recons.index = np.array(recons.index,dtype=r_dtype ) @@ -4854,19 +4864,19 @@ def _to_uni(x): if c_dtype: if c_dtype == 'u': c_dtype='O' - recons.columns = np.array(map(_to_uni,recons.columns), - dtype=c_dtype ) - df.columns = np.array(map(_to_uni,df.columns),dtype=c_dtype ) + recons.columns = np.array(list(map(_to_uni,recons.columns)), + dtype=c_dtype) + df.columns = np.array(list(map(_to_uni,df.columns)),dtype=c_dtype ) elif c_dtype == 'dt': c_dtype='O' - recons.columns = np.array(map(Timestamp,recons.columns), + recons.columns = np.array(list(map(Timestamp,recons.columns)), dtype=c_dtype ) - df.columns = np.array(map(Timestamp,df.columns),dtype=c_dtype ) + df.columns = np.array(list(map(Timestamp,df.columns)),dtype=c_dtype) elif c_dtype == 'p': c_dtype='O' - recons.columns = np.array(map(Timestamp,recons.columns.to_datetime()), - dtype=c_dtype ) - df.columns = np.array(map(Timestamp,df.columns.to_datetime()),dtype=c_dtype ) + recons.columns = np.array(list(map(Timestamp,recons.columns.to_datetime())), + dtype=c_dtype) + df.columns = np.array(list(map(Timestamp,df.columns.to_datetime())),dtype=c_dtype ) else: c_dtype= type_map.get(c_dtype) recons.columns = np.array(recons.columns,dtype=c_dtype ) @@ -4947,7 +4957,7 @@ def make_dtnat_arr(n,nnat=None): _do_test(df,path,dupe_col=True) - _do_test(DataFrame(index=range(10)),path) + _do_test(DataFrame(index=list(range(10))),path) _do_test(mkdf(chunksize//2+1, 2,r_idx_nlevels=2),path,rnlvl=2) for ncols in [2,3,4]: base = int(chunksize//ncols) @@ -5123,15 +5133,15 @@ def _make_frame(names=None): # catch invalid headers def testit(): - read_csv(path,tupleize_cols=False,header=range(3),index_col=0) + read_csv(path,tupleize_cols=False,header=list(range(3)),index_col=0) assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns', testit) def testit(): - read_csv(path,tupleize_cols=False,header=range(7),index_col=0) + read_csv(path,tupleize_cols=False,header=list(range(7)),index_col=0) assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file', testit) for i in [3,4,5,6,7]: - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=range(i), index_col=0) + self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=list(range(i)), index_col=0) self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0) # write with cols @@ -5171,7 +5181,7 @@ def test_to_csv_withcommas(self): def test_to_csv_mixed(self): def create_cols(name): - return [ "%s%03d" % (name,i) for i in xrange(5) ] + return [ 
"%s%03d" % (name,i) for i in range(5) ] df_float = DataFrame(np.random.randn(100, 5),dtype='float64',columns=create_cols('float')) df_int = DataFrame(np.random.randn(100, 5),dtype='int64',columns=create_cols('int')) @@ -5200,7 +5210,7 @@ def create_cols(name): def test_to_csv_dups_cols(self): - df = DataFrame(np.random.randn(1000, 30),columns=range(15)+range(15),dtype='float64') + df = DataFrame(np.random.randn(1000, 30),columns=list(range(15))+list(range(15)),dtype='float64') with ensure_clean() as filename: df.to_csv(filename) # single dtype, fine @@ -5210,9 +5220,9 @@ def test_to_csv_dups_cols(self): df_float = DataFrame(np.random.randn(1000, 3),dtype='float64') df_int = DataFrame(np.random.randn(1000, 3),dtype='int64') - df_bool = DataFrame(True,index=df_float.index,columns=range(3)) - df_object = DataFrame('foo',index=df_float.index,columns=range(3)) - df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=range(3)) + df_bool = DataFrame(True,index=df_float.index,columns=list(range(3))) + df_object = DataFrame('foo',index=df_float.index,columns=list(range(3))) + df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=list(range(3))) df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1, ignore_index=True) cols = [] @@ -5249,7 +5259,7 @@ def test_to_csv_dups_cols(self): def test_to_csv_chunking(self): - aa=DataFrame({'A':range(100000)}) + aa=DataFrame({'A':list(range(100000))}) aa['B'] = aa.A + 1.0 aa['C'] = aa.A + 2.0 aa['D'] = aa.A + 3.0 @@ -5273,7 +5283,7 @@ def test_to_csv_bug(self): def test_to_csv_unicode(self): - df = DataFrame({u'c/\u03c3': [1, 2, 3]}) + df = DataFrame({six.u('c/\u03c3'): [1, 2, 3]}) with ensure_clean() as path: df.to_csv(path, encoding='UTF-8') @@ -5287,10 +5297,10 @@ def test_to_csv_unicode(self): def test_to_csv_unicode_index_col(self): buf = StringIO('') df = DataFrame( - [[u"\u05d0", "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], - columns=[u"\u05d0", - u"\u05d1", u"\u05d2", u"\u05d3"], - index=[u"\u05d0", u"\u05d1"]) + [[six.u("\u05d0"), "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=[six.u("\u05d0"), + six.u("\u05d1"), six.u("\u05d2"), six.u("\u05d3")], + index=[six.u("\u05d0"), six.u("\u05d1")]) df.to_csv(buf, encoding='UTF-8') buf.seek(0) @@ -5586,7 +5596,7 @@ def test_asfreq(self): def test_asfreq_datetimeindex(self): df = DataFrame({'A': [1, 2, 3]}, - index=[datetime(2011, 11, 01), datetime(2011, 11, 2), + index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)]) df = df.asfreq('B') self.assert_(isinstance(df.index, DatetimeIndex)) @@ -5929,7 +5939,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) dropped = df.dropna(axis=0) - expected = df.ix[range(2, 6)] + expected = df.ix[list(range(2, 6))] assert_frame_equal(dropped, expected) # threshold @@ -5938,7 +5948,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) dropped = df.dropna(axis=0, thresh=4) - expected = df.ix[range(2, 6)] + expected = df.ix[list(range(2, 6))] assert_frame_equal(dropped, expected) dropped = df.dropna(axis=1, thresh=4) @@ -5984,7 +5994,7 @@ def test_drop_duplicates(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': range(8)}) + 'D': list(range(8))}) # single column result = df.drop_duplicates('AAA') @@ -6024,7 +6034,7 @@ def test_drop_duplicates_tuple(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': range(8)}) + 'D': list(range(8))}) # single column result = 
df.drop_duplicates(('AA', 'AB')) @@ -6047,7 +6057,7 @@ def test_drop_duplicates_NA(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], - 'D': range(8)}) + 'D': list(range(8))}) # single column result = df.drop_duplicates('A') @@ -6073,7 +6083,7 @@ def test_drop_duplicates_NA(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], - 'D': range(8)}) + 'D': list(range(8))}) # single column result = df.drop_duplicates('C') @@ -6099,7 +6109,7 @@ def test_drop_duplicates_inplace(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': range(8)}) + 'D': list(range(8))}) # single column df = orig.copy() @@ -6148,7 +6158,7 @@ def test_drop_col_still_multiindex(self): ['', '', '', 'OD'], ['', '', '', 'wx']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) tuples.sort() index = MultiIndex.from_tuples(tuples) @@ -6271,7 +6281,7 @@ def test_fillna_columns(self): def test_fillna_invalid_method(self): try: self.frame.fillna(method='ffil') - except ValueError, inst: + except ValueError as inst: self.assert_('ffil' in str(inst)) def test_fillna_invalid_value(self): @@ -6305,7 +6315,7 @@ def test_replace_inplace(self): def test_regex_replace_scalar(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': list(range(4)), 'b': list('ab..')} dfmix = DataFrame(mix) ### simplest cases @@ -6371,7 +6381,7 @@ def test_regex_replace_scalar(self): def test_regex_replace_scalar_inplace(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': list(range(4)), 'b': list('ab..')} dfmix = DataFrame(mix) ### simplest cases @@ -6579,14 +6589,14 @@ def test_regex_replace_list_obj_inplace(self): def test_regex_replace_list_mixed(self): ## mixed frame to make sure this doesn't break things - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': list(range(4)), 'b': list('ab..')} dfmix = DataFrame(mix) ## lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'a'] values = [nan, 'crap'] - mix2 = {'a': range(4), 'b': list('ab..'), 'c': list('halo')} + mix2 = {'a': list(range(4)), 'b': list('ab..'), 'c': list('halo')} dfmix2 = DataFrame(mix2) res = dfmix2.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': mix2['a'], 'b': ['crap', 'b', nan, nan], @@ -6617,7 +6627,7 @@ def test_regex_replace_list_mixed(self): assert_frame_equal(res, expec) def test_regex_replace_list_mixed_inplace(self): - mix = {'a': range(4), 'b': list('ab..')} + mix = {'a': list(range(4)), 'b': list('ab..')} dfmix = DataFrame(mix) # the same inplace ## lists of regexes and values @@ -6656,7 +6666,7 @@ def test_regex_replace_list_mixed_inplace(self): assert_frame_equal(res, expec) def test_regex_replace_dict_mixed(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) ## dicts @@ -6713,7 +6723,7 @@ def test_regex_replace_dict_mixed(self): def test_regex_replace_dict_nested(self): # nested dicts will not work until this is implemented for Series - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) res = dfmix.replace({'b': 
{r'\s*\.\s*': nan}}, regex=True) res2 = dfmix.copy() @@ -6734,7 +6744,7 @@ def test_regex_replace_dict_nested_gh4115(self): assert_frame_equal(df.replace({'Type': {'Q':0,'T':1}}), expected) def test_regex_replace_list_to_scalar(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'a|b'], nan, regex=True) res2 = df.copy() @@ -6749,7 +6759,7 @@ def test_regex_replace_list_to_scalar(self): def test_regex_replace_str_to_numeric(self): # what happens when you try to replace a numeric value with a regex? - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(r'\s*\.\s*', 0, regex=True) res2 = df.copy() @@ -6763,7 +6773,7 @@ def test_regex_replace_str_to_numeric(self): assert_frame_equal(res3, expec) def test_regex_replace_regex_list_to_numeric(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'b'], 0, regex=True) res2 = df.copy() @@ -6778,7 +6788,7 @@ def test_regex_replace_regex_list_to_numeric(self): assert_frame_equal(res3, expec) def test_regex_replace_series_of_regexes(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) s1 = Series({'b': r'\s*\.\s*'}) s2 = Series({'b': nan}) @@ -6794,7 +6804,7 @@ def test_regex_replace_series_of_regexes(self): assert_frame_equal(res3, expec) def test_regex_replace_numeric_to_object_conversion(self): - mix = {'a': range(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(0, 'a') expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) @@ -7335,42 +7345,42 @@ def test_reindex_fill_value(self): df = DataFrame(np.random.randn(10, 4)) # axis=0 - result = df.reindex(range(15)) + result = df.reindex(list(range(15))) self.assert_(np.isnan(result.values[-5:]).all()) - result = df.reindex(range(15), fill_value=0) - expected = df.reindex(range(15)).fillna(0) + result = df.reindex(list(range(15)), fill_value=0) + expected = df.reindex(list(range(15))).fillna(0) assert_frame_equal(result, expected) # axis=1 - result = df.reindex(columns=range(5), fill_value=0.) + result = df.reindex(columns=list(range(5)), fill_value=0.) expected = df.copy() expected[4] = 0. 
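Nearly every change in these replace/reindex hunks is the same mechanical rule: once `from pandas.util.py3compat import range` is in effect, range is always the lazy 2.x-xrange/3.x-range sequence, so call sites that need a genuine list - index labels, column collections, '+' concatenation - must materialize it with list(...). A small illustration, assuming the shim mirrors the 3.x built-in:

    from pandas.util.py3compat import range  # xrange semantics everywhere

    r = range(15)
    assert len(r) == 15            # lazy ranges still support len() ...
    assert r[3] == 3               # ... and integer indexing
    labels = list(range(15))       # but reindex() wants concrete labels
    cols = list(range(15)) + [99]  # and '+' concatenation is list-only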
assert_frame_equal(result, expected) - result = df.reindex(columns=range(5), fill_value=0) + result = df.reindex(columns=list(range(5)), fill_value=0) expected = df.copy() expected[4] = 0 assert_frame_equal(result, expected) - result = df.reindex(columns=range(5), fill_value='foo') + result = df.reindex(columns=list(range(5)), fill_value='foo') expected = df.copy() expected[4] = 'foo' assert_frame_equal(result, expected) # reindex_axis - result = df.reindex_axis(range(15), fill_value=0., axis=0) - expected = df.reindex(range(15)).fillna(0) + result = df.reindex_axis(list(range(15)), fill_value=0., axis=0) + expected = df.reindex(list(range(15))).fillna(0) assert_frame_equal(result, expected) - result = df.reindex_axis(range(5), fill_value=0., axis=1) - expected = df.reindex(columns=range(5)).fillna(0) + result = df.reindex_axis(list(range(5)), fill_value=0., axis=1) + expected = df.reindex(columns=list(range(5))).fillna(0) assert_frame_equal(result, expected) # other dtypes df['foo'] = 'foo' - result = df.reindex(range(15), fill_value=0) - expected = df.reindex(range(15)).fillna(0) + result = df.reindex(list(range(15)), fill_value=0) + expected = df.reindex(list(range(15))).fillna(0) assert_frame_equal(result, expected) def test_align(self): @@ -8186,7 +8196,7 @@ def transform2(row): try: transformed = data.apply(transform, axis=1) - except Exception, e: + except Exception as e: self.assertEqual(len(e.args), 2) self.assertEqual(e.args[1], 'occurred at index 4') @@ -8303,7 +8313,7 @@ def test_filter(self): self.assert_('foo' in filtered) # unicode columns, won't ascii-encode - df = self.frame.rename(columns={'B': u'\u2202'}) + df = self.frame.rename(columns={'B': six.u('\u2202')}) filtered = df.filter(like='C') self.assertTrue('C' in filtered) @@ -8505,12 +8515,12 @@ def test_sort_index_duplicates(self): try: df.sort_index(by='a') - except Exception, e: + except Exception as e: self.assertTrue('duplicate' in str(e)) try: df.sort_index(by=['a']) - except Exception, e: + except Exception as e: self.assertTrue('duplicate' in str(e)) def test_sort_datetimes(self): @@ -8956,12 +8966,12 @@ def test_count(self): self.assert_(isinstance(ct2, Series)) # GH #423 - df = DataFrame(index=range(10)) + df = DataFrame(index=list(range(10))) result = df.count(1) expected = Series(0, index=df.index) assert_series_equal(result, expected) - df = DataFrame(columns=range(10)) + df = DataFrame(columns=list(range(10))) result = df.count(0) expected = Series(0, index=df.columns) assert_series_equal(result, expected) @@ -9144,7 +9154,7 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, print (df) self.assertFalse(len(_f())) - df['a'] = range(len(df)) + df['a'] = list(range(len(df))) self.assert_(len(getattr(df, name)())) if has_skipna: @@ -9523,12 +9533,12 @@ def test_axis_aliases(self): assert_series_equal(result, expected) def test_combine_first_mixed(self): - a = Series(['a', 'b'], index=range(2)) - b = Series(range(2), index=range(2)) + a = Series(['a', 'b'], index=list(range(2))) + b = Series(list(range(2)), index=list(range(2))) f = DataFrame({'A': a, 'B': b}) - a = Series(['a', 'b'], index=range(5, 7)) - b = Series(range(2), index=range(5, 7)) + a = Series(['a', 'b'], index=list(range(5, 7))) + b = Series(list(range(2)), index=list(range(5, 7))) g = DataFrame({'A': a, 'B': b}) combined = f.combine_first(g) @@ -9546,7 +9556,7 @@ def test_reindex_boolean(self): self.assert_(reindexed.values.dtype == np.object_) self.assert_(isnull(reindexed[0][1])) - reindexed = 
frame.reindex(columns=range(3)) + reindexed = frame.reindex(columns=list(range(3))) self.assert_(reindexed.values.dtype == np.object_) self.assert_(isnull(reindexed[1]).all()) @@ -9606,22 +9616,22 @@ def test_reindex_with_nans(self): def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) - result = df.reindex(range(4), range(4)) - expected = df.reindex(range(4)).reindex(columns=range(4)) + result = df.reindex(list(range(4)), list(range(4))) + expected = df.reindex(list(range(4))).reindex(columns=list(range(4))) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(range(4), range(4)) - expected = df.reindex(range(4)).reindex(columns=range(4)) + result = df.reindex(list(range(4)), list(range(4))) + expected = df.reindex(list(range(4))).reindex(columns=list(range(4))) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(range(2), range(2)) - expected = df.reindex(range(2)).reindex(columns=range(2)) + result = df.reindex(list(range(2)), list(range(2))) + expected = df.reindex(list(range(2))).reindex(columns=list(range(2))) assert_frame_equal(result, expected) @@ -9657,7 +9667,7 @@ def test_count_objects(self): def test_cumsum_corner(self): dm = DataFrame(np.arange(20).reshape(4, 5), - index=range(4), columns=range(5)) + index=list(range(4)), columns=list(range(5))) result = dm.cumsum() #---------------------------------------------------------------------- @@ -9711,7 +9721,7 @@ def test_unstack_to_series(self): # check composability of unstack old_data = data.copy() - for _ in xrange(4): + for _ in range(4): data = data.unstack() assert_frame_equal(old_data, data) @@ -9867,13 +9877,13 @@ def test_reset_index_multiindex_col(self): assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill=None) - xp = DataFrame(full, Index(range(3), name='d'), + xp = DataFrame(full, Index(list(range(3)), name='d'), columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill='blah', col_level=1) - xp = DataFrame(full, Index(range(3), name='d'), + xp = DataFrame(full, Index(list(range(3)), name='d'), columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) @@ -10148,7 +10158,7 @@ def test_boolean_set_uncons(self): def test_xs_view(self): dm = DataFrame(np.arange(20.).reshape(4, 5), - index=range(4), columns=range(5)) + index=list(range(4)), columns=list(range(5))) dm.xs(2, copy=False)[:] = 5 self.assert_((dm.xs(2) == 5).all()) @@ -10166,7 +10176,7 @@ def test_xs_view(self): self.assert_((dm.xs(3) == 10).all()) def test_boolean_indexing(self): - idx = range(3) + idx = list(range(3)) cols = ['A','B','C'] df1 = DataFrame(index=idx, columns=cols, data=np.array([[0.0, 0.5, 1.0], @@ -10186,15 +10196,15 @@ def test_boolean_indexing(self): def test_boolean_indexing_mixed(self): df = DataFrame( - {0L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, - 1L: {35: np.nan, + {long(0): {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + long(1): {35: np.nan, 40: 0.32632316859446198, 43: np.nan, 49: 0.32632316859446198, 50: 0.39114724480578139}, - 2L: {35: np.nan, 40: np.nan, 43: 0.29012581014105987, 49: np.nan, 50: np.nan}, - 3L: {35: np.nan, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, - 4L: {35: 0.34215328467153283, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + long(2): {35: np.nan, 40: np.nan, 43: 0.29012581014105987, 49: np.nan, 50: np.nan}, + long(3): {35: np.nan, 
40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, + long(4): {35: 0.34215328467153283, 40: np.nan, 43: np.nan, 49: np.nan, 50: np.nan}, 'y': {35: 0, 40: 0, 43: 0, 49: 0, 50: 1}}) # mixed int/float ok @@ -10212,15 +10222,15 @@ def test_boolean_indexing_mixed(self): self.assertRaises(ValueError, df.__setitem__, df>0.3, 1) def test_sum_bools(self): - df = DataFrame(index=range(1), columns=range(10)) + df = DataFrame(index=list(range(1)), columns=list(range(10))) bools = isnull(df) self.assert_(bools.sum(axis=1)[0] == 10) def test_fillna_col_reordering(self): - idx = range(20) + idx = list(range(20)) cols = ["COL." + str(i) for i in range(5, 0, -1)] data = np.random.rand(20, 5) - df = DataFrame(index=range(20), columns=cols, data=data) + df = DataFrame(index=list(range(20)), columns=cols, data=data) filled = df.fillna(method='ffill') self.assert_(df.columns.tolist() == filled.columns.tolist()) @@ -10300,7 +10310,7 @@ def test_take(self): assert_frame_equal(result, expected) def test_iterkv_names(self): - for k, v in self.mixed_frame.iterkv(): + for k, v in self.mixed_frame.iteritems(): self.assertEqual(v.name, k) def test_series_put_names(self): @@ -10347,8 +10357,8 @@ def test_dot(self): result = A.dot(b) # unaligned - df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=range(4)) - df2 = DataFrame(randn(5, 3), index=range(5), columns=[1, 2, 3]) + df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=list(range(4))) + df2 = DataFrame(randn(5, 3), index=list(range(5)), columns=[1, 2, 3]) self.assertRaises(ValueError, df.dot, df2) @@ -10554,7 +10564,7 @@ def test_strange_column_corruption_issue(self): # df[col] = nan for i, dt in enumerate(df.index): - for col in xrange(100, 200): + for col in range(100, 200): if not col in wasCol: wasCol[col] = 1 df[col] = nan @@ -10675,12 +10685,12 @@ def test_isin_dict(self): # without using iloc result = df.isin(d) - assert_frame_equal(result, expected) + assert_frame_equal(result, expected) # using iloc result = df.isin(d, iloc=True) expected.iloc[0, 0] = True - assert_frame_equal(result, expected) + assert_frame_equal(result, expected) if __name__ == '__main__': diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 08b42d7cf8975..53c169a7a6570 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import nose import os import string @@ -17,6 +18,9 @@ from numpy.testing import assert_array_equal from numpy.testing.decorators import slow import pandas.tools.plotting as plotting +import six +from six.moves import map +from six.moves import zip def _skip_if_no_scipy(): @@ -115,7 +119,7 @@ def test_bar_colors(self): rects = ax.patches - rgba_colors = map(cm.jet, np.linspace(0, 1, 5)) + rgba_colors = list(map(cm.jet, np.linspace(0, 1, 5))) for i, rect in enumerate(rects[::5]): xp = rgba_colors[i] rs = rect.get_facecolor() @@ -128,7 +132,7 @@ def test_bar_colors(self): rects = ax.patches - rgba_colors = map(cm.jet, np.linspace(0, 1, 5)) + rgba_colors = list(map(cm.jet, np.linspace(0, 1, 5))) for i, rect in enumerate(rects[::5]): xp = rgba_colors[i] rs = rect.get_facecolor() @@ -271,7 +275,7 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): - s = Series(range(10), dtype=object) + s = Series(list(range(10)), dtype=object) kinds = 'line', 'bar', 'barh', 'kde', 'density' for kind in kinds: @@ -327,27 +331,27 @@ def test_plot(self): _check_plot_works(df.plot, subplots=True, title='blah') _check_plot_works(df.plot, title='blah') - tuples 
= zip(list(string.ascii_letters[:10]), range(10)) + tuples = list(zip(string.ascii_letters[:10], range(10))) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) _check_plot_works(df.plot, use_index=True) # unicode - index = MultiIndex.from_tuples([(u'\u03b1', 0), - (u'\u03b1', 1), - (u'\u03b2', 2), - (u'\u03b2', 3), - (u'\u03b3', 4), - (u'\u03b3', 5), - (u'\u03b4', 6), - (u'\u03b4', 7)], names=['i0', 'i1']) - columns = MultiIndex.from_tuples([('bar', u'\u0394'), - ('bar', u'\u0395')], names=['c0', + index = MultiIndex.from_tuples([(six.u('\u03b1'), 0), + (six.u('\u03b1'), 1), + (six.u('\u03b2'), 2), + (six.u('\u03b2'), 3), + (six.u('\u03b3'), 4), + (six.u('\u03b3'), 5), + (six.u('\u03b4'), 6), + (six.u('\u03b4'), 7)], names=['i0', 'i1']) + columns = MultiIndex.from_tuples([('bar', six.u('\u0394')), + ('bar', six.u('\u0395'))], names=['c0', 'c1']) df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) - _check_plot_works(df.plot, title=u'\u03A3') + _check_plot_works(df.plot, title=six.u('\u03A3')) def test_nonnumeric_exclude(self): import matplotlib.pyplot as plt @@ -384,7 +388,7 @@ def test_plot_xy(self): self._check_data(df.plot(y='B'), df.B.plot()) # columns.inferred_type == 'integer' - df.columns = range(1, len(df.columns) + 1) + df.columns = list(range(1, len(df.columns) + 1)) self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) self._check_data(df.plot(x=1), df.set_index(1).plot()) @@ -497,7 +501,7 @@ def test_plot_bar(self): df = DataFrame(np.random.randn(10, 15), index=list(string.ascii_letters[:10]), - columns=range(15)) + columns=list(range(15))) _check_plot_works(df.plot, kind='bar') df = DataFrame({'a': [0, 1], 'b': [1, 0]}) @@ -505,13 +509,13 @@ def test_plot_bar(self): def test_bar_stacked_center(self): # GH2157 - df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) + df = DataFrame({'A': [3] * 5, 'B': list(range(5))}, index=list(range(5))) ax = df.plot(kind='bar', stacked='True', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width() / 2) def test_bar_center(self): - df = DataFrame({'A': [3] * 5, 'B': range(5)}, index=range(5)) + df = DataFrame({'A': [3] * 5, 'B': list(range(5))}, index=list(range(5))) ax = df.plot(kind='bar', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width()) @@ -521,7 +525,7 @@ def test_bar_log(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 # regressions in 1.2.1 - df = DataFrame({'A': [3] * 5, 'B': range(1, 6)}, index=range(5)) + df = DataFrame({'A': [3] * 5, 'B': list(range(1, 6))}, index=list(range(5))) ax = df.plot(kind='bar', grid=True, log=True) self.assertEqual(ax.yaxis.get_ticklocs()[0], 1.0) @@ -765,7 +769,7 @@ def test_style_by_column(self): def test_line_colors(self): import matplotlib.pyplot as plt import sys - from StringIO import StringIO + from pandas.util.py3compat import StringIO from matplotlib import cm custom_colors = 'rgcby' @@ -796,7 +800,7 @@ def test_line_colors(self): ax = df.plot(colormap='jet') - rgba_colors = map(cm.jet, np.linspace(0, 1, len(df))) + rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df)))) lines = ax.get_lines() for i, l in enumerate(lines): @@ -808,7 +812,7 @@ def test_line_colors(self): ax = df.plot(colormap=cm.jet) - rgba_colors = map(cm.jet, np.linspace(0, 1, len(df))) + rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df)))) lines = ax.get_lines() for i, l in enumerate(lines): @@ -887,7 +891,7 @@ def test_boxplot(self): 
_check_plot_works(grouped.boxplot) _check_plot_works(grouped.boxplot, subplots=False) - tuples = zip(list(string.ascii_letters[:10]), range(10)) + tuples = list(zip(string.ascii_letters[:10], range(10))) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) grouped = df.groupby(level=1) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 6af287b77cbac..28756a1c079d5 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1,3 +1,6 @@ +from __future__ import print_function +from pandas.util.py3compat import range, long +from pandas.util import compat import nose import unittest @@ -23,11 +26,13 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm +from six.moves import map +from six.moves import zip def commonSetUp(self): self.dateRange = bdate_range('1/1/2005', periods=250) - self.stringIndex = Index([rands(8).upper() for x in xrange(250)]) + self.stringIndex = Index([rands(8).upper() for x in range(250)]) self.groupId = Series([x[0] for x in self.stringIndex], index=self.stringIndex) @@ -189,9 +194,9 @@ def test_first_last_nth_dtypes(self): assert_frame_equal(nth, expected, check_names=False) # GH 2763, first/last shifting dtypes - idx = range(10) + idx = list(range(10)) idx.append(9) - s = Series(data=range(11), index=idx, name='IntCol') + s = Series(data=list(range(11)), index=idx, name='IntCol') self.assert_(s.dtype == 'int64') f = s.groupby(level=0).first() self.assert_(f.dtype == 'int64') @@ -263,7 +268,7 @@ def test_groupby_nonobject_dtype(self): # GH 3911, mixed frame non-conversion df = self.df_mixed_floats.copy() - df['value'] = range(len(df)) + df['value'] = list(range(len(df))) def max_value(group): return group.ix[group['value'].idxmax()] @@ -278,7 +283,7 @@ def max_value(group): def test_groupby_return_type(self): # GH2893, return a reduced type - df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, + df1 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":2, "val2": 27}, {"val1":2, "val2": 12}]) def func(dataf): @@ -287,7 +292,7 @@ def func(dataf): result = df1.groupby("val1", squeeze=True).apply(func) self.assert_(isinstance(result,Series)) - df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, + df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":1, "val2": 27}, {"val1":1, "val2": 12}]) def func(dataf): return dataf["val2"] - dataf["val2"].mean() @@ -500,7 +505,7 @@ def test_agg_item_by_item_raise_typeerror(self): def raiseException(df): print ('----------------------------------------') - print (df.to_string()) + print(df.to_string()) raise TypeError self.assertRaises(TypeError, df.groupby(0).agg, @@ -508,11 +513,11 @@ def raiseException(df): def test_basic_regression(self): # regression - T = [1.0 * x for x in range(1, 10) * 10][:1095] - result = Series(T, range(0, len(T))) + T = [1.0 * x for x in list(range(1, 10)) * 10][:1095] + result = Series(T, list(range(0, len(T)))) groupings = np.random.random((1100,)) - groupings = Series(groupings, range(0, len(groupings))) * 10. + groupings = Series(groupings, list(range(0, len(groupings)))) * 10. 
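A quick aside on the `list(range(...))` wrapping in the hunks above: with `range` imported from pandas.util.py3compat, call sites get the lazy 3.x-style object on both versions, which supports iteration and indexing but not mutation, concatenation, or repetition. A minimal sketch of the failure modes being avoided (illustrative only, not code from the patch):

    idx = list(range(10))   # materialize before mutating;
    idx.append(9)           # a 3.x range object has no .append

    # range(1, 10) * 10     -> TypeError on 3.x
    cycle = list(range(1, 10)) * 10   # portable: repeat the list instead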
grouped = result.groupby(groupings) grouped.mean() @@ -707,12 +712,12 @@ def f3(x): return y df = DataFrame({'a':[1,2,2,2], - 'b':range(4), - 'c':range(5,9)}) + 'b':list(range(4)), + 'c':list(range(5,9))}) df2 = DataFrame({'a':[3,2,2,2], - 'b':range(4), - 'c':range(5,9)}) + 'b':list(range(4)), + 'c':list(range(5,9))}) # correct result @@ -1153,7 +1158,7 @@ def test_groupby_as_index_cython(self): result = grouped.mean() expected = data.groupby(['A', 'B']).mean() - arrays = zip(*expected.index._tuple_index) + arrays = list(zip(*expected.index._tuple_index)) expected.insert(0, 'A', arrays[0]) expected.insert(1, 'B', arrays[1]) expected.index = np.arange(len(expected)) @@ -1416,7 +1421,7 @@ def test_groupby_level(self): def test_groupby_level_index_names(self): ## GH4014 this used to raise ValueError since 'exp'>1 (in py2) - df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : range(6),}).set_index('exp') + df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : list(range(6)),}).set_index('exp') df.groupby(level='exp') self.assertRaises(ValueError, df.groupby, level='foo') @@ -1565,7 +1570,7 @@ def test_mutate_groups(self): mydf = DataFrame({ 'cat1' : ['a'] * 8 + ['b'] * 6, 'cat2' : ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + ['d'] * 2 + ['e'] * 2, - 'cat3' : map(lambda x: 'g%s' % x, range(1,15)), + 'cat3' : list(map(lambda x: 'g%s' % x, list(range(1,15)))), 'val' : np.random.randint(100, size=14), }) @@ -1585,7 +1590,7 @@ def f_no_copy(x): def test_apply_chunk_view(self): # Low level tinkering could be unsafe, make sure not df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'value': range(9)}) + 'value': list(range(9))}) # return view f = lambda x: x[:2] @@ -1597,7 +1602,7 @@ def test_apply_chunk_view(self): def test_apply_no_name_column_conflict(self): df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], 'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], - 'value': range(10)[::-1]}) + 'value': list(range(10))[::-1]}) # it works! 
#2605 grouped = df.groupby(['name', 'name2']) @@ -1615,10 +1620,10 @@ def test_groupby_series_indexed_differently(self): assert_series_equal(agged, exp) def test_groupby_with_hier_columns(self): - tuples = zip(*[['bar', 'bar', 'baz', 'baz', + tuples = list(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], ['one', 'two', 'one', 'two', - 'one', 'two', 'one', 'two']]) + 'one', 'two', 'one', 'two']])) index = MultiIndex.from_tuples(tuples) columns = MultiIndex.from_tuples([('A', 'cat'), ('B', 'dog'), ('B', 'cat'), ('A', 'dog')]) @@ -1849,14 +1854,14 @@ def test_groupby_nonstring_columns(self): def test_cython_grouper_series_bug_noncontig(self): arr = np.empty((100, 100)) arr.fill(np.nan) - obj = Series(arr[:, 0], index=range(100)) - inds = np.tile(range(10), 10) + obj = Series(arr[:, 0], index=list(range(100))) + inds = np.tile(list(range(10)), 10) result = obj.groupby(inds).agg(Series.median) self.assert_(result.isnull().all()) def test_series_grouper_noncontig_index(self): - index = Index([tm.rands(10) for _ in xrange(100)]) + index = Index([tm.rands(10) for _ in range(100)]) values = Series(np.random.randn(50), index=index[::2]) labels = np.random.randint(0, 5, 50) @@ -1872,7 +1877,7 @@ def test_convert_objects_leave_decimal_alone(self): from decimal import Decimal - s = Series(range(5)) + s = Series(list(range(5))) labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O') def convert_fast(x): @@ -1987,7 +1992,7 @@ def test_numpy_groupby(self): assert_almost_equal(result, expected) def test_groupby_2d_malformed(self): - d = DataFrame(index=range(2)) + d = DataFrame(index=list(range(2))) d['group'] = ['g1', 'g2'] d['zeros'] = [0, 0] d['ones'] = [1, 1] @@ -2031,8 +2036,8 @@ def test_int64_overflow(self): exp_index, _ = right.index.sortlevel(0) self.assert_(right.index.equals(exp_index)) - tups = map(tuple, df[['A', 'B', 'C', 'D', - 'E', 'F', 'G', 'H']].values) + tups = list(map(tuple, df[['A', 'B', 'C', 'D', + 'E', 'F', 'G', 'H']].values)) tups = com._asarray_tuplesafe(tups) expected = df.groupby(tups).sum()['values'] @@ -2046,18 +2051,18 @@ def test_groupby_sort_multi(self): 'c': [0, 1, 2], 'd': np.random.randn(3)}) - tups = map(tuple, df[['a', 'b', 'c']].values) + tups = list(map(tuple, df[['a', 'b', 'c']].values)) tups = com._asarray_tuplesafe(tups) result = df.groupby(['a', 'b', 'c'], sort=True).sum() self.assert_(np.array_equal(result.index.values, tups[[1, 2, 0]])) - tups = map(tuple, df[['c', 'a', 'b']].values) + tups = list(map(tuple, df[['c', 'a', 'b']].values)) tups = com._asarray_tuplesafe(tups) result = df.groupby(['c', 'a', 'b'], sort=True).sum() self.assert_(np.array_equal(result.index.values, tups)) - tups = map(tuple, df[['b', 'c', 'a']].values) + tups = list(map(tuple, df[['b', 'c', 'a']].values)) tups = com._asarray_tuplesafe(tups) result = df.groupby(['b', 'c', 'a'], sort=True).sum() self.assert_(np.array_equal(result.index.values, @@ -2092,8 +2097,8 @@ def test_column_select_via_attr(self): assert_frame_equal(result, expected) def test_rank_apply(self): - lev1 = np.array([rands(10) for _ in xrange(100)], dtype=object) - lev2 = np.array([rands(10) for _ in xrange(130)], dtype=object) + lev1 = np.array([rands(10) for _ in range(100)], dtype=object) + lev2 = np.array([rands(10) for _ in range(130)], dtype=object) lab1 = np.random.randint(0, 100, size=500) lab2 = np.random.randint(0, 130, size=500) @@ -2410,7 +2415,7 @@ def test_multiindex_columns_empty_level(self): l = [['count', 'values'], ['to filter', '']] midx = MultiIndex.from_tuples(l) - df = DataFrame([[1L, 
'A']], columns=midx) + df = DataFrame([[long(1), 'A']], columns=midx) grouped = df.groupby('to filter').groups self.assert_(np.array_equal(grouped['A'], [0])) @@ -2418,13 +2423,13 @@ def test_multiindex_columns_empty_level(self): grouped = df.groupby([('to filter', '')]).groups self.assert_(np.array_equal(grouped['A'], [0])) - df = DataFrame([[1L, 'A'], [2L, 'B']], columns=midx) + df = DataFrame([[long(1), 'A'], [long(2), 'B']], columns=midx) expected = df.groupby('to filter').groups result = df.groupby([('to filter', '')]).groups self.assertEquals(result, expected) - df = DataFrame([[1L, 'A'], [2L, 'A']], columns=midx) + df = DataFrame([[long(1), 'A'], [long(2), 'A']], columns=midx) expected = df.groupby('to filter').groups result = df.groupby([('to filter', '')]).groups @@ -2553,7 +2558,7 @@ def test_filter_single_column_df(self): grouped.filter(lambda x: x.mean() < 10, dropna=False), expected_odd.reindex(df.index)) assert_frame_equal( - grouped.filter(lambda x: x.mean() > 10, dropna=False), + grouped.filter(lambda x: x.mean() > 10, dropna=False), expected_even.reindex(df.index)) def test_filter_multi_column_df(self): @@ -2570,7 +2575,7 @@ def test_filter_mixed_df(self): df = pd.DataFrame({'A': [1, 12, 12, 1], 'B': 'a b c d'.split()}) grouper = df['A'].apply(lambda x: x % 2) grouped = df.groupby(grouper) - expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, + expected = pd.DataFrame({'A': [12, 12], 'B': ['b', 'c']}, index=[1, 2]) assert_frame_equal( grouped.filter(lambda x: x['A'].sum() > 10), expected) @@ -2613,7 +2618,7 @@ def raise_if_sum_is_zero(x): s = pd.Series([-1,0,1,2]) grouper = s.apply(lambda x: x % 2) grouped = s.groupby(grouper) - self.assertRaises(ValueError, + self.assertRaises(ValueError, lambda: grouped.filter(raise_if_sum_is_zero)) def test_filter_against_workaround(self): @@ -2673,7 +2678,7 @@ def assert_fp_equal(a, b): def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = map(tuple, df[keys].values) + tups = list(map(tuple, df[keys].values)) tups = com._asarray_tuplesafe(tups) expected = f(df.groupby(tups)[field]) for k, v in expected.iteritems(): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 250728dc59481..2141a6fc9c8dd 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1,6 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta +from pandas.util.py3compat import range import operator import pickle import unittest @@ -22,6 +23,8 @@ import pandas as pd from pandas.lib import Timestamp +import six +from six.moves import zip class TestIndex(unittest.TestCase): @@ -34,7 +37,7 @@ def setUp(self): self.intIndex = tm.makeIntIndex(100) self.floatIndex = tm.makeFloatIndex(100) self.empty = Index([]) - self.tuples = Index(zip(['foo', 'bar', 'baz'], [1, 2, 3])) + self.tuples = Index(list(zip(['foo', 'bar', 'baz'], [1, 2, 3]))) def test_hash_error(self): self.assertRaises(TypeError, hash, self.strIndex) @@ -368,13 +371,13 @@ def test_format(self): # 2845 index = Index([1, 2.0+3.0j, np.nan]) formatted = index.format() - expected = [str(index[0]), str(index[1]), u'NaN'] + expected = [str(index[0]), str(index[1]), six.u('NaN')] self.assertEquals(formatted, expected) # is this really allowed? 
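On the `six.u(...)` rewrites in these index tests: the u'' string prefix is a SyntaxError on Python 3.0-3.2 (it only returned in 3.3 via PEP 414), so unicode literals are routed through six, which yields the text type on both lines. A minimal sketch:

    import six

    alpha = six.u('\u03b1')   # u'\u03b1' on 2.x, '\u03b1' on 3.x
    assert isinstance(alpha, six.text_type)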
index = Index([1, 2.0+3.0j, None]) formatted = index.format() - expected = [str(index[0]), str(index[1]), u'NaN'] + expected = [str(index[0]), str(index[1]), six.u('NaN')] self.assertEquals(formatted, expected) self.strIndex[:0].format() @@ -467,8 +470,8 @@ def test_slice_locs_dup(self): def test_drop(self): n = len(self.strIndex) - dropped = self.strIndex.drop(self.strIndex[range(5, 10)]) - expected = self.strIndex[range(5) + range(10, n)] + dropped = self.strIndex.drop(self.strIndex[list(range(5, 10))]) + expected = self.strIndex[list(range(5)) + list(range(10, n))] self.assert_(dropped.equals(expected)) self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) @@ -857,7 +860,7 @@ def test_union_noncomparable(self): from datetime import datetime, timedelta # corner case, non-Int64Index now = datetime.now() - other = Index([now + timedelta(i) for i in xrange(4)], dtype=object) + other = Index([now + timedelta(i) for i in range(4)], dtype=object) result = self.index.union(other) expected = np.concatenate((self.index, other)) self.assert_(np.array_equal(result, expected)) @@ -890,14 +893,14 @@ def test_take_preserve_name(self): def test_int_name_format(self): from pandas import Series, DataFrame index = Index(['a', 'b', 'c'], name=0) - s = Series(range(3), index) - df = DataFrame(range(3), index=index) + s = Series(list(range(3)), index) + df = DataFrame(list(range(3)), index=index) repr(s) repr(df) def test_print_unicode_columns(self): df = pd.DataFrame( - {u"\u05d0": [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + {six.u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError def test_repr_summary(self): @@ -907,15 +910,15 @@ def test_repr_summary(self): self.assertTrue("..." in r) def test_unicode_string_with_unicode(self): - idx = Index(range(1000)) + idx = Index(list(range(1000))) if py3compat.PY3: str(idx) else: - unicode(idx) + six.text_type(idx) def test_bytestring_with_unicode(self): - idx = Index(range(1000)) + idx = Index(list(range(1000))) if py3compat.PY3: bytes(idx) else: @@ -1151,9 +1154,9 @@ def test_get_loc(self): self.assertRaises(KeyError, self.index.get_loc, 'quux') # 3 levels - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(list(range(4))), + Index(list(range(4))), + Index(list(range(4)))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1173,9 +1176,9 @@ def test_get_loc_duplicates(self): assert(rs == xp) def test_get_loc_level(self): - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(list(range(4))), + Index(list(range(4))), + Index(list(range(4)))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1193,7 +1196,7 @@ def test_get_loc_level(self): self.assertRaises(KeyError, index.get_loc_level, (2, 2)) - index = MultiIndex(levels=[[2000], range(4)], + index = MultiIndex(levels=[[2000], list(range(4))], labels=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) @@ -1219,9 +1222,9 @@ def test_slice_locs(self): tm.assert_almost_equal(sliced.values, expected.values) def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(list(range(4))), + 
Index(list(range(4))), + Index(list(range(4)))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1276,11 +1279,11 @@ def test_slice_locs_not_contained(self): def test_consistency(self): # need to construct an overflow - major_axis = range(70000) - minor_axis = range(10) + major_axis = list(range(70000)) + minor_axis = list(range(10)) major_labels = np.arange(70000) - minor_labels = np.repeat(range(10), 7000) + minor_labels = np.repeat(list(range(10)), 7000) # the fact that is works means it's consistent index = MultiIndex(levels=[major_axis, minor_axis], @@ -1295,8 +1298,8 @@ def test_consistency(self): self.assert_(not index.is_unique) def test_truncate(self): - major_axis = Index(range(4)) - minor_axis = Index(range(2)) + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) major_labels = np.array([0, 0, 1, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 1, 0, 1]) @@ -1319,8 +1322,8 @@ def test_truncate(self): self.assertRaises(ValueError, index.truncate, 3, 1) def test_get_indexer(self): - major_axis = Index(range(4)) - minor_axis = Index(range(2)) + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) major_labels = np.array([0, 0, 1, 2, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 0, 1, 0, 1]) @@ -1353,8 +1356,6 @@ def test_get_indexer(self): r1 = idx1.get_indexer([1, 2, 3]) self.assert_((r1 == [-1, -1, -1]).all()) - # self.assertRaises(Exception, idx1.get_indexer, - # list(list(zip(*idx2._tuple_index))[0])) def test_format(self): self.index.format() @@ -1404,9 +1405,9 @@ def test_equals(self): self.assert_(self.index.equals(self.index._tuple_index)) # different number of levels - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(list(range(4))), + Index(list(range(4))), + Index(list(range(4)))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1417,8 +1418,8 @@ def test_equals(self): self.assert_(not index.equal_levels(index2)) # levels are different - major_axis = Index(range(4)) - minor_axis = Index(range(2)) + major_axis = Index(list(range(4))) + minor_axis = Index(list(range(2))) major_labels = np.array([0, 0, 1, 2, 2, 3]) minor_labels = np.array([0, 1, 0, 0, 1, 0]) @@ -1637,9 +1638,9 @@ def test_droplevel_with_names(self): dropped = index.droplevel(0) self.assertEqual(dropped.name, 'second') - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(list(range(4))), + Index(list(range(4))), + Index(list(range(4)))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], @@ -1652,9 +1653,9 @@ def test_droplevel_with_names(self): self.assert_(dropped.equals(expected)) def test_droplevel_multiple(self): - index = MultiIndex(levels=[Index(range(4)), - Index(range(4)), - Index(range(4))], + index = MultiIndex(levels=[Index(list(range(4))), + Index(list(range(4))), + Index(list(range(4)))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], @@ -1774,21 +1775,21 @@ def test_tolist(self): self.assertEqual(result, exp) def test_repr_with_unicode_data(self): - d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [six.u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index self.assertFalse("\\u" 
in repr(index)) # we don't want unicode-escaped def test_unicode_string_with_unicode(self): - d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [six.u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index if py3compat.PY3: str(idx) else: - unicode(idx) + six.text_type(idx) def test_bytestring_with_unicode(self): - d = {"a": [u"\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [six.u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index if py3compat.PY3: diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f0ace52f2c2b5..b72b1f3878597 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1,8 +1,9 @@ # pylint: disable-msg=W0612,E1101 +from pandas.util.py3compat import range import unittest import nose import itertools -from StringIO import StringIO +from pandas.util.py3compat import StringIO from numpy import random, nan from numpy.random import randn @@ -21,6 +22,7 @@ import pandas.lib as lib from pandas import date_range from numpy.testing.decorators import slow +from six.moves import map _verbose = False @@ -36,7 +38,7 @@ def _generate_indices(f, values=False): axes = f.axes if values: - axes = [ range(len(a)) for a in axes ] + axes = [ list(range(len(a))) for a in axes ] return itertools.product(*axes) @@ -94,9 +96,9 @@ def setUp(self): import warnings warnings.filterwarnings(action='ignore', category=FutureWarning) - self.series_ints = Series(np.random.rand(4), index=range(0,8,2)) - self.frame_ints = DataFrame(np.random.randn(4, 4), index=range(0, 8, 2), columns=range(0,12,3)) - self.panel_ints = Panel(np.random.rand(4,4,4), items=range(0,8,2),major_axis=range(0,12,3),minor_axis=range(0,16,4)) + self.series_ints = Series(np.random.rand(4), index=list(range(0,8,2))) + self.frame_ints = DataFrame(np.random.randn(4, 4), index=list(range(0, 8, 2)), columns=list(range(0,12,3))) + self.panel_ints = Panel(np.random.rand(4,4,4), items=list(range(0,8,2)),major_axis=list(range(0,12,3)),minor_axis=list(range(0,16,4))) self.series_labels = Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) @@ -201,11 +203,11 @@ def _print(result, error = None): _print(result) - except (AssertionError): + except AssertionError: raise - except (TypeError): + except TypeError: raise AssertionError(_print('type error')) - except (Exception), detail: + except Exception as detail: # if we are in fails, the ok, otherwise raise it if fails is not None: @@ -342,7 +344,7 @@ def test_iloc_getitem_dups(self): def test_iloc_getitem_array(self): # array like - s = Series(index=range(1,4)) + s = Series(index=list(range(1,4))) self.check_result('array like', 'iloc', s.index, 'ix', { 0 : [2,4,6], 1 : [3,6,9], 2: [4,8,12] }, typs = ['ints']) def test_iloc_getitem_bool(self): @@ -547,7 +549,7 @@ def test_loc_setitem_frame(self): def test_iloc_getitem_frame(self): """ originally from test_frame.py""" - df = DataFrame(np.random.randn(10, 4), index=range(0, 20, 2), columns=range(0,8,2)) + df = DataFrame(np.random.randn(10, 4), index=list(range(0, 20, 2)), columns=list(range(0,8,2))) result = df.iloc[2] exp = df.ix[4] @@ -586,7 +588,7 @@ def test_iloc_getitem_frame(self): assert_frame_equal(result, expected) # with index-like - s = Series(index=range(1,5)) + s = Series(index=list(range(1,5))) result = df.iloc[s.index] expected = df.ix[[2,4,6,8]] 
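The exception-handling hunks above replace the 2.x-only comma form with `except ... as ...`, which parses on both 2.6+ and 3.x. A minimal sketch of the portable pattern (`risky` is an illustrative placeholder, not a name from the patch):

    def risky():
        raise ValueError('boom')

    try:
        risky()
    except TypeError:
        raise                      # let specific failures propagate first
    except Exception as detail:    # 2.x-only spelling was: except Exception, detail:
        print('caught: %s' % detail)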
assert_frame_equal(result, expected) @@ -633,7 +635,7 @@ def test_iloc_setitem_series(self): assert_frame_equal(result, expected) def test_iloc_setitem_series(self): - s = Series(np.random.randn(10), index=range(0,20,2)) + s = Series(np.random.randn(10), index=list(range(0,20,2))) s.iloc[1] = 1 result = s.iloc[1] @@ -796,7 +798,7 @@ def test_dups_fancy_indexing(self): # GH 3561, dups not in selected order ind = ['A', 'A', 'B', 'C'] - df = DataFrame({'test':range(len(ind))}, index=ind) + df = DataFrame({'test':list(range(len(ind)))}, index=ind) rows = ['C', 'B'] res = df.ix[rows] self.assert_(rows == list(res.index)) @@ -878,8 +880,8 @@ def test_multi_assign(self): # GH 3626, an assignement of a sub-df to a df df = DataFrame({'FC':['a','b','a','b','a','b'], 'PF':[0,0,0,0,1,1], - 'col1':range(6), - 'col2':range(6,12)}) + 'col1':list(range(6)), + 'col2':list(range(6,12))}) df.ix[1,0]=np.nan df2 = df.copy() @@ -918,7 +920,7 @@ def test_ix_assign_column_mixed(self): assert_series_equal(df.B, orig + 1) # GH 3668, mixed frame with series value - df = DataFrame({'x':range(10), 'y':range(10,20),'z' : 'bar'}) + df = DataFrame({'x':list(range(10)), 'y':list(range(10,20)),'z' : 'bar'}) expected = df.copy() expected.ix[0, 'y'] = 1000 expected.ix[2, 'y'] = 1200 @@ -932,10 +934,10 @@ def test_ix_assign_column_mixed(self): def test_iloc_mask(self): # GH 3631, iloc with a mask (of a series) should raise - df = DataFrame(range(5), list('ABCDE'), columns=['a']) + df = DataFrame(list(range(5)), list('ABCDE'), columns=['a']) mask = (df.a%2 == 0) self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) - mask.index = range(len(mask)) + mask.index = list(range(len(mask))) self.assertRaises(NotImplementedError, df.iloc.__getitem__, tuple([mask])) # ndarray ok @@ -945,7 +947,7 @@ def test_iloc_mask(self): # the possibilities locs = np.arange(4) nums = 2**locs - reps = map(bin, nums) + reps = list(map(bin, nums)) df = DataFrame({'locs':locs, 'nums':nums}, reps) expected = { @@ -974,7 +976,7 @@ def test_iloc_mask(self): else: accessor = df ans = str(bin(accessor[mask]['nums'].sum())) - except Exception, e: + except Exception as e: ans = str(e) key = tuple([idx,method]) @@ -1042,7 +1044,7 @@ def test_iloc_non_unique_indexing(self): #GH 4017, non-unique indexing (on the axis) df = DataFrame({'A' : [0.1] * 3000, 'B' : [1] * 3000}) - idx = np.array(range(30)) * 99 + idx = np.array(list(range(30))) * 99 expected = df.iloc[idx] df3 = pd.concat([df, 2*df, 3*df]) @@ -1109,7 +1111,7 @@ def test_non_unique_loc_memory_error(self): columns = list('ABCDEFG') def gen_test(l,l2): - return pd.concat([ DataFrame(randn(l,len(columns)),index=range(l),columns=columns), + return pd.concat([ DataFrame(randn(l,len(columns)),index=list(range(l)),columns=columns), DataFrame(np.ones((l2,len(columns))),index=[0]*l2,columns=columns) ]) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 0f3b8c1634416..b1d29a97b39c1 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -11,6 +11,8 @@ from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, randn) +import six +from six.moves import zip def assert_block_equal(left, right): @@ -199,7 +201,7 @@ def test_unicode_repr(self): mat = np.empty((N, 2), dtype=object) mat[:, 0] = 'foo' mat[:, 1] = 'bar' - cols = ['b', u"\u05d0"] + cols = ['b', six.u("\u05d0")] str_repr = repr(make_block(mat.T, cols, TEST_COLS)) def test_get(self): @@ -385,7 +387,7 @@ def test_astype(self): self.assert_(tmgr.as_matrix().dtype == 
np.dtype(t)) def test_convert(self): - + def _compare(old_mgr, new_mgr): """ compare the blocks, numeric compare ==, object don't """ old_blocks = set(old_mgr.blocks) @@ -440,7 +442,7 @@ def _check(new_mgr,block_type, citems): _check(new_mgr,FloatBlock,['b','g']) _check(new_mgr,IntBlock,['a','f']) - mgr = create_blockmanager([b, get_int_ex(['f'],np.int32), get_bool_ex(['bool']), get_dt_ex(['dt']), + mgr = create_blockmanager([b, get_int_ex(['f'],np.int32), get_bool_ex(['bool']), get_dt_ex(['dt']), get_int_ex(['i'],np.int64), get_float_ex(['g'],np.float64), get_float_ex(['h'],np.float16)]) new_mgr = mgr.convert(convert_numeric = True) @@ -535,7 +537,7 @@ def test_get_numeric_data(self): def test_missing_unicode_key(self): df = DataFrame({"a": [1]}) try: - df.ix[:, u"\u05d0"] # should not raise UnicodeEncodeError + df.ix[:, six.u("\u05d0")] # should not raise UnicodeEncodeError except KeyError: pass # this is the expected exception diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d852bad215f77..d152e6ed1c41d 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1,5 +1,6 @@ # pylint: disable-msg=W0612,E1101,W0141 from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range import nose import unittest @@ -18,6 +19,8 @@ import pandas as pd import pandas.index as _index +import six +from six.moves import zip class TestMultiLevel(unittest.TestCase): @@ -43,7 +46,7 @@ def setUp(self): # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN @@ -89,7 +92,7 @@ def test_series_constructor(self): ['x', 'y', 'x', 'y']]) self.assert_(isinstance(multi.index, MultiIndex)) - multi = Series(range(4), index=[['a', 'a', 'b', 'b'], + multi = Series(list(range(4)), index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) self.assert_(isinstance(multi.index, MultiIndex)) @@ -349,8 +352,8 @@ def test_frame_setitem_multi_column(self): def test_getitem_tuple_plus_slice(self): # GH #671 - df = DataFrame({'a': range(10), - 'b': range(10), + df = DataFrame({'a': list(range(10)), + 'b': list(range(10)), 'c': np.random.randn(10), 'd': np.random.randn(10)}) @@ -429,7 +432,7 @@ def test_xs_level(self): def test_xs_level_multiple(self): from pandas import read_table - from StringIO import StringIO + from pandas.util.py3compat import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -443,7 +446,7 @@ def test_xs_level_multiple(self): assert_frame_equal(result, expected) # GH2107 - dates = range(20111201, 20111205) + dates = list(range(20111201, 20111205)) ids = 'abcde' idx = MultiIndex.from_tuples([x for x in cart_product(dates, ids)]) idx.names = ['date', 'secid'] @@ -454,7 +457,7 @@ def test_xs_level_multiple(self): def test_xs_level0(self): from pandas import read_table - from StringIO import StringIO + from pandas.util.py3compat import StringIO text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -588,7 +591,7 @@ def test_frame_setitem_ix(self): # with integer labels df = self.frame.copy() - df.columns = range(3) + df.columns = list(range(3)) df.ix[('bar', 'two'), 1] = 7 self.assertEquals(df.ix[('bar', 'two'), 1], 7) @@ -950,8 +953,8 @@ def test_stack_multiple_bug(self): def test_stack_dropna(self): # GH 
#3997 - df = pd.DataFrame({'A': ['a1', 'a2'], - 'B': ['b1', 'b2'], + df = pd.DataFrame({'A': ['a1', 'a2'], + 'B': ['b1', 'b2'], 'C': [1, 1]}) df = df.set_index(['A', 'B']) @@ -1167,7 +1170,7 @@ def test_frame_getitem_not_sorted(self): def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) @@ -1211,7 +1214,7 @@ def test_count(self): def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, - range(2), + list(range(2)), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) @@ -1225,7 +1228,7 @@ def test_frame_group_ops(self): self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - range(2), range(2), + list(range(2)), list(range(2)), [False, True]): if axis == 0: frame = self.frame @@ -1496,7 +1499,7 @@ def test_mixed_depth_get(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1516,7 +1519,7 @@ def test_mixed_depth_insert(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1532,7 +1535,7 @@ def test_mixed_depth_drop(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1584,7 +1587,7 @@ def test_mixed_depth_pop(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = zip(*arrays) + tuples = list(zip(*arrays)) tuples.sort() index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1677,7 +1680,7 @@ def test_drop_preserve_names(self): self.assert_(result.index.names == ['one', 'two']) def test_unicode_repr_issues(self): - levels = [Index([u'a/\u03c3', u'b/\u03c3', u'c/\u03c3']), + levels = [Index([six.u('a/\u03c3'), six.u('b/\u03c3'), six.u('c/\u03c3')]), Index([0, 1])] labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] index = MultiIndex(levels=levels, labels=labels) @@ -1689,9 +1692,9 @@ def test_unicode_repr_issues(self): def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], - names=[u'\u0394', 'i1']) + names=[six.u('\u0394'), 'i1']) - s = Series(range(2), index=index) + s = Series(list(range(2)), index=index) df = DataFrame(np.random.randn(2, 4), index=index) repr(s) repr(df) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 5d1053289b49e..d8c45ed6599d0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,6 +1,8 @@ # pylint: disable=W0612,E1101 from datetime import datetime +from pandas.util.py3compat import range +from pandas.util import compat import operator import unittest import nose @@ -269,12 +271,12 @@ def test_keys(self): tm.equalContents(self.panel.keys(), self.panel.items) def test_iteritems(self): - # Test panel.iteritems(), aka panel.iterkv() + # Test panel.iteritems(), aka 
panel.iteritems() # just test that it works - for k, v in self.panel.iterkv(): + for k, v in self.panel.iteritems(): pass - self.assertEqual(len(list(self.panel.iterkv())), + self.assertEqual(len(list(self.panel.iteritems())), len(self.panel.items)) def test_combineFrame(self): @@ -390,7 +392,7 @@ def test_delitem_and_pop(self): values[1] = 1 values[2] = 2 - panel = Panel(values, range(3), range(3), range(3)) + panel = Panel(values, list(range(3)), list(range(3)), list(range(3))) # did we delete the right row? @@ -811,8 +813,8 @@ def test_constructor_empty_panel(self): def test_constructor_observe_dtype(self): # GH #411 - panel = Panel(items=range(3), major_axis=range(3), - minor_axis=range(3), dtype='O') + panel = Panel(items=list(range(3)), major_axis=list(range(3)), + minor_axis=list(range(3)), dtype='O') self.assert_(panel.values.dtype == np.object_) def test_constructor_dtypes(self): @@ -824,19 +826,19 @@ def _check_dtype(panel, dtype): # only nan holding types allowed here for dtype in ['float64','float32','object']: - panel = Panel(items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.random.randn(2,10,5),items=range(2),major_axis=range(10),minor_axis=range(5),dtype=dtype) + panel = Panel(np.random.randn(2,10,5),items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) _check_dtype(panel,dtype) def test_consolidate(self): @@ -892,7 +894,7 @@ def test_ctor_dict(self): assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel.iterkv()) + data = dict((k, v.values) for k, v in self.panel.iteritems()) result = Panel(data) exp_major = Index(np.arange(len(self.panel.major_axis))) self.assert_(result.major_axis.equals(exp_major)) @@ -961,15 +963,15 @@ def test_from_dict_mixed_orient(self): def test_constructor_error_msgs(self): def testit(): - Panel(np.random.randn(3,4,5), range(4), range(5), range(5)) + Panel(np.random.randn(3,4,5), list(range(4)), list(range(5)), list(range(5))) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(4, 5, 5\)", testit) def testit(): - Panel(np.random.randn(3,4,5), range(5), range(4), range(5)) + Panel(np.random.randn(3,4,5), list(range(5)), list(range(4)), list(range(5))) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 4, 5\)", testit) def testit(): - Panel(np.random.randn(3,4,5), range(5), range(5), range(4)) + Panel(np.random.randn(3,4,5), list(range(5)), list(range(5)), list(range(4))) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 
4, 5\), indices imply \(5, 5, 4\)", testit) def test_conform(self): @@ -1282,7 +1284,7 @@ def test_shift(self): # negative numbers, #2164 result = self.panel.shift(-1) expected = Panel(dict((i, f.shift(-1)[:-1]) - for i, f in self.panel.iterkv())) + for i, f in self.panel.iteritems())) assert_panel_equal(result, expected) def test_multiindex_get(self): @@ -1381,7 +1383,7 @@ def test_to_excel(self): except ImportError: raise nose.SkipTest - for item, df in self.panel.iterkv(): + for item, df in self.panel.iteritems(): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 9c3a66c32c501..4119d2b5a0769 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -1,4 +1,5 @@ from datetime import datetime +from pandas.util.py3compat import range import os import operator import unittest @@ -218,12 +219,9 @@ def test_keys(self): tm.equalContents(self.panel4d.keys(), self.panel4d.labels) def test_iteritems(self): - """Test panel4d.iteritems(), aka panel4d.iterkv()""" - # just test that it works - for k, v in self.panel4d.iterkv(): - pass + """Test panel4d.iteritems()""" - self.assertEqual(len(list(self.panel4d.iterkv())), + self.assertEqual(len(list(self.panel4d.iteritems())), len(self.panel4d.labels)) def test_combinePanel4d(self): @@ -308,7 +306,7 @@ def test_delitem_and_pop(self): values[2] = 2 values[3] = 3 - panel4d = Panel4D(values, range(4), range(4), range(4), range(4)) + panel4d = Panel4D(values, list(range(4)), list(range(4)), list(range(4)), list(range(4))) # did we delete the right row? @@ -610,8 +608,8 @@ def test_constructor_empty_panel(self): def test_constructor_observe_dtype(self): # GH #411 - panel = Panel(items=range(3), major_axis=range(3), - minor_axis=range(3), dtype='O') + panel = Panel(items=list(range(3)), major_axis=list(range(3)), + minor_axis=list(range(3)), dtype='O') self.assert_(panel.values.dtype == np.object_) def test_consolidate(self): @@ -658,7 +656,7 @@ def test_ctor_dict(self): # assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel4d.iterkv()) + data = dict((k, v.values) for k, v in self.panel4d.iteritems()) result = Panel4D(data) exp_major = Index(np.arange(len(self.panel4d.major_axis))) self.assert_(result.major_axis.equals(exp_major)) @@ -721,7 +719,7 @@ def test_from_dict_mixed_orient(self): def test_values(self): self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5), - range(5), range(5), range(4)) + list(range(5)), list(range(5)), list(range(4))) def test_conform(self): p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index b24e097238a70..1228e1605f0a0 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -1,7 +1,8 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from StringIO import StringIO +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range import cPickle as pickle import operator import os diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 0f429bf715688..18f0c76b469a3 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import unittest import pandas.tools.rplot as rplot from pandas import read_csv diff --git a/pandas/tests/test_series.py 
b/pandas/tests/test_series.py index cbf7fb070e97f..f53b62474b811 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1,6 +1,9 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta, date +from pandas.util.py3compat import range +from six.moves import zip +from pandas.util import compat import os import operator import unittest @@ -29,6 +32,7 @@ assert_almost_equal, ensure_clean) import pandas.util.testing as tm +import six def _skip_if_no_scipy(): @@ -128,8 +132,8 @@ def test_getitem_setitem_ellipsis(self): self.assert_((result == 5).all()) def test_getitem_negative_out_of_bounds(self): - s = Series([tm.rands(5) for _ in xrange(10)], - index=[tm.rands(10) for _ in xrange(10)]) + s = Series([tm.rands(5) for _ in range(10)], + index=[tm.rands(10) for _ in range(10)]) self.assertRaises(IndexError, s.__getitem__, -11) self.assertRaises(IndexError, s.__setitem__, -11, 'foo') @@ -140,7 +144,7 @@ def test_multilevel_name_print(self): labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) - s = Series(range(0, len(index)), index=index, name='sth') + s = Series(list(range(0, len(index))), index=index, name='sth') expected = ["first second", "foo one 0", " two 1", @@ -177,7 +181,7 @@ def test_name_printing(self): s.name = None self.assert_(not "Name:" in repr(s)) # test big series (diff code path) - s = Series(range(0, 1000)) + s = Series(list(range(0, 1000))) s.name = "test" self.assert_("Name: test" in repr(s)) s.name = None @@ -231,7 +235,7 @@ def test_comparisons(self): def test_none_comparison(self): # bug brought up by #1079 - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) self.assertRaises(TypeError, s.__eq__, None) def test_sum_zero(self): @@ -320,8 +324,8 @@ def test_constructor_empty(self): empty2 = Series([]) assert_series_equal(empty, empty2) - empty = Series(index=range(10)) - empty2 = Series(np.nan, index=range(10)) + empty = Series(index=list(range(10))) + empty2 = Series(np.nan, index=list(range(10))) assert_series_equal(empty, empty2) def test_constructor_series(self): @@ -336,12 +340,12 @@ def test_constructor_generator(self): gen = (i for i in range(10)) result = Series(gen) - exp = Series(range(10)) + exp = Series(list(range(10))) assert_series_equal(result, exp) gen = (i for i in range(10)) - result = Series(gen, index=range(10, 20)) - exp.index = range(10, 20) + result = Series(gen, index=list(range(10, 20))) + exp.index = list(range(10, 20)) assert_series_equal(result, exp) def test_constructor_maskedarray(self): @@ -434,10 +438,10 @@ def test_constructor_sanitize(self): self.assertEquals(s.dtype, np.dtype('f8')) def test_constructor_pass_none(self): - s = Series(None, index=range(5)) + s = Series(None, index=list(range(5))) self.assert_(s.dtype == np.float64) - s = Series(None, index=range(5), dtype=object) + s = Series(None, index=list(range(5)), dtype=object) self.assert_(s.dtype == np.object_) def test_constructor_cast(self): @@ -455,15 +459,15 @@ def test_constructor_dtype_nocast(self): def test_constructor_dtype_datetime64(self): import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(tslib.iNaT, dtype='M8[ns]', index=list(range(5))) self.assert_(isnull(s).all() == True) #### in theory this should be all nulls, but since #### we are not specifying a dtype is ambiguous - s = Series(tslib.iNaT, index=range(5)) + s = Series(tslib.iNaT, index=list(range(5))) 
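On the `from six.moves import zip` (and `map`) imports appearing at the top of these test modules: the 3.x builtins return one-shot iterators, so six.moves gives 2.x the same behaviour, and any call site that indexes, slices, or reuses the result gets an explicit `list(...)` around it. A minimal sketch:

    from six.moves import map, zip

    pairs = list(zip('abc', [1, 2, 3]))              # materialized, so indexable
    first = pairs[0]                                 # ('a', 1)
    squares = list(map(lambda x: x * x, range(4)))   # reusable sequence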
self.assert_(isnull(s).all() == False) - s = Series(nan, dtype='M8[ns]', index=range(5)) + s = Series(nan, dtype='M8[ns]', index=list(range(5))) self.assert_(isnull(s).all() == True) s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]') @@ -510,7 +514,7 @@ def test_constructor_dict(self): assert_series_equal(result, expected) def test_constructor_subclass_dict(self): - data = tm.TestSubDict((x, 10.0 * x) for x in xrange(10)) + data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) series = Series(data) refseries = Series(dict(data.iteritems())) assert_series_equal(refseries, series) @@ -639,7 +643,7 @@ def test_getitem_get(self): self.assertRaises(KeyError, self.ts.__getitem__, d) def test_iget(self): - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) for i in range(len(s)): result = s.iget(i) exp = s[s.index[i]] @@ -664,12 +668,12 @@ def test_iget_nonunique(self): self.assertEqual(s.iget(2), 2) def test_getitem_regression(self): - s = Series(range(5), index=range(5)) - result = s[range(5)] + s = Series(list(range(5)), index=list(range(5))) + result = s[list(range(5))] assert_series_equal(result, s) def test_getitem_setitem_slice_bug(self): - s = Series(range(10), range(10)) + s = Series(list(range(10)), list(range(10))) result = s[-12:] assert_series_equal(result, s) @@ -679,7 +683,7 @@ def test_getitem_setitem_slice_bug(self): result = s[:-12] assert_series_equal(result, s[:0]) - s = Series(range(10), range(10)) + s = Series(list(range(10)), list(range(10))) s[-12:] = 0 self.assert_((s == 0).all()) @@ -779,12 +783,12 @@ def test_getitem_box_float64(self): self.assert_(isinstance(value, np.float64)) def test_getitem_ambiguous_keyerror(self): - s = Series(range(10), index=range(0, 20, 2)) + s = Series(list(range(10)), index=list(range(0, 20, 2))) self.assertRaises(KeyError, s.__getitem__, 1) self.assertRaises(KeyError, s.ix.__getitem__, 1) def test_getitem_unordered_dup(self): - obj = Series(range(5), index=['c', 'a', 'a', 'b', 'b']) + obj = Series(list(range(5)), index=['c', 'a', 'a', 'b', 'b']) self.assert_(np.isscalar(obj['c'])) self.assert_(obj['c'] == 0) @@ -798,7 +802,7 @@ def test_getitem_dups_with_missing(self): assert_series_equal(result,expected) def test_setitem_ambiguous_keyerror(self): - s = Series(range(10), index=range(0, 20, 2)) + s = Series(list(range(10)), index=list(range(0, 20, 2))) self.assertRaises(KeyError, s.__setitem__, 1, 5) self.assertRaises(KeyError, s.ix.__setitem__, 1, 5) @@ -971,7 +975,7 @@ def test_basic_getitem_with_labels(self): assert_series_equal(result, expected) # integer indexes, be careful - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) result = s[inds] @@ -998,7 +1002,7 @@ def test_basic_setitem_with_labels(self): assert_series_equal(cp, exp) # integer indexes, be careful - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) inds = [0, 4, 6] arr_inds = np.array([0, 4, 6]) @@ -1047,7 +1051,7 @@ def test_ix_getitem_not_monotonic(self): self.assertRaises(KeyError, ts2.ix.__setitem__, slice(d1, d2), 0) def test_ix_getitem_setitem_integer_slice_keyerrors(self): - s = Series(np.random.randn(10), index=range(0, 20, 2)) + s = Series(np.random.randn(10), index=list(range(0, 20, 2))) # this is OK cp = s.copy() @@ -1111,8 +1115,8 @@ def test_where(self): for dtype in [ np.int8, 
np.int16, np.int32, np.int64, np.float16, np.float32, np.float64 ]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 - s[mask] = range(2,7) - expected = Series(range(2,7) + range(5,10), dtype=dtype) + s[mask] = list(range(2,7)) + expected = Series(list(range(2,7)) + list(range(5,10)), dtype=dtype) assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1122,7 +1126,7 @@ def test_where(self): mask = s < 5 values = [2.5,3.5,4.5,5.5,6.5] s[mask] = values - expected = Series(values + range(5,10), dtype='float64') + expected = Series(values + list(range(5,10)), dtype='float64') assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1136,8 +1140,8 @@ def test_where(self): # GH3235 s = Series(np.arange(10),dtype='int64') mask = s < 5 - s[mask] = range(2,7) - expected = Series(range(2,7) + range(5,10),dtype='int64') + s[mask] = list(range(2,7)) + expected = Series(list(range(2,7)) + list(range(5,10)),dtype='int64') assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1286,13 +1290,13 @@ def test_repr(self): repr(ots) # various names - for name in ['', 1, 1.2, 'foo', u'\u03B1\u03B2\u03B3', + for name in ['', 1, 1.2, 'foo', six.u('\u03B1\u03B2\u03B3'), 'loooooooooooooooooooooooooooooooooooooooooooooooooooong', ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3), - (u'\u03B1', u'\u03B2', u'\u03B3'), - (u'\u03B1', 'bar')]: + (six.u('\u03B1'), six.u('\u03B2'), six.u('\u03B3')), + (six.u('\u03B1'), 'bar')]: self.series.name = name repr(self.series) @@ -1316,7 +1320,7 @@ def test_repr(self): self.assertFalse("a\n" in repr(ser)) def test_tidy_repr(self): - a = Series([u"\u05d0"] * 1000) + a = Series([six.u("\u05d0")] * 1000) a.name = 'title1' repr(a) # should not raise exception @@ -1341,7 +1345,7 @@ def test_repr_name_iterable_indexable(self): # it works! 
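The repr round-trip tests just below call `six.text_type(...)` where the 2.x code called `unicode(...)`: the `unicode` builtin does not exist on 3.x, and six.text_type names the text type on both lines. A minimal sketch:

    import six

    s = six.u('\u05d0')
    assert isinstance(s, six.text_type)
    rendered = six.text_type(s)   # unicode(s) on 2.x, str(s) on 3.x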
repr(s) - s.name = (u"\u05d0",) * 2 + s.name = (six.u("\u05d0"),) * 2 repr(s) def test_repr_should_return_str(self): @@ -1354,19 +1358,19 @@ def test_repr_should_return_str(self): """ data = [8, 5, 3, 5] - index1 = [u"\u03c3", u"\u03c4", u"\u03c5", u"\u03c6"] + index1 = [six.u("\u03c3"), six.u("\u03c4"), six.u("\u03c5"), six.u("\u03c6")] df = Series(data, index=index1) self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 def test_unicode_string_with_unicode(self): - df = Series([u"\u05d0"], name=u"\u05d1") + df = Series([six.u("\u05d0")], name=six.u("\u05d1")) if py3compat.PY3: str(df) else: - unicode(df) + six.text_type(df) def test_bytestring_with_unicode(self): - df = Series([u"\u05d0"], name=u"\u05d1") + df = Series([six.u("\u05d0")], name=six.u("\u05d1")) if py3compat.PY3: bytes(df) else: @@ -1447,7 +1451,7 @@ def test_median(self): self._check_stat_op('median', np.median) # test with integers, test failure - int_ts = TimeSeries(np.ones(10, dtype=int), index=range(10)) + int_ts = TimeSeries(np.ones(10, dtype=int), index=list(range(10))) self.assertAlmostEqual(np.median(int_ts), int_ts.median()) def test_prod(self): @@ -1508,11 +1512,11 @@ def test_argsort(self): self.assert_(isnull(shifted[4]) == True) result = s.argsort() - expected = Series(range(5),dtype='int64') + expected = Series(list(range(5)),dtype='int64') assert_series_equal(result,expected) result = shifted.argsort() - expected = Series(range(4) + [-1],dtype='int64') + expected = Series(list(range(4)) + [-1],dtype='int64') assert_series_equal(result,expected) def test_argsort_stable(self): @@ -1634,7 +1638,7 @@ def test_round(self): self.assertEqual(result.name, self.ts.name) def test_prod_numpy16_bug(self): - s = Series([1., 1., 1.], index=range(3)) + s = Series([1., 1., 1.], index=list(range(3))) result = s.prod() self.assert_(not isinstance(result, Series)) @@ -2343,7 +2347,7 @@ def test_series_frame_radd_bug(self): import operator # GH 353 - vals = Series([rands(5) for _ in xrange(10)]) + vals = Series([rands(5) for _ in range(10)]) result = 'foo_' + vals expected = vals.map(lambda x: 'foo_' + x) assert_series_equal(result, expected) @@ -2620,9 +2624,9 @@ def test_value_counts_nunique(self): assert_series_equal(hist, expected) # GH 3002, datetime64[ns] - import StringIO + from pandas.util.py3compat import StringIO import pandas as pd - f = StringIO.StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") + f = StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") df = pd.read_fwf(f, widths=[6,8,3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = df.dt.copy() result = s.value_counts() @@ -2671,7 +2675,7 @@ def test_unique(self): self.assert_(np.array_equal(result, expected)) # test string arrays for coverage - strings = np.tile(np.array([tm.rands(10) for _ in xrange(10)]), 10) + strings = np.tile(np.array([tm.rands(10) for _ in range(10)]), 10) result = np.sort(nanops.unique1d(strings)) expected = np.unique(strings) self.assert_(np.array_equal(result, expected)) @@ -2819,7 +2823,7 @@ def test_to_csv(self): def test_to_csv_unicode_index(self): buf = StringIO() - s = Series([u"\u05d0", "d2"], index=[u"\u05d0", u"\u05d1"]) + s = Series([six.u("\u05d0"), "d2"], index=[six.u("\u05d0"), six.u("\u05d1")]) s.to_csv(buf, encoding='UTF-8') buf.seek(0) @@ -3343,7 +3347,7 @@ def test_astype_cast_object_int(self): def test_astype_datetimes(self): import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=range(5)) + s = Series(tslib.iNaT, 
dtype='M8[ns]', index=list(range(5))) s = s.astype('O') self.assert_(s.dtype == np.object_) @@ -3391,7 +3395,7 @@ def test_map_int(self): self.assert_(not isnull(merged['c'])) def test_map_type_inference(self): - s = Series(range(3)) + s = Series(list(range(3))) s2 = s.map(lambda x: np.where(x == 0, 0, 1)) self.assert_(issubclass(s2.dtype.type, np.integer)) @@ -3938,7 +3942,7 @@ def test_fillna_inplace(self): def test_fillna_invalid_method(self): try: self.ts.fillna(method='ffil') - except ValueError, inst: + except ValueError as inst: self.assert_('ffil' in str(inst)) def test_ffill(self): @@ -4024,7 +4028,7 @@ def test_replace(self): # malformed self.assertRaises(ValueError, ser.replace, [1, 2, 3], [np.nan, 0]) - self.assertRaises(ValueError, ser.replace, xrange(1, 3), [np.nan, 0]) + self.assertRaises(ValueError, ser.replace, range(1, 3), [np.nan, 0]) ser = Series([0, 1, 2, 3, 4]) result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0]) @@ -4302,7 +4306,7 @@ def test_reset_index(self): def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) - s = Series(range(10)) + s = Series(list(range(10))) s.index = idx self.assertTrue(isinstance(s, TimeSeries)) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index 0432d11aaa254..8dc5823429be4 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -1,3 +1,4 @@ +from pandas.util import compat import nose import unittest @@ -10,6 +11,7 @@ from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal) +import six class TestRank(unittest.TestCase): @@ -106,7 +108,7 @@ def _check2d(df, expected, method='average', axis=0): def test_rank_int(self): s = self.s.dropna().astype('i8') - for method, res in self.results.iteritems(): + for method, res in compat.iteritems(self.results): result = s.rank(method=method) expected = Series(res).dropna() expected.index = result.index diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d057dc5304277..9a1d3bc71a091 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1,6 +1,7 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta, date +from pandas.util.py3compat import range import os import operator import re @@ -21,6 +22,7 @@ import pandas.util.testing as tm import pandas.core.strings as strings +import six class TestStringMethods(unittest.TestCase): @@ -41,8 +43,8 @@ def test_iter(self): assert_array_equal(s.index, ds.index) for el in s: - # each element of the series is either a basestring or nan - self.assert_(isinstance(el, basestring) or isnull(el)) + # each element of the series is either a six.string_types or nan + self.assert_(isinstance(el, six.string_types) or isnull(el)) # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in @@ -73,7 +75,7 @@ def test_iter_single_element(self): def test_iter_numeric_try_string(self): # behavior identical to empty series - dsi = Series(range(4)) + dsi = Series(list(range(4))) i, s = 100, 'h' @@ -93,7 +95,7 @@ def test_iter_numeric_try_string(self): def test_iter_object_try_string(self): ds = Series([slice(None, randint(10), randint(10, 20)) - for _ in xrange(4)]) + for _ in range(4)]) i, s = 100, 'h' @@ -154,7 +156,7 @@ def test_count(self): tm.assert_almost_equal(rs, xp) # unicode - values = [u'foo', u'foofoo', NA, u'foooofooofommmfoo'] + values = [six.u('foo'), six.u('foofoo'), NA, six.u('foooofooofommmfoo')] result = 
strings.str_count(values, 'f[o]+') exp = [1, 2, NA, 4] @@ -189,7 +191,7 @@ def test_contains(self): tm.assert_almost_equal(rs, xp) # unicode - values = [u'foo', NA, u'fooommm__foo', u'mmm_'] + values = [six.u('foo'), NA, six.u('fooommm__foo'), six.u('mmm_')] pat = 'mmm[_]+' result = strings.str_contains(values, pat) @@ -229,8 +231,8 @@ def test_startswith(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'om', NA, u'foo_nom', u'nom', u'bar_foo', NA, - u'foo']) + values = Series([six.u('om'), NA, six.u('foo_nom'), six.u('nom'), six.u('bar_foo'), NA, + six.u('foo')]) result = values.str.startswith('foo') exp = Series([False, NA, True, False, False, NA, True]) @@ -257,8 +259,8 @@ def test_endswith(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'om', NA, u'foo_nom', u'nom', u'bar_foo', NA, - u'foo']) + values = Series([six.u('om'), NA, six.u('foo_nom'), six.u('nom'), six.u('bar_foo'), NA, + six.u('foo')]) result = values.str.endswith('foo') exp = Series([False, NA, False, False, True, NA, True]) @@ -282,10 +284,10 @@ def test_title(self): tm.assert_almost_equal(mixed, exp) # unicode - values = Series([u"FOO", NA, u"bar", u"Blurg"]) + values = Series([six.u("FOO"), NA, six.u("bar"), six.u("Blurg")]) results = values.str.title() - exp = Series([u"Foo", NA, u"Bar", u"Blurg"]) + exp = Series([six.u("Foo"), NA, six.u("Bar"), six.u("Blurg")]) tm.assert_series_equal(results, exp) @@ -309,10 +311,10 @@ def test_lower_upper(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'om', NA, u'nom', u'nom']) + values = Series([six.u('om'), NA, six.u('nom'), six.u('nom')]) result = values.str.upper() - exp = Series([u'OM', NA, u'NOM', u'NOM']) + exp = Series([six.u('OM'), NA, six.u('NOM'), six.u('NOM')]) tm.assert_series_equal(result, exp) result = result.str.lower() @@ -339,14 +341,14 @@ def test_replace(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'fooBAD__barBAD', NA]) + values = Series([six.u('fooBAD__barBAD'), NA]) result = values.str.replace('BAD[_]*', '') - exp = Series([u'foobar', NA]) + exp = Series([six.u('foobar'), NA]) tm.assert_series_equal(result, exp) result = values.str.replace('BAD[_]*', '', n=1) - exp = Series([u'foobarBAD', NA]) + exp = Series([six.u('foobarBAD'), NA]) tm.assert_series_equal(result, exp) #flags + unicode @@ -377,14 +379,17 @@ def test_repeat(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a', u'b', NA, u'c', NA, u'd']) + values = Series([six.u('a'), six.u('b'), NA, six.u('c'), NA, + six.u('d')]) result = values.str.repeat(3) - exp = Series([u'aaa', u'bbb', NA, u'ccc', NA, u'ddd']) + exp = Series([six.u('aaa'), six.u('bbb'), NA, six.u('ccc'), NA, + six.u('ddd')]) tm.assert_series_equal(result, exp) result = values.str.repeat([1, 2, 3, 4, 5, 6]) - exp = Series([u'a', u'bb', NA, u'cccc', NA, u'dddddd']) + exp = Series([six.u('a'), six.u('bb'), NA, six.u('cccc'), NA, + six.u('dddddd')]) tm.assert_series_equal(result, exp) def test_match(self): @@ -404,10 +409,10 @@ def test_match(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'fooBAD__barBAD', NA, u'foo']) + values = Series([six.u('fooBAD__barBAD'), NA, six.u('foo')]) result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([(u'BAD__', u'BAD'), NA, []]) + exp = Series([(six.u('BAD__'), six.u('BAD')), NA, []]) tm.assert_series_equal(result, exp) def test_join(self): @@ -426,7 +431,8 @@ def test_join(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a_b_c', u'c_d_e', np.nan, u'f_g_h']) + 
values = Series([six.u('a_b_c'), six.u('c_d_e'), np.nan, + six.u('f_g_h')]) result = values.str.split('_').str.join('_') tm.assert_series_equal(values, result) @@ -448,7 +454,8 @@ def test_len(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'foo', u'fooo', u'fooooo', np.nan, u'fooooooo']) + values = Series([six.u('foo'), six.u('fooo'), six.u('fooooo'), np.nan, + six.u('fooooooo')]) result = values.str.len() exp = values.map(lambda x: len(x) if com.notnull(x) else NA) @@ -472,10 +479,11 @@ def test_findall(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'fooBAD__barBAD', NA, u'foo', u'BAD']) + values = Series([six.u('fooBAD__barBAD'), NA, six.u('foo'), + six.u('BAD')]) result = values.str.findall('BAD[_]*') - exp = Series([[u'BAD__', u'BAD'], NA, [], [u'BAD']]) + exp = Series([[six.u('BAD__'), six.u('BAD')], NA, [], [six.u('BAD')]]) tm.assert_almost_equal(result, exp) def test_pad(self): @@ -522,18 +530,22 @@ def test_pad(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a', u'b', NA, u'c', NA, u'eeeeee']) + values = Series([six.u('a'), six.u('b'), NA, six.u('c'), NA, + six.u('eeeeee')]) result = values.str.pad(5, side='left') - exp = Series([u' a', u' b', NA, u' c', NA, u'eeeeee']) + exp = Series([six.u(' a'), six.u(' b'), NA, six.u(' c'), NA, + six.u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='right') - exp = Series([u'a ', u'b ', NA, u'c ', NA, u'eeeeee']) + exp = Series([six.u('a '), six.u('b '), NA, six.u('c '), NA, + six.u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='both') - exp = Series([u' a ', u' b ', NA, u' c ', NA, u'eeeeee']) + exp = Series([six.u(' a '), six.u(' b '), NA, six.u(' c '), NA, + six.u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_center(self): @@ -555,10 +567,12 @@ def test_center(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a', u'b', NA, u'c', NA, u'eeeeee']) + values = Series([six.u('a'), six.u('b'), NA, six.u('c'), NA, + six.u('eeeeee')]) result = values.str.center(5) - exp = Series([u' a ', u' b ', NA, u' c ', NA, u'eeeeee']) + exp = Series([six.u(' a '), six.u(' b '), NA, six.u(' c '), NA, + six.u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_split(self): @@ -585,11 +599,12 @@ def test_split(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a_b_c', u'c_d_e', NA, u'f_g_h']) + values = Series([six.u('a_b_c'), six.u('c_d_e'), NA, six.u('f_g_h')]) result = values.str.split('_') - exp = Series([[u'a', u'b', u'c'], [u'c', u'd', u'e'], NA, - [u'f', u'g', u'h']]) + exp = Series([[six.u('a'), six.u('b'), six.u('c')], + [six.u('c'), six.u('d'), six.u('e')], NA, + [six.u('f'), six.u('g'), six.u('h')]]) tm.assert_series_equal(result, exp) def test_split_noargs(self): @@ -650,10 +665,11 @@ def test_slice(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'aafootwo', u'aabartwo', NA, u'aabazqux']) + values = Series([six.u('aafootwo'), six.u('aabartwo'), NA, + six.u('aabazqux')]) result = values.str.slice(2, 5) - exp = Series([u'foo', u'bar', NA, u'baz']) + exp = Series([six.u('foo'), six.u('bar'), NA, six.u('baz')]) tm.assert_series_equal(result, exp) def test_slice_replace(self): @@ -702,18 +718,19 @@ def test_strip_lstrip_rstrip_mixed(self): def test_strip_lstrip_rstrip_unicode(self): # unicode - values = Series([u' aa ', u' bb \n', NA, u'cc ']) + values = Series([six.u(' aa '), six.u(' bb \n'), NA, + six.u('cc ')]) result = values.str.strip() - exp = 
Series([u'aa', u'bb', NA, u'cc']) + exp = Series([six.u('aa'), six.u('bb'), NA, six.u('cc')]) tm.assert_series_equal(result, exp) result = values.str.lstrip() - exp = Series([u'aa ', u'bb \n', NA, u'cc ']) + exp = Series([six.u('aa '), six.u('bb \n'), NA, six.u('cc ')]) tm.assert_series_equal(result, exp) result = values.str.rstrip() - exp = Series([u' aa', u' bb', NA, u'cc']) + exp = Series([six.u(' aa'), six.u(' bb'), NA, six.u('cc')]) tm.assert_series_equal(result, exp) def test_strip_lstrip_rstrip_args(self): @@ -732,17 +749,18 @@ def test_strip_lstrip_rstrip_args(self): assert_series_equal(rs, xp) def test_strip_lstrip_rstrip_args_unicode(self): - values = Series([u'xxABCxx', u'xx BNSD', u'LDFJH xx']) + values = Series([six.u('xxABCxx'), six.u('xx BNSD'), + six.u('LDFJH xx')]) - rs = values.str.strip(u'x') + rs = values.str.strip(six.u('x')) xp = Series(['ABC', ' BNSD', 'LDFJH ']) assert_series_equal(rs, xp) - rs = values.str.lstrip(u'x') + rs = values.str.lstrip(six.u('x')) xp = Series(['ABCxx', ' BNSD', 'LDFJH xx']) assert_series_equal(rs, xp) - rs = values.str.rstrip(u'x') + rs = values.str.rstrip(six.u('x')) xp = Series(['xxABC', 'xx BNSD', 'LDFJH ']) assert_series_equal(rs, xp) @@ -768,10 +786,11 @@ def test_get(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([u'a_b_c', u'c_d_e', np.nan, u'f_g_h']) + values = Series([six.u('a_b_c'), six.u('c_d_e'), np.nan, + six.u('f_g_h')]) result = values.str.split('_').str.get(1) - expected = Series([u'b', u'd', np.nan, u'g']) + expected = Series([six.u('b'), six.u('d'), np.nan, six.u('g')]) tm.assert_series_equal(result, expected) def test_more_contains(self): @@ -872,7 +891,7 @@ def test_match_findall_flags(self): self.assertEquals(result[0], True) def test_encode_decode(self): - base = Series([u'a', u'b', u'a\xe4']) + base = Series([six.u('a'), six.u('b'), six.u('a\xe4')]) series = base.str.encode('utf-8') f = lambda x: x.decode('utf-8') @@ -882,7 +901,7 @@ def test_encode_decode(self): tm.assert_series_equal(result, exp) def test_encode_decode_errors(self): - encodeBase = Series([u'a', u'b', u'a\x9d']) + encodeBase = Series([six.u('a'), six.u('b'), six.u('a\x9d')]) self.assertRaises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252') diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 54c00e798f08a..22679d36b26c3 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -1,3 +1,5 @@ +from pandas.util.py3compat import range +from six.moves import zip import unittest from numpy import nan @@ -30,7 +32,7 @@ def test_groupby_withnull(self): def test_backfill(self): old = Index([1, 5, 10]) - new = Index(range(12)) + new = Index(list(range(12))) filler = algos.backfill_int64(old, new) @@ -39,7 +41,7 @@ def test_backfill(self): # corner case old = Index([1, 4]) - new = Index(range(5, 10)) + new = Index(list(range(5, 10))) filler = algos.backfill_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] @@ -47,7 +49,7 @@ def test_backfill(self): def test_pad(self): old = Index([1, 5, 10]) - new = Index(range(12)) + new = Index(list(range(12))) filler = algos.pad_int64(old, new) @@ -56,7 +58,7 @@ def test_pad(self): # corner case old = Index([5, 10]) - new = Index(range(5)) + new = Index(list(range(5))) filler = algos.pad_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] self.assert_(np.array_equal(filler, expect_filler)) @@ -526,7 +528,7 @@ def _check(dtype): bins = np.array([6, 12], dtype=np.int64) out = np.zeros((3, 4), dtype) counts = np.zeros(len(out), dtype=np.int64) - + func = 
getattr(algos,'group_ohlc_%s' % dtype) func(out, counts, obj[:, None], bins) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index f96f3b98a0383..63d78ddde6549 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -2,7 +2,9 @@ SQL-style merge routines """ -import itertools +from pandas.util.py3compat import range, long +from six.moves import zip +import six import numpy as np import types from pandas.core.categorical import Categorical @@ -441,7 +443,7 @@ def _get_join_indexers(left_keys, right_keys, sort=False, how='inner'): right_labels.append(rlab) group_sizes.append(count) - max_groups = 1L + max_groups = long(1) for x in group_sizes: max_groups *= long(x) @@ -892,7 +894,7 @@ def __init__(self, objs, axis=0, join='outer', join_axes=None, raise AssertionError('first argument must be a list-like of pandas ' 'objects, you passed an object of type ' '"{0}"'.format(type(objs).__name__)) - + if join == 'outer': self.intersect = False elif join == 'inner': @@ -959,7 +961,7 @@ def get_result(self): name = com._consensus_name_attr(self.objs) return Series(new_data, index=self.new_axes[0], name=name) elif self._is_series: - data = dict(itertools.izip(xrange(len(self.objs)), self.objs)) + data = dict(zip(range(len(self.objs)), self.objs)) index, columns = self.new_axes tmpdf = DataFrame(data, index=index) if columns is not None: @@ -1057,7 +1059,7 @@ def _concat_blocks(self, blocks): concat_items = indexer else: concat_items = self.new_axes[0].take(indexer) - + if self.ignore_index: ref_items = self._get_fresh_axis() return make_block(concat_values, concat_items, ref_items) @@ -1134,7 +1136,7 @@ def _get_new_axes(self): raise AssertionError() # ufff... - indices = range(ndim) + indices = list(range(ndim)) indices.remove(self.axis) for i, ax in zip(indices, self.join_axes): @@ -1199,7 +1201,7 @@ def _concat_indexes(indexes): def _make_concat_multiindex(indexes, keys, levels=None, names=None): if ((levels is None and isinstance(keys[0], tuple)) or (levels is not None and len(levels) > 1)): - zipped = zip(*keys) + zipped = list(zip(*keys)) if names is None: names = [None] * len(zipped) @@ -1297,7 +1299,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): def _should_fill(lname, rname): - if not isinstance(lname, basestring) or not isinstance(rname, basestring): + if not isinstance(lname, six.string_types) or not isinstance(rname, six.string_types): return True return lname == rname diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index 945f7fb4ab437..bc1ebd3752cc3 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -5,7 +5,11 @@ from pandas.core.reshape import _unstack_multiple from pandas.tools.merge import concat from pandas.tools.util import cartesian_product +from pandas.util.py3compat import range +from pandas.util import compat +import six import pandas.core.common as com +from six.moves import zip import numpy as np @@ -151,7 +155,7 @@ def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean): grand_margin = {} for k, v in data[values].iteritems(): try: - if isinstance(aggfunc, basestring): + if isinstance(aggfunc, six.string_types): grand_margin[k] = getattr(v, aggfunc)() else: grand_margin[k] = aggfunc(v) @@ -196,7 +200,7 @@ def _all_key(key): row_margin = row_margin.stack() # slight hack - new_order = [len(cols)] + range(len(cols)) + new_order = [len(cols)] + list(range(len(cols))) row_margin.index = row_margin.index.reorder_levels(new_order) else: row_margin = Series(np.nan, index=result.columns) diff 
--git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 1ffdf83b02763..483d989e9e13b 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1,5 +1,6 @@ # being a bit too dynamic # pylint: disable=E1101 +import six import datetime import warnings import re @@ -15,6 +16,8 @@ from pandas.tseries.period import PeriodIndex, Period from pandas.tseries.frequencies import get_period_alias, get_base_alias from pandas.tseries.offsets import DateOffset +from pandas.util.py3compat import range +from six.moves import map, zip try: # mpl optional import pandas.tseries.converter as conv @@ -96,13 +99,13 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', import matplotlib.pyplot as plt if color is None and colormap is not None: - if isinstance(colormap, basestring): + if isinstance(colormap, six.string_types): import matplotlib.cm as cm cmap = colormap colormap = cm.get_cmap(colormap) if colormap is None: raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = map(colormap, np.linspace(0, 1, num=num_colors)) + colors = list(map(colormap, np.linspace(0, 1, num=num_colors))) elif color is not None: if colormap is not None: warnings.warn("'color' and 'colormap' cannot be used " @@ -111,7 +114,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', else: if color_type == 'default': colors = plt.rcParams.get('axes.color_cycle', list('bgrcmyk')) - if isinstance(colors, basestring): + if isinstance(colors, six.string_types): colors = list(colors) elif color_type == 'random': import random @@ -119,7 +122,7 @@ def random_color(column): random.seed(column) return [random.random() for _ in range(3)] - colors = map(random_color, range(num_colors)) + colors = list(map(random_color, list(range(num_colors)))) else: raise NotImplementedError @@ -240,8 +243,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, marker = _get_marker_compat(marker) - for i, a in zip(range(n), df.columns): - for j, b in zip(range(n), df.columns): + for i, a in zip(list(range(n)), df.columns): + for j, b in zip(list(range(n)), df.columns): ax = axes[i, j] if i == j: @@ -500,7 +503,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): for sampling in samplings]) if fig is None: fig = plt.figure() - x = range(samples) + x = list(range(samples)) axes = [] ax1 = fig.add_subplot(2, 3, 1) ax1.set_xlabel("Sample") @@ -598,7 +601,7 @@ def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None, raise ValueError('Length of xticks must match number of columns') x = xticks else: - x = range(ncols) + x = list(range(ncols)) if ax is None: ax = plt.gca() @@ -681,7 +684,7 @@ def autocorrelation_plot(series, ax=None): def r(h): return ((data[:n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0 x = np.arange(n) + 1 - y = map(r, x) + y = list(map(r, x)) z95 = 1.959963984540054 z99 = 2.5758293035489004 ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') @@ -984,7 +987,7 @@ def _adorn_subplots(self): if self._need_to_set_index: labels = [com.pprint_thing(key) for key in self.data.index] - labels = dict(zip(range(len(self.data.index)), labels)) + labels = dict(zip(list(range(len(self.data.index))), labels)) for ax_ in self.axes: # ax_.set_xticks(self.xticks) @@ -1035,9 +1038,9 @@ def _get_xticks(self, convert_period=False): x = self.data.index._mpl_repr() else: self._need_to_set_index = True - x = range(len(index)) + x = list(range(len(index))) else: - x = range(len(index)) + x = 
list(range(len(index))) return x @@ -1711,7 +1714,7 @@ def plot_series(series, label=None, kind='line', use_index=True, rot=None, if ax.get_yaxis().get_ticks_position().strip().lower() == 'right': fig = _gcf() axes = fig.get_axes() - for i in range(len(axes))[::-1]: + for i in reversed(range(len(axes))): ax = axes[i] ypos = ax.get_yaxis().get_ticks_position().strip().lower() if ypos == 'left': diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 43cbb9344b714..747d7bfb08d67 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,3 +1,5 @@ +from pandas.util.py3compat import range +from six.moves import zip import numpy as np import random from copy import deepcopy diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index b0261077fc767..0e6235438f3d0 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1,5 +1,8 @@ # pylint: disable=E1103 +from pandas.util.py3compat import range +from six.moves import zip +from pandas.util import compat import nose import unittest @@ -26,7 +29,7 @@ def get_test_data(ngroups=NGROUPS, n=N): - unique_groups = range(ngroups) + unique_groups = list(range(ngroups)) arr = np.asarray(np.tile(unique_groups, n // ngroups)) if len(arr) < n: @@ -555,8 +558,8 @@ def test_merge_different_column_key_names(self): assert_almost_equal(merged['value_y'], [6, np.nan, 5, 8, 5, 8, 7]) def test_merge_nocopy(self): - left = DataFrame({'a': 0, 'b': 1}, index=range(10)) - right = DataFrame({'c': 'foo', 'd': 'bar'}, index=range(10)) + left = DataFrame({'a': 0, 'b': 1}, index=list(range(10))) + right = DataFrame({'c': 'foo', 'd': 'bar'}, index=list(range(10))) merged = merge(left, right, left_index=True, right_index=True, copy=False) @@ -582,15 +585,15 @@ def test_join_sort(self): # smoke test joined = left.join(right, on='key', sort=False) - self.assert_(np.array_equal(joined.index, range(4))) + self.assert_(np.array_equal(joined.index, list(range(4)))) def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame left = DataFrame({'key': [1, 1, 2, 2, 3], - 'value': range(5)}, columns=['value', 'key']) + 'value': list(range(5))}, columns=['value', 'key']) right = DataFrame({'key': [1, 1, 2, 3, 4, 5], - 'rvalue': range(6)}) + 'rvalue': list(range(6))}) joined = merge(left, right, on='key', how='outer') expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5.], @@ -604,8 +607,8 @@ def test_intelligently_handle_join_key(self): def test_handle_join_key_pass_array(self): left = DataFrame({'key': [1, 1, 2, 2, 3], - 'value': range(5)}, columns=['value', 'key']) - right = DataFrame({'rvalue': range(6)}) + 'value': list(range(5))}, columns=['value', 'key']) + right = DataFrame({'rvalue': list(range(6))}) key = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on='key', right_on=key, how='outer') @@ -615,8 +618,8 @@ def test_handle_join_key_pass_array(self): self.assert_(merged['key'].notnull().all()) self.assert_(merged2['key'].notnull().all()) - left = DataFrame({'value': range(5)}, columns=['value']) - right = DataFrame({'rvalue': range(6)}) + left = DataFrame({'value': list(range(5))}, columns=['value']) + right = DataFrame({'rvalue': list(range(6))}) lkey = np.array([1, 1, 2, 2, 3]) rkey = np.array([1, 1, 2, 3, 4, 5]) @@ -624,8 +627,8 @@ def test_handle_join_key_pass_array(self): self.assert_(np.array_equal(merged['key_0'], np.array([1, 1, 1, 1, 2, 2, 3, 4, 5]))) - left = DataFrame({'value': range(3)}) - right = DataFrame({'rvalue': 
range(6)}) + left = DataFrame({'value': list(range(3))}) + right = DataFrame({'rvalue': list(range(6))}) key = np.array([0, 1, 1, 2, 2, 3]) merged = merge(left, right, left_index=True, right_on=key, how='outer') @@ -787,7 +790,7 @@ def setUp(self): def test_merge_on_multikey(self): joined = self.data.join(self.to_join, on=['key1', 'key2']) - join_key = Index(zip(self.data['key1'], self.data['key2'])) + join_key = Index(list(zip(self.data['key1'], self.data['key2']))) indexer = self.to_join.index.get_indexer(join_key) ex_values = self.to_join.values.take(indexer, axis=0) ex_values[indexer == -1] = np.nan @@ -809,7 +812,7 @@ def test_merge_right_vs_left(self): def test_compress_group_combinations(self): # ~ 40000000 possible unique groups - key1 = np.array([rands(10) for _ in xrange(10000)], dtype='O') + key1 = np.array([rands(10) for _ in range(10000)], dtype='O') key1 = np.tile(key1, 2) key2 = key1[::-1] @@ -1469,7 +1472,7 @@ def test_panel_join_many(self): data_dict = {} for p in panels: - data_dict.update(p.iterkv()) + data_dict.update(p.iteritems()) joined = panels[0].join(panels[1:], how='inner') expected = Panel.from_dict(data_dict, intersect=True) @@ -1613,7 +1616,7 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal(result.columns, range(2))) + self.assertTrue(np.array_equal(result.columns, list(range(2)))) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A') diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index a603118c2ad16..11a9fef9a0b53 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import unittest import numpy as np @@ -7,6 +8,7 @@ from pandas.tools.merge import concat from pandas.tools.pivot import pivot_table, crosstab import pandas.util.testing as tm +import six class TestPivotTable(unittest.TestCase): @@ -72,9 +74,18 @@ def test_pivot_table_dropna(self): pv_col = df.pivot_table('quantity', 'month', ['customer', 'product'], dropna=False) pv_ind = df.pivot_table('quantity', ['customer', 'product'], 'month', dropna=False) - m = MultiIndex.from_tuples([(u'A', u'a'), (u'A', u'b'), (u'A', u'c'), (u'A', u'd'), - (u'B', u'a'), (u'B', u'b'), (u'B', u'c'), (u'B', u'd'), - (u'C', u'a'), (u'C', u'b'), (u'C', u'c'), (u'C', u'd')]) + m = MultiIndex.from_tuples([(six.u('A'), six.u('a')), + (six.u('A'), six.u('b')), + (six.u('A'), six.u('c')), + (six.u('A'), six.u('d')), + (six.u('B'), six.u('a')), + (six.u('B'), six.u('b')), + (six.u('B'), six.u('c')), + (six.u('B'), six.u('d')), + (six.u('C'), six.u('a')), + (six.u('C'), six.u('b')), + (six.u('C'), six.u('c')), + (six.u('C'), six.u('d'))]) assert_equal(pv_col.columns.values, m.values) assert_equal(pv_ind.index.values, m.values) @@ -212,7 +223,7 @@ def test_pivot_integer_columns(self): d = datetime.date.min data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'], - [d + datetime.timedelta(i) for i in xrange(20)], [1.0])) + [d + datetime.timedelta(i) for i in range(20)], [1.0])) df = pandas.DataFrame(data) table = df.pivot_table(values=4, rows=[0, 1, 3], cols=[2]) diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index 7da9a3bb5a95a..54b8f05b61d52 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -3,6 +3,7 @@ import unittest import numpy as np +from six.moves import zip from pandas import DataFrame, Series, unique import pandas.util.testing as tm diff 
--git a/pandas/tools/tile.py b/pandas/tools/tile.py index ffed6cafc1047..31db8ed705fe4 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -8,6 +8,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.nanops as nanops +from six.moves import zip import numpy as np diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index d0ec942cec307..efbd80350379f 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,4 +1,6 @@ from datetime import datetime, timedelta +from pandas.util.py3compat import range +import six import datetime as pydt import numpy as np @@ -36,7 +38,7 @@ def _to_ordinalf(tm): def time2num(d): - if isinstance(d, basestring): + if isinstance(d, six.string_types): parsed = tools.to_datetime(d) if not isinstance(parsed, datetime): raise ValueError('Could not parse time %s' % d) @@ -161,7 +163,7 @@ def try_parse(values): return dates.date2num(values) elif (com.is_integer(values) or com.is_float(values)): return values - elif isinstance(values, basestring): + elif isinstance(values, six.string_types): return try_parse(values) elif isinstance(values, (list, tuple, np.ndarray)): if not isinstance(values, np.ndarray): @@ -330,7 +332,7 @@ def __call__(self): if len(all_dates) > 0: locs = self.raise_if_exceeds(dates.date2num(all_dates)) return locs - except Exception, e: # pragma: no cover + except Exception as e: # pragma: no cover pass lims = dates.date2num([dmin, dmax]) @@ -808,7 +810,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): - if isinstance(freq, basestring): + if isinstance(freq, six.string_types): freq = frequencies.get_freq(freq) fgroup = frequencies.get_freq_group(freq) @@ -845,7 +847,7 @@ class TimeSeries_DateLocator(Locator): def __init__(self, freq, minor_locator=False, dynamic_mode=True, base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, basestring): + if isinstance(freq, six.string_types): freq = frequencies.get_freq(freq) self.freq = freq self.base = base @@ -884,7 +886,7 @@ def __call__(self): base = self.base (d, m) = divmod(vmin, base) vmin = (d + 1) * base - locs = range(vmin, vmax + 1, base) + locs = list(range(vmin, vmax + 1, base)) return locs def autoscale(self): @@ -924,7 +926,7 @@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): - if isinstance(freq, basestring): + if isinstance(freq, six.string_types): freq = frequencies.get_freq(freq) self.format = None self.freq = freq diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 51b8e5d042ca9..20caf150cb55d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,4 +1,8 @@ from datetime import datetime +from pandas.util.py3compat import range, long +from pandas.util import compat +from six.moves import zip +import six import re import numpy as np @@ -54,14 +58,14 @@ def get_to_timestamp_base(base): def get_freq_group(freq): - if isinstance(freq, basestring): + if isinstance(freq, six.string_types): base, mult = get_freq_code(freq) freq = base return (freq // 1000) * 1000 def get_freq(freq): - if isinstance(freq, basestring): + if isinstance(freq, six.string_types): base, mult = get_freq_code(freq) freq = base return freq @@ -364,7 +368,7 @@ def get_period_alias(offset_str): } for _i, _weekday in enumerate(['MON', 'TUE', 'WED', 'THU', 'FRI']): - for _iweek in xrange(4): + for _iweek in range(4): _name = 'WOM-%d%s' % (_iweek + 1, _weekday) 
_offset_map[_name] = offsets.WeekOfMonth(week=_iweek, weekday=_i) _rule_aliases[_name.replace('-', '@')] = _name @@ -416,7 +420,7 @@ def to_offset(freqstr): if isinstance(freqstr, tuple): name = freqstr[0] stride = freqstr[1] - if isinstance(stride, basestring): + if isinstance(stride, six.string_types): name, stride = stride, name name, _ = _base_and_stride(name) delta = get_offset(name) * stride @@ -770,7 +774,7 @@ def infer_freq(index, warn=True): inferer = _FrequencyInferer(index, warn=warn) return inferer.get_freq() -_ONE_MICRO = 1000L +_ONE_MICRO = long(1000) _ONE_MILLI = _ONE_MICRO * 1000 _ONE_SECOND = _ONE_MILLI * 1000 _ONE_MINUTE = 60 * _ONE_SECOND diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 9983f12bb29f0..2bff7c0e4498c 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -23,6 +23,7 @@ import pandas.tslib as tslib import pandas.algos as _algos import pandas.index as _index +import six def _utc(): @@ -70,7 +71,7 @@ def wrapper(self, other): other = _to_m8(other, tz=self.tz) elif isinstance(other, list): other = DatetimeIndex(other) - elif isinstance(other, basestring): + elif isinstance(other, six.string_types): other = _to_m8(other, tz=self.tz) elif not isinstance(other, np.ndarray): other = _ensure_datetime64(other) @@ -207,7 +208,7 @@ def __new__(cls, data=None, return data - if issubclass(data.dtype.type, basestring): + if issubclass(data.dtype.type, six.string_types): data = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst) @@ -581,21 +582,23 @@ def __contains__(self, key): def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=u'NaT', **kwargs): + def _format_native_types(self, na_rep=six.u('NaT'), **kwargs): data = list(self) # tz formatter or time formatter zero_time = time(0, 0) for d in data: if d.time() != zero_time or d.tzinfo is not None: - return [u'%s' % x for x in data ] + return [six.u('%s') % x for x in data] values = np.array(data,dtype=object) mask = isnull(self.values) values[mask] = na_rep imask = -mask - values[imask] = np.array([ u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day) for dt in values[imask] ]) + values[imask] = np.array([six.u('%d-%.2d-%.2d') % ( + dt.year, dt.month, dt.day) + for dt in values[imask] ]) return values.tolist() def isin(self, values): @@ -766,7 +769,7 @@ def shift(self, n, freq=None): shifted : DatetimeIndex """ if freq is not None and freq != self.offset: - if isinstance(freq, basestring): + if isinstance(freq, six.string_types): freq = to_offset(freq) result = Index.shift(self, n, freq) result.tz = self.tz @@ -1230,7 +1233,7 @@ def slice_locs(self, start=None, end=None): """ Index.slice_locs, customized to handle partial ISO-8601 string slicing """ - if isinstance(start, basestring) or isinstance(end, basestring): + if isinstance(start, six.string_types) or isinstance(end, six.string_types): if self.is_monotonic: try: @@ -1543,7 +1546,7 @@ def indexer_at_time(self, time, asof=False): if asof: raise NotImplementedError - if isinstance(time, basestring): + if isinstance(time, six.string_types): time = parse(time).time() if time.tzinfo: @@ -1573,10 +1576,10 @@ def indexer_between_time(self, start_time, end_time, include_start=True, """ from dateutil.parser import parse - if isinstance(start_time, basestring): + if isinstance(start_time, six.string_types): start_time = parse(start_time).time() - if isinstance(end_time, basestring): + if isinstance(end_time, six.string_types): end_time = 
parse(end_time).time() if start_time.tzinfo or end_time.tzinfo: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index fc57f96239636..ce63fa7db6728 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,4 +1,7 @@ from datetime import date, datetime, timedelta +from pandas.util.py3compat import range +from pandas.util import compat +import six import numpy as np from pandas.tseries.tools import to_datetime @@ -80,10 +83,10 @@ def __init__(self, n=1, **kwds): def apply(self, other): if len(self.kwds) > 0: if self.n > 0: - for i in xrange(self.n): + for i in range(self.n): other = other + self._offset else: - for i in xrange(-self.n): + for i in range(-self.n): other = other - self._offset return other else: @@ -137,7 +140,7 @@ def __eq__(self, other): if other is None: return False - if isinstance(other, basestring): + if isinstance(other, six.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -428,7 +431,7 @@ def rule_code(self): @staticmethod def _to_dt64(dt, dtype='datetime64'): - if isinstance(dt, (datetime, basestring)): + if isinstance(dt, (datetime, six.string_types)): dt = np.datetime64(dt, dtype=dtype) if isinstance(dt, np.datetime64): dt = dt.astype(dtype) @@ -622,14 +625,14 @@ def apply(self, other): if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) k = k - 1 - for i in xrange(k): + for i in range(k): other = other + self._inc else: k = self.n otherDay = other.weekday() if otherDay != self.weekday: other = other + timedelta((self.weekday - otherDay) % 7) - for i in xrange(-k): + for i in range(-k): other = other - self._inc return other @@ -713,7 +716,7 @@ def getOffsetOfMonth(self, dt): d = w.rollforward(d) - for i in xrange(self.week): + for i in range(self.week): d = w.apply(d) return d @@ -1166,7 +1169,7 @@ def __add__(self, other): return self.apply(other) def __eq__(self, other): - if isinstance(other, basestring): + if isinstance(other, six.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -1181,7 +1184,7 @@ def __hash__(self): return hash(self._params()) def __ne__(self, other): - if isinstance(other, basestring): + if isinstance(other, six.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -1315,7 +1318,7 @@ def generate_range(start=None, end=None, periods=None, end : datetime (default None) periods : int, optional time_rule : (legacy) name of DateOffset object to be used, optional - Corresponds with names expected by tseries.frequencies.get_offset + Corresponds with names expected by tseries.frequencies.get_offset Note ---- diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 4fec590dddd14..0a7b573875e8b 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -20,6 +20,8 @@ import pandas.lib as lib import pandas.tslib as tslib import pandas.algos as _algos +import six +from six.moves import map, zip #--------------- @@ -47,7 +49,7 @@ class Period(PandasObject): Parameters ---------- - value : Period or basestring, default None + value : Period or six.string_types, default None The time period represented (e.g., '4Q2005') freq : str, default None e.g., 'B' for businessday, ('T', 5) or '5T' for 5 minutes @@ -99,7 +101,7 @@ def __init__(self, value=None, freq=None, ordinal=None, converted = other.asfreq(freq) self.ordinal = converted.ordinal - elif isinstance(value, basestring) or com.is_integer(value): + elif isinstance(value, 
six.string_types) or com.is_integer(value): if com.is_integer(value): value = str(value) @@ -666,7 +668,7 @@ def _from_arraylike(cls, data, freq, tz): def __contains__(self, key): if not isinstance(key, Period) or key.freq != self.freq: - if isinstance(key, basestring): + if isinstance(key, six.string_types): try: self.get_loc(key) return True @@ -946,7 +948,7 @@ def slice_locs(self, start=None, end=None): """ Index.slice_locs, customized to handle partial ISO-8601 string slicing """ - if isinstance(start, basestring) or isinstance(end, basestring): + if isinstance(start, six.string_types) or isinstance(end, six.string_types): try: if start: start_loc = self._get_string_slice(start).start @@ -1057,14 +1059,14 @@ def __getitem__(self, key): def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=u'NaT', **kwargs): + def _format_native_types(self, na_rep=six.u('NaT'), **kwargs): values = np.array(list(self),dtype=object) mask = isnull(self.values) values[mask] = na_rep imask = -mask - values[imask] = np.array([ u'%s' % dt for dt in values[imask] ]) + values[imask] = np.array([six.u('%s') % dt for dt in values[imask]]) return values.tolist() def __array_finalize__(self, obj): @@ -1084,7 +1086,7 @@ def __repr__(self): def __unicode__(self): output = self.__class__.__name__ - output += u'(' + output += six.u('(') prefix = '' if py3compat.PY3 else 'u' mapper = "{0}'{{0}}'".format(prefix) output += '[{0}]'.format(', '.join(map(mapper.format, self))) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 9c22ad66d4f2b..253abcbd8adf3 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1,5 +1,6 @@ from datetime import timedelta +import six import numpy as np from pandas.core.groupby import BinGrouper, CustomGrouper @@ -230,7 +231,7 @@ def _resample_timestamps(self, obj): limit=self.limit) loffset = self.loffset - if isinstance(loffset, basestring): + if isinstance(loffset, six.string_types): loffset = to_offset(self.loffset) if isinstance(loffset, (DateOffset, timedelta)): @@ -291,7 +292,7 @@ def _take_new_index(obj, indexer, new_index, axis=0): def _get_range_edges(axis, offset, closed='left', base=0): - if isinstance(offset, basestring): + if isinstance(offset, six.string_types): offset = to_offset(offset) if isinstance(offset, Tick): diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index dc5d5cf67995b..aca7140801bce 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -6,6 +6,7 @@ import nose import numpy as np +import six try: import pandas.tseries.converter as converter @@ -14,7 +15,7 @@ def test_timtetonum_accepts_unicode(): - assert(converter.time2num("00:01") == converter.time2num(u"00:01")) + assert(converter.time2num("00:01") == converter.time2num(six.u("00:01"))) class TestDateTimeConverter(unittest.TestCase): @@ -25,7 +26,7 @@ def setUp(self): def test_convert_accepts_unicode(self): r1 = self.dtc.convert("12:22", None, None) - r2 = self.dtc.convert(u"12:22", None, None) + r2 = self.dtc.convert(six.u("12:22"), None, None) assert(r1 == r2), "DatetimeConverter.convert should accept unicode" def test_conversion(self): diff --git a/pandas/tseries/tests/test_cursor.py b/pandas/tseries/tests/test_cursor.py index ffada187620a4..fc02a83cbe639 100644 --- a/pandas/tseries/tests/test_cursor.py +++ b/pandas/tseries/tests/test_cursor.py @@ -11,7 +11,7 @@ def test_yearoffset(self): 
self.assert_(t.day == 1) self.assert_(t.month == 1) self.assert_(t.year == 2002 + i) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -27,7 +27,7 @@ def test_yearoffset(self): self.assert_(t.month == 12) self.assert_(t.day == 31) self.assert_(t.year == 2001 + i) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -47,7 +47,7 @@ def test_yearoffset(self): self.assert_(t.day == 31 or t.day == 30 or t.day == 29) self.assert_(t.year == 2001 + i) self.assert_(t.weekday() < 5) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -66,7 +66,7 @@ def test_monthoffset(self): self.assert_(t.day == 1) self.assert_(t.month == 1 + i) self.assert_(t.year == 2002) - off.next() + next(off) for i in range(11, -1, -1): off.prev() @@ -82,7 +82,7 @@ def test_monthoffset(self): self.assert_(t.day >= 28) self.assert_(t.month == (12 if i == 0 else i)) self.assert_(t.year == 2001 + (i != 0)) - off.next() + next(off) for i in range(11, -1, -1): off.prev() @@ -103,7 +103,7 @@ def test_monthoffset(self): else: self.assert_(t.day >= 26) self.assert_(t.weekday() < 5) - off.next() + next(off) for i in range(499, -1, -1): off.prev() @@ -124,8 +124,8 @@ def test_monthoffset(self): for k in range(500): self.assert_(off1.ts == off2.ts) - off1.next() - off2.next() + next(off1) + next(off2) for k in range(500): self.assert_(off1.ts == off2.ts) @@ -139,7 +139,7 @@ def test_dayoffset(self): t0 = lib.Timestamp(off.ts) for i in range(500): - off.next() + next(off) t1 = lib.Timestamp(off.ts) self.assert_(t1.value - t0.value == us_in_day) t0 = t1 @@ -155,7 +155,7 @@ def test_dayoffset(self): t0 = lib.Timestamp(off.ts) for i in range(500): - off.next() + next(off) t1 = lib.Timestamp(off.ts) self.assert_(t1.weekday() < 5) self.assert_(t1.value - t0.value == us_in_day or @@ -184,7 +184,7 @@ def test_dayofmonthoffset(self): t = lib.Timestamp(off.ts) stack.append(t) self.assert_(t.weekday() == day) - off.next() + next(off) for i in range(499, -1, -1): off.prev() diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 4c46dcccbce1c..4f4df38af1e89 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -1,4 +1,5 @@ from datetime import datetime +from pandas.util.py3compat import range import pickle import unittest import nose diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index aad831ae48a64..bcaba1fee67c1 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -1,4 +1,5 @@ from datetime import datetime, time, timedelta +from pandas.util.py3compat import range import sys import os import unittest diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 487a3091fd83b..9cc7383ed7d0b 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -1,4 +1,6 @@ from datetime import date, datetime, timedelta +from pandas.util.py3compat import range +from pandas.util import compat import unittest import nose from nose.tools import assert_raises @@ -1651,7 +1653,7 @@ def test_compare_ticks(): three = kls(3) four = kls(4) - for _ in xrange(10): + for _ in range(10): assert(three < kls(4)) assert(kls(3) < four) assert(four > kls(3)) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 9fd5e6bf5f3e9..8058d12029273 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ 
-22,6 +22,9 @@ import pandas.core.datetools as datetools import pandas as pd import numpy as np +import six +from pandas.util.py3compat import range +from six.moves import map, zip randn = np.random.randn from pandas import Series, TimeSeries, DataFrame @@ -209,8 +212,8 @@ def test_repr(self): def test_strftime(self): p = Period('2000-1-1 12:34:12', freq='S') res = p.strftime('%Y-%m-%d %H:%M:%S') - self.assert_( res == '2000-01-01 12:34:12') - self.assert_( isinstance(res,unicode)) # GH3363 + self.assertEqual(res, '2000-01-01 12:34:12') + tm.assert_isinstance(res, six.text_type) # GH3363 def test_sub_delta(self): left, right = Period('2011', freq='A'), Period('2007', freq='A') @@ -1115,7 +1118,7 @@ def test_constructor_U(self): def test_constructor_arrays_negative_year(self): years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(range(1, 5), 40) + quarters = np.tile(list(range(1, 5)), 40) pindex = PeriodIndex(year=years, quarter=quarters) @@ -1123,8 +1126,8 @@ def test_constructor_arrays_negative_year(self): self.assert_(np.array_equal(pindex.quarter, quarters)) def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=range(2000, 2004), - quarter=range(4), freq='Q-DEC') + self.assertRaises(ValueError, PeriodIndex, year=list(range(2000, 2004)), + quarter=list(range(4)), freq='Q-DEC') def test_constructor_corner(self): self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') @@ -1213,7 +1216,7 @@ def test_getitem_partial(self): def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(range(len(rng)), index=rng) + ts = Series(list(range(len(rng))), index=rng) dt1 = datetime(2011, 10, 2) dt4 = datetime(2012, 4, 20) @@ -1285,7 +1288,7 @@ def _get_with_delta(delta, freq='A-DEC'): def test_to_timestamp_quarterly_bug(self): years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(range(1, 5), 40) + quarters = np.tile(list(range(1, 5)), 40) pindex = PeriodIndex(year=years, quarter=quarters) @@ -1622,45 +1625,45 @@ def test_ts_repr(self): def test_period_index_unicode(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 9) - assert_equal(pi, eval(unicode(pi))) + assert_equal(pi, eval(six.text_type(pi))) pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 4 * 9) - assert_equal(pi, eval(unicode(pi))) + assert_equal(pi, eval(six.text_type(pi))) pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 12 * 9) - assert_equal(pi, eval(unicode(pi))) + assert_equal(pi, eval(six.text_type(pi))) start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) assert_equal(len(i1), 20) assert_equal(i1.freq, start.freq) assert_equal(i1[0], start) - assert_equal(i1, eval(unicode(i1))) + assert_equal(i1, eval(six.text_type(i1))) end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), 10) assert_equal(i1.freq, end_intv.freq) assert_equal(i1[-1], end_intv) - assert_equal(i1, eval(unicode(i1))) + assert_equal(i1, eval(six.text_type(i1))) end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), len(i2)) self.assert_((i1 == i2).all()) assert_equal(i1.freq, i2.freq) - assert_equal(i1, eval(unicode(i1))) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i1, eval(six.text_type(i1))) + assert_equal(i2, eval(six.text_type(i2))) end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) 
assert_equal(len(i1), len(i2)) self.assert_((i1 == i2).all()) assert_equal(i1.freq, i2.freq) - assert_equal(i1, eval(unicode(i1))) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i1, eval(six.text_type(i1))) + assert_equal(i2, eval(six.text_type(i2))) try: PeriodIndex(start=start, end=end_intv) @@ -1670,7 +1673,7 @@ def test_period_index_unicode(self): end_intv = Period('2005-05-01', 'B') i1 = PeriodIndex(start=start, end=end_intv) - assert_equal(i1, eval(unicode(i1))) + assert_equal(i1, eval(six.text_type(i1))) try: PeriodIndex(start=start) @@ -1683,12 +1686,12 @@ def test_period_index_unicode(self): i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) assert_equal(len(i2), 2) assert_equal(i2[0], end_intv) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i2, eval(six.text_type(i2))) i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) assert_equal(len(i2), 2) assert_equal(i2[0], end_intv) - assert_equal(i2, eval(unicode(i2))) + assert_equal(i2, eval(six.text_type(i2))) # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] @@ -2001,7 +2004,7 @@ def test_map_with_string_constructor(self): types += unicode, for t in types: - expected = np.array(map(t, raw), dtype=object) + expected = np.array(list(map(t, raw)), dtype=object) res = index.map(t) # should return an array diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index f1602bbd3f020..2bb70e6ef3c76 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -3,6 +3,8 @@ import unittest import nose +from pandas.util.py3compat import range +from six.moves import zip import numpy as np from numpy.testing.decorators import slow @@ -186,7 +188,7 @@ def test_fake_inferred_business(self): plt.clf() fig.add_subplot(111) rng = date_range('2001-1-1', '2001-1-10') - ts = Series(range(len(rng)), rng) + ts = Series(list(range(len(rng))), rng) ts = ts[:3].append(ts[5:]) ax = ts.plot() self.assert_(not hasattr(ax, 'freq')) @@ -942,7 +944,7 @@ def test_format_date_axis(self): def test_ax_plot(self): x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') - y = range(len(x)) + y = list(range(len(x))) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 02a3030f69519..22e103e54c85b 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2,6 +2,8 @@ from datetime import datetime, timedelta +from pandas.util.py3compat import range +from six.moves import zip import numpy as np from pandas import Series, TimeSeries, DataFrame, Panel, isnull, notnull, Timestamp @@ -860,7 +862,7 @@ def test_resample_weekly_all_na(self): def test_resample_tz_localized(self): dr = date_range(start='2012-4-13', end='2012-5-1') - ts = Series(range(len(dr)), dr) + ts = Series(list(range(len(dr))), dr) ts_utc = ts.tz_localize('UTC') ts_local = ts_utc.tz_convert('America/Los_Angeles') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index f41d31d2afbd0..4b87dd29518f8 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -1,5 +1,4 @@ # pylint: disable-msg=E1101,W0612 -import pandas.util.compat as itertools from datetime import datetime, time, timedelta import sys import os @@ -8,6 +7,9 @@ import nose import numpy as np +from pandas.util.py3compat import range, long, StringIO +from pandas.util.compat import 
product +from six.moves import map, zip randn = np.random.randn from pandas import (Index, Series, TimeSeries, DataFrame, @@ -23,8 +25,6 @@ from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm -from pandas.util.py3compat import StringIO - from pandas.tslib import NaT, iNaT import pandas.lib as lib import pandas.tslib as tslib @@ -239,17 +239,17 @@ def test_indexing(self): # GH3546 (not including times on the last day) idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', freq='H') - ts = Series(range(len(idx)), index=idx) + ts = Series(list(range(len(idx))), index=idx) expected = ts['2013-05'] assert_series_equal(expected,ts) idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', freq='S') - ts = Series(range(len(idx)), index=idx) + ts = Series(list(range(len(idx))), index=idx) expected = ts['2013-05'] assert_series_equal(expected,ts) idx = [ Timestamp('2013-05-31 00:00'), Timestamp(datetime(2013,5,31,23,59,59,999999))] - ts = Series(range(len(idx)), index=idx) + ts = Series(list(range(len(idx))), index=idx) expected = ts['2013'] assert_series_equal(expected,ts) @@ -453,7 +453,7 @@ def test_frame_setitem_timestamp(self): # 2155 columns = DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=datetools.bday) - index = range(10) + index = list(range(10)) data = DataFrame(columns=columns, index=index) t = datetime(2012, 11, 1) ts = Timestamp(t) @@ -664,7 +664,7 @@ def test_reindex_series_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') series = Series(rng) - result = series.reindex(range(15)) + result = series.reindex(list(range(15))) self.assert_(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) mask = result.isnull() @@ -675,7 +675,7 @@ def test_reindex_frame_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) - result = df.reindex(range(15)) + result = df.reindex(list(range(15))) self.assert_(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) mask = com.isnull(result)['B'] @@ -890,7 +890,7 @@ def test_to_datetime_types(self): ### array = ['2012','20120101','20120101 12:01:01'] array = ['20120101','20120101 12:01:01'] expected = list(to_datetime(array)) - result = map(Timestamp,array) + result = list(map(Timestamp,array)) tm.assert_almost_equal(result,expected) ### currently fails ### @@ -954,7 +954,7 @@ def test_reasonable_keyerror(self): index = DatetimeIndex(['1/3/2000']) try: index.get_loc('1/1/2000') - except KeyError, e: + except KeyError as e: self.assert_('2000' in str(e)) def test_reindex_with_datetimes(self): @@ -1153,7 +1153,7 @@ def test_between_time(self): stime = time(0, 0) etime = time(1, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 @@ -1185,7 +1185,7 @@ def test_between_time(self): stime = time(22, 0) etime = time(9, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 @@ -1213,7 +1213,7 @@ def test_between_time_frame(self): stime = time(0, 0) etime = time(1, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in 
close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = 13 * 4 + 1 @@ -1245,7 +1245,7 @@ def test_between_time_frame(self): stime = time(22, 0) etime = time(9, 0) - close_open = itertools.product([True, False], [True, False]) + close_open = product([True, False], [True, False]) for inc_start, inc_end in close_open: filtered = ts.between_time(stime, etime, inc_start, inc_end) exp_len = (12 * 11 + 1) * 4 + 1 @@ -1513,11 +1513,11 @@ def test_groupby_count_dateparseerror(self): dr = date_range(start='1/1/2012', freq='5min', periods=10) # BAD Example, datetimes first - s = Series(np.arange(10), index=[dr, range(10)]) + s = Series(np.arange(10), index=[dr, list(range(10))]) grouped = s.groupby(lambda x: x[1] % 2 == 0) result = grouped.count() - s = Series(np.arange(10), index=[range(10), dr]) + s = Series(np.arange(10), index=[list(range(10)), dr]) grouped = s.groupby(lambda x: x[0] % 2 == 0) expected = grouped.count() @@ -1668,7 +1668,7 @@ def test_concat_datetime_datetime64_frame(self): df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) ind = date_range(start="2000/1/1", freq="D", periods=10) - df1 = DataFrame({'date': ind, 'test':range(10)}) + df1 = DataFrame({'date': ind, 'test':list(range(10))}) # it works! pd.concat([df1, df2_obj]) @@ -1687,7 +1687,7 @@ def test_stringified_slice_with_tz(self): import datetime start=datetime.datetime.now() idx=DatetimeIndex(start=start,freq="1d",periods=10) - df=DataFrame(range(10),index=idx) + df=DataFrame(list(range(10)),index=idx) df["2013-01-14 23:44:34.437768-05:00":] # no exception here def test_append_join_nondatetimeindex(self): @@ -1981,7 +1981,7 @@ def setUpClass(cls): cls.series = pickle.load(f) def test_pass_offset_warn(self): - from StringIO import StringIO + from pandas.util.py3compat import StringIO buf = StringIO() sys.stderr = buf @@ -2402,7 +2402,7 @@ def test_frame_apply_dont_convert_datetime64(self): class TestLegacyCompat(unittest.TestCase): def setUp(self): - from StringIO import StringIO + from pandas.util.py3compat import StringIO # suppress deprecation warnings sys.stderr = StringIO() @@ -2650,7 +2650,7 @@ def test_series_set_value(self): def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] - s = Series(range(100000), times) + s = Series(list(range(100000)), times) s.ix[datetime(1900, 1, 1):datetime(2100, 1, 1)] @@ -2813,26 +2813,26 @@ def check(val,unit=None,h=1,s=1,us=0): days = (ts - Timestamp('1970-01-01')).days check(val) - check(val/1000L,unit='us') - check(val/1000000L,unit='ms') - check(val/1000000000L,unit='s') + check(val/long(1000),unit='us') + check(val/long(1000000),unit='ms') + check(val/long(1000000000),unit='s') check(days,unit='D',h=0) # using truediv, so these are like floats if py3compat.PY3: - check((val+500000)/1000000000L,unit='s',us=500) - check((val+500000000)/1000000000L,unit='s',us=500000) - check((val+500000)/1000000L,unit='ms',us=500) + check((val+500000)/long(1000000000),unit='s',us=500) + check((val+500000000)/long(1000000000),unit='s',us=500000) + check((val+500000)/long(1000000),unit='ms',us=500) # get chopped in py2 else: - check((val+500000)/1000000000L,unit='s') - check((val+500000000)/1000000000L,unit='s') - check((val+500000)/1000000L,unit='ms') + check((val+500000)/long(1000000000),unit='s') + check((val+500000000)/long(1000000000),unit='s') + check((val+500000)/long(1000000),unit='ms') # ok - check((val+500000)/1000L,unit='us',us=500) - check((val+500000000)/1000000L,unit='ms',us=500000) 
+ check((val+500000)/long(1000),unit='us',us=500) + check((val+500000000)/long(1000000),unit='ms',us=500000) # floats check(val/1000.0 + 5,unit='us',us=5) @@ -2857,7 +2857,7 @@ def check(val,unit=None,h=1,s=1,us=0): def test_comparison(self): # 5-18-2012 00:00:00.000 - stamp = 1337299200000000000L + stamp = long(1337299200000000000) val = Timestamp(stamp) @@ -2908,7 +2908,7 @@ def test_cant_compare_tz_naive_w_aware(self): self.assertFalse(a.to_pydatetime() == b) def test_delta_preserve_nanos(self): - val = Timestamp(1337299200000000123L) + val = Timestamp(long(1337299200000000123)) result = val + timedelta(1) self.assert_(result.nanosecond == val.nanosecond) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 09224d0133e3d..bf441a97089b5 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -5,6 +5,8 @@ import unittest import nose +from pandas.util.py3compat import range +from six.moves import zip import numpy as np import pytz @@ -393,7 +395,7 @@ def test_take_dont_lose_meta(self): _skip_if_no_pytz() rng = date_range('1/1/2000', periods=20, tz='US/Eastern') - result = rng.take(range(5)) + result = rng.take(list(range(5))) self.assert_(result.tz == rng.tz) self.assert_(result.freq == rng.freq) @@ -620,7 +622,7 @@ def test_getitem_pydatetime_tz(self): tz='Europe/Berlin') ts = Series(index=index, data=index.hour) time_pandas = Timestamp('2012-12-24 17:00', tz='Europe/Berlin') - time_datetime = datetime(2012, 12, 24, 17, 00, + time_datetime = datetime(2012, 12, 24, 17, 0, tzinfo=pytz.timezone('Europe/Berlin')) self.assertEqual(ts[time_pandas], ts[time_datetime]) @@ -635,14 +637,14 @@ def test_datetimeindex_tz(self): """ Test different DatetimeIndex constructions with timezone Follow-up of #4229 """ - + arr = ['11/10/2005 08:00:00', '11/10/2005 09:00:00'] - + idx1 = to_datetime(arr).tz_localize('US/Eastern') idx2 = DatetimeIndex(start="2005-11-10 08:00:00", freq='H', periods=2, tz='US/Eastern') idx3 = DatetimeIndex(arr, tz='US/Eastern') idx4 = DatetimeIndex(np.array(arr), tz='US/Eastern') - + for other in [idx2, idx3, idx4]: self.assert_(idx1.equals(other)) @@ -746,7 +748,7 @@ def test_join_aware(self): test2 = DataFrame(np.zeros((3, 3)), index=date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"), - columns=range(3, 6)) + columns=list(range(3, 6))) result = test1.join(test2, how='outer') ex_index = test1.index.union(test2.index) @@ -815,7 +817,7 @@ def test_append_aware_naive(self): # mixed rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = range(100) + rng2 = list(range(100)) ts1 = Series(np.random.randn(len(rng1)), index=rng1) ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 09dad264b7ae0..5bfdbba56395c 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import nose import unittest diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index d914a8fa570d4..c56fa192bad8d 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -2,6 +2,7 @@ import re import sys +import six import numpy as np import pandas.lib as lib @@ -40,7 +41,7 @@ def _infer(a, b): def _maybe_get_tz(tz): - if isinstance(tz, basestring): + if isinstance(tz, six.string_types): import pytz tz = pytz.timezone(tz) if com.is_integer(tz): @@ -91,7 +92,7 @@ def 
_convert_listlike(arg, box): if box and not isinstance(arg, DatetimeIndex): try: return DatetimeIndex(arg, tz='utc' if utc else None) - except ValueError, e: + except ValueError as e: values, tz = tslib.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, None, tz=tz) @@ -109,7 +110,7 @@ def _convert_listlike(arg, box): result = DatetimeIndex(result, tz='utc' if utc else None) return result - except ValueError, e: + except ValueError as e: try: values, tz = tslib.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, None, tz=tz) @@ -148,7 +149,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): Parameters ---------- - arg : basestring + arg : six.string_types freq : str or DateOffset, default None Helps with interpreting time string if supplied dayfirst : bool, default None @@ -165,7 +166,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, _get_freq_str) - if not isinstance(arg, basestring): + if not isinstance(arg, six.string_types): return arg arg = arg.upper() @@ -236,7 +237,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): try: parsed, reso = dateutil_parse(arg, default, dayfirst=dayfirst, yearfirst=yearfirst) - except Exception, e: + except Exception as e: raise DateParseError(e) if parsed is None: @@ -278,7 +279,7 @@ def dateutil_parse(timestr, default, tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata - elif isinstance(tzdata, basestring): + elif isinstance(tzdata, six.string_types): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tz.tzoffset(res.tzname, tzdata) diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index eb80746cf0c25..92ec7d2bec36e 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import numpy as np import pandas as pd @@ -53,12 +54,12 @@ def pivot_annual(series, freq=None): # adjust for leap year offset[(-isleapyear(year)) & (offset >= 59)] += 1 - columns = range(1, 367) + columns = list(range(1, 367)) # todo: strings like 1/1, 1/25, etc.? 
elif freq in ('M', 'BM'): width = 12 offset = index.month - 1 - columns = range(1, 13) + columns = list(range(1, 13)) elif freq == 'H': width = 8784 grouped = series.groupby(series.index.year) @@ -66,7 +67,7 @@ def pivot_annual(series, freq=None): defaulted.index = defaulted.index.droplevel(0) offset = np.asarray(defaulted.index) offset[-isleapyear(year) & (offset >= 1416)] += 24 - columns = range(1, 8785) + columns = list(range(1, 8785)) else: raise NotImplementedError(freq) diff --git a/pandas/util/compat.py b/pandas/util/compat.py index c18044fc6c492..a42b9218a3acc 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -1,12 +1,15 @@ # itertools.product not in Python 2.5 +import sys +import six +from six.moves import map try: from itertools import product except ImportError: # python 2.5 def product(*args, **kwds): # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy # product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111 - pools = map(tuple, args) * kwds.get('repeat', 1) + pools = list(map(tuple, args) * kwds.get('repeat', 1)) result = [[]] for pool in pools: result = [x + [y] for x in result for y in pool] @@ -17,7 +20,6 @@ def product(*args, **kwds): # OrderedDict Shim from Raymond Hettinger, python core dev # http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ # here to support versions before 2.6 -import sys try: from thread import get_ident as _get_ident except ImportError: @@ -29,6 +31,14 @@ def product(*args, **kwds): pass +def iteritems(obj): + """replacement for six's iteritems to use iteritems on PandasObjects""" + if hasattr(obj, "iteritems"): + return obj.iteritems() + else: + return obj.items() + + class _OrderedDict(dict): 'Dictionary that remembers insertion order' # An inherited dict maps keys to values. @@ -98,7 +108,7 @@ def __reversed__(self): def clear(self): 'od.clear() -> None. Remove all items from od.' try: - for node in self.__map.itervalues(): + for node in six.itervalues(self.__map): del node[:] root = self.__root root[:] = [root, root, None] @@ -323,8 +333,8 @@ def most_common(self, n=None): ''' if n is None: - return sorted(self.iteritems(), key=itemgetter(1), reverse=True) - return nlargest(n, self.iteritems(), key=itemgetter(1)) + return sorted(iteritems(self), key=itemgetter(1), reverse=True) + return nlargest(n, iteritems(self), key=itemgetter(1)) def elements(self): '''Iterator over elements repeating each as many times as its count. @@ -337,7 +347,7 @@ def elements(self): elements() will ignore it. ''' - for elem, count in self.iteritems(): + for elem, count in iteritems(self): for _ in repeat(None, count): yield elem @@ -491,7 +501,7 @@ def __init__(self, *args, **kwargs): self.default_factory = newdefault super(self.__class__, self).__init__(*newargs, **kwargs) - def __missing__ (self, key): + def __missing__(self, key): if self.default_factory is None: raise KeyError(key) self[key] = value = self.default_factory() diff --git a/pandas/util/counter.py b/pandas/util/counter.py index 29e8906fdee38..90e71d3b806f3 100644 --- a/pandas/util/counter.py +++ b/pandas/util/counter.py @@ -1,9 +1,12 @@ # This is copied from collections in Python 2.7, for compatibility with older # versions of Python. 
It can be dropped when we depend on Python 2.7/3.1 +from pandas.util import compat import heapq as _heapq from itertools import repeat as _repeat, chain as _chain, starmap as _starmap from operator import itemgetter as _itemgetter +import six +from six.moves import map try: from collections import Mapping @@ -92,8 +95,8 @@ def most_common(self, n=None): ''' # Emulate Bag.sortedByCount from Smalltalk if n is None: - return sorted(self.iteritems(), key=_itemgetter(1), reverse=True) - return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1)) + return sorted(compat.iteritems(self), key=_itemgetter(1), reverse=True) + return _heapq.nlargest(n, compat.iteritems(self), key=_itemgetter(1)) def elements(self): '''Iterator over elements repeating each as many times as its count. @@ -115,7 +118,7 @@ def elements(self): ''' # Emulate Bag.do from Smalltalk and Multiset.begin from C++. - return _chain.from_iterable(_starmap(_repeat, self.iteritems())) + return _chain.from_iterable(_starmap(_repeat, compat.iteritems(self))) # Override dict methods where necessary @@ -150,7 +153,7 @@ def update(self, iterable=None, **kwds): if isinstance(iterable, Mapping): if self: self_get = self.get - for elem, count in iterable.iteritems(): + for elem, count in compat.iteritems(iterable): self[elem] = self_get(elem, 0) + count else: # fast path when counter is empty diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 97b2ee3353fa3..4a8762dcb7ae0 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -5,7 +5,7 @@ def deprecate(name, alternative): - alt_name = alternative.func_name + alt_name = alternative.__name__ def wrapper(*args, **kwargs): warnings.warn("%s is deprecated. Use %s instead" % (name, alt_name), @@ -107,7 +107,7 @@ def __call__(self, func): def indent(text, indents=1): - if not text or type(text) != str: + if not text or not isinstance(text, str): return '' jointext = ''.join(['\n'] + [' '] * indents) return jointext.join(text.split('\n')) diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py index dcc877b094dda..240f8c0fc88a2 100644 --- a/pandas/util/py3compat.py +++ b/pandas/util/py3compat.py @@ -12,7 +12,8 @@ def str_to_bytes(s, encoding='ascii'): def bytes_to_str(b, encoding='utf-8'): return b.decode(encoding) - lzip = lambda *args: list(zip(*args)) + range = range + long = int else: # Python 2 import re @@ -27,12 +28,18 @@ def str_to_bytes(s, encoding='ascii'): def bytes_to_str(b, encoding='ascii'): return b - lzip = zip + range = xrange + long = long try: - from cStringIO import StringIO + # not writeable if instantiated with string, not good with unicode + from cStringIO import StringIO as cStringIO + # writeable and handles unicode + from StringIO import StringIO except: + # no more StringIO from io import StringIO + cStringIO = StringIO try: from io import BytesIO diff --git a/pandas/util/terminal.py b/pandas/util/terminal.py index 3b5f893d1a0b3..fc985855d2682 100644 --- a/pandas/util/terminal.py +++ b/pandas/util/terminal.py @@ -11,6 +11,7 @@ It is mentioned in the stackoverflow response that this code works on linux, os x, windows and cygwin (windows). 
""" +from __future__ import print_function import os @@ -117,4 +118,4 @@ def ioctl_GWINSZ(fd): if __name__ == "__main__": sizex, sizey = get_terminal_size() - print ('width = %s height = %s' % (sizex, sizey)) + print('width = %s height = %s' % (sizex, sizey)) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 7b2960ef498e1..6b710b4425f3d 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2,6 +2,8 @@ # pylint: disable-msg=W0402 +from pandas.util.py3compat import range +from six.moves import zip import random import string import sys @@ -26,11 +28,16 @@ import pandas.core.frame as frame import pandas.core.panel as panel import pandas.core.panel4d as panel4d +import pandas.util.compat as compat from pandas import bdate_range from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex +from pandas.io.common import urlopen +import six +from six.moves import map + Index = index.Index MultiIndex = index.MultiIndex Series = series.Series @@ -45,12 +52,13 @@ def rands(n): choices = string.ascii_letters + string.digits - return ''.join(random.choice(choices) for _ in xrange(n)) + return ''.join(random.choice(choices) for _ in range(n)) def randu(n): - choices = u"".join(map(unichr, range(1488, 1488 + 26))) + string.digits - return ''.join([random.choice(choices) for _ in xrange(n)]) + choices = six.u("").join(map(unichr, list(range(1488, 1488 + 26)))) + choices += string.digits + return ''.join([random.choice(choices) for _ in range(n)]) #------------------------------------------------------------------------------ # Console debugging tools @@ -123,8 +131,8 @@ def assert_almost_equal(a, b, check_less_precise = False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) - if isinstance(a, basestring): - assert a == b, "%r != %r" % (a, b) + if isinstance(a, six.string_types): + assert a == b, "%s != %s" % (a, b) return True if isiterable(a): @@ -135,7 +143,7 @@ def assert_almost_equal(a, b, check_less_precise = False): if np.array_equal(a, b): return True else: - for i in xrange(na): + for i in range(na): assert_almost_equal(a[i], b[i], check_less_precise) return True @@ -258,7 +266,7 @@ def assert_panel_equal(left, right, assert(left.major_axis.equals(right.major_axis)) assert(left.minor_axis.equals(right.minor_axis)) - for col, series in left.iterkv(): + for col, series in compat.iteritems(left): assert(col in right) assert_frame_equal(series, right[col], check_less_precise=check_less_precise, check_names=False) # TODO strangely check_names fails in py3 ? 
@@ -273,7 +281,7 @@ def assert_panel4d_equal(left, right, assert(left.major_axis.equals(right.major_axis)) assert(left.minor_axis.equals(right.minor_axis)) - for col, series in left.iterkv(): + for col, series in compat.iteritems(left): assert(col in right) assert_panel_equal(series, right[col], check_less_precise=check_less_precise) @@ -291,15 +299,15 @@ def getCols(k): def makeStringIndex(k): - return Index([rands(10) for _ in xrange(k)]) + return Index([rands(10) for _ in range(k)]) def makeUnicodeIndex(k): - return Index([randu(10) for _ in xrange(k)]) + return Index([randu(10) for _ in range(k)]) def makeIntIndex(k): - return Index(range(k)) + return Index(list(range(k))) def makeFloatIndex(k): @@ -444,7 +452,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, names = None # make singelton case uniform - if isinstance(names, basestring) and nlevels == 1: + if isinstance(names, six.string_types) and nlevels == 1: names = [names] # specific 1D index type requested? @@ -471,7 +479,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, def keyfunc(x): import re numeric_tuple = re.sub("[^\d_]_?","",x).split("_") - return map(int,numeric_tuple) + return list(map(int,numeric_tuple)) # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 @@ -483,7 +491,7 @@ def keyfunc(x): result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] tuples.append(result) - tuples = zip(*tuples) + tuples = list(zip(*tuples)) # convert tuples to index if nentries == 1: diff --git a/scripts/bench_join.py b/scripts/bench_join.py index be24dac810aee..758a4fedda636 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -1,3 +1,4 @@ +from pandas.util.py3compat import range import numpy as np import pandas.lib as lib from pandas import * @@ -27,8 +28,8 @@ a_series = Series(av, index=a) b_series = Series(bv, index=b) -a_frame = DataFrame(avf, index=a, columns=range(K)) -b_frame = DataFrame(bvf, index=b, columns=range(K, 2 * K)) +a_frame = DataFrame(avf, index=a, columns=list(range(K))) +b_frame = DataFrame(bvf, index=b, columns=list(range(K, 2 * K))) def do_left_join(a, b, av, bv): @@ -77,7 +78,7 @@ def do_left_join_python(a, b, av, bv): def _take_multi(data, indexer, out): if not data.flags.c_contiguous: data = data.copy() - for i in xrange(data.shape[0]): + for i in range(data.shape[0]): data[i].take(indexer, out=out[i]) @@ -162,8 +163,8 @@ def bench_python(n=100000, pct_overlap=0.20, K=1): avf = np.random.randn(n, K) bvf = np.random.randn(n, K) - a_frame = DataFrame(avf, index=a, columns=range(K)) - b_frame = DataFrame(bvf, index=b, columns=range(K, 2 * K)) + a_frame = DataFrame(avf, index=a, columns=list(range(K))) + b_frame = DataFrame(bvf, index=b, columns=list(range(K, 2 * K))) all_results[logn] = result = {} diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index cdac37f289bb8..0683fbb67a1aa 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -1,26 +1,27 @@ from pandas import * import numpy as np -from itertools import izip +from six.moves import zip from pandas.util.testing import rands +from pandas.util.py3compat import range import pandas.lib as lib N = 100000 -key1 = [rands(10) for _ in xrange(N)] -key2 = [rands(10) for _ in xrange(N)] +key1 = [rands(10) for _ in range(N)] +key2 = [rands(10) for _ in range(N)] -zipped = izip(key1, key2) +zipped = list(zip(key1, key2)) def _zip(*args): arr = np.empty(N, dtype=object) - arr[:] = zip(*args) + arr[:] = 
list(zip(*args)) return arr def _zip2(*args): - return lib.list_to_object_array(zip(*args)) + return lib.list_to_object_array(list(zip(*args))) index = MultiIndex.from_arrays([key1, key2]) to_join = DataFrame({'j1': np.random.randn(100000)}, index=index) diff --git a/scripts/bench_refactor.py b/scripts/bench_refactor.py index 3d0c7e40ced7d..812c42b0ee30c 100644 --- a/scripts/bench_refactor.py +++ b/scripts/bench_refactor.py @@ -1,4 +1,5 @@ from pandas import * +from pandas.util.py3compat import range try: import pandas.core.internals as internals reload(internals) @@ -17,7 +18,7 @@ def horribly_unconsolidated(): df = DataMatrix(index=index) - for i in xrange(K): + for i in range(K): df[i] = float(K) return df @@ -25,13 +26,13 @@ def horribly_unconsolidated(): def bench_reindex_index(df, it=100): new_idx = np.arange(0, N, 2) - for i in xrange(it): + for i in range(it): df.reindex(new_idx) def bench_reindex_columns(df, it=100): new_cols = np.arange(0, K, 2) - for i in xrange(it): + for i in range(it): df.reindex(columns=new_cols) @@ -39,7 +40,7 @@ def bench_join_index(df, it=10): left = df.reindex(index=np.arange(0, N, 2), columns=np.arange(K // 2)) right = df.reindex(columns=np.arange(K // 2 + 1, K)) - for i in xrange(it): + for i in range(it): joined = left.join(right) if __name__ == '__main__': diff --git a/scripts/file_sizes.py b/scripts/file_sizes.py index 8720730d2bb10..12cd12c255e7c 100644 --- a/scripts/file_sizes.py +++ b/scripts/file_sizes.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os import sys @@ -6,6 +7,7 @@ from pandas import DataFrame from pandas.util.testing import set_trace +from pandas.util import compat dirs = [] names = [] @@ -154,13 +156,13 @@ def x(): def doit(): for directory, _, files in walked: - print directory + print(directory) for path in files: if not _should_count_file(path): continue full_path = os.path.join(directory, path) - print full_path + print(full_path) lines = len(open(full_path).readlines()) dirs.append(directory) @@ -174,7 +176,7 @@ def doit(): def doit2(): counts = {} for directory, _, files in walked: - print directory + print(directory) for path in files: if not _should_count_file(path) or path.startswith('test_'): continue @@ -189,7 +191,7 @@ def doit2(): # counts = _get_file_function_lengths('pandas/tests/test_series.py') all_counts = [] -for k, v in counts.iteritems(): +for k, v in compat.iteritems(counts): all_counts.extend(v) all_counts = np.array(all_counts) diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index d23889ec80d05..925d40d0fc856 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -4,6 +4,9 @@ # copryright 2013, y-p @ github from __future__ import print_function +from pandas.util.py3compat import range +import six +from six.moves import map """Search the git history for all commits touching a named method @@ -93,7 +96,7 @@ def get_hits(defname,files=()): def get_commit_info(c,fmt,sep='\t'): r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) - return unicode(r).split(sep) + return six.text_type(r).split(sep) def get_commit_vitals(c,hlen=HASH_LEN): h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") @@ -159,7 +162,7 @@ def sorter(i): print("\nThese commits touched the %s method in these files on these dates:\n" \ % args.funcname) - for i in sorted(range(len(hits)),key=sorter): + for i in sorted(list(range(len(hits))),key=sorter): hit = hits[i] h,s,d=get_commit_vitals(hit.commit) 
p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] @@ -182,11 +185,11 @@ def main(): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! """) return - if isinstance(args.file_masks,basestring): + if isinstance(args.file_masks,six.string_types): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks,basestring): + if isinstance(args.path_masks,six.string_types): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks,basestring): + if isinstance(args.dir_masks,six.string_types): args.dir_masks = args.dir_masks.split(',') logger.setLevel(getattr(logging,args.debug_level)) diff --git a/scripts/find_undoc_args.py b/scripts/find_undoc_args.py index 4a4099afc9a2a..f6bcd43185fa6 100755 --- a/scripts/find_undoc_args.py +++ b/scripts/find_undoc_args.py @@ -41,18 +41,18 @@ def entry_gen(root_ns,module_name): seen.add(cand.__name__) q.insert(0,cand) elif (isinstance(cand,(types.MethodType,types.FunctionType)) and - cand not in seen and cand.func_doc): + cand not in seen and cand.__doc__): seen.add(cand) yield cand def cmp_docstring_sig(f): def build_loc(f): - path=f.func_code.co_filename.split(args.path,1)[-1][1:] - return dict(path=path,lnum=f.func_code.co_firstlineno) + path=f.__code__.co_filename.split(args.path,1)[-1][1:] + return dict(path=path,lnum=f.__code__.co_firstlineno) import inspect sig_names=set(inspect.getargspec(f).args) - doc = f.func_doc.lower() + doc = f.__doc__.lower() doc = re.split("^\s*parameters\s*",doc,1,re.M)[-1] doc = re.split("^\s*returns*",doc,1,re.M)[0] doc_names={x.split(":")[0].strip() for x in doc.split("\n") diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py index c64b33d71ea2a..905240fcf6ca9 100644 --- a/scripts/gen_release_notes.py +++ b/scripts/gen_release_notes.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys import urllib2 import json @@ -93,4 +94,4 @@ def release_notes(milestone): if __name__ == '__main__': rs = release_notes(sys.argv[1]) - print rs + print(rs) diff --git a/scripts/groupby_sample.py b/scripts/groupby_sample.py index 8685b2bbe8ff7..af422bd4bab14 100644 --- a/scripts/groupby_sample.py +++ b/scripts/groupby_sample.py @@ -1,6 +1,8 @@ from pandas import * import numpy as np import string +import six +import pandas.util.compat as compat g1 = np.array(list(string.letters))[:-1] g2 = np.arange(510) @@ -30,7 +32,7 @@ def random_sample_v2(): grouped = df.groupby(['group1', 'group2'])['value'] from random import choice choose = lambda group: choice(group.index) - indices = [choice(v) for k, v in grouped.groups.iteritems()] + indices = [choice(v) for k, v in compat.iteritems(grouped.groups)] return df.reindex(indices) @@ -43,7 +45,7 @@ def do_shuffle(arr): def shuffle_uri(df, grouped): perm = np.r_[tuple([np.random.permutation( - idxs) for idxs in grouped.groups.itervalues()])] + idxs) for idxs in six.itervalues(grouped.groups)])] df['state_permuted'] = np.asarray(df.ix[perm]['value']) df2 = df.copy() diff --git a/scripts/groupby_speed.py b/scripts/groupby_speed.py index a25b00206733d..4e60c34556968 100644 --- a/scripts/groupby_speed.py +++ b/scripts/groupby_speed.py @@ -1,3 +1,4 @@ +from __future__ import print_function from pandas import * rng = DateRange('1/3/2011', '11/30/2011', offset=datetools.Minute()) @@ -23,12 +24,12 @@ def get2(dt): def f(): for i, date in enumerate(df.index): if i % 10000 == 0: - print i + print(i) get1(date) def g(): for i, date in enumerate(df.index): if i % 10000 == 0: - print i + print(i) get2(date) 
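The print conversions in these scripts all follow the same future-import recipe; a minimal sketch of the pattern (the loop body is illustrative, not taken from any one script):

# The __future__ import turns print into a function on Python 2.6+,
# so the Python 3 call syntax below runs unchanged on both lines.
from __future__ import print_function

for i in range(30000):
    if i % 10000 == 0:
        print(i)  # the Python 2 statement form was: print i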
diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py index 76c9cb0cb3bc5..6dbf1b073b6ec 100644 --- a/scripts/groupby_test.py +++ b/scripts/groupby_test.py @@ -8,6 +8,7 @@ import pandas.lib as tseries import pandas.core.groupby as gp import pandas.util.testing as tm +from pandas.util.py3compat import range reload(gp) """ diff --git a/scripts/hdfstore_panel_perf.py b/scripts/hdfstore_panel_perf.py index d344fc80943ca..18668d729bfff 100644 --- a/scripts/hdfstore_panel_perf.py +++ b/scripts/hdfstore_panel_perf.py @@ -1,13 +1,14 @@ from pandas import * from pandas.util.testing import rands +from pandas.util.py3compat import range i, j, k = 7, 771, 5532 panel = Panel(np.random.randn(i, j, k), - items=[rands(10) for _ in xrange(i)], + items=[rands(10) for _ in range(i)], major_axis=DateRange('1/1/2000', periods=j, offset=datetools.Minute()), - minor_axis=[rands(10) for _ in xrange(k)]) + minor_axis=[rands(10) for _ in range(k)]) store = HDFStore('test.h5') diff --git a/scripts/json_manip.py b/scripts/json_manip.py index e76a99cca344a..29c2d88aa9b09 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -65,6 +65,7 @@ themselves. """ +from __future__ import print_function from collections import Counter, namedtuple import csv @@ -73,7 +74,9 @@ from operator import attrgetter as aget, itemgetter as iget import operator import sys - +import six +from six.moves import map +import pandas.util.compat as compat ## note 'url' appears multiple places and not all extensions have same struct @@ -89,77 +92,77 @@ } ## much longer example -ex2 = {u'metadata': {u'accessibilities': [{u'name': u'accessibility.tabfocus', - u'value': 7}, - {u'name': u'accessibility.mouse_focuses_formcontrol', u'value': False}, - {u'name': u'accessibility.browsewithcaret', u'value': False}, - {u'name': u'accessibility.win32.force_disabled', u'value': False}, - {u'name': u'accessibility.typeaheadfind.startlinksonly', u'value': False}, - {u'name': u'accessibility.usebrailledisplay', u'value': u''}, - {u'name': u'accessibility.typeaheadfind.timeout', u'value': 5000}, - {u'name': u'accessibility.typeaheadfind.enabletimeout', u'value': True}, - {u'name': u'accessibility.tabfocus_applies_to_xul', u'value': False}, - {u'name': u'accessibility.typeaheadfind.flashBar', u'value': 1}, - {u'name': u'accessibility.typeaheadfind.autostart', u'value': True}, - {u'name': u'accessibility.blockautorefresh', u'value': False}, - {u'name': u'accessibility.browsewithcaret_shortcut.enabled', - u'value': True}, - {u'name': u'accessibility.typeaheadfind.enablesound', u'value': True}, - {u'name': u'accessibility.typeaheadfind.prefillwithselection', - u'value': True}, - {u'name': u'accessibility.typeaheadfind.soundURL', u'value': u'beep'}, - {u'name': u'accessibility.typeaheadfind', u'value': False}, - {u'name': u'accessibility.typeaheadfind.casesensitive', u'value': 0}, - {u'name': u'accessibility.warn_on_browsewithcaret', u'value': True}, - {u'name': u'accessibility.usetexttospeech', u'value': u''}, - {u'name': u'accessibility.accesskeycausesactivation', u'value': True}, - {u'name': u'accessibility.typeaheadfind.linksonly', u'value': False}, - {u'name': u'isInstantiated', u'value': True}], - u'extensions': [{u'id': u'216ee7f7f4a5b8175374cd62150664efe2433a31', - u'isEnabled': True}, - {u'id': u'1aa53d3b720800c43c4ced5740a6e82bb0b3813e', u'isEnabled': False}, - {u'id': u'01ecfac5a7bd8c9e27b7c5499e71c2d285084b37', u'isEnabled': True}, - {u'id': u'1c01f5b22371b70b312ace94785f7b0b87c3dfb2', u'isEnabled': True}, - {u'id': 
u'fb723781a2385055f7d024788b75e959ad8ea8c3', u'isEnabled': True}], - u'fxVersion': u'9.0', - u'location': u'zh-CN', - u'operatingSystem': u'WINNT Windows NT 5.1', - u'surveyAnswers': u'', - u'task_guid': u'd69fbd15-2517-45b5-8a17-bb7354122a75', - u'tpVersion': u'1.2', - u'updateChannel': u'beta'}, - u'survey_data': { - u'extensions': [{u'appDisabled': False, - u'id': u'testpilot@labs.mozilla.com', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'Test Pilot'}, - {u'appDisabled': True, - u'id': u'dict@www.youdao.com', - u'isCompatible': False, - u'isEnabled': False, - u'isPlatformCompatible': True, - u'name': u'Youdao Word Capturer'}, - {u'appDisabled': False, - u'id': u'jqs@sun.com', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'Java Quick Starter'}, - {u'appDisabled': False, - u'id': u'{20a82645-c095-46ed-80e3-08825760534b}', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'Microsoft .NET Framework Assistant'}, - {u'appDisabled': False, - u'id': u'{a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7}', - u'isCompatible': True, - u'isEnabled': True, - u'isPlatformCompatible': True, - u'name': u'WOT'}], - u'version_number': 1}} +ex2 = {six.u('metadata'): {six.u('accessibilities'): [{six.u('name'): six.u('accessibility.tabfocus'), + six.u('value'): 7}, + {six.u('name'): six.u('accessibility.mouse_focuses_formcontrol'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.browsewithcaret'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.win32.force_disabled'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.typeaheadfind.startlinksonly'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.usebrailledisplay'), six.u('value'): six.u('')}, + {six.u('name'): six.u('accessibility.typeaheadfind.timeout'), six.u('value'): 5000}, + {six.u('name'): six.u('accessibility.typeaheadfind.enabletimeout'), six.u('value'): True}, + {six.u('name'): six.u('accessibility.tabfocus_applies_to_xul'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.typeaheadfind.flashBar'), six.u('value'): 1}, + {six.u('name'): six.u('accessibility.typeaheadfind.autostart'), six.u('value'): True}, + {six.u('name'): six.u('accessibility.blockautorefresh'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.browsewithcaret_shortcut.enabled'), + six.u('value'): True}, + {six.u('name'): six.u('accessibility.typeaheadfind.enablesound'), six.u('value'): True}, + {six.u('name'): six.u('accessibility.typeaheadfind.prefillwithselection'), + six.u('value'): True}, + {six.u('name'): six.u('accessibility.typeaheadfind.soundURL'), six.u('value'): six.u('beep')}, + {six.u('name'): six.u('accessibility.typeaheadfind'), six.u('value'): False}, + {six.u('name'): six.u('accessibility.typeaheadfind.casesensitive'), six.u('value'): 0}, + {six.u('name'): six.u('accessibility.warn_on_browsewithcaret'), six.u('value'): True}, + {six.u('name'): six.u('accessibility.usetexttospeech'), six.u('value'): six.u('')}, + {six.u('name'): six.u('accessibility.accesskeycausesactivation'), six.u('value'): True}, + {six.u('name'): six.u('accessibility.typeaheadfind.linksonly'), six.u('value'): False}, + {six.u('name'): six.u('isInstantiated'), six.u('value'): True}], + six.u('extensions'): [{six.u('id'): six.u('216ee7f7f4a5b8175374cd62150664efe2433a31'), + six.u('isEnabled'): True}, + {six.u('id'): six.u('1aa53d3b720800c43c4ced5740a6e82bb0b3813e'), six.u('isEnabled'): 
False}, + {six.u('id'): six.u('01ecfac5a7bd8c9e27b7c5499e71c2d285084b37'), six.u('isEnabled'): True}, + {six.u('id'): six.u('1c01f5b22371b70b312ace94785f7b0b87c3dfb2'), six.u('isEnabled'): True}, + {six.u('id'): six.u('fb723781a2385055f7d024788b75e959ad8ea8c3'), six.u('isEnabled'): True}], + six.u('fxVersion'): six.u('9.0'), + six.u('location'): six.u('zh-CN'), + six.u('operatingSystem'): six.u('WINNT Windows NT 5.1'), + six.u('surveyAnswers'): six.u(''), + six.u('task_guid'): six.u('d69fbd15-2517-45b5-8a17-bb7354122a75'), + six.u('tpVersion'): six.u('1.2'), + six.u('updateChannel'): six.u('beta')}, + six.u('survey_data'): { + six.u('extensions'): [{six.u('appDisabled'): False, + six.u('id'): six.u('testpilot@labs.mozilla.com'), + six.u('isCompatible'): True, + six.u('isEnabled'): True, + six.u('isPlatformCompatible'): True, + six.u('name'): six.u('Test Pilot')}, + {six.u('appDisabled'): True, + six.u('id'): six.u('dict@www.youdao.com'), + six.u('isCompatible'): False, + six.u('isEnabled'): False, + six.u('isPlatformCompatible'): True, + six.u('name'): six.u('Youdao Word Capturer')}, + {six.u('appDisabled'): False, + six.u('id'): six.u('jqs@sun.com'), + six.u('isCompatible'): True, + six.u('isEnabled'): True, + six.u('isPlatformCompatible'): True, + six.u('name'): six.u('Java Quick Starter')}, + {six.u('appDisabled'): False, + six.u('id'): six.u('{20a82645-c095-46ed-80e3-08825760534b}'), + six.u('isCompatible'): True, + six.u('isEnabled'): True, + six.u('isPlatformCompatible'): True, + six.u('name'): six.u('Microsoft .NET Framework Assistant')}, + {six.u('appDisabled'): False, + six.u('id'): six.u('{a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7}'), + six.u('isCompatible'): True, + six.u('isEnabled'): True, + six.u('isPlatformCompatible'): True, + six.u('name'): six.u('WOT')}], + six.u('version_number'): 1}} # class SurveyResult(object): @@ -267,7 +270,7 @@ def flatten(*stack): """ stack = list(stack) while stack: - try: x = stack[0].next() + try: x = next(stack[0]) except StopIteration: stack.pop(0) continue @@ -282,7 +285,7 @@ def _Q(filter_, thing): """ underlying machinery for Q function recursion """ T = type(thing) if T is type({}): - for k,v in thing.iteritems(): + for k,v in compat.iteritems(thing): #print k,v if filter_ == k: if type(v) is type([]): @@ -386,34 +389,34 @@ def printout(queries,things,default=None, f=sys.stdout, **kwargs): def test_run(): - print "\n>>> print list(Q('url',ex1))" - print list(Q('url',ex1)) + print("\n>>> print list(Q('url',ex1))") + print(list(Q('url',ex1))) assert list(Q('url',ex1)) == ['url1','url2','url3'] assert Ql('url',ex1) == ['url1','url2','url3'] - print "\n>>> print list(Q(['name','id'],ex1))" - print list(Q(['name','id'],ex1)) + print("\n>>> print list(Q(['name','id'],ex1))") + print(list(Q(['name','id'],ex1))) assert Ql(['name','id'],ex1) == ['Gregg','hello','gbye'] - print "\n>>> print Ql('more url',ex1)" - print Ql('more url',ex1) + print("\n>>> print Ql('more url',ex1)") + print(Ql('more url',ex1)) - print "\n>>> list(Q('extensions',ex1))" - print list(Q('extensions',ex1)) + print("\n>>> list(Q('extensions',ex1))") + print(list(Q('extensions',ex1))) - print "\n>>> print Ql('extensions',ex1)" - print Ql('extensions',ex1) + print("\n>>> print Ql('extensions',ex1)") + print(Ql('extensions',ex1)) - print "\n>>> printout(['name','extensions'],[ex1,], extrasaction='ignore')" + print("\n>>> printout(['name','extensions'],[ex1,], extrasaction='ignore')") printout(['name','extensions'],[ex1,], extrasaction='ignore') - print "\n\n" + print("\n\n") from 
pprint import pprint as pp - print "-- note that the extension fields are also flattened! (and N/A) -- " + print("-- note that the extension fields are also flattened! (and N/A) -- ") pp(denorm(['location','fxVersion','notthere','survey_data extensions'],[ex2,], default="N/A")[:2]) diff --git a/scripts/leak.py b/scripts/leak.py index 3d704af4f9945..5b81a3dfcedcf 100644 --- a/scripts/leak.py +++ b/scripts/leak.py @@ -1,4 +1,5 @@ from pandas import * +from pandas.util.py3compat import range import numpy as np import pandas.util.testing as tm import os diff --git a/scripts/parser_magic.py b/scripts/parser_magic.py index c35611350988c..17bdba16565cd 100644 --- a/scripts/parser_magic.py +++ b/scripts/parser_magic.py @@ -1,5 +1,6 @@ from pandas.util.testing import set_trace import pandas.util.testing as tm +import pandas.util.compat as compat from pandas import * import ast @@ -45,7 +46,7 @@ def _format_call(call): if args: content += ', '.join(args) if kwds: - fmt_kwds = ['%s=%s' % item for item in kwds.iteritems()] + fmt_kwds = ['%s=%s' % item for item in compat.iteritems(kwds)] joined_kwds = ', '.join(fmt_kwds) if args: content = content + ', ' + joined_kwds diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py index 6441a69f3a8bf..6dbb1a74d91f7 100644 --- a/scripts/roll_median_leak.py +++ b/scripts/roll_median_leak.py @@ -1,3 +1,4 @@ +from __future__ import print_function from pandas import * import numpy as np @@ -5,6 +6,7 @@ from vbench.api import Benchmark from pandas.util.testing import rands +from pandas.util.py3compat import range import pandas.lib as lib import pandas._sandbox as sbx import time @@ -18,7 +20,7 @@ lst.append([5] * 10000) lst.append(np.repeat(np.nan, 1000000)) -for _ in xrange(10000): - print proc.get_memory_info() +for _ in range(10000): + print(proc.get_memory_info()) sdf = SparseDataFrame({'A': lst.to_array()}) chunk = sdf[sdf['A'] == 5] diff --git a/scripts/runtests.py b/scripts/runtests.py index b995db65ac591..e14752b43116b 100644 --- a/scripts/runtests.py +++ b/scripts/runtests.py @@ -1,4 +1,5 @@ +from __future__ import print_function import os -print os.getpid() +print(os.getpid()) import nose nose.main('pandas.core') diff --git a/scripts/testmed.py b/scripts/testmed.py index ed0f76cd2f3fb..c90734912140b 100644 --- a/scripts/testmed.py +++ b/scripts/testmed.py @@ -2,6 +2,9 @@ from random import random from math import log, ceil +from pandas.util.py3compat import range +from numpy.random import randn +from pandas.lib.skiplist import rolling_median class Node(object): @@ -138,8 +141,6 @@ def _test(arr, k): _test(arr, K) -from numpy.random import randn -from pandas.lib.skiplist import rolling_median def test2(): diff --git a/setup.py b/setup.py index d66ac345aa61a..a99ba88322796 100755 --- a/setup.py +++ b/setup.py @@ -40,14 +40,12 @@ if sys.version_info[1] >= 3: # 3.3 needs numpy 1.7+ min_numpy_ver = "1.7.0b2" - setuptools_kwargs = {'use_2to3': True, + setuptools_kwargs = { 'zip_safe': False, 'install_requires': ['python-dateutil >= 2', 'pytz >= 2011k', 'numpy >= %s' % min_numpy_ver], 'setup_requires': ['numpy >= %s' % min_numpy_ver], - 'use_2to3_exclude_fixers': ['lib2to3.fixes.fix_next', - ], } if not _have_setuptools: sys.exit("need setuptools/distribute for Py3k" diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index f38f42c89f5de..665a33f924810 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -1,5 +1,6 @@ from vbench.api import Benchmark from datetime import datetime +from six.moves import map common_setup = """from 
pandas_vb_common import * """ @@ -284,12 +285,12 @@ def f(g): share_na = 0.1 dates = date_range('1997-12-31', periods=n_dates, freq='B') -dates = Index(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates)) +dates = Index(list(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))) secid_min = int('10000000', 16) secid_max = int('F0000000', 16) step = (secid_max - secid_min) // (n_securities - 1) -security_ids = map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step)) +security_ids = list(map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))) data_index = MultiIndex(levels=[dates.values, security_ids], labels=[[i for i in xrange(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates], diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 1264ae053ffca..8a56ef8ff2bcc 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -106,6 +106,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ +from pandas.util.py3compat import range import pandas.core.expressions as expr df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) diff --git a/vb_suite/make.py b/vb_suite/make.py index 5a8a8215db9a4..74a0818fbd1d1 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -71,7 +71,7 @@ def auto_update(): html() upload() sendmail() - except (Exception, SystemExit), inst: + except (Exception, SystemExit) as inst: msg += str(inst) + '\n' sendmail(msg) diff --git a/vb_suite/measure_memory_consumption.py b/vb_suite/measure_memory_consumption.py index bb73cf5da4302..8d15b78069b9c 100755 --- a/vb_suite/measure_memory_consumption.py +++ b/vb_suite/measure_memory_consumption.py @@ -45,7 +45,7 @@ def main(): s = Series(results) s.sort() - print((s)) + print(s) finally: shutil.rmtree(TMP_DIR) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index 77d0e2e27260e..37775557fe1a2 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -4,6 +4,7 @@ from numpy.random import randn from numpy.random import randint from numpy.random import permutation +import pandas.util.compat as compat import pandas.util.testing as tm import random import numpy as np @@ -23,3 +24,9 @@ from pandas.core.index import MultiIndex except ImportError: pass +try: + # if no range in py3compat, then don't import zip or map either + from pandas.util.py3compat import range + from six.moves import zip, map +except ImportError: + pass diff --git a/vb_suite/parser.py b/vb_suite/parser.py index 50d37f37708e7..8bcba2b20e79d 100644 --- a/vb_suite/parser.py +++ b/vb_suite/parser.py @@ -44,7 +44,7 @@ start_date=datetime(2011, 11, 1)) setup = common_setup + """ -from cStringIO import StringIO +from six.moves import cStringIO as StringIO import os N = 10000 K = 8 @@ -63,7 +63,7 @@ read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) setup = common_setup + """ -from cStringIO import StringIO +from six.moves import cStringIO as StringIO import os N = 10000 K = 8 diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index c14a1795f01e0..0f2adf41d9fab 100755 --- a/vb_suite/perf_HEAD.py +++ b/vb_suite/perf_HEAD.py @@ -13,6 +13,7 @@ import json import pandas as pd +import pandas.util.compat as compat WEB_TIMEOUT = 10 @@ -114,7 +115,7 @@ def main(): if d['succeeded']: print("\nException:\n%s\n" % str(e)) else: - for k, v in sorted(d.iteritems()): + for k, v in sorted(compat.iteritems(d)): print("{k}: {v}".format(k=k, v=v)) print("------->\n") @@ -238,6 +239,6 @@ def 
mk_unique(df): dfs = get_all_results(repo_id) for k in dfs: dfs[k] = mk_unique(dfs[k]) - ss = [pd.Series(v.timing, name=k) for k, v in dfs.iteritems()] + ss = [pd.Series(v.timing, name=k) for k, v in compat.iteritems(dfs)] results = pd.concat(reversed(ss), 1) return results diff --git a/vb_suite/source/conf.py b/vb_suite/source/conf.py index d83448fd97d09..2b5753a03d378 100644 --- a/vb_suite/source/conf.py +++ b/vb_suite/source/conf.py @@ -12,6 +12,7 @@ import sys import os +import six # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -49,8 +50,8 @@ master_doc = 'index' # General information about the project. -project = u'pandas' -copyright = u'2008-2011, the pandas development team' +project = six.u('pandas') +copyright = six.u('2008-2011, the pandas development team') # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -197,8 +198,8 @@ # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'performance.tex', - u'pandas vbench Performance Benchmarks', - u'Wes McKinney', 'manual'), + six.u('pandas vbench Performance Benchmarks'), + six.u('Wes McKinney'), 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/vb_suite/suite.py b/vb_suite/suite.py index 905c4371837cc..76fafb87b05b6 100644 --- a/vb_suite/suite.py +++ b/vb_suite/suite.py @@ -1,3 +1,4 @@ +from __future__ import print_function from vbench.api import Benchmark, GitRepo from datetime import datetime @@ -90,15 +91,15 @@ def generate_rst_files(benchmarks): fig_base_path = os.path.join(vb_path, 'figures') if not os.path.exists(vb_path): - print 'creating %s' % vb_path + print('creating %s' % vb_path) os.makedirs(vb_path) if not os.path.exists(fig_base_path): - print 'creating %s' % fig_base_path + print('creating %s' % fig_base_path) os.makedirs(fig_base_path) for bmk in benchmarks: - print 'Generating rst file for %s' % bmk.name + print('Generating rst file for %s' % bmk.name) rst_path = os.path.join(RST_BASE, 'vbench/%s.txt' % bmk.name) fig_full_path = os.path.join(fig_base_path, '%s.png' % bmk.name) @@ -120,7 +121,7 @@ def generate_rst_files(benchmarks): f.write(rst_text) with open(os.path.join(RST_BASE, 'index.rst'), 'w') as f: - print >> f, """ + print(""" Performance Benchmarks ====================== @@ -141,15 +142,15 @@ def generate_rst_files(benchmarks): .. toctree:: :hidden: :maxdepth: 3 -""" +""", file=f) for modname, mod_bmks in sorted(by_module.items()): - print >> f, ' vb_%s' % modname + print(' vb_%s' % modname, file=f) modpath = os.path.join(RST_BASE, 'vb_%s.rst' % modname) with open(modpath, 'w') as mh: header = '%s\n%s\n\n' % (modname, '=' * len(modname)) - print >> mh, header + print(header, file=mh) for bmk in mod_bmks: - print >> mh, bmk.name - print >> mh, '-' * len(bmk.name) - print >> mh, '.. include:: vbench/%s.txt\n' % bmk.name + print(bmk.name, file=mh) + print('-' * len(bmk.name), file=mh) + print('.. include:: vbench/%s.txt\n' % bmk.name, file=mh) diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index ca98b94e4fbbd..7428bbb07df99 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -25,7 +25,10 @@ 5) print the results to the log file and to stdout. 
""" +from __future__ import print_function +from pandas.util.py3compat import range +from six.moves import map import shutil import os import sys @@ -275,7 +278,8 @@ def profile_head_single(benchmark): err = str(e) except: pass - print("%s died with:\n%s\nSkipping...\n" % (benchmark.name, err)) + print("%s died with:\n%s\nSkipping...\n" % (benchmark.name, + err)) results.append(d.get('timing',np.nan)) gc.enable() @@ -296,7 +300,8 @@ def profile_head_single(benchmark): # return df.set_index("name")[HEAD_COL] def profile_head(benchmarks): - print( "Performing %d benchmarks (%d runs each)" % ( len(benchmarks), args.hrepeats)) + print("Performing %d benchmarks (%d runs each)" % (len(benchmarks), + args.hrepeats)) ss= [profile_head_single(b) for b in benchmarks] print("\n") From 434f26bf08cc09e4749618cc601e413f6770556a Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 27 Jul 2013 10:27:54 -0400 Subject: [PATCH 02/11] TST/ENH: Add assert_isinstance util to util/testing + better type checking cleanup instance checks in pandas/tests, add assert_isinstance method CLN: assert_(isinstance --> assert_isinstace Make is_instance_factory cleaner + basestring --> (str, bytes) use list(range in pytables tests make _WINDOW_TYPES update 2/3 compatible --- pandas/core/config.py | 17 +-- pandas/core/format.py | 2 +- pandas/core/frame.py | 32 ++--- pandas/core/groupby.py | 21 +-- pandas/core/index.py | 2 +- pandas/core/nanops.py | 13 +- pandas/core/panel.py | 20 +-- pandas/core/reshape.py | 2 +- pandas/io/__init__.py | 2 - pandas/io/data.py | 2 +- pandas/io/html.py | 2 +- pandas/io/parsers.py | 14 +- pandas/io/pytables.py | 6 +- pandas/io/stata.py | 5 +- pandas/io/tests/test_json/test_pandas.py | 4 +- pandas/io/tests/test_json/test_ujson.py | 2 +- pandas/io/tests/test_parsers.py | 10 +- pandas/io/tests/test_pytables.py | 31 +++-- pandas/io/tests/test_stata.py | 1 + pandas/sparse/frame.py | 40 +++--- pandas/sparse/panel.py | 17 +-- pandas/sparse/tests/test_array.py | 9 +- pandas/sparse/tests/test_libsparse.py | 3 +- pandas/sparse/tests/test_sparse.py | 74 +++++----- pandas/stats/common.py | 2 +- pandas/stats/ols.py | 4 +- pandas/stats/plm.py | 4 +- pandas/stats/tests/test_fama_macbeth.py | 3 +- pandas/stats/tests/test_moments.py | 6 +- pandas/stats/tests/test_ols.py | 13 +- pandas/stats/var.py | 22 +-- pandas/tests/test_algos.py | 4 +- pandas/tests/test_categorical.py | 2 +- pandas/tests/test_config.py | 1 - pandas/tests/test_format.py | 2 +- pandas/tests/test_frame.py | 166 +++++++++++------------ pandas/tests/test_graphics.py | 4 +- pandas/tests/test_groupby.py | 52 ++++--- pandas/tests/test_index.py | 42 +++--- pandas/tests/test_indexing.py | 2 +- pandas/tests/test_multilevel.py | 32 ++--- pandas/tests/test_panel.py | 18 +-- pandas/tests/test_panel4d.py | 7 +- pandas/tests/test_rplot.py | 3 +- pandas/tests/test_series.py | 44 +++--- pandas/tests/test_strings.py | 50 +++---- pandas/tests/test_tests.py | 1 - pandas/tools/pivot.py | 2 +- pandas/tools/tests/test_merge.py | 13 +- pandas/tools/tests/test_pivot.py | 2 +- pandas/tseries/frequencies.py | 6 +- pandas/tseries/offsets.py | 2 +- pandas/tseries/tests/test_daterange.py | 47 +++---- pandas/tseries/tests/test_offsets.py | 39 +++--- pandas/tseries/tests/test_period.py | 26 ++-- pandas/tseries/tests/test_plotting.py | 8 +- pandas/tseries/tests/test_resample.py | 4 +- pandas/tseries/tests/test_timeseries.py | 52 +++---- pandas/tseries/tests/test_timezones.py | 6 +- pandas/util/compat.py | 36 ++--- pandas/util/py3compat.py | 2 + pandas/util/testing.py | 
29 ++-- scripts/json_manip.py | 16 +-- vb_suite/perf_HEAD.py | 2 +- 64 files changed, 556 insertions(+), 551 deletions(-) diff --git a/pandas/core/config.py b/pandas/core/config.py index c66911c122e26..725f869580769 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -729,15 +729,16 @@ def is_instance_factory(_type): True if x is an instance of `_type` """ + if isinstance(_type, (tuple, list)): + _type = tuple(_type) + from pandas.core.common import pprint_thing + type_repr = "|".join(map(pprint_thing, _type)) + else: + type_repr = "'%s'" % _type def inner(x): - if isinstance(_type,(tuple,list)) : - if not any([isinstance(x,t) for t in _type]): - from pandas.core.common import pprint_thing as pp - pp_values = list(map(pp, _type)) - raise ValueError("Value must be an instance of %s" % pp("|".join(pp_values))) - elif not isinstance(x, _type): - raise ValueError("Value must be an instance of '%s'" % str(_type)) + if not isinstance(x, _type): + raise ValueError("Value must be an instance of %s" % type_repr) return inner @@ -757,4 +758,4 @@ def inner(x): is_float = is_type_factory(float) is_str = is_type_factory(str) is_unicode = is_type_factory(six.text_type) -is_text = is_instance_factory(basestring) +is_text = is_instance_factory((str, bytes)) diff --git a/pandas/core/format.py b/pandas/core/format.py index 1b78b501b58a3..8676d9a5447a5 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -872,7 +872,7 @@ def _helper_csv(self, writer, na_rep=None, cols=None, cols = self.columns series = {} - for k, v in self.obj._series.iteritems(): + for k, v in compat.iteritems(self.obj._series): series[k] = v.values diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 94b36ffed3b9f..07def64b22c57 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -384,7 +384,7 @@ class DataFrame(NDFrame): 'columns': 1 } - _AXIS_NAMES = dict((v, k) for k, v in _AXIS_NUMBERS.iteritems()) + _AXIS_NAMES = dict((v, k) for k, v in compat.iteritems(_AXIS_NUMBERS)) def __init__(self, data=None, index=None, columns=None, dtype=None, copy=False): @@ -493,7 +493,7 @@ def _init_dict(self, data, index, columns, dtype=None): # prefilter if columns passed - data = dict((k, v) for k, v in data.iteritems() if k in columns) + data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) if index is None: index = extract_index(data.values()) @@ -986,11 +986,11 @@ def to_dict(self, outtype='dict'): warnings.warn("DataFrame columns are not unique, some " "columns will be omitted.", UserWarning) if outtype.lower().startswith('d'): - return dict((k, v.to_dict()) for k, v in self.iteritems()) + return dict((k, v.to_dict()) for k, v in compat.iteritems(self)) elif outtype.lower().startswith('l'): - return dict((k, v.tolist()) for k, v in self.iteritems()) + return dict((k, v.tolist()) for k, v in compat.iteritems(self)) elif outtype.lower().startswith('s'): - return dict((k, v) for k, v in self.iteritems()) + return dict((k, v) for k, v in compat.iteritems(self)) else: # pragma: no cover raise ValueError("outtype %s not understood" % outtype) @@ -1063,7 +1063,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, else: arrays = [] arr_columns = [] - for k, v in data.iteritems(): + for k, v in compat.iteritems(data): if k in columns: arr_columns.append(k) arrays.append(v) @@ -1682,7 +1682,7 @@ def info(self, verbose=True, buf=None, max_cols=None): counts = self.count() if len(cols) != len(counts): raise AssertionError('Columns must equal counts') - for col, count in 
counts.iteritems(): + for col, count in compat.iteritems(counts): col = com.pprint_thing(col) lines.append(_put_str(col, space) + '%d non-null values' % count) @@ -1690,7 +1690,7 @@ def info(self, verbose=True, buf=None, max_cols=None): lines.append(self.columns.summary(name='Columns')) counts = self.get_dtype_counts() - dtypes = ['%s(%d)' % k for k in sorted(counts.iteritems())] + dtypes = ['%s(%d)' % k for k in sorted(compat.iteritems(counts))] lines.append('dtypes: %s' % ', '.join(dtypes)) _put_lines(buf, lines) @@ -3457,7 +3457,7 @@ def fillna(self, value=None, method=None, axis=0, inplace=False, 'by column') result = self if inplace else self.copy() - for k, v in value.iteritems(): + for k, v in compat.iteritems(value): if k not in result: continue result[k].fillna(v, inplace=True) @@ -3632,7 +3632,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if isinstance(to_replace, (dict, Series)): if isinstance(value, (dict, Series)): # {'A' : NA} -> {'A' : 0} new_data = self._data - for c, src in to_replace.iteritems(): + for c, src in compat.iteritems(to_replace): if c in value and c in self: new_data = new_data.replace(src, value[c], filter=[c], @@ -3641,7 +3641,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, elif not isinstance(value, (list, np.ndarray)): # {'A': NA} -> 0 new_data = self._data - for k, src in to_replace.iteritems(): + for k, src in compat.iteritems(to_replace): if k in self: new_data = new_data.replace(src, value, filter=[k], @@ -3681,7 +3681,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, if isinstance(value, (dict, Series)): # NA -> {'A' : 0, 'B' : -1} new_data = self._data - for k, v in value.iteritems(): + for k, v in compat.iteritems(value): if k in self: new_data = new_data.replace(to_replace, v, filter=[k], @@ -4864,7 +4864,7 @@ def describe(self, percentile_width=50): if len(numdata.columns) == 0: return DataFrame(dict((k, v.describe()) - for k, v in self.iteritems()), + for k, v in compat.iteritems(self)), columns=self.columns) lb = .5 * (1. - percentile_width / 100.) 
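Stepping back briefly: the is_instance_factory rework in the pandas/core/config.py hunk above builds the error message once, when the validator is created, instead of on every call. A rough sketch of the resulting validator, using str(t) where the real code uses com.pprint_thing:

def is_instance_factory(_type):
    if isinstance(_type, (tuple, list)):
        _type = tuple(_type)  # isinstance() accepts a tuple of types directly
        type_repr = "|".join(str(t) for t in _type)
    else:
        type_repr = "'%s'" % _type

    def inner(x):
        # validators return None on success and raise on bad input
        if not isinstance(x, _type):
            raise ValueError("Value must be an instance of %s" % type_repr)

    return inner

is_text = is_instance_factory((str, bytes))
is_text("fine")   # passes silently
# is_text(0)      # would raise ValueError naming str|bytes

Normalizing to a tuple up front is what lets the basestring check become (str, bytes) without any per-call branching.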
@@ -5803,7 +5803,7 @@ def _rec_to_dict(arr): sdict = dict((k, arr[k]) for k in columns) elif isinstance(arr, DataFrame): columns = list(arr.columns) - sdict = dict((k, v.values) for k, v in arr.iteritems()) + sdict = dict((k, v.values) for k, v in compat.iteritems(arr)) elif isinstance(arr, dict): columns = sorted(arr) sdict = arr.copy() @@ -5978,8 +5978,8 @@ def _homogenize(data, index, dtype=None): def _from_nested_dict(data): # TODO: this should be seriously cythonized new_data = OrderedDict() - for index, s in data.iteritems(): - for col, v in s.iteritems(): + for index, s in compat.iteritems(data): + for col, v in compat.iteritems(s): new_data[col] = new_data.get(col, OrderedDict()) new_data[col][index] = v return new_data diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 528d7baca330c..9bd4f24ee04e8 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,9 +1,11 @@ import types import numpy as np -from pandas.util.py3compat import range, long + import six -from six.moves import zip +from pandas.util.py3compat import range, long +from pandas.util.compat import OrderedDict from pandas.util import compat +from six.moves import zip, builtins from pandas.core.base import PandasObject from pandas.core.categorical import Categorical @@ -14,7 +16,6 @@ from pandas.core.series import Series from pandas.core.panel import Panel from pandas.util.decorators import cache_readonly, Appender -from pandas.util.compat import OrderedDict import pandas.core.algorithms as algos import pandas.core.common as com from pandas.core.common import _possibly_downcast_to_dtype, notnull @@ -487,7 +488,7 @@ def _python_agg_general(self, func, *args, **kwargs): if self.grouper._filter_empty_groups: mask = counts.ravel() > 0 - for name, result in output.iteritems(): + for name, result in compat.iteritems(output): # since we are masking, make sure that we have a float object values = result @@ -1705,7 +1706,7 @@ def aggregate(self, arg, *args, **kwargs): if any(isinstance(x, (list, tuple, dict)) for x in arg.values()): new_arg = OrderedDict() - for k, v in arg.iteritems(): + for k, v in compat.iteritems(arg): if not isinstance(v, (tuple, list, dict)): new_arg[k] = [v] else: @@ -1718,13 +1719,13 @@ def aggregate(self, arg, *args, **kwargs): if isinstance(subset, DataFrame): raise NotImplementedError - for fname, agg_how in arg.iteritems(): + for fname, agg_how in compat.iteritems(arg): colg = SeriesGroupBy(subset, selection=self._selection, grouper=self.grouper) result[fname] = colg.aggregate(agg_how) keys.append(fname) else: - for col, agg_how in arg.iteritems(): + for col, agg_how in compat.iteritems(arg): colg = SeriesGroupBy(obj[col], selection=col, grouper=self.grouper) result[col] = colg.aggregate(agg_how) @@ -2606,14 +2607,14 @@ def _reorder_by_uniques(uniques, labels): return uniques, labels -import __builtin__ _func_table = { - __builtin__.sum: np.sum + builtins.sum: np.sum } + _cython_table = { - __builtin__.sum: 'sum', + builtins.sum: 'sum', np.sum: 'sum', np.mean: 'mean', np.prod: 'prod', diff --git a/pandas/core/index.py b/pandas/core/index.py index 7cff2e51ad399..c46e61271f997 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -261,7 +261,7 @@ def get_duplicates(self): counter = defaultdict(lambda: 0) for k in self.values: counter[k] += 1 - return sorted(k for k, v in counter.iteritems() if v > 1) + return sorted(k for k, v in compat.iteritems(counter) if v > 1) _get_duplicates = get_duplicates diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 
72ba4364cfedd..699d0ac21823f 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -11,8 +11,11 @@ import pandas.algos as algos import pandas.hashtable as _hash import pandas.tslib as tslib + +from six.moves import builtins import six + try: import bottleneck as bn _USE_BOTTLENECK = True @@ -286,12 +289,11 @@ def nanmin(values, axis=None, skipna=True): # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ and sys.version_info[0] >= 3): # pragma: no cover - import __builtin__ if values.ndim > 1: apply_ax = axis if axis is not None else 0 - result = np.apply_along_axis(__builtin__.min, apply_ax, values) + result = np.apply_along_axis(builtins.min, apply_ax, values) else: - result = __builtin__.min(values) + result = builtins.min(values) else: if ((axis is not None and values.shape[axis] == 0) or values.size == 0): @@ -311,13 +313,12 @@ def nanmax(values, axis=None, skipna=True): # numpy 1.6.1 workaround in Python 3.x if (values.dtype == np.object_ and sys.version_info[0] >= 3): # pragma: no cover - import __builtin__ if values.ndim > 1: apply_ax = axis if axis is not None else 0 - result = np.apply_along_axis(__builtin__.max, apply_ax, values) + result = np.apply_along_axis(builtins.max, apply_ax, values) else: - result = __builtin__.max(values) + result = builtins.max(values) else: if ((axis is not None and values.shape[axis] == 0) or values.size == 0): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 739ffc6f31e9c..f2fb213f884e2 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -282,14 +282,14 @@ def _init_dict(self, data, axes, dtype=None): if haxis is not None: haxis = _ensure_index(haxis) data = OrderedDict((k, v) for k, v - in data.iteritems() if k in haxis) + in compat.iteritems(data) if k in haxis) else: ks = data.keys() if not isinstance(data,OrderedDict): ks = _try_sort(ks) haxis = Index(ks) - for k, v in data.iteritems(): + for k, v in compat.iteritems(data): if isinstance(v, dict): data[k] = self._constructor_sliced(v) @@ -352,8 +352,8 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): orient = orient.lower() if orient == 'minor': new_data = OrderedDefaultdict(dict) - for col, df in data.iteritems(): - for item, s in df.iteritems(): + for col, df in compat.iteritems(data): + for item, s in compat.iteritems(df): new_data[item][col] = s data = new_data elif orient != 'items': # pragma: no cover @@ -544,7 +544,7 @@ def to_sparse(self, fill_value=None, kind='block'): y : SparseDataFrame """ from pandas.core.sparse import SparsePanel - frames = dict(self.iteritems()) + frames = dict(compat.iteritems(self)) return SparsePanel(frames, items=self.items, major_axis=self.major_axis, minor_axis=self.minor_axis, @@ -564,7 +564,7 @@ def to_excel(self, path, na_rep=''): """ from pandas.io.excel import ExcelWriter writer = ExcelWriter(path) - for item, df in self.iteritems(): + for item, df in compat.iteritems(self): name = str(item) df.to_excel(writer, name, na_rep=na_rep) writer.save() @@ -980,7 +980,7 @@ def fillna(self, value=None, method=None): if method is None: raise ValueError('must specify a fill method or value') result = {} - for col, s in self.iteritems(): + for col, s in compat.iteritems(self): result[col] = s.fillna(method=method, value=value) return self._constructor.from_dict(result) @@ -1137,7 +1137,7 @@ def transpose(self, *args, **kwargs): """ # construct the args args = list(args) - aliases = tuple(kwargs.iterkeys()) + aliases = tuple(six.iterkeys(kwargs)) for a in self._AXIS_ORDERS: if not a in 
kwargs: @@ -1518,7 +1518,7 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): result = OrderedDict() adj_frames = OrderedDict() - for k, v in frames.iteritems(): + for k, v in compat.iteritems(frames): if isinstance(v, dict): adj_frames[k] = self._constructor_sliced(v) else: @@ -1531,7 +1531,7 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): reindex_dict = dict( [(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes]) reindex_dict['copy'] = False - for key, frame in adj_frames.iteritems(): + for key, frame in compat.iteritems(adj_frames): if frame is not None: result[key] = frame.reindex(**reindex_dict) else: diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 436c2298164ff..3e6e4ea366623 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -776,7 +776,7 @@ def lreshape(data, groups, dropna=True, label=None): for c in pivot_cols: mask &= notnull(mdata[c]) if not mask.all(): - mdata = dict((k, v[mask]) for k, v in mdata.iteritems()) + mdata = dict((k, v[mask]) for k, v in compat.iteritems(mdata)) return DataFrame(mdata, columns=id_cols + pivot_cols) diff --git a/pandas/io/__init__.py b/pandas/io/__init__.py index a984c40cdc098..e69de29bb2d1d 100644 --- a/pandas/io/__init__.py +++ b/pandas/io/__init__.py @@ -1,2 +0,0 @@ -import sql -import stata diff --git a/pandas/io/data.py b/pandas/io/data.py index 74268241db2c7..afec826279988 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -811,7 +811,7 @@ def get_forward_data(self, months, call=True, put=False, near=False, data : dict of str, DataFrame """ warnings.warn("get_forward_data() is deprecated", FutureWarning) - in_months = range(CUR_MONTH, CUR_MONTH + months + 1) + in_months = list(range(CUR_MONTH, CUR_MONTH + months + 1)) in_years = [CUR_YEAR] * (months + 1) # Figure out how many items in in_months go past 12 diff --git a/pandas/io/html.py b/pandas/io/html.py index bcecc624434cc..3fee071cdf095 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -454,7 +454,7 @@ def _build_node_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = (six.u("@{k}='{v}'").format(k=k, v=v) for k, v in attrs.iteritems()) + s = (six.u("@{k}='{v}'").format(k=k, v=v) for k, v in compat.iteritems(attrs)) return six.u('[{0}]').format(' and '.join(s)) diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 57f1daa623a5a..760d14467421a 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -486,7 +486,7 @@ def _get_options_with_defaults(self, engine): kwds = self.orig_options options = {} - for argname, default in _parser_defaults.iteritems(): + for argname, default in compat.iteritems(_parser_defaults): if argname in kwds: value = kwds[argname] else: @@ -494,7 +494,7 @@ def _get_options_with_defaults(self, engine): options[argname] = value - for argname, default in _c_parser_defaults.iteritems(): + for argname, default in compat.iteritems(_c_parser_defaults): if argname in kwds: value = kwds[argname] if engine != 'c' and value != default: @@ -503,7 +503,7 @@ def _get_options_with_defaults(self, engine): options[argname] = value if engine == 'python-fwf': - for argname, default in _fwf_defaults.iteritems(): + for argname, default in compat.iteritems(_fwf_defaults): if argname in kwds: value = kwds[argname] options[argname] = value @@ -866,7 +866,7 @@ def _agg_index(self, index, try_parse_dates=True): def _convert_to_ndarrays(self, dct, na_values, na_fvalues, verbose=False, converters=None): result = {} - for c, values in dct.iteritems(): + for c, 
values in compat.iteritems(dct): conv_f = None if converters is None else converters.get(c, None) col_na_values, col_na_fvalues = _get_na_values(c, na_values, na_fvalues) coerce_type = True @@ -1379,7 +1379,7 @@ def _convert_data(self, data): # apply converters clean_conv = {} - for col, f in self.converters.iteritems(): + for col, f in compat.iteritems(self.converters): if isinstance(col, int) and col not in self.orig_names: col = self.orig_names[col] clean_conv[col] = f @@ -1733,7 +1733,7 @@ def _isindex(colspec): elif isinstance(parse_spec, dict): # dict of new name to column list - for new_name, colspec in parse_spec.iteritems(): + for new_name, colspec in compat.iteritems(parse_spec): if new_name in data_dict: raise ValueError('Date column %s already in dict' % new_name) @@ -1782,7 +1782,7 @@ def _clean_na_values(na_values, keep_default_na=True): na_fvalues = set() elif isinstance(na_values, dict): if keep_default_na: - for k, v in na_values.iteritems(): + for k, v in compat.iteritems(na_values): v = set(list(v)) | _NA_VALUES na_values[k] = v na_fvalues = dict([ (k, _floatify_na_values(v)) for k, v in na_values.items() ]) diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 52cc7dc24ffde..e18cd2d8cf572 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -1729,7 +1729,7 @@ class GenericStorer(Storer): """ a generified storer version """ _index_type_map = { DatetimeIndex: 'datetime', PeriodIndex: 'period'} - _reverse_index_map = dict([ (v,k) for k, v in _index_type_map.iteritems() ]) + _reverse_index_map = dict([ (v,k) for k, v in compat.iteritems(_index_type_map) ]) attributes = [] # indexer helpders @@ -2104,7 +2104,7 @@ def read(self, **kwargs): def write(self, obj, **kwargs): """ write it as a collection of individual sparse series """ super(SparseFrameStorer, self).write(obj, **kwargs) - for name, ss in obj.iteritems(): + for name, ss in compat.iteritems(obj): key = 'sparse_series_%s' % name if key not in self.group._v_children: node = self._handle.createGroup(self.group, key) @@ -2140,7 +2140,7 @@ def write(self, obj, **kwargs): self.attrs.default_kind = obj.default_kind self.write_index('items', obj.items) - for name, sdf in obj.iteritems(): + for name, sdf in compat.iteritems(obj): key = 'sparse_frame_%s' % name if key not in self.group._v_children: node = self._handle.createGroup(self.group, key) diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 50b3d63cb459d..033f0cf0e2cbc 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -22,6 +22,7 @@ from pandas.core.categorical import Categorical import datetime from pandas.util import py3compat +from pandas.util import compat from pandas.util.py3compat import long from pandas import isnull from pandas.io.parsers import _parser_params, Appender @@ -546,12 +547,12 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None): data[col] = data[col].apply(_stata_elapsed_date_to_datetime, args=(self.fmtlist[i],)) if convert_categoricals: - cols = np.where(map(lambda x: x in self.value_label_dict.iterkeys(), self.lbllist))[0] + cols = np.where([x in self.value_label_dict for x in self.lbllist])[0] for i in cols: col = data.columns[i] labeled_data = np.copy(data[col]) labeled_data = labeled_data.astype(object) - for k, v in self.value_label_dict[self.lbllist[i]].iteritems(): + for k, v in compat.iteritems(self.value_label_dict[self.lbllist[i]]): labeled_data[data[col] == k] = v data[col] = Categorical.from_array(labeled_data) diff --git
a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 2aaffe40474f1..b739181b7e702 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -29,7 +29,7 @@ _frame = DataFrame(_seriesd) _frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A']) _intframe = DataFrame(dict((k, v.astype(np.int64)) - for k, v in _seriesd.iteritems())) + for k, v in compat.iteritems(_seriesd))) _tsframe = DataFrame(_tsd) @@ -95,7 +95,7 @@ def _check_orient(df, orient, dtype=None, numpy=False, convert_axes=True, check_ numpy=numpy, convert_axes=convert_axes) except (Exception) as detail: if raise_ok is not None: - if type(detail) == raise_ok: + if isinstance(detail, raise_ok): return raise diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index a8f6ddffe8e68..1e5e455dd70d4 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -1471,7 +1471,7 @@ def test_decodeStringUTF8(self): """ def _clean_dict(d): - return dict((str(k), v) for k, v in d.iteritems()) + return dict((str(k), v) for k, v in compat.iteritems(d)) if __name__ == '__main__': # unittest.main() diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 198de5d0fb212..08b6a40df84a5 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -163,7 +163,7 @@ def test_squeeze(self): expected = Series([1, 2, 3], ['a', 'b', 'c']) result = self.read_table(StringIO(data), sep=',', index_col=0, header=None, squeeze=True) - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, expected) def test_inf_parsing(self): @@ -802,7 +802,7 @@ def test_parse_dates_column_list(self): expected['aux_date'] = to_datetime(expected['aux_date'], dayfirst=True) expected['aux_date'] = list(map(Timestamp, expected['aux_date'])) - self.assert_(isinstance(expected['aux_date'][0], datetime)) + tm.assert_isinstance(expected['aux_date'][0], datetime) df = self.read_csv(StringIO(data), sep=";", index_col=list(range(4)), parse_dates=[0, 5], dayfirst=True) @@ -874,7 +874,7 @@ def test_read_csv_no_index_name(self): def test_read_table_unicode(self): fin = BytesIO(six.u('\u0141aski, Jan;1').encode('utf-8')) df1 = read_table(fin, sep=";", encoding="utf-8", header=None) - self.assert_(isinstance(df1[0].values[0], unicode)) + tm.assert_isinstance(df1[0].values[0], six.text_type) def test_read_table_wrong_num_columns(self): # too few! 
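Note on the recurring pattern in the hunks above: every `iteritems` change routes Python 2's `d.iteritems()` through a single helper so the same call works on 2.x and on 3.x, where dicts only expose `items()`. As a rough sketch of what such a shim looks like (this follows six's approach and is an assumption about the shape, not the exact `pandas.util.compat` source):

    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        def iteritems(obj, **kw):
            # on 3.x, items() already returns a lazy view
            return iter(obj.items(**kw))
    else:
        def iteritems(obj, **kw):
            # on 2.x, use the dedicated lazy method
            return obj.iteritems(**kw)

Callers get an iterator of (key, value) pairs either way, which is why `dict(compat.iteritems(d))` and `for k, v in compat.iteritems(d)` are drop-in replacements throughout these hunks.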
@@ -1051,7 +1051,7 @@ def test_iterator(self): treader = self.read_table(StringIO(self.data1), sep=',', index_col=0, iterator=True) - self.assert_(isinstance(treader, TextFileReader)) + tm.assert_isinstance(treader, TextFileReader) # stopping iteration when on chunksize is specified, GH 3967 data = """A,B,C @@ -1265,7 +1265,7 @@ def test_converters(self): expected = self.read_csv(StringIO(data)) expected['D'] = expected['D'].map(parser.parse) - self.assert_(isinstance(result['D'][0], (datetime, Timestamp))) + tm.assert_isinstance(result['D'][0], (datetime, Timestamp)) tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index aff43cc913203..ba5886ba12a85 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1041,8 +1041,9 @@ def test_big_table2_frame(self): print ("\nbig_table2 start") import time start_time = time.time() - df = DataFrame(np.random.randn(1000 * 1000, 60), index=range(int( - 1000 * 1000)), columns=['E%03d' % i for i in range(60)]) + df = DataFrame(np.random.randn(1000 * 1000, 60), + index=list(range(int(1000 * 1000))), + columns=['E%03d' % i for i in range(60)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x for x in range(20): @@ -1070,8 +1071,8 @@ def test_big_put_frame(self): print ("\nbig_put start") import time start_time = time.time() - df = DataFrame(np.random.randn(1000 * 1000, 60), index=range(int( - 1000 * 1000)), columns=['E%03d' % i for i in range(60)]) + df = DataFrame(np.random.randn(1000 * 1000, 60), index=list(range(int( + 1000 * 1000))), columns=['E%03d' % i for i in range(60)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x for x in range(20): @@ -1405,7 +1406,7 @@ def compare(a,b): with ensure_clean(self.path) as store: # GH 4098 example - df = DataFrame(dict(A = Series(range(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern')))) + df = DataFrame(dict(A = Series(list(range(3)), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern')))) _maybe_remove(store, 'df') store.put('df',df) @@ -2214,7 +2215,7 @@ def test_select_iterator(self): def test_retain_index_attributes(self): # GH 3499, losing frequency info on index recreation - df = DataFrame(dict(A = Series(range(3), + df = DataFrame(dict(A = Series(list(range(3)), index=date_range('2000-1-1',periods=3,freq='H')))) with ensure_clean(self.path) as store: @@ -2231,7 +2232,7 @@ def test_retain_index_attributes(self): # try to append a table with a different frequency warnings.filterwarnings('ignore', category=AttributeConflictWarning) - df2 = DataFrame(dict(A = Series(range(3), + df2 = DataFrame(dict(A = Series(list(range(3)), index=date_range('2002-1-1',periods=3,freq='D')))) store.append('data',df2) warnings.filterwarnings('always', category=AttributeConflictWarning) @@ -2240,10 +2241,10 @@ def test_retain_index_attributes(self): # this is ok _maybe_remove(store,'df2') - df2 = DataFrame(dict(A = Series(range(3), + df2 = DataFrame(dict(A = Series(list(range(3)), index=[Timestamp('20010101'),Timestamp('20010102'),Timestamp('20020101')]))) store.append('df2',df2) - df3 = DataFrame(dict(A = Series(range(3),index=date_range('2002-1-1',periods=3,freq='D')))) + df3 = DataFrame(dict(A = Series(list(range(3)),index=date_range('2002-1-1',periods=3,freq='D')))) store.append('df2',df3) def test_retain_index_attributes2(self): @@ -2252,20 +2253,20 @@ def test_retain_index_attributes2(self): warnings.filterwarnings('ignore', 
category=AttributeConflictWarning) - df = DataFrame(dict(A = Series(range(3), index=date_range('2000-1-1',periods=3,freq='H')))) + df = DataFrame(dict(A = Series(list(range(3)), index=date_range('2000-1-1',periods=3,freq='H')))) df.to_hdf(path,'data',mode='w',append=True) - df2 = DataFrame(dict(A = Series(range(3), index=date_range('2002-1-1',periods=3,freq='D')))) + df2 = DataFrame(dict(A = Series(list(range(3)), index=date_range('2002-1-1',periods=3,freq='D')))) df2.to_hdf(path,'data',append=True) idx = date_range('2000-1-1',periods=3,freq='H') idx.name = 'foo' - df = DataFrame(dict(A = Series(range(3), index=idx))) + df = DataFrame(dict(A = Series(list(range(3)), index=idx))) df.to_hdf(path,'data',mode='w',append=True) self.assert_(read_hdf(path,'data').index.name == 'foo') idx2 = date_range('2001-1-1',periods=3,freq='H') idx2.name = 'bar' - df2 = DataFrame(dict(A = Series(range(3), index=idx2))) + df2 = DataFrame(dict(A = Series(list(range(3)), index=idx2))) df2.to_hdf(path,'data',append=True) self.assert_(read_hdf(path,'data').index.name is None) @@ -2389,7 +2390,7 @@ def f(): # valid result = store.select_column('df', 'index') tm.assert_almost_equal(result.values, Series(df.index).values) - self.assert_(isinstance(result,Series)) + tm.assert_isinstance(result,Series) # not a data indexable column self.assertRaises( @@ -2558,7 +2559,7 @@ def test_start_stop(self): result = store.select( 'df', [Term("columns", "=", ["A"])], start=30, stop=40) assert(len(result) == 0) - assert(type(result) == DataFrame) + tm.assert_isinstance(result, DataFrame) def test_select_filter_corner(self): diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index d75de149d6f4b..3f21150525f3f 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -13,6 +13,7 @@ from pandas.io.stata import read_stata, StataReader import pandas.util.testing as tm from pandas.util.misc import is_little_endian +import six class StataTests(unittest.TestCase): diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 7bc6f818c663b..09e8bdb577a22 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -151,7 +151,7 @@ def _init_dict(self, data, index, columns, dtype=None): # pre-filter out columns if we passed it if columns is not None: columns = _ensure_index(columns) - data = dict((k, v) for k, v in data.iteritems() if k in columns) + data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) else: columns = Index(_try_sort(data.keys())) @@ -164,7 +164,7 @@ def _init_dict(self, data, index, columns, dtype=None): copy=True) sdict = {} - for k, v in data.iteritems(): + for k, v in compat.iteritems(data): if isinstance(v, Series): # Force alignment, no copy necessary if not v.index.equals(index): @@ -214,7 +214,7 @@ def __array_wrap__(self, result): def __getstate__(self): series = dict((k, (v.sp_index, v.sp_values)) - for k, v in self.iteritems()) + for k, v in compat.iteritems(self)) columns = self.columns index = self.index @@ -235,7 +235,7 @@ def __setstate__(self, state): index = idx series_dict = {} - for col, (sp_index, sp_values) in series.iteritems(): + for col, (sp_index, sp_values) in compat.iteritems(series): series_dict[col] = SparseSeries(sp_values, sparse_index=sp_index, fill_value=fv) @@ -253,13 +253,13 @@ def to_dense(self): ------- df : DataFrame """ - data = dict((k, v.to_dense()) for k, v in self.iteritems()) + data = dict((k, v.to_dense()) for k, v in compat.iteritems(self)) return DataFrame(data, index=self.index) def 
get_dtype_counts(self): from collections import defaultdict d = defaultdict(int) - for k, v in self.iteritems(): + for k, v in compat.iteritems(self): d[v.dtype.name] += 1 return Series(d) @@ -270,7 +270,7 @@ def copy(self, deep=True): """ Make a copy of this SparseDataFrame """ - series = dict((k, v.copy()) for k, v in self.iteritems()) + series = dict((k, v.copy()) for k, v in compat.iteritems(self)) return SparseDataFrame(series, index=self.index, columns=self.columns, default_fill_value=self.default_fill_value, default_kind=self.default_kind) @@ -282,7 +282,7 @@ def density(self): represented in the frame """ tot_nonsparse = sum([ser.sp_index.npoints - for _, ser in self.iteritems()]) + for _, ser in compat.iteritems(self)]) tot = len(self.index) * len(self.columns) return tot_nonsparse / float(tot) @@ -548,7 +548,7 @@ def _combine_match_index(self, other, func, fill_value=None): if other.index is not new_index: other = other.reindex(new_index) - for col, series in this.iteritems(): + for col, series in compat.iteritems(this): new_data[col] = func(series.values, other.values) return self._constructor(new_data, index=new_index, @@ -579,7 +579,7 @@ def _combine_match_columns(self, other, func, fill_value): def _combine_const(self, other, func): new_data = {} - for col, series in self.iteritems(): + for col, series in compat.iteritems(self): new_data[col] = func(series, other) return self._constructor(data=new_data, index=self.index, @@ -605,7 +605,7 @@ def _reindex_index(self, index, method, copy, level, fill_value=np.nan, need_mask = mask.any() new_series = {} - for col, series in self.iteritems(): + for col, series in compat.iteritems(self): values = series.values new = values.take(indexer) @@ -629,7 +629,7 @@ def _reindex_columns(self, columns, copy, level, fill_value, limit=None, raise NotImplementedError # TODO: fill value handling - sdict = dict((k, v) for k, v in self.iteritems() if k in columns) + sdict = dict((k, v) for k, v in compat.iteritems(self) if k in columns) return SparseDataFrame(sdict, index=self.index, columns=columns, default_fill_value=self.default_fill_value) @@ -800,11 +800,11 @@ def shift(self, periods, freq=None, **kwds): new_series = {} if offset is None: new_index = self.index - for col, s in self.iteritems(): + for col, s in compat.iteritems(self): new_series[col] = s.shift(periods) else: new_index = self.index.shift(periods, offset) - for col, s in self.iteritems(): + for col, s in compat.iteritems(self): new_series[col] = SparseSeries(s.sp_values, index=new_index, sparse_index=s.sp_index, fill_value=s.fill_value) @@ -836,7 +836,7 @@ def apply(self, func, axis=0, broadcast=False): if isinstance(func, np.ufunc): new_series = {} - for k, v in self.iteritems(): + for k, v in compat.iteritems(self): applied = func(v) applied.fill_value = func(applied.fill_value) new_series[k] = applied @@ -870,7 +870,7 @@ def applymap(self, func): @Appender(DataFrame.fillna.__doc__) def fillna(self, value=None, method=None, inplace=False, limit=None): new_series = {} - for k, v in self.iteritems(): + for k, v in compat.iteritems(self): new_series[k] = v.fillna(value=value, method=method, limit=limit) if inplace: @@ -885,7 +885,7 @@ def stack_sparse_frame(frame): """ Only makes sense when fill_value is NaN """ - lengths = [s.sp_index.npoints for _, s in frame.iteritems()] + lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)] nobs = sum(lengths) # this is pretty fast @@ -896,7 +896,7 @@ def stack_sparse_frame(frame): # TODO: Figure out whether this can be reached. 
# I think this currently can't be reached because you can't build a SparseDataFrame # with a non-np.NaN fill value (fails earlier). - for _, series in frame.iteritems(): + for _, series in compat.iteritems(frame): if not np.isnan(series.fill_value): raise TypeError('This routine assumes NaN fill value') @@ -936,7 +936,7 @@ def homogenize(series_dict): need_reindex = False - for _, series in series_dict.iteritems(): + for _, series in compat.iteritems(series_dict): if not np.isnan(series.fill_value): raise TypeError('this method is only valid with NaN fill values') @@ -948,7 +948,7 @@ def homogenize(series_dict): if need_reindex: output = {} - for name, series in series_dict.iteritems(): + for name, series in compat.iteritems(series_dict): if not series.sp_index.equals(index): series = series.sparse_reindex(index) diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 746b91a898a9c..494cbaf838175 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -17,6 +17,7 @@ from pandas.util.decorators import deprecate import pandas.core.common as com +import six class SparsePanelAxis(object): @@ -34,7 +35,7 @@ def __set__(self, obj, value): if isinstance(value, MultiIndex): raise NotImplementedError - for v in obj._frames.itervalues(): + for v in six.itervalues(obj._frames): setattr(v, self.frame_attr, value) setattr(obj, self.cache_field, value) @@ -334,7 +335,7 @@ def reindex(self, major=None, items=None, minor=None, major_axis=None, new_frames = self._frames if copy: - new_frames = dict((k, v.copy()) for k, v in new_frames.iteritems()) + new_frames = dict((k, v.copy()) for k, v in compat.iteritems(new_frames)) return SparsePanel(new_frames, items=items, major_axis=major, @@ -349,7 +350,7 @@ def _combine(self, other, func, axis=0): return self._combinePanel(other, func) elif np.isscalar(other): new_frames = dict((k, func(v, other)) - for k, v in self.iteritems()) + for k, v in compat.iteritems(self)) return self._new_like(new_frames) def _combineFrame(self, other, func, axis=0): @@ -426,7 +427,7 @@ def major_xs(self, key): y : DataFrame index -> minor axis, columns -> items """ - slices = dict((k, v.xs(key)) for k, v in self.iteritems()) + slices = dict((k, v.xs(key)) for k, v in compat.iteritems(self)) return DataFrame(slices, index=self.minor_axis, columns=self.items) def minor_xs(self, key): @@ -443,7 +444,7 @@ def minor_xs(self, key): y : SparseDataFrame index -> major axis, columns -> items """ - slices = dict((k, v[key]) for k, v in self.iteritems()) + slices = dict((k, v[key]) for k, v in compat.iteritems(self)) return SparseDataFrame(slices, index=self.major_axis, columns=self.items, default_fill_value=self.default_fill_value, @@ -455,7 +456,7 @@ def minor_xs(self, key): def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): from pandas.core.panel import _get_combined_index output = {} - for item, df in frames.iteritems(): + for item, df in compat.iteritems(frames): if not isinstance(df, SparseDataFrame): df = SparseDataFrame(df, default_kind=kind, default_fill_value=fill_value) @@ -472,7 +473,7 @@ def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): index = _ensure_index(index) columns = _ensure_index(columns) - for item, df in output.iteritems(): + for item, df in compat.iteritems(output): if not (df.index.equals(index) and df.columns.equals(columns)): output[item] = df.reindex(index=index, columns=columns) @@ -480,7 +481,7 @@ def _convert_frames(frames, index, columns, fill_value=np.nan, kind='block'): def 
_stack_sparse_info(frame): - lengths = [s.sp_index.npoints for _, s in frame.iteritems()] + lengths = [s.sp_index.npoints for _, s in compat.iteritems(frame)] # this is pretty fast minor_labels = np.repeat(np.arange(len(frame.columns)), lengths) diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 96edc71d1fe5f..178f8ea8c9ca8 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -11,6 +11,7 @@ from pandas.core.common import notnull from pandas.sparse.api import SparseArray from pandas.util.testing import assert_almost_equal, assertRaisesRegexp +import pandas.util.testing as tm def assert_sp_array_equal(left, right): @@ -129,19 +130,19 @@ def _check_op(op, first, second): res = op(first, second) exp = SparseArray(op(first.values, second.values), fill_value=first.fill_value) - self.assert_(isinstance(res, SparseArray)) + tm.assert_isinstance(res, SparseArray) assert_almost_equal(res.values, exp.values) res2 = op(first, second.values) - self.assert_(isinstance(res2, SparseArray)) + tm.assert_isinstance(res2, SparseArray) assert_sp_array_equal(res, res2) res3 = op(first.values, second) - self.assert_(isinstance(res3, SparseArray)) + tm.assert_isinstance(res3, SparseArray) assert_sp_array_equal(res, res3) res4 = op(first, 4) - self.assert_(isinstance(res4, SparseArray)) + tm.assert_isinstance(res4, SparseArray) exp = op(first.values, 4) exp_fv = op(first.fill_value, 4) assert_almost_equal(res4.fill_value, exp_fv) diff --git a/pandas/sparse/tests/test_libsparse.py b/pandas/sparse/tests/test_libsparse.py index d31f919e2e84b..f820142a6e71d 100644 --- a/pandas/sparse/tests/test_libsparse.py +++ b/pandas/sparse/tests/test_libsparse.py @@ -7,6 +7,7 @@ import numpy as np import operator from numpy.testing import assert_almost_equal, assert_equal +import pandas.util.testing as tm from pandas.core.sparse import SparseSeries from pandas import DataFrame @@ -288,7 +289,7 @@ def _check_case(xloc, xlen, yloc, ylen, eloc, elen): # see if survive the round trip xbindex = xindex.to_int_index().to_block_index() ybindex = yindex.to_int_index().to_block_index() - self.assert_(isinstance(xbindex, BlockIndex)) + tm.assert_isinstance(xbindex, BlockIndex) self.assert_(xbindex.equals(xindex)) self.assert_(ybindex.equals(yindex)) check_cases(_check_case) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 75d58f483a1da..5be3703ddb742 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -87,7 +87,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True): exact: Series SparseIndex objects must be exactly the same, otherwise just compare dense representations """ - for col, series in left.iteritems(): + for col, series in compat.iteritems(left): assert(col in right) # trade-off? @@ -107,7 +107,7 @@ def assert_sp_frame_equal(left, right, exact_indices=True): def assert_sp_panel_equal(left, right, exact_indices=True): - for item, frame in left.iteritems(): + for item, frame in compat.iteritems(left): assert(item in right) # trade-off? 
assert_sp_frame_equal(frame, right[item], exact_indices=exact_indices) @@ -206,9 +206,9 @@ def test_to_dense_preserve_name(self): def test_constructor(self): # test setup guys self.assert_(np.isnan(self.bseries.fill_value)) - self.assert_(isinstance(self.bseries.sp_index, BlockIndex)) + tm.assert_isinstance(self.bseries.sp_index, BlockIndex) self.assert_(np.isnan(self.iseries.fill_value)) - self.assert_(isinstance(self.iseries.sp_index, IntIndex)) + tm.assert_isinstance(self.iseries.sp_index, IntIndex) self.assertEquals(self.zbseries.fill_value, 0) assert_equal(self.zbseries.values, self.bseries.to_dense().fillna(0)) @@ -224,7 +224,7 @@ def test_constructor(self): # Sparse time series works date_index = bdate_range('1/1/2000', periods=len(self.bseries)) s5 = SparseSeries(self.bseries, index=date_index) - self.assert_(isinstance(s5, SparseTimeSeries)) + tm.assert_isinstance(s5, SparseTimeSeries) # pass Series bseries2 = SparseSeries(self.bseries.to_dense()) @@ -314,7 +314,7 @@ def _check_all(self, check_func): def test_getitem(self): def _check_getitem(sp, dense): - for idx, val in dense.iteritems(): + for idx, val in compat.iteritems(dense): assert_almost_equal(val, sp[idx]) for i in range(len(dense)): @@ -367,11 +367,11 @@ def test_set_value(self): def test_getitem_slice(self): idx = self.bseries.index res = self.bseries[::2] - self.assert_(isinstance(res, SparseSeries)) + tm.assert_isinstance(res, SparseSeries) assert_sp_series_equal(res, self.bseries.reindex(idx[::2])) res = self.bseries[:5] - self.assert_(isinstance(res, SparseSeries)) + tm.assert_isinstance(res, SparseSeries) assert_sp_series_equal(res, self.bseries.reindex(idx[:5])) res = self.bseries[5:] @@ -388,7 +388,7 @@ def _compare_with_dense(sp): def _compare(idx): dense_result = dense.take(idx).values sparse_result = sp.take(idx) - self.assert_(isinstance(sparse_result, SparseSeries)) + tm.assert_isinstance(sparse_result, SparseSeries) assert_almost_equal(dense_result, sparse_result.values) _compare([1., 2., 3., 4., 5., 0.]) @@ -626,7 +626,7 @@ def _check_matches(indices, expected): sparse_index=idx) homogenized = spf.homogenize(data) - for k, v in homogenized.iteritems(): + for k, v in compat.iteritems(homogenized): assert(v.sp_index.equals(expected)) indices1 = [BlockIndex(10, [2], [7]), @@ -682,13 +682,13 @@ def test_shift(self): def test_cumsum(self): result = self.bseries.cumsum() expected = self.bseries.to_dense().cumsum() - self.assert_(isinstance(result, SparseSeries)) + tm.assert_isinstance(result, SparseSeries) self.assertEquals(result.name, self.bseries.name) assert_series_equal(result.to_dense(), expected) result = self.zbseries.cumsum() expected = self.zbseries.to_dense().cumsum() - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, expected) def test_combine_first(self): @@ -753,15 +753,15 @@ def test_as_matrix(self): def test_copy(self): cp = self.frame.copy() - self.assert_(isinstance(cp, SparseDataFrame)) + tm.assert_isinstance(cp, SparseDataFrame) assert_sp_frame_equal(cp, self.frame) self.assert_(cp.index is self.frame.index) def test_constructor(self): - for col, series in self.frame.iteritems(): - self.assert_(isinstance(series, SparseSeries)) + for col, series in compat.iteritems(self.frame): + tm.assert_isinstance(series, SparseSeries) - self.assert_(isinstance(self.iframe['A'].sp_index, IntIndex)) + tm.assert_isinstance(self.iframe['A'].sp_index, IntIndex) # constructed zframe from matrix above self.assertEquals(self.zframe['A'].fill_value, 0) @@ 
-770,12 +770,12 @@ def test_constructor(self): # construct no data sdf = SparseDataFrame(columns=np.arange(10), index=np.arange(10)) - for col, series in sdf.iteritems(): - self.assert_(isinstance(series, SparseSeries)) + for col, series in compat.iteritems(sdf): + tm.assert_isinstance(series, SparseSeries) # construct from nested dict data = {} - for c, s in self.frame.iteritems(): + for c, s in compat.iteritems(self.frame): data[c] = s.to_dict() sdf = SparseDataFrame(data) @@ -836,9 +836,9 @@ def test_constructor_from_series(self): # GH 2873 x = Series(np.random.randn(10000), name='a') x = x.to_sparse(fill_value=0) - self.assert_(isinstance(x,SparseSeries)) + tm.assert_isinstance(x,SparseSeries) df = SparseDataFrame(x) - self.assert_(isinstance(df,SparseDataFrame)) + tm.assert_isinstance(df,SparseDataFrame) x = Series(np.random.randn(10000), name ='a') y = Series(np.random.randn(10000), name ='b') @@ -888,13 +888,13 @@ def test_dense_to_sparse(self): df = DataFrame({'A': [nan, nan, nan, 1, 2], 'B': [1, 2, nan, nan, nan]}) sdf = df.to_sparse() - self.assert_(isinstance(sdf, SparseDataFrame)) + tm.assert_isinstance(sdf, SparseDataFrame) self.assert_(np.isnan(sdf.default_fill_value)) - self.assert_(isinstance(sdf['A'].sp_index, BlockIndex)) + tm.assert_isinstance(sdf['A'].sp_index, BlockIndex) tm.assert_frame_equal(sdf.to_dense(), df) sdf = df.to_sparse(kind='integer') - self.assert_(isinstance(sdf['A'].sp_index, IntIndex)) + tm.assert_isinstance(sdf['A'].sp_index, IntIndex) df = DataFrame({'A': [0, 0, 0, 1, 2], 'B': [1, 2, 0, 0, 0]}, dtype=float) @@ -962,7 +962,7 @@ def _compare_to_dense(a, b, da, db, op): if isinstance(a, DataFrame) and isinstance(db, DataFrame): mixed_result = op(a, db) - self.assert_(isinstance(mixed_result, SparseDataFrame)) + tm.assert_isinstance(mixed_result, SparseDataFrame) assert_sp_frame_equal(mixed_result, sparse_result, exact_indices=False) @@ -1010,7 +1010,7 @@ def test_op_corners(self): self.assert_(empty.empty) foo = self.frame + self.empty - self.assert_(isinstance(foo.index, DatetimeIndex)) + tm.assert_isinstance(foo.index, DatetimeIndex) assert_frame_equal(foo, self.frame * np.nan) foo = self.empty + self.frame @@ -1085,7 +1085,7 @@ def _check_frame(frame): # insert SparseSeries frame['E'] = frame['A'] - self.assert_(isinstance(frame['E'], SparseSeries)) + tm.assert_isinstance(frame['E'], SparseSeries) assert_sp_series_equal(frame['E'], frame['A']) # insert SparseSeries differently-indexed @@ -1096,7 +1096,7 @@ def _check_frame(frame): # insert Series frame['F'] = frame['A'].to_dense() - self.assert_(isinstance(frame['F'], SparseSeries)) + tm.assert_isinstance(frame['F'], SparseSeries) assert_sp_series_equal(frame['F'], frame['A']) # insert Series differently-indexed @@ -1107,7 +1107,7 @@ def _check_frame(frame): # insert ndarray frame['H'] = np.random.randn(N) - self.assert_(isinstance(frame['H'], SparseSeries)) + tm.assert_isinstance(frame['H'], SparseSeries) to_sparsify = np.random.randn(N) to_sparsify[N // 2:] = frame.default_fill_value @@ -1178,7 +1178,7 @@ def test_append(self): def test_apply(self): applied = self.frame.apply(np.sqrt) - self.assert_(isinstance(applied, SparseDataFrame)) + tm.assert_isinstance(applied, SparseDataFrame) assert_almost_equal(applied.values, np.sqrt(self.frame.values)) applied = self.fill_frame.apply(np.sqrt) @@ -1190,7 +1190,7 @@ def test_apply(self): self.frame.to_dense().apply(np.sum)) broadcasted = self.frame.apply(np.sum, broadcast=True) - self.assert_(isinstance(broadcasted, SparseDataFrame)) + 
tm.assert_isinstance(broadcasted, SparseDataFrame) assert_frame_equal(broadcasted.to_dense(), self.frame.to_dense().apply(np.sum, broadcast=True)) @@ -1213,7 +1213,7 @@ def test_apply_nonuq(self): def test_applymap(self): # just test that it works result = self.frame.applymap(lambda x: x * 2) - self.assert_(isinstance(result, SparseDataFrame)) + tm.assert_isinstance(result, SparseDataFrame) def test_astype(self): self.assertRaises(Exception, self.frame.astype, np.int64) @@ -1399,7 +1399,7 @@ def test_count(self): def test_cumsum(self): result = self.frame.cumsum() expected = self.frame.to_dense().cumsum() - self.assert_(isinstance(result, SparseDataFrame)) + tm.assert_isinstance(result, SparseDataFrame) assert_frame_equal(result.to_dense(), expected) def _check_all(self, check_func): @@ -1535,9 +1535,9 @@ def test_pickle(self): def _test_roundtrip(panel): pickled = pickle.dumps(panel, protocol=pickle.HIGHEST_PROTOCOL) unpickled = pickle.loads(pickled) - self.assert_(isinstance(unpickled.items, Index)) - self.assert_(isinstance(unpickled.major_axis, Index)) - self.assert_(isinstance(unpickled.minor_axis, Index)) + tm.assert_isinstance(unpickled.items, Index) + tm.assert_isinstance(unpickled.major_axis, Index) + tm.assert_isinstance(unpickled.minor_axis, Index) assert_sp_panel_equal(panel, unpickled) _test_roundtrip(self.panel) @@ -1545,7 +1545,7 @@ def _test_roundtrip(panel): def test_dense_to_sparse(self): wp = Panel.from_dict(self.data_dict) dwp = wp.to_sparse() - self.assert_(isinstance(dwp['ItemA']['A'], SparseSeries)) + tm.assert_isinstance(dwp['ItemA']['A'], SparseSeries) def test_to_dense(self): dwp = self.panel.to_dense() diff --git a/pandas/stats/common.py b/pandas/stats/common.py index 75ebc9284ca21..c30b3e7a4bf61 100644 --- a/pandas/stats/common.py +++ b/pandas/stats/common.py @@ -5,7 +5,7 @@ 2: 'expanding' } # also allow 'rolling' as key -_WINDOW_TYPES.update((v, v) for k,v in _WINDOW_TYPES.items()) +_WINDOW_TYPES.update((v, v) for k,v in list(_WINDOW_TYPES.items())) _ADDITIONAL_CLUSTER_TYPES = set(("entity", "time")) def _get_cluster_type(cluster_type): diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index e9563dcd16f72..f1ac35cad1aaf 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -1254,7 +1254,7 @@ def _safe_update(d, other): """ Combine dictionaries with non-overlapping keys """ - for k, v in other.iteritems(): + for k, v in compat.iteritems(other): if k in d: raise Exception('Duplicate regressor: %s' % k) @@ -1320,7 +1320,7 @@ def _combine_rhs(rhs): elif isinstance(rhs, DataFrame): series = rhs.copy() elif isinstance(rhs, dict): - for name, value in rhs.iteritems(): + for name, value in compat.iteritems(rhs): if isinstance(value, Series): _safe_update(series, {name: value}) elif isinstance(value, (dict, DataFrame)): diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py index 44f0dcf2bc16a..fb9f3aadcb8a6 100644 --- a/pandas/stats/plm.py +++ b/pandas/stats/plm.py @@ -263,7 +263,7 @@ def _add_categorical_dummies(self, panel, cat_mappings): val_map = cat_mappings.get(effect) if val_map: - val_map = dict((v, k) for k, v in val_map.iteritems()) + val_map = dict((v, k) for k, v in compat.iteritems(val_map)) if dropped_dummy or not self._use_all_dummies: if effect in self._dropped_dummies: @@ -672,7 +672,7 @@ def _enough_obs(self): def create_ols_dict(attr): def attr_getter(self): d = {} - for k, v in self.results.iteritems(): + for k, v in compat.iteritems(self.results): result = getattr(v, attr) d[k] = result diff --git 
a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index 593d6ab5e2945..2e55c3d5af293 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -3,6 +3,7 @@ from .common import assert_almost_equal, BaseTest from pandas.util.py3compat import range +from pandas.util import compat import numpy as np @@ -38,7 +39,7 @@ def checkFamaMacBethExtended(self, window_type, x, y, **kwds): end = index[i + window - 1] x2 = {} - for k, v in x.iteritems(): + for k, v in compat.iteritems(x): x2[k] = v.truncate(start, end) y2 = y.truncate(start, end) diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index c948d2abaca29..3780455c0bb6c 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -434,7 +434,7 @@ def _check_structures(self, func, static_comp, fill_value=None): series_result = func(self.series, 50) - self.assert_(isinstance(series_result, Series)) + tm.assert_isinstance(series_result, Series) frame_result = func(self.frame, 50) self.assertEquals(type(frame_result), DataFrame) @@ -568,7 +568,7 @@ def _check_ew_ndarray(self, func, preserve_nan=False): def _check_ew_structures(self, func): series_result = func(self.series, com=10) - self.assert_(isinstance(series_result, Series)) + tm.assert_isinstance(series_result, Series) frame_result = func(self.frame, com=10) self.assertEquals(type(frame_result), DataFrame) @@ -769,7 +769,7 @@ def _check_expanding_ndarray(self, func, static_comp, has_min_periods=True, def _check_expanding_structures(self, func): series_result = func(self.series) - self.assert_(isinstance(series_result, Series)) + tm.assert_isinstance(series_result, Series) frame_result = func(self.frame) self.assertEquals(type(frame_result), DataFrame) diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index cbfbc0ad11e57..f9bcb6fabbe6f 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -22,9 +22,8 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp) import pandas.util.testing as tm - -from common import BaseTest -import six +import pandas.util.compat as compat +from .common import BaseTest _have_statsmodels = True try: @@ -42,7 +41,7 @@ def _check_repr(obj): def _compare_ols_results(model1, model2): - assert(type(model1) == type(model2)) + tm.assert_isinstance(model1, type(model2)) if hasattr(model1, '_window_type'): _compare_moving_ols(model1, model2) @@ -369,7 +368,7 @@ def test_longpanel_series_combo(self): y = lp.pop('ItemA') model = ols(y=y, x=lp, entity_effects=True, window=20) self.assert_(notnull(model.beta.values).all()) - self.assert_(isinstance(model, PanelOLS)) + tm.assert_isinstance(model, PanelOLS) model.summary def test_series_rhs(self): @@ -390,7 +389,7 @@ def test_various_attributes(self): for attr in series_attrs: value = getattr(model, attr) - self.assert_(isinstance(value, Series)) + tm.assert_isinstance(value, Series) # works model._results @@ -531,7 +530,7 @@ def test_wls_panel(self): stack_y = y.stack() stack_x = DataFrame(dict((k, v.stack()) - for k, v in x.iteritems())) + for k, v in compat.iteritems(x))) weights = x.std('items') stack_weights = weights.stack() diff --git a/pandas/stats/var.py b/pandas/stats/var.py index b10d6b9fa3eb8..0aa7a50d8d076 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -62,7 +62,7 @@ def beta(self): DataFrame """ d = dict([(key, value.beta) - for (key, value) in 
self.ols_results.iteritems()]) + for (key, value) in compat.iteritems(self.ols_results)]) return DataFrame(d) def forecast(self, h): @@ -80,7 +80,7 @@ def forecast(self, h): DataFrame """ forecast = self._forecast_raw(h)[:, 0, :] - return DataFrame(forecast, index=range(1, 1 + h), + return DataFrame(forecast, index=list(range(1, 1 + h)), columns=self._columns) def forecast_cov(self, h): @@ -103,7 +103,7 @@ def forecast_std_err(self, h): DataFrame """ return DataFrame(self._forecast_std_err_raw(h), - index=range(1, 1 + h), columns=self._columns) + index=list(range(1, 1 + h)), columns=self._columns) @cache_readonly def granger_causality(self): @@ -135,13 +135,13 @@ def granger_causality(self): lagged_data = self._lagged_data[i].filter( self._columns - [col]) - for key, value in lagged_data.iteritems(): + for key, value in compat.iteritems(lagged_data): d[col][_make_param_name(i, key)] = value f_stat_dict = {} p_value_dict = {} - for col, y in self._data.iteritems(): + for col, y in compat.iteritems(self._data): ssr_full = (self.resid[col] ** 2).sum() f_stats = [] @@ -194,11 +194,11 @@ def ols_results(self): d = {} for i in range(1, 1 + self._p): - for col, series in self._lagged_data[i].iteritems(): + for col, series in compat.iteritems(self._lagged_data[i]): d[_make_param_name(i, col)] = series result = dict([(col, ols(y=y, x=d, intercept=self._intercept)) - for col, y in self._data.iteritems()]) + for col, y in compat.iteritems(self._data)]) return result @@ -214,7 +214,7 @@ def resid(self): DataFrame """ d = dict([(col, series.resid) - for (col, series) in self.ols_results.iteritems()]) + for (col, series) in compat.iteritems(self.ols_results)]) return DataFrame(d, index=self._index) @cache_readonly @@ -345,7 +345,7 @@ def _forecast_cov_beta_raw(self, n): for t in range(T + 1): index = t + p - y = values.take(range(index, index - p, -1), axis=0).ravel() + y = values.take(list(range(index, index - p, -1)), axis=0).ravel() trans_Z = np.hstack(([1], y)) trans_Z = trans_Z.reshape(1, len(trans_Z)) @@ -535,7 +535,7 @@ def forecast(self, h): Returns the forecasts at 1, 2, ..., n timesteps in the future. 
""" forecast = self._forecast_raw(h).T.swapaxes(1, 2) - index = range(1, 1 + h) + index = list(range(1, 1 + h)) w = Panel(forecast, items=self._data.items, major_axis=index, minor_axis=self._data.minor_axis) return w @@ -552,7 +552,7 @@ def resid(self): DataFrame """ d = dict([(key, value.resid) - for (key, value) in self.ols_results.iteritems()]) + for (key, value) in compat.iteritems(self.ols_results)]) return Panel.fromDict(d) def _data_xs(self, i): diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 1e04403b398a5..4c832f7850012 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -37,13 +37,13 @@ def test_ints(self): arr = np.random.randint(0, 100, size=50) result = algos.unique(arr) - self.assert_(isinstance(result, np.ndarray)) + tm.assert_isinstance(result, np.ndarray) def test_objects(self): arr = np.random.randint(0, 100, size=50).astype('O') result = algos.unique(arr) - self.assert_(isinstance(result, np.ndarray)) + tm.assert_isinstance(result, np.ndarray) def test_object_refcount_bug(self): lst = ['A', 'B', 'C', 'D', 'E'] diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 2b70fbcff8499..9bab218e7dfd5 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -95,7 +95,7 @@ def test_value_counts(self): arr = np.random.randn(4) factor = cut(arr, 4) - self.assert_(isinstance(factor, Categorical)) + tm.assert_isinstance(factor, Categorical) result = value_counts(factor) expected = value_counts(np.asarray(factor)) diff --git a/pandas/tests/test_config.py b/pandas/tests/test_config.py index a2b1ea43717cf..ed6f641cbcb2c 100644 --- a/pandas/tests/test_config.py +++ b/pandas/tests/test_config.py @@ -1,6 +1,5 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from __future__ import with_statement # support python 2.5 import pandas as pd import unittest import warnings diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 4b7ca2c701611..50cf5a0d5bed8 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -292,7 +292,7 @@ def test_to_string_unicode_columns(self): buf.getvalue() result = self.frame.to_string() - self.assert_(isinstance(result, unicode)) + tm.assert_isinstance(result, six.text_type) def test_to_string_utf8_columns(self): n = six.u("\u05d0").encode('utf-8') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index a3a799279f4fb..cd894ce2e7338 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2,15 +2,19 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta, time -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range, long -from pandas.util import compat import cPickle as pickle import operator import re import unittest import nose +from pandas.util import py3compat +from pandas.util.py3compat import StringIO, range, long +from pandas.util.compat import OrderedDict +from pandas.util import compat +import six +from six.moves import map, zip + from numpy import random, nan from numpy.random import randn import numpy as np @@ -35,16 +39,11 @@ assertRaisesRegexp, makeCustomDataframe as mkdf, ensure_clean) -from pandas.util import py3compat -from pandas.util.compat import OrderedDict import pandas.util.testing as tm import pandas.lib as lib from numpy.testing.decorators import slow -import six -from six.moves import map -from six.moves import zip def _skip_if_no_scipy(): try: @@ -107,11 +106,11 @@ def 
test_getitem(self): # column access - for _, series in sl.iteritems(): + for _, series in compat.iteritems(sl): self.assertEqual(20, len(series.index)) self.assert_(tm.equalContents(series.index, sl.index)) - for key, _ in self.frame._series.iteritems(): + for key, _ in compat.iteritems(self.frame._series): self.assert_(self.frame[key] is not None) self.assert_('random' not in self.frame) @@ -1428,7 +1427,7 @@ def test_get_value(self): def test_iteritems(self): df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'a', 'b']) - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): self.assertEqual(type(v), Series) def test_lookup(self): @@ -1571,13 +1570,13 @@ def test_irow_icol_duplicates(self): result = df.irow(0) result2 = df.ix[0] - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) result = df.T.icol(0) result2 = df.T.ix[:, 0] - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_almost_equal(result.values, df.values[0]) assert_series_equal(result, result2) @@ -1637,7 +1636,7 @@ def test_nested_exception(self): _frame = DataFrame(_seriesd) _frame2 = DataFrame(_seriesd, columns=['D', 'C', 'B', 'A']) _intframe = DataFrame(dict((k, v.astype(int)) - for k, v in _seriesd.iteritems())) + for k, v in compat.iteritems(_seriesd))) _tsframe = DataFrame(_tsd) @@ -1783,7 +1782,7 @@ def setUp(self): self.frame2 = _frame2.copy() # force these all to int64 to avoid platform testing issues - self.intframe = DataFrame(dict([ (c,s) for c,s in _intframe.iteritems() ]), dtype = np.int64) + self.intframe = DataFrame(dict([ (c,s) for c,s in compat.iteritems(_intframe) ]), dtype = np.int64) self.tsframe = _tsframe.copy() self.mixed_frame = _mixed_frame.copy() self.mixed_float = DataFrame({ 'A': _frame['A'].copy().astype('float32'), @@ -1979,7 +1978,7 @@ def test_set_index_cast_datetimeindex(self): 'B': np.random.randn(1000)}) idf = df.set_index('A') - self.assert_(isinstance(idf.index, DatetimeIndex)) + tm.assert_isinstance(idf.index, DatetimeIndex) def test_set_index_multiindexcolumns(self): columns = MultiIndex.from_tuples([('foo', 1), ('foo', 2), ('bar', 1)]) @@ -2074,7 +2073,7 @@ def test_constructor_list_frames(self): self.assert_(result.shape == (1,0)) result = DataFrame([DataFrame(dict(A = list(range(5))))]) - self.assert_(type(result.iloc[0,0]) == DataFrame) + tm.assert_isinstance(result.iloc[0,0], DataFrame) def test_constructor_mixed_dtypes(self): @@ -2261,11 +2260,11 @@ def test_constructor_subclass_dict(self): data = {'col1': tm.TestSubDict((x, 10.0 * x) for x in range(10)), 'col2': tm.TestSubDict((x, 20.0 * x) for x in range(10))} df = DataFrame(data) - refdf = DataFrame(dict((col, dict(val.iteritems())) - for col, val in data.iteritems())) + refdf = DataFrame(dict((col, dict(compat.iteritems(val))) + for col, val in compat.iteritems(data))) assert_frame_equal(refdf, df) - data = tm.TestSubDict(data.iteritems()) + data = tm.TestSubDict(compat.iteritems(data)) df = DataFrame(data) assert_frame_equal(refdf, df) @@ -2273,7 +2272,7 @@ def test_constructor_subclass_dict(self): from collections import defaultdict data = {} self.frame['B'][:10] = np.nan - for k, v in self.frame.iteritems(): + for k, v in compat.iteritems(self.frame): dct = defaultdict(dict) dct.update(v.to_dict()) data[k] = dct @@ -2315,17 +2314,17 @@ def test_constructor_dict_cast(self): def test_constructor_dict_dont_upcast(self): d = {'Col1': {'Row1': 'A String', 'Row2': 
np.nan}} df = DataFrame(d) - self.assert_(isinstance(df['Col1']['Row2'], float)) + tm.assert_isinstance(df['Col1']['Row2'], float) dm = DataFrame([[1, 2], ['a', 'b']], index=[1, 2], columns=[1, 2]) - self.assert_(isinstance(dm[1][1], int)) + tm.assert_isinstance(dm[1][1], int) def test_constructor_dict_of_tuples(self): # GH #1491 data = {'a': (1, 2, 3), 'b': (4, 5, 6)} result = DataFrame(data) - expected = DataFrame(dict((k, list(v)) for k, v in data.iteritems())) + expected = DataFrame(dict((k, list(v)) for k, v in compat.iteritems(data))) assert_frame_equal(result, expected, check_dtype=False) def test_constructor_ndarray(self): @@ -2806,7 +2805,7 @@ def test_constructor_from_items(self): columns=self.mixed_frame.columns, orient='index') assert_frame_equal(recons, self.mixed_frame) - self.assert_(isinstance(recons['foo'][0], tuple)) + tm.assert_isinstance(recons['foo'][0], tuple) rs = DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], orient='index', columns=['one', 'two', 'three']) @@ -3251,7 +3250,7 @@ def test_astype(self): # mixed casting def _check_cast(df, v): - self.assert_(list(set([ s.dtype.name for _, s in df.iteritems() ]))[0] == v) + self.assert_(list(set([ s.dtype.name for _, s in compat.iteritems(df) ]))[0] == v) mn = self.all_mixed._get_numeric_data().copy() mn['little_float'] = np.array(12345.,dtype='float16') @@ -3330,7 +3329,7 @@ def test_astype_cast_nan_int(self): def test_array_interface(self): result = np.sqrt(self.frame) - self.assert_(type(result) is type(self.frame)) + tm.assert_isinstance(result, type(self.frame)) self.assert_(result.index is self.frame.index) self.assert_(result.columns is self.frame.columns) @@ -3354,20 +3353,20 @@ def test_to_dict(self): } recons_data = DataFrame(test_data).to_dict() - for k, v in test_data.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][k2]) recons_data = DataFrame(test_data).to_dict("l") - for k, v in test_data.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][int(k2) - 1]) recons_data = DataFrame(test_data).to_dict("s") - for k, v in test_data.iteritems(): - for k2, v2 in v.iteritems(): + for k, v in compat.iteritems(test_data): + for k2, v2 in compat.iteritems(v): self.assertEqual(v2, recons_data[k][k2]) def test_to_records_dt64(self): @@ -3602,12 +3601,12 @@ def test_from_records_sequencelike(self): tuples = [] columns = [] dtypes = [] - for dtype, b in blocks.iteritems(): + for dtype, b in compat.iteritems(blocks): columns.extend(b.columns) dtypes.extend([ (c,np.dtype(dtype).descr[0][1]) for c in b.columns ]) for i in range(len(df.index)): tup = [] - for _, b in blocks.iteritems(): + for _, b in compat.iteritems(blocks): tup.extend(b.irow(i).values) tuples.append(tuple(tup)) @@ -3666,11 +3665,11 @@ def test_from_records_dictlike(self): # columns is in a different order here than the actual items iterated from the dict columns = [] - for dtype, b in df.blocks.iteritems(): + for dtype, b in compat.iteritems(df.blocks): columns.extend(b.columns) - asdict = dict((x, y) for x, y in df.iteritems()) - asdict2 = dict((x, y.values) for x, y in df.iteritems()) + asdict = dict((x, y) for x, y in compat.iteritems(df)) + asdict2 = dict((x, y.values) for x, y in compat.iteritems(df)) # dict of series & dict of ndarrays (have dtype info) results = [] @@ -3981,7 +3980,7 @@ def test_itertuples(self): 'ints': 
list(range(5))}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): - self.assert_(isinstance(tup[1], np.integer)) + tm.assert_isinstance(tup[1], np.integer) df = DataFrame(data={"a": [1, 2, 3], "b": [4, 5, 6]}) dfaa = df[['a', 'a']] @@ -3997,16 +3996,16 @@ def test_operators(self): idSum = self.frame + self.frame seriesSum = self.frame + colSeries - for col, series in idSum.iteritems(): - for idx, val in series.iteritems(): + for col, series in compat.iteritems(idSum): + for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] * 2 if not np.isnan(val): self.assertEqual(val, origVal) else: self.assert_(np.isnan(origVal)) - for col, series in seriesSum.iteritems(): - for idx, val in series.iteritems(): + for col, series in compat.iteritems(seriesSum): + for idx, val in compat.iteritems(series): origVal = self.frame[col][idx] + colSeries[col] if not np.isnan(val): self.assertEqual(val, origVal) @@ -4525,7 +4524,7 @@ def test_combineSeries(self): added = self.frame + series - for key, s in added.iteritems(): + for key, s in compat.iteritems(added): assert_series_equal(s, self.frame[key] + series[key]) larger_series = series.to_dict() @@ -4533,7 +4532,7 @@ def test_combineSeries(self): larger_series = Series(larger_series) larger_added = self.frame + larger_series - for key, s in self.frame.iteritems(): + for key, s in compat.iteritems(self.frame): assert_series_equal(larger_added[key], s + series[key]) self.assert_('E' in larger_added) self.assert_(np.isnan(larger_added['E']).all()) @@ -4564,7 +4563,7 @@ def test_combineSeries(self): ts = self.tsframe['A'] added = self.tsframe + ts - for key, col in self.tsframe.iteritems(): + for key, col in compat.iteritems(self.tsframe): assert_series_equal(added[key], col + ts) smaller_frame = self.tsframe[:-5] @@ -4596,7 +4595,7 @@ def test_combineFunc(self): # vs mix result = self.mixed_float * 2 - for c, s in result.iteritems(): + for c, s in compat.iteritems(result): self.assert_(np.array_equal(s.values, self.mixed_float[c].values * 2)) _check_mixed_float(result, dtype = dict(C = None)) @@ -4824,7 +4823,7 @@ def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None, recons = DataFrame.from_csv(path,header=0,parse_dates=False) def _to_uni(x): - if not isinstance(x,unicode): + if not isinstance(x, six.text_type): return x.decode('utf8') return x if dupe_col: @@ -5449,7 +5448,7 @@ def test_dtypes(self): self.mixed_frame['bool'] = self.mixed_frame['A'] > 0 result = self.mixed_frame.dtypes expected = Series(dict((k, v.dtype) - for k, v in self.mixed_frame.iteritems()), + for k, v in compat.iteritems(self.mixed_frame)), index=result.index) assert_series_equal(result, expected) @@ -5599,10 +5598,10 @@ def test_asfreq_datetimeindex(self): index=[datetime(2011, 11, 1), datetime(2011, 11, 2), datetime(2011, 11, 3)]) df = df.asfreq('B') - self.assert_(isinstance(df.index, DatetimeIndex)) + tm.assert_isinstance(df.index, DatetimeIndex) ts = df['A'].asfreq('B') - self.assert_(isinstance(ts.index, DatetimeIndex)) + tm.assert_isinstance(ts.index, DatetimeIndex) def test_at_time_between_time_datetimeindex(self): index = pan.date_range("2012-01-01", "2012-01-05", freq='30min') @@ -5700,7 +5699,7 @@ def test_deepcopy(self): cp = deepcopy(self.frame) series = cp['A'] series[:] = 10 - for idx, value in series.iteritems(): + for idx, value in compat.iteritems(series): self.assertNotEqual(self.frame['A'][idx], value) def test_copy(self): @@ -6158,8 +6157,7 @@ def test_drop_col_still_multiindex(self): ['', '', '', 'OD'], ['', '', 
'', 'wx']] - tuples = list(zip(*arrays)) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(3, 4), columns=index) @@ -7018,7 +7016,7 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) filled = df.replace(to_rep, values) expected = {} - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): expected[k] = v.replace(to_rep[k], values[k]) assert_frame_equal(filled, DataFrame(expected)) @@ -7030,7 +7028,7 @@ def test_replace_input_formats(self): # dict to scalar filled = df.replace(to_rep, 0) expected = {} - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): expected[k] = v.replace(to_rep[k], 0) assert_frame_equal(filled, DataFrame(expected)) @@ -7042,7 +7040,7 @@ def test_replace_input_formats(self): 'C': ['', 'asdf', 'fd']}) filled = df.replace(np.nan, values) expected = {} - for k, v in df.iteritems(): + for k, v in compat.iteritems(df): expected[k] = v.replace(np.nan, values[k]) assert_frame_equal(filled, DataFrame(expected)) @@ -7128,7 +7126,7 @@ def test_truncate_copy(self): def test_xs(self): idx = self.frame.index[5] xs = self.frame.xs(idx) - for item, value in xs.iteritems(): + for item, value in compat.iteritems(xs): if np.isnan(value): self.assert_(np.isnan(self.frame[item][idx])) else: @@ -7244,7 +7242,7 @@ def test_reindex(self): newFrame = self.frame.reindex(self.ts1.index) for col in newFrame.columns: - for idx, val in newFrame[col].iteritems(): + for idx, val in compat.iteritems(newFrame[col]): if idx in self.frame.index: if np.isnan(val): self.assert_(np.isnan(self.frame[col][idx])) @@ -7253,7 +7251,7 @@ def test_reindex(self): else: self.assert_(np.isnan(val)) - for col, series in newFrame.iteritems(): + for col, series in compat.iteritems(newFrame): self.assert_(tm.equalContents(series.index, newFrame.index)) emptyFrame = self.frame.reindex(Index([])) self.assert_(len(emptyFrame.index) == 0) @@ -7262,7 +7260,7 @@ def test_reindex(self): nonContigFrame = self.frame.reindex(self.ts1.index[::2]) for col in nonContigFrame.columns: - for idx, val in nonContigFrame[col].iteritems(): + for idx, val in compat.iteritems(nonContigFrame[col]): if idx in self.frame.index: if np.isnan(val): self.assert_(np.isnan(self.frame[col][idx])) @@ -7271,7 +7269,7 @@ def test_reindex(self): else: self.assert_(np.isnan(val)) - for col, series in nonContigFrame.iteritems(): + for col, series in compat.iteritems(nonContigFrame): self.assert_(tm.equalContents(series.index, nonContigFrame.index)) @@ -7552,13 +7550,13 @@ def _safe_add(df): # only add to the numeric items def is_ok(s): return issubclass(s.dtype.type, (np.integer,np.floating)) and s.dtype != 'uint8' - return DataFrame(dict([ (c,s+1) if is_ok(s) else (c,s) for c, s in df.iteritems() ])) + return DataFrame(dict([ (c,s+1) if is_ok(s) else (c,s) for c, s in compat.iteritems(df) ])) def _check_get(df, cond, check_dtypes = True): other1 = _safe_add(df) rs = df.where(cond, other1) rs2 = df.where(cond.values, other1) - for k, v in rs.iteritems(): + for k, v in compat.iteritems(rs): assert_series_equal(v, np.where(cond[k], df[k], other1[k])) assert_frame_equal(rs, rs2) @@ -7652,7 +7650,7 @@ def _check_set(df, cond, check_dtypes = True): # dtypes (and confirm upcasts)x if check_dtypes: - for k, v in df.dtypes.iteritems(): + for k, v in compat.iteritems(df.dtypes): if issubclass(v.type,np.integer) and not cond[k].all(): v = np.dtype('float64') self.assert_(dfi[k].dtype == v) @@ -7726,8 +7724,8 @@ def test_mask_edge_case_1xN_frame(self): def 
test_transpose(self): frame = self.frame dft = frame.T - for idx, series in dft.iteritems(): - for col, value in series.iteritems(): + for idx, series in compat.iteritems(dft): + for col, value in compat.iteritems(series): if np.isnan(value): self.assert_(np.isnan(frame[col][idx])) else: @@ -7738,7 +7736,7 @@ def test_transpose(self): mixed = DataFrame(data, index=index) mixed_T = mixed.T - for col, s in mixed_T.iteritems(): + for col, s in compat.iteritems(mixed_T): self.assert_(s.dtype == np.object_) def test_transpose_get_view(self): @@ -8045,7 +8043,7 @@ def test_apply_broadcast(self): broadcasted = self.frame.apply(np.mean, broadcast=True) agged = self.frame.apply(np.mean) - for col, ts in broadcasted.iteritems(): + for col, ts in compat.iteritems(broadcasted): self.assert_((ts == agged[col]).all()) broadcasted = self.frame.apply(np.mean, axis=1, broadcast=True) @@ -8102,10 +8100,10 @@ def _checkit(axis=0, raw=False): res = df.apply(f, axis=axis, raw=raw) if is_reduction: agg_axis = df._get_agg_axis(axis) - self.assert_(isinstance(res, Series)) + tm.assert_isinstance(res, Series) self.assert_(res.index is agg_axis) else: - self.assert_(isinstance(res, DataFrame)) + tm.assert_isinstance(res, DataFrame) _checkit() _checkit(axis=1) @@ -8118,7 +8116,7 @@ def _checkit(axis=0, raw=False): _check(no_index, lambda x: x.mean()) result = no_cols.apply(lambda x: x.mean(), broadcast=True) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) def test_apply_with_args_kwds(self): def add_some(x, howmuch=0): @@ -8157,13 +8155,13 @@ def test_apply_differently_indexed(self): result0 = df.apply(Series.describe, axis=0) expected0 = DataFrame(dict((i, v.describe()) - for i, v in df.iteritems()), + for i, v in compat.iteritems(df)), columns=df.columns) assert_frame_equal(result0, expected0) result1 = df.apply(Series.describe, axis=1) expected1 = DataFrame(dict((i, v.describe()) - for i, v in df.T.iteritems()), + for i, v in compat.iteritems(df.T)), columns=df.index).T assert_frame_equal(result1, expected1) @@ -8254,7 +8252,7 @@ def test_apply_multi_index(self): s.index = MultiIndex.from_arrays([['a','a','b'], ['c','d','d']]) s.columns = ['col1','col2'] res = s.apply(lambda x: Series({'min': min(x), 'max': max(x)}), 1) - self.assert_(isinstance(res.index, MultiIndex)) + tm.assert_isinstance(res.index, MultiIndex) def test_applymap(self): applied = self.frame.applymap(lambda x: x * 2) @@ -8263,7 +8261,7 @@ def test_applymap(self): # GH #465, function returning tuples result = self.frame.applymap(lambda x: (x, x)) - self.assert_(isinstance(result['A'][0], tuple)) + tm.assert_isinstance(result['A'][0], tuple) # GH 2909, object conversion to float in constructor? df = DataFrame(data=[1,'a']) @@ -8550,7 +8548,7 @@ def test_frame_column_inplace_sort_exception(self): self.assertRaises(Exception, s.sort) cp = s.copy() - cp.sort() # it works! + cp.sort() # it works! 
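The two mechanical rewrites that dominate the hunks above are d.iteritems() -> compat.iteritems(d) and self.assert_(isinstance(x, C)) -> tm.assert_isinstance(x, C); a third recurring one is list(zip(...)) + .sort() -> sorted(zip(...)), since zip() returns an iterator with no .sort() method on Python 3. A minimal runnable sketch of helpers with these shapes, for reference only — the actual pandas.util.compat and pandas.util.testing implementations may differ:

    import sys

    if sys.version_info[0] >= 3:
        def iteritems(obj, **kw):
            # dict.items() is already a lazy view on 3.x
            return iter(obj.items(**kw))
    else:
        def iteritems(obj, **kw):
            # dict.iteritems() is the lazy spelling on 2.x
            return obj.iteritems(**kw)

    def assert_isinstance(obj, klass):
        # subclass-aware type check with a readable failure message
        assert isinstance(obj, klass), (
            "Expected type %r, found %r instead" % (klass, type(obj)))

    d = {'a': 1, 'b': 2}
    assert sorted(iteritems(d)) == [('a', 1), ('b', 2)]
    assert_isinstance(d, dict)

    # sorted() accepts any iterable, so no intermediate list is needed
    arrays = [['a', 'b'], ['y', 'x']]
    assert sorted(zip(*arrays)) == [('a', 'y'), ('b', 'x')]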
def test_combine_first(self): # disjoint @@ -8960,10 +8958,10 @@ def test_count(self): # corner case frame = DataFrame() ct1 = frame.count(1) - self.assert_(isinstance(ct1, Series)) + tm.assert_isinstance(ct1, Series) ct2 = frame.count(0) - self.assert_(isinstance(ct2, Series)) + tm.assert_isinstance(ct2, Series) # GH #423 df = DataFrame(index=list(range(10))) @@ -9215,8 +9213,8 @@ def wrapper(x): def test_sum_corner(self): axis0 = self.empty.sum(0) axis1 = self.empty.sum(1) - self.assert_(isinstance(axis0, Series)) - self.assert_(isinstance(axis1, Series)) + tm.assert_isinstance(axis0, Series) + tm.assert_isinstance(axis1, Series) self.assertEquals(len(axis0), 0) self.assertEquals(len(axis1), 0) @@ -9492,7 +9490,7 @@ def test_describe_no_numeric(self): 'B': ['a', 'b', 'c', 'd'] * 6}) desc = df.describe() expected = DataFrame(dict((k, v.describe()) - for k, v in df.iteritems()), + for k, v in compat.iteritems(df)), columns=df.columns) assert_frame_equal(desc, expected) @@ -10310,12 +10308,12 @@ def test_take(self): assert_frame_equal(result, expected) def test_iterkv_names(self): - for k, v in self.mixed_frame.iteritems(): + for k, v in compat.iteritems(self.mixed_frame): self.assertEqual(v.name, k) def test_series_put_names(self): series = self.mixed_frame._series - for k, v in series.iteritems(): + for k, v in compat.iteritems(series): self.assertEqual(v.name, k) def test_dot(self): diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 53c169a7a6570..ebc00bb7cc779 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -425,7 +425,7 @@ def test_xcompat(self): pd.plot_params['x_compat'] = False ax = df.plot() lines = ax.get_lines() - self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex) plt.close('all') # useful if you're plotting a bunch together @@ -437,7 +437,7 @@ def test_xcompat(self): plt.close('all') ax = df.plot() lines = ax.get_lines() - self.assert_(isinstance(lines[0].get_xdata(), PeriodIndex)) + tm.assert_isinstance(lines[0].get_xdata(), PeriodIndex) def test_unsorted_index(self): df = DataFrame({'y': np.arange(100)}, diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 28756a1c079d5..a0ec25ab12a28 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1,6 +1,7 @@ from __future__ import print_function from pandas.util.py3compat import range, long from pandas.util import compat +from six.moves import map, zip, builtins import nose import unittest @@ -26,8 +27,6 @@ import pandas.core.nanops as nanops import pandas.util.testing as tm -from six.moves import map -from six.moves import zip def commonSetUp(self): @@ -36,7 +35,7 @@ def commonSetUp(self): self.groupId = Series([x[0] for x in self.stringIndex], index=self.stringIndex) - self.groupDict = dict((k, v) for k, v in self.groupId.iteritems()) + self.groupDict = dict((k, v) for k, v in compat.iteritems(self.groupId)) self.columnIndex = Index(['A', 'B', 'C', 'D', 'E']) @@ -290,7 +289,7 @@ def func(dataf): return dataf["val2"] - dataf["val2"].mean() result = df1.groupby("val1", squeeze=True).apply(func) - self.assert_(isinstance(result,Series)) + tm.assert_isinstance(result,Series) df2 = DataFrame([{"val1": 1, "val2" : 20}, {"val1":1, "val2": 19}, {"val1":1, "val2": 27}, {"val1":1, "val2": 12}]) @@ -298,12 +297,12 @@ def func(dataf): return dataf["val2"] - dataf["val2"].mean() result = df2.groupby("val1", squeeze=True).apply(func) - 
self.assert_(isinstance(result,Series)) + tm.assert_isinstance(result,Series) # GH3596, return a consistent type (regression in 0.11 from 0.10.1) df = DataFrame([[1,1],[1,1]],columns=['X','Y']) result = df.groupby('X',squeeze=False).count() - self.assert_(isinstance(result,DataFrame)) + tm.assert_isinstance(result,DataFrame) def test_agg_regression1(self): grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month]) @@ -340,7 +339,7 @@ def test_agg_period_index(self): prng = period_range('2012-1-1', freq='M', periods=3) df = DataFrame(np.random.randn(3, 2), index=prng) rs = df.groupby(level=0).sum() - self.assert_(isinstance(rs.index, PeriodIndex)) + tm.assert_isinstance(rs.index, PeriodIndex) # GH 3579 index = period_range(start='1999-01', periods=5, freq='M') @@ -433,13 +432,13 @@ def test_groups(self): groups = grouped.groups self.assert_(groups is grouped.groups) # caching works - for k, v in grouped.groups.iteritems(): + for k, v in compat.iteritems(grouped.groups): self.assert_((self.df.ix[v]['A'] == k).all()) grouped = self.df.groupby(['A', 'B']) groups = grouped.groups self.assert_(groups is grouped.groups) # caching works - for k, v in grouped.groups.iteritems(): + for k, v in compat.iteritems(grouped.groups): self.assert_((self.df.ix[v]['A'] == k[0]).all()) self.assert_((self.df.ix[v]['B'] == k[1]).all()) @@ -495,7 +494,7 @@ def test_aggregate_item_by_item(self): def aggfun(ser): return ser.size result = DataFrame().groupby(self.df.A).agg(aggfun) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) self.assertEqual(len(result), 0) def test_agg_item_by_item_raise_typeerror(self): @@ -855,7 +854,7 @@ def test_frame_groupby(self): groups = grouped.groups indices = grouped.indices - for k, v in groups.iteritems(): + for k, v in compat.iteritems(groups): samething = self.tsframe.index.take(indices[k]) self.assertTrue((samething == v).all()) @@ -1046,7 +1045,7 @@ def _check_op(op): for n1, gp1 in data.groupby('A'): for n2, gp2 in gp1.groupby('B'): expected[n1][n2] = op(gp2.ix[:, ['C', 'D']]) - expected = dict((k, DataFrame(v)) for k, v in expected.iteritems()) + expected = dict((k, DataFrame(v)) for k, v in compat.iteritems(expected)) expected = Panel.fromDict(expected).swapaxes(0, 1) expected.major_axis.name, expected.minor_axis.name = 'A', 'B' @@ -1120,22 +1119,22 @@ def test_as_index_series_return_frame(self): result = grouped['C'].agg(np.sum) expected = grouped.agg(np.sum).ix[:, ['A', 'C']] - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) assert_frame_equal(result, expected) result2 = grouped2['C'].agg(np.sum) expected2 = grouped2.agg(np.sum).ix[:, ['A', 'B', 'C']] - self.assert_(isinstance(result2, DataFrame)) + tm.assert_isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) result = grouped['C'].sum() expected = grouped.sum().ix[:, ['A', 'C']] - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) assert_frame_equal(result, expected) result2 = grouped2['C'].sum() expected2 = grouped2.sum().ix[:, ['A', 'B', 'C']] - self.assert_(isinstance(result2, DataFrame)) + tm.assert_isinstance(result2, DataFrame) assert_frame_equal(result2, expected2) # corner case @@ -1372,7 +1371,7 @@ def test_wrap_aggregated_output_multindex(self): keys = [np.array([0, 0, 1]), np.array([0, 0, 1])] agged = df.groupby(keys).agg(np.mean) - self.assert_(isinstance(agged.columns, MultiIndex)) + tm.assert_isinstance(agged.columns, MultiIndex) def aggfun(ser): if ser.name == ('foo', 
'one'): @@ -1516,7 +1515,7 @@ def f(piece): grouped = ts.groupby(lambda x: x.month) result = grouped.apply(f) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) self.assert_(result.index.equals(ts.index)) def test_apply_series_yield_constant(self): @@ -1892,11 +1891,11 @@ def convert_force_pure(x): result = grouped.agg(convert_fast) self.assert_(result.dtype == np.object_) - self.assert_(isinstance(result[0], Decimal)) + tm.assert_isinstance(result[0], Decimal) result = grouped.agg(convert_force_pure) self.assert_(result.dtype == np.object_) - self.assert_(isinstance(result[0], Decimal)) + tm.assert_isinstance(result[0], Decimal) def test_apply_with_mixed_dtype(self): # GH3480, apply with mixed dtype on axis=1 breaks in 0.11 @@ -2041,7 +2040,7 @@ def test_int64_overflow(self): tups = com._asarray_tuplesafe(tups) expected = df.groupby(tups).sum()['values'] - for k, v in expected.iteritems(): + for k, v in compat.iteritems(expected): self.assert_(left[k] == right[k[::-1]] == v) self.assert_(len(left) == len(right)) @@ -2076,12 +2075,11 @@ def test_groupby_sort_multi(self): _check_groupby(df, result, ['a', 'b'], 'd') def test_intercept_builtin_sum(self): - import __builtin__ s = Series([1., 2., np.nan, 3.]) grouped = s.groupby([0, 1, 2, 2]) - result = grouped.agg(__builtin__.sum) - result2 = grouped.apply(__builtin__.sum) + result = grouped.agg(builtins.sum) + result2 = grouped.apply(builtins.sum) expected = grouped.sum() assert_series_equal(result, expected) assert_series_equal(result2, expected) @@ -2189,7 +2187,7 @@ def g(group): result = self.df.groupby('A')['C'].apply(f) expected = self.df.groupby('A')['C'].apply(g) - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, expected) def test_getitem_list_of_columns(self): @@ -2378,7 +2376,7 @@ def test_groupby_groups_datetimeindex(self): # it works! 
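The test_groupby hunks above pull map, zip and builtins from six.moves in a single import, and test_intercept_builtin_sum swaps the 2.x-only __builtin__ module for that alias. A short self-contained illustration, assuming a standard six install:

    from six.moves import map, zip, builtins

    # map and zip are the lazy 3.x-style iterators on both major versions
    assert list(map(abs, [-1, 2])) == [1, 2]
    assert list(zip('ab', 'cd')) == [('a', 'c'), ('b', 'd')]

    # six.moves.builtins is __builtin__ on 2.x and builtins on 3.x, so
    # builtins.sum always names the builtin even where sum is shadowed
    assert builtins.sum([1, 2, 3]) == 6

Note also that the dict((k, v) for ...) constructor spelling is kept throughout instead of a {k: v ...} comprehension, presumably because Python 2.6, which lacks dict comprehensions, was still supported at the time.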
groups = grouped.groups - self.assert_(isinstance(groups.keys()[0], datetime)) + tm.assert_isinstance(groups.keys()[0], datetime) def test_groupby_reindex_inside_function(self): from pandas.tseries.api import DatetimeIndex @@ -2681,7 +2679,7 @@ def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): tups = list(map(tuple, df[keys].values)) tups = com._asarray_tuplesafe(tups) expected = f(df.groupby(tups)[field]) - for k, v in expected.iteritems(): + for k, v in compat.iteritems(expected): assert(result[k] == v) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 2141a6fc9c8dd..d77c60ecb47d1 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -45,7 +45,7 @@ def test_hash_error(self): def test_new_axis(self): new_index = self.dateIndex[None, :] self.assert_(new_index.ndim == 2) - self.assert_(type(new_index) == np.ndarray) + tm.assert_isinstance(new_index, np.ndarray) def test_deepcopy(self): from copy import deepcopy @@ -77,7 +77,7 @@ def test_constructor(self): # copy arr = np.array(self.strIndex) index = Index(arr, copy=True, name='name') - self.assert_(isinstance(index, Index)) + tm.assert_isinstance(index, Index) self.assert_(index.name == 'name') assert_array_equal(arr, index) @@ -94,7 +94,7 @@ def test_index_ctor_infer_periodindex(self): xp = period_range('2012-1-1', freq='M', periods=3) rs = Index(xp) assert_array_equal(rs, xp) - self.assert_(isinstance(rs, PeriodIndex)) + tm.assert_isinstance(rs, PeriodIndex) def test_copy(self): i = Index([], name='Foo') @@ -142,7 +142,7 @@ def test_asof(self): self.assert_(self.dateIndex.asof(d + timedelta(1)) == d) d = self.dateIndex[0].to_datetime() - self.assert_(isinstance(self.dateIndex.asof(d), Timestamp)) + tm.assert_isinstance(self.dateIndex.asof(d), Timestamp) def test_argsort(self): result = self.strIndex.argsort() @@ -160,7 +160,7 @@ def _check(op): arr_result = op(arr, element) index_result = op(index, element) - self.assert_(isinstance(index_result, np.ndarray)) + tm.assert_isinstance(index_result, np.ndarray) self.assert_(not isinstance(index_result, Index)) self.assert_(np.array_equal(arr_result, index_result)) @@ -334,7 +334,7 @@ def testit(index): pickled = pickle.dumps(index) unpickled = pickle.loads(pickled) - self.assert_(isinstance(unpickled, Index)) + tm.assert_isinstance(unpickled, Index) self.assert_(np.array_equal(unpickled, index)) self.assertEquals(unpickled.name, index.name) @@ -600,11 +600,11 @@ def test_view(self): def test_coerce_list(self): # coerce things arr = Index([1, 2, 3, 4]) - self.assert_(type(arr) == Int64Index) + tm.assert_isinstance(arr, Int64Index) # but not if explicit dtype passed arr = Index([1, 2, 3, 4], dtype=object) - self.assert_(type(arr) == Index) + tm.assert_isinstance(arr, Index) def test_dtype(self): self.assert_(self.index.dtype == np.int64) @@ -655,7 +655,7 @@ def test_join_outer(self): eridx = np.array([-1, 3, 4, -1, 5, -1, 0, -1, -1, 1, -1, -1, -1, 2], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -668,7 +668,7 @@ def test_join_outer(self): eridx = np.array([-1, 0, 1, -1, 2, -1, 3, -1, -1, 4, -1, -1, -1, 5], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -691,7 +691,7 @@ def 
test_join_inner(self): elidx = np.array([1, 6]) eridx = np.array([4, 1]) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -704,7 +704,7 @@ def test_join_inner(self): self.assert_(res.equals(res2)) eridx = np.array([1, 4]) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(np.array_equal(ridx, eridx)) @@ -720,7 +720,7 @@ def test_join_left(self): eridx = np.array([-1, 4, -1, -1, -1, -1, 1, -1, -1, -1], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(lidx is None) self.assert_(np.array_equal(ridx, eridx)) @@ -730,7 +730,7 @@ def test_join_left(self): return_indexers=True) eridx = np.array([-1, 1, -1, -1, -1, -1, 4, -1, -1, -1], dtype=np.int64) - self.assert_(isinstance(res, Int64Index)) + tm.assert_isinstance(res, Int64Index) self.assert_(res.equals(eres)) self.assert_(lidx is None) self.assert_(np.array_equal(ridx, eridx)) @@ -759,7 +759,7 @@ def test_join_right(self): elidx = np.array([-1, 6, -1, -1, 1, -1], dtype=np.int64) - self.assert_(isinstance(other, Int64Index)) + tm.assert_isinstance(other, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(ridx is None) @@ -770,7 +770,7 @@ def test_join_right(self): eres = other_mono elidx = np.array([-1, 1, -1, -1, 6, -1], dtype=np.int64) - self.assert_(isinstance(other, Int64Index)) + tm.assert_isinstance(other, Int64Index) self.assert_(res.equals(eres)) self.assert_(np.array_equal(lidx, elidx)) self.assert_(ridx is None) @@ -947,7 +947,7 @@ def test_constructor_single_level(self): single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], labels=[[0, 1, 2, 3]], names=['first']) - self.assert_(isinstance(single_level, Index)) + tm.assert_isinstance(single_level, Index) self.assert_(not isinstance(single_level, MultiIndex)) self.assert_(single_level.name == 'first') @@ -1504,7 +1504,7 @@ def test_diff(self): sortorder=0, names=self.index.names) - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) self.assert_(result.equals(expected)) self.assertEqual(result.names, self.index.names) @@ -1725,16 +1725,16 @@ def _check_all(other): # some corner cases idx = Index(['three', 'one', 'two']) result = idx.join(self.index, level='second') - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) self.assertRaises(Exception, self.index.join, self.index, level=1) def test_reindex(self): result, indexer = self.index.reindex(list(self.index[:4])) - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) result, indexer = self.index.reindex(list(self.index)) - self.assert_(isinstance(result, MultiIndex)) + tm.assert_isinstance(result, MultiIndex) self.assert_(indexer is None) def test_reindex_level(self): diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index b72b1f3878597..b7297fc86e22e 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -211,7 +211,7 @@ def _print(result, error = None): # if we are in fails, the ok, otherwise raise it if fails is not None: - if fails == type(detail): + if isinstance(detail, fails): result = 'ok (%s)' % type(detail).__name__ _print(result) return diff --git 
a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index d152e6ed1c41d..5d3171365a56f 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -75,26 +75,26 @@ def test_dataframe_constructor(self): multi = DataFrame(np.random.randn(4, 4), index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) self.assert_(not isinstance(multi.columns, MultiIndex)) multi = DataFrame(np.random.randn(4, 4), columns=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - self.assert_(isinstance(multi.columns, MultiIndex)) + tm.assert_isinstance(multi.columns, MultiIndex) def test_series_constructor(self): multi = Series(1., index=[np.array(['a', 'a', 'b', 'b']), np.array(['x', 'y', 'x', 'y'])]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) multi = Series(1., index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) multi = Series(list(range(4)), index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) - self.assert_(isinstance(multi.index, MultiIndex)) + tm.assert_isinstance(multi.index, MultiIndex) def test_reindex_level(self): # axis=0 @@ -580,7 +580,7 @@ def test_setitem_change_dtype(self): s = dft['foo', 'two'] dft['foo', 'two'] = s > s.median() assert_series_equal(dft['foo', 'two'], s > s.median()) - self.assert_(isinstance(dft._data.blocks[1].items, MultiIndex)) + tm.assert_isinstance(dft._data.blocks[1].items, MultiIndex) reindexed = dft.reindex(columns=[('foo', 'two')]) assert_series_equal(reindexed['foo', 'two'], s > s.median()) @@ -676,12 +676,12 @@ def test_reset_index_with_drop(self): self.assertEquals(len(deleveled.columns), len(self.ymd.columns)) deleveled = self.series.reset_index() - self.assert_(isinstance(deleveled, DataFrame)) + tm.assert_isinstance(deleveled, DataFrame) self.assert_( len(deleveled.columns) == len(self.series.index.levels) + 1) deleveled = self.series.reset_index(drop=True) - self.assert_(isinstance(deleveled, Series)) + tm.assert_isinstance(deleveled, Series) def test_sortlevel_by_name(self): self.frame.index.names = ['first', 'second'] @@ -1095,7 +1095,7 @@ def test_reorder_levels(self): def test_insert_index(self): df = self.ymd[:5].T df[2000, 1, 10] = df[2000, 1, 7] - self.assert_(isinstance(df.columns, MultiIndex)) + tm.assert_isinstance(df.columns, MultiIndex) self.assert_((df[2000, 1, 10] == df[2000, 1, 7]).all()) def test_alignment(self): @@ -1499,8 +1499,7 @@ def test_mixed_depth_get(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = list(zip(*arrays)) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1519,8 +1518,7 @@ def test_mixed_depth_insert(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = list(zip(*arrays)) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1535,8 +1533,7 @@ def test_mixed_depth_drop(self): ['', 'OD', 'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = list(zip(*arrays)) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1587,8 +1584,7 @@ def test_mixed_depth_pop(self): ['', 'OD', 
'OD', 'result1', 'result2', 'result1'], ['', 'wx', 'wy', '', '', '']] - tuples = list(zip(*arrays)) - tuples.sort() + tuples = sorted(zip(*arrays)) index = MultiIndex.from_tuples(tuples) df = DataFrame(randn(4, 6), columns=index) @@ -1750,7 +1746,7 @@ def test_indexing_ambiguity_bug_1678(self): result = frame.ix[:, 1] exp = frame.icol(1) - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) assert_series_equal(result, exp) def test_nonunique_assignment_1750(self): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index d8c45ed6599d0..69fae70bdf44e 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -273,10 +273,10 @@ def test_keys(self): def test_iteritems(self): # Test panel.iteritems(), aka panel.iteritems() # just test that it works - for k, v in self.panel.iteritems(): + for k, v in compat.iteritems(self.panel): pass - self.assertEqual(len(list(self.panel.iteritems())), + self.assertEqual(len(list(compat.iteritems(self.panel))), len(self.panel.items)) def test_combineFrame(self): @@ -731,7 +731,7 @@ def test_set_value(self): # resize res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5) - self.assert_(isinstance(res, Panel)) + tm.assert_isinstance(res, Panel) self.assert_(res is not self.panel) self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5) @@ -882,19 +882,19 @@ def test_ctor_dict(self): # cast dcasted = dict((k, v.reindex(wp.major_axis).fillna(0)) - for k, v in d.iteritems()) + for k, v in compat.iteritems(d)) result = Panel(dcasted, dtype=int) expected = Panel(dict((k, v.astype(int)) - for k, v in dcasted.iteritems())) + for k, v in compat.iteritems(dcasted))) assert_panel_equal(result, expected) result = Panel(dcasted, dtype=np.int32) expected = Panel(dict((k, v.astype(np.int32)) - for k, v in dcasted.iteritems())) + for k, v in compat.iteritems(dcasted))) assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel.iteritems()) + data = dict((k, v.values) for k, v in compat.iteritems(self.panel)) result = Panel(data) exp_major = Index(np.arange(len(self.panel.major_axis))) self.assert_(result.major_axis.equals(exp_major)) @@ -1284,7 +1284,7 @@ def test_shift(self): # negative numbers, #2164 result = self.panel.shift(-1) expected = Panel(dict((i, f.shift(-1)[:-1]) - for i, f in self.panel.iteritems())) + for i, f in compat.iteritems(self.panel))) assert_panel_equal(result, expected) def test_multiindex_get(self): @@ -1383,7 +1383,7 @@ def test_to_excel(self): except ImportError: raise nose.SkipTest - for item, df in self.panel.iteritems(): + for item, df in compat.iteritems(self.panel): recdf = reader.parse(str(item), index_col=0) assert_frame_equal(df, recdf) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 4119d2b5a0769..31f5bc64a94df 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -23,6 +23,7 @@ assert_series_equal, assert_almost_equal) import pandas.util.testing as tm +import pandas.util.compat as compat def add_nans(panel4d): @@ -221,7 +222,7 @@ def test_keys(self): def test_iteritems(self): """Test panel4d.iteritems()""" - self.assertEqual(len(list(self.panel4d.iteritems())), + self.assertEqual(len(list(compat.iteritems(self.panel4d))), len(self.panel4d.labels)) def test_combinePanel4d(self): @@ -534,7 +535,7 @@ def test_set_value(self): # resize res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5) - self.assert_(isinstance(res, Panel4D)) + 
tm.assert_isinstance(res, Panel4D) self.assert_(res is not self.panel4d) self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5) @@ -656,7 +657,7 @@ def test_ctor_dict(self): # assert_panel_equal(result, expected) def test_constructor_dict_mixed(self): - data = dict((k, v.values) for k, v in self.panel4d.iteritems()) + data = dict((k, v.values) for k, v in compat.iteritems(self.panel4d)) result = Panel4D(data) exp_major = Index(np.arange(len(self.panel4d.major_axis))) self.assert_(result.major_axis.equals(exp_major)) diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 18f0c76b469a3..0dfae47dd9ce3 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,6 +1,7 @@ from pandas.util.py3compat import range import unittest import pandas.tools.rplot as rplot +import pandas.util.testing as tm from pandas import read_csv import os @@ -51,7 +52,7 @@ def test_make_aes1(self): self.assertTrue(aes['colour'] is None) self.assertTrue(aes['shape'] is None) self.assertTrue(aes['alpha'] is None) - self.assertTrue(type(aes) is dict) + self.assertTrue(isinstance(aes, dict)) def test_make_aes2(self): self.assertRaises(ValueError, rplot.make_aes, diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index f53b62474b811..84fbc4397b9cf 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -285,11 +285,11 @@ def setUp(self): def test_constructor(self): # Recognize TimeSeries - self.assert_(isinstance(self.ts, TimeSeries)) + tm.assert_isinstance(self.ts, TimeSeries) # Pass in Series derived = Series(self.ts) - self.assert_(isinstance(derived, TimeSeries)) + tm.assert_isinstance(derived, TimeSeries) self.assert_(tm.equalContents(derived.index, self.ts.index)) # Ensure new index is not created @@ -297,7 +297,7 @@ def test_constructor(self): # Pass in scalar scalar = Series(0.5) - self.assert_(isinstance(scalar, float)) + tm.assert_isinstance(scalar, float) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) @@ -428,7 +428,7 @@ def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] s = Series(objs, index=[0, 1]) - self.assert_(isinstance(s, Series)) + tm.assert_isinstance(s, Series) def test_constructor_sanitize(self): s = Series(np.array([1., 1., 8.]), dtype='i8') @@ -516,7 +516,7 @@ def test_constructor_dict(self): def test_constructor_subclass_dict(self): data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) series = Series(data) - refseries = Series(dict(data.iteritems())) + refseries = Series(dict(compat.iteritems(data))) assert_series_equal(refseries, series) def test_orderedDict_ctor(self): @@ -583,7 +583,7 @@ def test_setindex(self): # works series = self.series.copy() series.index = np.arange(len(series)) - self.assert_(isinstance(series.index, Index)) + tm.assert_isinstance(series.index, Index) def test_array_finalize(self): pass @@ -780,7 +780,7 @@ def test_getitem_setitem_integers(self): def test_getitem_box_float64(self): value = self.ts[5] - self.assert_(isinstance(value, np.float64)) + tm.assert_isinstance(value, np.float64) def test_getitem_ambiguous_keyerror(self): s = Series(list(range(10)), index=list(range(0, 20, 2))) @@ -1415,10 +1415,10 @@ def test_values(self): self.assert_(np.array_equal(self.ts, self.ts.values)) def test_iteritems(self): - for idx, val in self.series.iteritems(): + for idx, val in compat.iteritems(self.series): self.assertEqual(val, self.series[idx]) - for idx, val in self.ts.iteritems(): + for idx, val in compat.iteritems(self.ts): 
self.assertEqual(val, self.ts[idx]) def test_sum(self): @@ -1703,7 +1703,7 @@ def test_describe_none(self): def test_append(self): appendedSeries = self.series.append(self.objSeries) - for idx, value in appendedSeries.iteritems(): + for idx, value in compat.iteritems(appendedSeries): if idx in self.series.index: self.assertEqual(value, self.series[idx]) elif idx in self.objSeries.index: @@ -1907,7 +1907,7 @@ def test_operators_timedelta64(self): # scalar Timestamp on rhs maxa = df['A'].max() - self.assert_(isinstance(maxa,Timestamp)) + tm.assert_isinstance(maxa,Timestamp) resultb = df['A']- df['A'].max() self.assert_(resultb.dtype=='timedelta64[ns]') @@ -2038,7 +2038,7 @@ def test_timedelta64_functions(self): def test_sub_of_datetime_from_TimeSeries(self): from pandas.core import common as com from datetime import datetime - a = Timestamp(datetime(1993,01,07,13,30,00)) + a = Timestamp(datetime(1993,0o1,0o7,13,30,00)) b = datetime(1993, 6, 22, 13, 30) a = Series([a]) result = com._possibly_cast_to_timedelta(np.abs(a - b)) @@ -2875,7 +2875,7 @@ def test_clip(self): result = self.ts.clip(-0.5, 0.5) expected = np.clip(self.ts, -0.5, 0.5) assert_series_equal(result, expected) - self.assert_(isinstance(expected, Series)) + tm.assert_isinstance(expected, Series) def test_clip_types_and_nulls(self): @@ -3369,13 +3369,13 @@ def test_map(self): merged = target.map(source) - for k, v in merged.iteritems(): + for k, v in compat.iteritems(merged): self.assertEqual(v, source[target[k]]) # input could be a dict merged = target.map(source.to_dict()) - for k, v in merged.iteritems(): + for k, v in compat.iteritems(merged): self.assertEqual(v, source[target[k]]) # function @@ -3404,7 +3404,7 @@ def test_map_decimal(self): result = self.series.map(lambda x: Decimal(str(x))) self.assert_(result.dtype == np.object_) - self.assert_(isinstance(result[0], Decimal)) + tm.assert_isinstance(result[0], Decimal) def test_map_na_exclusion(self): s = Series([1.5, np.nan, 3, np.nan, 5]) @@ -3655,13 +3655,13 @@ def test_reindex(self): subIndex = self.series.index[10:20] subSeries = self.series.reindex(subIndex) - for idx, val in subSeries.iteritems(): + for idx, val in compat.iteritems(subSeries): self.assertEqual(val, self.series[idx]) subIndex2 = self.ts.index[10:20] subTS = self.ts.reindex(subIndex2) - for idx, val in subTS.iteritems(): + for idx, val in compat.iteritems(subTS): self.assertEqual(val, self.ts[idx]) stuffSeries = self.ts.reindex(subIndex) @@ -3670,7 +3670,7 @@ def test_reindex(self): # This is extremely important for the Cython code to not screw up nonContigIndex = self.ts.index[::2] subNonContig = self.ts.reindex(nonContigIndex) - for idx, val in subNonContig.iteritems(): + for idx, val in compat.iteritems(subNonContig): self.assertEqual(val, self.ts[idx]) self.assertRaises(ValueError, self.ts.reindex) @@ -4301,7 +4301,7 @@ def test_reset_index(self): rs = s.reset_index(level=[0, 2], drop=True) self.assert_(rs.index.equals(Index(index.get_level_values(1)))) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) @@ -4314,8 +4314,8 @@ def test_set_index_makes_timeseries(self): def test_timeseries_coercion(self): idx = tm.makeDateIndex(10000) ser = Series(np.random.randn(len(idx)), idx.astype(object)) - self.assert_(isinstance(ser, TimeSeries)) - self.assert_(isinstance(ser.index, DatetimeIndex)) + tm.assert_isinstance(ser, TimeSeries) + tm.assert_isinstance(ser.index, DatetimeIndex) def test_replace(self): N = 100 
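Several more 2-vs-3 differences surface in the hunks above and below; small hedged examples follow (the alias values in the last one are illustrative, with only BA/A@DEC taken from the source comment in the frequencies.py hunk further down):

    import six

    # Leading-zero octal literals ("01") are a SyntaxError on 3.x; the
    # 0o prefix parses on 2.6+ and 3.x (plain 1 and 7 would be clearer)
    assert 0o17 == 15

    # unicode -> six.text_type (unicode on 2.x, str on 3.x), and
    # basestring -> six.string_types for isinstance checks
    def to_uni(x):
        if not isinstance(x, six.text_type):
            return x.decode('utf8')
        return x

    assert to_uni(b'caf\xc3\xa9') == u'caf\xe9'
    assert isinstance('x', six.string_types)
    assert not isinstance(1, six.string_types)

    # dict.keys() is a non-indexable view on 3.x, so groups.keys()[0]
    # (left unchanged in the test_groupby hunk above) is still the
    # 2.x-only spelling; list() is the portable one
    groups = {'k1': [0, 2], 'k2': [1]}
    assert list(groups.keys())[0] in groups

    # Inverting a many-to-one alias map: dict() keeps the last (v, k)
    # pair it sees, so sorted() + reversed() pins which key wins
    rule_aliases = {'BA': 'A@DEC', 'A': 'A@DEC', 'W': 'W@SUN'}
    inverse = dict((v, k) for k, v in reversed(sorted(rule_aliases.items())))
    assert inverse == {'A@DEC': 'A', 'W@SUN': 'W'}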
diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 9a1d3bc71a091..d54aedc43457e 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -36,14 +36,14 @@ def test_iter(self): for s in ds.str: # iter must yield a Series - self.assert_(isinstance(s, Series)) + tm.assert_isinstance(s, Series) # indices of each yielded Series should be equal to the index of # the original Series assert_array_equal(s.index, ds.index) for el in s: - # each element of the series is either a basestring/str or nan + # each element of the series is either a six.string_types or nan self.assert_(isinstance(el, six.string_types) or isnull(el)) # desired behavior is to iterate until everything would be nan on the @@ -142,7 +142,7 @@ def test_count(self): tm.assert_almost_equal(result, exp) result = Series(values).str.count('f[o]+') - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) tm.assert_almost_equal(result, exp) # mixed @@ -152,7 +152,7 @@ def test_count(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.count('a') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -163,7 +163,7 @@ def test_count(self): tm.assert_almost_equal(result, exp) result = Series(values).str.count('f[o]+') - self.assert_(isinstance(result, Series)) + tm.assert_isinstance(result, Series) tm.assert_almost_equal(result, exp) def test_contains(self): @@ -187,7 +187,7 @@ def test_contains(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.contains('o') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -227,7 +227,7 @@ def test_startswith(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.startswith('f') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -255,7 +255,7 @@ def test_endswith(self): tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.endswith('f') - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -307,7 +307,7 @@ def test_lower_upper(self): mixed = mixed.str.upper() rs = Series(mixed).str.lower() xp = ['a', NA, 'b', NA, NA, 'foo', NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -337,7 +337,7 @@ def test_replace(self): rs = Series(mixed).str.replace('BAD[_]*', '') xp = ['a', NA, 'b', NA, NA, 'foo', NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -375,7 +375,7 @@ def test_repeat(self): rs = Series(mixed).str.repeat(3) xp = ['aaa', NA, 'bbb', NA, NA, 'foofoofoo', NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -405,7 +405,7 @@ def test_match(self): rs = Series(mixed).str.match('.*(BAD[_]+).*(BAD)') xp = [('BAD_', 'BAD'), NA, ('BAD_', 'BAD'), NA, NA, [], NA, NA, NA] - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -427,7 +427,7 @@ def test_join(self): rs = Series(mixed).str.split('_').str.join('_') xp = Series(['a_b', NA, 'asdf_cas_asdf', NA, NA, 'foo', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -450,7 +450,7 @@ def test_len(self): rs = Series(mixed).str.len() xp =
Series([3, NA, 13, NA, NA, 3, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -475,7 +475,7 @@ def test_findall(self): rs = Series(mixed).str.findall('BAD[_]*') xp = Series([['BAD__', 'BAD'], NA, [], NA, NA, ['BAD'], NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -508,7 +508,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='left') xp = Series([' a', NA, ' b', NA, NA, ' ee', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) mixed = Series(['a', NA, 'b', True, datetime.today(), @@ -517,7 +517,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='right') xp = Series(['a ', NA, 'b ', NA, NA, 'ee ', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) mixed = Series(['a', NA, 'b', True, datetime.today(), @@ -526,7 +526,7 @@ def test_pad(self): rs = Series(mixed).str.pad(5, side='both') xp = Series([' a ', NA, ' b ', NA, NA, ' ee ', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -563,7 +563,7 @@ def test_center(self): xp = Series([' a ', NA, ' b ', NA, NA, ' c ', ' eee ', NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -595,7 +595,7 @@ def test_split(self): xp = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -661,7 +661,7 @@ def test_slice(self): xp = Series(['foo', NA, 'bar', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode @@ -699,21 +699,21 @@ def test_strip_lstrip_rstrip_mixed(self): xp = Series(['aa', NA, 'bb', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.lstrip() xp = Series(['aa ', NA, 'bb \t\n', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) rs = Series(mixed).str.rstrip() xp = Series([' aa', NA, ' bb', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) def test_strip_lstrip_rstrip_unicode(self): @@ -782,7 +782,7 @@ def test_get(self): xp = Series(['b', NA, 'd', NA, NA, NA, NA, NA]) - self.assert_(isinstance(rs, Series)) + tm.assert_isinstance(rs, Series) tm.assert_almost_equal(rs, xp) # unicode diff --git a/pandas/tests/test_tests.py b/pandas/tests/test_tests.py index 89238187ce434..b52ab61f7be6b 100644 --- a/pandas/tests/test_tests.py +++ b/pandas/tests/test_tests.py @@ -1,6 +1,5 @@ #!/usr/bin/python # -*- coding: utf-8 -*- -from __future__ import with_statement # support python 2.5 import pandas as pd import unittest import warnings diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index bc1ebd3752cc3..f1d1ba322e38a 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -153,7 +153,7 @@ def pivot_table(data, values=None, rows=None, cols=None, aggfunc='mean', def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean): grand_margin = {} - for k, v in data[values].iteritems(): + for k, v in 
compat.iteritems(data[values]): try: if isinstance(aggfunc, six.string_types): grand_margin[k] = getattr(v, aggfunc)() diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 0e6235438f3d0..ea57fc752872a 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -1,8 +1,5 @@ # pylint: disable=E1103 -from pandas.util.py3compat import range -from six.moves import zip -from pandas.util import compat import nose import unittest @@ -12,12 +9,15 @@ import numpy as np import random -from pandas import * +from pandas.util.py3compat import range +from six.moves import zip +from pandas.util import compat from pandas.tseries.index import DatetimeIndex from pandas.tools.merge import merge, concat, ordered_merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal, rands, makeCustomDataframe as mkdf) +from pandas import isnull, DataFrame, Index, MultiIndex, Panel, Series, date_range import pandas.algos as algos import pandas.util.testing as tm @@ -1025,7 +1025,7 @@ def _join_by_hand(a, b, how='left'): result_columns = a.columns.append(b.columns) - for col, s in b_re.iteritems(): + for col, s in compat.iteritems(b_re): a_re[col] = s return a_re.reindex(columns=result_columns) @@ -1472,7 +1472,7 @@ def test_panel_join_many(self): data_dict = {} for p in panels: - data_dict.update(p.iteritems()) + data_dict.update(compat.iteritems(p)) joined = panels[0].join(panels[1:], how='inner') expected = Panel.from_dict(data_dict, intersect=True) @@ -1766,6 +1766,5 @@ def test_multigroup(self): self.assert_(result['group'].notnull().all()) if __name__ == '__main__': - import nose nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], exit=False) diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 11a9fef9a0b53..0847152917716 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -210,7 +210,7 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']): # no rows rtable = self.data.pivot_table(cols=['AA', 'BB'], margins=True, aggfunc=np.mean) - self.assert_(isinstance(rtable, Series)) + tm.assert_isinstance(rtable, Series) for item in ['DD', 'EE', 'FF']: gmarg = table[item]['All', ''] self.assertEqual(gmarg, self.data[item].mean()) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 20caf150cb55d..3157c694bdc2b 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -376,12 +376,12 @@ def get_period_alias(offset_str): # Note that _rule_aliases is not 1:1 (d[BA]==d[A@DEC]), and so traversal # order matters when constructing an inverse. we pick one. 
#2331 _legacy_reverse_map = dict((v, k) for k, v in - reversed(sorted(_rule_aliases.iteritems()))) + reversed(sorted(compat.iteritems(_rule_aliases)))) # for helping out with pretty-printing and name-lookups _offset_names = {} -for name, offset in _offset_map.iteritems(): +for name, offset in compat.iteritems(_offset_map): if offset is None: continue offset.name = name @@ -614,7 +614,7 @@ def get_standard_freq(freq): } _reverse_period_code_map = {} -for _k, _v in _period_code_map.iteritems(): +for _k, _v in compat.iteritems(_period_code_map): _reverse_period_code_map[_v] = _k # Additional aliases diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index ce63fa7db6728..3bcf93464e0c1 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -102,7 +102,7 @@ def _should_cache(self): return self.isAnchored() and self._cacheable def _params(self): - attrs = [(k, v) for k, v in vars(self).iteritems() + attrs = [(k, v) for k, v in compat.iteritems(vars(self)) if k not in ['kwds', '_offset', 'name', 'normalize', 'busdaycalendar']] attrs.extend(self.kwds.items()) diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index 4f4df38af1e89..b9f5e73150623 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -16,6 +16,7 @@ import pandas.core.datetools as datetools from pandas.util.testing import assertRaisesRegexp +import pandas.util.testing as tm def _skip_if_no_pytz(): @@ -147,7 +148,7 @@ def test_getitem(self): fancy_indexed = self.rng[[4, 3, 2, 1, 0]] self.assertEquals(len(fancy_indexed), 5) - self.assert_(isinstance(fancy_indexed, DatetimeIndex)) + tm.assert_isinstance(fancy_indexed, DatetimeIndex) self.assert_(fancy_indexed.freq is None) # 32-bit vs. 
64-bit platforms @@ -187,21 +188,21 @@ def test_union(self): right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_union = left.union(right) - self.assert_(isinstance(the_union, Index)) + tm.assert_isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # order does not matter self.assert_(np.array_equal(right.union(left), the_union)) @@ -210,7 +211,7 @@ def test_union(self): rng = date_range(START, END, freq=datetools.bmonthEnd) the_union = self.rng.union(rng) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) def test_outer_join(self): # should just behave as union @@ -220,14 +221,14 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) # non-overlapping, no gap @@ -235,13 +236,13 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # overlapping, but different offset rng = date_range(START, END, freq=datetools.bmonthEnd) the_join = self.rng.join(rng, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) def test_union_not_cacheable(self): @@ -264,7 +265,7 @@ def test_intersection(self): the_int = rng1.intersection(rng2) expected = rng[10:25] self.assert_(the_int.equals(expected)) - self.assert_(isinstance(the_int, DatetimeIndex)) + tm.assert_isinstance(the_int, DatetimeIndex) self.assert_(the_int.offset == rng.offset) the_int = rng1.intersection(rng2.view(DatetimeIndex)) @@ -322,7 +323,7 @@ def test_daterange_bug_456(self): rng2.offset = datetools.BDay() result = rng1.union(rng2) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) def test_error_with_zero_monthends(self): self.assertRaises(ValueError, date_range, '1/1/2000', '1/1/2001', @@ -367,13 +368,13 @@ def test_month_range_union_tz(self): early_start = datetime(2011, 1, 1) early_end = datetime(2011, 3, 1) - + late_start = datetime(2011, 3, 1) late_end = datetime(2011, 5, 1) early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=datetools.monthEnd) late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=datetools.monthEnd) - + early_dr.union(late_dr) @@ -435,7 +436,7 @@ def test_getitem(self): fancy_indexed = self.rng[[4, 3, 2, 1, 0]] self.assertEquals(len(fancy_indexed), 5) - self.assert_(isinstance(fancy_indexed, DatetimeIndex)) + tm.assert_isinstance(fancy_indexed, DatetimeIndex) self.assert_(fancy_indexed.freq is None) # 32-bit vs. 
64-bit platforms @@ -475,21 +476,21 @@ def test_union(self): right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_union = left.union(right) - self.assert_(isinstance(the_union, Index)) + tm.assert_isinstance(the_union, Index) # non-overlapping, no gap left = self.rng[:5] right = self.rng[5:10] the_union = left.union(right) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) # order does not matter self.assert_(np.array_equal(right.union(left), the_union)) @@ -498,7 +499,7 @@ def test_union(self): rng = date_range(START, END, freq=datetools.bmonthEnd) the_union = self.rng.union(rng) - self.assert_(isinstance(the_union, DatetimeIndex)) + tm.assert_isinstance(the_union, DatetimeIndex) def test_outer_join(self): # should just behave as union @@ -508,14 +509,14 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # non-overlapping, gap in middle left = self.rng[:5] right = self.rng[10:] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) # non-overlapping, no gap @@ -523,13 +524,13 @@ def test_outer_join(self): right = self.rng[5:10] the_join = left.join(right, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) # overlapping, but different offset rng = date_range(START, END, freq=datetools.bmonthEnd) the_join = self.rng.join(rng, how='outer') - self.assert_(isinstance(the_join, DatetimeIndex)) + tm.assert_isinstance(the_join, DatetimeIndex) self.assert_(the_join.freq is None) def test_intersection_bug(self): @@ -579,7 +580,7 @@ def test_daterange_bug_456(self): rng2.offset = datetools.CDay() result = rng1.union(rng2) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) def test_cdaterange(self): rng = cdate_range('2013-05-01', periods=3) diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 9cc7383ed7d0b..3f4960520104f 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -24,6 +24,7 @@ from pandas.tslib import monthrange from pandas.lib import Timestamp from pandas.util.testing import assertRaisesRegexp +import pandas.util.testing as tm _multiprocess_can_split_ = True @@ -77,7 +78,7 @@ def test_normalize_date(): def test_to_m8(): valb = datetime(2007, 10, 1) valu = _to_m8(valb) - assert type(valu) == np.datetime64 + tm.assert_isinstance(valu, np.datetime64) # assert valu == np.datetime64(datetime(2007,10,1)) # def test_datetime64_box(): @@ -272,7 +273,7 @@ def test_apply(self): datetime(2008, 1, 7): datetime(2008, 1, 7)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_apply_large_n(self): @@ -447,7 +448,7 @@ def test_apply(self): datetime(2008, 1, 7): datetime(2008, 1, 7)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_apply_large_n(self): @@ -564,7 +565,7 @@ def test_offset(self): 
datetime(2010, 4, 5): datetime(2010, 3, 23)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -703,7 +704,7 @@ def test_offset(self): datetime(2007, 1, 1): datetime(2006, 12, 1)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -760,7 +761,7 @@ def test_offset(self): datetime(2007, 1, 1): datetime(2006, 12, 29)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_normalize(self): @@ -821,7 +822,7 @@ def test_offset(self): datetime(2006, 1, 2): datetime(2006, 1, 1)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) @@ -862,7 +863,7 @@ def test_offset(self): datetime(2007, 1, 1): datetime(2006, 12, 31)})) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # def test_day_of_month(self): @@ -969,7 +970,7 @@ def test_offset(self): datetime(2008, 4, 30): datetime(2008, 10, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1037,7 +1038,7 @@ def test_offset(self): datetime(2008, 4, 30): datetime(2008, 10, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1141,7 +1142,7 @@ def test_offset(self): datetime(2008, 4, 1): datetime(2008, 10, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1210,7 +1211,7 @@ def test_offset(self): datetime(2008, 4, 30): datetime(2008, 10, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) # corner @@ -1324,7 +1325,7 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2007, 1, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) @@ -1384,7 +1385,7 @@ def test_offset(self): datetime(2012, 1, 31): datetime(2011, 4, 1), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1420,7 +1421,7 @@ def test_offset(self): )) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): self.assertEqual(base + offset, expected) def test_roll(self): @@ -1473,7 +1474,7 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2006, 12, 29), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1524,7 +1525,7 @@ def test_offset(self): datetime(2008, 12, 31): datetime(2006, 12, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): 
assertEq(offset, base, expected) def test_onOffset(self): @@ -1573,7 +1574,7 @@ def test_offset(self): datetime(2008, 3, 31): datetime(2006, 3, 31), })) for offset, cases in tests: - for base, expected in cases.iteritems(): + for base, expected in compat.iteritems(cases): assertEq(offset, base, expected) def test_onOffset(self): @@ -1733,7 +1734,7 @@ def setUp(self): def test_alias_equality(self): from pandas.tseries.frequencies import _offset_map - for k, v in _offset_map.iteritems(): + for k, v in compat.iteritems(_offset_map): if v is None: continue self.assertEqual(k, v.copy()) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 8058d12029273..053ff8af2f280 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -1064,7 +1064,7 @@ def setUp(self): def test_make_time_series(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') series = Series(1, index=index) - self.assert_(isinstance(series, TimeSeries)) + tm.assert_isinstance(series, TimeSeries) def test_astype(self): idx = period_range('1990', '2009', freq='A') @@ -1181,7 +1181,7 @@ def test_getitem_ndim2(self): result = idx[:, None] # MPL kludge - self.assert_(type(result) == PeriodIndex) + tm.assert_isinstance(result, PeriodIndex) def test_getitem_partial(self): rng = period_range('2007-01', periods=50, freq='M') @@ -1238,7 +1238,7 @@ def test_periods_number_check(self): def test_tolist(self): index = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') rs = index.tolist() - [self.assert_(isinstance(x, Period)) for x in rs] + [tm.assert_isinstance(x, Period) for x in rs] recon = PeriodIndex(rs) self.assert_(index.equals(recon)) @@ -1335,7 +1335,7 @@ def test_frame_setitem(self): self.assert_(rs.equals(rng)) rs = df.reset_index().set_index('index') - self.assert_(isinstance(rs.index, PeriodIndex)) + tm.assert_isinstance(rs.index, PeriodIndex) self.assert_(rs.index.equals(rng)) def test_nested_dict_frame_constructor(self): @@ -1835,7 +1835,7 @@ def test_iteration(self): index = PeriodIndex(start='1/1/10', periods=4, freq='B') result = list(index) - self.assert_(isinstance(result[0], Period)) + tm.assert_isinstance(result[0], Period) self.assert_(result[0].freq == index.freq) def test_take(self): @@ -1843,9 +1843,9 @@ def test_take(self): taken = index.take([5, 6, 8, 12]) taken2 = index[[5, 6, 8, 12]] - self.assert_(isinstance(taken, PeriodIndex)) + tm.assert_isinstance(taken, PeriodIndex) self.assert_(taken.freq == index.freq) - self.assert_(isinstance(taken2, PeriodIndex)) + tm.assert_isinstance(taken2, PeriodIndex) self.assert_(taken2.freq == index.freq) def test_joins(self): @@ -1854,7 +1854,7 @@ def test_joins(self): for kind in ['inner', 'outer', 'left', 'right']: joined = index.join(index[:-5], how=kind) - self.assert_(isinstance(joined, PeriodIndex)) + tm.assert_isinstance(joined, PeriodIndex) self.assert_(joined.freq == index.freq) def test_align_series(self): @@ -2008,7 +2008,7 @@ def test_map_with_string_constructor(self): res = index.map(t) # should return an array - self.assert_(isinstance(res, np.ndarray)) + tm.assert_isinstance(res, np.ndarray) # preserve element types self.assert_(all(isinstance(resi, t) for resi in res)) @@ -2024,7 +2024,7 @@ def test_convert_array_of_periods(self): periods = list(rng) result = pd.Index(periods) - self.assert_(isinstance(result, PeriodIndex)) + tm.assert_isinstance(result, PeriodIndex) def test_with_multi_index(self): # #1705 @@ -2033,9 +2033,9 @@ def test_with_multi_index(self): s = 
Series([0, 1, 2, 3], index_as_arrays) - self.assert_(isinstance(s.index.levels[0], PeriodIndex)) + tm.assert_isinstance(s.index.levels[0], PeriodIndex) - self.assert_(isinstance(s.index.values[0][0], Period)) + tm.assert_isinstance(s.index.values[0][0], Period) def test_to_datetime_1703(self): index = period_range('1/1/2012', periods=4, freq='D') @@ -2066,7 +2066,7 @@ def test_append_concat(self): # drops index result = pd.concat([s1, s2]) - self.assert_(isinstance(result.index, PeriodIndex)) + tm.assert_isinstance(result.index, PeriodIndex) self.assertEquals(result.index[0], s1.index[0]) def test_pickle_freq(self): diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index 2bb70e6ef3c76..f6242139e9e93 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -484,7 +484,7 @@ def test_gaps(self): self.assert_(len(lines) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[5:25, 1].all()) @@ -498,7 +498,7 @@ def test_gaps(self): self.assert_(len(lines) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[2:5, 1].all()) @@ -512,7 +512,7 @@ def test_gaps(self): self.assert_(len(lines) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[2:5, 1].all()) @@ -532,7 +532,7 @@ def test_gap_upsample(self): self.assert_(len(ax.right_ax.get_lines()) == 1) l = lines[0] data = l.get_xydata() - self.assert_(isinstance(data, np.ma.core.MaskedArray)) + tm.assert_isinstance(data, np.ma.core.MaskedArray) mask = data.mask self.assert_(mask[5:25, 1].all()) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 22e103e54c85b..b5e6d9de436a1 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -268,7 +268,7 @@ def test_resample_reresample(self): bs = s.resample('B', closed='right', label='right') result = bs.resample('8H') self.assertEquals(len(result), 22) - self.assert_(isinstance(result.index.freq, offsets.DateOffset)) + tm.assert_isinstance(result.index.freq, offsets.DateOffset) self.assert_(result.index.freq == offsets.Hour(8)) def test_resample_timestamp_to_period(self): @@ -537,7 +537,7 @@ def test_upsample_apply_functions(self): ts = Series(np.random.randn(len(rng)), index=rng) result = ts.resample('20min', how=['mean', 'sum']) - self.assert_(isinstance(result, DataFrame)) + tm.assert_isinstance(result, DataFrame) def test_resample_not_monotonic(self): rng = pd.date_range('2012-06-12', periods=200, freq='h') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 4b87dd29518f8..0336f659f5dd6 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -65,8 +65,8 @@ def setUp(self): self.dups = Series(np.random.randn(len(dates)), index=dates) def test_constructor(self): - self.assert_(isinstance(self.dups, TimeSeries)) - self.assert_(isinstance(self.dups.index, DatetimeIndex)) + tm.assert_isinstance(self.dups, TimeSeries) + tm.assert_isinstance(self.dups.index, DatetimeIndex) def test_is_unique_monotonic(self): self.assert_(not self.dups.index.is_unique) @@ -325,13 
+325,13 @@ def test_series_box_timestamp(self): rng = date_range('20090415', '20090519', freq='B') s = Series(rng) - self.assert_(isinstance(s[5], Timestamp)) + tm.assert_isinstance(s[5], Timestamp) rng = date_range('20090415', '20090519', freq='B') s = Series(rng, index=rng) - self.assert_(isinstance(s[5], Timestamp)) + tm.assert_isinstance(s[5], Timestamp) - self.assert_(isinstance(s.iget_value(5), Timestamp)) + tm.assert_isinstance(s.iget_value(5), Timestamp) def test_date_range_ambiguous_arguments(self): # #2538 @@ -356,9 +356,9 @@ def test_index_convert_to_datetime_array(self): def _check_rng(rng): converted = rng.to_pydatetime() - self.assert_(isinstance(converted, np.ndarray)) + tm.assert_isinstance(converted, np.ndarray) for x, stamp in zip(converted, rng): - self.assert_(type(x) is datetime) + tm.assert_isinstance(x, datetime) self.assertEquals(x, stamp.to_pydatetime()) self.assertEquals(x.tzinfo, stamp.tzinfo) @@ -657,7 +657,7 @@ def test_index_astype_datetime64(self): casted = idx.astype(np.dtype('M8[D]')) expected = DatetimeIndex(idx.values) - self.assert_(isinstance(casted, DatetimeIndex)) + tm.assert_isinstance(casted, DatetimeIndex) self.assert_(casted.equals(expected)) def test_reindex_series_add_nat(self): @@ -749,7 +749,7 @@ def test_string_na_nat_conversion(self): assert_almost_equal(result, expected) result2 = to_datetime(strings) - self.assert_(isinstance(result2, DatetimeIndex)) + tm.assert_isinstance(result2, DatetimeIndex) assert_almost_equal(result, result2) malformed = np.array(['1/100/2000', np.nan], dtype=object) @@ -1695,7 +1695,7 @@ def test_append_join_nondatetimeindex(self): idx = Index(['a', 'b', 'c', 'd']) result = rng.append(idx) - self.assert_(isinstance(result[0], Timestamp)) + tm.assert_isinstance(result[0], Timestamp) # it works rng.join(idx, how='outer') @@ -1790,7 +1790,7 @@ def test_add_union(self): def test_misc_coverage(self): rng = date_range('1/1/2000', periods=5) result = rng.groupby(rng.day) - self.assert_(isinstance(result.values()[0][0], Timestamp)) + tm.assert_isinstance(result.values()[0][0], Timestamp) idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) self.assert_(idx.equals(list(idx))) @@ -1898,7 +1898,7 @@ def test_groupby_function_tuple_1677(self): monthly_group = df.groupby(lambda x: (x.year, x.month)) result = monthly_group.mean() - self.assert_(isinstance(result.index[0], tuple)) + tm.assert_isinstance(result.index[0], tuple) def test_append_numpy_bug_1681(self): # another datetime64 bug @@ -2022,7 +2022,7 @@ def test_unpickle_legacy_len0_daterange(self): ex_index = DatetimeIndex([], freq='B') self.assert_(result.index.equals(ex_index)) - self.assert_(isinstance(result.index.freq, offsets.BDay)) + tm.assert_isinstance(result.index.freq, offsets.BDay) self.assert_(len(result) == 0) def test_arithmetic_interaction(self): @@ -2034,12 +2034,12 @@ def test_arithmetic_interaction(self): result = dseries + oseries expected = dseries * 2 - self.assert_(isinstance(result.index, DatetimeIndex)) + tm.assert_isinstance(result.index, DatetimeIndex) assert_series_equal(result, expected) result = dseries + oseries[:5] expected = dseries + dseries[:5] - self.assert_(isinstance(result.index, DatetimeIndex)) + tm.assert_isinstance(result.index, DatetimeIndex) assert_series_equal(result, expected) def test_join_interaction(self): @@ -2051,7 +2051,7 @@ def _check_join(left, right, how='inner'): ea, eb, ec = left.join(DatetimeIndex(right), how=how, return_indexers=True) - self.assert_(isinstance(ra, DatetimeIndex)) + 
tm.assert_isinstance(ra, DatetimeIndex) self.assert_(ra.equals(ea)) assert_almost_equal(rb, eb) @@ -2075,8 +2075,8 @@ def test_unpickle_daterange(self): filepath = os.path.join(pth, 'data', 'daterange_073.pickle') rng = read_pickle(filepath) - self.assert_(type(rng[0]) == datetime) - self.assert_(isinstance(rng.offset, offsets.BDay)) + tm.assert_isinstance(rng[0], datetime) + tm.assert_isinstance(rng.offset, offsets.BDay) self.assert_(rng.values.dtype == object) def test_setops(self): @@ -2085,17 +2085,17 @@ def test_setops(self): result = index[:5].union(obj_index[5:]) expected = index - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.equals(expected)) result = index[:10].intersection(obj_index[5:]) expected = index[5:10] - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.equals(expected)) result = index[:10] - obj_index[5:] expected = index[:5] - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.equals(expected)) def test_index_conversion(self): @@ -2111,7 +2111,7 @@ def test_tolist(self): rng = date_range('1/1/2000', periods=10) result = rng.tolist() - self.assert_(isinstance(result[0], Timestamp)) + tm.assert_isinstance(result[0], Timestamp) def test_object_convert_fail(self): idx = DatetimeIndex([NaT]) @@ -2336,8 +2336,8 @@ def test_min_max(self): the_min = rng2.min() the_max = rng2.max() - self.assert_(isinstance(the_min, Timestamp)) - self.assert_(isinstance(the_max, Timestamp)) + tm.assert_isinstance(the_min, Timestamp) + tm.assert_isinstance(the_max, Timestamp) self.assertEqual(the_min, rng[0]) self.assertEqual(the_max, rng[-1]) @@ -2623,11 +2623,11 @@ def test_datetimeindex_union_join_empty(self): empty = Index([]) result = dti.union(empty) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result is result) result = dti.join(empty) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) def test_series_set_value(self): # #1561 diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index bf441a97089b5..1f3e80dc07893 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -182,7 +182,7 @@ def test_astimezone(self): expected = utc.tz_convert('US/Eastern') result = utc.astimezone('US/Eastern') self.assertEquals(expected, result) - self.assert_(isinstance(result, Timestamp)) + tm.assert_isinstance(result, Timestamp) def test_create_with_tz(self): stamp = Timestamp('3/11/2012 05:00', tz='US/Eastern') @@ -726,11 +726,11 @@ def test_join_utc_convert(self): for how in ['inner', 'outer', 'left', 'right']: result = left.join(left[:-5], how=how) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.tz == left.tz) result = left.join(right[:-5], how=how) - self.assert_(isinstance(result, DatetimeIndex)) + tm.assert_isinstance(result, DatetimeIndex) self.assert_(result.tz.zone == 'UTC') def test_join_aware(self): diff --git a/pandas/util/compat.py b/pandas/util/compat.py index a42b9218a3acc..10fb2b1071f7d 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -1,20 +1,8 @@ -# itertools.product not in Python 2.5 - import sys import six -from six.moves import map -try: - from itertools import product -except ImportError: # python 2.5 - def product(*args, 
**kwds): - # product('ABCD', 'xy') --> Ax Ay Bx By Cx Cy Dx Dy - # product(range(2), repeat=3) --> 000 001 010 011 100 101 110 111 - pools = list(map(tuple, args) * kwds.get('repeat', 1)) - result = [[]] - for pool in pools: - result = [x + [y] for x in result for y in pool] - for prod in result: - yield tuple(prod) +from six.moves import map, filter +from pandas.util.py3compat import range +from itertools import product # OrderedDict Shim from Raymond Hettinger, python core dev @@ -31,12 +19,15 @@ def product(*args, **kwds): pass -def iteritems(obj): - """replacement for six's iteritems to use iteritems on PandasObjects""" +def iteritems(obj, **kwargs): + """Replacement for six's iteritems for Python 2/3 compat. + Uses 'iteritems' if available, otherwise falls back to 'items'. + + Passes kwargs to the underlying method.""" if hasattr(obj, "iteritems"): - return obj.iteritems() + return obj.iteritems(**kwargs) else: - return obj.items() + return obj.items(**kwargs) class _OrderedDict(dict): @@ -293,7 +284,6 @@ def viewitems(self): try: from operator import itemgetter from heapq import nlargest - from itertools import repeat, ifilter except ImportError: pass @@ -348,7 +338,7 @@ def elements(self): ''' for elem, count in iteritems(self): - for _ in repeat(None, count): + for _ in range(count): yield elem # Override dict methods where the meaning changes for Counter objects. @@ -375,7 +365,7 @@ def update(self, iterable=None, **kwds): if hasattr(iterable, 'iteritems'): if self: self_get = self.get - for elem, count in iterable.iteritems(): + for elem, count in iteritems(iterable): self[elem] = self_get(elem, 0) + count else: dict.update( @@ -474,7 +464,7 @@ def __and__(self, other): result = Counter() if len(self) < len(other): self, other = other, self - for elem in ifilter(self.__contains__, other): + for elem in filter(self.__contains__, other): newcount = _min(self[elem], other[elem]) if newcount > 0: result[elem] = newcount diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py index 240f8c0fc88a2..ad13d913b87ec 100644 --- a/pandas/util/py3compat.py +++ b/pandas/util/py3compat.py @@ -14,6 +14,7 @@ def bytes_to_str(b, encoding='utf-8'): range = range long = int + unichr = chr else: # Python 2 import re @@ -30,6 +31,7 @@ def bytes_to_str(b, encoding='ascii'): range = xrange long = long + unichr = unichr try: # not writeable if instantiated with string, not good with unicode
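For reference, a minimal sketch of how the helpers above are meant to be used (only the iteritems and range shims defined in these hunks are assumed):

    from pandas.util.compat import iteritems
    from pandas.util.py3compat import range

    d = {'a': 1, 'b': 2}
    # prefers the lazy .iteritems() where it exists (Python 2 dicts,
    # PandasObjects), otherwise falls back to .items() (Python 3)
    pairs = sorted(iteritems(d))
    assert pairs == [('a', 1), ('b', 2)]

    # range is now always the lazy iterator (xrange on Python 2), so
    # materialize it when an actual list is needed
    assert list(range(3)) == [0, 1, 2]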
diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 6b710b4425f3d..16e8c649e4e12 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -123,10 +123,17 @@ def equalContents(arr1, arr2): return frozenset(arr1) == frozenset(arr2) +def assert_isinstance(obj, class_type_or_tuple): + """asserts that obj is an instance of class_type_or_tuple""" + assert isinstance(obj, class_type_or_tuple), ( + "Expected object to be of type %r, found %r instead" % ( + class_type_or_tuple, type(obj))) + + def isiterable(obj): return hasattr(obj, '__iter__') def assert_almost_equal(a, b, check_less_precise = False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) @@ -199,7 +206,7 @@ def assert_series_equal(left, right, check_dtype=True, check_series_type=False, check_less_precise=False): if check_series_type: - assert(type(left) == type(right)) + assert_isinstance(left, type(right)) assert_almost_equal(left.values, right.values, check_less_precise) if check_dtype: assert(left.dtype == right.dtype) @@ -208,7 +215,7 @@ def assert_series_equal(left, right, check_dtype=True, else: assert(left.index.equals(right.index)) if check_index_type: - assert(type(left.index) == type(right.index)) + assert_isinstance(left.index, type(right.index)) assert(left.index.dtype == right.index.dtype) assert(left.index.inferred_type == right.index.inferred_type) if check_index_freq: @@ -223,9 +230,9 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=False, check_names=True): if check_frame_type: - assert(type(left) == type(right)) - assert(isinstance(left, DataFrame)) - assert(isinstance(right, DataFrame)) + assert_isinstance(left, type(right)) + assert_isinstance(left, DataFrame) + assert_isinstance(right, DataFrame) if check_less_precise: assert_almost_equal(left.columns,right.columns) @@ -244,11 +251,11 @@ def assert_frame_equal(left, right, check_dtype=True, check_less_precise=check_less_precise) if check_index_type: - assert(type(left.index) == type(right.index)) + assert_isinstance(left.index, type(right.index)) assert(left.index.dtype == right.index.dtype) assert(left.index.inferred_type == right.index.inferred_type) if check_column_type: - assert(type(left.columns) == type(right.columns)) + assert_isinstance(left.columns, type(right.columns)) assert(left.columns.dtype == right.columns.dtype) assert(left.columns.inferred_type == right.columns.inferred_type) if check_names: @@ -260,7 +267,7 @@ def assert_panel_equal(left, right, check_panel_type=False, check_less_precise=False): if check_panel_type: - assert(type(left) == type(right)) + assert_isinstance(left, type(right)) assert(left.items.equals(right.items)) assert(left.major_axis.equals(right.major_axis)) diff --git a/scripts/json_manip.py b/scripts/json_manip.py index 29c2d88aa9b09..0b2ac8ff617aa 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -211,7 +211,7 @@ def _denorm(queries,thing): #print "-- result: ", r if not r: r = [default] - if type(r[0]) is type({}): + if isinstance(r[0], type({})): fields.append(sorted(r[0].keys())) # dicty answers else: fields.append([q]) # stringy answer @@ -227,7 +227,7 @@ def _denorm(queries,thing): U = dict() for (ii,thing) in enumerate(p): #print ii,thing - if type(thing) is type({}): + if isinstance(thing, type({})): U.update(thing) else: U[fields[ii][0]] = thing @@ -284,11 +284,11 @@ def flatten(*stack): def _Q(filter_, thing): """ underlying machinery for Q function recursion """ T = type(thing) - if T is type({}): + if isinstance(thing, type({})): for k,v in compat.iteritems(thing): #print k,v if filter_ == k: - if type(v) is type([]): + if isinstance(v, type([])): yield iter(v) else: yield v @@ -296,7 +296,7 @@ def _Q(filter_, thing): if type(v) in (type({}),type([])): yield Q(filter_,v) - elif T is type([]): + elif isinstance(thing, type([])): for k in thing: #print k yield Q(filter_,k) @@ -318,9 +318,9 @@ def Q(filter_,thing): [3] returns a generator. Use ``Ql`` if you want a list.
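For instance (an illustrative sketch added for this write-up, not from the original script; it assumes the dict/list dispatch in ``_Q`` above):
>>> list(Q('name', [{'name': 'a'}, {'name': 'b'}]))
['a', 'b']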
""" - if type(filter_) is type([]): + if isinstance(filter_, type([])): return flatten(*[_Q(x,thing) for x in filter_]) - elif type(filter_) is type({}): + elif isinstance(filter_, type({})): d = dict.fromkeys(filter_.keys()) #print d for k in d: @@ -346,7 +346,7 @@ def Ql(filter_,thing): """ same as Q, but returns a list, not a generator """ res = Q(filter_,thing) - if type(filter_) is type({}): + if isinstance(filter_, type({})): for k in res: res[k] = list(res[k]) return res diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index 0f2adf41d9fab..cd0a51d0232b5 100755 --- a/vb_suite/perf_HEAD.py +++ b/vb_suite/perf_HEAD.py @@ -105,7 +105,7 @@ def main(): except Exception as e: exit_code = 1 - if (type(e) == KeyboardInterrupt or + if (isinstance(e, KeyboardInterrupt) or 'KeyboardInterrupt' in str(d)): raise KeyboardInterrupt() From a0386534eb396b1ec72ef6d85252a6170b40114a Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 27 Jul 2013 15:02:37 -0400 Subject: [PATCH 03/11] CLN/ENH: Refactor url methods to be Py2/3 compatible --- doc/sphinxext/docscrape.py | 2 +- doc/sphinxext/docscrape_sphinx.py | 2 +- doc/sphinxext/numpydoc.py | 2 +- doc/sphinxext/traitsdoc.py | 2 +- pandas/core/groupby.py | 2 +- pandas/core/internals.py | 2 +- pandas/io/common.py | 53 ++++++++++++++---------- pandas/io/data.py | 3 +- pandas/io/html.py | 10 ++--- pandas/io/pickle.py | 5 ++- pandas/io/tests/test_html.py | 6 +-- pandas/io/tests/test_json/test_pandas.py | 4 +- pandas/io/tests/test_parsers.py | 15 +++---- pandas/io/wb.py | 11 +++-- pandas/tseries/tools.py | 4 +- pandas/util/compat.py | 2 +- pandas/util/decorators.py | 3 +- pandas/util/py3compat.py | 1 + pandas/util/testing.py | 20 ++++----- scripts/gen_release_notes.py | 6 +-- scripts/json_manip.py | 2 +- vb_suite/perf_HEAD.py | 14 +++---- 22 files changed, 87 insertions(+), 84 deletions(-) diff --git a/doc/sphinxext/docscrape.py b/doc/sphinxext/docscrape.py index 384a6db2c1dec..5d27810a11ac8 100755 --- a/doc/sphinxext/docscrape.py +++ b/doc/sphinxext/docscrape.py @@ -491,7 +491,7 @@ def methods(self): if self._cls is None: return [] return [name for name,func in inspect.getmembers(self._cls) - if not name.startswith('_') and callable(func)] + if not name.startswith('_') and six.callable(func)] @property def properties(self): diff --git a/doc/sphinxext/docscrape_sphinx.py b/doc/sphinxext/docscrape_sphinx.py index a5b53eb09ccd8..896ae070d739a 100755 --- a/doc/sphinxext/docscrape_sphinx.py +++ b/doc/sphinxext/docscrape_sphinx.py @@ -212,7 +212,7 @@ def get_doc_object(obj, what=None, doc=None, config={}): what = 'class' elif inspect.ismodule(obj): what = 'module' - elif callable(obj): + elif six.callable(obj): what = 'function' else: what = 'object' diff --git a/doc/sphinxext/numpydoc.py b/doc/sphinxext/numpydoc.py index f32d778b6b79f..4ddc12e4c1a1c 100755 --- a/doc/sphinxext/numpydoc.py +++ b/doc/sphinxext/numpydoc.py @@ -83,7 +83,7 @@ def mangle_signature(app, what, name, obj, options, sig, retann): 'initializes x; see ' in pydoc.getdoc(obj.__init__))): return '', '' - if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return + if not (six.callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return if not hasattr(obj, '__doc__'): return doc = SphinxDocString(pydoc.getdoc(obj)) diff --git a/doc/sphinxext/traitsdoc.py b/doc/sphinxext/traitsdoc.py index 952206c442752..0298a441e26ff 100755 --- a/doc/sphinxext/traitsdoc.py +++ b/doc/sphinxext/traitsdoc.py @@ -117,7 +117,7 @@ def get_doc_object(obj, what=None, config=None): what = 
'class' elif inspect.ismodule(obj): what = 'module' - elif callable(obj): + elif six.callable(obj): what = 'function' else: what = 'object' diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index 9bd4f24ee04e8..ccbec5e9f46ec 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1283,7 +1283,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): # what are we after, exactly? match_axis_length = len(keys) == len(group_axis) - any_callable = any(callable(g) or isinstance(g, dict) for g in keys) + any_callable = any(six.callable(g) or isinstance(g, dict) for g in keys) any_arraylike = any(isinstance(g, (list, tuple, np.ndarray)) for g in keys) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0ff462ce21073..37aa4e4ca27ec 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1266,7 +1266,7 @@ def apply(self, f, *args, **kwargs): if not blk.items.isin(filter).any(): result_blocks.append(blk) continue - if callable(f): + if six.callable(f): applied = f(blk, *args, **kwargs) else: applied = getattr(blk,f)(*args, **kwargs) diff --git a/pandas/io/common.py b/pandas/io/common.py index 3ad181c3dffcb..786153183d115 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -1,19 +1,39 @@ """Common IO api utilities""" import sys -import urlparse -import urllib2 import zipfile from contextlib import contextmanager, closing -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import StringIO from pandas.util import py3compat -_VALID_URLS = set(urlparse.uses_relative + urlparse.uses_netloc + - urlparse.uses_params) -_VALID_URLS.discard('') + +if py3compat.PY3: + from urllib.request import urlopen + from urllib.parse import urlparse as parse_url + import urllib.parse as compat_parse + from urllib.parse import uses_relative, uses_netloc, uses_params + from urllib.error import URLError + from http.client import HTTPException +else: + from urllib2 import urlopen as _urlopen + from urlparse import urlparse as parse_url + from urlparse import uses_relative, uses_netloc, uses_params + from urllib2 import URLError + from httplib import HTTPException + from contextlib import contextmanager, closing + from functools import wraps + + @wraps(_urlopen) + @contextmanager + def urlopen(*args, **kwargs): + with closing(_urlopen(*args, **kwargs)) as f: + yield f +_VALID_URLS = set(uses_relative + uses_netloc + uses_params) +_VALID_URLS.discard('') + class PerformanceWarning(Warning): pass @@ -31,7 +51,7 @@ def _is_url(url): If `url` has a valid protocol return True otherwise False. 
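For example (an illustrative sketch, not from the original docstring; it assumes the _VALID_URLS scheme set built above):
>>> _is_url('http://pandas.pydata.org')
True
>>> _is_url('pandas.pydata.org')
False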
""" try: - return urlparse.urlparse(url).scheme in _VALID_URLS + return parse_url(url).scheme in _VALID_URLS except: return False @@ -68,10 +88,11 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): else: errors = 'replace' encoding = 'utf-8' - bytes = filepath_or_buffer.read().decode(encoding, errors) - filepath_or_buffer = StringIO(bytes) - return filepath_or_buffer, encoding - return filepath_or_buffer, None + out = StringIO(req.read().decode(encoding, errors)) + else: + encoding = None + out = req + return out, encoding if _is_s3_url(filepath_or_buffer): try: @@ -91,16 +112,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): return filepath_or_buffer, None -# ---------------------- -# Prevent double closing -if py3compat.PY3: - urlopen = urllib2.urlopen -else: - @contextmanager - def urlopen(*args, **kwargs): - with closing(urllib2.urlopen(*args, **kwargs)) as f: - yield f - # ZipFile is not a context manager for <= 2.6 # must be tuple index here since 2.6 doesn't use namedtuple for version_info if sys.version_info[1] <= 6: diff --git a/pandas/io/data.py b/pandas/io/data.py index afec826279988..febf5c04959e4 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -3,7 +3,6 @@ """ -from pandas.util.py3compat import range import warnings import tempfile import datetime as dt @@ -14,7 +13,7 @@ import numpy as np -from pandas.util.py3compat import StringIO, bytes_to_str +from pandas.util.py3compat import StringIO, bytes_to_str, range from pandas import Panel, DataFrame, Series, read_csv, concat from pandas.core.common import PandasError from pandas.io.parsers import TextParser diff --git a/pandas/io/html.py b/pandas/io/html.py index 3fee071cdf095..dd58e8068f2c8 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -3,13 +3,9 @@ """ -from pandas.util.py3compat import range -from pandas.util import compat import os import re import numbers -import urllib2 -import urlparse import collections from distutils.version import LooseVersion @@ -17,7 +13,9 @@ import numpy as np from pandas import DataFrame, MultiIndex, isnull -from pandas.io.common import _is_url, urlopen +from pandas.io.common import _is_url, urlopen, parse_url +from pandas.util.py3compat import range +from pandas.util import compat import six from six.moves import map @@ -553,7 +551,7 @@ def _build_doc(self): pass else: # not a url - scheme = urlparse.urlparse(self.io).scheme + scheme = parse_url(self.io).scheme if scheme not in _valid_schemes: # lxml can't parse it msg = ('{0} is not a valid url scheme, valid schemes are ' diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 765c0cd46d4e5..a81c77d870612 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,4 +1,7 @@ -import cPickle as pkl +try: + import cPickle as pkl +except ImportError: + import pickle as pkl def to_pickle(obj, path): diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 2f7c6092d9bcf..fc78d630bed5b 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -6,7 +6,7 @@ import warnings import six from distutils.version import LooseVersion -import urllib2 +from pandas.io.common import URLError import nose from nose.tools import assert_raises @@ -291,12 +291,12 @@ def test_file_like(self): @network def test_bad_url_protocol(self): - self.assertRaises(urllib2.URLError, self.run_read_html, + self.assertRaises(URLError, self.run_read_html, 'git://github.com', '.*Water.*') @network def test_invalid_url(self): - self.assertRaises(urllib2.URLError, 
self.run_read_html, + self.assertRaises(URLError, self.run_read_html, 'http://www.a23950sdfa908sd.com') @slow diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index b739181b7e702..4bb73dc761502 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -5,6 +5,7 @@ from pandas.util.py3compat import StringIO from pandas.util.py3compat import range from pandas.util import compat +from pandas.io.common import URLError import cPickle as pickle import operator import os @@ -473,7 +474,6 @@ def test_round_trip_exception_(self): @network @slow def test_url(self): - import urllib2 try: url = 'https://api.github.com/repos/pydata/pandas/issues?per_page=5' @@ -484,5 +484,5 @@ def test_url(self): url = 'http://search.twitter.com/search.json?q=pandas%20python' result = read_json(url) - except urllib2.URLError: + except URLError: raise nose.SkipTest diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 08b6a40df84a5..5796ea57748b0 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -1,22 +1,19 @@ # pylint: disable=E1101 -from pandas.util.py3compat import StringIO, BytesIO, PY3 from datetime import datetime -from pandas.util.py3compat import range, long import csv import os import sys import re import unittest -from contextlib import closing -from urllib2 import urlopen - import nose from numpy import nan import numpy as np from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex +from pandas.util.py3compat import StringIO, BytesIO, PY3, range, long +from pandas.io.common import urlopen, URLError import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, TextFileReader, TextParser) @@ -1393,7 +1390,6 @@ def test_na_value_dict(self): @slow @network def test_url(self): - import urllib2 try: # HTTP(S) url = ('https://raw.github.com/pydata/pandas/master/' @@ -1405,18 +1401,17 @@ def test_url(self): tm.assert_frame_equal(url_table, local_table) # TODO: ftp testing - except urllib2.URLError: + except URLError: try: - with closing(urlopen('http://www.google.com')) as resp: + with urlopen('http://www.google.com') as resp: pass - except urllib2.URLError: + except URLError: raise nose.SkipTest else: raise @slow def test_file(self): - import urllib2 # FILE if sys.version_info[:2] < (2, 6): @@ -1427,7 +1422,7 @@ def test_file(self): try: url_table = self.read_table('file://localhost/' + localtable) - except urllib2.URLError: + except URLError: # fails on some systems raise nose.SkipTest diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 5048551cf09b0..59e3c211aeae6 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,8 +1,7 @@ from __future__ import print_function -from urllib2 import urlopen from pandas.util.py3compat import range -import json -from contextlib import closing +from pandas.io.common import urlopen +from pandas.io import json import pandas import numpy as np from six.moves import map, reduce @@ -89,7 +88,7 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US', indicator + "?date=" + str(start) + ":" + str(end) + "&per_page=25000" + \ "&format=json" # Download - with closing(urlopen(url)) as response: + with urlopen(url) as response: data = response.read() # Parse JSON file data = json.loads(data)[1] @@ -106,7 +105,7 @@ def get_countries(): '''Query information about countries ''' url = 'http://api.worldbank.org/countries/all?format=json' - with closing(urlopen(url)) as response: + with urlopen(url) as response: data = 
response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) @@ -122,7 +121,7 @@ def get_indicators(): '''Download information about all World Bank data series ''' url = 'http://api.worldbank.org/indicators?per_page=50000&format=json' - with closing(urlopen(url)) as response: + with urlopen(url) as response: data = response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index c56fa192bad8d..f043dcb87ccca 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -272,8 +272,8 @@ def dateutil_parse(timestr, default, if res.weekday is not None and not res.day: ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: - if callable(tzinfos) or tzinfos and res.tzname in tzinfos: - if callable(tzinfos): + if six.callable(tzinfos) or tzinfos and res.tzname in tzinfos: + if six.callable(tzinfos): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) diff --git a/pandas/util/compat.py b/pandas/util/compat.py index 10fb2b1071f7d..8b91484562937 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -485,7 +485,7 @@ def __init__(self, *args, **kwargs): newargs = () if args: newdefault = args[0] - if not (newdefault is None or callable(newdefault)): + if not (newdefault is None or six.callable(newdefault)): raise TypeError('first argument must be callable or None') newargs = args[1:] self.default_factory = newdefault diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 4a8762dcb7ae0..a5f4cc7e1077b 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,5 +1,6 @@ from pandas.util.py3compat import StringIO from pandas.lib import cache_readonly +import six import sys import warnings @@ -163,7 +164,7 @@ def knownfailureif(fail_condition, msg=None): msg = 'Test skipped due to known failure' # Allow for both boolean or callable known failure conditions. 
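# For context, six.callable is needed because the callable() builtin was
# removed in Python 3.0/3.1 and only restored in 3.2. A rough sketch of the
# fallback six provides (paraphrased from six, not part of this patch):
#
#     try:
#         callable = callable          # Python 2 and Python >= 3.2
#     except NameError:
#         def callable(obj):           # Python 3.0 / 3.1
#             return any("__call__" in klass.__dict__
#                        for klass in type(obj).__mro__)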
- if callable(fail_condition): + if six.callable(fail_condition): fail_val = fail_condition else: fail_val = lambda: fail_condition diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py index ad13d913b87ec..f4d262e45c65d 100644 --- a/pandas/util/py3compat.py +++ b/pandas/util/py3compat.py @@ -30,6 +30,7 @@ def bytes_to_str(b, encoding='ascii'): return b range = xrange + # have to explicitly put builtins into the namespace long = long unichr = unichr diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 16e8c649e4e12..d235298e8a280 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2,7 +2,7 @@ # pylint: disable-msg=W0402 -from pandas.util.py3compat import range +from pandas.util.py3compat import range, unichr from six.moves import zip import random import string @@ -14,9 +14,7 @@ from datetime import datetime from functools import wraps -from contextlib import contextmanager -from httplib import HTTPException -from urllib2 import urlopen +from contextlib import contextmanager, closing from distutils.version import LooseVersion from numpy.random import randn @@ -34,7 +32,7 @@ from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex -from pandas.io.common import urlopen +from pandas.io.common import urlopen, HTTPException import six from six.moves import map @@ -696,7 +694,7 @@ def wrapper(*args, **kwargs): def dec(f): return decorator(f, *args, **kwargs) - is_decorating = not kwargs and len(args) == 1 and callable(args[0]) + is_decorating = not kwargs and len(args) == 1 and six.callable(args[0]) if is_decorating: f = args[0] args = [] @@ -746,11 +744,12 @@ def network(t, raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, A test can be decorated as requiring network like this:: >>> from pandas.util.testing import network - >>> import urllib2 + >>> from pandas.io.common import urlopen >>> import nose >>> @network ... def test_network(): - ... urllib2.urlopen("rabbit://bonanza.com") + ... with urlopen("rabbit://bonanza.com") as f: + ... pass ... >>> try: ... test_network() @@ -764,7 +763,8 @@ def network(t, raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, >>> @network(raise_on_error=True) ... def test_network(): - ... urllib2.urlopen("complaint://deadparrot.com") + ... with urlopen("complaint://deadparrot.com") as f: + ... pass ... >>> test_network() Traceback (most recent call last): @@ -852,7 +852,7 @@ def with_connectivity_check(t, url="http://www.google.com", t : callable The test requiring network connectivity. url : path - The url to test via ``urllib2.urlopen`` to check for connectivity. + The url to test via ``pandas.io.common.urlopen`` to check for connectivity. Defaults to 'http://www.google.com'. raise_on_error : bool If True, never catches errors. diff --git a/scripts/gen_release_notes.py b/scripts/gen_release_notes.py index 905240fcf6ca9..02ba4f57c189d 100644 --- a/scripts/gen_release_notes.py +++ b/scripts/gen_release_notes.py @@ -1,8 +1,7 @@ from __future__ import print_function import sys -import urllib2 import json -from contextlib import closing +from pandas.io.common import urlopen from datetime import datetime @@ -49,8 +48,7 @@ def get_issues(): def _get_page(page_number): gh_url = ('https://api.github.com/repos/pydata/pandas/issues?' 
'milestone=*&state=closed&assignee=*&page=%d') % page_number - req = urllib2.Request(gh_url) - with closing(urllib2.urlopen(req)) as resp: + with urlopen(gh_url) as resp: rs = resp.readlines()[0] jsondata = json.loads(rs) issues = [Issue(x['title'], x['labels'], x['number'], diff --git a/scripts/json_manip.py b/scripts/json_manip.py index 0b2ac8ff617aa..ccdf02b3f9681 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -274,7 +274,7 @@ def flatten(*stack): except StopIteration: stack.pop(0) continue - if hasattr(x,'next') and callable(getattr(x,'next')): + if hasattr(x,'next') and six.callable(getattr(x,'next')): stack.insert(0, x) #if isinstance(x, (GeneratorType,listerator)): diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index cd0a51d0232b5..4ecaa3b9c8185 100755 --- a/vb_suite/perf_HEAD.py +++ b/vb_suite/perf_HEAD.py @@ -7,9 +7,7 @@ """ -import urllib2 -from contextlib import closing -from urllib2 import urlopen +from pandas.io.common import urlopen import json import pandas as pd @@ -26,7 +24,7 @@ def get_travis_data(): if not jobid: return None, None - with closing(urlopen("https://api.travis-ci.org/workers/")) as resp: + with urlopen("https://api.travis-ci.org/workers/") as resp: workers = json.loads(resp.read()) host = njobs = None @@ -134,7 +132,7 @@ def main(): def get_vbench_log(build_url): - with closing(urllib2.urlopen(build_url)) as r: + with urlopen(build_url) as r: if not (200 <= r.getcode() < 300): return @@ -145,7 +143,7 @@ def get_vbench_log(build_url): if not s: return id = s[0]['id'] # should be just one for now - with closing(urllib2.urlopen("https://api.travis-ci.org/jobs/%s" % id)) as r2: + with urlopen("https://api.travis-ci.org/jobs/%s" % id) as r2: if not 200 <= r.getcode() < 300: return s2 = json.loads(r2.read()) @@ -173,7 +171,7 @@ def convert_json_to_df(results_url): df contains timings for all successful vbenchmarks """ - with closing(urlopen(results_url)) as resp: + with urlopen(results_url) as resp: res = json.loads(resp.read()) timings = res.get("timings") if not timings: @@ -217,7 +215,7 @@ def get_results_from_builds(builds): dfs = OrderedDict() while True: - with closing(urlopen(url)) as r: + with urlopen(url) as r: if not (200 <= r.getcode() < 300): break builds = json.loads(r.read()) From 0802d1c1a4cb17287015094b6c3202fbaa775135 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 27 Jul 2013 16:49:54 -0400 Subject: [PATCH 04/11] CLN: Py2/3-compatible dict keys/items/values. 
+ many other fixups --- doc/make.py | 2 +- pandas/core/array.py | 3 +-- pandas/core/common.py | 18 ++++++++--------- pandas/core/config.py | 10 +++++----- pandas/core/frame.py | 20 +++++++++---------- pandas/core/groupby.py | 6 +++--- pandas/core/index.py | 2 +- pandas/core/panel.py | 4 ++-- pandas/core/reshape.py | 8 ++++---- pandas/core/strings.py | 7 +++++-- pandas/io/common.py | 11 ++++++----- pandas/io/data.py | 7 +++---- pandas/io/excel.py | 2 +- pandas/io/ga.py | 4 ++-- pandas/io/html.py | 4 ++-- pandas/io/pickle.py | 6 +----- pandas/io/pytables.py | 6 ++++-- pandas/io/stata.py | 8 +++----- pandas/io/tests/generate_legacy_pickles.py | 4 ++-- pandas/io/tests/test_clipboard.py | 2 +- pandas/io/tests/test_json/test_pandas.py | 2 +- pandas/io/tests/test_pytables.py | 2 +- pandas/io/tests/test_sql.py | 8 ++++---- pandas/io/wb.py | 23 +++++++++++----------- pandas/sparse/frame.py | 4 ++-- pandas/sparse/tests/test_sparse.py | 6 +++--- pandas/stats/var.py | 4 ++-- pandas/tests/test_frame.py | 2 +- pandas/tests/test_groupby.py | 2 +- pandas/tests/test_multilevel.py | 7 +++---- pandas/tests/test_panel.py | 4 ++-- pandas/tests/test_panel4d.py | 2 +- pandas/tests/test_reshape.py | 20 +++++++++---------- pandas/tests/test_rplot.py | 2 +- pandas/tests/test_series.py | 4 ++-- pandas/tools/plotting.py | 2 +- pandas/tools/rplot.py | 18 ++++++++--------- pandas/tseries/offsets.py | 2 +- pandas/tseries/tests/test_period.py | 6 ++++-- pandas/tseries/tests/test_timeseries.py | 9 ++++----- pandas/tseries/tests/test_timezones.py | 5 ++--- pandas/util/compat.py | 6 +++--- pandas/util/py3compat.py | 4 ++-- scripts/json_manip.py | 2 +- scripts/pypistats.py | 2 +- vb_suite/make.py | 2 +- vb_suite/perf_HEAD.py | 2 +- vb_suite/test_perf.py | 2 +- 48 files changed, 142 insertions(+), 146 deletions(-) diff --git a/doc/make.py b/doc/make.py index 12b60a4f1098b..dbce5aaa7a1b4 100755 --- a/doc/make.py +++ b/doc/make.py @@ -259,7 +259,7 @@ def _get_config(): func = funcd.get(arg) if func is None: raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, funcd.keys())) + arg, list(funcd.keys()))) func() else: small_docs = False diff --git a/pandas/core/array.py b/pandas/core/array.py index d1d29649d4dc7..842bbdbf14ef2 100644 --- a/pandas/core/array.py +++ b/pandas/core/array.py @@ -2,7 +2,6 @@ Isolate pandas's exposure to NumPy """ -from pandas.util import compat import numpy as np import six @@ -18,7 +17,7 @@ _lift_types = [] -for _k, _v in compat.iteritems(_dtypes): +for _k, _v in _dtypes.items(): for _i in _v: _lift_types.append(_k + str(_i)) diff --git a/pandas/core/common.py b/pandas/core/common.py index 25353fe33ccf4..f4ae9e9c84053 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -2,10 +2,10 @@ Misc tools for implementing data structures """ -from pandas.util.py3compat import range, long -import itertools import re from datetime import datetime +import codecs +import csv from numpy.lib.format import read_array, write_array import numpy as np @@ -15,15 +15,13 @@ import pandas.tslib as tslib from pandas.util import py3compat -import codecs -import csv +from pandas.util.py3compat import StringIO, BytesIO, range, long +from six.moves import zip, map +import six -from pandas.util.py3compat import StringIO, BytesIO from pandas.core.config import get_option from pandas.core import array as pa -import six -from six.moves import map # XXX: HACK for NumPy 1.5.1 to suppress warnings try: @@ -1366,7 +1364,7 @@ def iterpairs(seq): seq_it_next = iter(seq) next(seq_it_next) - return 
itertools.izip(seq_it, seq_it_next) + return zip(seq_it, seq_it_next) def split_ranges(mask): @@ -1992,7 +1990,7 @@ def _pprint_dict(seq, _nest_lvl=0,**kwds): nitems = get_option("max_seq_items") or len(seq) - for k, v in seq.items()[:nitems]: + for k, v in list(seq.items())[:nitems]: pairs.append(pfmt % (pprint_thing(k,_nest_lvl+1,**kwds), pprint_thing(v,_nest_lvl+1,**kwds))) @@ -2048,7 +2046,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): translate.update(escape_chars) else: translate = escape_chars - escape_chars = escape_chars.keys() + escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() for c in escape_chars: diff --git a/pandas/core/config.py b/pandas/core/config.py index 725f869580769..34dd2b744d25b 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -128,8 +128,8 @@ def _set_option(*args, **kwargs): # if 1 kwarg then it must be silent=True or silent=False if nkwargs: - k, = kwargs.keys() - v, = kwargs.values() + k, = list(kwargs.keys()) + v, = list(kwargs.values()) if k != 'silent': raise ValueError("the only allowed keyword argument is 'silent', " @@ -209,7 +209,7 @@ def __getattr__(self, key): return _get_option(prefix) def __dir__(self): - return self.d.keys() + return list(self.d.keys()) # For user convenience, we'd like to have the available options described # in the docstring. For dev convenience we'd like to generate the docstrings @@ -232,7 +232,7 @@ def __call__(self, *args, **kwds): @property def __doc__(self): opts_desc = _describe_option('all', _print_desc=False) - opts_list = pp_options_list(_registered_options.keys()) + opts_list = pp_options_list(list(_registered_options.keys())) return self.__doc_tmpl__.format(opts_desc=opts_desc, opts_list=opts_list) @@ -351,7 +351,7 @@ def __init__(self, *args): errmsg = "Need to invoke as option_context(pat,val,[(pat,val),..))." 
raise AssertionError(errmsg) - ops = zip(args[::2], args[1::2]) + ops = list(zip(args[::2], args[1::2])) undo = [] for pat, val in ops: undo.append((pat, _get_option(pat, silent=True))) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 07def64b22c57..1c2ff4130d5c0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -496,7 +496,7 @@ def _init_dict(self, data, index, columns, dtype=None): data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) if index is None: - index = extract_index(data.values()) + index = extract_index(list(data.values())) else: index = _ensure_index(index) @@ -521,9 +521,9 @@ def _init_dict(self, data, index, columns, dtype=None): data_names.append(k) arrays.append(v) else: - keys = data.keys() + keys = list(data.keys()) if not isinstance(data, OrderedDict): - keys = _try_sort(data.keys()) + keys = _try_sort(list(data.keys())) columns = data_names = Index(keys) arrays = [data[k] for k in columns] @@ -954,10 +954,10 @@ def from_dict(cls, data, orient='columns', dtype=None): if orient == 'index': if len(data) > 0: # TODO speed up Series case - if isinstance(data.values()[0], (Series, dict)): + if isinstance(list(data.values())[0], (Series, dict)): data = _from_nested_dict(data) else: - data, index = data.values(), data.keys() + data, index = list(data.values()), list(data.keys()) elif orient != 'columns': # pragma: no cover raise ValueError('only recognize index or columns for orient') @@ -3600,7 +3600,7 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, to_replace = regex regex = True - items = to_replace.items() + items = list(to_replace.items()) keys, values = zip(*items) are_mappings = [isinstance(v, (dict, Series)) for v in values] @@ -3615,8 +3615,8 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, value_dict = {} for k, v in items: - to_rep_dict[k] = v.keys() - value_dict[k] = v.values() + to_rep_dict[k] = list(v.keys()) + value_dict[k] = list(v.values()) to_replace, value = to_rep_dict, value_dict else: @@ -5735,7 +5735,7 @@ def extract_index(data): indexes.append(v.index) elif isinstance(v, dict): have_dicts = True - indexes.append(v.keys()) + indexes.append(list(v.keys())) elif isinstance(v, (list, tuple, np.ndarray)): have_raw_arrays = True raw_lengths.append(len(v)) @@ -5895,7 +5895,7 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None): def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None): if columns is None: - gen = (x.keys() for x in data) + gen = (list(x.keys()) for x in data) columns = lib.fast_unique_multiple_list_gen(gen) # assure that they are of the base dict class and not of derived diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ccbec5e9f46ec..d465ce8b1575f 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1438,8 +1438,8 @@ def aggregate(self, func_or_funcs, *args, **kwargs): def _aggregate_multiple_funcs(self, arg): if isinstance(arg, dict): - columns = arg.keys() - arg = arg.items() + columns = list(arg.keys()) + arg = list(arg.items()) elif any(isinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] @@ -1731,7 +1731,7 @@ def aggregate(self, arg, *args, **kwargs): result[col] = colg.aggregate(agg_how) keys.append(col) - if isinstance(result.values()[0], DataFrame): + if isinstance(list(result.values())[0], DataFrame): from pandas.tools.merge import concat result = concat([result[k] for k in keys], keys=keys, 
axis=1) else: diff --git a/pandas/core/index.py b/pandas/core/index.py index c46e61271f997..8b2f420d83a2d 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -2704,7 +2704,7 @@ def _get_combined_index(indexes, intersect=False): def _get_distinct_indexes(indexes): - return dict((id(x), x) for x in indexes).values() + return list(dict((id(x), x) for x in indexes).values()) def _union_indexes(indexes): diff --git a/pandas/core/panel.py b/pandas/core/panel.py index f2fb213f884e2..63c5548833643 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -284,7 +284,7 @@ def _init_dict(self, data, axes, dtype=None): data = OrderedDict((k, v) for k, v in compat.iteritems(data) if k in haxis) else: - ks = data.keys() + ks = list(data.keys()) if not isinstance(data,OrderedDict): ks = _try_sort(ks) haxis = Index(ks) @@ -360,7 +360,7 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): raise ValueError('Orientation must be one of {items, minor}.') d = cls._homogenize_dict(cls, data, intersect=intersect, dtype=dtype) - ks = d['data'].keys() + ks = list(d['data'].keys()) if not isinstance(d['data'],OrderedDict): ks = list(sorted(ks)) d[cls._info_axis] = Index(ks) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 3e6e4ea366623..a89f5f2706401 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -543,9 +543,9 @@ def _stack_multi_columns(frame, level=-1, dropna=True): # tuple list excluding level for grouping columns if len(frame.columns.levels) > 2: - tuples = zip(*[lev.values.take(lab) + tuples = list(zip(*[lev.values.take(lab) for lev, lab in zip(this.columns.levels[:-1], - this.columns.labels[:-1])]) + this.columns.labels[:-1])])) unique_groups = [key for key, _ in itertools.groupby(tuples)] new_names = this.columns.names[:-1] new_columns = MultiIndex.from_tuples(unique_groups, names=new_names) @@ -747,8 +747,8 @@ def lreshape(data, groups, dropna=True, label=None): reshaped : DataFrame """ if isinstance(groups, dict): - keys = groups.keys() - values = groups.values() + keys = list(groups.keys()) + values = list(groups.values()) else: keys, values = zip(*groups) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index e717f5a2b0f0f..dbf50aef59552 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,6 +1,7 @@ import numpy as np from six.moves import zip +import six from pandas.core.common import isnull from pandas.core.series import Series import re @@ -282,16 +283,18 @@ def str_repeat(arr, repeats): if np.isscalar(repeats): def rep(x): try: - return str.__mul__(x, repeats) + return six.binary_type.__mul__(x, repeats) except TypeError: return six.text_type.__mul__(x, repeats) + return _na_map(rep, arr) else: def rep(x, r): try: - return str.__mul__(x, r) + return six.binary_type.__mul__(x, r) except TypeError: return six.text_type.__mul__(x, r) + repeats = np.asarray(repeats, dtype=object) result = lib.vec_binop(arr, repeats, rep) return result diff --git a/pandas/io/common.py b/pandas/io/common.py index 786153183d115..93f4f0b5d4612 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -10,13 +10,15 @@ if py3compat.PY3: from urllib.request import urlopen + _urlopen = urlopen from urllib.parse import urlparse as parse_url import urllib.parse as compat_parse - from urllib.parse import uses_relative, uses_netloc, uses_params + from urllib.parse import uses_relative, uses_netloc, uses_params, urlencode from urllib.error import URLError from http.client import HTTPException else: from urllib2 import 
urlopen as _urlopen + from urllib import urlencode from urlparse import urlparse as parse_url from urlparse import uses_relative, uses_netloc, uses_params from urllib2 import URLError @@ -24,7 +26,7 @@ from contextlib import contextmanager, closing from functools import wraps - @wraps(_urlopen) + # @wraps(_urlopen) @contextmanager def urlopen(*args, **kwargs): with closing(_urlopen(*args, **kwargs)) as f: @@ -80,8 +82,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): """ if _is_url(filepath_or_buffer): - from urllib2 import urlopen - filepath_or_buffer = urlopen(filepath_or_buffer) + req = _urlopen(filepath_or_buffer) if py3compat.PY3: # pragma: no cover if encoding: errors = 'strict' @@ -101,7 +102,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): raise ImportError("boto is required to handle s3 files") # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY # are environment variables - parsed_url = urlparse.urlparse(filepath_or_buffer) + parsed_url = parse_url(filepath_or_buffer) conn = boto.connect_s3() b = conn.get_bucket(parsed_url.netloc) k = boto.s3.key.Key(b) diff --git a/pandas/io/data.py b/pandas/io/data.py index febf5c04959e4..90e0469987806 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -6,7 +6,6 @@ import warnings import tempfile import datetime as dt -import urllib import time from collections import defaultdict @@ -17,7 +16,7 @@ from pandas import Panel, DataFrame, Series, read_csv, concat from pandas.core.common import PandasError from pandas.io.parsers import TextParser -from pandas.io.common import urlopen, ZipFile +from pandas.io.common import urlopen, ZipFile, urlencode from pandas.util.testing import _network_error_classes import six from six.moves import map, zip @@ -115,7 +114,7 @@ def get_quote_yahoo(symbols): # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm request = ''.join(six.itervalues(_yahoo_codes)) # code request string - header = _yahoo_codes.keys() + header = list(_yahoo_codes.keys()) data = defaultdict(list) @@ -202,7 +201,7 @@ def _get_hist_google(sym, start, end, retry_count, pause): google_URL = 'http://www.google.com/finance/historical?' 
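# A quick sketch of the compat behavior (not part of the patch): urlencode
# is re-exported by pandas.io.common from urllib.parse on Python 3 and from
# urllib on Python 2; on both it percent-encodes a dict into a query string:
#
#     urlencode({'q': 'GOOG', 'output': 'csv'})  # -> 'q=GOOG&output=csv'
#
# (key order follows the dict and is unspecified before Python 3.7)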
# www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv - url = google_URL + urllib.urlencode({"q": sym, + url = google_URL + urlencode({"q": sym, "startdate": start.strftime('%b %d, ' '%Y'), "enddate": end.strftime('%b %d, %Y'), diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 65d0b6f017f23..132b1549eba33 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -5,7 +5,6 @@ #---------------------------------------------------------------------- # ExcelFile class -from pandas.util.py3compat import range import datetime import numpy as np @@ -13,6 +12,7 @@ from pandas.tseries.period import Period from pandas import json from six.moves import map, zip, reduce +from pandas.util.py3compat import range import six def read_excel(path_or_buf, sheetname, kind=None, **kwds): diff --git a/pandas/io/ga.py b/pandas/io/ga.py index d71de9da4d2b7..b0db040b00ed2 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -385,8 +385,8 @@ def _maybe_add_arg(query, field, data, prefix='ga'): def _get_match(obj_store, name, id, **kwargs): key, val = None, None if len(kwargs) > 0: - key = kwargs.keys()[0] - val = kwargs.values()[0] + key = list(kwargs.keys())[0] + val = list(kwargs.values())[0] if name is None and id is None and key is None: return obj_store.get('items')[0] diff --git a/pandas/io/html.py b/pandas/io/html.py index dd58e8068f2c8..9805c194db994 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -708,7 +708,7 @@ def _parser_dispatch(flavor): ImportError * If you do not have the requested `flavor` """ - valid_parsers = _valid_parsers.keys() + valid_parsers = list(_valid_parsers.keys()) if flavor not in valid_parsers: raise AssertionError('"{0!r}" is not a valid flavor, valid flavors are' ' {1}'.format(flavor, valid_parsers)) @@ -744,7 +744,7 @@ def _validate_parser_flavor(flavor): raise TypeError('{0} is not a valid "flavor"'.format(flavor)) flavor = list(flavor) - valid_flavors = _valid_parsers.keys() + valid_flavors = list(_valid_parsers.keys()) if not set(flavor) & set(valid_flavors): raise ValueError('{0} is not a valid set of flavors, valid flavors are' diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index a81c77d870612..b1dee20e6ef04 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,8 +1,4 @@ -try: - import cPickle as pkl -except ImportError: - import pickle as pkl - +from six.moves import cPickle as pkl def to_pickle(obj, path): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index e18cd2d8cf572..f0392836650b8 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -324,7 +324,7 @@ def __len__(self): def __unicode__(self): output = '%s\nFile path: %s\n' % (type(self), pprint_thing(self._path)) - if len(self.keys()): + if len(list(self.keys())): keys = [] values = [] @@ -372,6 +372,9 @@ def open(self, mode='a', warn=True): self._mode = mode if warn and mode == 'w': # pragma: no cover while True: + # rebinding raw_input itself would make it a local name and + # raise UnboundLocalError on Python 2, so bind a new name + _input = input if py3compat.PY3 else raw_input - response = raw_input("Re-opening as mode='w' will delete the " + response = _input("Re-opening as mode='w' will delete the " "current file. 
Continue (y/n)?") if response == 'y': @@ -787,7 +789,7 @@ def copy(self, file, mode = 'w', propindexes = True, keys = None, complib = None """ new_store = HDFStore(file, mode = mode, complib = complib, complevel = complevel, fletcher32 = fletcher32) if keys is None: - keys = self.keys() + keys = list(self.keys()) if not isinstance(keys, (tuple,list)): keys = [ keys ] for k in keys: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 033f0cf0e2cbc..f76a6f1540670 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -10,8 +10,6 @@ http://statsmodels.sourceforge.net/devel/ """ # TODO: Fix this module so it can use cross-compatible zip, map, and range -from StringIO import StringIO -from pandas.util import compat import numpy as np import sys @@ -23,7 +21,7 @@ import datetime from pandas.util import py3compat from pandas.util import compat -from pandas.util.py3compat import long +from pandas.util.py3compat import StringIO, long from pandas import isnull from pandas.io.parsers import _parser_params, Appender from pandas.io.common import get_filepath_or_buffer @@ -541,13 +539,13 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None): data[col] = Series(data[col], data[col].index, self.dtyplist[i]) if convert_dates: - cols = np.where(map(lambda x: x in _date_formats, self.fmtlist))[0] + cols = np.where(list(map(lambda x: x in _date_formats, self.fmtlist)))[0] for i in cols: col = data.columns[i] data[col] = data[col].apply(_stata_elapsed_date_to_datetime, args=(self.fmtlist[i],)) if convert_categoricals: - cols = np.where(map(lambda x: x in six.iterkeys(self.value_label_dict), self.lbllist))[0] + cols = np.where(list(map(lambda x: x in six.iterkeys(self.value_label_dict), self.lbllist)))[0] for i in cols: col = data.columns[i] labeled_data = np.copy(data[col]) diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index 49a7b90b2e1e8..7659b22e4a71b 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -1,6 +1,7 @@ """ self-contained to write legacy pickle files """ from __future__ import print_function -from six.moves import zip + +from six.moves import zip, cPickle as pickle def _create_sp_series(): @@ -88,7 +89,6 @@ def write_legacy_pickles(): import pandas import pandas.util.testing as tm import platform as pl - import cPickle as pickle print("This script generates a pickle file for the current arch, system, and python version") diff --git a/pandas/io/tests/test_clipboard.py b/pandas/io/tests/test_clipboard.py index 9eadd16c207a9..12c696f7076a4 100644 --- a/pandas/io/tests/test_clipboard.py +++ b/pandas/io/tests/test_clipboard.py @@ -33,7 +33,7 @@ def setUpClass(cls): cls.data['mixed'] = DataFrame({'a': np.arange(1.0, 6.0) + 0.01, 'b': np.arange(1, 6), 'c': list('abcde')}) - cls.data_types = cls.data.keys() + cls.data_types = list(cls.data.keys()) @classmethod def tearDownClass(cls): diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 4bb73dc761502..36bf0306d4729 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -6,7 +6,7 @@ from pandas.util.py3compat import range from pandas.util import compat from pandas.io.common import URLError -import cPickle as pickle +from six.moves import cPickle as pickle import operator import os import unittest diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index ba5886ba12a85..bbfe7e93123d6 
100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -2700,7 +2700,7 @@ def do_copy(f = None, new_f = None, keys = None, propindexes = True, **kwargs): # check keys if keys is None: - keys = store.keys() + keys = list(store.keys()) self.assert_(set(keys) == set(tstore.keys())) # check indicies & nrows diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 614b401ceaa73..28703975fcc81 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -1,7 +1,4 @@ from __future__ import print_function -from __future__ import with_statement -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range import unittest import sqlite3 import sys @@ -14,6 +11,8 @@ from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull +from pandas.util.py3compat import StringIO, range +import six import pandas.io.sql as sql import pandas.util.testing as tm @@ -24,7 +23,8 @@ datetime: lambda dt: "'%s'" % date_format(dt), str: lambda x: "'%s'" % x, np.str_: lambda x: "'%s'" % x, - unicode: lambda x: "'%s'" % x, + six.text_type: lambda x: "'%s'" % x, + six.binary_type: lambda x: "'%s'" % x, float: lambda x: "%.8f" % x, int: lambda x: "%s" % x, type(None): lambda x: "NULL", diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 59e3c211aeae6..65a666228e584 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,10 +1,11 @@ from __future__ import print_function + +from six.moves import map, reduce from pandas.util.py3compat import range from pandas.io.common import urlopen from pandas.io import json import pandas import numpy as np -from six.moves import map, reduce def download(country=['MX', 'CA', 'US'], indicator=['GDPPCKD', 'GDPPCKN'], @@ -92,10 +93,10 @@ def _get_data(indicator="NY.GNS.ICTR.GN.ZS", country='US', data = response.read() # Parse JSON file data = json.loads(data)[1] - country = list(map(lambda x: x['country']['value'], data)) - iso2c = list(map(lambda x: x['country']['id'], data)) - year = list(map(lambda x: x['date'], data)) - value = list(map(lambda x: x['value'], data)) + country = [x['country']['value'] for x in data] + iso2c = [x['country']['id'] for x in data] + year = [x['date'] for x in data] + value = [x['value'] for x in data] # Prepare output out = pandas.DataFrame([country, iso2c, year, value]).T return out @@ -109,10 +110,10 @@ def get_countries(): data = response.read() data = json.loads(data)[1] data = pandas.DataFrame(data) - data.adminregion = list(map(lambda x: x['value'], data.adminregion)) - data.incomeLevel = list(map(lambda x: x['value'], data.incomeLevel)) - data.lendingType = list(map(lambda x: x['value'], data.lendingType)) - data.region = list(map(lambda x: x['value'], data.region)) + data.adminregion = [x['value'] for x in data.adminregion] + data.incomeLevel = [x['value'] for x in data.incomeLevel] + data.lendingType = [x['value'] for x in data.lendingType] + data.region = [x['value'] for x in data.region] data = data.rename(columns={'id': 'iso3c', 'iso2Code': 'iso2c'}) return data @@ -126,7 +127,7 @@ def get_indicators(): data = json.loads(data)[1] data = pandas.DataFrame(data) # Clean fields - data.source = list(map(lambda x: x['value'], data.source)) + data.source = [x['value'] for x in data.source] fun = lambda x: x.encode('ascii', 'ignore') data.sourceOrganization = data.sourceOrganization.apply(fun) # Clean topic field @@ -136,7 +137,7 @@ def get_value(x): return x['value'] except: return '' - fun = lambda x: list(map(lambda y: 
get_value(y), x)) + fun = lambda x: [get_value(y) for y in x] data.topics = data.topics.apply(fun) data.topics = data.topics.apply(lambda x: ' ; '.join(x)) # Clean outpu diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 09e8bdb577a22..26c0a151a8f4b 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -153,10 +153,10 @@ def _init_dict(self, data, index, columns, dtype=None): columns = _ensure_index(columns) data = dict((k, v) for k, v in compat.iteritems(data) if k in columns) else: - columns = Index(_try_sort(data.keys())) + columns = Index(_try_sort(list(data.keys()))) if index is None: - index = extract_index(data.values()) + index = extract_index(list(data.values())) sp_maker = lambda x: SparseSeries(x, index=index, kind=self.default_kind, diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 5be3703ddb742..ff9d57bed449f 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -1,9 +1,6 @@ # pylint: disable-msg=E1101,W0612 from unittest import TestCase -from pandas.util.py3compat import range -from pandas.util import compat -import cPickle as pickle import operator from datetime import datetime @@ -25,6 +22,9 @@ import pandas.core.datetools as datetools from pandas.core.common import isnull import pandas.util.testing as tm +from pandas.util.py3compat import range +from pandas.util import compat +from six.moves import cPickle as pickle import pandas.sparse.frame as spf diff --git a/pandas/stats/var.py b/pandas/stats/var.py index 0aa7a50d8d076..5f4a4ec13fdde 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -255,7 +255,7 @@ def _alpha(self): @cache_readonly def _beta_raw(self): - return np.array([self.beta[col].values() for col in self._columns]).T + return np.array([list(self.beta[col].values()) for col in self._columns]).T def _trans_B(self, h): """ @@ -289,7 +289,7 @@ def _trans_B(self, h): @cache_readonly def _x(self): values = np.array([ - self._lagged_data[i][col].values() + list(self._lagged_data[i][col].values()) for i in range(1, 1 + self._p) for col in self._columns ]).T diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index cd894ce2e7338..aadfea05cff16 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2729,7 +2729,7 @@ def test_constructor_orient(self): a = {'hi': [32, 3, 3], 'there': [3, 5, 3]} rs = DataFrame.from_dict(a, orient='index') - xp = DataFrame.from_dict(a).T.reindex(a.keys()) + xp = DataFrame.from_dict(a).T.reindex(list(a.keys())) assert_frame_equal(rs, xp) def test_constructor_Series_named(self): diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index a0ec25ab12a28..9fe98e27c38cc 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -2376,7 +2376,7 @@ def test_groupby_groups_datetimeindex(self): # it works! 
groups = grouped.groups - tm.assert_isinstance(groups.keys()[0], datetime) + tm.assert_isinstance(list(groups.keys())[0], datetime) def test_groupby_reindex_inside_function(self): from pandas.tseries.api import DatetimeIndex diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 5d3171365a56f..82f1fa8248731 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -1,6 +1,4 @@ # pylint: disable-msg=W0612,E1101,W0141 -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range import nose import unittest @@ -15,12 +13,14 @@ assert_frame_equal) import pandas.core.common as com import pandas.util.testing as tm +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range from pandas.util.compat import product as cart_product import pandas as pd import pandas.index as _index import six -from six.moves import zip +from six.moves import zip, cPickle class TestMultiLevel(unittest.TestCase): @@ -139,7 +139,6 @@ def _check_op(opname): _check_op('div') def test_pickle(self): - import cPickle def _test_roundtrip(frame): pickled = cPickle.dumps(frame) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 69fae70bdf44e..1d97078583dd0 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -16,6 +16,7 @@ from pandas.core.series import remove_na import pandas.core.common as com from pandas.util import py3compat +from six.moves import cPickle from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -40,7 +41,6 @@ class PanelTests(object): panel = None def test_pickle(self): - import cPickle pickled = cPickle.dumps(self.panel) unpickled = cPickle.loads(pickled) assert_frame_equal(unpickled['ItemA'], self.panel['ItemA']) @@ -268,7 +268,7 @@ def _test_op(panel, op): assert_frame_equal(result['ItemA'], op(panel['ItemA'], 1)) def test_keys(self): - tm.equalContents(self.panel.keys(), self.panel.items) + tm.equalContents(list(self.panel.keys()), self.panel.items) def test_iteritems(self): # Test panel.iteritems(), aka panel.iteritems() diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 31f5bc64a94df..a1566bf30a89a 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -217,7 +217,7 @@ def _test_op(panel4d, op): assert_panel_equal(result['l1'], op(panel4d['l1'], 1)) def test_keys(self): - tm.equalContents(self.panel4d.keys(), self.panel4d.labels) + tm.equalContents(list(self.panel4d.keys()), self.panel4d.labels) def test_iteritems(self): """Test panel4d.iteritems()""" diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 1228e1605f0a0..3b34934f1a7dc 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -1,9 +1,6 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range -import cPickle as pickle import operator import os import unittest @@ -18,6 +15,9 @@ from pandas.core.reshape import melt, convert_dummies, lreshape import pandas.util.testing as tm +from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range +from six.moves import cPickle _multiprocess_can_split_ = True @@ -57,9 +57,9 @@ def test_value_vars(self): 'id2': self.df['id2'].tolist() * 2, 'variable': ['A']*10 + ['B']*10, 'value': self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', 'variable', 'value']) + 
columns=['id1', 'id2', 'variable', 'value']) tm.assert_frame_equal(result4, expected4) - + def test_custom_var_name(self): result5 = melt(self.df, var_name=self.var_name) self.assertEqual(result5.columns.tolist(), ['var', 'value']) @@ -80,7 +80,7 @@ def test_custom_var_name(self): 'id2': self.df['id2'].tolist() * 2, self.var_name: ['A']*10 + ['B']*10, 'value': self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', self.var_name, 'value']) + columns=['id1', 'id2', self.var_name, 'value']) tm.assert_frame_equal(result9, expected9) def test_custom_value_name(self): @@ -98,12 +98,12 @@ def test_custom_value_name(self): self.assertEqual(result13.columns.tolist(), ['id1', 'id2', 'variable', 'val']) result14 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], value_name=self.value_name) + value_vars=['A', 'B'], value_name=self.value_name) expected14 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, 'variable': ['A']*10 + ['B']*10, self.value_name: self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', 'variable', self.value_name]) + columns=['id1', 'id2', 'variable', self.value_name]) tm.assert_frame_equal(result14, expected14) def test_custom_var_and_value_name(self): @@ -123,12 +123,12 @@ def test_custom_var_and_value_name(self): self.assertEqual(result18.columns.tolist(), ['id1', 'id2', 'var', 'val']) result19 = melt(self.df, id_vars=['id1', 'id2'], - value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) + value_vars=['A', 'B'], var_name=self.var_name, value_name=self.value_name) expected19 = DataFrame({'id1': self.df['id1'].tolist() * 2, 'id2': self.df['id2'].tolist() * 2, var_name: ['A']*10 + ['B']*10, value_name: self.df['A'].tolist() + self.df['B'].tolist()}, - columns=['id1', 'id2', self.var_name, self.value_name]) + columns=['id1', 'id2', self.var_name, self.value_name]) tm.assert_frame_equal(result19, expected19) def test_custom_var_and_value_name(self): diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 0dfae47dd9ce3..95ef66eb8cb83 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -69,7 +69,7 @@ def test_dictionary_union(self): dict2 = {1 : 1, 2 : 2, 4 : 4} union = rplot.dictionary_union(dict1, dict2) self.assertEqual(len(union), 4) - keys = union.keys() + keys = list(union.keys()) self.assertTrue(1 in keys) self.assertTrue(2 in keys) self.assertTrue(3 in keys) diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 84fbc4397b9cf..3b7f693c8e6d4 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -525,7 +525,7 @@ def test_orderedDict_ctor(self): import pandas, random data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) - self.assertTrue(all(s.values == data.values())) + self.assertTrue(all(s.values == list(data.values()))) def test_orderedDict_subclass_ctor(self): # GH3283 @@ -535,7 +535,7 @@ class A(OrderedDict): pass data = A([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) - self.assertTrue(all(s.values == data.values())) + self.assertTrue(all(s.values == list(data.values()))) def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 483d989e9e13b..e356e9e9fe5f1 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -987,7 +987,7 @@ def _adorn_subplots(self): if self._need_to_set_index: labels = 
[com.pprint_thing(key) for key in self.data.index] - labels = dict(zip(list(range(len(self.data.index))), labels)) + labels = dict(zip(range(len(self.data.index)), labels)) for ax_ in self.axes: # ax_.set_xticks(self.xticks) diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 747d7bfb08d67..f2d2b1fd61fc0 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -602,7 +602,7 @@ def trellis(self, layers): grouped = data.groupby(self.by[0]) else: grouped = data.groupby(self.by) - groups = grouped.groups.keys() + groups = list(grouped.groups.keys()) if self.by[0] == '.' or self.by[1] == '.': shingle1 = set([g for g in groups]) else: @@ -646,8 +646,8 @@ def dictionary_union(dict1, dict2): A union of the dictionaries. It assumes that values with the same keys are identical. """ - keys1 = dict1.keys() - keys2 = dict2.keys() + keys1 = list(dict1.keys()) + keys2 = list(dict2.keys()) result = {} for key1 in keys1: result[key1] = dict1[key1] @@ -773,13 +773,13 @@ def adjust_subplots(fig, axes, trellis, layers): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - if len(legend.keys()) == 0: + if len(list(legend.keys())) == 0: key_function = lambda tup: tup - elif len(legend.keys()[0]) == 2: + elif len(list(legend.keys())[0]) == 2: key_function = lambda tup: (tup[1]) else: key_function = lambda tup: (tup[1], tup[3]) - for key in sorted(legend.keys(), key=key_function): + for key in sorted(list(legend.keys()), key=key_function): value = legend[key] patches.append(value) if len(key) == 2: @@ -846,13 +846,13 @@ def render(self, fig=None): legend = dictionary_union(legend, layer.legend) patches = [] labels = [] - if len(legend.keys()) == 0: + if len(list(legend.keys())) == 0: key_function = lambda tup: tup - elif len(legend.keys()[0]) == 2: + elif len(list(legend.keys())[0]) == 2: key_function = lambda tup: (tup[1]) else: key_function = lambda tup: (tup[1], tup[3]) - for key in sorted(legend.keys(), key=key_function): + for key in sorted(list(legend.keys()), key=key_function): value = legend[key] patches.append(value) if len(key) == 2: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 3bcf93464e0c1..303a11929064a 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -105,7 +105,7 @@ def _params(self): attrs = [(k, v) for k, v in compat.iteritems(vars(self)) if k not in ['kwds', '_offset', 'name', 'normalize', 'busdaycalendar']] - attrs.extend(self.kwds.items()) + attrs.extend(list(self.kwds.items())) attrs = sorted(set(attrs)) params = tuple([str(self.__class__)] + attrs) diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 053ff8af2f280..2057a418fa6f9 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -2000,8 +2000,10 @@ def test_map_with_string_constructor(self): raw = [2005, 2007, 2009] index = PeriodIndex(raw, freq='A') types = str, - if not py3compat.PY3: - types += unicode, + + if not py3compat.PY3: + # six.text_type is unicode on PY2, preserving the unicode coverage + types += six.text_type, for t in types: expected = np.array(list(map(t, raw)), dtype=object) diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 0336f659f5dd6..68ea73a661c74 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -7,9 +7,6 @@ import nose import numpy as np -from pandas.util.py3compat import range, long, StringIO -from pandas.util.compat import product -from six.moves import map, zip randn = np.random.randn from pandas
import (Index, Series, TimeSeries, DataFrame, @@ -31,7 +28,9 @@ import pandas.index as _index -import cPickle as pickle +from pandas.util.py3compat import range, long, StringIO +from pandas.util.compat import product +from six.moves import map, zip, cPickle as pickle from pandas import read_pickle import pandas.core.datetools as dt from numpy.random import rand @@ -1790,7 +1789,7 @@ def test_add_union(self): def test_misc_coverage(self): rng = date_range('1/1/2000', periods=5) result = rng.groupby(rng.day) - tm.assert_isinstance(result.values()[0][0], Timestamp) + tm.assert_isinstance(list(result.values())[0][0], Timestamp) idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) self.assert_(idx.equals(list(idx))) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 1f3e80dc07893..1c7607c63b76c 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -5,8 +5,6 @@ import unittest import nose -from pandas.util.py3compat import range -from six.moves import zip import numpy as np import pytz @@ -26,11 +24,12 @@ import pandas.util.testing as tm import pandas.lib as lib -import cPickle as pickle import pandas.core.datetools as dt from numpy.random import rand from pandas.util.testing import assert_frame_equal import pandas.util.py3compat as py3compat +from pandas.util.py3compat import range +from six.moves import zip, cPickle as pickle from pandas.core.datetools import BDay import pandas.core.common as com diff --git a/pandas/util/compat.py b/pandas/util/compat.py index 8b91484562937..413cc0a9dac83 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -223,7 +223,7 @@ def __repr__(self, _repr_running={}): try: if not self: return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, self.items()) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) finally: del _repr_running[call_key] @@ -258,7 +258,7 @@ def __eq__(self, other): ''' if isinstance(other, OrderedDict): - return len(self) == len(other) and self.items() == other.items() + return len(self) == len(other) and list(self.items()) == list(other.items()) return dict.__eq__(self, other) def __ne__(self, other): @@ -499,4 +499,4 @@ def __missing__(self, key): def __reduce__(self): # optional, for pickle support args = self.default_factory if self.default_factory else tuple() - return type(self), args, None, None, self.items() + return type(self), args, None, None, list(self.items()) diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py index f4d262e45c65d..969ba94fda76c 100644 --- a/pandas/util/py3compat.py +++ b/pandas/util/py3compat.py @@ -39,12 +39,12 @@ def bytes_to_str(b, encoding='ascii'): from cStringIO import StringIO as cStringIO # writeable and handles unicode from StringIO import StringIO -except: +except ImportError: # no more StringIO from io import StringIO cStringIO = StringIO try: from io import BytesIO -except: +except ImportError: from cStringIO import StringIO as BytesIO diff --git a/scripts/json_manip.py b/scripts/json_manip.py index ccdf02b3f9681..4733df68c5b64 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -321,7 +321,7 @@ def Q(filter_,thing): if isinstance(filter_, type([])): return flatten(*[_Q(x,thing) for x in filter_]) elif isinstance(filter_, type({})): - d = dict.fromkeys(filter_.keys()) + d = dict.fromkeys(list(filter_.keys())) #print d for k in d: #print flatten(Q(k,thing)) diff --git a/scripts/pypistats.py b/scripts/pypistats.py 
index e64be63551fde..41343f6d30c76 100644 --- a/scripts/pypistats.py +++ b/scripts/pypistats.py @@ -93,7 +93,7 @@ def get_downloads(self): result = pd.DataFrame({'downloads': totals, 'release_date': first_upload}) result = result.sort('release_date') - result = result.drop(to_omit + rollup.keys()) + result = result.drop(to_omit + list(rollup.keys())) result.index.name = 'release' by_date = result.reset_index().set_index('release_date').downloads diff --git a/vb_suite/make.py b/vb_suite/make.py index 74a0818fbd1d1..1bea9ae1abaea 100755 --- a/vb_suite/make.py +++ b/vb_suite/make.py @@ -159,7 +159,7 @@ def _get_config(): func = funcd.get(arg) if func is None: raise SystemExit('Do not know how to handle %s; valid args are %s' % ( - arg, funcd.keys())) + arg, list(funcd.keys()))) func() else: small_docs = False diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index 4ecaa3b9c8185..b9f859942d63f 100755 --- a/vb_suite/perf_HEAD.py +++ b/vb_suite/perf_HEAD.py @@ -71,7 +71,7 @@ def dump_as_gist(data, desc="The Commit", njobs=None): print("\n\n" + "-" * 80) gist = json.loads(r.read()) - file_raw_url = gist['files'].items()[0][1]['raw_url'] + file_raw_url = list(gist['files'].items())[0][1]['raw_url'] print("[vbench-gist-raw_url] %s" % file_raw_url) print("[vbench-html-url] %s" % gist['html_url']) print("[vbench-api-url] %s" % gist['url']) diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 7428bbb07df99..d7a5b9d3e4049 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -144,7 +144,7 @@ def get_results_df(db, rev): # Sinch vbench.db._reg_rev_results returns an unlabeled dict, # we have to break encapsulation a bit. - results.columns = db._results.c.keys() + results.columns = list(db._results.c.keys()) results = results.join(bench['name'], on='checksum').set_index("checksum") return results From b83784fb9898e2877161f2ccd15c356a0af594e4 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sat, 27 Jul 2013 21:52:33 -0400 Subject: [PATCH 05/11] CLN/ENH: Add list versions of iterator methods. py3compat now has lrange, lzip, lmap, lreduce and lfilter, which work like the python builtins and produce lists (to make it clearer where code expects an iterator vs. a container).
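To illustrate, the list-producing helpers are thin wrappers over the (iterator-returning) builtins. This is a minimal sketch, not the literal definitions, which live in pandas/util/py3compat.py and may differ in detail:

    def lrange(*args, **kwargs):
        # like range(), but always materializes a list
        return list(range(*args, **kwargs))

    def lzip(*args, **kwargs):
        # like zip(), but always materializes a list of tuples
        return list(zip(*args, **kwargs))

    def lmap(*args, **kwargs):
        # like map(), but always materializes a list
        return list(map(*args, **kwargs))

    def lfilter(*args, **kwargs):
        # like filter(), but always materializes a list
        return list(filter(*args, **kwargs))

A call site that previously read list(zip(*tuples)) becomes lzip(*tuples), and list(range(N)) becomes lrange(N), so it is obvious at a glance that a container, not an iterator, is wanted.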
--- bench/alignment.py | 6 +- bench/bench_merge.py | 8 +- bench/bench_merge_sqlite.py | 3 +- bench/bench_take_indexing.py | 2 +- bench/bench_unique.py | 2 +- bench/better_unique.py | 2 +- bench/serialize.py | 4 +- bench/test.py | 2 +- doc/sphinxext/comment_eater.py | 2 +- doc/sphinxext/compiler_unparse.py | 2 +- doc/sphinxext/ipython_directive.py | 8 +- doc/sphinxext/plot_directive.py | 4 +- examples/finance.py | 3 +- pandas/compat/scipy.py | 6 +- pandas/core/common.py | 2 +- pandas/core/config.py | 4 +- pandas/core/format.py | 8 +- pandas/core/frame.py | 23 +- pandas/core/generic.py | 2 +- pandas/core/groupby.py | 14 +- pandas/core/index.py | 15 +- pandas/core/indexing.py | 2 +- pandas/core/internals.py | 10 +- pandas/core/nanops.py | 2 +- pandas/core/panel.py | 12 +- pandas/core/panelnd.py | 2 +- pandas/core/reshape.py | 2 +- pandas/core/series.py | 4 +- pandas/core/strings.py | 2 +- pandas/io/data.py | 10 +- pandas/io/excel.py | 6 +- pandas/io/ga.py | 2 +- pandas/io/html.py | 12 +- pandas/io/parsers.py | 17 +- pandas/io/pickle.py | 2 +- pandas/io/pytables.py | 8 +- pandas/io/sql.py | 8 +- pandas/io/stata.py | 12 +- pandas/io/tests/generate_legacy_pickles.py | 2 +- pandas/io/tests/test_cparser.py | 2 +- pandas/io/tests/test_excel.py | 2 +- pandas/io/tests/test_html.py | 2 +- pandas/io/tests/test_json/test_pandas.py | 11 +- pandas/io/tests/test_json/test_ujson.py | 2 +- pandas/io/tests/test_parsers.py | 28 +-- pandas/io/tests/test_pytables.py | 53 ++--- pandas/io/tests/test_sql.py | 8 +- pandas/io/wb.py | 6 +- pandas/rpy/common.py | 2 +- pandas/sparse/frame.py | 8 +- pandas/sparse/panel.py | 6 +- pandas/sparse/tests/test_sparse.py | 10 +- pandas/stats/misc.py | 2 +- pandas/stats/ols.py | 2 +- pandas/stats/tests/test_moments.py | 2 +- pandas/stats/var.py | 12 +- pandas/tests/test_categorical.py | 4 +- pandas/tests/test_common.py | 8 +- pandas/tests/test_format.py | 43 ++-- pandas/tests/test_frame.py | 248 ++++++++++----------- pandas/tests/test_graphics.py | 31 ++- pandas/tests/test_groupby.py | 51 +++-- pandas/tests/test_index.py | 74 +++--- pandas/tests/test_indexing.py | 39 ++-- pandas/tests/test_internals.py | 2 +- pandas/tests/test_multilevel.py | 29 ++- pandas/tests/test_panel.py | 26 +-- pandas/tests/test_panel4d.py | 10 +- pandas/tests/test_py3compat.py | 70 ++++++ pandas/tests/test_reshape.py | 2 +- pandas/tests/test_series.py | 82 ++++--- pandas/tests/test_strings.py | 4 +- pandas/tests/test_tseries.py | 12 +- pandas/tools/merge.py | 8 +- pandas/tools/pivot.py | 6 +- pandas/tools/plotting.py | 22 +- pandas/tools/rplot.py | 2 +- pandas/tools/tests/test_merge.py | 32 +-- pandas/tools/tests/test_tile.py | 2 +- pandas/tools/tile.py | 2 +- pandas/tseries/converter.py | 4 +- pandas/tseries/frequencies.py | 2 +- pandas/tseries/period.py | 2 +- pandas/tseries/tests/test_period.py | 16 +- pandas/tseries/tests/test_plotting.py | 8 +- pandas/tseries/tests/test_resample.py | 6 +- pandas/tseries/tests/test_timeseries.py | 30 ++- pandas/tseries/tests/test_timezones.py | 10 +- pandas/tseries/util.py | 8 +- pandas/util/compat.py | 2 +- pandas/util/counter.py | 2 +- pandas/util/py3compat.py | 52 +++-- pandas/util/testing.py | 14 +- scripts/bench_join.py | 10 +- scripts/bench_join_multi.py | 10 +- scripts/find_commits_touching_func.py | 6 +- scripts/json_manip.py | 2 +- vb_suite/groupby.py | 6 +- vb_suite/pandas_vb_common.py | 2 +- vb_suite/parser.py | 4 +- vb_suite/test_perf.py | 2 +- 101 files changed, 735 insertions(+), 667 deletions(-) create mode 100644 pandas/tests/test_py3compat.py diff 
--git a/bench/alignment.py b/bench/alignment.py index a5ffe96140117..1f32064db7016 100644 --- a/bench/alignment.py +++ b/bench/alignment.py @@ -1,5 +1,5 @@ # Setup -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import numpy as np import pandas import la @@ -7,8 +7,8 @@ K = 50 arr1 = np.random.randn(N, K) arr2 = np.random.randn(N, K) -idx1 = list(range(N)) -idx2 = list(range(K)) +idx1 = lrange(N) +idx2 = lrange(K) # pandas dma1 = pandas.DataFrame(arr1, idx1, idx2) diff --git a/bench/bench_merge.py b/bench/bench_merge.py index 7820c7792afc4..c4f595eb05cb9 100644 --- a/bench/bench_merge.py +++ b/bench/bench_merge.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import random N = 10000 @@ -8,7 +8,7 @@ def get_test_data(ngroups=100, n=N): - unique_groups = list(range(ngroups)) + unique_groups = lrange(ngroups) arr = np.asarray(np.tile(unique_groups, n / ngroups), dtype=object) if len(arr) < n: @@ -66,7 +66,7 @@ def get_test_data(ngroups=100, n=N): # R results -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import StringIO, lrange # many to one r_results = read_table(StringIO(""" base::merge plyr data.table inner 0.2475 0.1183 0.1100 @@ -94,7 +94,7 @@ def get_test_data(ngroups=100, n=N): # many to many -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import StringIO, lrange # many to one r_results = read_table(StringIO("""base::merge plyr data.table inner 0.4610 0.1276 0.1269 diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py index e15a482f39c50..cc2e3197109ce 100644 --- a/bench/bench_merge_sqlite.py +++ b/bench/bench_merge_sqlite.py @@ -4,8 +4,7 @@ import time from pandas import DataFrame from pandas.util.testing import rands -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, zip import random N = 10000 diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py index b6a7b04eb8adf..51a8e6441ff58 100644 --- a/bench/bench_take_indexing.py +++ b/bench/bench_take_indexing.py @@ -6,7 +6,7 @@ from pandas import DataFrame import timeit -from six.moves import zip +from pandas.util.py3compat import zip setup = """ from pandas import Series diff --git a/bench/bench_unique.py b/bench/bench_unique.py index 8a24630632684..8ede875b25e97 100644 --- a/bench/bench_unique.py +++ b/bench/bench_unique.py @@ -2,7 +2,7 @@ from pandas import * from pandas.util.testing import rands from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import zip import pandas._tseries as lib import numpy as np import matplotlib.pyplot as plt diff --git a/bench/better_unique.py b/bench/better_unique.py index f8881ecd7b6bc..f1d8115b1a6d8 100644 --- a/bench/better_unique.py +++ b/bench/better_unique.py @@ -1,7 +1,7 @@ from __future__ import print_function from pandas import DataFrame from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import zip import timeit setup = """ diff --git a/bench/serialize.py b/bench/serialize.py index 9c0ba84209c49..bc837622360c7 100644 --- a/bench/serialize.py +++ b/bench/serialize.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import time import os import numpy as np @@ -22,7 +22,7 @@ def roundtrip_archive(N, iterations=10): # 
Create data arr = np.random.randn(N, N) lar = la.larry(arr) - dma = pandas.DataFrame(arr, list(range(N)), list(range(N))) + dma = pandas.DataFrame(arr, lrange(N), lrange(N)) # filenames filename_numpy = '/Users/wesm/tmp/numpy.npz' diff --git a/bench/test.py b/bench/test.py index 9d47c091b932d..3008fc67ade00 100644 --- a/bench/test.py +++ b/bench/test.py @@ -3,7 +3,7 @@ import itertools import collections import scipy.ndimage as ndi -from six.moves import zip +from pandas.util.py3compat import zip N = 10000 diff --git a/doc/sphinxext/comment_eater.py b/doc/sphinxext/comment_eater.py index 3b15bd178eade..6d216162a00e7 100755 --- a/doc/sphinxext/comment_eater.py +++ b/doc/sphinxext/comment_eater.py @@ -1,4 +1,4 @@ -from six.moves import cStringIO +from pandas.util.py3compat import cStringIO import compiler import inspect import textwrap diff --git a/doc/sphinxext/compiler_unparse.py b/doc/sphinxext/compiler_unparse.py index 240dd17243ef6..0fa3983abdd8a 100755 --- a/doc/sphinxext/compiler_unparse.py +++ b/doc/sphinxext/compiler_unparse.py @@ -12,7 +12,7 @@ """ import sys -from six.moves import cStringIO as StringIO +from pandas.util.py3compat import cStringIO as StringIO from compiler.ast import Const, Name, Tuple, Div, Mul, Sub, Add def unparse(ast, single_line_functions=False): diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py index b74808f0e73d1..195875047e73d 100644 --- a/doc/sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_directive.py @@ -58,8 +58,8 @@ #----------------------------------------------------------------------------- # Stdlib -from pandas.util.py3compat import range -from six.moves import map, cStringIO as StringIO +from pandas.util.py3compat import range, lmap +from pandas.util.py3compat import map, cStringIO as StringIO import ast import os import re @@ -72,7 +72,7 @@ from docutils import nodes from sphinx.util.compat import Directive import six -from six.moves import zip +from pandas.util.py3compat import zip matplotlib.use('Agg') @@ -303,7 +303,7 @@ def process_input(self, data, input_prompt, lineno): def _remove_first_space_if_any(line): return line[1:] if line.startswith(' ') else line - input_lines = list(map(_remove_first_space_if_any, input.split('\n'))) + input_lines = lmap(_remove_first_space_if_any, input.split('\n')) self.datacontent = data diff --git a/doc/sphinxext/plot_directive.py b/doc/sphinxext/plot_directive.py index 795410380f335..9c648f474ceea 100755 --- a/doc/sphinxext/plot_directive.py +++ b/doc/sphinxext/plot_directive.py @@ -77,11 +77,11 @@ from pandas.util.py3compat import range import sys, os, glob, shutil, imp, warnings, re, textwrap, traceback -from six.moves import cStringIO as StringIO +from pandas.util.py3compat import cStringIO as StringIO import sphinx import warnings -from six.moves import map +from pandas.util.py3compat import map warnings.warn("A plot_directive module is also available under " "matplotlib.sphinxext; expect this numpydoc.plot_directive " "module to be deprecated after relevant features have been " diff --git a/examples/finance.py b/examples/finance.py index 069f299d585bb..a8fb580f954a3 100644 --- a/examples/finance.py +++ b/examples/finance.py @@ -3,6 +3,7 @@ """ from datetime import datetime +from pandas.util.py3compat import zip import matplotlib.finance as fin import numpy as np @@ -19,7 +20,7 @@ def getQuotes(symbol, start, end): quotes = fin.quotes_historical_yahoo(symbol, start, end) - dates, open, close, high, low, volume = list(zip(*quotes)) + dates, open, close, high, 
low, volume = zip(*quotes) data = { 'open': open, diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py index 26a70963d5bcc..53436c517f480 100644 --- a/pandas/compat/scipy.py +++ b/pandas/compat/scipy.py @@ -2,7 +2,7 @@ Shipping functions from SciPy to reduce dependency on having SciPy installed """ -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import numpy as np @@ -224,9 +224,9 @@ def percentileofscore(a, score, kind='rank'): if kind == 'rank': if not(np.any(a == score)): a = np.append(a, score) - a_len = np.array(list(range(len(a)))) + a_len = np.array(lrange(len(a))) else: - a_len = np.array(list(range(len(a)))) + 1.0 + a_len = np.array(lrange(len(a))) + 1.0 a = np.sort(a) idx = [a == score] diff --git a/pandas/core/common.py b/pandas/core/common.py index f4ae9e9c84053..3af0d7dba1699 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -16,7 +16,7 @@ from pandas.util import py3compat from pandas.util.py3compat import StringIO, BytesIO, range, long -from six.moves import zip, map +from pandas.util.py3compat import zip, map import six diff --git a/pandas/core/config.py b/pandas/core/config.py index 34dd2b744d25b..26fda8d3d926d 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -53,7 +53,7 @@ from collections import namedtuple import warnings import six -from six.moves import map +from pandas.util.py3compat import map, lmap DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple( @@ -746,7 +746,7 @@ def is_one_of_factory(legal_values): def inner(x): from pandas.core.common import pprint_thing as pp if not x in legal_values: - pp_values = list(map(pp, legal_values)) + pp_values = lmap(pp, legal_values) raise ValueError("Value must be one of %s" % pp("|".join(pp_values))) return inner diff --git a/pandas/core/format.py b/pandas/core/format.py index 8676d9a5447a5..150eade61c0ff 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,13 +1,11 @@ from __future__ import print_function # pylint: disable=W0141 -from pandas.util.py3compat import range from pandas.util import compat import sys import six -from six.moves import map, zip, reduce -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import StringIO, lzip, range, map, zip, reduce from pandas.core.common import adjoin, isnull, notnull from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.util import py3compat @@ -419,7 +417,7 @@ def is_numeric_dtype(dtype): if isinstance(self.columns, MultiIndex): fmt_columns = self.columns.format(sparsify=False, adjoin=False) - fmt_columns = list(zip(*fmt_columns)) + fmt_columns = lzip(*fmt_columns) dtypes = self.frame.dtypes.values need_leadsp = dict(zip(fmt_columns, map(is_numeric_dtype, dtypes))) str_columns = list(zip(*[[' ' + y @@ -718,7 +716,7 @@ def _write_hierarchical_rows(self, fmt_values, indent): idx_values = frame.index.format(sparsify=False, adjoin=False, names=False) - idx_values = list(zip(*idx_values)) + idx_values = lzip(*idx_values) if self.fmt.sparsify: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1c2ff4130d5c0..827ebc53d97fb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,9 +12,7 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from six.moves import zip -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range +from pandas.util.py3compat import range, zip, lrange, lmap, lzip, StringIO from pandas.util import 
compat import operator import sys @@ -59,7 +57,6 @@ from pandas.core.config import get_option, set_option import six -from six.moves import map #---------------------------------------------------------------------- # Docstring templates @@ -1151,7 +1148,7 @@ def to_records(self, index=True, convert_datetime64=True): else: if isinstance(self.index, MultiIndex): # array of tuples to numpy cols. copy copy copy - ix_vals = list(map(np.array,zip(*self.index.values))) + ix_vals = lmap(np.array,zip(*self.index.values)) else: ix_vals = [self.index.values] @@ -1166,10 +1163,10 @@ def to_records(self, index=True, convert_datetime64=True): count += 1 elif index_names[0] is None: index_names = ['index'] - names = index_names + list(map(str, self.columns)) + names = index_names + lmap(str, self.columns) else: arrays = [self[c].values for c in self.columns] - names = list(map(str, self.columns)) + names = lmap(str, self.columns) dtype = np.dtype([(x, v.dtype) for x, v in zip(names, arrays)]) return np.rec.fromarrays(arrays, dtype=dtype, names=names) @@ -1197,7 +1194,7 @@ def from_items(cls, items, columns=None, orient='columns'): ------- frame : DataFrame """ - keys, values = list(zip(*items)) + keys, values = lzip(*items) if orient == 'columns': if columns is not None: @@ -2911,7 +2908,7 @@ def _maybe_cast(values, labels=None): if not drop: names = self.index.names - zipped = list(zip(self.index.levels, self.index.labels)) + zipped = lzip(self.index.levels, self.index.labels) multi_col = isinstance(self.columns, MultiIndex) for i, (lev, lab) in reversed(list(enumerate(zipped))): @@ -4536,7 +4533,7 @@ def _apply_broadcast(self, func, axis): def applymap(self, func): """ Apply a function to a DataFrame that is intended to operate - elementwise, i.e. like doing list(map(func, series)) for each series in the + elementwise, i.e. 
like doing map(func, series) for each series in the DataFrame Parameters @@ -4889,7 +4886,7 @@ def pretty_name(x): series.min(), series.quantile(lb), series.median(), series.quantile(ub), series.max()]) - return self._constructor(list(map(list, zip(*destat))), index=destat_columns, + return self._constructor(lmap(list, zip(*destat)), index=destat_columns, columns=numdata.columns) #---------------------------------------------------------------------- @@ -5850,7 +5847,7 @@ def _to_arrays(data, columns, coerce_float=False, dtype=None): return arrays, columns else: # last ditch effort - data = list(map(tuple, data)) + data = lmap(tuple, data) return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype) @@ -5924,7 +5921,7 @@ def _convert_object_array(content, columns, coerce_float=False, dtype=None): def _get_names_from_index(data): - index = list(range(len(data))) + index = lrange(len(data)) has_some_name = any([s.name is not None for s in data]) if not has_some_name: return index diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 2dce7430c144c..0cf9a066e11bd 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11,7 +11,7 @@ from pandas.tseries.index import DatetimeIndex import pandas.core.common as com import six -from six.moves import map, zip +from pandas.util.py3compat import map, zip class PandasError(Exception): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index d465ce8b1575f..f65d1073017a5 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -2,10 +2,10 @@ import numpy as np import six -from pandas.util.py3compat import range, long +from pandas.util.py3compat import range, long, lrange, lzip from pandas.util.compat import OrderedDict from pandas.util import compat -from six.moves import zip, builtins +from pandas.util.py3compat import zip, builtins from pandas.core.base import PandasObject from pandas.core.categorical import Categorical @@ -675,7 +675,7 @@ def groups(self): if len(self.groupings) == 1: return self.groupings[0].groups else: - to_groupby = list(zip(*(ping.grouper for ping in self.groupings))) + to_groupby = lzip(*(ping.grouper for ping in self.groupings)) to_groupby = Index(to_groupby) return self.axis.groupby(to_groupby) @@ -1017,13 +1017,13 @@ def get_iterator(self, data, axis=0): else: start = 0 for edge, label in zip(self.bins, self.binlabels): - inds = list(range(start, edge)) + inds = lrange(start, edge) yield label, data.take(inds, axis=axis) start = edge n = len(data.axes[axis]) if start < n: - inds = list(range(start, n)) + inds = lrange(start, n) yield self.binlabels[-1], data.take(inds, axis=axis) def apply(self, f, data, axis=0, keep_internal=False): @@ -1445,7 +1445,7 @@ def _aggregate_multiple_funcs(self, arg): for x in arg] # indicated column order - columns = list(zip(*arg))[0] + columns = lzip(*arg)[0] else: # list of functions / function names columns = [] @@ -1454,7 +1454,7 @@ def _aggregate_multiple_funcs(self, arg): columns.append(f) else: columns.append(f.__name__) - arg = list(zip(columns, arg)) + arg = lzip(columns, arg) results = {} diff --git a/pandas/core/index.py b/pandas/core/index.py index 8b2f420d83a2d..c54aa895f668e 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,6 @@ # pylint: disable=E1101,E1103,W0232 -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, zip, lrange, lzip import six from pandas.util import compat import numpy as np @@ -1802,7 +1801,7 @@ def from_tuples(cls, 
tuples, sortorder=None, names=None): elif isinstance(tuples, list): arrays = list(lib.to_object_array_tuples(tuples).T) else: - arrays = list(zip(*tuples)) + arrays = lzip(*tuples) return MultiIndex.from_arrays(arrays, sortorder=sortorder, names=names) @@ -1942,7 +1941,7 @@ def drop(self, labels, level=None): if isinstance(loc, int): inds.append(loc) else: - inds.extend(list(range(loc.start, loc.stop))) + inds.extend(lrange(loc.start, loc.stop)) return self.delete(inds) @@ -2490,7 +2489,7 @@ def union(self, other): result_names = self.names if self.names == other.names else None uniq_tuples = lib.fast_unique_multiple([self.values, other.values]) - return MultiIndex.from_arrays(list(zip(*uniq_tuples)), sortorder=0, + return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) def intersection(self, other): @@ -2520,7 +2519,7 @@ def intersection(self, other): labels=[[]] * self.nlevels, names=result_names) else: - return MultiIndex.from_arrays(list(zip(*uniq_tuples)), sortorder=0, + return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) def diff(self, other): @@ -2637,7 +2636,7 @@ def _wrap_joined_index(self, joined, other): # For utility purposes def _sparsify(label_list, start=0,sentinal=''): - pivoted = list(zip(*label_list)) + pivoted = lzip(*label_list) k = len(label_list) result = pivoted[:start + 1] @@ -2661,7 +2660,7 @@ def _sparsify(label_list, start=0,sentinal=''): prev = cur - return list(zip(*result)) + return lzip(*result) def _ensure_index(index_like): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index cb841169d88ab..1518aa3c94efe 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -4,7 +4,7 @@ from pandas.core.common import _asarray_tuplesafe from pandas.core.index import Index, MultiIndex, _ensure_index from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import zip import pandas.core.common as com import six import pandas.lib as lib diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 37aa4e4ca27ec..e2b3131fac9ce 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -19,8 +19,8 @@ from pandas.tslib import Timestamp from pandas.util import py3compat -from pandas.util.py3compat import range -from six.moves import map, zip +from pandas.util.py3compat import range, lrange, lmap +from pandas.util.py3compat import map, zip class Block(PandasObject): @@ -833,7 +833,7 @@ def re_replacer(s): f = np.vectorize(re_replacer, otypes=[self.dtype]) try: - filt = list(map(self.items.get_loc, filter)) + filt = lmap(self.items.get_loc, filter) except TypeError: filt = slice(None) @@ -1925,7 +1925,7 @@ def _add_new_block(self, item, value, loc=None): # need to shift elements to the right if self._ref_locs[loc] is not None: - for i in reversed(list(range(loc+1,len(self._ref_locs)))): + for i in reversed(lrange(loc+1,len(self._ref_locs))): self._ref_locs[i] = self._ref_locs[i-1] self._ref_locs[loc] = (new_block, 0) @@ -2535,5 +2535,5 @@ def _possibly_convert_to_indexer(loc): if com._is_bool_indexer(loc): loc = [i for i, v in enumerate(loc) if v] elif isinstance(loc,slice): - loc = list(range(loc.start,loc.stop)) + loc = lrange(loc.start,loc.stop) return loc diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 699d0ac21823f..937235730ba6e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -12,7 +12,7 @@ import pandas.hashtable as _hash import pandas.tslib as tslib -from six.moves import builtins +from 
pandas.util.py3compat import builtins import six diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 63c5548833643..35e5d1e2327aa 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -3,7 +3,7 @@ """ # pylint: disable=E1103,W0231,W0212,W0621 -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, lmap from pandas.util import compat import operator import sys @@ -28,7 +28,7 @@ import pandas.core.nanops as nanops import pandas.lib as lib import six -from six.moves import map, zip +from pandas.util.py3compat import map, zip def _ensure_like_indices(time, panels): @@ -808,13 +808,13 @@ def _reindex_multi(self, items, major, minor): new_minor, indexer2 = self.minor_axis.reindex(minor) if indexer0 is None: - indexer0 = list(range(len(new_items))) + indexer0 = lrange(len(new_items)) if indexer1 is None: - indexer1 = list(range(len(new_major))) + indexer1 = lrange(len(new_major)) if indexer2 is None: - indexer2 = list(range(len(new_minor))) + indexer2 = lrange(len(new_minor)) for i, ind in enumerate(indexer0): com.take_2d_multi(values[ind], (indexer1, indexer2), @@ -1141,7 +1141,7 @@ def transpose(self, *args, **kwargs): for a in self._AXIS_ORDERS: if not a in kwargs: - where = list(map(a.startswith, aliases)) + where = lmap(a.startswith, aliases) if any(where): if sum(where) != 1: diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 3981850d9f861..71d815482fcc5 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -1,7 +1,7 @@ """ Factory methods to create N-D panels """ import pandas.lib as lib -from six.moves import zip +from pandas.util.py3compat import zip import six diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index a89f5f2706401..136f4893008e3 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -3,7 +3,7 @@ from pandas.util.py3compat import range from pandas.util import compat -from six.moves import zip +from pandas.util.py3compat import zip import six import itertools diff --git a/pandas/core/series.py b/pandas/core/series.py index c8075e223df4f..b25afe5d9f7f1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -6,7 +6,6 @@ # pylint: disable=W0703,W0622,W0613,W0201 from pandas.util import compat -from six.moves import zip import operator from distutils.version import LooseVersion import types @@ -28,6 +27,7 @@ from pandas.tseries.period import PeriodIndex, Period from pandas.util import py3compat from pandas.util.terminal import get_terminal_size +from pandas.util.py3compat import zip, lzip import pandas.core.array as pa @@ -1219,7 +1219,7 @@ def iteritems(self): """ Lazily iterate over (index, value) tuples """ - return list(zip(iter(self.index), iter(self))) + return lzip(iter(self.index), iter(self)) iterkv = iteritems if py3compat.PY3: # pragma: no cover diff --git a/pandas/core/strings.py b/pandas/core/strings.py index dbf50aef59552..c625438dfe203 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,6 +1,6 @@ import numpy as np -from six.moves import zip +from pandas.util.py3compat import zip import six from pandas.core.common import isnull from pandas.core.series import Series diff --git a/pandas/io/data.py b/pandas/io/data.py index 90e0469987806..04d6cbc956d70 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -12,14 +12,14 @@ import numpy as np -from pandas.util.py3compat import StringIO, bytes_to_str, range +from pandas.util.py3compat import StringIO, bytes_to_str, range, lrange, lmap from pandas import Panel, DataFrame, 
Series, read_csv, concat from pandas.core.common import PandasError from pandas.io.parsers import TextParser from pandas.io.common import urlopen, ZipFile, urlencode from pandas.util.testing import _network_error_classes import six -from six.moves import map, zip +from pandas.util.py3compat import map, zip class SymbolWarning(UserWarning): @@ -465,7 +465,7 @@ def get_data_famafrench(name): with ZipFile(tmpf, 'r') as zf: data = zf.open(name + '.txt').readlines() - line_lengths = np.array(list(map(len, data))) + line_lengths = np.array(lmap(len, data)) file_edges = np.where(line_lengths == 2)[0] datasets = {} @@ -473,7 +473,7 @@ def get_data_famafrench(name): for i, (left_edge, right_edge) in enumerate(edges): dataset = [d.split() for d in data[left_edge:right_edge]] if len(dataset) > 10: - ncol_raw = np.array(list(map(len, dataset))) + ncol_raw = np.array(lmap(len, dataset)) ncol = np.median(ncol_raw) header_index = np.where(ncol_raw == ncol - 1)[0][-1] header = dataset[header_index] @@ -809,7 +809,7 @@ def get_forward_data(self, months, call=True, put=False, near=False, data : dict of str, DataFrame """ warnings.warn("get_forward_data() is deprecated", FutureWarning) - in_months = list(range(CUR_MONTH, CUR_MONTH + months + 1)) + in_months = lrange(CUR_MONTH, CUR_MONTH + months + 1) in_years = [CUR_YEAR] * (months + 1) # Figure out how many items in in_months go past 12 diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 132b1549eba33..f592d80f33216 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -11,8 +11,8 @@ from pandas.io.parsers import TextParser from pandas.tseries.period import Period from pandas import json -from six.moves import map, zip, reduce -from pandas.util.py3compat import range +from pandas.util.py3compat import map, zip, reduce +from pandas.util.py3compat import range, lrange import six def read_excel(path_or_buf, sheetname, kind=None, **kwds): @@ -155,7 +155,7 @@ def _excel2num(x): for rng in areas.split(','): if ':' in rng: rng = rng.split(':') - cols += list(range(_excel2num(rng[0]), _excel2num(rng[1]) + 1)) + cols += lrange(_excel2num(rng[0]), _excel2num(rng[1]) + 1) else: cols.append(_excel2num(rng)) return cols diff --git a/pandas/io/ga.py b/pandas/io/ga.py index b0db040b00ed2..74157464b21de 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -18,7 +18,7 @@ from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError import six -from six.moves import zip +from pandas.util.py3compat import zip TYPE_MAP = {six.u('INTEGER'): int, six.u('FLOAT'): float, six.u('TIME'): int} diff --git a/pandas/io/html.py b/pandas/io/html.py index 9805c194db994..2617566add5a8 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -14,10 +14,10 @@ from pandas import DataFrame, MultiIndex, isnull from pandas.io.common import _is_url, urlopen, parse_url -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, lmap from pandas.util import compat import six -from six.moves import map +from pandas.util.py3compat import map try: @@ -93,9 +93,9 @@ def _get_skiprows_iter(skiprows): A proper iterator to use to skip rows of a DataFrame. 
""" if isinstance(skiprows, slice): - return list(range(skiprows.start or 0, skiprows.stop, skiprows.step or 1)) + return lrange(skiprows.start or 0, skiprows.stop, skiprows.step or 1) elif isinstance(skiprows, numbers.Integral): - return list(range(skiprows)) + return lrange(skiprows) elif isinstance(skiprows, collections.Container): return skiprows else: @@ -345,14 +345,14 @@ def _parse_raw_thead(self, table): thead = self._parse_thead(table) res = [] if thead: - res = list(map(self._text_getter, self._parse_th(thead[0]))) + res = lmap(self._text_getter, self._parse_th(thead[0])) return np.array(res).squeeze() if res and len(res) == 1 else res def _parse_raw_tfoot(self, table): tfoot = self._parse_tfoot(table) res = [] if tfoot: - res = list(map(self._text_getter, self._parse_td(tfoot[0]))) + res = lmap(self._text_getter, self._parse_td(tfoot[0])) return np.array(res).squeeze() if res and len(res) == 1 else res def _parse_raw_tbody(self, table): diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 760d14467421a..85d5ad0d39afb 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2,8 +2,7 @@ Module contains tools for processing files into DataFrames or other objects """ from __future__ import print_function -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, StringIO, lzip from pandas.util import compat import re import csv @@ -26,7 +25,7 @@ import pandas.parser as _parser from pandas.tseries.period import Period import six -from six.moves import zip +from pandas.util.py3compat import zip _parser_params = """Also supports optionally iterating or breaking of the file into chunks. @@ -562,7 +561,7 @@ def _clean_options(self, options, engine): na_values, na_fvalues = _clean_na_values(na_values, keep_default_na) if com.is_integer(skiprows): - skiprows = list(range(skiprows)) + skiprows = lrange(skiprows) skiprows = set() if skiprows is None else set(skiprows) # put stuff back @@ -731,7 +730,7 @@ def _extract_multi_indexer_columns(self, header, index_names, col_names, passed_ field_count = len(header[0]) def extract(r): return tuple([ r[i] for i in range(field_count) if i not in sic ]) - columns = list(zip(*[ extract(r) for r in header ])) + columns = lzip(*[ extract(r) for r in header ]) names = ic + columns # if we find 'Unnamed' all of a single level, then our header was too long @@ -980,7 +979,7 @@ def __init__(self, src, **kwds): self.names = ['X%d' % i for i in range(self._reader.table_width)] else: - self.names = list(range(self._reader.table_width)) + self.names = lrange(self._reader.table_width) # XXX self._set_noconvert_columns() @@ -1454,7 +1453,7 @@ def _infer_columns(self): if self.prefix: columns = [ ['X%d' % i for i in range(ncols)] ] else: - columns = [ list(range(ncols)) ] + columns = [ lrange(ncols) ] else: columns = [ names ] @@ -1552,7 +1551,7 @@ def _get_index_name(self, columns): # column and index names on diff rows implicit_first_cols = 0 - self.index_col = list(range(len(line))) + self.index_col = lrange(len(line)) self.buf = self.buf[1:] for c in reversed(line): @@ -1563,7 +1562,7 @@ def _get_index_name(self, columns): if implicit_first_cols > 0: self._implicit_index = True if self.index_col is None: - self.index_col = list(range(implicit_first_cols)) + self.index_col = lrange(implicit_first_cols) index_name = None else: diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index b1dee20e6ef04..56bca476c04c6 100644 --- a/pandas/io/pickle.py +++ 
b/pandas/io/pickle.py @@ -1,4 +1,4 @@ -from six.moves import cPickle as pkl +from pandas.util.py3compat import cPickle as pkl def to_pickle(obj, path): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index f0392836650b8..9080919421c97 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -6,7 +6,7 @@ # pylint: disable-msg=E1101,W0613,W0603 from datetime import datetime, date -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, lmap from pandas.util import compat import time import re @@ -39,7 +39,7 @@ from contextlib import contextmanager import six -from six.moves import map, zip +from pandas.util.py3compat import map, zip # versioning attribute _version = '0.10.1' @@ -3127,7 +3127,7 @@ def delete(self, where=None, **kwargs): # we must remove in reverse order! pg = groups.pop() for g in reversed(groups): - rows = l.take(list(range(g, pg))) + rows = l.take(lrange(g, pg)) table.removeRows(start=rows[rows.index[0] ], stop=rows[rows.index[-1]] + 1) pg = g @@ -3547,7 +3547,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.eval() def __unicode__(self): - attrs = list(map(pprint_thing, (self.field, self.op, self.value))) + attrs = lmap(pprint_thing, (self.field, self.op, self.value)) return "field->%s,op->%s,value->%s" % tuple(attrs) @property diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 16ccafcd14dd9..c5111c77cc4a1 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -5,13 +5,13 @@ from __future__ import print_function from datetime import datetime, date -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lzip import numpy as np import traceback from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull -from six.moves import map, zip +from pandas.util.py3compat import map, zip import six #------------------------------------------------------------------------------ @@ -108,7 +108,7 @@ def tquery(sql, con=None, cur=None, retry=True): if result and len(result[0]) == 1: # python 3 compat - result = list(list(zip(*result))[0]) + result = list(lzip(*result)[0]) elif result is None: # pragma: no cover result = [] @@ -293,7 +293,7 @@ def get_schema(frame, name, flavor, keys=None): lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) # Replace spaces in DataFrame column names with _. 
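A note on the compat helpers these hunks keep importing: lrange, lmap, and lzip are not builtins. They are list-returning wrappers routed through pandas.util.py3compat, needed because Python 3's range, map, and zip return lazy iterators. Their definitions are not visible in this part of the diff, so the following is only a minimal sketch, assuming the real helpers in pandas/util/py3compat.py are equivalent:

    # Hypothetical sketch of the list-returning compat helpers; the real
    # definitions live in pandas/util/py3compat.py, not shown in this diff.
    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        def lrange(*args, **kwargs):
            # list(range(...)): for call sites that index, slice, mutate,
            # or concatenate the result.
            return list(range(*args, **kwargs))

        def lmap(*args, **kwargs):
            return list(map(*args, **kwargs))

        def lzip(*args, **kwargs):
            return list(zip(*args, **kwargs))
    else:
        # The Python 2 builtins already return lists; alias them directly.
        lrange, lmap, lzip = range, map, zip

With eager, list-returning wrappers, a call like list(lzip(*result)[0]) in tquery above stays subscriptable on both Python lines, which is exactly the "# python 3 compat" case that hunk annotates.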
safe_columns = [s.replace(' ', '_').strip() for s in frame.dtypes.index] - column_types = list(zip(safe_columns, map(lookup_type, frame.dtypes))) + column_types = lzip(safe_columns, map(lookup_type, frame.dtypes)) if flavor == 'sqlite': columns = ',\n '.join('[%s] %s' % x for x in column_types) else: diff --git a/pandas/io/stata.py b/pandas/io/stata.py index f76a6f1540670..338c6e1ac02f5 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -21,7 +21,7 @@ import datetime from pandas.util import py3compat from pandas.util import compat -from pandas.util.py3compat import StringIO, long +from pandas.util.py3compat import StringIO, long, lrange, lmap, lzip from pandas import isnull from pandas.io.parsers import _parser_params, Appender from pandas.io.common import get_filepath_or_buffer @@ -226,7 +226,7 @@ def __init__(self, encoding): # we're going to drop the label and cast to int self.DTYPE_MAP = \ dict( - list(zip(range(1, 245), ['a' + str(i) for i in range(1, 245)])) + + lzip(range(1, 245), ['a' + str(i) for i in range(1, 245)]) + [ (251, np.int16), (252, np.int32), @@ -235,7 +235,7 @@ def __init__(self, encoding): (255, np.float64) ] ) - self.TYPE_MAP = list(range(251)) + list('bhlfd') + self.TYPE_MAP = lrange(251) + list('bhlfd') #NOTE: technically, some of these are wrong. there are more numbers # that can be represented. it's the 27 ABOVE and BELOW the max listed # numeric data type in [U] 12.2.2 of the 11.2 manual @@ -385,7 +385,7 @@ def _calcsize(self, fmt): def _col_size(self, k=None): """Calculate size of a data record.""" if len(self.col_sizes) == 0: - self.col_sizes = list(map(lambda x: self._calcsize(x), self.typlist)) + self.col_sizes = lmap(lambda x: self._calcsize(x), self.typlist) if k is None: return self.col_sizes else: @@ -539,13 +539,13 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None): data[col] = Series(data[col], data[col].index, self.dtyplist[i]) if convert_dates: - cols = np.where(list(map(lambda x: x in _date_formats, self.fmtlist)))[0] + cols = np.where(lmap(lambda x: x in _date_formats, self.fmtlist))[0] for i in cols: col = data.columns[i] data[col] = data[col].apply(_stata_elapsed_date_to_datetime, args=(self.fmtlist[i],)) if convert_categoricals: - cols = np.where(list(map(lambda x: x in six.iterkeys(self.value_label_dict), self.lbllist)))[0] + cols = np.where(lmap(lambda x: x in six.iterkeys(self.value_label_dict), self.lbllist))[0] for i in cols: col = data.columns[i] labeled_data = np.copy(data[col]) diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index 7659b22e4a71b..85052ed2ba1bb 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -1,7 +1,7 @@ """ self-contained to write legacy pickle files """ from __future__ import print_function -from six.moves import zip, cPickle as pickle +from pandas.util.py3compat import zip, cPickle as pickle def _create_sp_series(): diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index 2063b34c95e57..b3c88611d40b3 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -31,7 +31,7 @@ from pandas.parser import TextReader import pandas.parser as parser import six -from six.moves import map +from pandas.util.py3compat import map class TestCParser(unittest.TestCase): diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 251a32cc3a933..7726711def5b2 100644 --- a/pandas/io/tests/test_excel.py +++ 
b/pandas/io/tests/test_excel.py @@ -37,7 +37,7 @@ from pandas.parser import OverflowError import six -from six.moves import map +from pandas.util.py3compat import map def _skip_if_no_xlrd(): try: diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index fc78d630bed5b..3c6848d86abe5 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -14,7 +14,7 @@ import numpy as np from numpy.random import rand from numpy.testing.decorators import slow -from six.moves import map, zip +from pandas.util.py3compat import map, zip try: from importlib import import_module diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 36bf0306d4729..f27345b917b3b 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -2,11 +2,10 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, StringIO from pandas.util import compat from pandas.io.common import URLError -from six.moves import cPickle as pickle +from pandas.util.py3compat import cPickle as pickle import operator import os import unittest @@ -323,7 +322,7 @@ def _check_all_orients(series, dtype=None): _check_all_orients(self.ts) # dtype - s = Series(list(range(6)), index=['a','b','c','d','e','f']) + s = Series(lrange(6), index=['a','b','c','d','e','f']) _check_all_orients(Series(s, dtype=np.float64), dtype=np.float64) _check_all_orients(Series(s, dtype=np.int), dtype=np.int) @@ -343,7 +342,7 @@ def test_frame_from_json_precise_float(self): def test_typ(self): - s = Series(list(range(6)), index=['a','b','c','d','e','f'], dtype='int64') + s = Series(lrange(6), index=['a','b','c','d','e','f'], dtype='int64') result = read_json(s.to_json(),typ=None) assert_series_equal(result,s) @@ -442,7 +441,7 @@ def test_weird_nested_json(self): def test_doc_example(self): dfj2 = DataFrame(np.random.randn(5, 2), columns=list('AB')) dfj2['date'] = Timestamp('20130101') - dfj2['ints'] = list(range(5)) + dfj2['ints'] = lrange(5) dfj2['bools'] = True dfj2.index = pd.date_range('20130101',periods=5) diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 1e5e455dd70d4..cbea04ffb46de 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -20,7 +20,7 @@ from pandas.util import compat import pandas.json as ujson import six -from six.moves import zip +from pandas.util.py3compat import zip import pandas.util.py3compat as py3compat import numpy as np diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 5796ea57748b0..eeb34862f0e6a 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -12,7 +12,7 @@ import numpy as np from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex -from pandas.util.py3compat import StringIO, BytesIO, PY3, range, long +from pandas.util.py3compat import StringIO, BytesIO, PY3, range, long, lrange, lmap from pandas.io.common import urlopen, URLError import pandas.io.parsers as parsers from pandas.io.parsers import (read_csv, read_table, read_fwf, @@ -35,7 +35,7 @@ from pandas.parser import OverflowError import six -from six.moves import map +from pandas.util.py3compat import map class ParserTests(object): @@ -609,7 +609,7 @@ def test_nat_parse(self): # GH 3062 df = 
DataFrame(dict({ - 'A' : np.asarray(list(range(10)),dtype='float64'), + 'A' : np.asarray(lrange(10),dtype='float64'), 'B' : pd.Timestamp('20010101') })) df.iloc[3:6,:] = np.nan @@ -639,7 +639,7 @@ def test_skiprows_bug(self): 1/2/2000,4,5,6 1/3/2000,7,8,9 """ - data = self.read_csv(StringIO(text), skiprows=list(range(6)), header=None, + data = self.read_csv(StringIO(text), skiprows=lrange(6), header=None, index_col=0, parse_dates=True) data2 = self.read_csv(StringIO(text), skiprows=6, header=None, @@ -792,20 +792,20 @@ def test_parse_dates_column_list(self): 15/01/2010;P;P;50;1;14/1/2011 01/05/2010;P;P;50;1;15/1/2011''' - expected = self.read_csv(StringIO(data), sep=";", index_col=list(range(4))) + expected = self.read_csv(StringIO(data), sep=";", index_col=lrange(4)) lev = expected.index.levels[0] expected.index.levels[0] = lev.to_datetime(dayfirst=True) expected['aux_date'] = to_datetime(expected['aux_date'], dayfirst=True) - expected['aux_date'] = list(map(Timestamp, expected['aux_date'])) + expected['aux_date'] = lmap(Timestamp, expected['aux_date']) tm.assert_isinstance(expected['aux_date'][0], datetime) - df = self.read_csv(StringIO(data), sep=";", index_col=list(range(4)), + df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4), parse_dates=[0, 5], dayfirst=True) tm.assert_frame_equal(df, expected) - df = self.read_csv(StringIO(data), sep=";", index_col=list(range(4)), + df = self.read_csv(StringIO(data), sep=";", index_col=lrange(4), parse_dates=['date', 'aux_date'], dayfirst=True) tm.assert_frame_equal(df, expected) @@ -828,7 +828,7 @@ def test_no_header(self): self.assert_(np.array_equal(df_pref.columns, ['X0', 'X1', 'X2', 'X3', 'X4'])) - self.assert_(np.array_equal(df.columns, list(range(5)))) + self.assert_(np.array_equal(df.columns, lrange(5))) self.assert_(np.array_equal(df2.columns, names)) @@ -1550,7 +1550,7 @@ def test_skipinitialspace(self): sfile = StringIO(s) # it's 33 columns - result = self.read_csv(sfile, names=list(range(33)), na_values=['-9999.0'], + result = self.read_csv(sfile, names=lrange(33), na_values=['-9999.0'], header=None, skipinitialspace=True) self.assertTrue(pd.isnull(result.ix[0, 29])) @@ -1607,7 +1607,7 @@ def test_converters_corner_with_nas(self): if hash(np.int64(-1)) != -2: raise nose.SkipTest - from pandas.util.py3compat import StringIO + from pandas.util.py3compat import StringIO, lrange, lmap csv = """id,score,days 1,2,12 2,2-5, @@ -1643,7 +1643,7 @@ def convert_score(x): if not x: return np.nan if x.find('-') > 0: - valmin, valmax = list(map(int, x.split('-'))) + valmin, valmax = lmap(int, x.split('-')) val = 0.5 * (valmin + valmax) else: val = float(x) @@ -2322,9 +2322,9 @@ def test_parse_ragged_csv(self): data = "1,2\n3,4,5" result = self.read_csv(StringIO(data), header=None, - names=list(range(50))) + names=lrange(50)) expected = self.read_csv(StringIO(data), header=None, - names=list(range(3))).reindex(columns=list(range(50))) + names=lrange(3)).reindex(columns=lrange(50)) tm.assert_frame_equal(result, expected) diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index bbfe7e93123d6..e10c5ad411bec 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import nose import unittest import os @@ -130,7 +130,7 @@ def roundtrip(key, obj,**kwargs): tm.assert_panel_equal(o, roundtrip('panel',o)) # table - df = 
DataFrame(dict(A=list(range(5)), B=list(range(5)))) + df = DataFrame(dict(A=lrange(5), B=lrange(5))) df.to_hdf(self.path,'table',append=True) result = read_hdf(self.path, 'table', where = ['index>2']) assert_frame_equal(df[df.index>2],result) @@ -484,7 +484,7 @@ def test_encoding(self): raise nose.SkipTest('system byteorder is not little, skipping test_encoding!') with ensure_clean(self.path) as store: - df = DataFrame(dict(A='foo',B='bar'),index=list(range(5))) + df = DataFrame(dict(A='foo',B='bar'),index=lrange(5)) df.loc[2,'A'] = np.nan df.loc[3,'B'] = np.nan _maybe_remove(store, 'df') @@ -607,7 +607,7 @@ def test_append_with_different_block_ordering(self): for i in range(10): df = DataFrame(np.random.randn(10,2),columns=list('AB')) - df['index'] = list(range(10)) + df['index'] = lrange(10) df['index'] += i*10 df['int64'] = Series([1]*len(df),dtype='int64') df['int16'] = Series([1]*len(df),dtype='int16') @@ -783,7 +783,7 @@ def check_col(key,name,size): def check_col(key,name,size): self.assert_(getattr(store.get_storer(key).table.description,name).itemsize == size) - df = DataFrame(dict(A = 'foo', B = 'bar'),index=list(range(10))) + df = DataFrame(dict(A = 'foo', B = 'bar'),index=lrange(10)) # a min_itemsize that creates a data_column _maybe_remove(store, 'df') @@ -1018,8 +1018,9 @@ def test_big_table_frame(self): raise nose.SkipTest('no big table frame') # create and write a big table - df = DataFrame(np.random.randn(2000 * 100, 100), index=list(range( - 2000 * 100)), columns=['E%03d' % i for i in range(100)]) + df = DataFrame(np.random.randn(2000 * 100, 100), + index=lrange(2000 * 100), + columns=['E%03d' % i for i in range(100)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x @@ -1042,7 +1043,7 @@ def test_big_table2_frame(self): import time start_time = time.time() df = DataFrame(np.random.randn(1000 * 1000, 60), - index=list(range(int(1000 * 1000))), + index=lrange(int(1000 * 1000)), columns=['E%03d' % i for i in range(60)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x @@ -1071,8 +1072,8 @@ def test_big_put_frame(self): print ("\nbig_put start") import time start_time = time.time() - df = DataFrame(np.random.randn(1000 * 1000, 60), index=list(range(int( - 1000 * 1000))), columns=['E%03d' % i for i in range(60)]) + df = DataFrame(np.random.randn(1000 * 1000, 60), index=lrange(int( + 1000 * 1000)), columns=['E%03d' % i for i in range(60)]) for x in range(20): df['String%03d' % x] = 'string%03d' % x for x in range(20): @@ -1381,14 +1382,14 @@ def compare(a,b): compare(store.select('df_tz',where=Term('A','>=',df.A[3])),df[df.A>=df.A[3]]) _maybe_remove(store, 'df_tz') - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=list(range(5))) + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130103',tz='US/Eastern')),index=lrange(5)) store.append('df_tz',df) result = store['df_tz'] compare(result,df) assert_frame_equal(result,df) _maybe_remove(store, 'df_tz') - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=list(range(5))) + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='EET')),index=lrange(5)) self.assertRaises(TypeError, store.append, 'df_tz', df) # this is ok @@ -1399,14 +1400,14 @@ def compare(a,b): assert_frame_equal(result,df) # can't append with diff timezone - df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = 
Timestamp('20130102',tz='CET')),index=list(range(5))) + df = DataFrame(dict(A = Timestamp('20130102',tz='US/Eastern'), B = Timestamp('20130102',tz='CET')),index=lrange(5)) self.assertRaises(ValueError, store.append, 'df_tz', df) # as index with ensure_clean(self.path) as store: # GH 4098 example - df = DataFrame(dict(A = Series(list(range(3)), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern')))) + df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H', tz='US/Eastern')))) _maybe_remove(store, 'df') store.put('df',df) @@ -2096,7 +2097,7 @@ def test_select_with_many_inputs(self): df = DataFrame(dict(ts=bdate_range('2012-01-01', periods=300), A=np.random.randn(300), - B=list(range(300)), + B=lrange(300), users = ['a']*50 + ['b']*50 + ['c']*100 + ['a%03d' % i for i in range(100)])) _maybe_remove(store, 'df') store.append('df', df, data_columns=['ts', 'A', 'B', 'users']) @@ -2117,7 +2118,7 @@ def test_select_with_many_inputs(self): expected = df[ (df.ts >= Timestamp('2012-02-01')) & df.users.isin(selector) ] tm.assert_frame_equal(expected, result) - selector = list(range(100,200)) + selector = lrange(100,200) result = store.select('df', [Term('B', selector)]) expected = df[ df.B.isin(selector) ] tm.assert_frame_equal(expected, result) @@ -2215,7 +2216,7 @@ def test_select_iterator(self): def test_retain_index_attributes(self): # GH 3499, losing frequency info on index recreation - df = DataFrame(dict(A = Series(list(range(3)), + df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H')))) with ensure_clean(self.path) as store: @@ -2232,7 +2233,7 @@ def test_retain_index_attributes(self): # try to append a table with a different frequency warnings.filterwarnings('ignore', category=AttributeConflictWarning) - df2 = DataFrame(dict(A = Series(list(range(3)), + df2 = DataFrame(dict(A = Series(lrange(3), index=date_range('2002-1-1',periods=3,freq='D')))) store.append('data',df2) warnings.filterwarnings('always', category=AttributeConflictWarning) @@ -2241,10 +2242,10 @@ def test_retain_index_attributes(self): # this is ok _maybe_remove(store,'df2') - df2 = DataFrame(dict(A = Series(list(range(3)), + df2 = DataFrame(dict(A = Series(lrange(3), index=[Timestamp('20010101'),Timestamp('20010102'),Timestamp('20020101')]))) store.append('df2',df2) - df3 = DataFrame(dict(A = Series(list(range(3)),index=date_range('2002-1-1',periods=3,freq='D')))) + df3 = DataFrame(dict(A = Series(lrange(3),index=date_range('2002-1-1',periods=3,freq='D')))) store.append('df2',df3) def test_retain_index_attributes2(self): @@ -2253,20 +2254,20 @@ def test_retain_index_attributes2(self): warnings.filterwarnings('ignore', category=AttributeConflictWarning) - df = DataFrame(dict(A = Series(list(range(3)), index=date_range('2000-1-1',periods=3,freq='H')))) + df = DataFrame(dict(A = Series(lrange(3), index=date_range('2000-1-1',periods=3,freq='H')))) df.to_hdf(path,'data',mode='w',append=True) - df2 = DataFrame(dict(A = Series(list(range(3)), index=date_range('2002-1-1',periods=3,freq='D')))) + df2 = DataFrame(dict(A = Series(lrange(3), index=date_range('2002-1-1',periods=3,freq='D')))) df2.to_hdf(path,'data',append=True) idx = date_range('2000-1-1',periods=3,freq='H') idx.name = 'foo' - df = DataFrame(dict(A = Series(list(range(3)), index=idx))) + df = DataFrame(dict(A = Series(lrange(3), index=idx))) df.to_hdf(path,'data',mode='w',append=True) self.assert_(read_hdf(path,'data').index.name == 'foo') idx2 = date_range('2001-1-1',periods=3,freq='H') 
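Why the mechanical list(range(...)) -> lrange(...) rewrite in these test hunks is not purely cosmetic: under Python 3 a bare range is a lazy view, so any call site that concatenates or mutates the result needs a real list. A freestanding illustration, not taken from the patch:

    r = range(5)              # lazy view on Python 3, plain list on Python 2

    assert r[2] == 2          # indexing works either way
    assert list(r) == [0, 1, 2, 3, 4]   # materializing works either way

    try:
        r + list('bhlfd')     # concatenation raises TypeError on Python 3
    except TypeError:
        pass

This is exactly why the stata.py hunk above now reads self.TYPE_MAP = lrange(251) + list('bhlfd'): with a bare range(251), the + would raise under Python 3.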
idx2.name = 'bar' - df2 = DataFrame(dict(A = Series(list(range(3)), index=idx2))) + df2 = DataFrame(dict(A = Series(lrange(3), index=idx2))) df2.to_hdf(path,'data',append=True) self.assert_(read_hdf(path,'data').index.name is None) @@ -2426,7 +2427,7 @@ def test_coordinates(self): # get coordinates back & test vs frame _maybe_remove(store, 'df') - df = DataFrame(dict(A=list(range(5)), B=list(range(5)))) + df = DataFrame(dict(A=lrange(5), B=lrange(5))) store.append('df', df) c = store.select_as_coordinates('df', ['index<3']) assert((c.values == np.arange(3)).all() == True) @@ -2755,7 +2756,7 @@ def test_legacy_table_write(self): columns=['A', 'B', 'C']) store.append('mi', df) - df = DataFrame(dict(A = 'foo', B = 'bar'),index=list(range(10))) + df = DataFrame(dict(A = 'foo', B = 'bar'),index=lrange(10)) store.append('df', df, data_columns = ['B'], min_itemsize={'A' : 200 }) store.close() diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 28703975fcc81..5dc719953c15a 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -11,7 +11,7 @@ from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull -from pandas.util.py3compat import StringIO, range +from pandas.util.py3compat import StringIO, range, lrange import six import pandas.io.sql as sql @@ -173,12 +173,12 @@ def _check_roundtrip(self, frame): frame['txt'] = ['a'] * len(frame) frame2 = frame.copy() - frame2['Idx'] = Index(list(range(len(frame2)))) + 10 + frame2['Idx'] = Index(lrange(len(frame2))) + 10 sql.write_frame(frame2, name='test_table2', con=self.db) result = sql.read_frame("select * from test_table2", self.db, index_col='Idx') expected = frame.copy() - expected.index = Index(list(range(len(frame2)))) + 10 + expected.index = Index(lrange(len(frame2))) + 10 expected.index.name = 'Idx' print(expected.index.names) print(result.index.names) @@ -410,7 +410,7 @@ def _check_roundtrip(self, frame): frame['txt'] = ['a'] * len(frame) frame2 = frame.copy() - index = Index(list(range(len(frame2)))) + 10 + index = Index(lrange(len(frame2))) + 10 frame2['Idx'] = index drop_sql = "DROP TABLE IF EXISTS test_table2" cur = self.db.cursor() diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 65a666228e584..4563c0a08cf93 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,7 +1,7 @@ from __future__ import print_function -from six.moves import map, reduce -from pandas.util.py3compat import range +from pandas.util.py3compat import map, reduce +from pandas.util.py3compat import range, lrange from pandas.io.common import urlopen from pandas.io import json import pandas @@ -142,7 +142,7 @@ def get_value(x): data.topics = data.topics.apply(lambda x: ' ; '.join(x)) # Clean outpu data = data.sort(columns='id') - data.index = pandas.Index(list(range(data.shape[0]))) + data.index = pandas.Index(lrange(data.shape[0])) return data diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index 75065a19da0f4..66e3e177771dc 100644 --- a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -4,7 +4,7 @@ """ from __future__ import print_function -from six.moves import zip +from pandas.util.py3compat import zip from pandas.util.py3compat import range import numpy as np diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 26c0a151a8f4b..c889d4c19f915 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -6,7 +6,7 @@ # pylint: disable=E1101,E1103,W0231,E0202 from numpy import nan -from pandas.util.py3compat import range +from pandas.util.py3compat 
import range, lmap from pandas.util import compat import numpy as np @@ -23,7 +23,7 @@ from pandas.sparse.series import SparseSeries from pandas.util.decorators import Appender import pandas.lib as lib -from six.moves import map +from pandas.util.py3compat import map class _SparseMockBlockManager(object): @@ -853,7 +853,7 @@ def apply(self, func, axis=0, broadcast=False): def applymap(self, func): """ Apply a function to a DataFrame that is intended to operate - elementwise, i.e. like doing list(map(func, series)) for each series in the + elementwise, i.e. like doing map(func, series) for each series in the DataFrame Parameters @@ -865,7 +865,7 @@ def applymap(self, func): ------- applied : DataFrame """ - return self.apply(lambda x: list(map(func, x))) + return self.apply(lambda x: lmap(func, x)) @Appender(DataFrame.fillna.__doc__) def fillna(self, value=None, method=None, inplace=False, limit=None): diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 494cbaf838175..e16dfdafd4fa2 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -5,8 +5,8 @@ # pylint: disable=E1101,E1103,W0231 -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, lrange +from pandas.util.py3compat import zip from pandas.util import compat import numpy as np @@ -209,7 +209,7 @@ def set_value(self, item, major, minor, value): def __delitem__(self, key): loc = self.items.get_loc(key) - indices = list(range(loc)) + list(range(loc + 1, len(self.items))) + indices = lrange(loc) + lrange(loc + 1, len(self.items)) del self._frames[key] self._items = self._items.take(indices) diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index ff9d57bed449f..b39ec61f20fe4 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -22,9 +22,9 @@ import pandas.core.datetools as datetools from pandas.core.common import isnull import pandas.util.testing as tm -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange from pandas.util import compat -from six.moves import cPickle as pickle +from pandas.util.py3compat import cPickle as pickle import pandas.sparse.frame as spf @@ -36,7 +36,7 @@ import pandas.tests.test_frame as test_frame import pandas.tests.test_panel as test_panel import pandas.tests.test_series as test_series -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import StringIO, lrange from .test_array import assert_sp_array_equal @@ -828,7 +828,7 @@ def test_constructor_dataframe(self): def test_constructor_convert_index_once(self): arr = np.array([1.5, 2.5, 3.5]) - sdf = SparseDataFrame(columns=list(range(4)), index=arr) + sdf = SparseDataFrame(columns=lrange(4), index=arr) self.assertTrue(sdf[0].index is sdf[1].index) def test_constructor_from_series(self): @@ -1219,7 +1219,7 @@ def test_astype(self): self.assertRaises(Exception, self.frame.astype, np.int64) def test_fillna(self): - df = self.zframe.reindex(list(range(5))) + df = self.zframe.reindex(lrange(5)) result = df.fillna(0) expected = df.to_dense().fillna(0).to_sparse(fill_value=0) assert_sp_frame_equal(result, expected) diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py index 3e5db98d8335e..00c93e07c77dd 100644 --- a/pandas/stats/misc.py +++ b/pandas/stats/misc.py @@ -5,7 +5,7 @@ from pandas.core.api import Series, DataFrame, isnull, notnull from pandas.core.series import remove_na import six -from six.moves import zip +from pandas.util.py3compat import zip def 
zscore(series): diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index f1ac35cad1aaf..f5ca39d0129c3 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -4,7 +4,7 @@ # pylint: disable-msg=W0201 -from six.moves import zip +from pandas.util.py3compat import zip from itertools import starmap from pandas.util.py3compat import StringIO diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 3780455c0bb6c..df483aa5872a9 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -1,5 +1,5 @@ from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import zip import unittest import nose import sys diff --git a/pandas/stats/var.py b/pandas/stats/var.py index 5f4a4ec13fdde..524098292f148 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,7 +1,7 @@ from __future__ import division -from pandas.util.py3compat import range -from six.moves import zip, reduce +from pandas.util.py3compat import range, lrange +from pandas.util.py3compat import zip, reduce from pandas.util import compat import numpy as np from pandas.core.base import StringMixin @@ -80,7 +80,7 @@ def forecast(self, h): DataFrame """ forecast = self._forecast_raw(h)[:, 0, :] - return DataFrame(forecast, index=list(range(1, 1 + h)), + return DataFrame(forecast, index=lrange(1, 1 + h), columns=self._columns) def forecast_cov(self, h): @@ -103,7 +103,7 @@ def forecast_std_err(self, h): DataFrame """ return DataFrame(self._forecast_std_err_raw(h), - index=list(range(1, 1 + h)), columns=self._columns) + index=lrange(1, 1 + h), columns=self._columns) @cache_readonly def granger_causality(self): @@ -345,7 +345,7 @@ def _forecast_cov_beta_raw(self, n): for t in range(T + 1): index = t + p - y = values.take(list(range(index, index - p, -1)), axis=0).ravel() + y = values.take(lrange(index, index - p, -1), axis=0).ravel() trans_Z = np.hstack(([1], y)) trans_Z = trans_Z.reshape(1, len(trans_Z)) @@ -535,7 +535,7 @@ def forecast(self, h): Returns the forecasts at 1, 2, ..., n timesteps in the future. 
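The forecast hunk just below swaps index=list(range(1, 1 + h)) for lrange(1, 1 + h); the resulting index is the 1-based forecast horizon. A freestanding sketch of that construction, with h chosen arbitrarily:

    h = 5                               # forecast horizon, illustrative value
    index = list(range(1, 1 + h))       # what lrange(1, 1 + h) returns
    assert index == [1, 2, 3, 4, 5]     # one label per step ahead

A bare range would likely also satisfy the DataFrame constructor here, since the index is only iterated once; the patch standardizes on lrange so every converted call site gets a real list.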
""" forecast = self._forecast_raw(h).T.swapaxes(1, 2) - index = list(range(1, 1 + h)) + index = lrange(1, 1 + h) w = Panel(forecast, items=self._data.items, major_axis=index, minor_axis=self._data.minor_axis) return w diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index 9bab218e7dfd5..b0722c49e99fb 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import unittest import nose @@ -104,7 +104,7 @@ def test_value_counts(self): def test_na_flags_int_levels(self): # #1457 - levels = list(range(10)) + levels = lrange(10) labels = np.random.randint(0, 10, 20) labels[::5] = -1 diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 048b4c6f15b28..dc4ed02559a19 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,5 +1,5 @@ from datetime import datetime -from pandas.util.py3compat import range, long +from pandas.util.py3compat import range, long, lrange, lmap import sys import re @@ -17,7 +17,7 @@ from pandas.tslib import iNaT from pandas.util import py3compat import six -from six.moves import map +from pandas.util.py3compat import map _multiprocess_can_split_ = True @@ -123,7 +123,7 @@ def test_datetimeindex_from_empty_datetime64_array(): def test_nan_to_nat_conversions(): df = DataFrame(dict({ - 'A' : np.asarray(list(range(10)),dtype='float64'), + 'A' : np.asarray(lrange(10),dtype='float64'), 'B' : Timestamp('20010101') })) df.iloc[3:6,:] = np.nan result = df.loc[4,'B'].value @@ -196,7 +196,7 @@ def test_locs(mask): # exhaustively test all possible mask sequences of length 8 ncols = 8 for i in range(2 ** ncols): - cols = list(map(int, list(_bin(i, ncols)))) # count up in base2 + cols = lmap(int, list(_bin(i, ncols))) # count up in base2 mask = [cols[i] == 1 for i in range(len(cols))] test_locs(mask) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 50cf5a0d5bed8..fdd11b7bd6645 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,13 +1,7 @@ from __future__ import print_function # -*- coding: utf-8 -*- -try: - from pandas.util.py3compat import StringIO -except: - from io import StringIO - -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, zip, lrange, StringIO, PY3, lzip import os import sys import unittest @@ -19,7 +13,6 @@ import numpy as np from pandas import DataFrame, Series, Index -from pandas.util.py3compat import PY3 import pandas.core.format as fmt import pandas.util.testing as tm @@ -90,7 +83,7 @@ def test_eng_float_formatter(self): def test_repr_tuples(self): buf = StringIO() - df = DataFrame({'tups': list(zip(range(10), range(10)))}) + df = DataFrame({'tups': lzip(range(10), range(10))}) repr(df) df.to_string(col_space=10, buf=buf) @@ -105,7 +98,7 @@ def test_repr_truncation(self): _strlen = fmt._strlen_func() - for line, value in list(zip(r.split('\n'), df['B'])): + for line, value in lzip(r.split('\n'), df['B']): if _strlen(value) + 1 > max_len: self.assert_('...' 
in line) else: @@ -136,10 +129,10 @@ def test_repr_obeys_max_seq_limit(self): #unlimited reset_option("display.max_seq_items") - self.assertTrue(len(com.pprint_thing(list(range(1000))))> 2000) + self.assertTrue(len(com.pprint_thing(lrange(1000)))> 2000) with option_context("display.max_seq_items",5): - self.assertTrue(len(com.pprint_thing(list(range(1000))))< 100) + self.assertTrue(len(com.pprint_thing(lrange(1000)))< 100) def test_repr_is_valid_construction_code(self): import pandas as pd @@ -171,8 +164,8 @@ def test_repr_no_backslash(self): def test_expand_frame_repr(self): df_small = DataFrame('hello', [0], [0]) - df_wide = DataFrame('hello', [0], list(range(10))) - df_tall = DataFrame('hello', list(range(30)), list(range(5))) + df_wide = DataFrame('hello', [0], lrange(10)) + df_tall = DataFrame('hello', lrange(30), lrange(5)) with option_context('mode.sim_interactive', True): with option_context('display.max_columns', 10, @@ -197,7 +190,7 @@ def test_expand_frame_repr(self): def test_repr_non_interactive(self): # in non interactive mode, there can be no dependency on the # result of terminal auto size detection - df = DataFrame('hello', list(range(1000)), list(range(5))) + df = DataFrame('hello', lrange(1000), lrange(5)) with option_context('mode.sim_interactive', False, 'display.width', 0, @@ -321,7 +314,7 @@ def test_to_string_with_formatters(self): ('float', lambda x: '[% 4.1f]' % x), ('object', lambda x: '-%s-' % str(x))] result = df.to_string(formatters=dict(formatters)) - result2 = df.to_string(formatters=list(zip(*formatters))[1]) + result2 = df.to_string(formatters=lzip(*formatters)[1]) self.assertEqual(result, (' int float object\n' '0 0x1 [ 1.0] -(1, 2)-\n' '1 0x2 [ 2.0] -True-\n' @@ -661,7 +654,7 @@ def test_to_html_multiindex_sparsify(self): def test_to_html_index_formatter(self): df = DataFrame([[0, 1], [2, 3], [4, 5], [6, 7]], - columns=['foo', None], index=list(range(4))) + columns=['foo', None], index=lrange(4)) f = lambda x: 'abcd'[x] result = df.to_html(formatters={'__index__': f}) @@ -974,7 +967,7 @@ def test_to_string(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=list(range(200))) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1112,7 +1105,7 @@ def test_to_string_small_float_values(self): def test_to_string_float_index(self): index = Index([1.5, 2, 3, 4, 5]) - df = DataFrame(list(range(5)), index=index) + df = DataFrame(lrange(5), index=index) result = df.to_string() expected = (' 0\n' @@ -1147,7 +1140,7 @@ def test_to_string_int_formatting(self): self.assertEqual(output, expected) def test_to_string_index_formatter(self): - df = DataFrame([list(range(5)), list(range(5, 10)), list(range(10, 15))]) + df = DataFrame([lrange(5), lrange(5, 10), lrange(10, 15)]) rs = df.to_string(formatters={'__index__': lambda x: 'abc'[x]}) @@ -1195,7 +1188,7 @@ def test_to_string_format_na(self): self.assertEqual(result, expected) def test_to_string_line_width(self): - df = pd.DataFrame(123, list(range(10, 15)), list(range(30))) + df = pd.DataFrame(123, lrange(10, 15), lrange(30)) s = df.to_string(line_width=80) self.assertEqual(max(len(l) for l in s.split('\n')), 80) @@ -1203,7 +1196,7 @@ def test_to_html(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=list(range(200))) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1230,7 +1223,7 @@ def test_to_html(self): def test_to_html_filename(self): biggie = DataFrame({'A': randn(200), 'B': 
tm.makeStringIndex(200)}, - index=list(range(200))) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -1258,7 +1251,7 @@ def test_to_html_columns_arg(self): def test_to_html_multiindex(self): columns = pandas.MultiIndex.from_tuples(list(zip(np.arange(2).repeat(2), - np.mod(list(range(4)), 2))), + np.mod(lrange(4), 2))), names=['CL0', 'CL1']) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='left') @@ -1298,7 +1291,7 @@ def test_to_html_multiindex(self): self.assertEqual(result, expected) columns = pandas.MultiIndex.from_tuples(list(zip(range(4), - np.mod(list(range(4)), 2)))) + np.mod(lrange(4), 2)))) df = pandas.DataFrame([list('abcd'), list('efgh')], columns=columns) result = df.to_html(justify='right') diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index aadfea05cff16..9104a2140c5cd 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -2,18 +2,18 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta, time -import cPickle as pickle import operator import re import unittest import nose from pandas.util import py3compat -from pandas.util.py3compat import StringIO, range, long +from pandas.util.py3compat import cPickle as pickle +from pandas.util.py3compat import StringIO, range, long, lrange, lmap, lzip from pandas.util.compat import OrderedDict from pandas.util import compat import six -from six.moves import map, zip +from pandas.util.py3compat import map, zip from numpy import random, nan from numpy.random import randn @@ -177,7 +177,7 @@ def test_setitem_list(self): assert_series_equal(self.frame['B'], data['A']) assert_series_equal(self.frame['A'], data['B']) - df = DataFrame(0, list(range(3)), ['tt1', 'tt2'], dtype=np.int_) + df = DataFrame(0, lrange(3), ['tt1', 'tt2'], dtype=np.int_) df.ix[1, ['tt1', 'tt2']] = [1, 2] result = df.ix[1, ['tt1', 'tt2']] @@ -196,7 +196,7 @@ def test_setitem_list_not_dataframe(self): assert_almost_equal(self.frame[['A', 'B']].values, data) def test_setitem_list_of_tuples(self): - tuples = list(zip(self.frame['A'], self.frame['B'])) + tuples = lzip(self.frame['A'], self.frame['B']) self.frame['tuples'] = tuples result = self.frame['tuples'] @@ -362,7 +362,7 @@ def test_getattr(self): 'NONEXISTENT_NAME') def test_setattr_column(self): - df = DataFrame({'foobar': 1}, index=list(range(10))) + df = DataFrame({'foobar': 1}, index=lrange(10)) df.foobar = 5 self.assert_((df.foobar == 5).all()) @@ -566,11 +566,11 @@ def test_setitem_ambig(self): from decimal import Decimal # created as float type - dm = DataFrame(index=list(range(3)), columns=list(range(3))) + dm = DataFrame(index=lrange(3), columns=lrange(3)) coercable_series = Series([Decimal(1) for _ in range(3)], - index=list(range(3))) - uncoercable_series = Series(['foo', 'bzr', 'baz'], index=list(range(3))) + index=lrange(3)) + uncoercable_series = Series(['foo', 'bzr', 'baz'], index=lrange(3)) dm[0] = np.ones(3) self.assertEqual(len(dm.columns), 3) @@ -668,7 +668,7 @@ def test_getitem_fancy_slice_integers_step(self): self.assert_(isnull(df.ix[:8:2]).values.all()) def test_getitem_setitem_integer_slice_keyerrors(self): - df = DataFrame(np.random.randn(10, 5), index=list(range(0, 20, 2))) + df = DataFrame(np.random.randn(10, 5), index=lrange(0, 20, 2)) # this is OK cp = df.copy() @@ -781,11 +781,11 @@ def test_setitem_fancy_2d(self): assert_frame_equal(frame, expected) # new corner case of boolean slicing / setting - frame = DataFrame(list(zip([2, 
3, 9, 6, 7], [np.nan] * 5)), + frame = DataFrame(lzip([2, 3, 9, 6, 7], [np.nan] * 5), columns=['a', 'b']) lst = [100] lst.extend([np.nan] * 4) - expected = DataFrame(list(zip([100, 3, 9, 6, 7], lst)), + expected = DataFrame(lzip([100, 3, 9, 6, 7], lst), columns=['a', 'b']) frame[frame['a'] == 2] = 100 assert_frame_equal(frame, expected) @@ -1492,7 +1492,7 @@ def test_set_value_resize(self): self.assertRaises(ValueError, res3.set_value, 'foobar', 'baz', 'sam') def test_set_value_with_index_dtype_change(self): - df = DataFrame(randn(3, 3), index=list(range(3)), columns=list('ABC')) + df = DataFrame(randn(3, 3), index=lrange(3), columns=list('ABC')) res = df.set_value('C', 2, 1.0) self.assert_(list(res.index) == list(df.index) + ['C']) self.assert_(list(res.columns) == list(df.columns) + [2]) @@ -1500,7 +1500,7 @@ def test_set_value_with_index_dtype_change(self): def test_get_set_value_no_partial_indexing(self): # partial w/ MultiIndex raise exception index = MultiIndex.from_tuples([(0, 1), (0, 2), (1, 1), (1, 2)]) - df = DataFrame(index=index, columns=list(range(4))) + df = DataFrame(index=index, columns=lrange(4)) self.assertRaises(KeyError, df.get_value, 0, 1) # self.assertRaises(KeyError, df.set_value, 0, 1, 0) @@ -1513,7 +1513,7 @@ def test_single_element_ix_dont_upcast(self): self.assert_(com.is_integer(result)) def test_irow(self): - df = DataFrame(np.random.randn(10, 4), index=list(range(0, 20, 2))) + df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2)) result = df.irow(1) exp = df.ix[2] @@ -1540,7 +1540,7 @@ def test_irow(self): assert_frame_equal(result, expected) def test_icol(self): - df = DataFrame(np.random.randn(4, 10), columns=list(range(0, 20, 2))) + df = DataFrame(np.random.randn(4, 10), columns=lrange(0, 20, 2)) result = df.icol(1) exp = df.ix[:, 2] @@ -2072,7 +2072,7 @@ def test_constructor_list_frames(self): result = DataFrame([DataFrame([])]) self.assert_(result.shape == (1,0)) - result = DataFrame([DataFrame(dict(A = list(range(5))))]) + result = DataFrame([DataFrame(dict(A = lrange(5)))]) tm.assert_isinstance(result.iloc[0,0], DataFrame) def test_constructor_mixed_dtypes(self): @@ -2086,7 +2086,7 @@ def _make_mixed_dtypes_df(typ, ad = None): dtypes = MIXED_FLOAT_DTYPES arrays = [ np.array(np.random.randint(10, size=10), dtype = d) for d in dtypes ] - zipper = list(zip(dtypes,arrays)) + zipper = lzip(dtypes,arrays) for d,a in zipper: assert(a.dtype == d) if ad is None: @@ -2162,7 +2162,7 @@ def test_is_mixed_type(self): def test_constructor_ordereddict(self): import random nitems = 100 - nums = list(range(nitems)) + nums = lrange(nitems) random.shuffle(nums) expected = ['A%d' % i for i in nums] df = DataFrame(OrderedDict(zip(expected, [[0]] * nitems))) @@ -2362,14 +2362,14 @@ def test_constructor_ndarray(self): # automatic labeling frame = DataFrame(mat) - self.assert_(np.array_equal(frame.index, list(range(2)))) - self.assert_(np.array_equal(frame.columns, list(range(3)))) + self.assert_(np.array_equal(frame.index, lrange(2))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, list(range(3)))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, list(range(2)))) + self.assert_(np.array_equal(frame.index, lrange(2))) # 0-length axis frame = DataFrame(np.empty((0, 3))) @@ -2420,14 +2420,14 @@ def test_constructor_maskedarray(self): # automatic labeling frame = DataFrame(mat) - 
self.assert_(np.array_equal(frame.index, list(range(2)))) - self.assert_(np.array_equal(frame.columns, list(range(3)))) + self.assert_(np.array_equal(frame.index, lrange(2))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, index=[1, 2]) - self.assert_(np.array_equal(frame.columns, list(range(3)))) + self.assert_(np.array_equal(frame.columns, lrange(3))) frame = DataFrame(mat, columns=['A', 'B', 'C']) - self.assert_(np.array_equal(frame.index, list(range(2)))) + self.assert_(np.array_equal(frame.index, lrange(2))) # 0-length axis frame = DataFrame(ma.masked_all((0, 3))) @@ -2508,11 +2508,11 @@ def test_constructor_corner(self): self.assertEqual(df.values.shape, (0, 0)) # empty but with specified dtype - df = DataFrame(index=list(range(10)), columns=['a', 'b'], dtype=object) + df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=object) self.assert_(df.values.dtype == np.object_) # does not error but ends up float - df = DataFrame(index=list(range(10)), columns=['a', 'b'], dtype=int) + df = DataFrame(index=lrange(10), columns=['a', 'b'], dtype=int) self.assert_(df.values.dtype == np.object_) # #1783 empty dtype object @@ -2686,7 +2686,7 @@ def test_constructor_ragged(self): self.assertRaises(Exception, DataFrame, data) def test_constructor_scalar(self): - idx = Index(list(range(3))) + idx = Index(lrange(3)) df = DataFrame({"a": 0}, index=idx) expected = DataFrame({"a": [0, 0, 0]}, index=idx) assert_frame_equal(df, expected, check_dtype=False) @@ -2855,7 +2855,7 @@ def check(result, expected=None): # assignment # GH 3687 arr = np.random.randn(3, 2) - idx = list(range(2)) + idx = lrange(2) df = DataFrame(arr, columns=['A', 'A']) df.columns = idx expected = DataFrame(arr,columns=idx) @@ -2956,11 +2956,11 @@ def test_insert_benchmark(self): # from the vb_suite/frame_methods/frame_insert_columns N = 10 K = 5 - df = DataFrame(index=list(range(N))) + df = DataFrame(index=lrange(N)) new_col = np.random.randn(N) for i in range(K): df[i] = new_col - expected = DataFrame(np.repeat(new_col,K).reshape(N,K),index=list(range(N))) + expected = DataFrame(np.repeat(new_col,K).reshape(N,K),index=lrange(N)) assert_frame_equal(df,expected) def test_constructor_single_value(self): @@ -3096,12 +3096,12 @@ def test_constructor_for_list_with_dtypes(self): expected = Series({'float64' : 1}) assert_series_equal(result, expected) - df = DataFrame({'a' : 1 }, index=list(range(3))) + df = DataFrame({'a' : 1 }, index=lrange(3)) result = df.get_dtype_counts() expected = Series({'int64': 1}) assert_series_equal(result, expected) - df = DataFrame({'a' : 1. }, index=list(range(3))) + df = DataFrame({'a' : 1. 
}, index=lrange(3)) result = df.get_dtype_counts() expected = Series({'float64': 1 }) assert_series_equal(result, expected) @@ -3206,7 +3206,7 @@ def test_operators_timedelta64(self): def test__slice_consolidate_invalidate_item_cache(self): # #3970 - df = DataFrame({ "aa":list(range(5)), "bb":[2.2]*5}) + df = DataFrame({ "aa":lrange(5), "bb":[2.2]*5}) # Creates a second float block df["cc"] = 0.0 @@ -3579,7 +3579,7 @@ def test_join_str_datetime(self): str_dates = ['20120209', '20120222'] dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - A = DataFrame(str_dates, index=list(range(2)), columns=['aa']) + A = DataFrame(str_dates, index=lrange(2), columns=['aa']) C = DataFrame([[1, 2], [3, 4]], index=str_dates, columns=dt_dates) tst = A.join(C, on='aa') @@ -3631,7 +3631,7 @@ def test_from_records_sequencelike(self): # tuples is in the order of the columns result = DataFrame.from_records(tuples) - self.assert_(np.array_equal(result.columns, list(range(8)))) + self.assert_(np.array_equal(result.columns, lrange(8))) # test exclude parameter & we are casting the results here (as we don't have dtype info to recover) columns_to_test = [ columns.index('C'), columns.index('E1') ] @@ -3714,7 +3714,7 @@ def __iter__(self): return iter(self.args) recs = [Record(1, 2, 3), Record(4, 5, 6), Record(7, 8, 9)] - tups = list(map(tuple, recs)) + tups = lmap(tuple, recs) result = DataFrame.from_records(recs) expected = DataFrame.from_records(tups) @@ -3773,7 +3773,7 @@ def test_repr_mixed_big(self): # big mixed biggie = DataFrame({'A': randn(200), 'B': tm.makeStringIndex(200)}, - index=list(range(200))) + index=lrange(200)) biggie['A'][:20] = nan biggie['B'][:20] = nan @@ -3809,8 +3809,8 @@ def test_repr_big(self): buf = StringIO() # big one - biggie = DataFrame(np.zeros((200, 4)), columns=list(range(4)), - index=list(range(200))) + biggie = DataFrame(np.zeros((200, 4)), columns=lrange(4), + index=lrange(200)) foo = repr(biggie) def test_repr_unsortable(self): @@ -3977,7 +3977,7 @@ def test_itertuples(self): assert_series_equal(s, expected) df = DataFrame({'floats': np.random.randn(5), - 'ints': list(range(5))}, columns=['floats', 'ints']) + 'ints': lrange(5)}, columns=['floats', 'ints']) for tup in df.itertuples(index=False): tm.assert_isinstance(tup[1], np.integer) @@ -4642,7 +4642,7 @@ def test_string_comparison(self): assert_frame_equal(df[-mask_b], df.ix[1:1, :]) def test_float_none_comparison(self): - df = DataFrame(np.random.randn(8, 3), index=list(range(8)), + df = DataFrame(np.random.randn(8, 3), index=lrange(8), columns=['A', 'B', 'C']) self.assertRaises(TypeError, df.__eq__, None) @@ -4685,8 +4685,8 @@ def test_to_csv_from_csv(self): assert_almost_equal(self.tsframe.values, recons.values) # corner case - dm = DataFrame({'s1': Series(list(range(3)), list(range(3))), - 's2': Series(list(range(2)), list(range(2)))}) + dm = DataFrame({'s1': Series(lrange(3), lrange(3)), + 's2': Series(lrange(2), lrange(2))}) dm.to_csv(path) recons = DataFrame.from_csv(path) assert_frame_equal(dm, recons) @@ -4729,8 +4729,8 @@ def test_to_csv_from_csv(self): df2.to_csv(path,mode='a',header=False) xp = pd.concat([df1,df2]) rs = pd.read_csv(path,index_col=0) - rs.columns = list(map(int,rs.columns)) - xp.columns = list(map(int,xp.columns)) + rs.columns = lmap(int,rs.columns) + xp.columns = lmap(int,xp.columns) assert_frame_equal(xp,rs) def test_to_csv_cols_reordering(self): @@ -4813,10 +4813,10 @@ def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None, dupe_col=False): if cnlvl: - header = 
list(range(cnlvl)) + header = lrange(cnlvl) with ensure_clean(path) as path: df.to_csv(path,encoding='utf8',chunksize=chunksize,tupleize_cols=False) - recons = DataFrame.from_csv(path,header=list(range(cnlvl)),tupleize_cols=False,parse_dates=False) + recons = DataFrame.from_csv(path,header=lrange(cnlvl),tupleize_cols=False,parse_dates=False) else: with ensure_clean(path) as path: df.to_csv(path,encoding='utf8',chunksize=chunksize) @@ -4840,14 +4840,14 @@ def _to_uni(x): if r_dtype: if r_dtype == 'u': # unicode r_dtype='O' - recons.index = np.array(list(map(_to_uni,recons.index)), + recons.index = np.array(lmap(_to_uni,recons.index), dtype=r_dtype) - df.index = np.array(list(map(_to_uni,df.index)),dtype=r_dtype) + df.index = np.array(lmap(_to_uni,df.index),dtype=r_dtype) if r_dtype == 'dt': # unicode r_dtype='O' - recons.index = np.array(list(map(Timestamp,recons.index)), + recons.index = np.array(lmap(Timestamp,recons.index), dtype=r_dtype) - df.index = np.array(list(map(Timestamp,df.index)),dtype=r_dtype) + df.index = np.array(lmap(Timestamp,df.index),dtype=r_dtype) elif r_dtype == 'p': r_dtype='O' recons.index = np.array(list(map(Timestamp, @@ -4863,19 +4863,19 @@ def _to_uni(x): if c_dtype: if c_dtype == 'u': c_dtype='O' - recons.columns = np.array(list(map(_to_uni,recons.columns)), + recons.columns = np.array(lmap(_to_uni,recons.columns), dtype=c_dtype) - df.columns = np.array(list(map(_to_uni,df.columns)),dtype=c_dtype ) + df.columns = np.array(lmap(_to_uni,df.columns),dtype=c_dtype ) elif c_dtype == 'dt': c_dtype='O' - recons.columns = np.array(list(map(Timestamp,recons.columns)), + recons.columns = np.array(lmap(Timestamp,recons.columns), dtype=c_dtype ) - df.columns = np.array(list(map(Timestamp,df.columns)),dtype=c_dtype) + df.columns = np.array(lmap(Timestamp,df.columns),dtype=c_dtype) elif c_dtype == 'p': c_dtype='O' - recons.columns = np.array(list(map(Timestamp,recons.columns.to_datetime())), + recons.columns = np.array(lmap(Timestamp,recons.columns.to_datetime()), dtype=c_dtype) - df.columns = np.array(list(map(Timestamp,df.columns.to_datetime())),dtype=c_dtype ) + df.columns = np.array(lmap(Timestamp,df.columns.to_datetime()),dtype=c_dtype ) else: c_dtype= type_map.get(c_dtype) recons.columns = np.array(recons.columns,dtype=c_dtype ) @@ -4956,7 +4956,7 @@ def make_dtnat_arr(n,nnat=None): _do_test(df,path,dupe_col=True) - _do_test(DataFrame(index=list(range(10))),path) + _do_test(DataFrame(index=lrange(10)),path) _do_test(mkdf(chunksize//2+1, 2,r_idx_nlevels=2),path,rnlvl=2) for ncols in [2,3,4]: base = int(chunksize//ncols) @@ -5132,15 +5132,15 @@ def _make_frame(names=None): # catch invalid headers def testit(): - read_csv(path,tupleize_cols=False,header=list(range(3)),index_col=0) + read_csv(path,tupleize_cols=False,header=lrange(3),index_col=0) assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2\] are too many rows for this multi_index of columns', testit) def testit(): - read_csv(path,tupleize_cols=False,header=list(range(7)),index_col=0) + read_csv(path,tupleize_cols=False,header=lrange(7),index_col=0) assertRaisesRegexp(CParserError, 'Passed header=\[0,1,2,3,4,5,6\], len of 7, but only 6 lines in file', testit) for i in [3,4,5,6,7]: - self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=list(range(i)), index_col=0) + self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=lrange(i), index_col=0) self.assertRaises(Exception, read_csv, path, tupleize_cols=False, header=[0,2], index_col=0) # write with cols @@ -5209,7 +5209,7 @@ 
def create_cols(name): def test_to_csv_dups_cols(self): - df = DataFrame(np.random.randn(1000, 30),columns=list(range(15))+list(range(15)),dtype='float64') + df = DataFrame(np.random.randn(1000, 30),columns=lrange(15)+lrange(15),dtype='float64') with ensure_clean() as filename: df.to_csv(filename) # single dtype, fine @@ -5219,9 +5219,9 @@ def test_to_csv_dups_cols(self): df_float = DataFrame(np.random.randn(1000, 3),dtype='float64') df_int = DataFrame(np.random.randn(1000, 3),dtype='int64') - df_bool = DataFrame(True,index=df_float.index,columns=list(range(3))) - df_object = DataFrame('foo',index=df_float.index,columns=list(range(3))) - df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=list(range(3))) + df_bool = DataFrame(True,index=df_float.index,columns=lrange(3)) + df_object = DataFrame('foo',index=df_float.index,columns=lrange(3)) + df_dt = DataFrame(Timestamp('20010101'),index=df_float.index,columns=lrange(3)) df = pan.concat([ df_float, df_int, df_bool, df_object, df_dt ], axis=1, ignore_index=True) cols = [] @@ -5258,7 +5258,7 @@ def test_to_csv_dups_cols(self): def test_to_csv_chunking(self): - aa=DataFrame({'A':list(range(100000))}) + aa=DataFrame({'A':lrange(100000)}) aa['B'] = aa.A + 1.0 aa['C'] = aa.A + 2.0 aa['D'] = aa.A + 3.0 @@ -5938,7 +5938,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) dropped = df.dropna(axis=0) - expected = df.ix[list(range(2, 6))] + expected = df.ix[lrange(2, 6)] assert_frame_equal(dropped, expected) # threshold @@ -5947,7 +5947,7 @@ def test_dropna(self): assert_frame_equal(dropped, expected) dropped = df.dropna(axis=0, thresh=4) - expected = df.ix[list(range(2, 6))] + expected = df.ix[lrange(2, 6)] assert_frame_equal(dropped, expected) dropped = df.dropna(axis=1, thresh=4) @@ -5993,7 +5993,7 @@ def test_drop_duplicates(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': list(range(8))}) + 'D': lrange(8)}) # single column result = df.drop_duplicates('AAA') @@ -6033,7 +6033,7 @@ def test_drop_duplicates_tuple(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': list(range(8))}) + 'D': lrange(8)}) # single column result = df.drop_duplicates(('AA', 'AB')) @@ -6056,7 +6056,7 @@ def test_drop_duplicates_NA(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], - 'D': list(range(8))}) + 'D': lrange(8)}) # single column result = df.drop_duplicates('A') @@ -6082,7 +6082,7 @@ def test_drop_duplicates_NA(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1.0, np.nan, np.nan, np.nan, 1., 1., 1, 1.], - 'D': list(range(8))}) + 'D': lrange(8)}) # single column result = df.drop_duplicates('C') @@ -6108,7 +6108,7 @@ def test_drop_duplicates_inplace(self): 'B': ['one', 'one', 'two', 'two', 'two', 'two', 'one', 'two'], 'C': [1, 1, 2, 2, 2, 2, 1, 2], - 'D': list(range(8))}) + 'D': lrange(8)}) # single column df = orig.copy() @@ -6313,7 +6313,7 @@ def test_replace_inplace(self): def test_regex_replace_scalar(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) - mix = {'a': list(range(4)), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) ### simplest cases @@ -6379,7 +6379,7 @@ def test_regex_replace_scalar(self): def test_regex_replace_scalar_inplace(self): obj = {'a': list('ab..'), 'b': list('efgh')} dfobj = DataFrame(obj) - mix = {'a': list(range(4)), 'b': list('ab..')} + 
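The comment below asks what happens when a numeric value is the replacement target of a regex; the assertions in that hunk pin the answer down: regex replacement only inspects string cells, so numeric columns pass through untouched. A self-contained restatement of the expectation, mirroring the test fixture rather than adding new behavior claims:

    # Illustrative restatement of the test below: regex replacement only
    # matches string cells, so the integer column 'a' is never touched.
    import numpy as np
    from pandas import DataFrame

    nan = np.nan
    mix = {'a': list(range(4)),          # ints: never match a regex
           'b': list('ab..'),            # strings, two of them are '.'
           'c': ['a', 'b', nan, 'd']}    # strings plus a NaN
    df = DataFrame(mix)

    res = df.replace(r'\s*\.\s*', 0, regex=True)
    expec = DataFrame({'a': mix['a'],            # unchanged
                       'b': ['a', 'b', 0, 0],    # '.' cells become 0
                       'c': mix['c']})           # no '.' here, unchanged
    assert res.equals(expec)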
mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) ### simplest cases @@ -6587,14 +6587,14 @@ def test_regex_replace_list_obj_inplace(self): def test_regex_replace_list_mixed(self): ## mixed frame to make sure this doesn't break things - mix = {'a': list(range(4)), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) ## lists of regexes and values # list of [re1, re2, ..., reN] -> [v1, v2, ..., vN] to_replace_res = [r'\s*\.\s*', r'a'] values = [nan, 'crap'] - mix2 = {'a': list(range(4)), 'b': list('ab..'), 'c': list('halo')} + mix2 = {'a': lrange(4), 'b': list('ab..'), 'c': list('halo')} dfmix2 = DataFrame(mix2) res = dfmix2.replace(to_replace_res, values, regex=True) expec = DataFrame({'a': mix2['a'], 'b': ['crap', 'b', nan, nan], @@ -6625,7 +6625,7 @@ def test_regex_replace_list_mixed(self): assert_frame_equal(res, expec) def test_regex_replace_list_mixed_inplace(self): - mix = {'a': list(range(4)), 'b': list('ab..')} + mix = {'a': lrange(4), 'b': list('ab..')} dfmix = DataFrame(mix) # the same inplace ## lists of regexes and values @@ -6664,7 +6664,7 @@ def test_regex_replace_list_mixed_inplace(self): assert_frame_equal(res, expec) def test_regex_replace_dict_mixed(self): - mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) ## dicts @@ -6721,7 +6721,7 @@ def test_regex_replace_dict_mixed(self): def test_regex_replace_dict_nested(self): # nested dicts will not work until this is implemented for Series - mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} dfmix = DataFrame(mix) res = dfmix.replace({'b': {r'\s*\.\s*': nan}}, regex=True) res2 = dfmix.copy() @@ -6742,7 +6742,7 @@ def test_regex_replace_dict_nested_gh4115(self): assert_frame_equal(df.replace({'Type': {'Q':0,'T':1}}), expected) def test_regex_replace_list_to_scalar(self): - mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'a|b'], nan, regex=True) res2 = df.copy() @@ -6757,7 +6757,7 @@ def test_regex_replace_list_to_scalar(self): def test_regex_replace_str_to_numeric(self): # what happens when you try to replace a numeric value with a regex? 
- mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(r'\s*\.\s*', 0, regex=True) res2 = df.copy() @@ -6771,7 +6771,7 @@ def test_regex_replace_str_to_numeric(self): assert_frame_equal(res3, expec) def test_regex_replace_regex_list_to_numeric(self): - mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace([r'\s*\.\s*', 'b'], 0, regex=True) res2 = df.copy() @@ -6786,7 +6786,7 @@ def test_regex_replace_regex_list_to_numeric(self): assert_frame_equal(res3, expec) def test_regex_replace_series_of_regexes(self): - mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) s1 = Series({'b': r'\s*\.\s*'}) s2 = Series({'b': nan}) @@ -6802,7 +6802,7 @@ def test_regex_replace_series_of_regexes(self): assert_frame_equal(res3, expec) def test_regex_replace_numeric_to_object_conversion(self): - mix = {'a': list(range(4)), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} + mix = {'a': lrange(4), 'b': list('ab..'), 'c': ['a', 'b', nan, 'd']} df = DataFrame(mix) res = df.replace(0, 'a') expec = DataFrame({'a': ['a', 1, 2, 3], 'b': mix['b'], 'c': mix['c']}) @@ -7343,42 +7343,42 @@ def test_reindex_fill_value(self): df = DataFrame(np.random.randn(10, 4)) # axis=0 - result = df.reindex(list(range(15))) + result = df.reindex(lrange(15)) self.assert_(np.isnan(result.values[-5:]).all()) - result = df.reindex(list(range(15)), fill_value=0) - expected = df.reindex(list(range(15))).fillna(0) + result = df.reindex(lrange(15), fill_value=0) + expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) # axis=1 - result = df.reindex(columns=list(range(5)), fill_value=0.) + result = df.reindex(columns=lrange(5), fill_value=0.) expected = df.copy() expected[4] = 0. 
assert_frame_equal(result, expected) - result = df.reindex(columns=list(range(5)), fill_value=0) + result = df.reindex(columns=lrange(5), fill_value=0) expected = df.copy() expected[4] = 0 assert_frame_equal(result, expected) - result = df.reindex(columns=list(range(5)), fill_value='foo') + result = df.reindex(columns=lrange(5), fill_value='foo') expected = df.copy() expected[4] = 'foo' assert_frame_equal(result, expected) # reindex_axis - result = df.reindex_axis(list(range(15)), fill_value=0., axis=0) - expected = df.reindex(list(range(15))).fillna(0) + result = df.reindex_axis(lrange(15), fill_value=0., axis=0) + expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) - result = df.reindex_axis(list(range(5)), fill_value=0., axis=1) - expected = df.reindex(columns=list(range(5))).fillna(0) + result = df.reindex_axis(lrange(5), fill_value=0., axis=1) + expected = df.reindex(columns=lrange(5)).fillna(0) assert_frame_equal(result, expected) # other dtypes df['foo'] = 'foo' - result = df.reindex(list(range(15)), fill_value=0) - expected = df.reindex(list(range(15))).fillna(0) + result = df.reindex(lrange(15), fill_value=0) + expected = df.reindex(lrange(15)).fillna(0) assert_frame_equal(result, expected) def test_align(self): @@ -8964,12 +8964,12 @@ def test_count(self): tm.assert_isinstance(ct2, Series) # GH #423 - df = DataFrame(index=list(range(10))) + df = DataFrame(index=lrange(10)) result = df.count(1) expected = Series(0, index=df.index) assert_series_equal(result, expected) - df = DataFrame(columns=list(range(10))) + df = DataFrame(columns=lrange(10)) result = df.count(0) expected = Series(0, index=df.columns) assert_series_equal(result, expected) @@ -9152,7 +9152,7 @@ def _check_stat_op(self, name, alternative, frame=None, has_skipna=True, print (df) self.assertFalse(len(_f())) - df['a'] = list(range(len(df))) + df['a'] = lrange(len(df)) self.assert_(len(getattr(df, name)())) if has_skipna: @@ -9531,12 +9531,12 @@ def test_axis_aliases(self): assert_series_equal(result, expected) def test_combine_first_mixed(self): - a = Series(['a', 'b'], index=list(range(2))) - b = Series(list(range(2)), index=list(range(2))) + a = Series(['a', 'b'], index=lrange(2)) + b = Series(lrange(2), index=lrange(2)) f = DataFrame({'A': a, 'B': b}) - a = Series(['a', 'b'], index=list(range(5, 7))) - b = Series(list(range(2)), index=list(range(5, 7))) + a = Series(['a', 'b'], index=lrange(5, 7)) + b = Series(lrange(2), index=lrange(5, 7)) g = DataFrame({'A': a, 'B': b}) combined = f.combine_first(g) @@ -9554,7 +9554,7 @@ def test_reindex_boolean(self): self.assert_(reindexed.values.dtype == np.object_) self.assert_(isnull(reindexed[0][1])) - reindexed = frame.reindex(columns=list(range(3))) + reindexed = frame.reindex(columns=lrange(3)) self.assert_(reindexed.values.dtype == np.object_) self.assert_(isnull(reindexed[1]).all()) @@ -9614,22 +9614,22 @@ def test_reindex_with_nans(self): def test_reindex_multi(self): df = DataFrame(np.random.randn(3, 3)) - result = df.reindex(list(range(4)), list(range(4))) - expected = df.reindex(list(range(4))).reindex(columns=list(range(4))) + result = df.reindex(lrange(4), lrange(4)) + expected = df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(list(range(4)), list(range(4))) - expected = df.reindex(list(range(4))).reindex(columns=list(range(4))) + result = df.reindex(lrange(4), lrange(4)) + expected = 
df.reindex(lrange(4)).reindex(columns=lrange(4)) assert_frame_equal(result, expected) df = DataFrame(np.random.randint(0, 10, (3, 3))) - result = df.reindex(list(range(2)), list(range(2))) - expected = df.reindex(list(range(2))).reindex(columns=list(range(2))) + result = df.reindex(lrange(2), lrange(2)) + expected = df.reindex(lrange(2)).reindex(columns=lrange(2)) assert_frame_equal(result, expected) @@ -9665,7 +9665,7 @@ def test_count_objects(self): def test_cumsum_corner(self): dm = DataFrame(np.arange(20).reshape(4, 5), - index=list(range(4)), columns=list(range(5))) + index=lrange(4), columns=lrange(5)) result = dm.cumsum() #---------------------------------------------------------------------- @@ -9875,13 +9875,13 @@ def test_reset_index_multiindex_col(self): assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill=None) - xp = DataFrame(full, Index(list(range(3)), name='d'), + xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['a', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) rs = df.reset_index('a', col_fill='blah', col_level=1) - xp = DataFrame(full, Index(list(range(3)), name='d'), + xp = DataFrame(full, Index(lrange(3), name='d'), columns=[['blah', 'b', 'b', 'c'], ['a', 'mean', 'median', 'mean']]) assert_frame_equal(rs, xp) @@ -10156,7 +10156,7 @@ def test_boolean_set_uncons(self): def test_xs_view(self): dm = DataFrame(np.arange(20.).reshape(4, 5), - index=list(range(4)), columns=list(range(5))) + index=lrange(4), columns=lrange(5)) dm.xs(2, copy=False)[:] = 5 self.assert_((dm.xs(2) == 5).all()) @@ -10174,7 +10174,7 @@ def test_xs_view(self): self.assert_((dm.xs(3) == 10).all()) def test_boolean_indexing(self): - idx = list(range(3)) + idx = lrange(3) cols = ['A','B','C'] df1 = DataFrame(index=idx, columns=cols, data=np.array([[0.0, 0.5, 1.0], @@ -10220,15 +10220,15 @@ def test_boolean_indexing_mixed(self): self.assertRaises(ValueError, df.__setitem__, df>0.3, 1) def test_sum_bools(self): - df = DataFrame(index=list(range(1)), columns=list(range(10))) + df = DataFrame(index=lrange(1), columns=lrange(10)) bools = isnull(df) self.assert_(bools.sum(axis=1)[0] == 10) def test_fillna_col_reordering(self): - idx = list(range(20)) + idx = lrange(20) cols = ["COL." 
+ str(i) for i in range(5, 0, -1)] data = np.random.rand(20, 5) - df = DataFrame(index=list(range(20)), columns=cols, data=data) + df = DataFrame(index=lrange(20), columns=cols, data=data) filled = df.fillna(method='ffill') self.assert_(df.columns.tolist() == filled.columns.tolist()) @@ -10355,8 +10355,8 @@ def test_dot(self): result = A.dot(b) # unaligned - df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=list(range(4))) - df2 = DataFrame(randn(5, 3), index=list(range(5)), columns=[1, 2, 3]) + df = DataFrame(randn(3, 4), index=[1, 2, 3], columns=lrange(4)) + df2 = DataFrame(randn(5, 3), index=lrange(5), columns=[1, 2, 3]) self.assertRaises(ValueError, df.dot, df2) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index ebc00bb7cc779..c03041e390ee7 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -1,4 +1,3 @@ -from pandas.util.py3compat import range import nose import os import string @@ -7,6 +6,7 @@ from datetime import datetime, date from pandas import Series, DataFrame, MultiIndex, PeriodIndex, date_range +from pandas.util.py3compat import range, lrange, StringIO, lmap, lzip import pandas.util.testing as tm from pandas.util.testing import ensure_clean from pandas.core.config import set_option @@ -19,8 +19,8 @@ from numpy.testing.decorators import slow import pandas.tools.plotting as plotting import six -from six.moves import map -from six.moves import zip +from pandas.util.py3compat import map +from pandas.util.py3compat import zip def _skip_if_no_scipy(): @@ -119,7 +119,7 @@ def test_bar_colors(self): rects = ax.patches - rgba_colors = list(map(cm.jet, np.linspace(0, 1, 5))) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5)) for i, rect in enumerate(rects[::5]): xp = rgba_colors[i] rs = rect.get_facecolor() @@ -132,7 +132,7 @@ def test_bar_colors(self): rects = ax.patches - rgba_colors = list(map(cm.jet, np.linspace(0, 1, 5))) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5)) for i, rect in enumerate(rects[::5]): xp = rgba_colors[i] rs = rect.get_facecolor() @@ -275,7 +275,7 @@ def test_invalid_plot_data(self): @slow def test_valid_object_plot(self): - s = Series(list(range(10)), dtype=object) + s = Series(lrange(10), dtype=object) kinds = 'line', 'bar', 'barh', 'kde', 'density' for kind in kinds: @@ -331,7 +331,7 @@ def test_plot(self): _check_plot_works(df.plot, subplots=True, title='blah') _check_plot_works(df.plot, title='blah') - tuples = list(zip(string.ascii_letters[:10], range(10))) + tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) _check_plot_works(df.plot, use_index=True) @@ -388,7 +388,7 @@ def test_plot_xy(self): self._check_data(df.plot(y='B'), df.B.plot()) # columns.inferred_type == 'integer' - df.columns = list(range(1, len(df.columns) + 1)) + df.columns = lrange(1, len(df.columns) + 1) self._check_data(df.plot(x=1, y=2), df.set_index(1)[2].plot()) self._check_data(df.plot(x=1), df.set_index(1).plot()) @@ -501,7 +501,7 @@ def test_plot_bar(self): df = DataFrame(np.random.randn(10, 15), index=list(string.ascii_letters[:10]), - columns=list(range(15))) + columns=lrange(15)) _check_plot_works(df.plot, kind='bar') df = DataFrame({'a': [0, 1], 'b': [1, 0]}) @@ -509,13 +509,13 @@ def test_plot_bar(self): def test_bar_stacked_center(self): # GH2157 - df = DataFrame({'A': [3] * 5, 'B': list(range(5))}, index=list(range(5))) + df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) ax = df.plot(kind='bar', stacked='True', 
grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width() / 2) def test_bar_center(self): - df = DataFrame({'A': [3] * 5, 'B': list(range(5))}, index=list(range(5))) + df = DataFrame({'A': [3] * 5, 'B': lrange(5)}, index=lrange(5)) ax = df.plot(kind='bar', grid=True) self.assertEqual(ax.xaxis.get_ticklocs()[0], ax.patches[0].get_x() + ax.patches[0].get_width()) @@ -525,7 +525,7 @@ def test_bar_log(self): # GH3254, GH3298 matplotlib/matplotlib#1882, #1892 # regressions in 1.2.1 - df = DataFrame({'A': [3] * 5, 'B': list(range(1, 6))}, index=list(range(5))) + df = DataFrame({'A': [3] * 5, 'B': lrange(1, 6)}, index=lrange(5)) ax = df.plot(kind='bar', grid=True, log=True) self.assertEqual(ax.yaxis.get_ticklocs()[0], 1.0) @@ -769,7 +769,6 @@ def test_style_by_column(self): def test_line_colors(self): import matplotlib.pyplot as plt import sys - from pandas.util.py3compat import StringIO from matplotlib import cm custom_colors = 'rgcby' @@ -800,7 +799,7 @@ def test_line_colors(self): ax = df.plot(colormap='jet') - rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df)))) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df))) lines = ax.get_lines() for i, l in enumerate(lines): @@ -812,7 +811,7 @@ def test_line_colors(self): ax = df.plot(colormap=cm.jet) - rgba_colors = list(map(cm.jet, np.linspace(0, 1, len(df)))) + rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df))) lines = ax.get_lines() for i, l in enumerate(lines): @@ -891,7 +890,7 @@ def test_boxplot(self): _check_plot_works(grouped.boxplot) _check_plot_works(grouped.boxplot, subplots=False) - tuples = list(zip(string.ascii_letters[:10], range(10))) + tuples = lzip(string.ascii_letters[:10], range(10)) df = DataFrame(np.random.rand(10, 3), index=MultiIndex.from_tuples(tuples)) grouped = df.groupby(level=1) diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 9fe98e27c38cc..005babf6f3416 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1,7 +1,7 @@ from __future__ import print_function -from pandas.util.py3compat import range, long +from pandas.util.py3compat import range, long, lrange, StringIO, lmap, lzip from pandas.util import compat -from six.moves import map, zip, builtins +from pandas.util.py3compat import map, zip, builtins import nose import unittest @@ -193,9 +193,9 @@ def test_first_last_nth_dtypes(self): assert_frame_equal(nth, expected, check_names=False) # GH 2763, first/last shifting dtypes - idx = list(range(10)) + idx = lrange(10) idx.append(9) - s = Series(data=list(range(11)), index=idx, name='IntCol') + s = Series(data=lrange(11), index=idx, name='IntCol') self.assert_(s.dtype == 'int64') f = s.groupby(level=0).first() self.assert_(f.dtype == 'int64') @@ -267,7 +267,7 @@ def test_groupby_nonobject_dtype(self): # GH 3911, mixed frame non-conversion df = self.df_mixed_floats.copy() - df['value'] = list(range(len(df))) + df['value'] = lrange(len(df)) def max_value(group): return group.ix[group['value'].idxmax()] @@ -512,11 +512,11 @@ def raiseException(df): def test_basic_regression(self): # regression - T = [1.0 * x for x in list(range(1, 10)) * 10][:1095] - result = Series(T, list(range(0, len(T)))) + T = [1.0 * x for x in lrange(1, 10) * 10][:1095] + result = Series(T, lrange(0, len(T))) groupings = np.random.random((1100,)) - groupings = Series(groupings, list(range(0, len(groupings)))) * 10. + groupings = Series(groupings, lrange(0, len(groupings))) * 10. 
grouped = result.groupby(groupings) grouped.mean() @@ -711,12 +711,12 @@ def f3(x): return y df = DataFrame({'a':[1,2,2,2], - 'b':list(range(4)), - 'c':list(range(5,9))}) + 'b':lrange(4), + 'c':lrange(5,9)}) df2 = DataFrame({'a':[3,2,2,2], - 'b':list(range(4)), - 'c':list(range(5,9))}) + 'b':lrange(4), + 'c':lrange(5,9)}) # correct result @@ -1157,7 +1157,7 @@ def test_groupby_as_index_cython(self): result = grouped.mean() expected = data.groupby(['A', 'B']).mean() - arrays = list(zip(*expected.index._tuple_index)) + arrays = lzip(*expected.index._tuple_index) expected.insert(0, 'A', arrays[0]) expected.insert(1, 'B', arrays[1]) expected.index = np.arange(len(expected)) @@ -1420,7 +1420,7 @@ def test_groupby_level(self): def test_groupby_level_index_names(self): ## GH4014 this used to raise ValueError since 'exp'>1 (in py2) - df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : list(range(6)),}).set_index('exp') + df = DataFrame({'exp' : ['A']*3 + ['B']*3, 'var1' : lrange(6),}).set_index('exp') df.groupby(level='exp') self.assertRaises(ValueError, df.groupby, level='foo') @@ -1569,7 +1569,7 @@ def test_mutate_groups(self): mydf = DataFrame({ 'cat1' : ['a'] * 8 + ['b'] * 6, 'cat2' : ['c'] * 2 + ['d'] * 2 + ['e'] * 2 + ['f'] * 2 + ['c'] * 2 + ['d'] * 2 + ['e'] * 2, - 'cat3' : list(map(lambda x: 'g%s' % x, list(range(1,15)))), + 'cat3' : lmap(lambda x: 'g%s' % x, lrange(1,15)), 'val' : np.random.randint(100, size=14), }) @@ -1589,7 +1589,7 @@ def f_no_copy(x): def test_apply_chunk_view(self): # Low level tinkering could be unsafe, make sure not df = DataFrame({'key': [1, 1, 1, 2, 2, 2, 3, 3, 3], - 'value': list(range(9))}) + 'value': lrange(9)}) # return view f = lambda x: x[:2] @@ -1601,7 +1601,7 @@ def test_apply_chunk_view(self): def test_apply_no_name_column_conflict(self): df = DataFrame({'name': [1, 1, 1, 1, 1, 1, 2, 2, 2, 2], 'name2': [0, 0, 0, 1, 1, 1, 0, 0, 1, 1], - 'value': list(range(10))[::-1]}) + 'value': lrange(10)[::-1]}) # it works! 
#2605 grouped = df.groupby(['name', 'name2']) @@ -1814,7 +1814,6 @@ def f(group): def test_groupby_wrong_multi_labels(self): from pandas import read_csv - from pandas.util.py3compat import StringIO data = """index,foo,bar,baz,spam,data 0,foo1,bar1,baz1,spam2,20 1,foo1,bar2,baz1,spam3,30 @@ -1853,8 +1852,8 @@ def test_groupby_nonstring_columns(self): def test_cython_grouper_series_bug_noncontig(self): arr = np.empty((100, 100)) arr.fill(np.nan) - obj = Series(arr[:, 0], index=list(range(100))) - inds = np.tile(list(range(10)), 10) + obj = Series(arr[:, 0], index=lrange(100)) + inds = np.tile(lrange(10), 10) result = obj.groupby(inds).agg(Series.median) self.assert_(result.isnull().all()) @@ -1876,7 +1875,7 @@ def test_convert_objects_leave_decimal_alone(self): from decimal import Decimal - s = Series(list(range(5))) + s = Series(lrange(5)) labels = np.array(['a', 'b', 'c', 'd', 'e'], dtype='O') def convert_fast(x): @@ -1991,7 +1990,7 @@ def test_numpy_groupby(self): assert_almost_equal(result, expected) def test_groupby_2d_malformed(self): - d = DataFrame(index=list(range(2))) + d = DataFrame(index=lrange(2)) d['group'] = ['g1', 'g2'] d['zeros'] = [0, 0] d['ones'] = [1, 1] @@ -2050,18 +2049,18 @@ def test_groupby_sort_multi(self): 'c': [0, 1, 2], 'd': np.random.randn(3)}) - tups = list(map(tuple, df[['a', 'b', 'c']].values)) + tups = lmap(tuple, df[['a', 'b', 'c']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['a', 'b', 'c'], sort=True).sum() self.assert_(np.array_equal(result.index.values, tups[[1, 2, 0]])) - tups = list(map(tuple, df[['c', 'a', 'b']].values)) + tups = lmap(tuple, df[['c', 'a', 'b']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['c', 'a', 'b'], sort=True).sum() self.assert_(np.array_equal(result.index.values, tups)) - tups = list(map(tuple, df[['b', 'c', 'a']].values)) + tups = lmap(tuple, df[['b', 'c', 'a']].values) tups = com._asarray_tuplesafe(tups) result = df.groupby(['b', 'c', 'a'], sort=True).sum() self.assert_(np.array_equal(result.index.values, @@ -2676,7 +2675,7 @@ def assert_fp_equal(a, b): def _check_groupby(df, result, keys, field, f=lambda x: x.sum()): - tups = list(map(tuple, df[keys].values)) + tups = lmap(tuple, df[keys].values) tups = com._asarray_tuplesafe(tups) expected = f(df.groupby(tups)[field]) for k, v in compat.iteritems(expected): diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index d77c60ecb47d1..4dae4378c7df4 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, lzip import operator import pickle import unittest @@ -24,7 +24,7 @@ import pandas as pd from pandas.lib import Timestamp import six -from six.moves import zip +from pandas.util.py3compat import zip class TestIndex(unittest.TestCase): @@ -37,7 +37,7 @@ def setUp(self): self.intIndex = tm.makeIntIndex(100) self.floatIndex = tm.makeFloatIndex(100) self.empty = Index([]) - self.tuples = Index(list(zip(['foo', 'bar', 'baz'], [1, 2, 3]))) + self.tuples = Index(lzip(['foo', 'bar', 'baz'], [1, 2, 3])) def test_hash_error(self): self.assertRaises(TypeError, hash, self.strIndex) @@ -470,8 +470,8 @@ def test_slice_locs_dup(self): def test_drop(self): n = len(self.strIndex) - dropped = self.strIndex.drop(self.strIndex[list(range(5, 10))]) - expected = self.strIndex[list(range(5)) + list(range(10, n))] + dropped = 
self.strIndex.drop(self.strIndex[lrange(5, 10)]) + expected = self.strIndex[lrange(5) + lrange(10, n)] self.assert_(dropped.equals(expected)) self.assertRaises(ValueError, self.strIndex.drop, ['foo', 'bar']) @@ -893,8 +893,8 @@ def test_take_preserve_name(self): def test_int_name_format(self): from pandas import Series, DataFrame index = Index(['a', 'b', 'c'], name=0) - s = Series(list(range(3)), index) - df = DataFrame(list(range(3)), index=index) + s = Series(lrange(3), index) + df = DataFrame(lrange(3), index=index) repr(s) repr(df) @@ -910,7 +910,7 @@ def test_repr_summary(self): self.assertTrue("..." in r) def test_unicode_string_with_unicode(self): - idx = Index(list(range(1000))) + idx = Index(lrange(1000)) if py3compat.PY3: str(idx) @@ -918,7 +918,7 @@ def test_unicode_string_with_unicode(self): six.text_type(idx) def test_bytestring_with_unicode(self): - idx = Index(list(range(1000))) + idx = Index(lrange(1000)) if py3compat.PY3: bytes(idx) else: @@ -1154,9 +1154,9 @@ def test_get_loc(self): self.assertRaises(KeyError, self.index.get_loc, 'quux') # 3 levels - index = MultiIndex(levels=[Index(list(range(4))), - Index(list(range(4))), - Index(list(range(4)))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1176,9 +1176,9 @@ def test_get_loc_duplicates(self): assert(rs == xp) def test_get_loc_level(self): - index = MultiIndex(levels=[Index(list(range(4))), - Index(list(range(4))), - Index(list(range(4)))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1196,7 +1196,7 @@ def test_get_loc_level(self): self.assertRaises(KeyError, index.get_loc_level, (2, 2)) - index = MultiIndex(levels=[[2000], list(range(4))], + index = MultiIndex(levels=[[2000], lrange(4)], labels=[np.array([0, 0, 0, 0]), np.array([0, 1, 2, 3])]) result, new_index = index.get_loc_level((2000, slice(None, None))) @@ -1222,9 +1222,9 @@ def test_slice_locs(self): tm.assert_almost_equal(sliced.values, expected.values) def test_slice_locs_not_sorted(self): - index = MultiIndex(levels=[Index(list(range(4))), - Index(list(range(4))), - Index(list(range(4)))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1279,11 +1279,11 @@ def test_slice_locs_not_contained(self): def test_consistency(self): # need to construct an overflow - major_axis = list(range(70000)) - minor_axis = list(range(10)) + major_axis = lrange(70000) + minor_axis = lrange(10) major_labels = np.arange(70000) - minor_labels = np.repeat(list(range(10)), 7000) + minor_labels = np.repeat(lrange(10), 7000) # the fact that is works means it's consistent index = MultiIndex(levels=[major_axis, minor_axis], @@ -1298,8 +1298,8 @@ def test_consistency(self): self.assert_(not index.is_unique) def test_truncate(self): - major_axis = Index(list(range(4))) - minor_axis = Index(list(range(2))) + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) major_labels = np.array([0, 0, 1, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 1, 0, 1]) @@ -1322,8 +1322,8 @@ def test_truncate(self): self.assertRaises(ValueError, index.truncate, 3, 1) def test_get_indexer(self): - major_axis = 
Index(list(range(4))) - minor_axis = Index(list(range(2))) + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) major_labels = np.array([0, 0, 1, 2, 2, 3, 3]) minor_labels = np.array([0, 1, 0, 0, 1, 0, 1]) @@ -1405,9 +1405,9 @@ def test_equals(self): self.assert_(self.index.equals(self.index._tuple_index)) # different number of levels - index = MultiIndex(levels=[Index(list(range(4))), - Index(list(range(4))), - Index(list(range(4)))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])]) @@ -1418,8 +1418,8 @@ def test_equals(self): self.assert_(not index.equal_levels(index2)) # levels are different - major_axis = Index(list(range(4))) - minor_axis = Index(list(range(2))) + major_axis = Index(lrange(4)) + minor_axis = Index(lrange(2)) major_labels = np.array([0, 0, 1, 2, 2, 3]) minor_labels = np.array([0, 1, 0, 0, 1, 0]) @@ -1638,9 +1638,9 @@ def test_droplevel_with_names(self): dropped = index.droplevel(0) self.assertEqual(dropped.name, 'second') - index = MultiIndex(levels=[Index(list(range(4))), - Index(list(range(4))), - Index(list(range(4)))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], @@ -1653,9 +1653,9 @@ def test_droplevel_with_names(self): self.assert_(dropped.equals(expected)) def test_droplevel_multiple(self): - index = MultiIndex(levels=[Index(list(range(4))), - Index(list(range(4))), - Index(list(range(4)))], + index = MultiIndex(levels=[Index(lrange(4)), + Index(lrange(4)), + Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array([0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])], diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index b7297fc86e22e..a053b43f17dc6 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -1,10 +1,9 @@ # pylint: disable-msg=W0612,E1101 -from pandas.util.py3compat import range import unittest import nose import itertools -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import range, lrange, StringIO, lmap from numpy import random, nan from numpy.random import randn import numpy as np @@ -22,7 +21,7 @@ import pandas.lib as lib from pandas import date_range from numpy.testing.decorators import slow -from six.moves import map +from pandas.util.py3compat import map _verbose = False @@ -38,7 +37,7 @@ def _generate_indices(f, values=False): axes = f.axes if values: - axes = [ list(range(len(a))) for a in axes ] + axes = [ lrange(len(a)) for a in axes ] return itertools.product(*axes) @@ -96,9 +95,9 @@ def setUp(self): import warnings warnings.filterwarnings(action='ignore', category=FutureWarning) - self.series_ints = Series(np.random.rand(4), index=list(range(0,8,2))) - self.frame_ints = DataFrame(np.random.randn(4, 4), index=list(range(0, 8, 2)), columns=list(range(0,12,3))) - self.panel_ints = Panel(np.random.rand(4,4,4), items=list(range(0,8,2)),major_axis=list(range(0,12,3)),minor_axis=list(range(0,16,4))) + self.series_ints = Series(np.random.rand(4), index=lrange(0,8,2)) + self.frame_ints = DataFrame(np.random.randn(4, 4), index=lrange(0, 8, 2), columns=lrange(0,12,3)) + self.panel_ints = Panel(np.random.rand(4,4,4), items=lrange(0,8,2),major_axis=lrange(0,12,3),minor_axis=lrange(0,16,4)) self.series_labels = 
Series(np.random.randn(4), index=list('abcd')) self.frame_labels = DataFrame(np.random.randn(4, 4), index=list('abcd'), columns=list('ABCD')) @@ -344,7 +343,7 @@ def test_iloc_getitem_dups(self): def test_iloc_getitem_array(self): # array like - s = Series(index=list(range(1,4))) + s = Series(index=lrange(1,4)) self.check_result('array like', 'iloc', s.index, 'ix', { 0 : [2,4,6], 1 : [3,6,9], 2: [4,8,12] }, typs = ['ints']) def test_iloc_getitem_bool(self): @@ -549,7 +548,7 @@ def test_loc_setitem_frame(self): def test_iloc_getitem_frame(self): """ originally from test_frame.py""" - df = DataFrame(np.random.randn(10, 4), index=list(range(0, 20, 2)), columns=list(range(0,8,2))) + df = DataFrame(np.random.randn(10, 4), index=lrange(0, 20, 2), columns=lrange(0,8,2)) result = df.iloc[2] exp = df.ix[4] @@ -588,7 +587,7 @@ def test_iloc_getitem_frame(self): assert_frame_equal(result, expected) # with index-like - s = Series(index=list(range(1,5))) + s = Series(index=lrange(1,5)) result = df.iloc[s.index] expected = df.ix[[2,4,6,8]] assert_frame_equal(result, expected) @@ -635,7 +634,7 @@ def test_iloc_setitem_series(self): assert_frame_equal(result, expected) def test_iloc_setitem_series(self): - s = Series(np.random.randn(10), index=list(range(0,20,2))) + s = Series(np.random.randn(10), index=lrange(0,20,2)) s.iloc[1] = 1 result = s.iloc[1] @@ -798,7 +797,7 @@ def test_dups_fancy_indexing(self): # GH 3561, dups not in selected order ind = ['A', 'A', 'B', 'C'] - df = DataFrame({'test':list(range(len(ind)))}, index=ind) + df = DataFrame({'test':lrange(len(ind))}, index=ind) rows = ['C', 'B'] res = df.ix[rows] self.assert_(rows == list(res.index)) @@ -880,8 +879,8 @@ def test_multi_assign(self): # GH 3626, an assignement of a sub-df to a df df = DataFrame({'FC':['a','b','a','b','a','b'], 'PF':[0,0,0,0,1,1], - 'col1':list(range(6)), - 'col2':list(range(6,12))}) + 'col1':lrange(6), + 'col2':lrange(6,12)}) df.ix[1,0]=np.nan df2 = df.copy() @@ -920,7 +919,7 @@ def test_ix_assign_column_mixed(self): assert_series_equal(df.B, orig + 1) # GH 3668, mixed frame with series value - df = DataFrame({'x':list(range(10)), 'y':list(range(10,20)),'z' : 'bar'}) + df = DataFrame({'x':lrange(10), 'y':lrange(10,20),'z' : 'bar'}) expected = df.copy() expected.ix[0, 'y'] = 1000 expected.ix[2, 'y'] = 1200 @@ -934,10 +933,10 @@ def test_ix_assign_column_mixed(self): def test_iloc_mask(self): # GH 3631, iloc with a mask (of a series) should raise - df = DataFrame(list(range(5)), list('ABCDE'), columns=['a']) + df = DataFrame(lrange(5), list('ABCDE'), columns=['a']) mask = (df.a%2 == 0) self.assertRaises(ValueError, df.iloc.__getitem__, tuple([mask])) - mask.index = list(range(len(mask))) + mask.index = lrange(len(mask)) self.assertRaises(NotImplementedError, df.iloc.__getitem__, tuple([mask])) # ndarray ok @@ -947,7 +946,7 @@ def test_iloc_mask(self): # the possibilities locs = np.arange(4) nums = 2**locs - reps = list(map(bin, nums)) + reps = lmap(bin, nums) df = DataFrame({'locs':locs, 'nums':nums}, reps) expected = { @@ -1044,7 +1043,7 @@ def test_iloc_non_unique_indexing(self): #GH 4017, non-unique indexing (on the axis) df = DataFrame({'A' : [0.1] * 3000, 'B' : [1] * 3000}) - idx = np.array(list(range(30))) * 99 + idx = np.array(lrange(30)) * 99 expected = df.iloc[idx] df3 = pd.concat([df, 2*df, 3*df]) @@ -1111,7 +1110,7 @@ def test_non_unique_loc_memory_error(self): columns = list('ABCDEFG') def gen_test(l,l2): - return pd.concat([ DataFrame(randn(l,len(columns)),index=list(range(l)),columns=columns), + return 
pd.concat([ DataFrame(randn(l,len(columns)),index=lrange(l),columns=columns), DataFrame(np.ones((l2,len(columns))),index=[0]*l2,columns=columns) ]) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index b1d29a97b39c1..9d2439b7cead5 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -12,7 +12,7 @@ from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, randn) import six -from six.moves import zip +from pandas.util.py3compat import zip def assert_block_equal(left, right): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 82f1fa8248731..ea2ab8a1d914a 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -13,14 +13,13 @@ assert_frame_equal) import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, StringIO, lzip from pandas.util.compat import product as cart_product import pandas as pd import pandas.index as _index import six -from six.moves import zip, cPickle +from pandas.util.py3compat import zip, cPickle class TestMultiLevel(unittest.TestCase): @@ -46,7 +45,7 @@ def setUp(self): # create test series object arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = list(zip(*arrays)) + tuples = lzip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) s[3] = np.NaN @@ -92,7 +91,7 @@ def test_series_constructor(self): ['x', 'y', 'x', 'y']]) tm.assert_isinstance(multi.index, MultiIndex) - multi = Series(list(range(4)), index=[['a', 'a', 'b', 'b'], + multi = Series(lrange(4), index=[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']]) tm.assert_isinstance(multi.index, MultiIndex) @@ -351,8 +350,8 @@ def test_frame_setitem_multi_column(self): def test_getitem_tuple_plus_slice(self): # GH #671 - df = DataFrame({'a': list(range(10)), - 'b': list(range(10)), + df = DataFrame({'a': lrange(10), + 'b': lrange(10), 'c': np.random.randn(10), 'd': np.random.randn(10)}) @@ -431,7 +430,7 @@ def test_xs_level(self): def test_xs_level_multiple(self): from pandas import read_table - from pandas.util.py3compat import StringIO + from pandas.util.py3compat import StringIO, lrange, lzip text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -445,7 +444,7 @@ def test_xs_level_multiple(self): assert_frame_equal(result, expected) # GH2107 - dates = list(range(20111201, 20111205)) + dates = lrange(20111201, 20111205) ids = 'abcde' idx = MultiIndex.from_tuples([x for x in cart_product(dates, ids)]) idx.names = ['date', 'secid'] @@ -456,7 +455,7 @@ def test_xs_level_multiple(self): def test_xs_level0(self): from pandas import read_table - from pandas.util.py3compat import StringIO + from pandas.util.py3compat import StringIO, lrange, lzip text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -590,7 +589,7 @@ def test_frame_setitem_ix(self): # with integer labels df = self.frame.copy() - df.columns = list(range(3)) + df.columns = lrange(3) df.ix[('bar', 'two'), 1] = 7 self.assertEquals(df.ix[('bar', 'two'), 1], 7) @@ -1169,7 +1168,7 @@ def test_frame_getitem_not_sorted(self): def test_series_getitem_not_sorted(self): arrays = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'], ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']] - tuples = 
list(zip(*arrays)) + tuples = lzip(*arrays) index = MultiIndex.from_tuples(tuples) s = Series(randn(8), index=index) @@ -1213,7 +1212,7 @@ def test_count(self): def test_series_group_min_max(self): for op, level, skipna in cart_product(self.AGG_FUNCTIONS, - list(range(2)), + lrange(2), [False, True]): grouped = self.series.groupby(level=level) aggf = lambda x: getattr(x, op)(skipna=skipna) @@ -1227,7 +1226,7 @@ def test_frame_group_ops(self): self.frame.ix[7, [0, 1]] = np.nan for op, level, axis, skipna in cart_product(self.AGG_FUNCTIONS, - list(range(2)), list(range(2)), + lrange(2), lrange(2), [False, True]): if axis == 0: frame = self.frame @@ -1689,7 +1688,7 @@ def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], names=[six.u('\u0394'), 'i1']) - s = Series(list(range(2)), index=index) + s = Series(lrange(2), index=index) df = DataFrame(np.random.randn(2, 4), index=index) repr(s) repr(df) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1d97078583dd0..5fdb487806c93 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,7 +1,7 @@ # pylint: disable=W0612,E1101 from datetime import datetime -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange, StringIO from pandas.util import compat import operator import unittest @@ -16,7 +16,7 @@ from pandas.core.series import remove_na import pandas.core.common as com from pandas.util import py3compat -from six.moves import cPickle +from pandas.util.py3compat import cPickle from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -392,7 +392,7 @@ def test_delitem_and_pop(self): values[1] = 1 values[2] = 2 - panel = Panel(values, list(range(3)), list(range(3)), list(range(3))) + panel = Panel(values, lrange(3), lrange(3), lrange(3)) # did we delete the right row? 
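The change repeated throughout these hunks swaps list(range(...)) for lrange(...) wherever the tests need a real list rather than a lazy sequence. A minimal sketch of the difference, assuming only that lrange materializes the lazy range (the exact definition lives in pandas/util/py3compat.py):

    from pandas.util.py3compat import range, lrange

    idx = range(10)      # lazy on Python 3: cannot be appended to or concatenated
    # idx.append(9)      # AttributeError under Python 3

    idx = lrange(10)     # a real list on both Python 2 and Python 3
    idx.append(9)        # fine; the same pattern appears in test_first_last_nth_dtypes
    combined = lrange(2, 7) + lrange(5, 10)   # list concatenation, as in test_where
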
@@ -813,8 +813,8 @@ def test_constructor_empty_panel(self): def test_constructor_observe_dtype(self): # GH #411 - panel = Panel(items=list(range(3)), major_axis=list(range(3)), - minor_axis=list(range(3)), dtype='O') + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') self.assert_(panel.values.dtype == np.object_) def test_constructor_dtypes(self): @@ -826,19 +826,19 @@ def _check_dtype(panel, dtype): # only nan holding types allowed here for dtype in ['float64','float32','object']: - panel = Panel(items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) + panel = Panel(items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) + panel = Panel(np.array(np.random.randn(2,10,5),dtype=dtype),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) + panel = Panel(np.array(np.random.randn(2,10,5),dtype='O'),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) for dtype in ['float64','float32','int64','int32','object']: - panel = Panel(np.random.randn(2,10,5),items=list(range(2)),major_axis=list(range(10)),minor_axis=list(range(5)),dtype=dtype) + panel = Panel(np.random.randn(2,10,5),items=lrange(2),major_axis=lrange(10),minor_axis=lrange(5),dtype=dtype) _check_dtype(panel,dtype) def test_consolidate(self): @@ -963,15 +963,15 @@ def test_from_dict_mixed_orient(self): def test_constructor_error_msgs(self): def testit(): - Panel(np.random.randn(3,4,5), list(range(4)), list(range(5)), list(range(5))) + Panel(np.random.randn(3,4,5), lrange(4), lrange(5), lrange(5)) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(4, 5, 5\)", testit) def testit(): - Panel(np.random.randn(3,4,5), list(range(5)), list(range(4)), list(range(5))) + Panel(np.random.randn(3,4,5), lrange(5), lrange(4), lrange(5)) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 4, 5\)", testit) def testit(): - Panel(np.random.randn(3,4,5), list(range(5)), list(range(5)), list(range(4))) + Panel(np.random.randn(3,4,5), lrange(5), lrange(5), lrange(4)) assertRaisesRegexp(ValueError, "Shape of passed values is \(3, 4, 5\), indices imply \(5, 5, 4\)", testit) def test_conform(self): @@ -1617,8 +1617,6 @@ def is_sorted(arr): self.assert_(is_sorted(sorted_major.index.labels[0])) def test_to_string(self): - from pandas.util.py3compat import StringIO - buf = StringIO() self.panel.to_string(buf) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index a1566bf30a89a..8f5d7641c02db 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -1,5 +1,5 @@ from datetime import datetime -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import os import operator import unittest @@ -307,7 +307,7 @@ def test_delitem_and_pop(self): values[2] = 2 values[3] = 3 - panel4d = Panel4D(values, list(range(4)), list(range(4)), list(range(4)), list(range(4))) + panel4d = Panel4D(values, lrange(4), lrange(4), 
lrange(4), lrange(4)) # did we delete the right row? @@ -609,8 +609,8 @@ def test_constructor_empty_panel(self): def test_constructor_observe_dtype(self): # GH #411 - panel = Panel(items=list(range(3)), major_axis=list(range(3)), - minor_axis=list(range(3)), dtype='O') + panel = Panel(items=lrange(3), major_axis=lrange(3), + minor_axis=lrange(3), dtype='O') self.assert_(panel.values.dtype == np.object_) def test_consolidate(self): @@ -720,7 +720,7 @@ def test_from_dict_mixed_orient(self): def test_values(self): self.assertRaises(Exception, Panel, np.random.randn(5, 5, 5), - list(range(5)), list(range(5)), list(range(4))) + lrange(5), lrange(5), lrange(4)) def test_conform(self): p = self.panel4d['l1'].filter(items=['ItemA', 'ItemB']) diff --git a/pandas/tests/test_py3compat.py b/pandas/tests/test_py3compat.py new file mode 100644 index 0000000000000..e74b8a86ea6a0 --- /dev/null +++ b/pandas/tests/test_py3compat.py @@ -0,0 +1,70 @@ +""" +Testing that functions from py3compat work as expected +""" + +from pandas.util.py3compat import ( + range, zip, map, filter, + lrange, lzip, lmap, lfilter, + builtins +) +import unittest +import nose +import pandas.util.testing as tm + +class TestBuiltinIterators(unittest.TestCase): + def check_result(self, actual, expected, lengths): + for (iter_res, list_res), exp, length in zip(actual, expected, lengths): + self.assert_(not isinstance(iter_res, list)) + tm.assert_isinstance(list_res, list) + iter_res = list(iter_res) + self.assertEqual(len(list_res), length) + self.assertEqual(len(iter_res), length) + self.assertEqual(iter_res, exp) + self.assertEqual(list_res, exp) + + def test_range(self): + actual1 = range(10) + actual2 = lrange(10) + actual = [actual1, actual2], + expected = list(builtins.range(10)), + lengths = 10, + + actual1 = range(1, 10, 2) + actual2 = lrange(1, 10, 2) + actual += [actual1, actual2], + lengths += 5, + expected += list(builtins.range(1, 10, 2)), + self.check_result(actual, expected, lengths) + + def test_map(self): + func = lambda x, y, z: x + y + z + lst = [builtins.range(10), builtins.range(10), builtins.range(10)] + actual1 = map(func, *lst) + actual2 = lmap(func, *lst) + actual = [actual1, actual2], + expected = list(builtins.map(func, *lst)), + lengths = 10, + self.check_result(actual, expected, lengths) + + + def test_filter(self): + func = lambda x: x + lst = list(builtins.range(10)) + actual1 = filter(func, lst) + actual2 = lfilter(func, lst) + actual = [actual1, actual2], + lengths = 9, + expected = list(builtins.filter(func, lst)), + self.check_result(actual, expected, lengths) + + def test_zip(self): + lst = [builtins.range(10), builtins.range(10), builtins.range(10)] + actual = [zip(*lst), lzip(*lst)], + expected = list(builtins.zip(*lst)), + lengths = 10, + self.check_result(actual, expected, lengths) + +if __name__ == '__main__': + nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'], + # '--with-coverage', '--cover-package=pandas.core'], + exit=False) diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index 3b34934f1a7dc..d0d5f260ea0c7 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -17,7 +17,7 @@ import pandas.util.testing as tm from pandas.util.py3compat import StringIO from pandas.util.py3compat import range -from six.moves import cPickle +from pandas.util.py3compat import cPickle _multiprocess_can_split_ = True diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 3b7f693c8e6d4..5e23efca2c5f8 100644 --- 
a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -1,9 +1,6 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta, date -from pandas.util.py3compat import range -from six.moves import zip -from pandas.util import compat import os import operator import unittest @@ -26,7 +23,8 @@ import pandas.core.datetools as datetools import pandas.core.nanops as nanops -from pandas.util.py3compat import StringIO +from pandas.util.py3compat import StringIO, lrange, range, zip +from pandas.util import compat from pandas.util import py3compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, @@ -144,7 +142,7 @@ def test_multilevel_name_print(self): labels=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]], names=['first', 'second']) - s = Series(list(range(0, len(index))), index=index, name='sth') + s = Series(lrange(0, len(index)), index=index, name='sth') expected = ["first second", "foo one 0", " two 1", @@ -181,7 +179,7 @@ def test_name_printing(self): s.name = None self.assert_(not "Name:" in repr(s)) # test big series (diff code path) - s = Series(list(range(0, 1000))) + s = Series(lrange(0, 1000)) s.name = "test" self.assert_("Name: test" in repr(s)) s.name = None @@ -235,7 +233,7 @@ def test_comparisons(self): def test_none_comparison(self): # bug brought up by #1079 - s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) self.assertRaises(TypeError, s.__eq__, None) def test_sum_zero(self): @@ -324,8 +322,8 @@ def test_constructor_empty(self): empty2 = Series([]) assert_series_equal(empty, empty2) - empty = Series(index=list(range(10))) - empty2 = Series(np.nan, index=list(range(10))) + empty = Series(index=lrange(10)) + empty2 = Series(np.nan, index=lrange(10)) assert_series_equal(empty, empty2) def test_constructor_series(self): @@ -340,12 +338,12 @@ def test_constructor_generator(self): gen = (i for i in range(10)) result = Series(gen) - exp = Series(list(range(10))) + exp = Series(lrange(10)) assert_series_equal(result, exp) gen = (i for i in range(10)) - result = Series(gen, index=list(range(10, 20))) - exp.index = list(range(10, 20)) + result = Series(gen, index=lrange(10, 20)) + exp.index = lrange(10, 20) assert_series_equal(result, exp) def test_constructor_maskedarray(self): @@ -438,10 +436,10 @@ def test_constructor_sanitize(self): self.assertEquals(s.dtype, np.dtype('f8')) def test_constructor_pass_none(self): - s = Series(None, index=list(range(5))) + s = Series(None, index=lrange(5)) self.assert_(s.dtype == np.float64) - s = Series(None, index=list(range(5)), dtype=object) + s = Series(None, index=lrange(5), dtype=object) self.assert_(s.dtype == np.object_) def test_constructor_cast(self): @@ -459,15 +457,15 @@ def test_constructor_dtype_nocast(self): def test_constructor_dtype_datetime64(self): import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=list(range(5))) + s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) self.assert_(isnull(s).all() == True) #### in theory this should be all nulls, but since #### we are not specifying a dtype is ambiguous - s = Series(tslib.iNaT, index=list(range(5))) + s = Series(tslib.iNaT, index=lrange(5)) self.assert_(isnull(s).all() == False) - s = Series(nan, dtype='M8[ns]', index=list(range(5))) + s = Series(nan, dtype='M8[ns]', index=lrange(5)) self.assert_(isnull(s).all() == True) s = Series([datetime(2001, 1, 2, 0, 0), tslib.iNaT], dtype='M8[ns]') @@ -643,7 +641,7 @@ def 
test_getitem_get(self): self.assertRaises(KeyError, self.ts.__getitem__, d) def test_iget(self): - s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) for i in range(len(s)): result = s.iget(i) exp = s[s.index[i]] @@ -668,12 +666,12 @@ def test_iget_nonunique(self): self.assertEqual(s.iget(2), 2) def test_getitem_regression(self): - s = Series(list(range(5)), index=list(range(5))) - result = s[list(range(5))] + s = Series(lrange(5), index=lrange(5)) + result = s[lrange(5)] assert_series_equal(result, s) def test_getitem_setitem_slice_bug(self): - s = Series(list(range(10)), list(range(10))) + s = Series(lrange(10), lrange(10)) result = s[-12:] assert_series_equal(result, s) @@ -683,7 +681,7 @@ def test_getitem_setitem_slice_bug(self): result = s[:-12] assert_series_equal(result, s[:0]) - s = Series(list(range(10)), list(range(10))) + s = Series(lrange(10), lrange(10)) s[-12:] = 0 self.assert_((s == 0).all()) @@ -783,12 +781,12 @@ def test_getitem_box_float64(self): tm.assert_isinstance(value, np.float64) def test_getitem_ambiguous_keyerror(self): - s = Series(list(range(10)), index=list(range(0, 20, 2))) + s = Series(lrange(10), index=lrange(0, 20, 2)) self.assertRaises(KeyError, s.__getitem__, 1) self.assertRaises(KeyError, s.ix.__getitem__, 1) def test_getitem_unordered_dup(self): - obj = Series(list(range(5)), index=['c', 'a', 'a', 'b', 'b']) + obj = Series(lrange(5), index=['c', 'a', 'a', 'b', 'b']) self.assert_(np.isscalar(obj['c'])) self.assert_(obj['c'] == 0) @@ -802,7 +800,7 @@ def test_getitem_dups_with_missing(self): assert_series_equal(result,expected) def test_setitem_ambiguous_keyerror(self): - s = Series(list(range(10)), index=list(range(0, 20, 2))) + s = Series(lrange(10), index=lrange(0, 20, 2)) self.assertRaises(KeyError, s.__setitem__, 1, 5) self.assertRaises(KeyError, s.ix.__setitem__, 1, 5) @@ -975,7 +973,7 @@ def test_basic_getitem_with_labels(self): assert_series_equal(result, expected) # integer indexes, be careful - s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) inds = [0, 2, 5, 7, 8] arr_inds = np.array([0, 2, 5, 7, 8]) result = s[inds] @@ -1002,7 +1000,7 @@ def test_basic_setitem_with_labels(self): assert_series_equal(cp, exp) # integer indexes, be careful - s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) inds = [0, 4, 6] arr_inds = np.array([0, 4, 6]) @@ -1051,7 +1049,7 @@ def test_ix_getitem_not_monotonic(self): self.assertRaises(KeyError, ts2.ix.__setitem__, slice(d1, d2), 0) def test_ix_getitem_setitem_integer_slice_keyerrors(self): - s = Series(np.random.randn(10), index=list(range(0, 20, 2))) + s = Series(np.random.randn(10), index=lrange(0, 20, 2)) # this is OK cp = s.copy() @@ -1115,8 +1113,8 @@ def test_where(self): for dtype in [ np.int8, np.int16, np.int32, np.int64, np.float16, np.float32, np.float64 ]: s = Series(np.arange(10), dtype=dtype) mask = s < 5 - s[mask] = list(range(2,7)) - expected = Series(list(range(2,7)) + list(range(5,10)), dtype=dtype) + s[mask] = lrange(2,7) + expected = Series(lrange(2,7) + lrange(5,10), dtype=dtype) assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1126,7 +1124,7 @@ def test_where(self): mask = s < 5 values = [2.5,3.5,4.5,5.5,6.5] s[mask] = values - expected = Series(values + list(range(5,10)), dtype='float64') + expected = Series(values + lrange(5,10), dtype='float64') 
assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1140,8 +1138,8 @@ def test_where(self): # GH3235 s = Series(np.arange(10),dtype='int64') mask = s < 5 - s[mask] = list(range(2,7)) - expected = Series(list(range(2,7)) + list(range(5,10)),dtype='int64') + s[mask] = lrange(2,7) + expected = Series(lrange(2,7) + lrange(5,10),dtype='int64') assert_series_equal(s, expected) self.assertEquals(s.dtype, expected.dtype) @@ -1451,7 +1449,7 @@ def test_median(self): self._check_stat_op('median', np.median) # test with integers, test failure - int_ts = TimeSeries(np.ones(10, dtype=int), index=list(range(10))) + int_ts = TimeSeries(np.ones(10, dtype=int), index=lrange(10)) self.assertAlmostEqual(np.median(int_ts), int_ts.median()) def test_prod(self): @@ -1512,11 +1510,11 @@ def test_argsort(self): self.assert_(isnull(shifted[4]) == True) result = s.argsort() - expected = Series(list(range(5)),dtype='int64') + expected = Series(lrange(5),dtype='int64') assert_series_equal(result,expected) result = shifted.argsort() - expected = Series(list(range(4)) + [-1],dtype='int64') + expected = Series(lrange(4) + [-1],dtype='int64') assert_series_equal(result,expected) def test_argsort_stable(self): @@ -1595,7 +1593,7 @@ def testit(): # 2888 l = [0] - l.extend(list(range(2**40,2**40+1000))) + l.extend(lrange(2**40,2**40+1000)) s = Series(l, dtype='int64') assert_almost_equal(float(f(s)), float(alternate(s.values))) @@ -1638,7 +1636,7 @@ def test_round(self): self.assertEqual(result.name, self.ts.name) def test_prod_numpy16_bug(self): - s = Series([1., 1., 1.], index=list(range(3))) + s = Series([1., 1., 1.], index=lrange(3)) result = s.prod() self.assert_(not isinstance(result, Series)) @@ -2624,7 +2622,7 @@ def test_value_counts_nunique(self): assert_series_equal(hist, expected) # GH 3002, datetime64[ns] - from pandas.util.py3compat import StringIO + from pandas.util.py3compat import StringIO, lrange import pandas as pd f = StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") df = pd.read_fwf(f, widths=[6,8,3], names=["person_id", "dt", "food"], parse_dates=["dt"]) @@ -3347,7 +3345,7 @@ def test_astype_cast_object_int(self): def test_astype_datetimes(self): import pandas.tslib as tslib - s = Series(tslib.iNaT, dtype='M8[ns]', index=list(range(5))) + s = Series(tslib.iNaT, dtype='M8[ns]', index=lrange(5)) s = s.astype('O') self.assert_(s.dtype == np.object_) @@ -3395,7 +3393,7 @@ def test_map_int(self): self.assert_(not isnull(merged['c'])) def test_map_type_inference(self): - s = Series(list(range(3))) + s = Series(lrange(3)) s2 = s.map(lambda x: np.where(x == 0, 0, 1)) self.assert_(issubclass(s2.dtype.type, np.integer)) @@ -4306,7 +4304,7 @@ def test_reset_index(self): def test_set_index_makes_timeseries(self): idx = tm.makeDateIndex(10) - s = Series(list(range(10))) + s = Series(lrange(10)) s.index = idx self.assertTrue(isinstance(s, TimeSeries)) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d54aedc43457e..7c05c9fa295e9 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1,7 +1,7 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta, date -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import os import operator import re @@ -75,7 +75,7 @@ def test_iter_single_element(self): def test_iter_numeric_try_string(self): # behavior identical to empty series - dsi = Series(list(range(4))) + dsi = Series(lrange(4)) i, s = 100, 'h' 
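The new pandas/tests/test_py3compat.py added above pins down the contract behind these imports: range, zip, map and filter stay lazy, lrange, lzip, lmap and lfilter return lists, and builtins is re-exported for reaching the originals. The following is only a sketch of definitions that would satisfy those tests; the actual module may differ in detail:

    import sys

    if sys.version_info[0] >= 3:
        import builtins
        # the lazy variants are simply the Python 3 builtins
        range, map, zip, filter = (builtins.range, builtins.map,
                                   builtins.zip, builtins.filter)

        def lrange(*args, **kwargs):
            # eager, list-returning counterpart of the lazy range
            return list(builtins.range(*args, **kwargs))

        def lmap(*args, **kwargs):
            return list(builtins.map(*args, **kwargs))

        def lzip(*args, **kwargs):
            return list(builtins.zip(*args, **kwargs))

        def lfilter(*args, **kwargs):
            return list(builtins.filter(*args, **kwargs))
    else:
        import __builtin__ as builtins
        # on Python 2 the lazy variants come from itertools and xrange ...
        from itertools import imap as map, izip as zip, ifilter as filter
        range = xrange
        # ... and the l-prefixed helpers are the eager 2.x builtins
        lrange, lmap, lzip, lfilter = (builtins.range, builtins.map,
                                       builtins.zip, builtins.filter)

Having both flavours importable from one place is what lets the hunks above replace every six.moves import with a pandas.util.py3compat one without touching call sites.
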
diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 22679d36b26c3..5ce0041bf25c3 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -1,5 +1,5 @@ -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, lrange +from pandas.util.py3compat import zip import unittest from numpy import nan @@ -32,7 +32,7 @@ def test_groupby_withnull(self): def test_backfill(self): old = Index([1, 5, 10]) - new = Index(list(range(12))) + new = Index(lrange(12)) filler = algos.backfill_int64(old, new) @@ -41,7 +41,7 @@ def test_backfill(self): # corner case old = Index([1, 4]) - new = Index(list(range(5, 10))) + new = Index(lrange(5, 10)) filler = algos.backfill_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] @@ -49,7 +49,7 @@ def test_backfill(self): def test_pad(self): old = Index([1, 5, 10]) - new = Index(list(range(12))) + new = Index(lrange(12)) filler = algos.pad_int64(old, new) @@ -58,7 +58,7 @@ def test_pad(self): # corner case old = Index([5, 10]) - new = Index(list(range(5))) + new = Index(lrange(5)) filler = algos.pad_int64(old, new) expect_filler = [-1, -1, -1, -1, -1] self.assert_(np.array_equal(filler, expect_filler)) diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 63d78ddde6549..2987a73b34c6a 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -2,8 +2,8 @@ SQL-style merge routines """ -from pandas.util.py3compat import range, long -from six.moves import zip +from pandas.util.py3compat import range, long, lrange, lzip +from pandas.util.py3compat import zip import six import numpy as np import types @@ -1136,7 +1136,7 @@ def _get_new_axes(self): raise AssertionError() # ufff... - indices = list(range(ndim)) + indices = lrange(ndim) indices.remove(self.axis) for i, ax in zip(indices, self.join_axes): @@ -1201,7 +1201,7 @@ def _concat_indexes(indexes): def _make_concat_multiindex(indexes, keys, levels=None, names=None): if ((levels is None and isinstance(keys[0], tuple)) or (levels is not None and len(levels) > 1)): - zipped = list(zip(*keys)) + zipped = lzip(*keys) if names is None: names = [None] * len(zipped) diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index f1d1ba322e38a..e4aa0a7d6249e 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -5,11 +5,11 @@ from pandas.core.reshape import _unstack_multiple from pandas.tools.merge import concat from pandas.tools.util import cartesian_product -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange from pandas.util import compat import six import pandas.core.common as com -from six.moves import zip +from pandas.util.py3compat import zip import numpy as np @@ -200,7 +200,7 @@ def _all_key(key): row_margin = row_margin.stack() # slight hack - new_order = [len(cols)] + list(range(len(cols))) + new_order = [len(cols)] + lrange(len(cols)) row_margin.index = row_margin.index.reorder_levels(new_order) else: row_margin = Series(np.nan, index=result.columns) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index e356e9e9fe5f1..b83b2d176ea7d 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -16,8 +16,8 @@ from pandas.tseries.period import PeriodIndex, Period from pandas.tseries.frequencies import get_period_alias, get_base_alias from pandas.tseries.offsets import DateOffset -from pandas.util.py3compat import range -from six.moves import map, zip +from pandas.util.py3compat import range, lrange, lmap +from 
pandas.util.py3compat import map, zip try: # mpl optional import pandas.tseries.converter as conv @@ -105,7 +105,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', colormap = cm.get_cmap(colormap) if colormap is None: raise ValueError("Colormap {0} is not recognized".format(cmap)) - colors = list(map(colormap, np.linspace(0, 1, num=num_colors))) + colors = lmap(colormap, np.linspace(0, 1, num=num_colors)) elif color is not None: if colormap is not None: warnings.warn("'color' and 'colormap' cannot be used " @@ -122,7 +122,7 @@ def random_color(column): random.seed(column) return [random.random() for _ in range(3)] - colors = list(map(random_color, list(range(num_colors)))) + colors = lmap(random_color, lrange(num_colors)) else: raise NotImplementedError @@ -243,8 +243,8 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, marker = _get_marker_compat(marker) - for i, a in zip(list(range(n)), df.columns): - for j, b in zip(list(range(n)), df.columns): + for i, a in zip(lrange(n), df.columns): + for j, b in zip(lrange(n), df.columns): ax = axes[i, j] if i == j: @@ -503,7 +503,7 @@ def bootstrap_plot(series, fig=None, size=50, samples=500, **kwds): for sampling in samplings]) if fig is None: fig = plt.figure() - x = list(range(samples)) + x = lrange(samples) axes = [] ax1 = fig.add_subplot(2, 3, 1) ax1.set_xlabel("Sample") @@ -601,7 +601,7 @@ def parallel_coordinates(data, class_column, cols=None, ax=None, colors=None, raise ValueError('Length of xticks must match number of columns') x = xticks else: - x = list(range(ncols)) + x = lrange(ncols) if ax is None: ax = plt.gca() @@ -684,7 +684,7 @@ def autocorrelation_plot(series, ax=None): def r(h): return ((data[:n - h] - mean) * (data[h:] - mean)).sum() / float(n) / c0 x = np.arange(n) + 1 - y = list(map(r, x)) + y = lmap(r, x) z95 = 1.959963984540054 z99 = 2.5758293035489004 ax.axhline(y=z99 / np.sqrt(n), linestyle='--', color='grey') @@ -1038,9 +1038,9 @@ def _get_xticks(self, convert_period=False): x = self.data.index._mpl_repr() else: self._need_to_set_index = True - x = list(range(len(index))) + x = lrange(len(index)) else: - x = list(range(len(index))) + x = lrange(len(index)) return x diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index f2d2b1fd61fc0..2bc3775247478 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,5 +1,5 @@ from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import zip import numpy as np import random from copy import deepcopy diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index ea57fc752872a..742bc81d485e1 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -9,8 +9,8 @@ import numpy as np import random -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, lrange, lzip +from pandas.util.py3compat import zip from pandas.util import compat from pandas.tseries.index import DatetimeIndex from pandas.tools.merge import merge, concat, ordered_merge, MergeError @@ -29,7 +29,7 @@ def get_test_data(ngroups=NGROUPS, n=N): - unique_groups = list(range(ngroups)) + unique_groups = lrange(ngroups) arr = np.asarray(np.tile(unique_groups, n // ngroups)) if len(arr) < n: @@ -558,8 +558,8 @@ def test_merge_different_column_key_names(self): assert_almost_equal(merged['value_y'], [6, np.nan, 5, 8, 5, 8, 7]) def test_merge_nocopy(self): - left = DataFrame({'a': 0, 'b': 1}, 
index=list(range(10))) - right = DataFrame({'c': 'foo', 'd': 'bar'}, index=list(range(10))) + left = DataFrame({'a': 0, 'b': 1}, index=lrange(10)) + right = DataFrame({'c': 'foo', 'd': 'bar'}, index=lrange(10)) merged = merge(left, right, left_index=True, right_index=True, copy=False) @@ -585,15 +585,15 @@ def test_join_sort(self): # smoke test joined = left.join(right, on='key', sort=False) - self.assert_(np.array_equal(joined.index, list(range(4)))) + self.assert_(np.array_equal(joined.index, lrange(4))) def test_intelligently_handle_join_key(self): # #733, be a bit more 1337 about not returning unconsolidated DataFrame left = DataFrame({'key': [1, 1, 2, 2, 3], - 'value': list(range(5))}, columns=['value', 'key']) + 'value': lrange(5)}, columns=['value', 'key']) right = DataFrame({'key': [1, 1, 2, 3, 4, 5], - 'rvalue': list(range(6))}) + 'rvalue': lrange(6)}) joined = merge(left, right, on='key', how='outer') expected = DataFrame({'key': [1, 1, 1, 1, 2, 2, 3, 4, 5.], @@ -607,8 +607,8 @@ def test_intelligently_handle_join_key(self): def test_handle_join_key_pass_array(self): left = DataFrame({'key': [1, 1, 2, 2, 3], - 'value': list(range(5))}, columns=['value', 'key']) - right = DataFrame({'rvalue': list(range(6))}) + 'value': lrange(5)}, columns=['value', 'key']) + right = DataFrame({'rvalue': lrange(6)}) key = np.array([1, 1, 2, 3, 4, 5]) merged = merge(left, right, left_on='key', right_on=key, how='outer') @@ -618,8 +618,8 @@ def test_handle_join_key_pass_array(self): self.assert_(merged['key'].notnull().all()) self.assert_(merged2['key'].notnull().all()) - left = DataFrame({'value': list(range(5))}, columns=['value']) - right = DataFrame({'rvalue': list(range(6))}) + left = DataFrame({'value': lrange(5)}, columns=['value']) + right = DataFrame({'rvalue': lrange(6)}) lkey = np.array([1, 1, 2, 2, 3]) rkey = np.array([1, 1, 2, 3, 4, 5]) @@ -627,8 +627,8 @@ def test_handle_join_key_pass_array(self): self.assert_(np.array_equal(merged['key_0'], np.array([1, 1, 1, 1, 2, 2, 3, 4, 5]))) - left = DataFrame({'value': list(range(3))}) - right = DataFrame({'rvalue': list(range(6))}) + left = DataFrame({'value': lrange(3)}) + right = DataFrame({'rvalue': lrange(6)}) key = np.array([0, 1, 1, 2, 2, 3]) merged = merge(left, right, left_index=True, right_on=key, how='outer') @@ -790,7 +790,7 @@ def setUp(self): def test_merge_on_multikey(self): joined = self.data.join(self.to_join, on=['key1', 'key2']) - join_key = Index(list(zip(self.data['key1'], self.data['key2']))) + join_key = Index(lzip(self.data['key1'], self.data['key2'])) indexer = self.to_join.index.get_indexer(join_key) ex_values = self.to_join.values.take(indexer, axis=0) ex_values[indexer == -1] = np.nan @@ -1616,7 +1616,7 @@ def test_concat_series_axis1(self): s2.name = None result = concat([s, s2], axis=1) - self.assertTrue(np.array_equal(result.columns, list(range(2)))) + self.assertTrue(np.array_equal(result.columns, lrange(2))) # must reindex, #2603 s = Series(randn(3), index=['c', 'a', 'b'], name='A') diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index 54b8f05b61d52..09095ba801cf4 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -3,7 +3,7 @@ import unittest import numpy as np -from six.moves import zip +from pandas.util.py3compat import zip from pandas import DataFrame, Series, unique import pandas.util.testing as tm diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index 31db8ed705fe4..fd9d290d6e126 100644 --- a/pandas/tools/tile.py +++ 
b/pandas/tools/tile.py @@ -8,7 +8,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.nanops as nanops -from six.moves import zip +from pandas.util.py3compat import zip import numpy as np diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index efbd80350379f..3e79bdf0184ad 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,5 +1,5 @@ from datetime import datetime, timedelta -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import six import datetime as pydt import numpy as np @@ -886,7 +886,7 @@ def __call__(self): base = self.base (d, m) = divmod(vmin, base) vmin = (d + 1) * base - locs = list(range(vmin, vmax + 1, base)) + locs = lrange(vmin, vmax + 1, base) return locs def autoscale(self): diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 3157c694bdc2b..d6065a9a552ca 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,7 +1,7 @@ from datetime import datetime from pandas.util.py3compat import range, long from pandas.util import compat -from six.moves import zip +from pandas.util.py3compat import zip import six import re diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 0a7b573875e8b..9fce356522205 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -21,7 +21,7 @@ import pandas.tslib as tslib import pandas.algos as _algos import six -from six.moves import map, zip +from pandas.util.py3compat import map, zip #--------------- diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 2057a418fa6f9..909c8b361de5a 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -23,8 +23,8 @@ import pandas as pd import numpy as np import six -from pandas.util.py3compat import range -from six.moves import map, zip +from pandas.util.py3compat import range, lrange, lmap +from pandas.util.py3compat import map, zip randn = np.random.randn from pandas import Series, TimeSeries, DataFrame @@ -1118,7 +1118,7 @@ def test_constructor_U(self): def test_constructor_arrays_negative_year(self): years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(list(range(1, 5)), 40) + quarters = np.tile(lrange(1, 5), 40) pindex = PeriodIndex(year=years, quarter=quarters) @@ -1126,8 +1126,8 @@ def test_constructor_arrays_negative_year(self): self.assert_(np.array_equal(pindex.quarter, quarters)) def test_constructor_invalid_quarters(self): - self.assertRaises(ValueError, PeriodIndex, year=list(range(2000, 2004)), - quarter=list(range(4)), freq='Q-DEC') + self.assertRaises(ValueError, PeriodIndex, year=lrange(2000, 2004), + quarter=lrange(4), freq='Q-DEC') def test_constructor_corner(self): self.assertRaises(ValueError, PeriodIndex, periods=10, freq='A') @@ -1216,7 +1216,7 @@ def test_getitem_partial(self): def test_getitem_datetime(self): rng = period_range(start='2012-01-01', periods=10, freq='W-MON') - ts = Series(list(range(len(rng))), index=rng) + ts = Series(lrange(len(rng)), index=rng) dt1 = datetime(2011, 10, 2) dt4 = datetime(2012, 4, 20) @@ -1288,7 +1288,7 @@ def _get_with_delta(delta, freq='A-DEC'): def test_to_timestamp_quarterly_bug(self): years = np.arange(1960, 2000).repeat(4) - quarters = np.tile(list(range(1, 5)), 40) + quarters = np.tile(lrange(1, 5), 40) pindex = PeriodIndex(year=years, quarter=quarters) @@ -2006,7 +2006,7 @@ def test_map_with_string_constructor(self): types += six.text_type, for t in 
types: - expected = np.array(list(map(t, raw)), dtype=object) + expected = np.array(lmap(t, raw), dtype=object) res = index.map(t) # should return an array diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index f6242139e9e93..95bfa98d32cf2 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -3,8 +3,8 @@ import unittest import nose -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, lrange +from pandas.util.py3compat import zip import numpy as np from numpy.testing.decorators import slow @@ -188,7 +188,7 @@ def test_fake_inferred_business(self): plt.clf() fig.add_subplot(111) rng = date_range('2001-1-1', '2001-1-10') - ts = Series(list(range(len(rng))), rng) + ts = Series(lrange(len(rng)), rng) ts = ts[:3].append(ts[5:]) ax = ts.plot() self.assert_(not hasattr(ax, 'freq')) @@ -944,7 +944,7 @@ def test_format_date_axis(self): def test_ax_plot(self): x = DatetimeIndex(start='2012-01-02', periods=10, freq='D') - y = list(range(len(x))) + y = lrange(len(x)) import matplotlib.pyplot as plt fig = plt.figure() ax = fig.add_subplot(111) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index b5e6d9de436a1..1db735896c902 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2,8 +2,8 @@ from datetime import datetime, timedelta -from pandas.util.py3compat import range -from six.moves import zip +from pandas.util.py3compat import range, lrange +from pandas.util.py3compat import zip import numpy as np from pandas import Series, TimeSeries, DataFrame, Panel, isnull, notnull, Timestamp @@ -862,7 +862,7 @@ def test_resample_weekly_all_na(self): def test_resample_tz_localized(self): dr = date_range(start='2012-4-13', end='2012-5-1') - ts = Series(list(range(len(dr))), dr) + ts = Series(lrange(len(dr)), dr) ts_utc = ts.tz_localize('UTC') ts_local = ts_utc.tz_convert('America/Los_Angeles') diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 68ea73a661c74..779166f3c5c75 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -28,9 +28,9 @@ import pandas.index as _index -from pandas.util.py3compat import range, long, StringIO +from pandas.util.py3compat import range, long, StringIO, lrange, lmap from pandas.util.compat import product -from six.moves import map, zip, cPickle as pickle +from pandas.util.py3compat import map, zip, cPickle as pickle from pandas import read_pickle import pandas.core.datetools as dt from numpy.random import rand @@ -238,17 +238,17 @@ def test_indexing(self): # GH3546 (not including times on the last day) idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:00', freq='H') - ts = Series(list(range(len(idx))), index=idx) + ts = Series(lrange(len(idx)), index=idx) expected = ts['2013-05'] assert_series_equal(expected,ts) idx = date_range(start='2013-05-31 00:00', end='2013-05-31 23:59', freq='S') - ts = Series(list(range(len(idx))), index=idx) + ts = Series(lrange(len(idx)), index=idx) expected = ts['2013-05'] assert_series_equal(expected,ts) idx = [ Timestamp('2013-05-31 00:00'), Timestamp(datetime(2013,5,31,23,59,59,999999))] - ts = Series(list(range(len(idx))), index=idx) + ts = Series(lrange(len(idx)), index=idx) expected = ts['2013'] assert_series_equal(expected,ts) @@ -452,7 +452,7 @@ def test_frame_setitem_timestamp(self): # 2155 columns = 
DatetimeIndex(start='1/1/2012', end='2/1/2012', freq=datetools.bday) - index = list(range(10)) + index = lrange(10) data = DataFrame(columns=columns, index=index) t = datetime(2012, 11, 1) ts = Timestamp(t) @@ -663,7 +663,7 @@ def test_reindex_series_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') series = Series(rng) - result = series.reindex(list(range(15))) + result = series.reindex(lrange(15)) self.assert_(np.issubdtype(result.dtype, np.dtype('M8[ns]'))) mask = result.isnull() @@ -674,7 +674,7 @@ def test_reindex_frame_add_nat(self): rng = date_range('1/1/2000 00:00:00', periods=10, freq='10s') df = DataFrame({'A': np.random.randn(len(rng)), 'B': rng}) - result = df.reindex(list(range(15))) + result = df.reindex(lrange(15)) self.assert_(np.issubdtype(result['B'].dtype, np.dtype('M8[ns]'))) mask = com.isnull(result)['B'] @@ -889,7 +889,7 @@ def test_to_datetime_types(self): ### array = ['2012','20120101','20120101 12:01:01'] array = ['20120101','20120101 12:01:01'] expected = list(to_datetime(array)) - result = list(map(Timestamp,array)) + result = lmap(Timestamp,array) tm.assert_almost_equal(result,expected) ### currently fails ### @@ -1512,11 +1512,11 @@ def test_groupby_count_dateparseerror(self): dr = date_range(start='1/1/2012', freq='5min', periods=10) # BAD Example, datetimes first - s = Series(np.arange(10), index=[dr, list(range(10))]) + s = Series(np.arange(10), index=[dr, lrange(10)]) grouped = s.groupby(lambda x: x[1] % 2 == 0) result = grouped.count() - s = Series(np.arange(10), index=[list(range(10)), dr]) + s = Series(np.arange(10), index=[lrange(10), dr]) grouped = s.groupby(lambda x: x[0] % 2 == 0) expected = grouped.count() @@ -1667,7 +1667,7 @@ def test_concat_datetime_datetime64_frame(self): df2_obj = DataFrame.from_records(rows, columns=['date', 'test']) ind = date_range(start="2000/1/1", freq="D", periods=10) - df1 = DataFrame({'date': ind, 'test':list(range(10))}) + df1 = DataFrame({'date': ind, 'test':lrange(10)}) # it works! 
pd.concat([df1, df2_obj]) @@ -1686,7 +1686,7 @@ def test_stringified_slice_with_tz(self): import datetime start=datetime.datetime.now() idx=DatetimeIndex(start=start,freq="1d",periods=10) - df=DataFrame(list(range(10)),index=idx) + df=DataFrame(lrange(10),index=idx) df["2013-01-14 23:44:34.437768-05:00":] # no exception here def test_append_join_nondatetimeindex(self): @@ -1980,7 +1980,6 @@ def setUpClass(cls): cls.series = pickle.load(f) def test_pass_offset_warn(self): - from pandas.util.py3compat import StringIO buf = StringIO() sys.stderr = buf @@ -2401,7 +2400,6 @@ def test_frame_apply_dont_convert_datetime64(self): class TestLegacyCompat(unittest.TestCase): def setUp(self): - from pandas.util.py3compat import StringIO # suppress deprecation warnings sys.stderr = StringIO() @@ -2649,7 +2647,7 @@ def test_series_set_value(self): def test_slice_locs_indexerror(self): times = [datetime(2000, 1, 1) + timedelta(minutes=i * 10) for i in range(100000)] - s = Series(list(range(100000)), times) + s = Series(lrange(100000), times) s.ix[datetime(1900, 1, 1):datetime(2100, 1, 1)] diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 1c7607c63b76c..7ee89f7cadb70 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -28,8 +28,8 @@ from numpy.random import rand from pandas.util.testing import assert_frame_equal import pandas.util.py3compat as py3compat -from pandas.util.py3compat import range -from six.moves import zip, cPickle as pickle +from pandas.util.py3compat import range, lrange +from pandas.util.py3compat import zip, cPickle as pickle from pandas.core.datetools import BDay import pandas.core.common as com @@ -394,7 +394,7 @@ def test_take_dont_lose_meta(self): _skip_if_no_pytz() rng = date_range('1/1/2000', periods=20, tz='US/Eastern') - result = rng.take(list(range(5))) + result = rng.take(lrange(5)) self.assert_(result.tz == rng.tz) self.assert_(result.freq == rng.freq) @@ -747,7 +747,7 @@ def test_join_aware(self): test2 = DataFrame(np.zeros((3, 3)), index=date_range("2012-11-15 00:00:00", periods=3, freq="250L", tz="US/Central"), - columns=list(range(3, 6))) + columns=lrange(3, 6)) result = test1.join(test2, how='outer') ex_index = test1.index.union(test2.index) @@ -816,7 +816,7 @@ def test_append_aware_naive(self): # mixed rng1 = date_range('1/1/2011 01:00', periods=1, freq='H') - rng2 = list(range(100)) + rng2 = lrange(100) ts1 = Series(np.random.randn(len(rng1)), index=rng1) ts2 = Series(np.random.randn(len(rng2)), index=rng2) ts_result = ts1.append(ts2) diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index 92ec7d2bec36e..5021214ac869a 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import numpy as np import pandas as pd @@ -54,12 +54,12 @@ def pivot_annual(series, freq=None): # adjust for leap year offset[(-isleapyear(year)) & (offset >= 59)] += 1 - columns = list(range(1, 367)) + columns = lrange(1, 367) # todo: strings like 1/1, 1/25, etc.? 
elif freq in ('M', 'BM'): width = 12 offset = index.month - 1 - columns = list(range(1, 13)) + columns = lrange(1, 13) elif freq == 'H': width = 8784 grouped = series.groupby(series.index.year) @@ -67,7 +67,7 @@ defaulted = grouped.apply(lambda x: x.reset_index(drop=True)) defaulted.index = defaulted.index.droplevel(0) offset = np.asarray(defaulted.index) offset[-isleapyear(year) & (offset >= 1416)] += 24 - columns = list(range(1, 8785)) + columns = lrange(1, 8785) else: raise NotImplementedError(freq) diff --git a/pandas/util/compat.py b/pandas/util/compat.py index 413cc0a9dac83..1f57d00256ab6 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -1,6 +1,6 @@ import sys import six -from six.moves import map, filter +from pandas.util.py3compat import map, filter from pandas.util.py3compat import range from itertools import product diff --git a/pandas/util/counter.py b/pandas/util/counter.py index 90e71d3b806f3..86200f5ed89ae 100644 --- a/pandas/util/counter.py +++ b/pandas/util/counter.py @@ -6,7 +6,7 @@ from itertools import repeat as _repeat, chain as _chain, starmap as _starmap from operator import itemgetter as _itemgetter import six -from six.moves import map +from pandas.util.py3compat import map try: from collections import Mapping diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py index 969ba94fda76c..9361bad20df32 100644 --- a/pandas/util/py3compat.py +++ b/pandas/util/py3compat.py @@ -1,6 +1,22 @@ import sys PY3 = (sys.version_info[0] >= 3) +# import iterator versions of these functions +from six.moves import zip, filter, reduce, map + +try: + import __builtin__ as builtins + # not writeable when instantiated with string, doesn't handle unicode well + from cStringIO import StringIO as cStringIO + # always writeable + from StringIO import StringIO + BytesIO = StringIO + import cPickle +except ImportError: + import builtins + from io import StringIO, BytesIO + cStringIO = StringIO + import pickle as cPickle if PY3: def isidentifier(s): @@ -12,6 +28,20 @@ def str_to_bytes(s, encoding='ascii'): def bytes_to_str(b, encoding='utf-8'): return b.decode(encoding) + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) + + # need to put range in the namespace range = range long = int unichr = chr @@ -29,22 +59,14 @@ def str_to_bytes(s, encoding='ascii'): def bytes_to_str(b, encoding='ascii'): return b - range = xrange + # Python 2-builtin ranges produce lists + lrange = builtins.range + lzip = builtins.zip + lmap = builtins.map + lfilter = builtins.filter + # have to explicitly put builtins into the namespace + range = xrange long = long unichr = unichr -try: - # not writeable if instantiated with string, not good with unicode - from cStringIO import StringIO as cStringIO - # writeable and handles unicode - from StringIO import StringIO -except ImportError: - # no more StringIO - from io import StringIO - cStringIO = StringIO - -try: - from io import BytesIO -except ImportError: - from cStringIO import StringIO as BytesIO diff --git a/pandas/util/testing.py b/pandas/util/testing.py index d235298e8a280..6ee3ba3b730f7 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2,8 +2,8 @@ # pylint: disable-msg=W0402 -from pandas.util.py3compat import range, unichr -from six.moves import zip +from 
pandas.util.py3compat import range, unichr, lrange, lmap, lzip +from pandas.util.py3compat import zip import random import string import sys @@ -34,7 +34,7 @@ from pandas.io.common import urlopen, HTTPException import six -from six.moves import map +from pandas.util.py3compat import map Index = index.Index MultiIndex = index.MultiIndex @@ -54,7 +54,7 @@ def rands(n): def randu(n): - choices = six.u("").join(map(unichr, list(range(1488, 1488 + 26)))) + choices = six.u("").join(map(unichr, lrange(1488, 1488 + 26))) choices += string.digits return ''.join([random.choice(choices) for _ in range(n)]) @@ -318,7 +318,7 @@ def makeUnicodeIndex(k): def makeIntIndex(k): - return Index(list(range(k))) + return Index(lrange(k)) def makeFloatIndex(k): @@ -490,7 +490,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, def keyfunc(x): import re numeric_tuple = re.sub("[^\d_]_?","",x).split("_") - return list(map(int,numeric_tuple)) + return lmap(int,numeric_tuple) # build a list of lists to create the index from div_factor = nentries // ndupe_l[i] + 1 @@ -502,7 +502,7 @@ def keyfunc(x): result = list(sorted(cnt.elements(), key=keyfunc))[:nentries] tuples.append(result) - tuples = list(zip(*tuples)) + tuples = lzip(*tuples) # convert tuples to index if nentries == 1: diff --git a/scripts/bench_join.py b/scripts/bench_join.py index 758a4fedda636..a3bd4157ac163 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import numpy as np import pandas.lib as lib from pandas import * @@ -28,8 +28,8 @@ a_series = Series(av, index=a) b_series = Series(bv, index=b) -a_frame = DataFrame(avf, index=a, columns=list(range(K))) -b_frame = DataFrame(bvf, index=b, columns=list(range(K, 2 * K))) +a_frame = DataFrame(avf, index=a, columns=lrange(K)) +b_frame = DataFrame(bvf, index=b, columns=lrange(K, 2 * K)) def do_left_join(a, b, av, bv): @@ -163,8 +163,8 @@ def bench_python(n=100000, pct_overlap=0.20, K=1): avf = np.random.randn(n, K) bvf = np.random.randn(n, K) - a_frame = DataFrame(avf, index=a, columns=list(range(K))) - b_frame = DataFrame(bvf, index=b, columns=list(range(K, 2 * K))) + a_frame = DataFrame(avf, index=a, columns=lrange(K)) + b_frame = DataFrame(bvf, index=b, columns=lrange(K, 2 * K)) all_results[logn] = result = {} diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index 0683fbb67a1aa..818ac300956ee 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -1,9 +1,9 @@ from pandas import * import numpy as np -from six.moves import zip +from pandas.util.py3compat import zip from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lzip import pandas.lib as lib N = 100000 @@ -11,17 +11,17 @@ key1 = [rands(10) for _ in range(N)] key2 = [rands(10) for _ in range(N)] -zipped = list(zip(key1, key2)) +zipped = lzip(key1, key2) def _zip(*args): arr = np.empty(N, dtype=object) - arr[:] = list(zip(*args)) + arr[:] = lzip(*args) return arr def _zip2(*args): - return lib.list_to_object_array(list(zip(*args))) + return lib.list_to_object_array(lzip(*args)) index = MultiIndex.from_arrays([key1, key2]) to_join = DataFrame({'j1': np.random.randn(100000)}, index=index) diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 925d40d0fc856..a4c76671d71c2 100755 --- a/scripts/find_commits_touching_func.py +++ 
b/scripts/find_commits_touching_func.py @@ -4,9 +4,9 @@ # copryright 2013, y-p @ github from __future__ import print_function -from pandas.util.py3compat import range +from pandas.util.py3compat import range, lrange import six -from six.moves import map +from pandas.util.py3compat import map """Search the git history for all commits touching a named method @@ -162,7 +162,7 @@ def sorter(i): print("\nThese commits touched the %s method in these files on these dates:\n" \ % args.funcname) - for i in sorted(list(range(len(hits))),key=sorter): + for i in sorted(lrange(len(hits)),key=sorter): hit = hits[i] h,s,d=get_commit_vitals(hit.commit) p=hit.path.split(os.path.realpath(os.curdir)+os.path.sep)[-1] diff --git a/scripts/json_manip.py b/scripts/json_manip.py index 4733df68c5b64..7bea33055f535 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -75,7 +75,7 @@ import operator import sys import six -from six.moves import map +from pandas.util.py3compat import map import pandas.util.compat as compat diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index 665a33f924810..748b101b144cf 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -1,6 +1,6 @@ from vbench.api import Benchmark from datetime import datetime -from six.moves import map +from pandas.util.py3compat import map common_setup = """from pandas_vb_common import * """ @@ -285,12 +285,12 @@ def f(g): share_na = 0.1 dates = date_range('1997-12-31', periods=n_dates, freq='B') -dates = Index(list(map(lambda x: x.year * 10000 + x.month * 100 + x.day, dates))) +dates = Index(lmap(lambda x: x.year * 10000 + x.month * 100 + x.day, dates)) secid_min = int('10000000', 16) secid_max = int('F0000000', 16) step = (secid_max - secid_min) // (n_securities - 1) -security_ids = list(map(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step))) +security_ids = lmap(lambda x: hex(x)[2:10].upper(), range(secid_min, secid_max + 1, step)) data_index = MultiIndex(levels=[dates.values, security_ids], labels=[[i for i in xrange(n_dates) for _ in xrange(n_securities)], range(n_securities) * n_dates], diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index 37775557fe1a2..8206c3554545f 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -27,6 +27,6 @@ try: # if no range in py3compat, then don't import zip or map either from pandas.util.py3compat import range - from six.moves import zip, map + from pandas.util.py3compat import zip, map except ImportError: pass diff --git a/vb_suite/parser.py b/vb_suite/parser.py index 8bcba2b20e79d..caae86afdc061 100644 --- a/vb_suite/parser.py +++ b/vb_suite/parser.py @@ -44,7 +44,7 @@ start_date=datetime(2011, 11, 1)) setup = common_setup + """ -from six.moves import cStringIO as StringIO +from pandas.util.py3compat import cStringIO as StringIO import os N = 10000 K = 8 @@ -63,7 +63,7 @@ read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) setup = common_setup + """ -from six.moves import cStringIO as StringIO +from pandas.util.py3compat import cStringIO as StringIO import os N = 10000 K = 8 diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index d7a5b9d3e4049..5101cf7f911f0 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -28,7 +28,7 @@ from __future__ import print_function from pandas.util.py3compat import range -from six.moves import map +from pandas.util.py3compat import map import shutil import os import sys From 0c0cfbcab9d2d3802a40239a12b5e2c90435a940 Mon Sep 17 00:00:00 2001 From: Jeffrey 
Tratner Date: Sun, 28 Jul 2013 00:07:39 -0400 Subject: [PATCH 06/11] CLN: Add __bool__ alias for __nonzero__ Python3 uses __bool__, not __nonzero__ --- pandas/core/generic.py | 3 +++ pandas/core/internals.py | 3 +++ 2 files changed, 6 insertions(+) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0cf9a066e11bd..d7fc4ce21ca87 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -651,6 +651,9 @@ def empty(self): def __nonzero__(self): return not self.empty + # Python 3 compat + __bool__ = __nonzero__ + @property def ndim(self): return self._data.ndim diff --git a/pandas/core/internals.py b/pandas/core/internals.py index e2b3131fac9ce..21cfa8658b03c 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1016,6 +1016,9 @@ def make_empty(self): def __nonzero__(self): return True + # Python3 compat + __bool__ = __nonzero__ + @property def ndim(self): return len(self.axes) From e957fbc0aa9cdfe9d4c5592c088416b20cb23b28 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 28 Jul 2013 01:10:35 -0400 Subject: [PATCH 07/11] CLN/ENH: Merge compat with py3compat and six For simplicity, move the relevant portions of six into compat and combine py3compat and compat together (given the distinctions were relatively arbitrary already). Also switch to importing u to make things cleaner. --- LICENSES/SIX | 21 ++ bench/alignment.py | 2 +- bench/bench_get_put_value.py | 2 +- bench/bench_groupby.py | 2 +- bench/bench_khash_dict.py | 2 +- bench/bench_merge.py | 12 +- bench/bench_merge_sqlite.py | 2 +- bench/bench_take_indexing.py | 2 +- bench/bench_unique.py | 3 +- bench/better_unique.py | 5 +- bench/io_roundtrip.py | 2 +- bench/serialize.py | 2 +- bench/test.py | 3 +- doc/plots/stats/moment_plots.py | 2 +- doc/source/conf.py | 10 +- doc/source/io.rst | 2 +- doc/sphinxext/comment_eater.py | 2 +- doc/sphinxext/compiler_unparse.py | 2 +- doc/sphinxext/docscrape.py | 7 +- doc/sphinxext/docscrape_sphinx.py | 6 +- doc/sphinxext/ipython_directive.py | 27 +- doc/sphinxext/numpydoc.py | 38 +-- doc/sphinxext/plot_directive.py | 4 +- doc/sphinxext/tests/test_docscrape.py | 16 +- doc/sphinxext/traitsdoc.py | 3 +- examples/finance.py | 2 +- pandas/compat/scipy.py | 2 +- pandas/core/algorithms.py | 4 +- pandas/core/array.py | 1 - pandas/core/base.py | 4 +- pandas/core/common.py | 52 ++-- pandas/core/config.py | 18 +- pandas/core/format.py | 34 +-- pandas/core/frame.py | 34 +-- pandas/core/generic.py | 5 +- pandas/core/groupby.py | 31 +- pandas/core/index.py | 5 +- pandas/core/indexing.py | 9 +- pandas/core/internals.py | 12 +- pandas/core/nanops.py | 5 +- pandas/core/panel.py | 25 +- pandas/core/panelnd.py | 6 +- pandas/core/reshape.py | 6 +- pandas/core/series.py | 34 +-- pandas/core/strings.py | 12 +- pandas/io/clipboard.py | 2 +- pandas/io/common.py | 8 +- pandas/io/data.py | 22 +- pandas/io/date_converters.py | 2 +- pandas/io/excel.py | 46 +-- pandas/io/ga.py | 19 +- pandas/io/html.py | 18 +- pandas/io/json.py | 10 +- pandas/io/parsers.py | 32 +- pandas/io/pickle.py | 3 +- pandas/io/pytables.py | 240 ++++++++------- pandas/io/sql.py | 32 +- pandas/io/stata.py | 16 +- pandas/io/tests/generate_legacy_pickles.py | 2 +- pandas/io/tests/test_cparser.py | 6 +- pandas/io/tests/test_data.py | 1 - pandas/io/tests/test_date_converters.py | 4 +- pandas/io/tests/test_excel.py | 9 +- pandas/io/tests/test_html.py | 7 +- pandas/io/tests/test_json/test_pandas.py | 3 +- pandas/io/tests/test_json/test_ujson.py | 48 ++- pandas/io/tests/test_parsers.py | 41 ++- 
pandas/io/tests/test_pickle.py | 4 +- pandas/io/tests/test_pytables.py | 11 +- pandas/io/tests/test_sql.py | 8 +- pandas/io/tests/test_stata.py | 1 - pandas/io/tests/test_wb.py | 19 +- pandas/io/wb.py | 3 +- pandas/rpy/common.py | 3 +- pandas/sparse/array.py | 4 +- pandas/sparse/frame.py | 3 +- pandas/sparse/panel.py | 6 +- pandas/sparse/series.py | 4 +- pandas/sparse/tests/test_array.py | 2 +- pandas/sparse/tests/test_list.py | 2 +- pandas/sparse/tests/test_sparse.py | 4 +- pandas/src/generate_code.py | 3 +- pandas/stats/fama_macbeth.py | 3 +- pandas/stats/math.py | 2 +- pandas/stats/misc.py | 3 +- pandas/stats/ols.py | 5 +- pandas/stats/plm.py | 2 +- pandas/stats/tests/test_fama_macbeth.py | 2 +- pandas/stats/tests/test_moments.py | 5 +- pandas/stats/tests/test_var.py | 2 +- pandas/stats/var.py | 3 +- pandas/tests/test_algos.py | 2 +- pandas/tests/test_categorical.py | 2 +- pandas/tests/test_common.py | 16 +- .../{test_py3compat.py => test_compat.py} | 4 +- pandas/tests/test_expressions.py | 4 +- pandas/tests/test_format.py | 112 +++---- pandas/tests/test_frame.py | 42 ++- pandas/tests/test_graphics.py | 27 +- pandas/tests/test_groupby.py | 11 +- pandas/tests/test_index.py | 32 +- pandas/tests/test_indexing.py | 5 +- pandas/tests/test_internals.py | 7 +- pandas/tests/test_multilevel.py | 12 +- pandas/tests/test_panel.py | 8 +- pandas/tests/test_panel4d.py | 4 +- pandas/tests/test_panelnd.py | 2 +- pandas/tests/test_reshape.py | 4 +- pandas/tests/test_rplot.py | 2 +- pandas/tests/test_series.py | 35 +-- pandas/tests/test_stats.py | 1 - pandas/tests/test_strings.py | 130 ++++---- pandas/tests/test_tseries.py | 3 +- pandas/tools/merge.py | 9 +- pandas/tools/pivot.py | 6 +- pandas/tools/plotting.py | 9 +- pandas/tools/rplot.py | 5 +- pandas/tools/tests/test_merge.py | 3 +- pandas/tools/tests/test_pivot.py | 37 +-- pandas/tools/tests/test_tile.py | 2 +- pandas/tools/tile.py | 2 +- pandas/tseries/converter.py | 14 +- pandas/tseries/frequencies.py | 10 +- pandas/tseries/index.py | 23 +- pandas/tseries/offsets.py | 11 +- pandas/tseries/period.py | 25 +- pandas/tseries/resample.py | 6 +- pandas/tseries/tests/test_converter.py | 6 +- pandas/tseries/tests/test_daterange.py | 2 +- pandas/tseries/tests/test_frequencies.py | 2 +- pandas/tseries/tests/test_offsets.py | 2 +- pandas/tseries/tests/test_period.py | 36 ++- pandas/tseries/tests/test_plotting.py | 3 +- pandas/tseries/tests/test_resample.py | 4 +- pandas/tseries/tests/test_timeseries.py | 12 +- pandas/tseries/tests/test_timezones.py | 5 +- pandas/tseries/tests/test_util.py | 2 +- pandas/tseries/tools.py | 16 +- pandas/tseries/util.py | 2 +- pandas/util/compat.py | 286 +++++++++++++++--- pandas/util/counter.py | 3 +- pandas/util/decorators.py | 5 +- pandas/util/py3compat.py | 72 ----- pandas/util/testing.py | 18 +- scripts/bench_join.py | 2 +- scripts/bench_join_multi.py | 3 +- scripts/bench_refactor.py | 2 +- scripts/find_commits_touching_func.py | 12 +- scripts/groupby_sample.py | 3 +- scripts/groupby_test.py | 2 +- scripts/hdfstore_panel_perf.py | 2 +- scripts/json_manip.py | 149 +++++---- scripts/leak.py | 2 +- scripts/roll_median_leak.py | 2 +- scripts/testmed.py | 2 +- vb_suite/groupby.py | 2 +- vb_suite/indexing.py | 2 +- vb_suite/pandas_vb_common.py | 7 - vb_suite/parser.py | 4 +- vb_suite/source/conf.py | 11 +- vb_suite/test_perf.py | 5 +- 161 files changed, 1237 insertions(+), 1233 deletions(-) create mode 100644 LICENSES/SIX rename pandas/tests/{test_py3compat.py => test_compat.py} (96%) delete mode 100644 pandas/util/py3compat.py 
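Before the file-by-file diffs, a rough sketch of the slice of six being folded into pandas.util.compat, inferred from the call sites below (u, string_types, text_type, iteritems, callable and PY3 all get used). The names match those call sites, but the bodies are illustrative assumptions; the real vendored module carries considerably more:

    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        string_types = (str,)
        text_type = str

        def u(s):
            # Python 3 string literals are already unicode
            return s

        def iteritems(d, **kw):
            return iter(d.items(**kw))

        def callable(obj):
            # the callable() builtin is missing on Python 3.0-3.1
            return any('__call__' in klass.__dict__
                       for klass in type(obj).__mro__)
    else:
        string_types = (basestring,)
        text_type = unicode

        def u(s):
            # simplified; six also escapes backslashes before decoding
            return unicode(s, 'unicode_escape')

        def iteritems(d, **kw):
            return d.iteritems(**kw)

        import __builtin__
        callable = __builtin__.callable

The __bool__ = __nonzero__ alias in the previous patch is the same idea in miniature: expose both the Python 2 and the Python 3 name so the rest of the codebase can stop caring which interpreter it runs on.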
diff --git a/LICENSES/SIX b/LICENSES/SIX new file mode 100644 index 0000000000000..6fd669af222d3 --- /dev/null +++ b/LICENSES/SIX @@ -0,0 +1,21 @@ +six license (substantial portions used in the python 3 compatibility module) +=========================================================================== +Copyright (c) 2010-2013 Benjamin Peterson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/bench/alignment.py b/bench/alignment.py index 1f32064db7016..0cc0de797436a 100644 --- a/bench/alignment.py +++ b/bench/alignment.py @@ -1,5 +1,5 @@ # Setup -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import numpy as np import pandas import la diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py index cf1b827e133ac..405f22450c2f6 100644 --- a/bench/bench_get_put_value.py +++ b/bench/bench_get_put_value.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.compat import range N = 1000 K = 50 diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py index aa337acf9308e..76c92407d69ce 100644 --- a/bench/bench_groupby.py +++ b/bench/bench_groupby.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.compat import range import string import random diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py index 784704cbb809a..7e9f3c10387bb 100644 --- a/bench/bench_khash_dict.py +++ b/bench/bench_khash_dict.py @@ -8,7 +8,7 @@ from vbench.api import Benchmark from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.compat import range import pandas._tseries as lib import pandas._sandbox as sbx import time diff --git a/bench/bench_merge.py b/bench/bench_merge.py index c4f595eb05cb9..da2706dcff5e0 100644 --- a/bench/bench_merge.py +++ b/bench/bench_merge.py @@ -1,7 +1,9 @@ +import random +import gc +import time from pandas import * +from pandas.util.compat import range, lrange, StringIO from pandas.util.testing import rands -from pandas.util.py3compat import range, lrange -import random N = 10000 ngroups = 10 @@ -29,10 +31,6 @@ def get_test_data(ngroups=100, n=N): # 'value' : np.random.randn(N // 10)}) # result = merge.merge(df, df2, on='key2') -from collections import defaultdict -import gc -import time -from pandas.util.testing import rands N = 10000 indices = np.array([rands(10) for _ in range(N)], dtype='O') @@ -66,7 +64,6 @@ def 
get_test_data(ngroups=100, n=N): # R results -from pandas.util.py3compat import StringIO, lrange # many to one r_results = read_table(StringIO(""" base::merge plyr data.table inner 0.2475 0.1183 0.1100 @@ -94,7 +91,6 @@ def get_test_data(ngroups=100, n=N): # many to many -from pandas.util.py3compat import StringIO, lrange # many to one r_results = read_table(StringIO("""base::merge plyr data.table inner 0.4610 0.1276 0.1269 diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py index cc2e3197109ce..6a8829e311408 100644 --- a/bench/bench_merge_sqlite.py +++ b/bench/bench_merge_sqlite.py @@ -4,7 +4,7 @@ import time from pandas import DataFrame from pandas.util.testing import rands -from pandas.util.py3compat import range, zip +from pandas.util.compat import range, zip import random N = 10000 diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py index 51a8e6441ff58..cce9035a4735f 100644 --- a/bench/bench_take_indexing.py +++ b/bench/bench_take_indexing.py @@ -6,7 +6,7 @@ from pandas import DataFrame import timeit -from pandas.util.py3compat import zip +from pandas.util.compat import zip setup = """ from pandas import Series diff --git a/bench/bench_unique.py b/bench/bench_unique.py index 8ede875b25e97..0c89f636f942c 100644 --- a/bench/bench_unique.py +++ b/bench/bench_unique.py @@ -1,8 +1,7 @@ from __future__ import print_function from pandas import * from pandas.util.testing import rands -from pandas.util.py3compat import range -from pandas.util.py3compat import zip +from pandas.util.compat import range, zip import pandas._tseries as lib import numpy as np import matplotlib.pyplot as plt diff --git a/bench/better_unique.py b/bench/better_unique.py index f1d8115b1a6d8..97c667fbfe55f 100644 --- a/bench/better_unique.py +++ b/bench/better_unique.py @@ -1,13 +1,12 @@ from __future__ import print_function from pandas import DataFrame -from pandas.util.py3compat import range -from pandas.util.py3compat import zip +from pandas.util.compat import range, zip import timeit setup = """ from pandas import Series import pandas._tseries as _tseries -from pandas.util.py3compat import range +from pandas.util.compat import range import random import numpy as np diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py index a033ef0c72857..bd2293d8fdb50 100644 --- a/bench/io_roundtrip.py +++ b/bench/io_roundtrip.py @@ -5,7 +5,7 @@ import la import pandas -from pandas.util.py3compat import range +from pandas.util.compat import range from pandas import datetools, DateRange diff --git a/bench/serialize.py b/bench/serialize.py index bc837622360c7..7a6d5838f8257 100644 --- a/bench/serialize.py +++ b/bench/serialize.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import time import os import numpy as np diff --git a/bench/test.py b/bench/test.py index 3008fc67ade00..49396f608b5c9 100644 --- a/bench/test.py +++ b/bench/test.py @@ -1,9 +1,8 @@ -from pandas.util.py3compat import range import numpy as np import itertools import collections import scipy.ndimage as ndi -from pandas.util.py3compat import zip +from pandas.util.compat import zip, range N = 10000 diff --git a/doc/plots/stats/moment_plots.py b/doc/plots/stats/moment_plots.py index a078651d2fe89..0e7ee89bd07ec 100644 --- a/doc/plots/stats/moment_plots.py +++ b/doc/plots/stats/moment_plots.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.compat import range import numpy as np import 
matplotlib.pyplot as plt diff --git a/doc/source/conf.py b/doc/source/conf.py index 128e4ade9220d..736d190115a73 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -12,7 +12,7 @@ import sys import os -import six +from pandas.util.compat import u # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -64,8 +64,8 @@ master_doc = 'index' # General information about the project. -project = six.u('pandas') -copyright = six.u('2008-2012, the pandas development team') +project = u('pandas') +copyright = u('2008-2012, the pandas development team') # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -212,8 +212,8 @@ # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'pandas.tex', - six.u('pandas: powerful Python data analysis toolkit'), - six.u('Wes McKinney\n\& PyData Development Team'), 'manual'), + u('pandas: powerful Python data analysis toolkit'), + u('Wes McKinney\n\& PyData Development Team'), 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/doc/source/io.rst b/doc/source/io.rst index ee6c35187808f..7290e499c6cbf 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -1184,7 +1184,7 @@ You can even pass in an instance of ``StringIO`` if you so desire .. ipython:: python - from six.moves import cStringIO as StringIO + from cStringIO import StringIO with open(file_path, 'r') as f: sio = StringIO(f.read()) diff --git a/doc/sphinxext/comment_eater.py b/doc/sphinxext/comment_eater.py index 6d216162a00e7..f1c290b30db8e 100755 --- a/doc/sphinxext/comment_eater.py +++ b/doc/sphinxext/comment_eater.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import cStringIO +from pandas.util.compat import cStringIO import compiler import inspect import textwrap diff --git a/doc/sphinxext/compiler_unparse.py b/doc/sphinxext/compiler_unparse.py index 0fa3983abdd8a..69a4f8e9b02f7 100755 --- a/doc/sphinxext/compiler_unparse.py +++ b/doc/sphinxext/compiler_unparse.py @@ -12,7 +12,7 @@ """ import sys -from pandas.util.py3compat import cStringIO as StringIO +from pandas.util.compat import cStringIO as StringIO from compiler.ast import Const, Name, Tuple, Div, Mul, Sub, Add def unparse(ast, single_line_functions=False): diff --git a/doc/sphinxext/docscrape.py b/doc/sphinxext/docscrape.py index 5d27810a11ac8..1cc57b415ebce 100755 --- a/doc/sphinxext/docscrape.py +++ b/doc/sphinxext/docscrape.py @@ -8,8 +8,7 @@ import re import pydoc from warnings import warn -from six import StringIO -import six +from pandas.util.compat import StringIO, callable class Reader(object): """A line-based string reader. @@ -372,7 +371,7 @@ def _str_index(self): idx = self['index'] out = [] out += ['.. 
index:: %s' % idx.get('default','')] - for section, references in six.iteritems(idx): + for section, references in compat.iteritems(idx): if section == 'default': continue out += [' :%s: %s' % (section, ', '.join(references))] @@ -491,7 +490,7 @@ def methods(self): if self._cls is None: return [] return [name for name,func in inspect.getmembers(self._cls) - if not name.startswith('_') and six.callable(func)] + if not name.startswith('_') and callable(func)] @property def properties(self): diff --git a/doc/sphinxext/docscrape_sphinx.py b/doc/sphinxext/docscrape_sphinx.py index 896ae070d739a..0f32807761c9f 100755 --- a/doc/sphinxext/docscrape_sphinx.py +++ b/doc/sphinxext/docscrape_sphinx.py @@ -1,7 +1,7 @@ import re, inspect, textwrap, pydoc import sphinx from .docscrape import NumpyDocString, FunctionDoc, ClassDoc -import six +from pandas.util.compat import callable class SphinxDocString(NumpyDocString): def __init__(self, docstring, config={}): @@ -128,7 +128,7 @@ def _str_index(self): return out out += ['.. index:: %s' % idx.get('default','')] - for section, references in six.iteritems(idx): + for section, references in compat.iteritems(idx): if section == 'default': continue elif section == 'refguide': @@ -212,7 +212,7 @@ def get_doc_object(obj, what=None, doc=None, config={}): what = 'class' elif inspect.ismodule(obj): what = 'module' - elif six.callable(obj): + elif callable(obj): what = 'function' else: what = 'object' diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py index 195875047e73d..49e6ac913c9a5 100644 --- a/doc/sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_directive.py @@ -58,8 +58,7 @@ #----------------------------------------------------------------------------- # Stdlib -from pandas.util.py3compat import range, lmap -from pandas.util.py3compat import map, cStringIO as StringIO +from pandas.util.compat import zip, range, map, lmap, u, cStringIO as StringIO import ast import os import re @@ -71,8 +70,6 @@ from docutils.parsers.rst import directives from docutils import nodes from sphinx.util.compat import Directive -import six -from pandas.util.py3compat import zip matplotlib.use('Agg') @@ -493,7 +490,7 @@ def process_pure_python(self, content): multiline = True cont_len = len(str(lineno)) + 2 line_to_process = line.strip('\\') - output.extend([six.u("%s %s") % (fmtin%lineno,line)]) + output.extend([u("%s %s") % (fmtin%lineno,line)]) continue else: # no we're still not line_to_process = line.strip('\\') @@ -501,12 +498,12 @@ def process_pure_python(self, content): line_to_process += line.strip('\\') if line_stripped.endswith('\\'): # and we still are continuation = '.' 
* cont_len - output.extend([(six.u(' %s: ')+line_stripped) % continuation]) + output.extend([(u(' %s: ')+line_stripped) % continuation]) continue # else go ahead and run this multiline then carry on # get output of line - self.process_input_line(six.text_type(line_to_process.strip()), + self.process_input_line(compat.text_type(line_to_process.strip()), store_history=False) out_line = self.cout.getvalue() self.clear_cout() @@ -520,15 +517,15 @@ def process_pure_python(self, content): # line numbers don't actually matter, they're replaced later if not multiline: - in_line = six.u("%s %s") % (fmtin%lineno,line) + in_line = u("%s %s") % (fmtin%lineno,line) output.extend([in_line]) else: - output.extend([(six.u(' %s: ')+line_stripped) % continuation]) + output.extend([(u(' %s: ')+line_stripped) % continuation]) multiline = False if len(out_line): output.extend([out_line]) - output.extend([six.u('')]) + output.extend([u('')]) return output @@ -570,19 +567,19 @@ def process_pure_python2(self, content): output.extend([line]) continue - continuation = six.u(' %s:')% ''.join(['.']*(len(str(ct))+2)) + continuation = u(' %s:')% ''.join(['.']*(len(str(ct))+2)) if not multiline: - modified = six.u("%s %s") % (fmtin % ct, line_stripped) + modified = u("%s %s") % (fmtin % ct, line_stripped) output.append(modified) ct += 1 try: ast.parse(line_stripped) - output.append(six.u('')) + output.append(u('')) except Exception: multiline = True multiline_start = lineno else: - modified = six.u('%s %s') % (continuation, line) + modified = u('%s %s') % (continuation, line) output.append(modified) try: @@ -594,7 +591,7 @@ def process_pure_python2(self, content): continue - output.extend([continuation, six.u('')]) + output.extend([continuation, u('')]) multiline = False except Exception: pass diff --git a/doc/sphinxext/numpydoc.py b/doc/sphinxext/numpydoc.py index 4ddc12e4c1a1c..8857c1f80cce6 100755 --- a/doc/sphinxext/numpydoc.py +++ b/doc/sphinxext/numpydoc.py @@ -17,13 +17,13 @@ """ import sphinx -import six if sphinx.__version__ < '1.0.1': raise RuntimeError("Sphinx 1.0.1 or newer is required") import os, re, pydoc from .docscrape_sphinx import get_doc_object, SphinxDocString +from pandas.util.compat import u, callable from sphinx.util.compat import Directive import inspect @@ -35,28 +35,28 @@ def mangle_docstrings(app, what, name, obj, options, lines, if what == 'module': # Strip top title - title_re = re.compile(six.u(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*'), + title_re = re.compile(u(r'^\s*[#*=]{4,}\n[a-z0-9 -]+\n[#*=]{4,}\s*'), re.I|re.S) - lines[:] = title_re.sub(six.u(''), six.u("\n").join(lines)).split(six.u("\n")) + lines[:] = title_re.sub(u(''), u("\n").join(lines)).split(u("\n")) else: - doc = get_doc_object(obj, what, six.u("\n").join(lines), config=cfg) - lines[:] = six.text_type(doc).split(six.u("\n")) + doc = get_doc_object(obj, what, u("\n").join(lines), config=cfg) + lines[:] = compat.text_type(doc).split(u("\n")) if app.config.numpydoc_edit_link and hasattr(obj, '__name__') and \ obj.__name__: if hasattr(obj, '__module__'): - v = dict(full_name=six.u("%s.%s") % (obj.__module__, obj.__name__)) + v = dict(full_name=u("%s.%s") % (obj.__module__, obj.__name__)) else: v = dict(full_name=obj.__name__) - lines += [six.u(''), six.u('.. htmlonly::'), ''] - lines += [six.u(' %s') % x for x in + lines += [u(''), u('.. 
htmlonly::'), ''] + lines += [u(' %s') % x for x in (app.config.numpydoc_edit_link % v).split("\n")] # replace reference numbers so that there are no duplicates references = [] for line in lines: line = line.strip() - m = re.match(six.u(r'^.. \[([a-z0-9_.-])\]'), line, re.I) + m = re.match(u(r'^.. \[([a-z0-9_.-])\]'), line, re.I) if m: references.append(m.group(1)) @@ -65,14 +65,14 @@ def mangle_docstrings(app, what, name, obj, options, lines, if references: for i, line in enumerate(lines): for r in references: - if re.match(six.u(r'^\d+$'), r): - new_r = six.u("R%d") % (reference_offset[0] + int(r)) + if re.match(u(r'^\d+$'), r): + new_r = u("R%d") % (reference_offset[0] + int(r)) else: - new_r = six.u("%s%d") % (r, reference_offset[0]) - lines[i] = lines[i].replace(six.u('[%s]_') % r, - six.u('[%s]_') % new_r) - lines[i] = lines[i].replace(six.u('.. [%s]') % r, - six.u('.. [%s]') % new_r) + new_r = u("%s%d") % (r, reference_offset[0]) + lines[i] = lines[i].replace(u('[%s]_') % r, + u('[%s]_') % new_r) + lines[i] = lines[i].replace(u('.. [%s]') % r, + u('.. [%s]') % new_r) reference_offset[0] += len(references) @@ -83,13 +83,13 @@ def mangle_signature(app, what, name, obj, options, sig, retann): 'initializes x; see ' in pydoc.getdoc(obj.__init__))): return '', '' - if not (six.callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return + if not (callable(obj) or hasattr(obj, '__argspec_is_invalid_')): return if not hasattr(obj, '__doc__'): return doc = SphinxDocString(pydoc.getdoc(obj)) if doc['Signature']: - sig = re.sub(six.u("^[^(]*"), six.u(""), doc['Signature']) - return sig, six.u('') + sig = re.sub(u("^[^(]*"), u(""), doc['Signature']) + return sig, u('') def setup(app, get_doc_object_=get_doc_object): global get_doc_object diff --git a/doc/sphinxext/plot_directive.py b/doc/sphinxext/plot_directive.py index 9c648f474ceea..e48899a06bb8b 100755 --- a/doc/sphinxext/plot_directive.py +++ b/doc/sphinxext/plot_directive.py @@ -75,13 +75,11 @@ """ -from pandas.util.py3compat import range +from pandas.util.compat import range, cStringIO as StringIO, map import sys, os, glob, shutil, imp, warnings, re, textwrap, traceback -from pandas.util.py3compat import cStringIO as StringIO import sphinx import warnings -from pandas.util.py3compat import map warnings.warn("A plot_directive module is also available under " "matplotlib.sphinxext; expect this numpydoc.plot_directive " "module to be deprecated after relevant features have been " diff --git a/doc/sphinxext/tests/test_docscrape.py b/doc/sphinxext/tests/test_docscrape.py index 1abf11b77dc56..e9de8cf63314b 100755 --- a/doc/sphinxext/tests/test_docscrape.py +++ b/doc/sphinxext/tests/test_docscrape.py @@ -2,12 +2,12 @@ # -*- encoding:utf-8 -*- import sys, os -import six sys.path.append(os.path.join(os.path.dirname(__file__), '..')) from docscrape import NumpyDocString, FunctionDoc, ClassDoc from docscrape_sphinx import SphinxDocString, SphinxClassDoc from nose.tools import * +from pandas.util.compat import u doc_txt = '''\ numpy.multivariate_normal(mean, cov, shape=None) @@ -289,7 +289,7 @@ def test_sphinx_str(): The drawn samples, arranged according to `shape`. If the shape given is (m,n,...), then the shape of `out` is is (m,n,...,N). - + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional value drawn from the distribution. @@ -298,12 +298,12 @@ def test_sphinx_str(): Certain warnings apply. .. seealso:: - + :obj:`some`, :obj:`other`, :obj:`funcs` - + :obj:`otherfunc` relationship - + .. 
rubric:: Notes Instead of specifying the full covariance matrix, popular @@ -350,7 +350,7 @@ def test_sphinx_str(): [True, True] """) - + doc2 = NumpyDocString(""" Returns array of indices of the maximum values of along the given axis. @@ -493,7 +493,7 @@ def test_unicode(): äää """) - assert doc['Summary'][0] == six.u('öäöäöäöäöåååå').encode('utf-8') + assert doc['Summary'][0] == u('öäöäöäöäöåååå').encode('utf-8') def test_plot_examples(): cfg = dict(use_plots=True) @@ -511,7 +511,7 @@ def test_plot_examples(): Examples -------- .. plot:: - + import matplotlib.pyplot as plt plt.plot([1,2,3],[4,5,6]) plt.show() diff --git a/doc/sphinxext/traitsdoc.py b/doc/sphinxext/traitsdoc.py index 0298a441e26ff..2c74e70bb8e50 100755 --- a/doc/sphinxext/traitsdoc.py +++ b/doc/sphinxext/traitsdoc.py @@ -18,6 +18,7 @@ import os import pydoc +from pandas.util.compat import callable from . import docscrape from . import docscrape_sphinx from .docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString @@ -117,7 +118,7 @@ def get_doc_object(obj, what=None, config=None): what = 'class' elif inspect.ismodule(obj): what = 'module' - elif six.callable(obj): + elif callable(obj): what = 'function' else: what = 'object' diff --git a/examples/finance.py b/examples/finance.py index a8fb580f954a3..f795b0c722cf7 100644 --- a/examples/finance.py +++ b/examples/finance.py @@ -3,7 +3,7 @@ """ from datetime import datetime -from pandas.util.py3compat import zip +from pandas.util.compat import zip import matplotlib.finance as fin import numpy as np diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py index 53436c517f480..7b357e2ffdf14 100644 --- a/pandas/compat/scipy.py +++ b/pandas/compat/scipy.py @@ -2,7 +2,7 @@ Shipping functions from SciPy to reduce dependency on having SciPy installed """ -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import numpy as np diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 21b6f32892cf2..95bc7351adeb1 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -4,11 +4,11 @@ """ import numpy as np -import six import pandas.core.common as com import pandas.algos as algos import pandas.hashtable as htable +import pandas.util.compat as compat def match(to_match, values, na_sentinel=-1): @@ -32,7 +32,7 @@ def match(to_match, values, na_sentinel=-1): match : ndarray of integers """ values = com._asarray_tuplesafe(values) - if issubclass(values.dtype.type, six.string_types): + if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype='O') f = lambda htype, caster: _match_generic(to_match, values, htype, caster) diff --git a/pandas/core/array.py b/pandas/core/array.py index 842bbdbf14ef2..c9a8a00b7f2d7 100644 --- a/pandas/core/array.py +++ b/pandas/core/array.py @@ -3,7 +3,6 @@ """ import numpy as np -import six Array = np.ndarray diff --git a/pandas/core/base.py b/pandas/core/base.py index 6122e78fa8bce..1f3cb7f9e6f6d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1,7 +1,7 @@ """ Base class(es) for all pandas objects. """ -from pandas.util import py3compat +from pandas.util import compat class StringMixin(object): """implements string methods so long as object defines a `__unicode__` method. @@ -15,7 +15,7 @@ def __str__(self): Yields Bytestring in Py2, Unicode String in py3. 
""" - if py3compat.PY3: + if compat.PY3: return self.__unicode__() return self.__bytes__() diff --git a/pandas/core/common.py b/pandas/core/common.py index 3af0d7dba1699..2fdfe90f79771 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -14,10 +14,8 @@ import pandas.lib as lib import pandas.tslib as tslib -from pandas.util import py3compat -from pandas.util.py3compat import StringIO, BytesIO, range, long -from pandas.util.py3compat import zip, map -import six +from pandas.util import compat +from pandas.util.compat import StringIO, BytesIO, range, long, u, zip, map from pandas.core.config import get_option @@ -689,7 +687,7 @@ def _infer_dtype_from_scalar(val): dtype = val.dtype val = val.item() - elif isinstance(val, six.string_types): + elif isinstance(val, compat.string_types): # If we create an empty array using a string to infer # the dtype, NumPy will only allocate one character per entry @@ -782,7 +780,7 @@ def _maybe_promote(dtype, fill_value=np.nan): dtype = np.object_ # in case we have a string that looked like a number - if issubclass(np.dtype(dtype).type, six.string_types): + if issubclass(np.dtype(dtype).type, compat.string_types): dtype = np.object_ return dtype, fill_value @@ -1169,7 +1167,7 @@ def _possibly_cast_to_datetime(value, dtype, coerce = False): """ try to cast the array/value to a datetimelike dtype, converting float nan to iNaT """ if dtype is not None: - if isinstance(dtype, six.string_types): + if isinstance(dtype, compat.string_types): dtype = np.dtype(dtype) is_datetime64 = is_datetime64_dtype(dtype) @@ -1339,7 +1337,7 @@ def _join_unicode(lines, sep=''): try: return sep.join(lines) except UnicodeDecodeError: - sep = six.text_type(sep) + sep = compat.text_type(sep) return sep.join([x.decode('utf-8') if isinstance(x, str) else x for x in lines]) @@ -1479,7 +1477,7 @@ def _asarray_tuplesafe(values, dtype=None): result = np.asarray(values, dtype=dtype) - if issubclass(result.dtype.type, six.string_types): + if issubclass(result.dtype.type, compat.string_types): result = np.asarray(values, dtype=object) if result.ndim == 2: @@ -1495,7 +1493,7 @@ def _asarray_tuplesafe(values, dtype=None): def _index_labels_to_array(labels): - if isinstance(labels, (six.string_types, tuple)): + if isinstance(labels, (compat.string_types, tuple)): labels = [labels] if not isinstance(labels, (list, np.ndarray)): @@ -1610,13 +1608,13 @@ def is_re_compilable(obj): def is_list_like(arg): - return hasattr(arg, '__iter__') and not isinstance(arg, six.string_types) + return hasattr(arg, '__iter__') and not isinstance(arg, compat.string_types) def _is_sequence(x): try: iter(x) len(x) # it has a length - return not isinstance(x, six.string_types) and True + return not isinstance(x, compat.string_types) and True except Exception: return False @@ -1650,7 +1648,7 @@ def _astype_nansafe(arr, dtype, copy = True): return arr.astype(object) # in py3, timedelta64[ns] are int64 - elif (py3compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not py3compat.PY3 and dtype != _TD_DTYPE): + elif (compat.PY3 and dtype not in [_INT64_DTYPE,_TD_DTYPE]) or (not compat.PY3 and dtype != _TD_DTYPE): raise TypeError("cannot astype a timedelta from [%s] to [%s]" % (arr.dtype,dtype)) return arr.astype(_TD_DTYPE) elif (np.issubdtype(arr.dtype, np.floating) and @@ -1725,7 +1723,7 @@ def _get_handle(path, mode, encoding=None, compression=None): raise ValueError('Unrecognized compression type: %s' % compression) - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover if encoding: f 
= open(path, mode, encoding=encoding) else: @@ -1734,7 +1732,7 @@ def _get_handle(path, mode, encoding=None, compression=None): f = open(path, mode) return f -if py3compat.PY3: # pragma: no cover +if compat.PY3: # pragma: no cover def UnicodeReader(f, dialect=csv.excel, encoding="utf-8", **kwds): # ignore encoding return csv.reader(f, dialect=dialect, **kwds) @@ -1757,7 +1755,7 @@ def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds): def next(self): row = next(self.reader) - return [six.text_type(s, "utf-8") for s in row] + return [compat.text_type(s, "utf-8") for s in row] # python 3 iterator __next__ = next @@ -1958,9 +1956,9 @@ def _pprint_seq(seq, _nest_lvl=0, **kwds): bounds length of printed sequence, depending on options """ if isinstance(seq,set): - fmt = six.u("set([%s])") + fmt = u("set([%s])") else: - fmt = six.u("[%s]") if hasattr(seq, '__setitem__') else six.u("(%s)") + fmt = u("[%s]") if hasattr(seq, '__setitem__') else u("(%s)") nitems = get_option("max_seq_items") or len(seq) @@ -1983,10 +1981,10 @@ def _pprint_dict(seq, _nest_lvl=0,**kwds): internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. """ - fmt = six.u("{%s}") + fmt = u("{%s}") pairs = [] - pfmt = six.u("%s: %s") + pfmt = u("%s: %s") nitems = get_option("max_seq_items") or len(seq) @@ -2032,7 +2030,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): #should deal with it himself. try: - result = six.text_type(thing) # we should try this first + result = compat.text_type(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") @@ -2052,11 +2050,11 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): for c in escape_chars: result = result.replace(c, translate[c]) - return six.text_type(result) + return compat.text_type(result) - if (py3compat.PY3 and hasattr(thing, '__next__')) or \ + if (compat.PY3 and hasattr(thing, '__next__')) or \ hasattr(thing, 'next'): - return six.text_type(thing) + return compat.text_type(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl,quote_strings=True) @@ -2064,8 +2062,8 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): get_option("display.pprint_nest_depth"): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings) - elif isinstance(thing,six.string_types) and quote_strings: - if py3compat.PY3: + elif isinstance(thing,compat.string_types) and quote_strings: + if compat.PY3: fmt = "'%s'" else: fmt = "u'%s'" @@ -2073,7 +2071,7 @@ def as_escaped_unicode(thing,escape_chars=escape_chars): else: result = as_escaped_unicode(thing) - return six.text_type(result) # always unicode + return compat.text_type(result) # always unicode def pprint_thing_encoded(object, encoding='utf-8', errors='replace', **kwds): diff --git a/pandas/core/config.py b/pandas/core/config.py index 26fda8d3d926d..d55888bc18f5d 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -52,8 +52,8 @@ from collections import namedtuple import warnings -import six -from pandas.util.py3compat import map, lmap +from pandas.util.compat import map, lmap, u +import pandas.util.compat as compat DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple( @@ -149,7 +149,7 @@ def _describe_option(pat='', _print_desc=True): if len(keys) == 0: raise KeyError('No such keys(s)') - s = 
six.u('') + s = u('') for k in keys: # filter by pat s += _build_option_description(k) @@ -588,9 +588,9 @@ def _build_option_description(k): o = _get_registered_option(k) d = _get_deprecated_option(k) - s = six.u('%s: ') % k + s = u('%s: ') % k if o: - s += six.u('[default: %s] [currently: %s]') % (o.defval, _get_option(k, True)) + s += u('[default: %s] [currently: %s]') % (o.defval, _get_option(k, True)) if o.doc: s += '\n' + '\n '.join(o.doc.strip().split('\n')) @@ -598,9 +598,9 @@ def _build_option_description(k): s += 'No description available.\n' if d: - s += six.u('\n\t(Deprecated') - s += (six.u(', use `%s` instead.') % d.rkey if d.rkey else '') - s += six.u(')\n') + s += u('\n\t(Deprecated') + s += (u(', use `%s` instead.') % d.rkey if d.rkey else '') + s += u(')\n') s += '\n' return s @@ -757,5 +757,5 @@ def inner(x): is_bool = is_type_factory(bool) is_float = is_type_factory(float) is_str = is_type_factory(str) -is_unicode = is_type_factory(six.text_type) +is_unicode = is_type_factory(compat.text_type) is_text = is_instance_factory((str, bytes)) diff --git a/pandas/core/format.py b/pandas/core/format.py index 150eade61c0ff..1381d4e2ecba9 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -3,13 +3,11 @@ from pandas.util import compat import sys -import six -from pandas.util.py3compat import StringIO, lzip, range, map, zip, reduce +from pandas.util.compat import StringIO, lzip, range, map, zip, reduce, u, OrderedDict from pandas.core.common import adjoin, isnull, notnull from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.util import py3compat -from pandas.util.compat import OrderedDict +from pandas.util import compat from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option, set_option, reset_option import pandas.core.common as com @@ -81,7 +79,7 @@ def __init__(self, series, buf=None, header=True, length=True, self.dtype = dtype def _get_footer(self): - footer = six.u('') + footer = u('') if self.name: if getattr(self.series.index, 'freq', None): @@ -106,7 +104,7 @@ def _get_footer(self): footer += ', ' footer += 'dtype: %s' % com.pprint_thing(self.series.dtype.name) - return six.text_type(footer) + return compat.text_type(footer) def _get_formatted_index(self): index = self.series.index @@ -129,7 +127,7 @@ def to_string(self): series = self.series if len(series) == 0: - return six.u('') + return u('') fmt_index, have_header = self._get_formatted_index() fmt_values = self._get_formatted_values() @@ -149,10 +147,10 @@ def to_string(self): if footer: result.append(footer) - return six.text_type(six.u('\n').join(result)) + return compat.text_type(u('\n').join(result)) def _strlen_func(): - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover _strlen = len else: encoding = get_option("display.encoding") @@ -283,7 +281,7 @@ def to_string(self, force_unicode=None): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (six.u('Empty %s\nColumns: %s\nIndex: %s') + info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, com.pprint_thing(frame.columns), com.pprint_thing(frame.index))) @@ -345,7 +343,7 @@ def get_col_type(dtype): frame = self.frame if len(frame.columns) == 0 or len(frame.index) == 0: - info_line = (six.u('Empty %s\nColumns: %s\nIndex: %s') + info_line = (u('Empty %s\nColumns: %s\nIndex: %s') % (type(self.frame).__name__, frame.columns, frame.index)) strcols = [[info_line]] @@ -358,7 +356,7 @@ def get_col_type(dtype): 
column_format = 'l%s' % ''.join(map(get_col_type, dtypes)) else: column_format = '%s' % ''.join(map(get_col_type, dtypes)) - elif not isinstance(column_format, six.string_types): + elif not isinstance(column_format, compat.string_types): raise AssertionError(('column_format must be str or unicode, not %s' % type(column_format))) @@ -381,7 +379,7 @@ def write(buf, frame, column_format, strcols): if hasattr(self.buf, 'write'): write(self.buf, frame, column_format, strcols) - elif isinstance(self.buf, six.string_types): + elif isinstance(self.buf, compat.string_types): with open(self.buf, 'w') as f: write(f, frame, column_format, strcols) else: @@ -402,7 +400,7 @@ def to_html(self, classes=None): html_renderer = HTMLFormatter(self, classes=classes) if hasattr(self.buf, 'write'): html_renderer.write_result(self.buf) - elif isinstance(self.buf, six.string_types): + elif isinstance(self.buf, compat.string_types): with open(self.buf, 'w') as f: html_renderer.write_result(f) else: @@ -1834,9 +1832,9 @@ def __call__(self, num): mant = sign * dnum / (10 ** pow10) if self.accuracy is None: # pragma: no cover - format_str = six.u("% g%s") + format_str = u("% g%s") else: - format_str = (six.u("%% .%if%%s") % self.accuracy) + format_str = (u("%% .%if%%s") % self.accuracy) formatted = format_str % (mant, prefix) @@ -1862,8 +1860,8 @@ def set_eng_float_format(precision=None, accuracy=3, use_eng_prefix=False): def _put_lines(buf, lines): - if any(isinstance(x, six.text_type) for x in lines): - lines = [six.text_type(x) for x in lines] + if any(isinstance(x, compat.text_type) for x in lines): + lines = [compat.text_type(x) for x in lines] buf.write('\n'.join(lines)) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 827ebc53d97fb..080abe8b00bc4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,7 +12,7 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from pandas.util.py3compat import range, zip, lrange, lmap, lzip, StringIO +from pandas.util.compat import range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict from pandas.util import compat import operator import sys @@ -36,8 +36,7 @@ from pandas.core.series import Series, _radd_compat import pandas.core.expressions as expressions from pandas.compat.scipy import scoreatpercentile as _quantile -from pandas.util.compat import OrderedDict -from pandas.util import py3compat +from pandas.util import compat from pandas.util.terminal import get_terminal_size from pandas.util.decorators import deprecate, Appender, Substitution @@ -56,7 +55,6 @@ import pandas.algos as _algos from pandas.core.config import get_option, set_option -import six #---------------------------------------------------------------------- # Docstring templates @@ -440,7 +438,7 @@ def __init__(self, data=None, index=None, columns=None, dtype=None, 'incompatible data and dtype') if arr.ndim == 0 and index is not None and columns is not None: - if isinstance(data, six.string_types) and dtype is None: + if isinstance(data, compat.string_types) and dtype is None: dtype = np.object_ if dtype is None: dtype, data = _infer_dtype_from_scalar(data) @@ -656,7 +654,7 @@ def __unicode__(self): Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. 
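# ---------------------------------------------------------------------------
# Sketch of compat.PY3, the flag gating the Python-2-only division hooks in
# the frame.py hunks above (assumed to be a plain version check, as in six):
import sys
PY3 = sys.version_info[0] >= 3

# The gating pattern itself: __div__ exists only under classic Python 2
# division, so it is bound conditionally at class-creation time. Frame here
# is an illustrative stand-in, not the real DataFrame.
class Frame(object):
    def __truediv__(self, other):
        return NotImplemented  # placeholder for the real arithmetic method
    if not PY3:
        __div__ = __truediv__
# ---------------------------------------------------------------------------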
""" - buf = StringIO(six.u("")) + buf = StringIO(u("")) fits_vertical = self._repr_fits_vertical_() fits_horizontal = False if fits_vertical: @@ -683,7 +681,7 @@ def __unicode__(self): self.info(buf=buf, verbose=verbose) value = buf.getvalue() - if not isinstance(value, six.text_type): + if not isinstance(value, compat.text_type): raise AssertionError() return value @@ -715,7 +713,7 @@ def _repr_html_(self): 'max-width:1500px;overflow:auto;">\n' + self.to_html() + '\n') else: - buf = StringIO(six.u("")) + buf = StringIO(u("")) max_info_rows = get_option('display.max_info_rows') verbose = (max_info_rows is None or self.shape[0] <= max_info_rows) @@ -789,7 +787,7 @@ def itertuples(self, index=True): return zip(*arrays) iterkv = iteritems - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover items = iteritems def __len__(self): @@ -851,7 +849,7 @@ def __contains__(self, key): __xor__ = _arith_method(operator.xor, '__xor__') # Python 2 division methods - if not py3compat.PY3: + if not compat.PY3: __div__ = _arith_method(operator.div, '__div__', '/', default_axis=None, fill_zeros=np.inf, truediv=False) __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', @@ -1028,7 +1026,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, return cls() try: - if py3compat.PY3: + if compat.PY3: first_row = next(data) else: first_row = next(data) @@ -1093,7 +1091,7 @@ def from_records(cls, data, index=None, exclude=None, columns=None, result_index = None if index is not None: - if (isinstance(index, six.string_types) or + if (isinstance(index, compat.string_types) or not hasattr(index, "__iter__")): i = columns.get_loc(index) exclude.add(index) @@ -1452,7 +1450,7 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='', """ from pandas.io.excel import ExcelWriter need_save = False - if isinstance(excel_writer, six.string_types): + if isinstance(excel_writer, compat.string_types): excel_writer = ExcelWriter(excel_writer) need_save = True @@ -3028,7 +3026,7 @@ def filter(self, items=None, like=None, regex=None): if items is not None: return self.reindex(columns=[r for r in items if r in self]) elif like: - matchf = lambda x: (like in x if isinstance(x, six.string_types) + matchf = lambda x: (like in x if isinstance(x, compat.string_types) else like in str(x)) return self.select(matchf, axis=1) elif regex: @@ -3150,7 +3148,7 @@ def _m8_to_i8(x): if cols is None: values = list(_m8_to_i8(self.values.T)) else: - if np.iterable(cols) and not isinstance(cols, six.string_types): + if np.iterable(cols) and not isinstance(cols, compat.string_types): if isinstance(cols, tuple): if cols in self.columns: values = [self[cols]] @@ -4313,7 +4311,7 @@ def shift(self, periods=1, freq=None, **kwds): offset = _resolve_offset(freq, kwds) - if isinstance(offset, six.string_types): + if isinstance(offset, compat.string_types): offset = datetools.to_offset(offset) if offset is None: @@ -4945,7 +4943,7 @@ def _count_level(self, level, axis=0, numeric_only=False): # python 2.5 mask = notnull(frame.values).view(np.uint8) - if isinstance(level, six.string_types): + if isinstance(level, compat.string_types): level = self.index._get_level_number(level) level_index = frame.index.levels[level] @@ -5994,7 +5992,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrame) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.columns - if isinstance(c, six.string_types) and py3compat.isidentifier(c)] + if isinstance(c, 
compat.string_types) and compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d7fc4ce21ca87..c4be0fc3527bc 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10,8 +10,7 @@ from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com -import six -from pandas.util.py3compat import map, zip +from pandas.util.compat import map, zip class PandasError(Exception): @@ -80,7 +79,7 @@ def _get_axis_number(self, axis): def _get_axis_name(self, axis): axis = self._AXIS_ALIASES.get(axis, axis) - if isinstance(axis, six.string_types): + if isinstance(axis, compat.string_types): if axis in self._AXIS_NUMBERS: return axis else: diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index f65d1073017a5..ed06362599be1 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,11 +1,10 @@ import types import numpy as np -import six -from pandas.util.py3compat import range, long, lrange, lzip -from pandas.util.compat import OrderedDict +from pandas.util.compat import( + zip, builtins, range, long, lrange, lzip, OrderedDict, callable +) from pandas.util import compat -from pandas.util.py3compat import zip, builtins from pandas.core.base import PandasObject from pandas.core.categorical import Categorical @@ -1261,7 +1260,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): if level is not None: if not isinstance(group_axis, MultiIndex): - if isinstance(level, six.string_types): + if isinstance(level, compat.string_types): if obj.index.name != level: raise ValueError('level name %s is not the name of the index' % level) elif level > 0: @@ -1283,7 +1282,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): # what are we after, exactly? 
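# ---------------------------------------------------------------------------
# Sketch of compat.string_types, assumed to mirror six.string_types: a tuple
# of the interpreter's text types, so the isinstance checks above behave the
# same on both majors:
import sys
if sys.version_info[0] >= 3:
    string_types = (str,)         # all text is unicode in Python 3
else:
    string_types = (basestring,)  # covers str and unicode in Python 2

# usage, mirroring call sites in this patch:
#     if isinstance(axis, string_types): ...
# ---------------------------------------------------------------------------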
match_axis_length = len(keys) == len(group_axis) - any_callable = any(six.callable(g) or isinstance(g, dict) for g in keys) + any_callable = any(callable(g) or isinstance(g, dict) for g in keys) any_arraylike = any(isinstance(g, (list, tuple, np.ndarray)) for g in keys) @@ -1338,7 +1337,7 @@ def _get_grouper(obj, key=None, axis=0, level=None, sort=True): def _is_label_like(val): - return isinstance(val, six.string_types) or np.isscalar(val) + return isinstance(val, compat.string_types) or np.isscalar(val) def _convert_grouper(axis, grouper): @@ -1410,7 +1409,7 @@ def aggregate(self, func_or_funcs, *args, **kwargs): ------- Series or DataFrame """ - if isinstance(func_or_funcs, six.string_types): + if isinstance(func_or_funcs, compat.string_types): return getattr(self, func_or_funcs)(*args, **kwargs) if hasattr(func_or_funcs, '__iter__'): @@ -1450,7 +1449,7 @@ def _aggregate_multiple_funcs(self, arg): # list of functions / function names columns = [] for f in arg: - if isinstance(f, six.string_types): + if isinstance(f, compat.string_types): columns.append(f) else: columns.append(f.__name__) @@ -1538,7 +1537,7 @@ def transform(self, func, *args, **kwargs): result = result.values dtype = result.dtype - if isinstance(func, six.string_types): + if isinstance(func, compat.string_types): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) @@ -1580,7 +1579,7 @@ def filter(self, func, dropna=True, *args, **kwargs): ------- filtered : Series """ - if isinstance(func, six.string_types): + if isinstance(func, compat.string_types): wrapper = lambda x: getattr(x, func)(*args, **kwargs) else: wrapper = lambda x: func(x, *args, **kwargs) @@ -1694,7 +1693,7 @@ def _obj_with_exclusions(self): @Appender(_agg_doc) def aggregate(self, arg, *args, **kwargs): - if isinstance(arg, six.string_types): + if isinstance(arg, compat.string_types): return getattr(self, arg)(*args, **kwargs) result = OrderedDict() @@ -2002,7 +2001,7 @@ def transform(self, func, *args, **kwargs): return concatenated def _define_paths(self, func, *args, **kwargs): - if isinstance(func, six.string_types): + if isinstance(func, compat.string_types): fast_path = lambda group: getattr(group, func)(*args, **kwargs) slow_path = lambda group: group.apply(lambda x: getattr(x, func)(*args, **kwargs), axis=self.axis) else: @@ -2253,7 +2252,7 @@ def aggregate(self, arg, *args, **kwargs): ------- aggregated : Panel """ - if isinstance(arg, six.string_types): + if isinstance(arg, compat.string_types): return getattr(self, arg)(*args, **kwargs) return self._aggregate_generic(arg, *args, **kwargs) @@ -2656,7 +2655,7 @@ def numpy_groupby(data, labels, axis=0): # Helper functions -from pandas.util import py3compat +from pandas.util import compat import sys @@ -2668,7 +2667,7 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(DataFrameGroupBy) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.obj.columns - if isinstance(c, six.string_types) and py3compat.isidentifier(c)] + if isinstance(c, compat.string_types) and compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless diff --git a/pandas/core/index.py b/pandas/core/index.py index c54aa895f668e..713400619de77 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,6 @@ # pylint: disable=E1101,E1103,W0232 -from pandas.util.py3compat import range, zip, lrange, lzip -import six +from pandas.util.compat import range, zip, 
lrange, lzip from pandas.util import compat import numpy as np @@ -1350,7 +1349,7 @@ def __new__(cls, data, dtype=None, copy=False, name=None): data = list(data) data = np.asarray(data) - if issubclass(data.dtype.type, six.string_types): + if issubclass(data.dtype.type, compat.string_types): raise TypeError('String dtype not supported, you may need ' 'to explicitly cast to int') elif issubclass(data.dtype.type, np.integer): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 1518aa3c94efe..2ad00612347d9 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -3,10 +3,9 @@ from datetime import datetime from pandas.core.common import _asarray_tuplesafe from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.util.py3compat import range -from pandas.util.py3compat import zip +from pandas.util.compat import range, zip +import pandas.util.compat as compat import pandas.core.common as com -import six import pandas.lib as lib import numpy as np @@ -923,7 +922,7 @@ def _convert_to_index_sliceable(obj, key): indexer = obj.ix._convert_to_indexer(key, axis=0) return indexer - elif isinstance(key, six.string_types): + elif isinstance(key, compat.string_types): # we are an actual column if key in obj._data.items: @@ -1080,7 +1079,7 @@ def _is_label_like(key): def _is_list_like(obj): # Consider namedtuples to be not list like as they are useful as indices return (np.iterable(obj) - and not isinstance(obj, six.string_types) + and not isinstance(obj, compat.string_types) and not (isinstance(obj, tuple) and type(obj) is not tuple)) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 21cfa8658b03c..ca1200b87124d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,6 +1,5 @@ import itertools import re -import six from datetime import datetime from numpy import nan @@ -18,9 +17,8 @@ import pandas.core.expressions as expressions from pandas.tslib import Timestamp -from pandas.util import py3compat -from pandas.util.py3compat import range, lrange, lmap -from pandas.util.py3compat import map, zip +from pandas.util import compat +from pandas.util.compat import range, lrange, lmap, callable, map, zip class Block(PandasObject): @@ -689,7 +687,7 @@ class ObjectBlock(Block): _can_hold_na = True def __init__(self, values, items, ref_items, ndim=2, fastpath=False, placement=None): - if issubclass(values.dtype.type, six.string_types): + if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object) super(ObjectBlock, self).__init__(values, items, ref_items, @@ -815,7 +813,7 @@ def _replace_single(self, to_replace, value, inplace=False, filter=None, # deal with replacing values with objects (strings) that match but # whose replacement is not a string (numeric, nan, object) - if isnull(value) or not isinstance(value, six.string_types): + if isnull(value) or not isinstance(value, compat.string_types): def re_replacer(s): try: return value if rx.search(s) is not None else s @@ -1269,7 +1267,7 @@ def apply(self, f, *args, **kwargs): if not blk.items.isin(filter).any(): result_blocks.append(blk) continue - if six.callable(f): + if callable(f): applied = f(blk, *args, **kwargs) else: applied = getattr(blk,f)(*args, **kwargs) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 937235730ba6e..20fcc1430524e 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -12,8 +12,7 @@ import pandas.hashtable as _hash import pandas.tslib as tslib -from pandas.util.py3compat import builtins 
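# ---------------------------------------------------------------------------
# Sketch of the l-prefixed helpers imported above (lrange, lzip, lmap). With
# range/zip/map re-exported with iterator semantics, these are assumed to be
# their eager, list-returning counterparts for call sites that still index,
# slice or mutate the result:
def lrange(*args, **kwargs):
    return list(range(*args, **kwargs))

def lzip(*args, **kwargs):
    return list(zip(*args, **kwargs))

def lmap(*args, **kwargs):
    return list(map(*args, **kwargs))
# ---------------------------------------------------------------------------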
-import six +from pandas.util.compat import builtins try: @@ -35,7 +34,7 @@ def check(self, obj): def __call__(self, f): @functools.wraps(f) def _f(*args, **kwargs): - obj_iter = itertools.chain(args, six.itervalues(kwargs)) + obj_iter = itertools.chain(args, compat.itervalues(kwargs)) if any(self.check(obj) for obj in obj_iter): raise TypeError('reduction operation {0!r} not allowed for ' 'this dtype'.format(f.__name__.replace('nan', diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 35e5d1e2327aa..29a8ecf940cea 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -3,7 +3,7 @@ """ # pylint: disable=E1103,W0231,W0212,W0621 -from pandas.util.py3compat import range, lrange, lmap +from pandas.util.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict from pandas.util import compat import operator import sys @@ -22,13 +22,11 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.util import py3compat +from pandas.util import compat from pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com import pandas.core.nanops as nanops import pandas.lib as lib -import six -from pandas.util.py3compat import map, zip def _ensure_like_indices(time, panels): @@ -227,7 +225,7 @@ def _construct_axes_dict_for_slice(self, axes=None, **kwargs): __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__') __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__') - if not py3compat.PY3: + if not compat.PY3: __div__ = _arith_method(operator.div, '__div__') __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__') @@ -275,7 +273,6 @@ def _from_axes(cls, data, axes): return cls(data, **d) def _init_dict(self, data, axes, dtype=None): - from pandas.util.compat import OrderedDict haxis = axes.pop(self._het_axis) # prefilter if haxis passed @@ -347,7 +344,6 @@ def from_dict(cls, data, intersect=False, orient='items', dtype=None): ------- Panel """ - from pandas.util.compat import OrderedDict,OrderedDefaultdict orient = orient.lower() if orient == 'minor': @@ -477,17 +473,17 @@ def __unicode__(self): class_name = str(self.__class__) shape = self.shape - dims = six.u('Dimensions: %s') % ' x '.join( + dims = u('Dimensions: %s') % ' x '.join( ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)]) def axis_pretty(a): v = getattr(self, a) if len(v) > 0: - return six.u('%s axis: %s to %s') % (a.capitalize(), + return u('%s axis: %s to %s') % (a.capitalize(), com.pprint_thing(v[0]), com.pprint_thing(v[-1])) else: - return six.u('%s axis: None') % a.capitalize() + return u('%s axis: None') % a.capitalize() output = '\n'.join( [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) @@ -1137,7 +1133,7 @@ def transpose(self, *args, **kwargs): """ # construct the args args = list(args) - aliases = tuple(six.iterkeys(kwargs)) + aliases = tuple(compat.iterkeys(kwargs)) for a in self._AXIS_ORDERS: if not a in kwargs: @@ -1487,7 +1483,7 @@ def _prep_ndarray(self, values, copy=True): if not isinstance(values, np.ndarray): values = np.asarray(values) # NumPy strings are a pain, convert to object - if issubclass(values.dtype.type, six.string_types): + if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object, copy=True) else: if copy: @@ -1511,7 +1507,6 @@ def _homogenize_dict(self, frames, intersect=True, dtype=None): ------- dict of aligned results & indicies """ - from pandas.util.compat import OrderedDict 
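# ---------------------------------------------------------------------------
# Sketch of compat.itervalues / compat.iterkeys as used in the nanops and
# panel hunks above; assumed to wrap the version-specific dict iteration
# methods, as six does:
import sys
if sys.version_info[0] >= 3:
    def iterkeys(d):
        return iter(d.keys())

    def itervalues(d):
        return iter(d.values())
else:
    def iterkeys(d):
        return d.iterkeys()

    def itervalues(d):
        return d.itervalues()
# ---------------------------------------------------------------------------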
result = dict() if isinstance(frames,OrderedDict): # caller differs dict/ODict, presered type @@ -1715,8 +1710,8 @@ def install_ipython_completers(): # pragma: no cover @complete_object.when_type(Panel) def complete_dataframe(obj, prev_completions): return prev_completions + [c for c in obj.keys() - if isinstance(c, six.string_types) - and py3compat.isidentifier(c)] + if isinstance(c, compat.string_types) + and compat.isidentifier(c)] # Importing IPython brings in about 200 modules, so we want to avoid it unless # we're in IPython (when those modules are loaded anyway). diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index 71d815482fcc5..e1706a44fa834 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -1,8 +1,8 @@ """ Factory methods to create N-D panels """ import pandas.lib as lib -from pandas.util.py3compat import zip -import six +from pandas.util.compat import zip +import pandas.util.compat as compat def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_aliases=None, stat_axis=2,ns=None): @@ -29,7 +29,7 @@ def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, axis_a """ # if slicer is a name, get the object - if isinstance(slicer, six.string_types): + if isinstance(slicer, compat.string_types): import pandas try: slicer = getattr(pandas, slicer) diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index 136f4893008e3..c73d0803f934f 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1,10 +1,8 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from pandas.util.py3compat import range +from pandas.util.compat import range, zip from pandas.util import compat -from pandas.util.py3compat import zip -import six import itertools import numpy as np @@ -693,7 +691,7 @@ def melt(frame, id_vars=None, value_vars=None, else: var_name = [frame.columns.name if frame.columns.name is not None else 'variable'] - if isinstance(var_name, six.string_types): + if isinstance(var_name, compat.string_types): var_name = [var_name] N, K = frame.shape diff --git a/pandas/core/series.py b/pandas/core/series.py index b25afe5d9f7f1..294898bbd4554 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -25,9 +25,9 @@ _check_slice_bounds, _maybe_convert_indices) from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex, Period -from pandas.util import py3compat +from pandas.util import compat from pandas.util.terminal import get_terminal_size -from pandas.util.py3compat import zip, lzip +from pandas.util.compat import zip, lzip, u, OrderedDict import pandas.core.array as pa @@ -44,7 +44,6 @@ from pandas.compat.scipy import scoreatpercentile as _quantile from pandas.core.config import get_option -import six __all__ = ['Series', 'TimeSeries'] @@ -450,7 +449,6 @@ def __new__(cls, data=None, index=None, dtype=None, name=None, data = data.reindex(index).values elif isinstance(data, dict): if index is None: - from pandas.util.compat import OrderedDict if isinstance(data, OrderedDict): index = Index(data) else: @@ -1118,9 +1116,9 @@ def __unicode__(self): name=True, dtype=True) else: - result = six.u('Series([], dtype: %s)') % self.dtype + result = u('Series([], dtype: %s)') % self.dtype - if not (isinstance(result, six.text_type)): + if not (isinstance(result, compat.text_type)): raise AssertionError() return result @@ -1139,12 +1137,12 @@ def _tidy_repr(self, max_vals=20): result = head + '\n...\n' + tail result = '%s\n%s' % (result, self._repr_footer()) - 
return six.text_type(result) + return compat.text_type(result) def _repr_footer(self): - namestr = six.u("Name: %s, ") % com.pprint_thing( + namestr = u("Name: %s, ") % com.pprint_thing( self.name) if self.name is not None else "" - return six.u('%sLength: %d, dtype: %s') % (namestr, len(self), + return u('%sLength: %d, dtype: %s') % (namestr, len(self), str(self.dtype.name)) def to_string(self, buf=None, na_rep='NaN', float_format=None, @@ -1182,7 +1180,7 @@ def to_string(self, buf=None, na_rep='NaN', float_format=None, length=length, dtype=dtype, name=name) # catch contract violations - if not isinstance(the_repr, six.text_type): + if not isinstance(the_repr, compat.text_type): raise AssertionError("expected unicode string") if buf is None: @@ -1205,7 +1203,7 @@ def _get_repr(self, name=False, print_header=False, length=True, dtype=True, length=length, dtype=dtype, na_rep=na_rep, float_format=float_format) result = formatter.to_string() - if not (isinstance(result, six.text_type)): + if not (isinstance(result, compat.text_type)): raise AssertionError() return result @@ -1222,7 +1220,7 @@ def iteritems(self): return lzip(iter(self.index), iter(self)) iterkv = iteritems - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover items = iteritems #---------------------------------------------------------------------- @@ -1275,7 +1273,7 @@ def __invert__(self): __ipow__ = __pow__ # Python 2 division operators - if not py3compat.PY3: + if not compat.PY3: __div__ = _arith_method(operator.div, '__div__', fill_zeros=np.inf) __rdiv__ = _arith_method(lambda x, y: y / x, '__div__', fill_zeros=np.inf) __idiv__ = __div__ @@ -1386,7 +1384,7 @@ def count(self, level=None): if level is not None: mask = notnull(self.values) - if isinstance(level, six.string_types): + if isinstance(level, compat.string_types): level = self.index._get_level_number(level) level_index = self.index.levels[level] @@ -3048,7 +3046,7 @@ def shift(self, periods=1, freq=None, copy=True, **kwds): offset = _resolve_offset(freq, kwds) - if isinstance(offset, six.string_types): + if isinstance(offset, compat.string_types): offset = datetools.to_offset(offset) def _get_values(): @@ -3101,7 +3099,7 @@ def asof(self, where): ------- value or NaN """ - if isinstance(where, six.string_types): + if isinstance(where, compat.string_types): where = datetools.to_datetime(where) values = self.values @@ -3409,7 +3407,7 @@ def _try_cast(arr, take_fast_path): # This is to prevent mixed-type Series getting all casted to # NumPy string type, e.g. NaN --> '-1#IND'. 
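# ---------------------------------------------------------------------------
# Sketch of the u()/text_type pair that replaces six.u/six.text_type
# throughout this patch; assumed to behave like six's definitions:
import sys
if sys.version_info[0] >= 3:
    text_type = str

    def u(s):
        return s  # string literals are already unicode in Python 3
else:
    text_type = unicode

    def u(s):
        # decode escape sequences the way a u'' literal would, after
        # protecting literal backslashes (this mirrors six.u)
        return unicode(s.replace(r'\\', r'\\\\'), 'unicode_escape')
# ---------------------------------------------------------------------------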
- if issubclass(subarr.dtype.type, six.string_types): + if issubclass(subarr.dtype.type, compat.string_types): subarr = pa.array(data, dtype=object, copy=copy) return subarr @@ -3432,7 +3430,7 @@ def _resolve_offset(freq, kwds): if 'timeRule' in kwds or 'offset' in kwds: offset = kwds.get('offset', None) offset = kwds.get('timeRule', offset) - if isinstance(offset, six.string_types): + if isinstance(offset, compat.string_types): offset = datetools.getOffset(offset) warn = True else: diff --git a/pandas/core/strings.py b/pandas/core/strings.py index c625438dfe203..4ab6b379f1812 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,9 +1,9 @@ import numpy as np -from pandas.util.py3compat import zip -import six +from pandas.util.compat import zip from pandas.core.common import isnull from pandas.core.series import Series +import pandas.util.compat as compat import re import pandas.lib as lib @@ -283,17 +283,17 @@ def str_repeat(arr, repeats): if np.isscalar(repeats): def rep(x): try: - return six.binary_type.__mul__(x, repeats) + return compat.binary_type.__mul__(x, repeats) except TypeError: - return six.text_type.__mul__(x, repeats) + return compat.text_type.__mul__(x, repeats) return _na_map(rep, arr) else: def rep(x, r): try: - return six.binary_type.__mul__(x, r) + return compat.binary_type.__mul__(x, r) except TypeError: - return six.text_type.__mul__(x, r) + return compat.text_type.__mul__(x, r) repeats = np.asarray(repeats, dtype=object) result = lib.vec_binop(arr, repeats, rep) diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py index fa3e384597ee6..ba0b80f2ee3e3 100644 --- a/pandas/io/clipboard.py +++ b/pandas/io/clipboard.py @@ -1,5 +1,5 @@ """ io on the clipboard """ -from pandas.util.py3compat import StringIO +from pandas.util.compat import StringIO def read_clipboard(**kwargs): # pragma: no cover """ diff --git a/pandas/io/common.py b/pandas/io/common.py index 93f4f0b5d4612..dc30010532e08 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -4,11 +4,11 @@ import zipfile from contextlib import contextmanager, closing -from pandas.util.py3compat import StringIO -from pandas.util import py3compat +from pandas.util.compat import StringIO +from pandas.util import compat -if py3compat.PY3: +if compat.PY3: from urllib.request import urlopen _urlopen = urlopen from urllib.parse import urlparse as parse_url @@ -83,7 +83,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): if _is_url(filepath_or_buffer): req = _urlopen(filepath_or_buffer) - if py3compat.PY3: # pragma: no cover + if compat.PY3: # pragma: no cover if encoding: errors = 'strict' else: diff --git a/pandas/io/data.py b/pandas/io/data.py index 04d6cbc956d70..d1962648af033 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -12,14 +12,15 @@ import numpy as np -from pandas.util.py3compat import StringIO, bytes_to_str, range, lrange, lmap +from pandas.util.compat import( + StringIO, bytes_to_str, range, lrange, lmap, zip +) +import pandas.util.compat as compat from pandas import Panel, DataFrame, Series, read_csv, concat from pandas.core.common import PandasError from pandas.io.parsers import TextParser from pandas.io.common import urlopen, ZipFile, urlencode from pandas.util.testing import _network_error_classes -import six -from pandas.util.py3compat import map, zip class SymbolWarning(UserWarning): @@ -101,19 +102,20 @@ def _in_chunks(seq, size): _yahoo_codes = {'symbol': 's', 'last': 'l1', 'change_pct': 'p2', 'PE': 'r', 'time': 't1', 'short_ratio': 's7'} + def 
get_quote_yahoo(symbols): """ Get current yahoo quote Returns a DataFrame """ - if isinstance(symbols, six.string_types): + if isinstance(symbols, compat.string_types): sym_list = symbols else: sym_list = '+'.join(symbols) # for codes see: http://www.gummy-stuff.org/Yahoo-data.htm - request = ''.join(six.itervalues(_yahoo_codes)) # code request string + request = ''.join(compat.itervalues(_yahoo_codes)) # code request string header = list(_yahoo_codes.keys()) data = defaultdict(list) @@ -202,10 +204,9 @@ def _get_hist_google(sym, start, end, retry_count, pause): # www.google.com/finance/historical?q=GOOG&startdate=Jun+9%2C+2011&enddate=Jun+8%2C+2013&output=csv url = google_URL + urlencode({"q": sym, - "startdate": start.strftime('%b %d, ' - '%Y'), - "enddate": end.strftime('%b %d, %Y'), - "output": "csv"}) + "startdate": start.strftime('%b %d, ' '%Y'), + "enddate": end.strftime('%b %d, %Y'), + "output": "csv"}) return _retry_read_url(url, retry_count, pause, 'Google') @@ -322,6 +323,7 @@ def _dl_mult_symbols(symbols, start, end, chunksize, retry_count, pause, _source_functions = {'google': _get_hist_google, 'yahoo': _get_hist_yahoo} + def _get_data_from(symbols, start, end, retry_count, pause, adjust_price, ret_index, chunksize, source, name): if name is not None: @@ -332,7 +334,7 @@ def _get_data_from(symbols, start, end, retry_count, pause, adjust_price, src_fn = _source_functions[source] # If a single symbol, (e.g., 'GOOG') - if isinstance(symbols, (six.string_types, int)): + if isinstance(symbols, (compat.string_types, int)): hist_data = src_fn(symbols, start, end, retry_count, pause) # Or multiple symbols, (e.g., ['GOOG', 'AAPL', 'MSFT']) elif isinstance(symbols, DataFrame): diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index c0e9b4da8f52a..26c3162ecde29 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,5 +1,5 @@ """This module is designed for community supported date conversion functions""" -from pandas.util.py3compat import range +from pandas.util.compat import range import numpy as np import pandas.lib as lib diff --git a/pandas/io/excel.py b/pandas/io/excel.py index f592d80f33216..bf59d3620df4a 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -11,9 +11,9 @@ from pandas.io.parsers import TextParser from pandas.tseries.period import Period from pandas import json -from pandas.util.py3compat import map, zip, reduce -from pandas.util.py3compat import range, lrange -import six +from pandas.util.compat import map, zip, reduce, range, lrange +import pandas.util.compat as compat + def read_excel(path_or_buf, sheetname, kind=None, **kwds): """Read an Excel table into a pandas DataFrame @@ -67,15 +67,17 @@ class ExcelFile(object): def __init__(self, path_or_buf, kind=None, **kwds): self.kind = kind - import xlrd # throw an ImportError if we need to - ver = tuple(map(int,xlrd.__VERSION__.split(".")[:2])) + import xlrd # throw an ImportError if we need to + + ver = tuple(map(int, xlrd.__VERSION__.split(".")[:2])) if ver < (0, 9): - raise ImportError("pandas requires xlrd >= 0.9.0 for excel support, current version "+xlrd.__VERSION__) + raise ImportError("pandas requires xlrd >= 0.9.0 for excel " + "support, current version " + xlrd.__VERSION__) self.path_or_buf = path_or_buf self.tmpfile = None - if isinstance(path_or_buf, six.string_types): + if isinstance(path_or_buf, compat.string_types): self.book = xlrd.open_workbook(path_or_buf) else: data = path_or_buf.read() @@ -110,8 +112,8 @@ def parse(self, sheetname, header=0, 
skiprows=None, skip_footer=0, na_values : list-like, default None List of additional strings to recognize as NA/NaN keep_default_na : bool, default True - If na_values are specified and keep_default_na is False the default NaN - values are overridden, otherwise they're appended to + If na_values are specified and keep_default_na is False the default + NaN values are overridden, otherwise they're appended to verbose : boolean, default False Indicate number of NA values placed in non-numeric columns @@ -126,14 +128,14 @@ def parse(self, sheetname, header=0, skiprows=None, skip_footer=0, if skipfooter is not None: skip_footer = skipfooter - return self._parse_excel(sheetname, header=header, skiprows=skiprows, - index_col=index_col, - has_index_names=has_index_names, - parse_cols=parse_cols, - parse_dates=parse_dates, - date_parser=date_parser, na_values=na_values, - thousands=thousands, chunksize=chunksize, - skip_footer=skip_footer, **kwds) + return self._parse_excel(sheetname, header=header, skiprows=skiprows, + index_col=index_col, + has_index_names=has_index_names, + parse_cols=parse_cols, + parse_dates=parse_dates, + date_parser=date_parser, na_values=na_values, + thousands=thousands, chunksize=chunksize, + skip_footer=skip_footer, **kwds) def _should_parse(self, i, parse_cols): @@ -149,7 +151,8 @@ def _range2cols(areas): """ def _excel2num(x): "Convert Excel column name like 'AB' to 0-based column index" - return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, x.upper().strip(), 0) - 1 + return reduce(lambda s, a: s * 26 + ord(a) - ord('A') + 1, + x.upper().strip(), 0) - 1 cols = [] for rng in areas.split(','): @@ -162,7 +165,7 @@ def _excel2num(x): if isinstance(parse_cols, int): return i <= parse_cols - elif isinstance(parse_cols, six.string_types): + elif isinstance(parse_cols, compat.string_types): return i in _range2cols(parse_cols) else: return i in parse_cols @@ -175,7 +178,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, XL_CELL_ERROR, XL_CELL_BOOLEAN) datemode = self.book.datemode - if isinstance(sheetname, six.string_types): + if isinstance(sheetname, compat.string_types): sheet = self.book.sheet_by_name(sheetname) else: # assume an integer if not a string sheet = self.book.sheet_by_index(sheetname) @@ -185,7 +188,7 @@ def _parse_excel(self, sheetname, header=0, skiprows=None, skip_footer=0, for i in range(sheet.nrows): row = [] for j, (value, typ) in enumerate(zip(sheet.row_values(i), - sheet.row_types(i))): + sheet.row_types(i))): if parse_cols is not None and j not in should_parse: should_parse[j] = self._should_parse(j, parse_cols) @@ -458,4 +461,3 @@ def _writecells_xls(self, cells, sheet_name, startrow, startcol): wks.write(startrow + cell.row, startcol + cell.col, val, style) - diff --git a/pandas/io/ga.py b/pandas/io/ga.py index 74157464b21de..19b478732d6b7 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -17,10 +17,9 @@ from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError -import six -from pandas.util.py3compat import zip +from pandas.util.compat import zip, u -TYPE_MAP = {six.u('INTEGER'): int, six.u('FLOAT'): float, six.u('TIME'): int} +TYPE_MAP = {u('INTEGER'): int, u('FLOAT'): float, u('TIME'): int} NO_CALLBACK = auth.OOB_CALLBACK_URN DOC_URL = auth.DOC_URL @@ -264,7 +263,7 @@ def get_data(self, metrics, start_date, end_date=None, profile_id = profile.get('id') if index_col is None and dimensions is not None: - if isinstance(dimensions, six.string_types): + if isinstance(dimensions, 
compat.string_types): dimensions = [dimensions] index_col = _clean_index(list(dimensions), parse_dates) @@ -315,7 +314,7 @@ def _parse_data(self, rows, col_info, index_col, parse_dates=True, if isinstance(sort, bool) and sort: return df.sort_index() - elif isinstance(sort, (six.string_types, list, tuple, np.ndarray)): + elif isinstance(sort, (compat.string_types, list, tuple, np.ndarray)): return df.sort_index(by=sort) return df @@ -340,7 +339,7 @@ def create_query(self, profile_id, metrics, start_date, end_date=None, def format_query(ids, metrics, start_date, end_date=None, dimensions=None, segment=None, filters=None, sort=None, start_index=None, max_results=10000, **kwargs): - if isinstance(metrics, six.string_types): + if isinstance(metrics, compat.string_types): metrics = [metrics] met = ','.join(['ga:%s' % x for x in metrics]) @@ -359,7 +358,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, lst = [dimensions, filters, sort] [_maybe_add_arg(qry, n, d) for n, d in zip(names, lst)] - if isinstance(segment, six.string_types): + if isinstance(segment, compat.string_types): _maybe_add_arg(qry, 'segment', segment, 'dynamic::ga') elif isinstance(segment, int): _maybe_add_arg(qry, 'segment', segment, 'gaid:') @@ -377,7 +376,7 @@ def format_query(ids, metrics, start_date, end_date=None, dimensions=None, def _maybe_add_arg(query, field, data, prefix='ga'): if data is not None: - if isinstance(data, (six.string_types, int)): + if isinstance(data, (compat.string_types, int)): data = [data] data = ','.join(['%s:%s' % (prefix, x) for x in data]) query[field] = data @@ -438,12 +437,12 @@ def _get_column_types(header_info): def _get_dim_names(header_info): return [x['name'][3:] for x in header_info - if x['columnType'] == six.u('DIMENSION')] + if x['columnType'] == u('DIMENSION')] def _get_met_names(header_info): return [x['name'][3:] for x in header_info - if x['columnType'] == six.u('METRIC')] + if x['columnType'] == u('METRIC')] def _get_data_types(header_info): diff --git a/pandas/io/html.py b/pandas/io/html.py index 2617566add5a8..841fd1bf9942d 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -14,10 +14,8 @@ from pandas import DataFrame, MultiIndex, isnull from pandas.io.common import _is_url, urlopen, parse_url -from pandas.util.py3compat import range, lrange, lmap +from pandas.util.compat import range, lrange, lmap, u, map from pandas.util import compat -import six -from pandas.util.py3compat import map try: @@ -122,7 +120,7 @@ def _read(io): elif os.path.isfile(io): with open(io) as f: raw_text = f.read() - elif isinstance(io, six.string_types): + elif isinstance(io, compat.string_types): raw_text = io else: raise TypeError("Cannot read object of type " @@ -452,8 +450,8 @@ def _build_node_xpath_expr(attrs): if 'class_' in attrs: attrs['class'] = attrs.pop('class_') - s = (six.u("@{k}='{v}'").format(k=k, v=v) for k, v in compat.iteritems(attrs)) - return six.u('[{0}]').format(' and '.join(s)) + s = (u("@{k}='{v}'").format(k=k, v=v) for k, v in compat.iteritems(attrs)) + return u('[{0}]').format(' and '.join(s)) _re_namespace = {'re': 'http://exslt.org/regular-expressions'} @@ -494,9 +492,9 @@ def _parse_tables(self, doc, match, kwargs): pattern = match.pattern # check all descendants for the given pattern - check_all_expr = six.u('//*') + check_all_expr = u('//*') if pattern: - check_all_expr += six.u("[re:test(text(), '{0}')]").format(pattern) + check_all_expr += u("[re:test(text(), '{0}')]").format(pattern) # go up the tree until we find a table 
check_table_expr = '/ancestor::table' @@ -735,10 +733,10 @@ def _parser_dispatch(flavor): def _validate_parser_flavor(flavor): if flavor is None: flavor = ['lxml', 'bs4'] - elif isinstance(flavor, six.string_types): + elif isinstance(flavor, compat.string_types): flavor = [flavor] elif isinstance(flavor, collections.Iterable): - if not all(isinstance(flav, six.string_types) for flav in flavor): + if not all(isinstance(flav, compat.string_types) for flav in flavor): raise TypeError('{0} is not an iterable of strings'.format(flavor)) else: raise TypeError('{0} is not a valid "flavor"'.format(flavor)) diff --git a/pandas/io/json.py b/pandas/io/json.py index ef53d0b9e93e7..35709b4dd992c 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -1,14 +1,12 @@ # pylint: disable-msg=E1101,W0613,W0603 -from pandas.util.py3compat import StringIO +from pandas.util.compat import StringIO, long from pandas.util import compat -from pandas.util.py3compat import long import os from pandas import Series, DataFrame, to_datetime from pandas.io.common import get_filepath_or_buffer import pandas.json as _json -import six loads = _json.loads dumps = _json.dumps @@ -29,7 +27,7 @@ def to_json(path_or_buf, obj, orient=None, date_format='epoch', double_precision else: raise NotImplementedError - if isinstance(path_or_buf, six.string_types): + if isinstance(path_or_buf, compat.string_types): with open(path_or_buf,'w') as fh: fh.write(s) elif path_or_buf is None: @@ -185,7 +183,7 @@ def read_json(path_or_buf=None, orient=None, typ='frame', dtype=True, """ filepath_or_buffer,_ = get_filepath_or_buffer(path_or_buf) - if isinstance(filepath_or_buffer, six.string_types): + if isinstance(filepath_or_buffer, compat.string_types): if os.path.exists(filepath_or_buffer): with open(filepath_or_buffer,'r') as fh: json = fh.read() @@ -470,7 +468,7 @@ def _try_convert_dates(self): def is_ok(col): """ return if this col is ok to try for a date parse """ - if not isinstance(col, six.string_types): return False + if not isinstance(col, compat.string_types): return False if (col.endswith('_at') or col.endswith('_time') or diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 85d5ad0d39afb..433e6d0f8d38e 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2,7 +2,7 @@ Module contains tools for processing files into DataFrames or other objects """ from __future__ import print_function -from pandas.util.py3compat import range, lrange, StringIO, lzip +from pandas.util.compat import range, lrange, StringIO, lzip, zip from pandas.util import compat import re import csv @@ -14,7 +14,7 @@ from pandas.core.frame import DataFrame import datetime import pandas.core.common as com -from pandas.util import py3compat +from pandas.util import compat from pandas.io.date_converters import generic_parser from pandas.io.common import get_filepath_or_buffer @@ -24,8 +24,6 @@ import pandas.tslib as tslib import pandas.parser as _parser from pandas.tseries.period import Period -import six -from pandas.util.py3compat import zip _parser_params = """Also supports optionally iterating or breaking of the file into chunks. 
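# ---------------------------------------------------------------------------
# The compat.PY3 branches in the parsers.py hunks below decode bytes before
# splitting, since binary file handles yield bytes on Python 3 but str on
# Python 2. A helper in the spirit of the bytes_to_str imported earlier might
# look like this (ensure_text is a hypothetical name, not from the patch):
import sys
PY3 = sys.version_info[0] >= 3

def ensure_text(line, encoding='utf-8'):
    # return native unicode text under either major version
    if PY3 and isinstance(line, bytes):
        return line.decode(encoding)
    return line
# ---------------------------------------------------------------------------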
@@ -787,7 +785,7 @@ def _make_index(self, data, alldata, columns, indexnamerow=False): def _get_simple_index(self, data, columns): def ix(col): - if not isinstance(col, six.string_types): + if not isinstance(col, compat.string_types): return col raise ValueError('Index %s invalid' % col) index = None @@ -810,7 +808,7 @@ def ix(col): def _get_complex_date_index(self, data, col_names): def _get_name(icol): - if isinstance(icol, six.string_types): + if isinstance(icol, compat.string_types): return icol if col_names is None: @@ -949,7 +947,7 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) if 'utf-16' in (kwds.get('encoding') or ''): - if isinstance(src, six.string_types): + if isinstance(src, compat.string_types): src = open(src, 'rb') src = com.UTF8Recoder(src, kwds['encoding']) kwds['encoding'] = 'utf-8' @@ -1230,7 +1228,7 @@ def __init__(self, f, **kwds): self.comment = kwds['comment'] self._comment_lines = [] - if isinstance(f, six.string_types): + if isinstance(f, compat.string_types): f = com._get_handle(f, 'r', encoding=self.encoding, compression=self.compression) elif self.compression: @@ -1320,7 +1318,7 @@ class MyDialect(csv.Dialect): def _read(): line = next(f) pat = re.compile(sep) - if (py3compat.PY3 and isinstance(line, bytes)): + if (compat.PY3 and isinstance(line, bytes)): yield pat.split(line.decode('utf-8').strip()) for line in f: yield pat.split(line.decode('utf-8').strip()) @@ -1490,7 +1488,7 @@ def _check_comments(self, lines): for l in lines: rl = [] for x in l: - if (not isinstance(x, six.string_types) or + if (not isinstance(x, compat.string_types) or self.comment not in x): rl.append(x) else: @@ -1509,7 +1507,7 @@ def _check_thousands(self, lines): for l in lines: rl = [] for x in l: - if (not isinstance(x, six.string_types) or + if (not isinstance(x, compat.string_types) or self.thousands not in x or nonnum.search(x.strip())): rl.append(x) @@ -1809,7 +1807,7 @@ def _clean_index_names(columns, index_col): index_col = list(index_col) for i, c in enumerate(index_col): - if isinstance(c, six.string_types): + if isinstance(c, compat.string_types): index_names.append(c) for j, name in enumerate(cp_cols): if name == c: @@ -1822,7 +1820,7 @@ def _clean_index_names(columns, index_col): index_names.append(name) # hack - if isinstance(index_names[0], six.string_types) and 'Unnamed' in index_names[0]: + if isinstance(index_names[0], compat.string_types) and 'Unnamed' in index_names[0]: index_names[0] = None return index_names, columns, index_col @@ -1903,13 +1901,13 @@ def _get_col_names(colspec, columns): def _concat_date_cols(date_cols): if len(date_cols) == 1: - if py3compat.PY3: - return np.array([six.text_type(x) for x in date_cols[0]], dtype=object) + if compat.PY3: + return np.array([compat.text_type(x) for x in date_cols[0]], dtype=object) else: - return np.array([str(x) if not isinstance(x, six.string_types) else x + return np.array([str(x) if not isinstance(x, compat.string_types) else x for x in date_cols[0]], dtype=object) - rs = np.array([' '.join([six.text_type(y) for y in x]) + rs = np.array([' '.join([compat.text_type(y) for y in x]) for x in zip(*date_cols)], dtype=object) return rs diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 56bca476c04c6..314a566d2dc3c 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import cPickle as pkl +from pandas.util.compat import cPickle as pkl, PY3 def to_pickle(obj, path): """ @@ -35,7 +35,6 @@ def read_pickle(path): with open(path, 'rb') as fh: 
return pkl.load(fh) except: - from pandas.util.py3compat import PY3 if PY3: with open(path, 'rb') as fh: return pkl.load(fh, encoding='latin1') diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 9080919421c97..14de4d17e76e4 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -6,7 +6,7 @@ # pylint: disable-msg=E1101,W0613,W0603 from datetime import datetime, date -from pandas.util.py3compat import range, lrange, lmap +from pandas.util.compat import map, range, zip, lrange, lmap, u from pandas.util import compat import time import re @@ -30,7 +30,7 @@ from pandas.core.index import _ensure_index import pandas.core.common as com from pandas.tools.merge import concat -from pandas.util import py3compat +from pandas.util import compat from pandas.io.common import PerformanceWarning import pandas.lib as lib @@ -38,8 +38,6 @@ import pandas.tslib as tslib from contextlib import contextmanager -import six -from pandas.util.py3compat import map, zip # versioning attribute _version = '0.10.1' @@ -58,7 +56,7 @@ def _ensure_decoded(s): def _ensure_encoding(encoding): # set the encoding if we need if encoding is None: - if py3compat.PY3: + if compat.PY3: encoding = _default_encoding return encoding @@ -92,40 +90,40 @@ class AttributeConflictWarning(Warning): # map object types _TYPE_MAP = { - Series: six.u('series'), - SparseSeries: six.u('sparse_series'), - TimeSeries: six.u('series'), - DataFrame: six.u('frame'), - SparseDataFrame: six.u('sparse_frame'), - Panel: six.u('wide'), - Panel4D: six.u('ndim'), - SparsePanel: six.u('sparse_panel') + Series: u('series'), + SparseSeries: u('sparse_series'), + TimeSeries: u('series'), + DataFrame: u('frame'), + SparseDataFrame: u('sparse_frame'), + Panel: u('wide'), + Panel4D: u('ndim'), + SparsePanel: u('sparse_panel') } # storer class map _STORER_MAP = { - six.u('TimeSeries') : 'LegacySeriesStorer', - six.u('Series') : 'LegacySeriesStorer', - six.u('DataFrame') : 'LegacyFrameStorer', - six.u('DataMatrix') : 'LegacyFrameStorer', - six.u('series') : 'SeriesStorer', - six.u('sparse_series') : 'SparseSeriesStorer', - six.u('frame') : 'FrameStorer', - six.u('sparse_frame') : 'SparseFrameStorer', - six.u('wide') : 'PanelStorer', - six.u('sparse_panel') : 'SparsePanelStorer', + u('TimeSeries') : 'LegacySeriesStorer', + u('Series') : 'LegacySeriesStorer', + u('DataFrame') : 'LegacyFrameStorer', + u('DataMatrix') : 'LegacyFrameStorer', + u('series') : 'SeriesStorer', + u('sparse_series') : 'SparseSeriesStorer', + u('frame') : 'FrameStorer', + u('sparse_frame') : 'SparseFrameStorer', + u('wide') : 'PanelStorer', + u('sparse_panel') : 'SparsePanelStorer', } # table class map _TABLE_MAP = { - six.u('generic_table') : 'GenericTable', - six.u('appendable_frame') : 'AppendableFrameTable', - six.u('appendable_multiframe') : 'AppendableMultiFrameTable', - six.u('appendable_panel') : 'AppendablePanelTable', - six.u('appendable_ndim') : 'AppendableNDimTable', - six.u('worm') : 'WORMTable', - six.u('legacy_frame') : 'LegacyFrameTable', - six.u('legacy_panel') : 'LegacyPanelTable', + u('generic_table') : 'GenericTable', + u('appendable_frame') : 'AppendableFrameTable', + u('appendable_multiframe') : 'AppendableMultiFrameTable', + u('appendable_panel') : 'AppendablePanelTable', + u('appendable_ndim') : 'AppendableNDimTable', + u('worm') : 'WORMTable', + u('legacy_frame') : 'LegacyFrameTable', + u('legacy_panel') : 'LegacyPanelTable', } # axes map @@ -194,7 +192,7 @@ def to_hdf(path_or_buf, key, value, mode=None, complevel=None, complib=None, app else: f = 
lambda store: store.put(key, value, **kwargs) - if isinstance(path_or_buf, six.string_types): + if isinstance(path_or_buf, compat.string_types): with get_store(path_or_buf, mode=mode, complevel=complevel, complib=complib) as store: f(store) else: @@ -204,7 +202,7 @@ def read_hdf(path_or_buf, key, **kwargs): """ read from the store, closeit if we opened it """ f = lambda store, auto_close: store.select(key, auto_close=auto_close, **kwargs) - if isinstance(path_or_buf, six.string_types): + if isinstance(path_or_buf, compat.string_types): # can't auto open/close if we are using an iterator # so delegate to the iterator @@ -372,7 +370,7 @@ def open(self, mode='a', warn=True): self._mode = mode if warn and mode == 'w': # pragma: no cover while True: - if py3compat.PY3: + if compat.PY3: raw_input = input response = raw_input("Re-opening as mode='w' will delete the " "current file. Continue (y/n)?") @@ -520,7 +518,7 @@ def select_as_multiple(self, keys, where=None, selector=None, columns=None, star # default to single select if isinstance(keys, (list, tuple)) and len(keys) == 1: keys = keys[0] - if isinstance(keys, six.string_types): + if isinstance(keys, compat.string_types): return self.select(key=keys, where=where, columns=columns, start=start, stop=stop, iterator=iterator, chunksize=chunksize, **kwargs) if not isinstance(keys, (list, tuple)): @@ -751,7 +749,7 @@ def groups(self): """ return a list of all the top-level nodes (that are not themselves a pandas storage object) """ _tables() return [ g for g in self._handle.walkNodes() if getattr(g._v_attrs,'pandas_type',None) or getattr( - g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != six.u('table')) ] + g,'table',None) or (isinstance(g,_table_mod.table.Table) and g._v_name != u('table')) ] def get_node(self, key): """ return the node with the key or None if it does not exist """ @@ -830,8 +828,8 @@ def error(t): _tables() if getattr(group,'table',None) or isinstance(group,_table_mod.table.Table): - pt = six.u('frame_table') - tt = six.u('generic_table') + pt = u('frame_table') + tt = u('generic_table') else: raise TypeError("cannot create a storer if the object is not existing nor a value are passed") else: @@ -843,10 +841,10 @@ def error(t): # we are actually a table if table or append: - pt += six.u('_table') + pt += u('_table') # a storer node - if six.u('table') not in pt: + if u('table') not in pt: try: return globals()[_STORER_MAP[pt]](self, group, **kwargs) except: @@ -858,26 +856,26 @@ def error(t): # if we are a writer, determin the tt if value is not None: - if pt == six.u('frame_table'): + if pt == u('frame_table'): index = getattr(value,'index',None) if index is not None: if index.nlevels == 1: - tt = six.u('appendable_frame') + tt = u('appendable_frame') elif index.nlevels > 1: - tt = six.u('appendable_multiframe') - elif pt == six.u('wide_table'): - tt = six.u('appendable_panel') - elif pt == six.u('ndim_table'): - tt = six.u('appendable_ndim') + tt = u('appendable_multiframe') + elif pt == u('wide_table'): + tt = u('appendable_panel') + elif pt == u('ndim_table'): + tt = u('appendable_ndim') else: # distiguish between a frame/table - tt = six.u('legacy_panel') + tt = u('legacy_panel') try: fields = group.table._v_attrs.fields - if len(fields) == 1 and fields[0] == six.u('value'): - tt = six.u('legacy_frame') + if len(fields) == 1 and fields[0] == u('value'): + tt = u('legacy_frame') except: pass @@ -1147,7 +1145,7 @@ def __iter__(self): def maybe_set_size(self, min_itemsize=None, **kwargs): """ maybe 
set a string col itemsize: min_itemsize can be an interger or a dict with this columns name with an integer size """ - if _ensure_decoded(self.kind) == six.u('string'): + if _ensure_decoded(self.kind) == u('string'): if isinstance(min_itemsize, dict): min_itemsize = min_itemsize.get(self.name) @@ -1167,7 +1165,7 @@ def validate_col(self, itemsize=None): # validate this column for string truncation (or reset to the max size) dtype = getattr(self, 'dtype', None) - if _ensure_decoded(self.kind) == six.u('string'): + if _ensure_decoded(self.kind) == u('string'): c = self.col if c is not None: @@ -1297,7 +1295,7 @@ def __init__(self, values=None, kind=None, typ=None, cname=None, data=None, bloc super(DataCol, self).__init__( values=values, kind=kind, typ=typ, cname=cname, **kwargs) self.dtype = None - self.dtype_attr = six.u("%s_dtype") % self.name + self.dtype_attr = u("%s_dtype") % self.name self.set_data(data) def __unicode__(self): @@ -1326,15 +1324,15 @@ def set_kind(self): # set my kind if we can if self.dtype is not None: dtype = _ensure_decoded(self.dtype) - if dtype.startswith(six.u('string')) or dtype.startswith(six.u('bytes')): + if dtype.startswith(u('string')) or dtype.startswith(u('bytes')): self.kind = 'string' - elif dtype.startswith(six.u('float')): + elif dtype.startswith(u('float')): self.kind = 'float' - elif dtype.startswith(six.u('int')) or dtype.startswith(six.u('uint')): + elif dtype.startswith(u('int')) or dtype.startswith(u('uint')): self.kind = 'integer' - elif dtype.startswith(six.u('date')): + elif dtype.startswith(u('date')): self.kind = 'datetime' - elif dtype.startswith(six.u('bool')): + elif dtype.startswith(u('bool')): self.kind = 'bool' else: raise AssertionError("cannot interpret dtype of [%s] in [%s]" % (dtype,self)) @@ -1508,7 +1506,7 @@ def convert(self, values, nan_rep, encoding): dtype = _ensure_decoded(self.dtype) # reverse converts - if dtype == six.u('datetime64'): + if dtype == u('datetime64'): # recreate the timezone if self.tz is not None: @@ -1521,10 +1519,10 @@ def convert(self, values, nan_rep, encoding): else: self.data = np.asarray(self.data, dtype='M8[ns]') - elif dtype == six.u('date'): + elif dtype == u('date'): self.data = np.array( [date.fromtimestamp(v) for v in self.data], dtype=object) - elif dtype == six.u('datetime'): + elif dtype == u('datetime'): self.data = np.array( [datetime.fromtimestamp(v) for v in self.data], dtype=object) @@ -1536,7 +1534,7 @@ def convert(self, values, nan_rep, encoding): self.data = self.data.astype('O') # convert nans / decode - if _ensure_decoded(self.kind) == six.u('string'): + if _ensure_decoded(self.kind) == u('string'): self.data = _unconvert_string_array(self.data, nan_rep=nan_rep, encoding=encoding) return self @@ -1560,7 +1558,7 @@ class DataIndexableCol(DataCol): @property def is_searchable(self): - return _ensure_decoded(self.kind) == six.u('string') + return _ensure_decoded(self.kind) == u('string') def get_atom_string(self, block, itemsize): return _tables().StringCol(itemsize=itemsize) @@ -1797,7 +1795,7 @@ def read_array(self, key): else: ret = data - if dtype == six.u('datetime64'): + if dtype == u('datetime64'): ret = np.array(ret, dtype='M8[ns]') if transposed: @@ -1808,13 +1806,13 @@ def read_array(self, key): def read_index(self, key): variety = _ensure_decoded(getattr(self.attrs, '%s_variety' % key)) - if variety == six.u('multi'): + if variety == u('multi'): return self.read_multi_index(key) - elif variety == six.u('block'): + elif variety == u('block'): return 
self.read_block_index(key) - elif variety == six.u('sparseint'): + elif variety == u('sparseint'): return self.read_sparse_intindex(key) - elif variety == six.u('regular'): + elif variety == u('regular'): _, index = self.read_index_node(getattr(self.group, key)) return index else: # pragma: no cover @@ -1923,13 +1921,13 @@ def read_index_node(self, node): factory = self._get_index_factory(index_class) kwargs = {} - if six.u('freq') in node._v_attrs: + if u('freq') in node._v_attrs: kwargs['freq'] = node._v_attrs['freq'] - if six.u('tz') in node._v_attrs: + if u('tz') in node._v_attrs: kwargs['tz'] = node._v_attrs['tz'] - if kind in (six.u('date'), six.u('datetime')): + if kind in (u('date'), u('datetime')): index = factory(_unconvert_index(data, kind, encoding=self.encoding), dtype=object, **kwargs) else: @@ -2038,7 +2036,7 @@ def read(self, **kwargs): return DataFrame(values, index=index, columns=columns) class SeriesStorer(GenericStorer): - pandas_kind = six.u('series') + pandas_kind = u('series') attributes = ['name'] @property @@ -2065,7 +2063,7 @@ def write(self, obj, **kwargs): self.attrs.name = obj.name class SparseSeriesStorer(GenericStorer): - pandas_kind = six.u('sparse_series') + pandas_kind = u('sparse_series') attributes = ['name','fill_value','kind'] def read(self, **kwargs): @@ -2074,7 +2072,7 @@ def read(self, **kwargs): sp_values = self.read_array('sp_values') sp_index = self.read_index('sp_index') return SparseSeries(sp_values, index=index, sparse_index=sp_index, - kind=self.kind or six.u('block'), fill_value=self.fill_value, + kind=self.kind or u('block'), fill_value=self.fill_value, name=self.name) def write(self, obj, **kwargs): @@ -2087,7 +2085,7 @@ def write(self, obj, **kwargs): self.attrs.kind = obj.kind class SparseFrameStorer(GenericStorer): - pandas_kind = six.u('sparse_frame') + pandas_kind = u('sparse_frame') attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2119,7 +2117,7 @@ def write(self, obj, **kwargs): self.write_index('columns', obj.columns) class SparsePanelStorer(GenericStorer): - pandas_kind = six.u('sparse_panel') + pandas_kind = u('sparse_panel') attributes = ['default_kind','default_fill_value'] def read(self, **kwargs): @@ -2223,11 +2221,11 @@ def write(self, obj, **kwargs): self.write_index('block%d_items' % i, blk.items) class FrameStorer(BlockManagerStorer): - pandas_kind = six.u('frame') + pandas_kind = u('frame') obj_type = DataFrame class PanelStorer(BlockManagerStorer): - pandas_kind = six.u('wide') + pandas_kind = u('wide') obj_type = Panel is_shape_reversed = True @@ -2252,7 +2250,7 @@ class Table(Storer): levels : the names of levels """ - pandas_kind = six.u('wide_table') + pandas_kind = u('wide_table') table_type = None levels = 1 is_table = True @@ -2326,7 +2324,7 @@ def nrows_expected(self): @property def is_exists(self): """ has this table been created """ - return six.u('table') in self.group + return u('table') in self.group @property def storable(self): @@ -2845,7 +2843,7 @@ class WORMTable(Table): table. 
writing is a one-time operation the data are stored in a format that allows for searching the data on disk """ - table_type = six.u('worm') + table_type = u('worm') def read(self, **kwargs): """ read the indicies and the indexing array, calculate offset rows and @@ -2870,7 +2868,7 @@ class LegacyTable(Table): IndexCol(name='column', axis=2, pos=1, index_kind='columns_kind'), DataCol(name='fields', cname='values', kind_attr='fields', pos=2)] - table_type = six.u('legacy') + table_type = u('legacy') ndim = 3 def write(self, **kwargs): @@ -2960,8 +2958,8 @@ def read(self, where=None, columns=None, **kwargs): class LegacyFrameTable(LegacyTable): """ support the legacy frame table """ - pandas_kind = six.u('frame_table') - table_type = six.u('legacy_frame') + pandas_kind = u('frame_table') + table_type = u('legacy_frame') obj_type = Panel def read(self, *args, **kwargs): @@ -2970,14 +2968,14 @@ def read(self, *args, **kwargs): class LegacyPanelTable(LegacyTable): """ support the legacy panel table """ - table_type = six.u('legacy_panel') + table_type = u('legacy_panel') obj_type = Panel class AppendableTable(LegacyTable): """ suppor the new appendable table formats """ _indexables = None - table_type = six.u('appendable') + table_type = u('appendable') def write(self, obj, axes=None, append=False, complib=None, complevel=None, fletcher32=None, min_itemsize=None, chunksize=None, @@ -3140,8 +3138,8 @@ def delete(self, where=None, **kwargs): class AppendableFrameTable(AppendableTable): """ suppor the new appendable table formats """ - pandas_kind = six.u('frame_table') - table_type = six.u('appendable_frame') + pandas_kind = u('frame_table') + table_type = u('appendable_frame') ndim = 2 obj_type = DataFrame @@ -3195,8 +3193,8 @@ def read(self, where=None, columns=None, **kwargs): class GenericTable(AppendableFrameTable): """ a table that read/writes the generic pytables table format """ - pandas_kind = six.u('frame_table') - table_type = six.u('generic_table') + pandas_kind = u('frame_table') + table_type = u('generic_table') ndim = 2 obj_type = DataFrame @@ -3240,13 +3238,13 @@ def write(self, **kwargs): class AppendableMultiFrameTable(AppendableFrameTable): """ a frame with a multi-index """ - table_type = six.u('appendable_multiframe') + table_type = u('appendable_multiframe') obj_type = DataFrame ndim = 2 @property def table_type_short(self): - return six.u('appendable_multi') + return u('appendable_multi') def write(self, obj, data_columns=None, **kwargs): if data_columns is None: @@ -3271,7 +3269,7 @@ def read(self, columns=None, **kwargs): class AppendablePanelTable(AppendableTable): """ suppor the new appendable table formats """ - table_type = six.u('appendable_panel') + table_type = u('appendable_panel') ndim = 3 obj_type = Panel @@ -3288,7 +3286,7 @@ def is_transposed(self): class AppendableNDimTable(AppendablePanelTable): """ suppor the new appendable table formats """ - table_type = six.u('appendable_ndim') + table_type = u('appendable_ndim') ndim = 4 obj_type = Panel4D @@ -3356,18 +3354,18 @@ def _convert_index(index, encoding=None): def _unconvert_index(data, kind, encoding=None): kind = _ensure_decoded(kind) - if kind == six.u('datetime64'): + if kind == u('datetime64'): index = DatetimeIndex(data) - elif kind == six.u('datetime'): + elif kind == u('datetime'): index = np.array([datetime.fromtimestamp(v) for v in data], dtype=object) - elif kind == six.u('date'): + elif kind == u('date'): index = np.array([date.fromtimestamp(v) for v in data], dtype=object) - elif kind in 
(six.u('integer'), six.u('float')): + elif kind in (u('integer'), u('float')): index = np.array(data) - elif kind in (six.u('string')): + elif kind in (u('string')): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) - elif kind == six.u('object'): + elif kind == u('object'): index = np.array(data[0]) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) @@ -3375,11 +3373,11 @@ def _unconvert_index(data, kind, encoding=None): def _unconvert_index_legacy(data, kind, legacy=False, encoding=None): kind = _ensure_decoded(kind) - if kind == six.u('datetime'): + if kind == u('datetime'): index = lib.time64_to_datetime(data) - elif kind in (six.u('integer')): + elif kind in (u('integer')): index = np.array(data, dtype=object) - elif kind in (six.u('string')): + elif kind in (u('string')): index = _unconvert_string_array(data, nan_rep=None, encoding=encoding) else: # pragma: no cover raise ValueError('unrecognized index type %s' % kind) @@ -3437,7 +3435,7 @@ def _get_converter(kind, encoding): def _need_convert(kind): kind = _ensure_decoded(kind) - if kind in (six.u('datetime'), six.u('datetime64'), six.u('string')): + if kind in (u('datetime'), u('datetime64'), u('string')): return True return False @@ -3503,7 +3501,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.value = field.value # a string expression (or just the field) - elif isinstance(field, six.string_types): + elif isinstance(field, compat.string_types): # is a term is passed s = self._search.match(field) @@ -3516,7 +3514,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): self.field = field # is an op passed? - if isinstance(op, six.string_types) and op in self._ops: + if isinstance(op, compat.string_types) and op in self._ops: self.op = op self.value = value else: @@ -3537,7 +3535,7 @@ def __init__(self, field, op=None, value=None, queryables=None, encoding=None): # we have valid conditions if self.op in ['>', '>=', '<', '<=']: - if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,six.string_types): + if hasattr(self.value, '__iter__') and len(self.value) > 1 and not isinstance(self.value,compat.string_types): raise ValueError("an inequality condition cannot have multiple values [%s]" % str(self)) if not is_list_like(self.value): @@ -3627,36 +3625,36 @@ def stringify(value): return value kind = _ensure_decoded(self.kind) - if kind == six.u('datetime64') or kind == six.u('datetime'): + if kind == u('datetime64') or kind == u('datetime'): v = lib.Timestamp(v) if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v,v.value,kind) elif (isinstance(v, datetime) or hasattr(v, 'timetuple') - or kind == six.u('date')): + or kind == u('date')): v = time.mktime(v.timetuple()) return TermValue(v,Timestamp(v),kind) - elif kind == six.u('integer'): + elif kind == u('integer'): v = int(float(v)) return TermValue(v,v,kind) - elif kind == six.u('float'): + elif kind == u('float'): v = float(v) return TermValue(v,v,kind) - elif kind == six.u('bool'): - if isinstance(v, six.string_types): - poss_vals = [six.u('false'), six.u('f'), six.u('no'), - six.u('n'), six.u('none'), six.u('0'), - six.u('[]'), six.u('{}'), six.u('')] + elif kind == u('bool'): + if isinstance(v, compat.string_types): + poss_vals = [u('false'), u('f'), u('no'), + u('n'), u('none'), u('0'), + u('[]'), u('{}'), u('')] v = not v.strip().lower() in poss_vals else: v = bool(v) return TermValue(v,v,kind) - elif not isinstance(v, 
six.string_types): + elif not isinstance(v, compat.string_types): v = stringify(v) - return TermValue(v,stringify(v),six.u('string')) + return TermValue(v,stringify(v),u('string')) # string quoting - return TermValue(v,stringify(v),six.u('string')) + return TermValue(v,stringify(v),u('string')) class TermValue(object): """ hold a term value the we use to construct a condition/filter """ @@ -3669,7 +3667,7 @@ def __init__(self, value, converted, kind): def tostring(self, encoding): """ quote the string if not encoded else encode and return """ - if self.kind == six.u('string'): + if self.kind == u('string'): if encoding is not None: return self.converted return '"%s"' % self.converted @@ -3744,7 +3742,7 @@ def generate(self, where): # operands inside any terms if not any([isinstance(w, (list, tuple, Term)) for w in where]): - if not any([isinstance(w, six.string_types) and Term._search.match(w) for w in where]): + if not any([isinstance(w, compat.string_types) and Term._search.match(w) for w in where]): where = [where] queryables = self.table.queryables() diff --git a/pandas/io/sql.py b/pandas/io/sql.py index c5111c77cc4a1..3a88f4e3b2fff 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -5,14 +5,13 @@ from __future__ import print_function from datetime import datetime, date -from pandas.util.py3compat import range, lzip +from pandas.util.compat import range, lzip, map, zip +import pandas.util.compat as compat import numpy as np import traceback from pandas.core.datetools import format as date_format -from pandas.core.api import DataFrame, isnull -from pandas.util.py3compat import map, zip -import six +from pandas.core.api import DataFrame #------------------------------------------------------------------------------ # Helper execution function @@ -176,6 +175,7 @@ def read_frame(sql, con, index_col=None, coerce_float=True, params=None): frame_query = read_frame read_sql = read_frame + def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): """ Write records stored in a DataFrame to a SQL database. @@ -197,9 +197,9 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): warnings.warn("append is deprecated, use if_exists instead", FutureWarning) if kwargs['append']: - if_exists='append' + if_exists = 'append' else: - if_exists='fail' + if_exists = 'fail' exists = table_exists(name, con, flavor) if if_exists == 'fail' and exists: raise ValueError("Table '%s' already exists." % name) @@ -219,8 +219,8 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): cur = con.cursor() # Replace spaces in DataFrame column names with _. 
safe_names = [s.replace(' ', '_').strip() for s in frame.columns] - flavor_picker = {'sqlite' : _write_sqlite, - 'mysql' : _write_mysql} + flavor_picker = {'sqlite': _write_sqlite, + 'mysql': _write_mysql} func = flavor_picker.get(flavor, None) if func is None: @@ -229,6 +229,7 @@ def write_frame(frame, name, con, flavor='sqlite', if_exists='fail', **kwargs): cur.close() con.commit() + def _write_sqlite(frame, table, names, cur): bracketed_names = ['[' + column + ']' for column in names] col_names = ','.join(bracketed_names) @@ -236,12 +237,13 @@ def _write_sqlite(frame, table, names, cur): insert_query = 'INSERT INTO %s (%s) VALUES (%s)' % ( table, col_names, wildcards) # pandas types are badly handled if there is only 1 column ( Issue #3628 ) - if not len(frame.columns )==1 : + if not len(frame.columns) == 1: data = [tuple(x) for x in frame.values] - else : + else: data = [tuple(x) for x in frame.values.tolist()] cur.executemany(insert_query, data) + def _write_mysql(frame, table, names, cur): bracketed_names = ['`' + column + '`' for column in names] col_names = ','.join(bracketed_names) @@ -251,16 +253,18 @@ def _write_mysql(frame, table, names, cur): data = [tuple(x) for x in frame.values] cur.executemany(insert_query, data) + def table_exists(name, con, flavor): flavor_map = { 'sqlite': ("SELECT name FROM sqlite_master " "WHERE type='table' AND name='%s';") % name, - 'mysql' : "SHOW TABLES LIKE '%s'" % name} + 'mysql': "SHOW TABLES LIKE '%s'" % name} query = flavor_map.get(flavor, None) if query is None: raise NotImplementedError return len(tquery(query, con)) > 0 + def get_sqltype(pytype, flavor): sqltype = {'mysql': 'VARCHAR (63)', 'sqlite': 'TEXT'} @@ -288,6 +292,7 @@ def get_sqltype(pytype, flavor): return sqltype[flavor] + def get_schema(frame, name, flavor, keys=None): "Return a CREATE TABLE statement to suit the contents of a DataFrame." lookup_type = lambda dtype: get_sqltype(dtype.type, flavor) @@ -301,7 +306,7 @@ def get_schema(frame, name, flavor, keys=None): keystr = '' if keys is not None: - if isinstance(keys, six.string_types): + if isinstance(keys, compat.string_types): keys = (keys,) keystr = ', PRIMARY KEY (%s)' % ','.join(keys) template = """CREATE TABLE %(name)s ( @@ -312,6 +317,7 @@ def get_schema(frame, name, flavor, keys=None): 'keystr': keystr} return create_statement + def sequence2dict(seq): """Helper function for cx_Oracle. 
@@ -324,6 +330,6 @@ def sequence2dict(seq): http://www.gingerandjohn.com/archives/2004/02/26/cx_oracle-executemany-example/ """ d = {} - for k,v in zip(range(1, 1 + len(seq)), seq): + for k, v in zip(range(1, 1 + len(seq)), seq): d[str(k)] = v return d diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 338c6e1ac02f5..1ffd99b1c8c2f 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -19,9 +19,9 @@ from pandas.core.series import Series from pandas.core.categorical import Categorical import datetime -from pandas.util import py3compat from pandas.util import compat -from pandas.util.py3compat import StringIO, long, lrange, lmap, lzip +from pandas.util import compat +from pandas.util.compat import StringIO, long, lrange, lmap, lzip from pandas import isnull from pandas.io.parsers import _parser_params, Appender from pandas.io.common import get_filepath_or_buffer @@ -256,7 +256,7 @@ def __init__(self, encoding): } def _decode_bytes(self, str, errors=None): - if py3compat.PY3: + if compat.PY3: return str.decode(self._encoding, errors) else: return str @@ -298,7 +298,7 @@ def __init__(self, path_or_buf, encoding=None): if encoding is not None: self._encoding = encoding - if type(path_or_buf) is str: + if isinstance(path_or_buf, (str, compat.text_type, bytes)): self.path_or_buf = open(path_or_buf, 'rb') else: self.path_or_buf = path_or_buf @@ -403,7 +403,7 @@ def _unpack(self, fmt, byt): return d def _null_terminate(self, s): - if py3compat.PY3: # have bytes not strings, so must decode + if compat.PY3: # have bytes not strings, so must decode null_byte = b"\0" try: s = s[:s.index(null_byte)] @@ -545,7 +545,7 @@ def data(self, convert_dates=True, convert_categoricals=True, index=None): data[col] = data[col].apply(_stata_elapsed_date_to_datetime, args=(self.fmtlist[i],)) if convert_categoricals: - cols = np.where(lmap(lambda x: x in six.iterkeys(self.value_label_dict), self.lbllist))[0] + cols = np.where(lmap(lambda x: x in compat.iterkeys(self.value_label_dict), self.lbllist))[0] for i in cols: col = data.columns[i] labeled_data = np.copy(data[col]) @@ -751,7 +751,7 @@ def _write(self, to_write): """ Helper to call encode before writing to file for Python 3 compat. 
""" - if py3compat.PY3: + if compat.PY3: self._file.write(to_write.encode(self._encoding)) else: self._file.write(to_write) @@ -907,7 +907,7 @@ def _write_data_dates(self): def _null_terminate(self, s, as_string=False): null_byte = '\x00' - if py3compat.PY3 and not as_string: + if compat.PY3 and not as_string: s += null_byte return s.encode(self._encoding) else: diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index 85052ed2ba1bb..ab08ff505739f 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -1,7 +1,7 @@ """ self-contained to write legacy pickle files """ from __future__ import print_function -from pandas.util.py3compat import zip, cPickle as pickle +from pandas.util.compat import zip, cPickle as pickle def _create_sp_series(): diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index b3c88611d40b3..d15262bb65dc5 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -2,7 +2,7 @@ C/Cython ascii file parser tests """ -from pandas.util.py3compat import StringIO, BytesIO +from pandas.util.compat import StringIO, BytesIO, map from datetime import datetime from pandas.util import compat import csv @@ -23,15 +23,13 @@ from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib -from pandas.util import py3compat +from pandas.util import compat from pandas.lib import Timestamp import pandas.util.testing as tm from pandas.parser import TextReader import pandas.parser as parser -import six -from pandas.util.py3compat import map class TestCParser(unittest.TestCase): diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index a6ccc56fb6050..1e1267558932e 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -14,7 +14,6 @@ from pandas.util.testing import (assert_series_equal, assert_produces_warning, network, assert_frame_equal) from numpy.testing import assert_array_equal -import six def assert_n_failed_equals_n_null_columns(wngs, obj, cls=SymbolWarning): diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 396912c0f5f54..13f03683d9e82 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import StringIO, BytesIO +from pandas.util.compat import StringIO, BytesIO from datetime import date, datetime import csv import os @@ -19,7 +19,7 @@ from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib -from pandas.util import py3compat +from pandas.util import compat from pandas.lib import Timestamp import pandas.io.date_converters as conv diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 7726711def5b2..764c5959eee55 100644 --- a/pandas/io/tests/test_excel.py +++ b/pandas/io/tests/test_excel.py @@ -1,9 +1,8 @@ # pylint: disable=E1101 -from pandas.util.py3compat import StringIO, BytesIO, PY3 +from pandas.util.compat import StringIO, BytesIO, PY3, u, range, map from datetime import datetime from os.path import split as psplit -from pandas.util.py3compat import range import csv import os import sys @@ -28,7 +27,7 @@ import pandas as pd import pandas.lib as lib -from pandas.util import py3compat +from pandas.util import compat from pandas.lib import Timestamp from pandas.tseries.index import 
date_range import pandas.tseries.tools as tools @@ -36,8 +35,6 @@ from numpy.testing.decorators import slow from pandas.parser import OverflowError -import six -from pandas.util.py3compat import map def _skip_if_no_xlrd(): try: @@ -710,7 +707,7 @@ def test_to_excel_unicode_filename(self): _skip_if_no_excelsuite() for ext in ['xls', 'xlsx']: - filename = six.u('\u0192u.') + ext + filename = u('\u0192u.') + ext try: f = open(filename, 'wb') diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 3c6848d86abe5..09e2c86dd265f 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -1,10 +1,8 @@ from __future__ import print_function import os import re -from pandas.util.py3compat import StringIO from unittest import TestCase import warnings -import six from distutils.version import LooseVersion from pandas.io.common import URLError @@ -14,7 +12,8 @@ import numpy as np from numpy.random import rand from numpy.testing.decorators import slow -from pandas.util.py3compat import map, zip +from pandas.util.compat import map, zip, StringIO +import pandas.util.compat as compat try: from importlib import import_module @@ -45,7 +44,7 @@ def _skip_if_no(module_name): def _skip_if_none_of(module_names): - if isinstance(module_names, six.string_types): + if isinstance(module_names, compat.string_types): _skip_if_no(module_names) if module_names == 'bs4': import bs4 diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index f27345b917b3b..94138ccbc2c4e 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -2,10 +2,9 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from pandas.util.py3compat import range, lrange, StringIO +from pandas.util.compat import range, lrange, StringIO, cPickle as pickle from pandas.util import compat from pandas.io.common import URLError -from pandas.util.py3compat import cPickle as pickle import operator import os import unittest diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index cbea04ffb46de..6d89daa7eaff1 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -16,12 +16,10 @@ import random import decimal from functools import partial -from pandas.util.py3compat import range, StringIO +from pandas.util.compat import range, zip, StringIO, u from pandas.util import compat import pandas.json as ujson -import six -from pandas.util.py3compat import zip -import pandas.util.py3compat as py3compat +import pandas.util.compat as compat import numpy as np from pandas.util.testing import assert_almost_equal @@ -72,7 +70,7 @@ def helper(expected_output, **encode_kwargs): helper(html_encoded, ensure_ascii=False, encode_html_chars=True) def test_doubleLongIssue(self): - sut = {six.u('a'): -4342969734183514} + sut = {u('a'): -4342969734183514} encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) @@ -81,7 +79,7 @@ def test_doubleLongIssue(self): self.assertEqual(sut, decoded) def test_doubleLongDecimalIssue(self): - sut = {six.u('a'): -12345678901234.56789012} + sut = {u('a'): -12345678901234.56789012} encoded = json.dumps(sut) decoded = json.loads(encoded) self.assertEqual(sut, decoded) @@ -91,12 +89,12 @@ def test_doubleLongDecimalIssue(self): def test_encodeDecodeLongDecimal(self): - sut = {six.u('a'): -528656961.4399388} + sut = {u('a'): -528656961.4399388} encoded = 
ujson.dumps(sut, double_precision=15) ujson.decode(encoded) def test_decimalDecodeTestPrecise(self): - sut = {six.u('a'): 4.56} + sut = {u('a'): 4.56} encoded = ujson.encode(sut) decoded = ujson.decode(encoded, precise_float=True) self.assertEqual(sut, decoded) @@ -112,16 +110,16 @@ def test_encodeDoubleTinyExponential(self): self.assert_(np.allclose(num, ujson.decode(ujson.encode(num)))) def test_encodeDictWithUnicodeKeys(self): - input = {six.u("key1"): six.u("value1"), six.u("key1"): - six.u("value1"), six.u("key1"): six.u("value1"), - six.u("key1"): six.u("value1"), six.u("key1"): - six.u("value1"), six.u("key1"): six.u("value1")} + input = {u("key1"): u("value1"), u("key1"): + u("value1"), u("key1"): u("value1"), + u("key1"): u("value1"), u("key1"): + u("value1"), u("key1"): u("value1")} output = ujson.encode(input) - input = {six.u("بن"): six.u("value1"), six.u("بن"): six.u("value1"), - six.u("بن"): six.u("value1"), six.u("بن"): six.u("value1"), - six.u("بن"): six.u("value1"), six.u("بن"): six.u("value1"), - six.u("بن"): six.u("value1")} + input = {u("بن"): u("value1"), u("بن"): u("value1"), + u("بن"): u("value1"), u("بن"): u("value1"), + u("بن"): u("value1"), u("بن"): u("value1"), + u("بن"): u("value1")} output = ujson.encode(input) pass @@ -370,7 +368,7 @@ def test_encodeToUTF8(self): self.assertEquals(dec, json.loads(enc)) def test_decodeFromUnicode(self): - input = six.u("{\"obj\": 31337}") + input = u("{\"obj\": 31337}") dec1 = ujson.decode(input) dec2 = ujson.decode(str(input)) self.assertEquals(dec1, dec2) @@ -620,7 +618,7 @@ def test_encodeNullCharacter(self): self.assertEquals(output, json.dumps(input)) self.assertEquals(input, ujson.decode(output)) - self.assertEquals('" \\u0000\\r\\n "', ujson.dumps(six.u(" \u0000\r\n "))) + self.assertEquals('" \\u0000\\r\\n "', ujson.dumps(u(" \u0000\r\n "))) pass def test_decodeNullCharacter(self): @@ -779,7 +777,7 @@ def test_decodeNumberWith32bitSignBit(self): def test_encodeBigEscape(self): for x in range(10): - if py3compat.PY3: + if compat.PY3: base = '\u00e5'.encode('utf-8') else: base = "\xc3\xa5" @@ -788,16 +786,16 @@ def test_encodeBigEscape(self): def test_decodeBigEscape(self): for x in range(10): - if py3compat.PY3: + if compat.PY3: base = '\u00e5'.encode('utf-8') else: base = "\xc3\xa5" - quote = py3compat.str_to_bytes("\"") + quote = compat.str_to_bytes("\"") input = quote + (base * 1024 * 1024 * 2) + quote output = ujson.decode(input) def test_toDict(self): - d = {six.u("key"): 31337} + d = {u("key"): 31337} class DictTest: def toDict(self): @@ -1043,16 +1041,16 @@ def testArrayNumpyLabelled(self): output = ujson.loads(ujson.dumps(input), numpy=True, labelled=True) self.assertTrue((np.array([42]) == output[0]).all()) self.assertTrue(output[1] is None) - self.assertTrue((np.array([six.u('a')]) == output[2]).all()) + self.assertTrue((np.array([u('a')]) == output[2]).all()) # py3 is non-determinstic on the ordering...... 
-        if not py3compat.PY3:
+        if not compat.PY3:
             input = [{'a': 42, 'b':31}, {'a': 24, 'c': 99},
                      {'a': 2.4, 'b': 78}]
             output = ujson.loads(ujson.dumps(input), numpy=True,
                                  labelled=True)
             expectedvals = np.array([42, 31, 24, 99, 2.4, 78],
                                     dtype=int).reshape((3,2))
             self.assertTrue((expectedvals == output[0]).all())
             self.assertTrue(output[1] is None)
-            self.assertTrue((np.array([six.u('a'), 'b']) == output[2]).all())
+            self.assertTrue((np.array([u('a'), 'b']) == output[2]).all())

         input = {1: {'a': 42, 'b':31}, 2: {'a': 24, 'c': 99},
                  3: {'a': 2.4, 'b': 78}}
diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py
index eeb34862f0e6a..0f46ffa9092a9 100644
--- a/pandas/io/tests/test_parsers.py
+++ b/pandas/io/tests/test_parsers.py
@@ -12,7 +12,9 @@
 import numpy as np
 from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex
-from pandas.util.py3compat import StringIO, BytesIO, PY3, range, long, lrange, lmap
+from pandas.util.compat import (
+    StringIO, BytesIO, PY3, range, long, lrange, lmap, u, map
+)
 from pandas.io.common import urlopen, URLError
 import pandas.io.parsers as parsers
 from pandas.io.parsers import (read_csv, read_table, read_fwf,
@@ -26,7 +28,7 @@
 import pandas as pd
 import pandas.lib as lib
-from pandas.util import py3compat
+from pandas.util import compat
 from pandas.lib import Timestamp
 from pandas.tseries.index import date_range
 import pandas.tseries.tools as tools
@@ -34,8 +36,6 @@
 from numpy.testing.decorators import slow
 from pandas.parser import OverflowError
-import six
-from pandas.util.py3compat import map

 class ParserTests(object):
@@ -108,12 +108,12 @@ def test_empty_string(self):
         tm.assert_frame_equal(xp.reindex(columns=df.columns), df)

     def test_read_csv(self):
-        if not py3compat.PY3:
+        if not compat.PY3:
             if 'win' in sys.platform:
-                prefix = six.u("file:///")
             else:
-                prefix = six.u("file://")
-            fname = prefix + six.text_type(self.csv1)
+                prefix = u("file:///")
+            else:
+                prefix = u("file://")
+            fname = prefix + compat.text_type(self.csv1)

             # it works!
             df1 = read_csv(fname, index_col=0, parse_dates=True)
@@ -315,7 +315,7 @@ def test_multiple_date_cols_with_header(self):
 KORD,19990127, 23:00:00, 22:56:00, -0.5900, 1.7100, 4.6000, 0.0000, 280.0000"""

         df = self.read_csv(StringIO(data), parse_dates={'nominal': [1, 2]})
-        self.assert_(not isinstance(df.nominal[0], six.string_types))
+        self.assert_(not isinstance(df.nominal[0], compat.string_types))

         ts_data = """\
 ID,date,nominalTime,actualTime,A,B,C,D,E
@@ -869,9 +869,9 @@ def test_read_csv_no_index_name(self):
         tm.assert_frame_equal(df, df2)

     def test_read_table_unicode(self):
-        fin = BytesIO(six.u('\u0141aski, Jan;1').encode('utf-8'))
+        fin = BytesIO(u('\u0141aski, Jan;1').encode('utf-8'))
         df1 = read_table(fin, sep=";", encoding="utf-8", header=None)
         tm.assert_isinstance(df1[0].values[0], compat.text_type)

     def test_read_table_wrong_num_columns(self):
         # too few!
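
For reference, the compat shims these hunks lean on (PY3, u(), string_types, text_type, and the eager list helpers lrange/lmap/lzip) behave roughly like the sketch below. The names and semantics are inferred from how this patch uses them; the real pandas.util.compat module may differ in detail.

    # Minimal sketch of the pandas.util.compat shims assumed by this patch
    # (inferred from usage in the hunks above; NOT the verbatim pandas code).
    import sys

    PY3 = sys.version_info[0] >= 3

    if PY3:
        string_types = (str,)   # target of isinstance() checks on text
        text_type = str         # unicode text
        binary_type = bytes

        def u(s):
            # str literals are already unicode on Python 3
            return s

        # range/zip/map are lazy iterators on Python 3; the l* helpers
        # restore the eager lists that old Python 2 call sites expect
        def lrange(*args, **kwargs):
            return list(range(*args, **kwargs))

        def lmap(*args, **kwargs):
            return list(map(*args, **kwargs))

        def lzip(*args, **kwargs):
            return list(zip(*args, **kwargs))
    else:
        string_types = (basestring,)   # noqa: F821 -- Python 2 builtin
        text_type = unicode            # noqa: F821
        binary_type = str

        def u(s):
            # interpret \uXXXX escapes in a Python 2 byte-string literal
            return unicode(s, "unicode_escape")  # noqa: F821

        # Python 2 builtins are already eager
        lrange, lmap, lzip = range, map, zip

This is why the converted call sites can keep treating the results as real lists, and why u("...") literals decode the same way on both interpreters.
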
@@ -1556,13 +1556,13 @@ def test_skipinitialspace(self): def test_utf16_bom_skiprows(self): # #2298 - data = six.u("""skip this + data = u("""skip this skip this too A\tB\tC 1\t2\t3 4\t5\t6""") - data2 = six.u("""skip this + data2 = u("""skip this skip this too A,B,C 1,2,3 @@ -1578,7 +1578,7 @@ def test_utf16_bom_skiprows(self): f.write(bytes) s = BytesIO(dat.encode('utf-8')) - if py3compat.PY3: + if compat.PY3: # somewhat False since the code never sees bytes from io import TextIOWrapper s = TextIOWrapper(s, encoding='utf-8') @@ -1597,7 +1597,7 @@ def test_utf16_example(self): result = self.read_table(path, encoding='utf-16') self.assertEquals(len(result), 50) - if not py3compat.PY3: + if not compat.PY3: buf = BytesIO(open(path, 'rb').read()) result = self.read_table(buf, encoding='utf-16') self.assertEquals(len(result), 50) @@ -1607,7 +1607,6 @@ def test_converters_corner_with_nas(self): if hash(np.int64(-1)) != -2: raise nose.SkipTest - from pandas.util.py3compat import StringIO, lrange, lmap csv = """id,score,days 1,2,12 2,2-5, @@ -1669,7 +1668,7 @@ def test_unicode_encoding(self): result = result.set_index(0) got = result[1][1632] - expected = six.u('\xc1 k\xf6ldum klaka (Cold Fever) (1994)') + expected = u('\xc1 k\xf6ldum klaka (Cold Fever) (1994)') self.assertEquals(got, expected) @@ -1797,7 +1796,7 @@ def test_sniff_delimiter(self): sep=None, skiprows=2) tm.assert_frame_equal(data, data3) - text = six.u("""ignore this + text = u("""ignore this ignore this too index|A|B|C foo|1|2|3 @@ -1806,7 +1805,7 @@ def test_sniff_delimiter(self): """).encode('utf-8') s = BytesIO(text) - if py3compat.PY3: + if compat.PY3: # somewhat False since the code never sees bytes from io import TextIOWrapper s = TextIOWrapper(s, encoding='utf-8') @@ -2371,10 +2370,10 @@ def test_convert_sql_column_strings(self): assert_same_values_and_dtype(result, expected) def test_convert_sql_column_unicode(self): - arr = np.array([six.u('1.5'), None, six.u('3'), six.u('4.2')], + arr = np.array([u('1.5'), None, u('3'), u('4.2')], dtype=object) result = lib.convert_sql_column(arr) - expected = np.array([six.u('1.5'), np.nan, six.u('3'), six.u('4.2')], + expected = np.array([u('1.5'), np.nan, u('3'), u('4.2')], dtype=object) assert_same_values_and_dtype(result, expected) diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 69a52f4489725..55e4756dc9ba9 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import Index from pandas.sparse.tests import test_sparse -from pandas.util import py3compat +from pandas.util import compat from pandas.util.misc import is_little_endian class TestPickle(unittest.TestCase): @@ -36,7 +36,7 @@ def compare(self, vf): # we are trying to read a py3 pickle in py2..... 
return except: - if not py3compat.PY3: + if not compat.PY3: raise with open(vf,'rb') as fh: data = pickle.load(fh, encoding='latin1') diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index e10c5ad411bec..ee5b70ccb3646 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange, u import nose import unittest import os @@ -19,10 +19,9 @@ from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal from pandas import concat, Timestamp -from pandas.util import py3compat +from pandas.util import compat from numpy.testing.decorators import slow -import six try: import tables @@ -1332,8 +1331,8 @@ def test_unimplemented_dtypes_table_columns(self): l = [('date', datetime.date(2001, 1, 2))] # py3 ok for unicode - if not py3compat.PY3: - l.append(('unicode', six.u('\u03c3'))) + if not compat.PY3: + l.append(('unicode', u('\u03c3'))) ### currently not supported dtypes #### for n, f in l: @@ -2813,7 +2812,7 @@ def test_tseries_indices_frame(self): def test_unicode_index(self): - unicode_values = [six.u('\u03c3'), six.u('\u03c3\u03c3')] + unicode_values = [u('\u03c3'), u('\u03c3\u03c3')] warnings.filterwarnings('ignore', category=PerformanceWarning) s = Series(np.random.randn(len(unicode_values)), unicode_values) self._check_roundtrip(s, tm.assert_series_equal) diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 5dc719953c15a..8990515cee8c9 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -11,8 +11,8 @@ from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull -from pandas.util.py3compat import StringIO, range, lrange -import six +from pandas.util.compat import StringIO, range, lrange +import pandas.util.compat as compat import pandas.io.sql as sql import pandas.util.testing as tm @@ -23,8 +23,8 @@ datetime: lambda dt: "'%s'" % date_format(dt), str: lambda x: "'%s'" % x, np.str_: lambda x: "'%s'" % x, - six.text_type: lambda x: "'%s'" % x, - six.binary_type: lambda x: "'%s'" % x, + compat.text_type: lambda x: "'%s'" % x, + compat.binary_type: lambda x: "'%s'" % x, float: lambda x: "%.8f" % x, int: lambda x: "%s" % x, type(None): lambda x: "NULL", diff --git a/pandas/io/tests/test_stata.py b/pandas/io/tests/test_stata.py index 3f21150525f3f..d75de149d6f4b 100644 --- a/pandas/io/tests/test_stata.py +++ b/pandas/io/tests/test_stata.py @@ -13,7 +13,6 @@ from pandas.io.stata import read_stata, StataReader import pandas.util.testing as tm from pandas.util.misc import is_little_endian -import six class StataTests(unittest.TestCase): diff --git a/pandas/io/tests/test_wb.py b/pandas/io/tests/test_wb.py index e1492c13c151d..e85c63d7d5999 100644 --- a/pandas/io/tests/test_wb.py +++ b/pandas/io/tests/test_wb.py @@ -5,22 +5,21 @@ from pandas.util.testing import assert_frame_equal from numpy.testing.decorators import slow from pandas.io.wb import search, download -import six @slow @network def test_wdi_search(): raise nose.SkipTest - expected = {six.u('id'): {2634: six.u('GDPPCKD'), - 4649: six.u('NY.GDP.PCAP.KD'), - 4651: six.u('NY.GDP.PCAP.KN'), - 4653: six.u('NY.GDP.PCAP.PP.KD')}, - six.u('name'): {2634: six.u('GDP per Capita, constant US$, ' + expected = {u('id'): {2634: u('GDPPCKD'), + 4649: u('NY.GDP.PCAP.KD'), + 4651: u('NY.GDP.PCAP.KN'), + 4653: 
u('NY.GDP.PCAP.PP.KD')}, + u('name'): {2634: u('GDP per Capita, constant US$, ' 'millions'), - 4649: six.u('GDP per capita (constant 2000 US$)'), - 4651: six.u('GDP per capita (constant LCU)'), - 4653: six.u('GDP per capita, PPP (constant 2005 ' + 4649: u('GDP per capita (constant 2000 US$)'), + 4651: u('GDP per capita (constant LCU)'), + 4653: u('GDP per capita, PPP (constant 2005 ' 'international $)')}} result = search('gdp.*capita.*constant').ix[:, :2] expected = pandas.DataFrame(expected) @@ -32,7 +31,7 @@ def test_wdi_search(): @network def test_wdi_download(): raise nose.SkipTest - expected = {'GDPPCKN': {(six.u('United States'), six.u('2003')): six.u('40800.0735367688'), (six.u('Canada'), six.u('2004')): six.u('37857.1261134552'), (six.u('United States'), six.u('2005')): six.u('42714.8594790102'), (six.u('Canada'), six.u('2003')): six.u('37081.4575704003'), (six.u('United States'), six.u('2004')): six.u('41826.1728310667'), (six.u('Mexico'), six.u('2003')): six.u('72720.0691255285'), (six.u('Mexico'), six.u('2004')): six.u('74751.6003347038'), (six.u('Mexico'), six.u('2005')): six.u('76200.2154469437'), (six.u('Canada'), six.u('2005')): six.u('38617.4563629611')}, 'GDPPCKD': {(six.u('United States'), six.u('2003')): six.u('40800.0735367688'), (six.u('Canada'), six.u('2004')): six.u('34397.055116118'), (six.u('United States'), six.u('2005')): six.u('42714.8594790102'), (six.u('Canada'), six.u('2003')): six.u('33692.2812368928'), (six.u('United States'), six.u('2004')): six.u('41826.1728310667'), (six.u('Mexico'), six.u('2003')): six.u('7608.43848670658'), (six.u('Mexico'), six.u('2004')): six.u('7820.99026814334'), (six.u('Mexico'), six.u('2005')): six.u('7972.55364129367'), (six.u('Canada'), six.u('2005')): six.u('35087.8925933298')}} + expected = {'GDPPCKN': {(u('United States'), u('2003')): u('40800.0735367688'), (u('Canada'), u('2004')): u('37857.1261134552'), (u('United States'), u('2005')): u('42714.8594790102'), (u('Canada'), u('2003')): u('37081.4575704003'), (u('United States'), u('2004')): u('41826.1728310667'), (u('Mexico'), u('2003')): u('72720.0691255285'), (u('Mexico'), u('2004')): u('74751.6003347038'), (u('Mexico'), u('2005')): u('76200.2154469437'), (u('Canada'), u('2005')): u('38617.4563629611')}, 'GDPPCKD': {(u('United States'), u('2003')): u('40800.0735367688'), (u('Canada'), u('2004')): u('34397.055116118'), (u('United States'), u('2005')): u('42714.8594790102'), (u('Canada'), u('2003')): u('33692.2812368928'), (u('United States'), u('2004')): u('41826.1728310667'), (u('Mexico'), u('2003')): u('7608.43848670658'), (u('Mexico'), u('2004')): u('7820.99026814334'), (u('Mexico'), u('2005')): u('7972.55364129367'), (u('Canada'), u('2005')): u('35087.8925933298')}} expected = pandas.DataFrame(expected) result = download(country=['CA', 'MX', 'US', 'junk'], indicator=['GDPPCKD', 'GDPPCKN', 'junk'], start=2003, end=2005) diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 4563c0a08cf93..867032cc9c2fd 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,7 +1,6 @@ from __future__ import print_function -from pandas.util.py3compat import map, reduce -from pandas.util.py3compat import range, lrange +from pandas.util.compat import map, reduce, range, lrange from pandas.io.common import urlopen from pandas.io import json import pandas diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index 66e3e177771dc..54fe50b44bd48 100644 --- a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -4,8 +4,7 @@ """ from __future__ import print_function -from pandas.util.py3compat 
import zip -from pandas.util.py3compat import range +from pandas.util.compat import zip, range import numpy as np import pandas as pd diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 48fa9caa0a05c..7dee8230b3dfe 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -11,7 +11,7 @@ from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.util import py3compat +from pandas.util import compat from pandas._sparse import BlockIndex, IntIndex import pandas._sparse as splib @@ -216,7 +216,7 @@ def disable(self, other): __ipow__ = disable # Python 2 division operators - if not py3compat.PY3: + if not compat.PY3: __div__ = _sparse_op_wrap(operator.div, 'div') __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__') __idiv__ = disable diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index c889d4c19f915..4505aac4ecd66 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -6,7 +6,7 @@ # pylint: disable=E1101,E1103,W0231,E0202 from numpy import nan -from pandas.util.py3compat import range, lmap +from pandas.util.compat import range, lmap, map from pandas.util import compat import numpy as np @@ -23,7 +23,6 @@ from pandas.sparse.series import SparseSeries from pandas.util.decorators import Appender import pandas.lib as lib -from pandas.util.py3compat import map class _SparseMockBlockManager(object): diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index e16dfdafd4fa2..3f6b5e0d795bb 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -5,8 +5,7 @@ # pylint: disable=E1101,E1103,W0231 -from pandas.util.py3compat import range, lrange -from pandas.util.py3compat import zip +from pandas.util.compat import range, lrange, zip from pandas.util import compat import numpy as np @@ -17,7 +16,6 @@ from pandas.util.decorators import deprecate import pandas.core.common as com -import six class SparsePanelAxis(object): @@ -35,7 +33,7 @@ def __set__(self, obj, value): if isinstance(value, MultiIndex): raise NotImplementedError - for v in six.itervalues(obj._frames): + for v in compat.itervalues(obj._frames): setattr(v, self.frame_attr, value) setattr(obj, self.cache_field, value) diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 802808954c8f4..866ee5cb150bc 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -17,7 +17,7 @@ import pandas.core.common as com import pandas.core.datetools as datetools -from pandas.util import py3compat +from pandas.util import compat from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray) from pandas._sparse import BlockIndex, IntIndex @@ -265,7 +265,7 @@ def __unicode__(self): __rpow__ = _sparse_op_wrap(lambda x, y: y ** x, '__rpow__') # Python 2 division operators - if not py3compat.PY3: + if not compat.PY3: __div__ = _sparse_op_wrap(operator.div, 'div') __rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__') diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index 178f8ea8c9ca8..f11632e28c111 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.compat import range import re from numpy import nan, ndarray import numpy as np diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index 47ad7b0c18d30..8be3026dd403d 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range 
+from pandas.util.compat import range
 import unittest

 from numpy import nan
diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py
index b39ec61f20fe4..6a4280e057538 100644
--- a/pandas/sparse/tests/test_sparse.py
+++ b/pandas/sparse/tests/test_sparse.py
@@ -22,9 +22,8 @@
 import pandas.core.datetools as datetools
 from pandas.core.common import isnull
 import pandas.util.testing as tm
-from pandas.util.py3compat import range, lrange
+from pandas.util.compat import range, lrange, cPickle as pickle, StringIO
 from pandas.util import compat
-from pandas.util.py3compat import cPickle as pickle

 import pandas.sparse.frame as spf

@@ -36,7 +35,6 @@
 import pandas.tests.test_frame as test_frame
 import pandas.tests.test_panel as test_panel
 import pandas.tests.test_series as test_series
-from pandas.util.py3compat import StringIO, lrange

 from .test_array import assert_sp_array_equal
diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py
index 040e12922d26c..b94ec6df7c738 100644
--- a/pandas/src/generate_code.py
+++ b/pandas/src/generate_code.py
@@ -1,7 +1,6 @@
 from __future__ import print_function
-from pandas.util.py3compat import range
+from pandas.util.compat import range, cStringIO as StringIO
 import os
-from pandas.util.py3compat import cStringIO as StringIO

 header = """
 cimport numpy as np
diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py
index 9e4e62a07d46d..04dd7e045f4c8 100644
--- a/pandas/stats/fama_macbeth.py
+++ b/pandas/stats/fama_macbeth.py
@@ -1,7 +1,6 @@
 from pandas.core.base import StringMixin
-from pandas.util.py3compat import StringIO
+from pandas.util.compat import StringIO, range

-from pandas.util.py3compat import range
 import numpy as np

 from pandas.core.api import Series, DataFrame
diff --git a/pandas/stats/math.py b/pandas/stats/math.py
index 7a36654a4395f..583c588c9c037 100644
--- a/pandas/stats/math.py
+++ b/pandas/stats/math.py
@@ -3,7 +3,7 @@

 from __future__ import division

-from pandas.util.py3compat import range
+from pandas.util.compat import range
 import numpy as np
 import numpy.linalg as linalg
diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py
index 00c93e07c77dd..aeeec7068d5e4 100644
--- a/pandas/stats/misc.py
+++ b/pandas/stats/misc.py
@@ -4,8 +4,7 @@
 from pandas.core.api import Series, DataFrame, isnull, notnull
 from pandas.core.series import remove_na

-import six
-from pandas.util.py3compat import zip
+from pandas.util.compat import zip


 def zscore(series):
diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py
index f5ca39d0129c3..9ea85739dca55 100644
--- a/pandas/stats/ols.py
+++ b/pandas/stats/ols.py
@@ -4,11 +4,8 @@

 # pylint: disable-msg=W0201

-from pandas.util.py3compat import zip
+from pandas.util.compat import zip, range, StringIO
 from itertools import starmap
-from pandas.util.py3compat import StringIO
-
-from pandas.util.py3compat import range
 from pandas.util import compat

 import numpy as np
diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py
index fb9f3aadcb8a6..923f1b4272681 100644
--- a/pandas/stats/plm.py
+++ b/pandas/stats/plm.py
@@ -6,7 +6,7 @@
 # pylint: disable-msg=E1101,E1103
 from __future__ import division

-from pandas.util.py3compat import range
+from pandas.util.compat import range
 from pandas.util import compat

 import warnings
diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py
index 2e55c3d5af293..6d315ceec511a 100644
--- a/pandas/stats/tests/test_fama_macbeth.py
+++ b/pandas/stats/tests/test_fama_macbeth.py
@@ -2,
+2,7 @@ from pandas.stats.api import fama_macbeth from .common import assert_almost_equal, BaseTest -from pandas.util.py3compat import range +from pandas.util.compat import range from pandas.util import compat import numpy as np diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index df483aa5872a9..233ca78ce6db0 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -1,5 +1,3 @@ -from pandas.util.py3compat import range -from pandas.util.py3compat import zip import unittest import nose import sys @@ -13,10 +11,10 @@ from pandas.util.testing import ( assert_almost_equal, assert_series_equal, assert_frame_equal ) -from pandas.util.py3compat import PY3 import pandas.core.datetools as datetools import pandas.stats.moments as mom import pandas.util.testing as tm +from pandas.util.compat import range, zip, PY3, StringIO N, K = 100, 10 @@ -489,7 +487,6 @@ def _check_structures(self, func, static_comp, assert_frame_equal(frame_xp, frame_rs) def test_legacy_time_rule_arg(self): - from pandas.util.py3compat import StringIO # suppress deprecation warnings sys.stderr = StringIO() diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index 99ee9f3bf93a1..1c7eec1264afd 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -3,7 +3,7 @@ from pandas.util.testing import assert_almost_equal -from pandas.util.py3compat import range +from pandas.util.compat import range import nose import unittest diff --git a/pandas/stats/var.py b/pandas/stats/var.py index 524098292f148..2337dcf9c9e36 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,7 +1,6 @@ from __future__ import division -from pandas.util.py3compat import range, lrange -from pandas.util.py3compat import zip, reduce +from pandas.util.compat import range, lrange, zip, reduce from pandas.util import compat import numpy as np from pandas.core.base import StringMixin diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 4c832f7850012..af3b56e047765 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.compat import range import unittest import numpy as np diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index b0722c49e99fb..dc60cda24bd60 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import unittest import nose diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index dc4ed02559a19..7001f582efffe 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -1,5 +1,4 @@ from datetime import datetime -from pandas.util.py3compat import range, long, lrange, lmap import sys import re @@ -7,6 +6,7 @@ import unittest from pandas import Series, DataFrame, date_range, DatetimeIndex, Timestamp +from pandas.util.compat import range, long, lrange, lmap, u, map from pandas.core.common import notnull, isnull import pandas.core.common as com import pandas.util.testing as tm @@ -15,9 +15,7 @@ import numpy as np from pandas.tslib import iNaT -from pandas.util import py3compat -import six -from pandas.util.py3compat import map +from pandas.util import compat _multiprocess_can_split_ = True @@ -27,7 +25,7 @@ def test_is_sequence(): assert(is_seq((1, 
2))) assert(is_seq([1, 2])) assert(not is_seq("abcd")) - assert(not is_seq(six.u("abcd"))) + assert(not is_seq(u("abcd"))) assert(not is_seq(np.int64)) class A(object): @@ -97,7 +95,7 @@ def test_isnull_lists(): result = isnull(['foo', 'bar']) assert(not result.any()) - result = isnull([six.u('foo'), six.u('bar')]) + result = isnull([u('foo'), u('bar')]) assert(not result.any()) @@ -314,7 +312,7 @@ def test_ensure_platform_int(): # On Python 2, if sys.stdin.encoding is None (IPython with zmq frontend) # common.console_encode should encode things as utf-8. # """ -# if py3compat.PY3: +# if compat.PY3: # raise nose.SkipTest # with tm.stdin_encoding(encoding=None): @@ -335,8 +333,8 @@ def test_is_re(): def test_is_recompilable(): - passes = (r'a', six.u('x'), r'asdf', re.compile('adsf'), - six.u(r'\u2233\s*'), re.compile(r'')) + passes = (r'a', u('x'), r'asdf', re.compile('adsf'), + u(r'\u2233\s*'), re.compile(r'')) fails = 1, [], object() for p in passes: diff --git a/pandas/tests/test_py3compat.py b/pandas/tests/test_compat.py similarity index 96% rename from pandas/tests/test_py3compat.py rename to pandas/tests/test_compat.py index e74b8a86ea6a0..fe5c7590dec0f 100644 --- a/pandas/tests/test_py3compat.py +++ b/pandas/tests/test_compat.py @@ -1,8 +1,8 @@ """ -Testing that functions from py3compat work as expected +Testing that functions from compat work as expected """ -from pandas.util.py3compat import ( +from pandas.util.compat import ( range, zip, map, filter, lrange, lzip, lmap, lfilter, builtins diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 018440dd09f87..8cfffaacc1058 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -17,7 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal) -from pandas.util import py3compat +from pandas.util import compat import pandas.util.testing as tm import pandas.lib as lib @@ -55,7 +55,7 @@ def tearDown(self): def run_arithmetic_test(self, df, assert_func, check_dtype=False): expr._MIN_ELEMENTS = 0 operations = ['add', 'sub', 'mul','mod','truediv','floordiv','pow'] - if not py3compat.PY3: + if not compat.PY3: operations.append('div') for arith in operations: op = getattr(operator, arith) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index fdd11b7bd6645..7cd484f50d4c3 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,7 +1,8 @@ from __future__ import print_function # -*- coding: utf-8 -*- -from pandas.util.py3compat import range, zip, lrange, StringIO, PY3, lzip +from pandas.util.compat import range, zip, lrange, StringIO, PY3, lzip, u +import pandas.util.compat as compat import os import sys import unittest @@ -21,7 +22,6 @@ import pandas as pd from pandas.core.config import (set_option, get_option, option_context, reset_option) -import six _frame = DataFrame(tm.getSeriesData()) @@ -151,9 +151,9 @@ def test_repr_should_return_str(self): data = [8, 5, 3, 5] - index1 = [six.u("\u03c3"), six.u("\u03c4"), six.u("\u03c5"), - six.u("\u03c6")] - cols = [six.u("\u03c8")] + index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), + u("\u03c6")] + cols = [u("\u03c8")] df = DataFrame(data, columns=cols, index=index1) self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 @@ -245,7 +245,7 @@ def mkframe(n): def test_to_string_repr_unicode(self): buf = StringIO() - unicode_values = [six.u('\u03c3')] * 10 + unicode_values = [u('\u03c3')] * 10 unicode_values = np.array(unicode_values, dtype=object) 
df = DataFrame({'unicode': unicode_values}) df.to_string(col_space=10, buf=buf) @@ -253,7 +253,7 @@ def test_to_string_repr_unicode(self): # it works! repr(df) - idx = Index(['abc', six.u('\u03c3a'), 'aegdvg']) + idx = Index(['abc', u('\u03c3a'), 'aegdvg']) ser = Series(np.random.randn(len(idx)), idx) rs = repr(ser).split('\n') line_len = len(rs[0]) @@ -274,7 +274,7 @@ def test_to_string_repr_unicode(self): sys.stdin = _stdin def test_to_string_unicode_columns(self): - df = DataFrame({six.u('\u03c3'): np.arange(10.)}) + df = DataFrame({u('\u03c3'): np.arange(10.)}) buf = StringIO() df.to_string(buf=buf) @@ -285,17 +285,17 @@ def test_to_string_unicode_columns(self): buf.getvalue() result = self.frame.to_string() - tm.assert_isinstance(result, six.text_type) + tm.assert_isinstance(result, compat.text_type) def test_to_string_utf8_columns(self): - n = six.u("\u05d0").encode('utf-8') + n = u("\u05d0").encode('utf-8') with option_context('display.max_rows', 1): df = pd.DataFrame([1, 2], columns=[n]) repr(df) def test_to_string_unicode_two(self): - dm = DataFrame({six.u('c/\u03c3'): []}) + dm = DataFrame({u('c/\u03c3'): []}) buf = StringIO() dm.to_string(buf) @@ -322,17 +322,17 @@ def test_to_string_with_formatters(self): self.assertEqual(result, result2) def test_to_string_with_formatters_unicode(self): - df = DataFrame({six.u('c/\u03c3'): [1, 2, 3]}) - result = df.to_string(formatters={six.u('c/\u03c3'): + df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) + result = df.to_string(formatters={u('c/\u03c3'): lambda x: '%s' % x}) - self.assertEqual(result, six.u(' c/\u03c3\n') + + self.assertEqual(result, u(' c/\u03c3\n') + '0 1\n1 2\n2 3') def test_to_string_buffer_all_unicode(self): buf = StringIO() - empty = DataFrame({six.u('c/\u03c3'): Series()}) - nonempty = DataFrame({six.u('c/\u03c3'): Series([1, 2, 3])}) + empty = DataFrame({u('c/\u03c3'): Series()}) + nonempty = DataFrame({u('c/\u03c3'): Series([1, 2, 3])}) print(empty, file=buf) print(nonempty, file=buf) @@ -373,9 +373,9 @@ def test_to_html_with_empty_string_label(self): def test_to_html_unicode(self): # it works! 
- df = DataFrame({six.u('\u03c3'): np.arange(10.)}) + df = DataFrame({u('\u03c3'): np.arange(10.)}) df.to_html() - df = DataFrame({'A': [six.u('\u03c3')]}) + df = DataFrame({'A': [u('\u03c3')]}) df.to_html() def test_to_html_escaped(self): @@ -699,8 +699,8 @@ def test_nonunicode_nonascii_alignment(self): self.assert_(len(lines[1]) == len(lines[2])) def test_unicode_problem_decoding_as_ascii(self): - dm = DataFrame({six.u('c/\u03c3'): Series({'test': np.NaN})}) - six.text_type(dm.to_string()) + dm = DataFrame({u('c/\u03c3'): Series({'test': np.NaN})}) + compat.text_type(dm.to_string()) def test_string_repr_encoding(self): filepath = tm.get_data_path('unicode_series.csv') @@ -768,24 +768,24 @@ def test_pprint_thing(self): if PY3: raise nose.SkipTest() - self.assertEquals(pp_t('a') , six.u('a')) - self.assertEquals(pp_t(six.u('a')) , six.u('a')) + self.assertEquals(pp_t('a') , u('a')) + self.assertEquals(pp_t(u('a')) , u('a')) self.assertEquals(pp_t(None) , 'None') - self.assertEquals(pp_t(six.u('\u05d0'), quote_strings=True), - six.u("u'\u05d0'")) - self.assertEquals(pp_t(six.u('\u05d0'), quote_strings=False), - six.u('\u05d0')) - self.assertEquals(pp_t((six.u('\u05d0'), - six.u('\u05d1')), quote_strings=True), - six.u("(u'\u05d0', u'\u05d1')")) - self.assertEquals(pp_t((six.u('\u05d0'), (six.u('\u05d1'), - six.u('\u05d2'))), + self.assertEquals(pp_t(u('\u05d0'), quote_strings=True), + u("u'\u05d0'")) + self.assertEquals(pp_t(u('\u05d0'), quote_strings=False), + u('\u05d0')) + self.assertEquals(pp_t((u('\u05d0'), + u('\u05d1')), quote_strings=True), + u("(u'\u05d0', u'\u05d1')")) + self.assertEquals(pp_t((u('\u05d0'), (u('\u05d1'), + u('\u05d2'))), quote_strings=True), - six.u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) - self.assertEquals(pp_t(('foo', six.u('\u05d0'), (six.u('\u05d0'), - six.u('\u05d0'))), + u("(u'\u05d0', (u'\u05d1', u'\u05d2'))")) + self.assertEquals(pp_t(('foo', u('\u05d0'), (u('\u05d0'), + u('\u05d0'))), quote_strings=True), - six.u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) + u("(u'foo', u'\u05d0', (u'\u05d0', u'\u05d0'))")) # escape embedded tabs in string # GH #2038 @@ -927,13 +927,13 @@ def test_index_with_nan(self): # multi-index y = df.set_index(['id1', 'id2', 'id3']) result = y.to_string() - expected = six.u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') + expected = u(' value\nid1 id2 id3 \n1a3 NaN 78d 123\n9h4 d67 79d 64') self.assert_(result == expected) # index y = df.set_index('id2') result = y.to_string() - expected = six.u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') + expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nd67 9h4 79d 64') self.assert_(result == expected) # all-nan in mi @@ -941,7 +941,7 @@ def test_index_with_nan(self): df2.ix[:,'id2'] = np.nan y = df2.set_index('id2') result = y.to_string() - expected = six.u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') + expected = u(' id1 id3 value\nid2 \nNaN 1a3 78d 123\nNaN 9h4 79d 64') self.assert_(result == expected) # partial nan in mi @@ -949,7 +949,7 @@ def test_index_with_nan(self): df2.ix[:,'id2'] = np.nan y = df2.set_index(['id2','id3']) result = y.to_string() - expected = six.u(' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') + expected = u(' id1 value\nid2 id3 \nNaN 78d 1a3 123\n 79d 9h4 64') self.assert_(result == expected) df = DataFrame({'id1': {0: np.nan, 1: '9h4'}, 'id2': {0: np.nan, 1: 'd67'}, @@ -957,7 +957,7 @@ def test_index_with_nan(self): y = df.set_index(['id1','id2','id3']) result = y.to_string() - expected = six.u(' value\nid1 id2 id3 
\nNaN NaN NaN 123\n9h4 d67 79d 64') + expected = u(' value\nid1 id2 id3 \nNaN NaN NaN 123\n9h4 d67 79d 64') self.assert_(result == expected) def test_to_string(self): @@ -978,7 +978,7 @@ def test_to_string(self): self.assert_(retval is None) self.assertEqual(buf.getvalue(), s) - tm.assert_isinstance(s, six.string_types) + tm.assert_isinstance(s, compat.string_types) # print in right order result = biggie.to_string(columns=['B', 'A'], col_space=17, @@ -1118,8 +1118,8 @@ def test_to_string_float_index(self): def test_to_string_ascii_error(self): data = [('0 ', - six.u(' .gitignore '), - six.u(' 5 '), + u(' .gitignore '), + u(' 5 '), ' \xe2\x80\xa2\xe2\x80\xa2\xe2\x80' '\xa2\xe2\x80\xa2\xe2\x80\xa2')] df = DataFrame(data) @@ -1207,7 +1207,7 @@ def test_to_html(self): self.assert_(retval is None) self.assertEqual(buf.getvalue(), s) - tm.assert_isinstance(s, six.string_types) + tm.assert_isinstance(s, compat.string_types) biggie.to_html(columns=['B', 'A'], col_space=17) biggie.to_html(columns=['B', 'A'], @@ -1542,10 +1542,10 @@ def setUp(self): self.ts = tm.makeTimeSeries() def test_repr_unicode(self): - s = Series([six.u('\u03c3')] * 10) + s = Series([u('\u03c3')] * 10) repr(s) - a = Series([six.u("\u05d0")] * 1000) + a = Series([u("\u05d0")] * 1000) a.name = 'title1' repr(a) @@ -1589,16 +1589,16 @@ def test_freq_name_separation(self): def test_to_string_mixed(self): s = Series(['foo', np.nan, -1.23, 4.56]) result = s.to_string() - expected = (six.u('0 foo\n') + - six.u('1 NaN\n') + - six.u('2 -1.23\n') + - six.u('3 4.56')) + expected = (u('0 foo\n') + + u('1 NaN\n') + + u('2 -1.23\n') + + u('3 4.56')) self.assertEqual(result, expected) # but don't count NAs as floats s = Series(['foo', np.nan, 'bar', 'baz']) result = s.to_string() - expected = (six.u('0 foo\n') + + expected = (u('0 foo\n') + '1 NaN\n' + '2 bar\n' + '3 baz') @@ -1606,7 +1606,7 @@ def test_to_string_mixed(self): s = Series(['foo', 5, 'bar', 'baz']) result = s.to_string() - expected = (six.u('0 foo\n') + + expected = (u('0 foo\n') + '1 5\n' + '2 bar\n' + '3 baz') @@ -1617,7 +1617,7 @@ def test_to_string_float_na_spacing(self): s[::2] = np.nan result = s.to_string() - expected = (six.u('0 NaN\n') + + expected = (u('0 NaN\n') + '1 1.5678\n' + '2 NaN\n' + '3 -3.0000\n' + @@ -1625,8 +1625,8 @@ def test_to_string_float_na_spacing(self): self.assertEqual(result, expected) def test_unicode_name_in_footer(self): - s = Series([1, 2], name=six.u('\u05e2\u05d1\u05e8\u05d9\u05ea')) - sf = fmt.SeriesFormatter(s, name=six.u('\u05e2\u05d1\u05e8\u05d9\u05ea')) + s = Series([1, 2], name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) + sf = fmt.SeriesFormatter(s, name=u('\u05e2\u05d1\u05e8\u05d9\u05ea')) sf._get_footer() # should not raise exception def test_float_trim_zeros(self): @@ -1920,7 +1920,7 @@ def test_rounding(self): formatter = fmt.EngFormatter(accuracy=3, use_eng_prefix=True) result = formatter(0) - self.assertEqual(result, six.u(' 0.000')) + self.assertEqual(result, u(' 0.000')) def _three_digit_exp(): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 9104a2140c5cd..25397c09cb135 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7,13 +7,11 @@ import unittest import nose -from pandas.util import py3compat -from pandas.util.py3compat import cPickle as pickle -from pandas.util.py3compat import StringIO, range, long, lrange, lmap, lzip -from pandas.util.compat import OrderedDict +from pandas.util.compat import( + map, zip, range, long, lrange, lmap, lzip, + OrderedDict, cPickle as pickle, u, 
StringIO +) from pandas.util import compat -import six -from pandas.util.py3compat import map, zip from numpy import random, nan from numpy.random import randn @@ -63,7 +61,7 @@ def _check_mixed_float(df, dtype = None): # float16 are most likely to be upcasted to float32 dtypes = dict(A = 'float32', B = 'float32', C = 'float16', D = 'float64') - if isinstance(dtype, six.string_types): + if isinstance(dtype, compat.string_types): dtypes = dict([ (k,dtype) for k, v in dtypes.items() ]) elif isinstance(dtype, dict): dtypes.update(dtype) @@ -78,7 +76,7 @@ def _check_mixed_float(df, dtype = None): def _check_mixed_int(df, dtype = None): dtypes = dict(A = 'int32', B = 'uint64', C = 'uint8', D = 'int64') - if isinstance(dtype, six.string_types): + if isinstance(dtype, compat.string_types): dtypes = dict([ (k,dtype) for k, v in dtypes.items() ]) elif isinstance(dtype, dict): dtypes.update(dtype) @@ -3843,7 +3841,7 @@ def test_repr_unsortable(self): warnings.filters = warn_filters def test_repr_unicode(self): - uval = six.u('\u03c3\u03c3\u03c3\u03c3') + uval = u('\u03c3\u03c3\u03c3\u03c3') bval = uval.encode('utf-8') df = DataFrame({'A': [uval, uval]}) @@ -3856,16 +3854,16 @@ def test_repr_unicode(self): self.assertEqual(result.split('\n')[0].rstrip(), ex_top) def test_unicode_string_with_unicode(self): - df = DataFrame({'A': [six.u("\u05d0")]}) + df = DataFrame({'A': [u("\u05d0")]}) - if py3compat.PY3: + if compat.PY3: str(df) else: - six.text_type(df) + compat.text_type(df) def test_bytestring_with_unicode(self): - df = DataFrame({'A': [six.u("\u05d0")]}) - if py3compat.PY3: + df = DataFrame({'A': [u("\u05d0")]}) + if compat.PY3: bytes(df) else: str(df) @@ -4144,7 +4142,7 @@ def _check_unary_op(op): _check_unary_op(operator.neg) def test_logical_typeerror(self): - if py3compat.PY3: + if compat.PY3: pass else: self.assertRaises(TypeError, self.frame.__eq__, 'foo') @@ -4823,7 +4821,7 @@ def _do_test(df,path,r_dtype=None,c_dtype=None,rnlvl=None,cnlvl=None, recons = DataFrame.from_csv(path,header=0,parse_dates=False) def _to_uni(x): - if not isinstance(x, six.text_type): + if not isinstance(x, compat.text_type): return x.decode('utf8') return x if dupe_col: @@ -5282,7 +5280,7 @@ def test_to_csv_bug(self): def test_to_csv_unicode(self): - df = DataFrame({six.u('c/\u03c3'): [1, 2, 3]}) + df = DataFrame({u('c/\u03c3'): [1, 2, 3]}) with ensure_clean() as path: df.to_csv(path, encoding='UTF-8') @@ -5296,10 +5294,10 @@ def test_to_csv_unicode(self): def test_to_csv_unicode_index_col(self): buf = StringIO('') df = DataFrame( - [[six.u("\u05d0"), "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], - columns=[six.u("\u05d0"), - six.u("\u05d1"), six.u("\u05d2"), six.u("\u05d3")], - index=[six.u("\u05d0"), six.u("\u05d1")]) + [[u("\u05d0"), "d2", "d3", "d4"], ["a1", "a2", "a3", "a4"]], + columns=[u("\u05d0"), + u("\u05d1"), u("\u05d2"), u("\u05d3")], + index=[u("\u05d0"), u("\u05d1")]) df.to_csv(buf, encoding='UTF-8') buf.seek(0) @@ -8311,7 +8309,7 @@ def test_filter(self): self.assert_('foo' in filtered) # unicode columns, won't ascii-encode - df = self.frame.rename(columns={'B': six.u('\u2202')}) + df = self.frame.rename(columns={'B': u('\u2202')}) filtered = df.filter(like='C') self.assertTrue('C' in filtered) diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index c03041e390ee7..4364e741c3b65 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -6,7 +6,7 @@ from datetime import datetime, date from pandas import Series, DataFrame, MultiIndex, PeriodIndex, 
date_range -from pandas.util.py3compat import range, lrange, StringIO, lmap, lzip +from pandas.util.compat import range, lrange, StringIO, lmap, lzip, u, map, zip import pandas.util.testing as tm from pandas.util.testing import ensure_clean from pandas.core.config import set_option @@ -18,9 +18,6 @@ from numpy.testing import assert_array_equal from numpy.testing.decorators import slow import pandas.tools.plotting as plotting -import six -from pandas.util.py3compat import map -from pandas.util.py3compat import zip def _skip_if_no_scipy(): @@ -337,21 +334,21 @@ def test_plot(self): _check_plot_works(df.plot, use_index=True) # unicode - index = MultiIndex.from_tuples([(six.u('\u03b1'), 0), - (six.u('\u03b1'), 1), - (six.u('\u03b2'), 2), - (six.u('\u03b2'), 3), - (six.u('\u03b3'), 4), - (six.u('\u03b3'), 5), - (six.u('\u03b4'), 6), - (six.u('\u03b4'), 7)], names=['i0', 'i1']) - columns = MultiIndex.from_tuples([('bar', six.u('\u0394')), - ('bar', six.u('\u0395'))], names=['c0', + index = MultiIndex.from_tuples([(u('\u03b1'), 0), + (u('\u03b1'), 1), + (u('\u03b2'), 2), + (u('\u03b2'), 3), + (u('\u03b3'), 4), + (u('\u03b3'), 5), + (u('\u03b4'), 6), + (u('\u03b4'), 7)], names=['i0', 'i1']) + columns = MultiIndex.from_tuples([('bar', u('\u0394')), + ('bar', u('\u0395'))], names=['c0', 'c1']) df = DataFrame(np.random.randint(0, 10, (8, 2)), columns=columns, index=index) - _check_plot_works(df.plot, title=six.u('\u03A3')) + _check_plot_works(df.plot, title=u('\u03A3')) def test_nonnumeric_exclude(self): import matplotlib.pyplot as plt diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 005babf6f3416..58b7d808eedc3 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -1,7 +1,4 @@ from __future__ import print_function -from pandas.util.py3compat import range, long, lrange, StringIO, lmap, lzip -from pandas.util import compat -from pandas.util.py3compat import map, zip, builtins import nose import unittest @@ -16,6 +13,10 @@ from pandas.core.series import Series from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal) +from pandas.util.compat import( + range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict +) +from pandas.util import compat from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict @@ -443,7 +444,6 @@ def test_groups(self): self.assert_((self.df.ix[v]['B'] == k[1]).all()) def test_aggregate_str_func(self): - from pandas.util.compat import OrderedDict def _check_results(grouped): # single series @@ -1068,7 +1068,6 @@ def _check_op(op): assert_series_equal(result, expected) def test_groupby_as_index_agg(self): - from pandas.util.compat import OrderedDict grouped = self.df.groupby('A', as_index=False) # single-key @@ -2238,7 +2237,6 @@ def test_agg_multiple_functions_too_many_lambdas(self): def test_more_flexible_frame_multi_function(self): from pandas import concat - from pandas.util.compat import OrderedDict grouped = self.df.groupby('A') @@ -2277,7 +2275,6 @@ def bar(x): def test_multi_function_flexible_mix(self): # GH #1268 - from pandas.util.compat import OrderedDict grouped = self.df.groupby('A') d = OrderedDict([['C', OrderedDict([['foo', 'mean'], diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 4dae4378c7df4..46fd7f2186c33 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, 
timedelta -from pandas.util.py3compat import range, lrange, lzip +from pandas.util.compat import range, lrange, lzip, u, zip import operator import pickle import unittest @@ -13,7 +13,7 @@ from pandas.core.index import Index, Int64Index, MultiIndex from pandas.util.testing import assert_almost_equal -from pandas.util import py3compat +from pandas.util import compat import pandas.util.testing as tm import pandas.core.config as cf @@ -23,8 +23,6 @@ import pandas as pd from pandas.lib import Timestamp -import six -from pandas.util.py3compat import zip class TestIndex(unittest.TestCase): @@ -371,13 +369,13 @@ def test_format(self): # 2845 index = Index([1, 2.0+3.0j, np.nan]) formatted = index.format() - expected = [str(index[0]), str(index[1]), six.u('NaN')] + expected = [str(index[0]), str(index[1]), u('NaN')] self.assertEquals(formatted, expected) # is this really allowed? index = Index([1, 2.0+3.0j, None]) formatted = index.format() - expected = [str(index[0]), str(index[1]), six.u('NaN')] + expected = [str(index[0]), str(index[1]), u('NaN')] self.assertEquals(formatted, expected) self.strIndex[:0].format() @@ -900,7 +898,7 @@ def test_int_name_format(self): def test_print_unicode_columns(self): df = pd.DataFrame( - {six.u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) + {u("\u05d0"): [1, 2, 3], "\u05d1": [4, 5, 6], "c": [7, 8, 9]}) repr(df.columns) # should not raise UnicodeDecodeError def test_repr_summary(self): @@ -912,14 +910,14 @@ def test_repr_summary(self): def test_unicode_string_with_unicode(self): idx = Index(lrange(1000)) - if py3compat.PY3: + if compat.PY3: str(idx) else: - six.text_type(idx) + compat.text_type(idx) def test_bytestring_with_unicode(self): idx = Index(lrange(1000)) - if py3compat.PY3: + if compat.PY3: bytes(idx) else: str(idx) @@ -1065,7 +1063,7 @@ def test_pickle(self): self.assert_(self.index.equals(unpickled)) def test_legacy_pickle(self): - if py3compat.PY3: + if compat.PY3: raise nose.SkipTest def curpath(): @@ -1775,24 +1773,24 @@ def test_tolist(self): self.assertEqual(result, exp) def test_repr_with_unicode_data(self): - d = {"a": [six.u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} index = pd.DataFrame(d).set_index(["a", "b"]).index self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped def test_unicode_string_with_unicode(self): - d = {"a": [six.u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index - if py3compat.PY3: + if compat.PY3: str(idx) else: - six.text_type(idx) + compat.text_type(idx) def test_bytestring_with_unicode(self): - d = {"a": [six.u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} + d = {"a": [u("\u05d0"), 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]} idx = pd.DataFrame(d).set_index(["a", "b"]).index - if py3compat.PY3: + if compat.PY3: bytes(idx) else: str(idx) diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index a053b43f17dc6..f2b22a4d9b3d1 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -3,7 +3,7 @@ import nose import itertools -from pandas.util.py3compat import range, lrange, StringIO, lmap +from pandas.util.compat import range, lrange, StringIO, lmap, map from numpy import random, nan from numpy.random import randn import numpy as np @@ -15,13 +15,12 @@ MultiIndex, DatetimeIndex, Timestamp) from pandas.util.testing import (assert_almost_equal, assert_series_equal, 
assert_frame_equal, assert_panel_equal) -from pandas.util import py3compat +from pandas.util import compat import pandas.util.testing as tm import pandas.lib as lib from pandas import date_range from numpy.testing.decorators import slow -from pandas.util.py3compat import map _verbose = False diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 9d2439b7cead5..2490fa211f6bf 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -11,8 +11,7 @@ from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, randn) -import six -from pandas.util.py3compat import zip +from pandas.util.compat import zip, u def assert_block_equal(left, right): @@ -201,7 +200,7 @@ def test_unicode_repr(self): mat = np.empty((N, 2), dtype=object) mat[:, 0] = 'foo' mat[:, 1] = 'bar' - cols = ['b', six.u("\u05d0")] + cols = ['b', u("\u05d0")] str_repr = repr(make_block(mat.T, cols, TEST_COLS)) def test_get(self): @@ -537,7 +536,7 @@ def test_get_numeric_data(self): def test_missing_unicode_key(self): df = DataFrame({"a": [1]}) try: - df.ix[:, six.u("\u05d0")] # should not raise UnicodeEncodeError + df.ix[:, u("\u05d0")] # should not raise UnicodeEncodeError except KeyError: pass # this is the expected exception diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index ea2ab8a1d914a..a98b613aed746 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -13,13 +13,11 @@ assert_frame_equal) import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.py3compat import range, lrange, StringIO, lzip -from pandas.util.compat import product as cart_product +from pandas.util.compat import (range, lrange, StringIO, lzip, u, cPickle, + product as cart_product, zip) import pandas as pd import pandas.index as _index -import six -from pandas.util.py3compat import zip, cPickle class TestMultiLevel(unittest.TestCase): @@ -430,7 +428,6 @@ def test_xs_level(self): def test_xs_level_multiple(self): from pandas import read_table - from pandas.util.py3compat import StringIO, lrange, lzip text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -455,7 +452,6 @@ def test_xs_level_multiple(self): def test_xs_level0(self): from pandas import read_table - from pandas.util.py3compat import StringIO, lrange, lzip text = """ A B C D E one two three four a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640 @@ -1674,7 +1670,7 @@ def test_drop_preserve_names(self): self.assert_(result.index.names == ['one', 'two']) def test_unicode_repr_issues(self): - levels = [Index([six.u('a/\u03c3'), six.u('b/\u03c3'), six.u('c/\u03c3')]), + levels = [Index([u('a/\u03c3'), u('b/\u03c3'), u('c/\u03c3')]), Index([0, 1])] labels = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)] index = MultiIndex(levels=levels, labels=labels) @@ -1686,7 +1682,7 @@ def test_unicode_repr_issues(self): def test_unicode_repr_level_names(self): index = MultiIndex.from_tuples([(0, 0), (1, 1)], - names=[six.u('\u0394'), 'i1']) + names=[u('\u0394'), 'i1']) s = Series(lrange(2), index=index) df = DataFrame(np.random.randn(2, 4), index=index) diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 5fdb487806c93..38117a591d849 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,7 +1,7 @@ # pylint: disable=W0612,E1101 from datetime import datetime -from pandas.util.py3compat import range, lrange, StringIO +from pandas.util.compat import range, lrange, StringIO, 
cPickle, OrderedDict from pandas.util import compat import operator import unittest @@ -15,8 +15,7 @@ from pandas.core.panel import Panel from pandas.core.series import remove_na import pandas.core.common as com -from pandas.util import py3compat -from pandas.util.py3compat import cPickle +from pandas.util import compat from pandas.util.testing import (assert_panel_equal, assert_frame_equal, @@ -311,7 +310,7 @@ def check_op(op, name): check_op(operator.add, 'add') check_op(operator.sub, 'subtract') check_op(operator.mul, 'multiply') - if py3compat.PY3: + if compat.PY3: check_op(operator.truediv, 'divide') else: check_op(operator.div, 'divide') @@ -916,7 +915,6 @@ def test_constructor_dict_mixed(self): self.assertRaises(Exception, Panel, data) def test_ctor_orderedDict(self): - from pandas.util.compat import OrderedDict keys = list(set(np.random.randint(0,5000,100)))[:50] # unique random int keys d = OrderedDict([(k,mkdf(10,5)) for k in keys]) p = Panel(d) diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index 8f5d7641c02db..f1b9bc645d2ba 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -1,5 +1,5 @@ from datetime import datetime -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import os import operator import unittest @@ -15,7 +15,7 @@ from pandas.core.series import remove_na import pandas.core.common as com import pandas.core.panel as panelmod -from pandas.util import py3compat +from pandas.util import compat from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 5675cfec58678..452fd2470204f 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -9,7 +9,7 @@ from pandas.core import panelnd from pandas.core.panel import Panel import pandas.core.common as com -from pandas.util import py3compat +from pandas.util import compat from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index d0d5f260ea0c7..e285d97642b93 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -15,9 +15,7 @@ from pandas.core.reshape import melt, convert_dummies, lreshape import pandas.util.testing as tm -from pandas.util.py3compat import StringIO -from pandas.util.py3compat import range -from pandas.util.py3compat import cPickle +from pandas.util.compat import StringIO, cPickle, range _multiprocess_can_split_ = True diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 95ef66eb8cb83..176ffa23108b3 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.compat import range import unittest import pandas.tools.rplot as rplot import pandas.util.testing as tm diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 5e23efca2c5f8..3ab924312ac28 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -23,14 +23,12 @@ import pandas.core.datetools as datetools import pandas.core.nanops as nanops -from pandas.util.py3compat import StringIO, lrange, range, zip +from pandas.util.compat import StringIO, lrange, range, zip, u, OrderedDict from pandas.util import compat -from pandas.util import py3compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, ensure_clean) import pandas.util.testing as tm -import six def 
_skip_if_no_scipy(): @@ -519,7 +517,6 @@ def test_constructor_subclass_dict(self): def test_orderedDict_ctor(self): # GH3283 - from pandas.util.compat import OrderedDict import pandas, random data = OrderedDict([('col%s' % i, random.random()) for i in range(12)]) s = pandas.Series(data) @@ -527,7 +524,6 @@ def test_orderedDict_ctor(self): def test_orderedDict_subclass_ctor(self): # GH3283 - from pandas.util.compat import OrderedDict import pandas, random class A(OrderedDict): pass @@ -1288,13 +1284,13 @@ def test_repr(self): repr(ots) # various names - for name in ['', 1, 1.2, 'foo', six.u('\u03B1\u03B2\u03B3'), + for name in ['', 1, 1.2, 'foo', u('\u03B1\u03B2\u03B3'), 'loooooooooooooooooooooooooooooooooooooooooooooooooooong', ('foo', 'bar', 'baz'), (1, 2), ('foo', 1, 2.3), - (six.u('\u03B1'), six.u('\u03B2'), six.u('\u03B3')), - (six.u('\u03B1'), 'bar')]: + (u('\u03B1'), u('\u03B2'), u('\u03B3')), + (u('\u03B1'), 'bar')]: self.series.name = name repr(self.series) @@ -1318,7 +1314,7 @@ def test_repr(self): self.assertFalse("a\n" in repr(ser)) def test_tidy_repr(self): - a = Series([six.u("\u05d0")] * 1000) + a = Series([u("\u05d0")] * 1000) a.name = 'title1' repr(a) # should not raise exception @@ -1343,7 +1339,7 @@ def test_repr_name_iterable_indexable(self): # it works! repr(s) - s.name = (six.u("\u05d0"),) * 2 + s.name = (u("\u05d0"),) * 2 repr(s) def test_repr_should_return_str(self): @@ -1356,20 +1352,20 @@ def test_repr_should_return_str(self): """ data = [8, 5, 3, 5] - index1 = [six.u("\u03c3"), six.u("\u03c4"), six.u("\u03c5"), six.u("\u03c6")] + index1 = [u("\u03c3"), u("\u03c4"), u("\u03c5"), u("\u03c6")] df = Series(data, index=index1) self.assertTrue(type(df.__repr__() == str)) # both py2 / 3 def test_unicode_string_with_unicode(self): - df = Series([six.u("\u05d0")], name=six.u("\u05d1")) - if py3compat.PY3: + df = Series([u("\u05d0")], name=u("\u05d1")) + if compat.PY3: str(df) else: - six.text_type(df) + compat.text_type(df) def test_bytestring_with_unicode(self): - df = Series([six.u("\u05d0")], name=six.u("\u05d1")) - if py3compat.PY3: + df = Series([u("\u05d0")], name=u("\u05d1")) + if compat.PY3: bytes(df) else: str(df) @@ -1790,7 +1786,7 @@ def test_div(self): p = DataFrame({ 'first' : [3,4,5,8], 'second' : [1,1,1,1] }) result = p['first'] / p['second'] - if py3compat.PY3: + if compat.PY3: assert_series_equal(result,p['first'].astype('float64')) else: assert_series_equal(result,p['first']) @@ -2406,7 +2402,7 @@ def _check_fill(meth, op, a, b, fill_value=0): ops = [Series.add, Series.sub, Series.mul, Series.div] equivs = [operator.add, operator.sub, operator.mul] - if py3compat.PY3: + if compat.PY3: equivs.append(operator.truediv) else: equivs.append(operator.div) @@ -2622,7 +2618,6 @@ def test_value_counts_nunique(self): assert_series_equal(hist, expected) # GH 3002, datetime64[ns] - from pandas.util.py3compat import StringIO, lrange import pandas as pd f = StringIO("xxyyzz20100101PIE\nxxyyzz20100101GUM\nxxyyww20090101EGG\nfoofoo20080909PIE") df = pd.read_fwf(f, widths=[6,8,3], names=["person_id", "dt", "food"], parse_dates=["dt"]) @@ -2821,7 +2816,7 @@ def test_to_csv(self): def test_to_csv_unicode_index(self): buf = StringIO() - s = Series([six.u("\u05d0"), "d2"], index=[six.u("\u05d0"), six.u("\u05d1")]) + s = Series([u("\u05d0"), "d2"], index=[u("\u05d0"), u("\u05d1")]) s.to_csv(buf, encoding='UTF-8') buf.seek(0) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index 8dc5823429be4..c8fb09cb30641 100644 --- a/pandas/tests/test_stats.py +++ 
b/pandas/tests/test_stats.py @@ -11,7 +11,6 @@ from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal) -import six class TestRank(unittest.TestCase): diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index 7c05c9fa295e9..d3bdb437249fa 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -1,7 +1,6 @@ # pylint: disable-msg=E1101,W0612 from datetime import datetime, timedelta, date -from pandas.util.py3compat import range, lrange import os import operator import re @@ -14,6 +13,8 @@ from numpy.testing import assert_array_equal from numpy.random import randint +from pandas.util.compat import range, lrange, u +import pandas.util.compat as compat from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull, bdate_range, date_range) import pandas.core.common as com @@ -22,7 +23,6 @@ import pandas.util.testing as tm import pandas.core.strings as strings -import six class TestStringMethods(unittest.TestCase): @@ -44,7 +44,7 @@ def test_iter(self): for el in s: # each element of the series is either a basestring/str or nan - self.assert_(isinstance(el, six.string_types) or isnull(el)) + self.assert_(isinstance(el, compat.string_types) or isnull(el)) # desired behavior is to iterate until everything would be nan on the # next iter so make sure the last element of the iterator was 'l' in @@ -156,7 +156,7 @@ def test_count(self): tm.assert_almost_equal(rs, xp) # unicode - values = [six.u('foo'), six.u('foofoo'), NA, six.u('foooofooofommmfoo')] + values = [u('foo'), u('foofoo'), NA, u('foooofooofommmfoo')] result = strings.str_count(values, 'f[o]+') exp = [1, 2, NA, 4] @@ -191,7 +191,7 @@ def test_contains(self): tm.assert_almost_equal(rs, xp) # unicode - values = [six.u('foo'), NA, six.u('fooommm__foo'), six.u('mmm_')] + values = [u('foo'), NA, u('fooommm__foo'), u('mmm_')] pat = 'mmm[_]+' result = strings.str_contains(values, pat) @@ -231,8 +231,8 @@ def test_startswith(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('om'), NA, six.u('foo_nom'), six.u('nom'), six.u('bar_foo'), NA, - six.u('foo')]) + values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, + u('foo')]) result = values.str.startswith('foo') exp = Series([False, NA, True, False, False, NA, True]) @@ -259,8 +259,8 @@ def test_endswith(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('om'), NA, six.u('foo_nom'), six.u('nom'), six.u('bar_foo'), NA, - six.u('foo')]) + values = Series([u('om'), NA, u('foo_nom'), u('nom'), u('bar_foo'), NA, + u('foo')]) result = values.str.endswith('foo') exp = Series([False, NA, False, False, True, NA, True]) @@ -284,10 +284,10 @@ def test_title(self): tm.assert_almost_equal(mixed, exp) # unicode - values = Series([six.u("FOO"), NA, six.u("bar"), six.u("Blurg")]) + values = Series([u("FOO"), NA, u("bar"), u("Blurg")]) results = values.str.title() - exp = Series([six.u("Foo"), NA, six.u("Bar"), six.u("Blurg")]) + exp = Series([u("Foo"), NA, u("Bar"), u("Blurg")]) tm.assert_series_equal(results, exp) @@ -311,10 +311,10 @@ def test_lower_upper(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('om'), NA, six.u('nom'), six.u('nom')]) + values = Series([u('om'), NA, u('nom'), u('nom')]) result = values.str.upper() - exp = Series([six.u('OM'), NA, six.u('NOM'), six.u('NOM')]) + exp = Series([u('OM'), NA, u('NOM'), u('NOM')]) tm.assert_series_equal(result, exp) result = result.str.lower() @@ -341,14 +341,14 @@ def 
test_replace(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('fooBAD__barBAD'), NA]) + values = Series([u('fooBAD__barBAD'), NA]) result = values.str.replace('BAD[_]*', '') - exp = Series([six.u('foobar'), NA]) + exp = Series([u('foobar'), NA]) tm.assert_series_equal(result, exp) result = values.str.replace('BAD[_]*', '', n=1) - exp = Series([six.u('foobarBAD'), NA]) + exp = Series([u('foobarBAD'), NA]) tm.assert_series_equal(result, exp) #flags + unicode @@ -379,17 +379,17 @@ def test_repeat(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('a'), six.u('b'), NA, six.u('c'), NA, - six.u('d')]) + values = Series([u('a'), u('b'), NA, u('c'), NA, + u('d')]) result = values.str.repeat(3) - exp = Series([six.u('aaa'), six.u('bbb'), NA, six.u('ccc'), NA, - six.u('ddd')]) + exp = Series([u('aaa'), u('bbb'), NA, u('ccc'), NA, + u('ddd')]) tm.assert_series_equal(result, exp) result = values.str.repeat([1, 2, 3, 4, 5, 6]) - exp = Series([six.u('a'), six.u('bb'), NA, six.u('cccc'), NA, - six.u('dddddd')]) + exp = Series([u('a'), u('bb'), NA, u('cccc'), NA, + u('dddddd')]) tm.assert_series_equal(result, exp) def test_match(self): @@ -409,10 +409,10 @@ def test_match(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('fooBAD__barBAD'), NA, six.u('foo')]) + values = Series([u('fooBAD__barBAD'), NA, u('foo')]) result = values.str.match('.*(BAD[_]+).*(BAD)') - exp = Series([(six.u('BAD__'), six.u('BAD')), NA, []]) + exp = Series([(u('BAD__'), u('BAD')), NA, []]) tm.assert_series_equal(result, exp) def test_join(self): @@ -431,8 +431,8 @@ def test_join(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('a_b_c'), six.u('c_d_e'), np.nan, - six.u('f_g_h')]) + values = Series([u('a_b_c'), u('c_d_e'), np.nan, + u('f_g_h')]) result = values.str.split('_').str.join('_') tm.assert_series_equal(values, result) @@ -454,8 +454,8 @@ def test_len(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('foo'), six.u('fooo'), six.u('fooooo'), np.nan, - six.u('fooooooo')]) + values = Series([u('foo'), u('fooo'), u('fooooo'), np.nan, + u('fooooooo')]) result = values.str.len() exp = values.map(lambda x: len(x) if com.notnull(x) else NA) @@ -479,11 +479,11 @@ def test_findall(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('fooBAD__barBAD'), NA, six.u('foo'), - six.u('BAD')]) + values = Series([u('fooBAD__barBAD'), NA, u('foo'), + u('BAD')]) result = values.str.findall('BAD[_]*') - exp = Series([[six.u('BAD__'), six.u('BAD')], NA, [], [six.u('BAD')]]) + exp = Series([[u('BAD__'), u('BAD')], NA, [], [u('BAD')]]) tm.assert_almost_equal(result, exp) def test_pad(self): @@ -530,22 +530,22 @@ def test_pad(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('a'), six.u('b'), NA, six.u('c'), NA, - six.u('eeeeee')]) + values = Series([u('a'), u('b'), NA, u('c'), NA, + u('eeeeee')]) result = values.str.pad(5, side='left') - exp = Series([six.u(' a'), six.u(' b'), NA, six.u(' c'), NA, - six.u('eeeeee')]) + exp = Series([u(' a'), u(' b'), NA, u(' c'), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='right') - exp = Series([six.u('a '), six.u('b '), NA, six.u('c '), NA, - six.u('eeeeee')]) + exp = Series([u('a '), u('b '), NA, u('c '), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) result = values.str.pad(5, side='both') - exp = Series([six.u(' a '), six.u(' b '), NA, six.u(' c '), NA, - six.u('eeeeee')]) + exp = Series([u(' a '), u(' 
b '), NA, u(' c '), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_center(self): @@ -567,12 +567,12 @@ def test_center(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('a'), six.u('b'), NA, six.u('c'), NA, - six.u('eeeeee')]) + values = Series([u('a'), u('b'), NA, u('c'), NA, + u('eeeeee')]) result = values.str.center(5) - exp = Series([six.u(' a '), six.u(' b '), NA, six.u(' c '), NA, - six.u('eeeeee')]) + exp = Series([u(' a '), u(' b '), NA, u(' c '), NA, + u('eeeeee')]) tm.assert_almost_equal(result, exp) def test_split(self): @@ -599,12 +599,12 @@ def test_split(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('a_b_c'), six.u('c_d_e'), NA, six.u('f_g_h')]) + values = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')]) result = values.str.split('_') - exp = Series([[six.u('a'), six.u('b'), six.u('c')], - [six.u('c'), six.u('d'), six.u('e')], NA, - [six.u('f'), six.u('g'), six.u('h')]]) + exp = Series([[u('a'), u('b'), u('c')], + [u('c'), u('d'), u('e')], NA, + [u('f'), u('g'), u('h')]]) tm.assert_series_equal(result, exp) def test_split_noargs(self): @@ -665,11 +665,11 @@ def test_slice(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('aafootwo'), six.u('aabartwo'), NA, - six.u('aabazqux')]) + values = Series([u('aafootwo'), u('aabartwo'), NA, + u('aabazqux')]) result = values.str.slice(2, 5) - exp = Series([six.u('foo'), six.u('bar'), NA, six.u('baz')]) + exp = Series([u('foo'), u('bar'), NA, u('baz')]) tm.assert_series_equal(result, exp) def test_slice_replace(self): @@ -718,19 +718,19 @@ def test_strip_lstrip_rstrip_mixed(self): def test_strip_lstrip_rstrip_unicode(self): # unicode - values = Series([six.u(' aa '), six.u(' bb \n'), NA, - six.u('cc ')]) + values = Series([u(' aa '), u(' bb \n'), NA, + u('cc ')]) result = values.str.strip() - exp = Series([six.u('aa'), six.u('bb'), NA, six.u('cc')]) + exp = Series([u('aa'), u('bb'), NA, u('cc')]) tm.assert_series_equal(result, exp) result = values.str.lstrip() - exp = Series([six.u('aa '), six.u('bb \n'), NA, six.u('cc ')]) + exp = Series([u('aa '), u('bb \n'), NA, u('cc ')]) tm.assert_series_equal(result, exp) result = values.str.rstrip() - exp = Series([six.u(' aa'), six.u(' bb'), NA, six.u('cc')]) + exp = Series([u(' aa'), u(' bb'), NA, u('cc')]) tm.assert_series_equal(result, exp) def test_strip_lstrip_rstrip_args(self): @@ -749,18 +749,18 @@ def test_strip_lstrip_rstrip_args(self): assert_series_equal(rs, xp) def test_strip_lstrip_rstrip_args_unicode(self): - values = Series([six.u('xxABCxx'), six.u('xx BNSD'), - six.u('LDFJH xx')]) + values = Series([u('xxABCxx'), u('xx BNSD'), + u('LDFJH xx')]) - rs = values.str.strip(six.u('x')) + rs = values.str.strip(u('x')) xp = Series(['ABC', ' BNSD', 'LDFJH ']) assert_series_equal(rs, xp) - rs = values.str.lstrip(six.u('x')) + rs = values.str.lstrip(u('x')) xp = Series(['ABCxx', ' BNSD', 'LDFJH xx']) assert_series_equal(rs, xp) - rs = values.str.rstrip(six.u('x')) + rs = values.str.rstrip(u('x')) xp = Series(['xxABC', 'xx BNSD', 'LDFJH ']) assert_series_equal(rs, xp) @@ -786,11 +786,11 @@ def test_get(self): tm.assert_almost_equal(rs, xp) # unicode - values = Series([six.u('a_b_c'), six.u('c_d_e'), np.nan, - six.u('f_g_h')]) + values = Series([u('a_b_c'), u('c_d_e'), np.nan, + u('f_g_h')]) result = values.str.split('_').str.get(1) - expected = Series([six.u('b'), six.u('d'), np.nan, six.u('g')]) + expected = Series([u('b'), u('d'), np.nan, u('g')]) tm.assert_series_equal(result, expected) def 
test_more_contains(self): @@ -891,7 +891,7 @@ def test_match_findall_flags(self): self.assertEquals(result[0], True) def test_encode_decode(self): - base = Series([six.u('a'), six.u('b'), six.u('a\xe4')]) + base = Series([u('a'), u('b'), u('a\xe4')]) series = base.str.encode('utf-8') f = lambda x: x.decode('utf-8') @@ -901,7 +901,7 @@ def test_encode_decode(self): tm.assert_series_equal(result, exp) def test_encode_decode_errors(self): - encodeBase = Series([six.u('a'), six.u('b'), six.u('a\x9d')]) + encodeBase = Series([u('a'), u('b'), u('a\x9d')]) self.assertRaises(UnicodeEncodeError, encodeBase.str.encode, 'cp1252') diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 5ce0041bf25c3..651c888a0b659 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -1,5 +1,3 @@ -from pandas.util.py3compat import range, lrange -from pandas.util.py3compat import zip import unittest from numpy import nan @@ -7,6 +5,7 @@ from pandas import Index, isnull, Timestamp from pandas.util.testing import assert_almost_equal import pandas.util.testing as common +from pandas.util.compat import range, lrange, zip import pandas.lib as lib import pandas.algos as algos from datetime import datetime diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 2987a73b34c6a..04c7dfa6ed036 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -1,12 +1,11 @@ """ SQL-style merge routines """ +import types -from pandas.util.py3compat import range, long, lrange, lzip -from pandas.util.py3compat import zip -import six import numpy as np -import types +from pandas.util.compat import range, long, lrange, lzip, zip +import pandas.util.compat as compat from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame, _merge_doc from pandas.core.generic import NDFrame @@ -1299,7 +1298,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None): def _should_fill(lname, rname): - if not isinstance(lname, six.string_types) or not isinstance(rname, six.string_types): + if not isinstance(lname, compat.string_types) or not isinstance(rname, compat.string_types): return True return lname == rname diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index e4aa0a7d6249e..ed463fbe61d68 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -5,11 +5,9 @@ from pandas.core.reshape import _unstack_multiple from pandas.tools.merge import concat from pandas.tools.util import cartesian_product -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange, zip from pandas.util import compat -import six import pandas.core.common as com -from pandas.util.py3compat import zip import numpy as np @@ -155,7 +153,7 @@ def _add_margins(table, data, values, rows=None, cols=None, aggfunc=np.mean): grand_margin = {} for k, v in compat.iteritems(data[values]): try: - if isinstance(aggfunc, six.string_types): + if isinstance(aggfunc, compat.string_types): grand_margin[k] = getattr(v, aggfunc)() else: grand_margin[k] = aggfunc(v) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index b83b2d176ea7d..aef035ec41e67 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -1,6 +1,5 @@ # being a bit too dynamic # pylint: disable=E1101 -import six import datetime import warnings import re @@ -16,8 +15,8 @@ from pandas.tseries.period import PeriodIndex, Period from pandas.tseries.frequencies import get_period_alias, get_base_alias from pandas.tseries.offsets import DateOffset -from 
pandas.util.py3compat import range, lrange, lmap -from pandas.util.py3compat import map, zip +from pandas.util.compat import range, lrange, lmap, map, zip +import pandas.util.compat as compat try: # mpl optional import pandas.tseries.converter as conv @@ -99,7 +98,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', import matplotlib.pyplot as plt if color is None and colormap is not None: - if isinstance(colormap, six.string_types): + if isinstance(colormap, compat.string_types): import matplotlib.cm as cm cmap = colormap colormap = cm.get_cmap(colormap) @@ -114,7 +113,7 @@ def _get_standard_colors(num_colors=None, colormap=None, color_type='default', else: if color_type == 'default': colors = plt.rcParams.get('axes.color_cycle', list('bgrcmyk')) - if isinstance(colors, six.string_types): + if isinstance(colors, compat.string_types): colors = list(colors) elif color_type == 'random': import random diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 2bc3775247478..0bd1e79a10470 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -1,9 +1,8 @@ -from pandas.util.py3compat import range -from pandas.util.py3compat import zip -import numpy as np import random from copy import deepcopy +import numpy as np +from pandas.util.compat import range, zip # # TODO: # * Make sure legends work properly diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index 742bc81d485e1..c3b91ed27d8f2 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -9,8 +9,7 @@ import numpy as np import random -from pandas.util.py3compat import range, lrange, lzip -from pandas.util.py3compat import zip +from pandas.util.compat import range, lrange, lzip, zip from pandas.util import compat from pandas.tseries.index import DatetimeIndex from pandas.tools.merge import merge, concat, ordered_merge, MergeError diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 0847152917716..8d3f25a7d60d5 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -1,14 +1,15 @@ -from pandas.util.py3compat import range +import datetime import unittest import numpy as np from numpy.testing import assert_equal +import pandas from pandas import DataFrame, Series, Index, MultiIndex from pandas.tools.merge import concat from pandas.tools.pivot import pivot_table, crosstab +from pandas.util.compat import range, u, product import pandas.util.testing as tm -import six class TestPivotTable(unittest.TestCase): @@ -74,18 +75,18 @@ def test_pivot_table_dropna(self): pv_col = df.pivot_table('quantity', 'month', ['customer', 'product'], dropna=False) pv_ind = df.pivot_table('quantity', ['customer', 'product'], 'month', dropna=False) - m = MultiIndex.from_tuples([(six.u('A'), six.u('a')), - (six.u('A'), six.u('b')), - (six.u('A'), six.u('c')), - (six.u('A'), six.u('d')), - (six.u('B'), six.u('a')), - (six.u('B'), six.u('b')), - (six.u('B'), six.u('c')), - (six.u('B'), six.u('d')), - (six.u('C'), six.u('a')), - (six.u('C'), six.u('b')), - (six.u('C'), six.u('c')), - (six.u('C'), six.u('d'))]) + m = MultiIndex.from_tuples([(u('A'), u('a')), + (u('A'), u('b')), + (u('A'), u('c')), + (u('A'), u('d')), + (u('B'), u('a')), + (u('B'), u('b')), + (u('B'), u('c')), + (u('B'), u('d')), + (u('C'), u('a')), + (u('C'), u('b')), + (u('C'), u('c')), + (u('C'), u('d'))]) assert_equal(pv_col.columns.values, m.values) assert_equal(pv_ind.index.values, m.values) @@ -162,7 +163,7 @@ def 
test_pivot_index_with_nan(self): nan = np.nan df = DataFrame({"a":['R1', 'R2', nan, 'R4'], 'b':["C1", "C2", "C3" , "C4"], "c":[10, 15, nan , 20]}) result = df.pivot('a','b','c') - expected = DataFrame([[nan,nan,nan,nan],[nan,10,nan,nan], + expected = DataFrame([[nan,nan,nan,nan],[nan,10,nan,nan], [nan,nan,nan,nan],[nan,nan,15,20]], index = Index(['R1','R2',nan,'R4'],name='a'), columns = Index(['C1','C2','C3','C4'],name='b')) @@ -217,9 +218,6 @@ def _check_output(res, col, rows=['A', 'B'], cols=['C']): def test_pivot_integer_columns(self): # caused by upstream bug in unstack - from pandas.util.compat import product - import datetime - import pandas d = datetime.date.min data = list(product(['foo', 'bar'], ['A', 'B', 'C'], ['x1', 'x2'], @@ -247,9 +245,6 @@ def test_pivot_no_level_overlap(self): tm.assert_frame_equal(table, expected) def test_pivot_columns_lexsorted(self): - import datetime - import numpy as np - import pandas n = 10000 diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index 09095ba801cf4..d939bebdefaeb 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -3,7 +3,7 @@ import unittest import numpy as np -from pandas.util.py3compat import zip +from pandas.util.compat import zip from pandas import DataFrame, Series, unique import pandas.util.testing as tm diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index fd9d290d6e126..f987042bb91f2 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -8,7 +8,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.nanops as nanops -from pandas.util.py3compat import zip +from pandas.util.compat import zip import numpy as np diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 3e79bdf0184ad..3226a1cb426bf 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -1,6 +1,4 @@ from datetime import datetime, timedelta -from pandas.util.py3compat import range, lrange -import six import datetime as pydt import numpy as np @@ -12,6 +10,8 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular +from pandas.util.compat import range, lrange +import pandas.util.compat as compat import pandas.lib as lib import pandas.core.common as com from pandas.core.index import Index @@ -38,7 +38,7 @@ def _to_ordinalf(tm): def time2num(d): - if isinstance(d, six.string_types): + if isinstance(d, compat.string_types): parsed = tools.to_datetime(d) if not isinstance(parsed, datetime): raise ValueError('Could not parse time %s' % d) @@ -163,7 +163,7 @@ def try_parse(values): return dates.date2num(values) elif (com.is_integer(values) or com.is_float(values)): return values - elif isinstance(values, six.string_types): + elif isinstance(values, compat.string_types): return try_parse(values) elif isinstance(values, (list, tuple, np.ndarray)): if not isinstance(values, np.ndarray): @@ -810,7 +810,7 @@ def _annual_finder(vmin, vmax, freq): def get_finder(freq): - if isinstance(freq, six.string_types): + if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) fgroup = frequencies.get_freq_group(freq) @@ -847,7 +847,7 @@ class TimeSeries_DateLocator(Locator): def __init__(self, freq, minor_locator=False, dynamic_mode=True, base=1, quarter=1, month=1, day=1, plot_obj=None): - if isinstance(freq, six.string_types): + if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) self.freq = freq self.base = base @@ -926,7 +926,7 
@@ class TimeSeries_DateFormatter(Formatter): def __init__(self, freq, minor_locator=False, dynamic_mode=True, plot_obj=None): - if isinstance(freq, six.string_types): + if isinstance(freq, compat.string_types): freq = frequencies.get_freq(freq) self.format = None self.freq = freq diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index d6065a9a552ca..f6e792d4bf193 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,8 +1,6 @@ from datetime import datetime -from pandas.util.py3compat import range, long +from pandas.util.compat import range, long, zip from pandas.util import compat -from pandas.util.py3compat import zip -import six import re import numpy as np @@ -58,14 +56,14 @@ def get_to_timestamp_base(base): def get_freq_group(freq): - if isinstance(freq, six.string_types): + if isinstance(freq, compat.string_types): base, mult = get_freq_code(freq) freq = base return (freq // 1000) * 1000 def get_freq(freq): - if isinstance(freq, six.string_types): + if isinstance(freq, compat.string_types): base, mult = get_freq_code(freq) freq = base return freq @@ -420,7 +418,7 @@ def to_offset(freqstr): if isinstance(freqstr, tuple): name = freqstr[0] stride = freqstr[1] - if isinstance(stride, six.string_types): + if isinstance(stride, compat.string_types): name, stride = stride, name name, _ = _base_and_stride(name) delta = get_offset(name) * stride diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 2bff7c0e4498c..63e96efc2048d 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -8,6 +8,8 @@ from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE from pandas.core.index import Index, Int64Index +import pandas.util.compat as compat +from pandas.util.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, Resolution, get_reso_string) @@ -23,7 +25,6 @@ import pandas.tslib as tslib import pandas.algos as _algos import pandas.index as _index -import six def _utc(): @@ -71,7 +72,7 @@ def wrapper(self, other): other = _to_m8(other, tz=self.tz) elif isinstance(other, list): other = DatetimeIndex(other) - elif isinstance(other, six.string_types): + elif isinstance(other, compat.string_types): other = _to_m8(other, tz=self.tz) elif not isinstance(other, np.ndarray): other = _ensure_datetime64(other) @@ -208,7 +209,7 @@ def __new__(cls, data=None, return data - if issubclass(data.dtype.type, six.string_types): + if issubclass(data.dtype.type, compat.string_types): data = _str_to_dt_array(data, offset, dayfirst=dayfirst, yearfirst=yearfirst) @@ -582,21 +583,21 @@ def __contains__(self, key): def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=six.u('NaT'), **kwargs): + def _format_native_types(self, na_rep=u('NaT'), **kwargs): data = list(self) # tz formatter or time formatter zero_time = time(0, 0) for d in data: if d.time() != zero_time or d.tzinfo is not None: - return [six.u('%s') % x for x in data] + return [u('%s') % x for x in data] values = np.array(data,dtype=object) mask = isnull(self.values) values[mask] = na_rep imask = -mask - values[imask] = np.array([six.u('%d-%.2d-%.2d') % ( + values[imask] = np.array([u('%d-%.2d-%.2d') % ( dt.year, dt.month, dt.day) for dt in values[imask] ]) return values.tolist() @@ -769,7 +770,7 @@ def shift(self, n, freq=None): shifted : DatetimeIndex """ if freq is not None and freq != self.offset: - if isinstance(freq, six.string_types): + if 
isinstance(freq, compat.string_types): freq = to_offset(freq) result = Index.shift(self, n, freq) result.tz = self.tz @@ -1233,7 +1234,7 @@ def slice_locs(self, start=None, end=None): """ Index.slice_locs, customized to handle partial ISO-8601 string slicing """ - if isinstance(start, six.string_types) or isinstance(end, six.string_types): + if isinstance(start, compat.string_types) or isinstance(end, compat.string_types): if self.is_monotonic: try: @@ -1546,7 +1547,7 @@ def indexer_at_time(self, time, asof=False): if asof: raise NotImplementedError - if isinstance(time, six.string_types): + if isinstance(time, compat.string_types): time = parse(time).time() if time.tzinfo: @@ -1576,10 +1577,10 @@ def indexer_between_time(self, start_time, end_time, include_start=True, """ from dateutil.parser import parse - if isinstance(start_time, six.string_types): + if isinstance(start_time, compat.string_types): start_time = parse(start_time).time() - if isinstance(end_time, six.string_types): + if isinstance(end_time, compat.string_types): end_time = parse(end_time).time() if start_time.tzinfo or end_time.tzinfo: diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 303a11929064a..565abc195145c 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,7 +1,6 @@ from datetime import date, datetime, timedelta -from pandas.util.py3compat import range +from pandas.util.compat import range from pandas.util import compat -import six import numpy as np from pandas.tseries.tools import to_datetime @@ -140,7 +139,7 @@ def __eq__(self, other): if other is None: return False - if isinstance(other, six.string_types): + if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -431,7 +430,7 @@ def rule_code(self): @staticmethod def _to_dt64(dt, dtype='datetime64'): - if isinstance(dt, (datetime, six.string_types)): + if isinstance(dt, (datetime, compat.string_types)): dt = np.datetime64(dt, dtype=dtype) if isinstance(dt, np.datetime64): dt = dt.astype(dtype) @@ -1169,7 +1168,7 @@ def __add__(self, other): return self.apply(other) def __eq__(self, other): - if isinstance(other, six.string_types): + if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) @@ -1184,7 +1183,7 @@ def __hash__(self): return hash(self._params()) def __ne__(self, other): - if isinstance(other, six.string_types): + if isinstance(other, compat.string_types): from pandas.tseries.frequencies import to_offset other = to_offset(other) diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index 9fce356522205..c512331ae66f9 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -14,14 +14,13 @@ import pandas.core.common as com from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE -from pandas.util import py3compat +from pandas.util import compat from pandas.lib import Timestamp import pandas.lib as lib import pandas.tslib as tslib import pandas.algos as _algos -import six -from pandas.util.py3compat import map, zip +from pandas.util.compat import map, zip, u #--------------- @@ -49,7 +48,7 @@ class Period(PandasObject): Parameters ---------- - value : Period or six.string_types, default None + value : Period or compat.string_types, default None The time period represented (e.g., '4Q2005') freq : str, default None e.g., 'B' for businessday, ('T', 5) or '5T' for 5 minutes @@ -101,7 +100,7 @@ def __init__(self, value=None, freq=None, ordinal=None, 
converted = other.asfreq(freq) self.ordinal = converted.ordinal - elif isinstance(value, six.string_types) or com.is_integer(value): + elif isinstance(value, compat.string_types) or com.is_integer(value): if com.is_integer(value): value = str(value) @@ -269,7 +268,7 @@ def __repr__(self): formatted = tslib.period_format(self.ordinal, base) freqstr = _freq_mod._reverse_period_code_map[base] - if not py3compat.PY3: + if not compat.PY3: encoding = com.get_option("display.encoding") formatted = formatted.encode(encoding) @@ -668,7 +667,7 @@ def _from_arraylike(cls, data, freq, tz): def __contains__(self, key): if not isinstance(key, Period) or key.freq != self.freq: - if isinstance(key, six.string_types): + if isinstance(key, compat.string_types): try: self.get_loc(key) return True @@ -948,7 +947,7 @@ def slice_locs(self, start=None, end=None): """ Index.slice_locs, customized to handle partial ISO-8601 string slicing """ - if isinstance(start, six.string_types) or isinstance(end, six.string_types): + if isinstance(start, compat.string_types) or isinstance(end, compat.string_types): try: if start: start_loc = self._get_string_slice(start).start @@ -1059,14 +1058,14 @@ def __getitem__(self, key): def _format_with_header(self, header, **kwargs): return header + self._format_native_types(**kwargs) - def _format_native_types(self, na_rep=six.u('NaT'), **kwargs): + def _format_native_types(self, na_rep=u('NaT'), **kwargs): values = np.array(list(self),dtype=object) mask = isnull(self.values) values[mask] = na_rep imask = -mask - values[imask] = np.array([six.u('%s') % dt for dt in values[imask]]) + values[imask] = np.array([u('%s') % dt for dt in values[imask]]) return values.tolist() def __array_finalize__(self, obj): @@ -1086,8 +1085,8 @@ def __repr__(self): def __unicode__(self): output = self.__class__.__name__ - output += six.u('(') - prefix = '' if py3compat.PY3 else 'u' + output += u('(') + prefix = '' if compat.PY3 else 'u' mapper = "{0}'{{0}}'".format(prefix) output += '[{0}]'.format(', '.join(map(mapper.format, self))) output += ", freq='{0}'".format(self.freq) @@ -1099,7 +1098,7 @@ def __bytes__(self): return self.__unicode__().encode(encoding, 'replace') def __str__(self): - if py3compat.PY3: + if compat.PY3: return self.__unicode__() return self.__bytes__() diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 253abcbd8adf3..687d505dbb611 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1,6 +1,5 @@ from datetime import timedelta -import six import numpy as np from pandas.core.groupby import BinGrouper, CustomGrouper @@ -10,6 +9,7 @@ from pandas.tseries.period import PeriodIndex, period_range import pandas.tseries.tools as tools import pandas.core.common as com +import pandas.util.compat as compat from pandas.lib import Timestamp import pandas.lib as lib @@ -231,7 +231,7 @@ def _resample_timestamps(self, obj): limit=self.limit) loffset = self.loffset - if isinstance(loffset, six.string_types): + if isinstance(loffset, compat.string_types): loffset = to_offset(self.loffset) if isinstance(loffset, (DateOffset, timedelta)): @@ -292,7 +292,7 @@ def _take_new_index(obj, indexer, new_index, axis=0): def _get_range_edges(axis, offset, closed='left', base=0): - if isinstance(offset, six.string_types): + if isinstance(offset, compat.string_types): offset = to_offset(offset) if isinstance(offset, Tick): diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index aca7140801bce..0d6449ec79339 100644 --- 
a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -6,7 +6,7 @@ import nose import numpy as np -import six +from pandas.util.compat import u try: import pandas.tseries.converter as converter @@ -15,7 +15,7 @@ def test_timtetonum_accepts_unicode(): - assert(converter.time2num("00:01") == converter.time2num(six.u("00:01"))) + assert(converter.time2num("00:01") == converter.time2num(u("00:01"))) class TestDateTimeConverter(unittest.TestCase): @@ -26,7 +26,7 @@ def setUp(self): def test_convert_accepts_unicode(self): r1 = self.dtc.convert("12:22", None, None) - r2 = self.dtc.convert(six.u("12:22"), None, None) + r2 = self.dtc.convert(u("12:22"), None, None) assert(r1 == r2), "DatetimeConverter.convert should accept unicode" def test_conversion(self): diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index b9f5e73150623..ad1c04739a192 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -1,5 +1,5 @@ from datetime import datetime -from pandas.util.py3compat import range +from pandas.util.compat import range import pickle import unittest import nose diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index bcaba1fee67c1..24d268972b6a8 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -1,5 +1,5 @@ from datetime import datetime, time, timedelta -from pandas.util.py3compat import range +from pandas.util.compat import range import sys import os import unittest diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 3f4960520104f..3e64e4c03025c 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -1,5 +1,5 @@ from datetime import date, datetime, timedelta -from pandas.util.py3compat import range +from pandas.util.compat import range from pandas.util import compat import unittest import nose diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index 909c8b361de5a..c2faf4511f200 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -22,15 +22,13 @@ import pandas.core.datetools as datetools import pandas as pd import numpy as np -import six -from pandas.util.py3compat import range, lrange, lmap -from pandas.util.py3compat import map, zip +from pandas.util.compat import range, lrange, lmap, map, zip randn = np.random.randn from pandas import Series, TimeSeries, DataFrame from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm -from pandas.util import py3compat +from pandas.util import compat from numpy.testing import assert_array_equal @@ -213,7 +211,7 @@ def test_strftime(self): p = Period('2000-1-1 12:34:12', freq='S') res = p.strftime('%Y-%m-%d %H:%M:%S') self.assertEqual(res, '2000-01-01 12:34:12') - tm.assert_isinstance(res, six.text_type) # GH3363 + tm.assert_isinstance(res, compat.text_type) # GH3363 def test_sub_delta(self): left, right = Period('2011', freq='A'), Period('2007', freq='A') @@ -1625,45 +1623,45 @@ def test_ts_repr(self): def test_period_index_unicode(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 9) - assert_equal(pi, eval(six.text_type(pi))) + assert_equal(pi, eval(compat.text_type(pi))) pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 4 * 9) - assert_equal(pi, 
eval(six.text_type(pi))) + assert_equal(pi, eval(compat.text_type(pi))) pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009') assert_equal(len(pi), 12 * 9) - assert_equal(pi, eval(six.text_type(pi))) + assert_equal(pi, eval(compat.text_type(pi))) start = Period('02-Apr-2005', 'B') i1 = PeriodIndex(start=start, periods=20) assert_equal(len(i1), 20) assert_equal(i1.freq, start.freq) assert_equal(i1[0], start) - assert_equal(i1, eval(six.text_type(i1))) + assert_equal(i1, eval(compat.text_type(i1))) end_intv = Period('2006-12-31', 'W') i1 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), 10) assert_equal(i1.freq, end_intv.freq) assert_equal(i1[-1], end_intv) - assert_equal(i1, eval(six.text_type(i1))) + assert_equal(i1, eval(compat.text_type(i1))) end_intv = Period('2006-12-31', '1w') i2 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), len(i2)) self.assert_((i1 == i2).all()) assert_equal(i1.freq, i2.freq) - assert_equal(i1, eval(six.text_type(i1))) - assert_equal(i2, eval(six.text_type(i2))) + assert_equal(i1, eval(compat.text_type(i1))) + assert_equal(i2, eval(compat.text_type(i2))) end_intv = Period('2006-12-31', ('w', 1)) i2 = PeriodIndex(end=end_intv, periods=10) assert_equal(len(i1), len(i2)) self.assert_((i1 == i2).all()) assert_equal(i1.freq, i2.freq) - assert_equal(i1, eval(six.text_type(i1))) - assert_equal(i2, eval(six.text_type(i2))) + assert_equal(i1, eval(compat.text_type(i1))) + assert_equal(i2, eval(compat.text_type(i2))) try: PeriodIndex(start=start, end=end_intv) @@ -1673,7 +1671,7 @@ def test_period_index_unicode(self): end_intv = Period('2005-05-01', 'B') i1 = PeriodIndex(start=start, end=end_intv) - assert_equal(i1, eval(six.text_type(i1))) + assert_equal(i1, eval(compat.text_type(i1))) try: PeriodIndex(start=start) @@ -1686,12 +1684,12 @@ def test_period_index_unicode(self): i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) assert_equal(len(i2), 2) assert_equal(i2[0], end_intv) - assert_equal(i2, eval(six.text_type(i2))) + assert_equal(i2, eval(compat.text_type(i2))) i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')])) assert_equal(len(i2), 2) assert_equal(i2[0], end_intv) - assert_equal(i2, eval(six.text_type(i2))) + assert_equal(i2, eval(compat.text_type(i2))) # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] @@ -2001,9 +1999,9 @@ def test_map_with_string_constructor(self): index = PeriodIndex(raw, freq='A') types = str, - if py3compat.PY3: + if compat.PY3: # unicode - types += six.text_type, + types += compat.text_type, for t in types: expected = np.array(lmap(t, raw), dtype=object) diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index 95bfa98d32cf2..e4a707f28f56b 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -3,8 +3,7 @@ import unittest import nose -from pandas.util.py3compat import range, lrange -from pandas.util.py3compat import zip +from pandas.util.compat import range, lrange, zip import numpy as np from numpy.testing.decorators import slow diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 1db735896c902..52055d13f42a6 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2,8 +2,7 @@ from datetime import datetime, timedelta -from pandas.util.py3compat import range, lrange -from pandas.util.py3compat import zip +from pandas.util.compat import range, lrange, zip, product import numpy as np from pandas import 
Series, TimeSeries, DataFrame, Panel, isnull, notnull, Timestamp @@ -605,7 +604,6 @@ def _simple_pts(start, end, freq='D'): from pandas.tseries.frequencies import MONTHS, DAYS -from pandas.util.compat import product class TestResamplePeriodIndex(unittest.TestCase): diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index 779166f3c5c75..e2213a85a9daa 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -28,15 +28,15 @@ import pandas.index as _index -from pandas.util.py3compat import range, long, StringIO, lrange, lmap -from pandas.util.compat import product -from pandas.util.py3compat import map, zip, cPickle as pickle +from pandas.util.compat import( + range, long, StringIO, lrange, lmap, map, zip, cPickle as pickle, product +) from pandas import read_pickle import pandas.core.datetools as dt from numpy.random import rand from numpy.testing import assert_array_equal from pandas.util.testing import assert_frame_equal -import pandas.util.py3compat as py3compat +import pandas.util.compat as compat from pandas.core.datetools import BDay import pandas.core.common as com from pandas import concat @@ -1966,7 +1966,7 @@ class TestLegacySupport(unittest.TestCase): @classmethod def setUpClass(cls): - if py3compat.PY3: + if compat.PY3: raise nose.SkipTest pth, _ = os.path.split(os.path.abspath(__file__)) @@ -2816,7 +2816,7 @@ def check(val,unit=None,h=1,s=1,us=0): check(days,unit='D',h=0) # using truediv, so these are like floats - if py3compat.PY3: + if compat.PY3: check((val+500000)/long(1000000000),unit='s',us=500) check((val+500000000)/long(1000000000),unit='s',us=500000) check((val+500000)/long(1000000),unit='ms',us=500) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 7ee89f7cadb70..47e006af3326d 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -27,9 +27,8 @@ import pandas.core.datetools as dt from numpy.random import rand from pandas.util.testing import assert_frame_equal -import pandas.util.py3compat as py3compat -from pandas.util.py3compat import range, lrange -from pandas.util.py3compat import zip, cPickle as pickle +import pandas.util.compat as compat +from pandas.util.compat import range, lrange, zip, cPickle as pickle from pandas.core.datetools import BDay import pandas.core.common as com diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 5bfdbba56395c..84666b0192cf1 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range +from pandas.util.compat import range import nose import unittest diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index f043dcb87ccca..f7eafdac16816 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -2,13 +2,13 @@ import re import sys -import six import numpy as np import pandas.lib as lib import pandas.tslib as tslib import pandas.core.common as com -from pandas.util.py3compat import StringIO +from pandas.util.compat import StringIO, callable +import pandas.util.compat as compat try: import dateutil @@ -41,7 +41,7 @@ def _infer(a, b): def _maybe_get_tz(tz): - if isinstance(tz, six.string_types): + if isinstance(tz, compat.string_types): import pytz tz = pytz.timezone(tz) if com.is_integer(tz): @@ -149,7 +149,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): Parameters ---------- - arg : 
six.string_types + arg : compat.string_types freq : str or DateOffset, default None Helps with interpreting time string if supplied dayfirst : bool, default None @@ -166,7 +166,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): from pandas.tseries.frequencies import (_get_rule_month, _month_numbers, _get_freq_str) - if not isinstance(arg, six.string_types): + if not isinstance(arg, compat.string_types): return arg arg = arg.upper() @@ -272,14 +272,14 @@ def dateutil_parse(timestr, default, if res.weekday is not None and not res.day: ret = ret + relativedelta.relativedelta(weekday=res.weekday) if not ignoretz: - if six.callable(tzinfos) or tzinfos and res.tzname in tzinfos: - if six.callable(tzinfos): + if callable(tzinfos) or tzinfos and res.tzname in tzinfos: + if callable(tzinfos): tzdata = tzinfos(res.tzname, res.tzoffset) else: tzdata = tzinfos.get(res.tzname) if isinstance(tzdata, datetime.tzinfo): tzinfo = tzdata - elif isinstance(tzdata, six.string_types): + elif isinstance(tzdata, compat.string_types): tzinfo = tz.tzstr(tzdata) elif isinstance(tzdata, int): tzinfo = tz.tzoffset(res.tzname, tzdata) diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index 5021214ac869a..33d33045c8743 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import numpy as np import pandas as pd diff --git a/pandas/util/compat.py b/pandas/util/compat.py index 1f57d00256ab6..27f5671ca02b9 100644 --- a/pandas/util/compat.py +++ b/pandas/util/compat.py @@ -1,22 +1,113 @@ -import sys -import six -from pandas.util.py3compat import map, filter -from pandas.util.py3compat import range +""" +compat +====== + +Cross-compatible functions for Python 2 and 3. + +Key items to import for 2/3 compatible code: +* iterators: range(), map(), zip(), filter(), reduce() +* lists: lrange(), lmap(), lzip(), lfilter() +* unicode: u() [u"" is a syntax error in Python 3.0-3.2] +* longs: long (int in Python 3) +* callable +* iterable method compatibility: iteritems, iterkeys, itervalues + * Uses the original method if available, otherwise uses items, keys, values. 
+* types: + * text_type: unicode in Python 2, str in Python 3 + * binary_type: str in Python 2, bytes in Python 3 + * string_types: basestring in Python 2, str in Python 3 +* bind_method: binds functions to classes + +Python 2.6 compatibility: +* OrderedDict +* Counter + +Other items: +* OrderedDefaultDict +""" +# pylint: disable=W0611 +import functools +import itertools from itertools import product +import sys +import types +PY3 = (sys.version_info[0] >= 3) +# import iterator versions of these functions -# OrderedDict Shim from Raymond Hettinger, python core dev -# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ -# here to support versions before 2.6 try: - from thread import get_ident as _get_ident + import __builtin__ as builtins + # not writeable when instantiated with string, doesn't handle unicode well + from cStringIO import StringIO as cStringIO + # always writeable + from StringIO import StringIO + BytesIO = StringIO + import cPickle except ImportError: - from dummy_thread import get_ident as _get_ident + import builtins + from io import StringIO, BytesIO + cStringIO = StringIO + import pickle as cPickle -try: - from _abcoll import KeysView, ValuesView, ItemsView -except ImportError: - pass + +if PY3: + def isidentifier(s): + return s.isidentifier() + + def str_to_bytes(s, encoding='ascii'): + return s.encode(encoding) + + def bytes_to_str(b, encoding='utf-8'): + return b.decode(encoding) + + # have to explicitly put builtins into the namespace + range = range + map = map + zip = zip + filter = filter + reduce = functools.reduce + long = int + unichr = chr + + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) +else: + # Python 2 + import re + _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") + + def isidentifier(s, dotted=False): + return bool(_name_re.match(s)) + + def str_to_bytes(s, encoding='ascii'): + return s + + def bytes_to_str(b, encoding='ascii'): + return b + + range = xrange + zip = itertools.izip + filter = itertools.ifilter + map = itertools.imap + reduce = reduce + long = long + unichr = unichr + + # Python 2-builtin ranges produce lists + lrange = builtins.range + lzip = builtins.zip + lmap = builtins.map + lfilter = builtins.filter def iteritems(obj, **kwargs): @@ -24,13 +115,104 @@ def iteritems(obj, **kwargs): uses 'iteritems' if available and otherwise uses 'items'. Passes kwargs to method.""" - if hasattr(obj, "iteritems"): - return obj.iteritems(**kwargs) + func = getattr(obj, "iteritems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def iterkeys(obj, **kwargs): + func = getattr(obj, "iterkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def itervalues(obj, **kwargs): + func = getattr(obj, "itervalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def bind_method(cls, name, func): + """Bind a method to class, python 2 and python 3 compatible. 
+ + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + + Returns + ------- + None + """ + # only python 2 has bound/unbound method issue + if not PY3: + setattr(cls, name, types.MethodType(func, None, cls)) else: - return obj.items(**kwargs) + setattr(cls, name, func) +# ---------------------------------------------------------------------------- +# functions largely based / taken from the six module + +# Much of the code in this module comes from Benjamin Peterson's six library. +# The license for this library can be found in LICENSES/SIX and the code can be +# found at https://bitbucket.org/gutworth/six + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + def u(s): + return s +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + def u(s): + return unicode(s, "unicode_escape") + +try: + # callable reintroduced in later versions of Python + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + +# ---------------------------------------------------------------------------- +# Python 2.6 compatibility shims +# + +# OrderedDict Shim from Raymond Hettinger, python core dev +# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ +# here to support versions before 2.6 +if not PY3: + # don't need this except in 2.6 + try: + from thread import get_ident as _get_ident + except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass class _OrderedDict(dict): + 'Dictionary that remembers insertion order' # An inherited dict maps keys to values. # The inherited dict provides __getitem__, __len__, __contains__, and get. @@ -38,10 +220,11 @@ class _OrderedDict(dict): # Big-O running times for all methods are the same as for regular # dictionaries. - # The internal self.__map dictionary maps keys to links in a doubly linked list. - # The circular doubly linked list starts and ends with a sentinel element. - # The sentinel element never gets deleted (this simplifies the algorithm). - # Each link is stored as a list of length three: [PREV, NEXT, KEY]. + # The internal self.__map dictionary maps keys to links in a doubly linked + # list. The circular doubly linked list starts and ends with a sentinel + # element. The sentinel element never gets deleted (this simplifies the + # algorithm). Each link is stored as a list of length three: [PREV, NEXT, + # KEY]. def __init__(self, *args, **kwds): '''Initialize an ordered dictionary. Signature is the same as for @@ -61,9 +244,9 @@ def __init__(self, *args, **kwds): def __setitem__(self, key, value, dict_setitem=dict.__setitem__): 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link which goes at the end of the linked - # list, and the inherited dictionary is updated with the new key/value - # pair. + # Setting a new item creates a new link which goes at the end of the + # linked list, and the inherited dictionary is updated with the new + # key/value pair. if key not in self: root = self.__root last = root[0] @@ -99,7 +282,7 @@ def __reversed__(self): def clear(self): 'od.clear() -> None. Remove all items from od.' 
try: - for node in six.itervalues(self.__map): + for node in itervalues(self.__map): del node[:] root = self.__root root[:] = [root, root, None] @@ -110,8 +293,8 @@ def clear(self): def popitem(self, last=True): '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if false. - + Pairs are returned in LIFO order if last is true or FIFO order if + false. ''' if not self: raise KeyError('dictionary is empty') @@ -162,11 +345,10 @@ def iteritems(self): def update(*args, **kwds): '''od.update(E, **F) -> None. Update od from dict/iterable E and F. - If E is a dict instance, does: for k in E: od[k] = E[k] - If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] - Or if E is an iterable of items, does: for k, v in E: od[k] = v - In either case, this is followed by: for k, v in F.items(): od[k] = v - + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does: for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v ''' if len(args) > 2: raise TypeError('update() takes at most 2 positional ' @@ -189,15 +371,15 @@ def update(*args, **kwds): self[key] = value for key, value in kwds.items(): self[key] = value - - __update = update # let subclasses override update without breaking __init__ + # let subclasses override update without breaking __init__ + __update = update __marker = object() def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value. - If key is not found, d is returned if given, otherwise KeyError is raised. - + '''od.pop(k[,d]) -> v, remove specified key and return the\ + corresponding value. If key is not found, d is returned if given, + otherwise KeyError is raised. ''' if key in self: result = self[key] @@ -243,9 +425,8 @@ def copy(self): @classmethod def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S - and values equal to v (which defaults to None). - + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and + values equal to v (which defaults to None). ''' d = cls() for key in iterable: @@ -253,12 +434,13 @@ def fromkeys(cls, iterable, value=None): return d def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive - while comparison to a regular mapping is order-insensitive. - + '''od.__eq__(y) <==> od==y. Comparison to another OD is + order-sensitive while comparison to a regular mapping is + order-insensitive. ''' if isinstance(other, OrderedDict): - return len(self) == len(other) and list(self.items()) == list(other.items()) + return (len(self) == len(other) and + list(self.items()) == list(other.items())) return dict.__eq__(self, other) def __ne__(self, other): @@ -279,7 +461,7 @@ def viewitems(self): return ItemsView(self) -## {{{ http://code.activestate.com/recipes/576611/ (r11) +# {{{ http://code.activestate.com/recipes/576611/ (r11) try: from operator import itemgetter @@ -289,6 +471,7 @@ def viewitems(self): class _Counter(dict): + '''Dict subclass for counting hashable objects. Sometimes called a bag or multiset. Elements are stored as dictionary keys and their counts are stored as dictionary values. @@ -303,10 +486,10 @@ def __init__(self, iterable=None, **kwds): from an input iterable. Or, initialize the count from another mapping of elements to their counts. 
- >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args ''' self.update(iterable, **kwds) @@ -382,7 +565,8 @@ def copy(self): return Counter(self) def __delitem__(self, elem): - 'Like dict.__delitem__() but does not raise KeyError for missing values.' + '''Like dict.__delitem__() but does not raise KeyError for missing + values.''' if elem in self: dict.__delitem__(self, elem) @@ -479,13 +663,15 @@ def __and__(self, other): # http://stackoverflow.com/questions/4126348 # Thanks to @martineau at SO + class OrderedDefaultdict(OrderedDict): + def __init__(self, *args, **kwargs): newdefault = None newargs = () if args: newdefault = args[0] - if not (newdefault is None or six.callable(newdefault)): + if not (newdefault is None or callable(newdefault)): raise TypeError('first argument must be callable or None') newargs = args[1:] self.default_factory = newdefault diff --git a/pandas/util/counter.py b/pandas/util/counter.py index 86200f5ed89ae..f074782466d9d 100644 --- a/pandas/util/counter.py +++ b/pandas/util/counter.py @@ -5,8 +5,7 @@ import heapq as _heapq from itertools import repeat as _repeat, chain as _chain, starmap as _starmap from operator import itemgetter as _itemgetter -import six -from pandas.util.py3compat import map +from pandas.util.compat import map try: from collections import Mapping diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index a5f4cc7e1077b..9711f360042dd 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,6 +1,5 @@ -from pandas.util.py3compat import StringIO +from pandas.util.compat import StringIO, callable from pandas.lib import cache_readonly -import six import sys import warnings @@ -164,7 +163,7 @@ def knownfailureif(fail_condition, msg=None): msg = 'Test skipped due to known failure' # Allow for both boolean or callable known failure conditions. 
- if six.callable(fail_condition): + if callable(fail_condition): fail_val = fail_condition else: fail_val = lambda: fail_condition diff --git a/pandas/util/py3compat.py b/pandas/util/py3compat.py deleted file mode 100644 index 9361bad20df32..0000000000000 --- a/pandas/util/py3compat.py +++ /dev/null @@ -1,72 +0,0 @@ -import sys - -PY3 = (sys.version_info[0] >= 3) -# import iterator versions of these functions -from six.moves import zip, filter, reduce, map - -try: - import __builtin__ as builtins - # not writeable when instantiated with string, doesn't handle unicode well - from cStringIO import StringIO as StringIO - # always writeable - from StringIO import StringIO - BytesIO = StringIO - import cPickle -except ImportError: - import builtins - from io import StringIO, BytesIO - cStringIO = StringIO - import pickle as cPickle - -if PY3: - def isidentifier(s): - return s.isidentifier() - - def str_to_bytes(s, encoding='ascii'): - return s.encode(encoding) - - def bytes_to_str(b, encoding='utf-8'): - return b.decode(encoding) - - # list-producing versions of the major Python iterating functions - def lrange(*args, **kwargs): - return list(range(*args, **kwargs)) - - def lzip(*args, **kwargs): - return list(zip(*args, **kwargs)) - - def lmap(*args, **kwargs): - return list(map(*args, **kwargs)) - - def lfilter(*args, **kwargs): - return list(filter(*args, **kwargs)) - - # need to put range in the namespace - range = range - long = int - unichr = chr -else: - # Python 2 - import re - _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") - - def isidentifier(s, dotted=False): - return bool(_name_re.match(s)) - - def str_to_bytes(s, encoding='ascii'): - return s - - def bytes_to_str(b, encoding='ascii'): - return b - - # Python 2-builtin ranges produce lists - lrange = builtins.range - lzip = builtins.zip - lmap = builtins.map - lfilter = builtins.filter - - # have to explicitly put builtins into the namespace - range = xrange - long = long - unichr = unichr - diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 6ee3ba3b730f7..469612698f2ae 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2,8 +2,6 @@ # pylint: disable-msg=W0402 -from pandas.util.py3compat import range, unichr, lrange, lmap, lzip -from pandas.util.py3compat import zip import random import string import sys @@ -14,7 +12,7 @@ from datetime import datetime from functools import wraps -from contextlib import contextmanager, closing +from contextlib import contextmanager from distutils.version import LooseVersion from numpy.random import randn @@ -27,14 +25,15 @@ import pandas.core.panel as panel import pandas.core.panel4d as panel4d import pandas.util.compat as compat +from pandas.util.compat import( + map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter +) from pandas import bdate_range from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex from pandas.io.common import urlopen, HTTPException -import six -from pandas.util.py3compat import map Index = index.Index MultiIndex = index.MultiIndex @@ -54,7 +53,7 @@ def rands(n): def randu(n): - choices = six.u("").join(map(unichr, lrange(1488, 1488 + 26))) + choices = u("").join(map(unichr, lrange(1488, 1488 + 26))) choices += string.digits return ''.join([random.choice(choices) for _ in range(n)]) @@ -142,7 +141,7 @@ def assert_almost_equal(a, b, check_less_precise = False): if isinstance(a, dict) or isinstance(b, dict): return assert_dict_equal(a, b) - if isinstance(a, six.string_types): + if isinstance(a, 
compat.string_types): assert a == b, "%s != %s" % (a, b) return True @@ -446,7 +445,6 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, if unspecified, string labels will be generated. """ - from pandas.util.compat import Counter if ndupe_l is None: ndupe_l = [1] * nlevels assert (_is_sequence(ndupe_l) and len(ndupe_l) <= nlevels) @@ -463,7 +461,7 @@ def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, names = None # make singelton case uniform - if isinstance(names, six.string_types) and nlevels == 1: + if isinstance(names, compat.string_types) and nlevels == 1: names = [names] # specific 1D index type requested? @@ -694,7 +692,7 @@ def wrapper(*args, **kwargs): def dec(f): return decorator(f, *args, **kwargs) - is_decorating = not kwargs and len(args) == 1 and six.callable(args[0]) + is_decorating = not kwargs and len(args) == 1 and callable(args[0]) if is_decorating: f = args[0] args = [] diff --git a/scripts/bench_join.py b/scripts/bench_join.py index a3bd4157ac163..e82d9cee60f12 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -1,4 +1,4 @@ -from pandas.util.py3compat import range, lrange +from pandas.util.compat import range, lrange import numpy as np import pandas.lib as lib from pandas import * diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index 818ac300956ee..7e67eeb429893 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -1,9 +1,8 @@ from pandas import * import numpy as np -from pandas.util.py3compat import zip +from pandas.util.compat import zip, range, lzip from pandas.util.testing import rands -from pandas.util.py3compat import range, lzip import pandas.lib as lib N = 100000 diff --git a/scripts/bench_refactor.py b/scripts/bench_refactor.py index 812c42b0ee30c..9ec57633fa865 100644 --- a/scripts/bench_refactor.py +++ b/scripts/bench_refactor.py @@ -1,5 +1,5 @@ from pandas import * -from pandas.util.py3compat import range +from pandas.util.compat import range try: import pandas.core.internals as internals reload(internals) diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index a4c76671d71c2..29a9c780c2e77 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -4,9 +4,8 @@ # copryright 2013, y-p @ github from __future__ import print_function -from pandas.util.py3compat import range, lrange -import six -from pandas.util.py3compat import map +from pandas.util.compat import range, lrange, map +import pandas.util.compat as compat """Search the git history for all commits touching a named method @@ -96,7 +94,7 @@ def get_hits(defname,files=()): def get_commit_info(c,fmt,sep='\t'): r=sh.git('log', "--format={}".format(fmt), '{}^..{}'.format(c,c),"-n","1",_tty_out=False) - return six.text_type(r).split(sep) + return compat.text_type(r).split(sep) def get_commit_vitals(c,hlen=HASH_LEN): h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") @@ -185,11 +183,11 @@ def main(): !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
""") return - if isinstance(args.file_masks,six.string_types): + if isinstance(args.file_masks,compat.string_types): args.file_masks = args.file_masks.split(',') - if isinstance(args.path_masks,six.string_types): + if isinstance(args.path_masks,compat.string_types): args.path_masks = args.path_masks.split(',') - if isinstance(args.dir_masks,six.string_types): + if isinstance(args.dir_masks,compat.string_types): args.dir_masks = args.dir_masks.split(',') logger.setLevel(getattr(logging,args.debug_level)) diff --git a/scripts/groupby_sample.py b/scripts/groupby_sample.py index af422bd4bab14..a5e7dc60d607e 100644 --- a/scripts/groupby_sample.py +++ b/scripts/groupby_sample.py @@ -1,7 +1,6 @@ from pandas import * import numpy as np import string -import six import pandas.util.compat as compat g1 = np.array(list(string.letters))[:-1] @@ -45,7 +44,7 @@ def do_shuffle(arr): def shuffle_uri(df, grouped): perm = np.r_[tuple([np.random.permutation( - idxs) for idxs in six.itervalues(grouped.groups)])] + idxs) for idxs in compat.itervalues(grouped.groups)])] df['state_permuted'] = np.asarray(df.ix[perm]['value']) df2 = df.copy() diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py index 6dbf1b073b6ec..b6f9152afccaa 100644 --- a/scripts/groupby_test.py +++ b/scripts/groupby_test.py @@ -8,7 +8,7 @@ import pandas.lib as tseries import pandas.core.groupby as gp import pandas.util.testing as tm -from pandas.util.py3compat import range +from pandas.util.compat import range reload(gp) """ diff --git a/scripts/hdfstore_panel_perf.py b/scripts/hdfstore_panel_perf.py index 18668d729bfff..d530a02debcbb 100644 --- a/scripts/hdfstore_panel_perf.py +++ b/scripts/hdfstore_panel_perf.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.compat import range i, j, k = 7, 771, 5532 diff --git a/scripts/json_manip.py b/scripts/json_manip.py index 7bea33055f535..3ad0edac2bbd6 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -67,15 +67,14 @@ """ from __future__ import print_function -from collections import Counter, namedtuple +from collections import namedtuple import csv import itertools from itertools import product from operator import attrgetter as aget, itemgetter as iget import operator import sys -import six -from pandas.util.py3compat import map +from pandas.util.compat import map, u, callable, Counter import pandas.util.compat as compat @@ -92,77 +91,77 @@ } ## much longer example -ex2 = {six.u('metadata'): {six.u('accessibilities'): [{six.u('name'): six.u('accessibility.tabfocus'), - six.u('value'): 7}, - {six.u('name'): six.u('accessibility.mouse_focuses_formcontrol'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.browsewithcaret'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.win32.force_disabled'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.typeaheadfind.startlinksonly'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.usebrailledisplay'), six.u('value'): six.u('')}, - {six.u('name'): six.u('accessibility.typeaheadfind.timeout'), six.u('value'): 5000}, - {six.u('name'): six.u('accessibility.typeaheadfind.enabletimeout'), six.u('value'): True}, - {six.u('name'): six.u('accessibility.tabfocus_applies_to_xul'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.typeaheadfind.flashBar'), six.u('value'): 1}, - {six.u('name'): six.u('accessibility.typeaheadfind.autostart'), six.u('value'): True}, - {six.u('name'): 
six.u('accessibility.blockautorefresh'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.browsewithcaret_shortcut.enabled'), - six.u('value'): True}, - {six.u('name'): six.u('accessibility.typeaheadfind.enablesound'), six.u('value'): True}, - {six.u('name'): six.u('accessibility.typeaheadfind.prefillwithselection'), - six.u('value'): True}, - {six.u('name'): six.u('accessibility.typeaheadfind.soundURL'), six.u('value'): six.u('beep')}, - {six.u('name'): six.u('accessibility.typeaheadfind'), six.u('value'): False}, - {six.u('name'): six.u('accessibility.typeaheadfind.casesensitive'), six.u('value'): 0}, - {six.u('name'): six.u('accessibility.warn_on_browsewithcaret'), six.u('value'): True}, - {six.u('name'): six.u('accessibility.usetexttospeech'), six.u('value'): six.u('')}, - {six.u('name'): six.u('accessibility.accesskeycausesactivation'), six.u('value'): True}, - {six.u('name'): six.u('accessibility.typeaheadfind.linksonly'), six.u('value'): False}, - {six.u('name'): six.u('isInstantiated'), six.u('value'): True}], - six.u('extensions'): [{six.u('id'): six.u('216ee7f7f4a5b8175374cd62150664efe2433a31'), - six.u('isEnabled'): True}, - {six.u('id'): six.u('1aa53d3b720800c43c4ced5740a6e82bb0b3813e'), six.u('isEnabled'): False}, - {six.u('id'): six.u('01ecfac5a7bd8c9e27b7c5499e71c2d285084b37'), six.u('isEnabled'): True}, - {six.u('id'): six.u('1c01f5b22371b70b312ace94785f7b0b87c3dfb2'), six.u('isEnabled'): True}, - {six.u('id'): six.u('fb723781a2385055f7d024788b75e959ad8ea8c3'), six.u('isEnabled'): True}], - six.u('fxVersion'): six.u('9.0'), - six.u('location'): six.u('zh-CN'), - six.u('operatingSystem'): six.u('WINNT Windows NT 5.1'), - six.u('surveyAnswers'): six.u(''), - six.u('task_guid'): six.u('d69fbd15-2517-45b5-8a17-bb7354122a75'), - six.u('tpVersion'): six.u('1.2'), - six.u('updateChannel'): six.u('beta')}, - six.u('survey_data'): { - six.u('extensions'): [{six.u('appDisabled'): False, - six.u('id'): six.u('testpilot?labs.mozilla.com'), - six.u('isCompatible'): True, - six.u('isEnabled'): True, - six.u('isPlatformCompatible'): True, - six.u('name'): six.u('Test Pilot')}, - {six.u('appDisabled'): True, - six.u('id'): six.u('dict?www.youdao.com'), - six.u('isCompatible'): False, - six.u('isEnabled'): False, - six.u('isPlatformCompatible'): True, - six.u('name'): six.u('Youdao Word Capturer')}, - {six.u('appDisabled'): False, - six.u('id'): six.u('jqs?sun.com'), - six.u('isCompatible'): True, - six.u('isEnabled'): True, - six.u('isPlatformCompatible'): True, - six.u('name'): six.u('Java Quick Starter')}, - {six.u('appDisabled'): False, - six.u('id'): six.u('?20a82645-c095-46ed-80e3-08825760534b?'), - six.u('isCompatible'): True, - six.u('isEnabled'): True, - six.u('isPlatformCompatible'): True, - six.u('name'): six.u('Microsoft .NET Framework Assistant')}, - {six.u('appDisabled'): False, - six.u('id'): six.u('?a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7?'), - six.u('isCompatible'): True, - six.u('isEnabled'): True, - six.u('isPlatformCompatible'): True, - six.u('name'): six.u('WOT')}], - six.u('version_number'): 1}} +ex2 = {u('metadata'): {u('accessibilities'): [{u('name'): u('accessibility.tabfocus'), + u('value'): 7}, + {u('name'): u('accessibility.mouse_focuses_formcontrol'), u('value'): False}, + {u('name'): u('accessibility.browsewithcaret'), u('value'): False}, + {u('name'): u('accessibility.win32.force_disabled'), u('value'): False}, + {u('name'): u('accessibility.typeaheadfind.startlinksonly'), u('value'): False}, + {u('name'): u('accessibility.usebrailledisplay'), 
u('value'): u('')}, + {u('name'): u('accessibility.typeaheadfind.timeout'), u('value'): 5000}, + {u('name'): u('accessibility.typeaheadfind.enabletimeout'), u('value'): True}, + {u('name'): u('accessibility.tabfocus_applies_to_xul'), u('value'): False}, + {u('name'): u('accessibility.typeaheadfind.flashBar'), u('value'): 1}, + {u('name'): u('accessibility.typeaheadfind.autostart'), u('value'): True}, + {u('name'): u('accessibility.blockautorefresh'), u('value'): False}, + {u('name'): u('accessibility.browsewithcaret_shortcut.enabled'), + u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.enablesound'), u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.prefillwithselection'), + u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.soundURL'), u('value'): u('beep')}, + {u('name'): u('accessibility.typeaheadfind'), u('value'): False}, + {u('name'): u('accessibility.typeaheadfind.casesensitive'), u('value'): 0}, + {u('name'): u('accessibility.warn_on_browsewithcaret'), u('value'): True}, + {u('name'): u('accessibility.usetexttospeech'), u('value'): u('')}, + {u('name'): u('accessibility.accesskeycausesactivation'), u('value'): True}, + {u('name'): u('accessibility.typeaheadfind.linksonly'), u('value'): False}, + {u('name'): u('isInstantiated'), u('value'): True}], + u('extensions'): [{u('id'): u('216ee7f7f4a5b8175374cd62150664efe2433a31'), + u('isEnabled'): True}, + {u('id'): u('1aa53d3b720800c43c4ced5740a6e82bb0b3813e'), u('isEnabled'): False}, + {u('id'): u('01ecfac5a7bd8c9e27b7c5499e71c2d285084b37'), u('isEnabled'): True}, + {u('id'): u('1c01f5b22371b70b312ace94785f7b0b87c3dfb2'), u('isEnabled'): True}, + {u('id'): u('fb723781a2385055f7d024788b75e959ad8ea8c3'), u('isEnabled'): True}], + u('fxVersion'): u('9.0'), + u('location'): u('zh-CN'), + u('operatingSystem'): u('WINNT Windows NT 5.1'), + u('surveyAnswers'): u(''), + u('task_guid'): u('d69fbd15-2517-45b5-8a17-bb7354122a75'), + u('tpVersion'): u('1.2'), + u('updateChannel'): u('beta')}, + u('survey_data'): { + u('extensions'): [{u('appDisabled'): False, + u('id'): u('testpilot?labs.mozilla.com'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('Test Pilot')}, + {u('appDisabled'): True, + u('id'): u('dict?www.youdao.com'), + u('isCompatible'): False, + u('isEnabled'): False, + u('isPlatformCompatible'): True, + u('name'): u('Youdao Word Capturer')}, + {u('appDisabled'): False, + u('id'): u('jqs?sun.com'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('Java Quick Starter')}, + {u('appDisabled'): False, + u('id'): u('?20a82645-c095-46ed-80e3-08825760534b?'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('Microsoft .NET Framework Assistant')}, + {u('appDisabled'): False, + u('id'): u('?a0d7ccb3-214d-498b-b4aa-0e8fda9a7bf7?'), + u('isCompatible'): True, + u('isEnabled'): True, + u('isPlatformCompatible'): True, + u('name'): u('WOT')}], + u('version_number'): 1}} # class SurveyResult(object): @@ -274,7 +273,7 @@ def flatten(*stack): except StopIteration: stack.pop(0) continue - if hasattr(x,'next') and six.callable(getattr(x,'next')): + if hasattr(x,'next') and callable(getattr(x,'next')): stack.insert(0, x) #if isinstance(x, (GeneratorType,listerator)): diff --git a/scripts/leak.py b/scripts/leak.py index 5b81a3dfcedcf..3416213cd9668 100644 --- a/scripts/leak.py +++ b/scripts/leak.py @@ -1,5 +1,5 @@ from pandas import * -from 
pandas.util.py3compat import range +from pandas.util.compat import range import numpy as np import pandas.util.testing as tm import os diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py index 6dbb1a74d91f7..cd3feb60e2a3c 100644 --- a/scripts/roll_median_leak.py +++ b/scripts/roll_median_leak.py @@ -6,7 +6,7 @@ from vbench.api import Benchmark from pandas.util.testing import rands -from pandas.util.py3compat import range +from pandas.util.compat import range import pandas.lib as lib import pandas._sandbox as sbx import time diff --git a/scripts/testmed.py b/scripts/testmed.py index c90734912140b..c3724af270ef9 100644 --- a/scripts/testmed.py +++ b/scripts/testmed.py @@ -2,7 +2,7 @@ from random import random from math import log, ceil -from pandas.util.py3compat import range +from pandas.util.compat import range from numpy.random import randn from pandas.lib.skiplist import rolling_median diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index 748b101b144cf..88aac24986805 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -1,6 +1,6 @@ from vbench.api import Benchmark from datetime import datetime -from pandas.util.py3compat import map +from pandas.util.compat import map common_setup = """from pandas_vb_common import * """ diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 8a56ef8ff2bcc..03961821bd14f 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -106,7 +106,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -from pandas.util.py3compat import range +from pandas.util.compat import range import pandas.core.expressions as expr df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) diff --git a/vb_suite/pandas_vb_common.py b/vb_suite/pandas_vb_common.py index 8206c3554545f..77d0e2e27260e 100644 --- a/vb_suite/pandas_vb_common.py +++ b/vb_suite/pandas_vb_common.py @@ -4,7 +4,6 @@ from numpy.random import randn from numpy.random import randint from numpy.random import permutation -import pandas.util.compat as compat import pandas.util.testing as tm import random import numpy as np @@ -24,9 +23,3 @@ from pandas.core.index import MultiIndex except ImportError: pass -try: - # if no range in py3compat, then don't import zip or map either - from pandas.util.py3compat import range - from pandas.util.py3compat import zip, map -except ImportError: - pass diff --git a/vb_suite/parser.py b/vb_suite/parser.py index caae86afdc061..1d5f809f1561f 100644 --- a/vb_suite/parser.py +++ b/vb_suite/parser.py @@ -44,7 +44,7 @@ start_date=datetime(2011, 11, 1)) setup = common_setup + """ -from pandas.util.py3compat import cStringIO as StringIO +from pandas.util.compat import cStringIO as StringIO import os N = 10000 K = 8 @@ -63,7 +63,7 @@ read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) setup = common_setup + """ -from pandas.util.py3compat import cStringIO as StringIO +from pandas.util.compat import cStringIO as StringIO import os N = 10000 K = 8 diff --git a/vb_suite/source/conf.py b/vb_suite/source/conf.py index 2b5753a03d378..ac24d1d039ec5 100644 --- a/vb_suite/source/conf.py +++ b/vb_suite/source/conf.py @@ -12,7 +12,8 @@ import sys import os -import six + +from pandas.util.compat import u # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the @@ -50,8 +51,8 @@ master_doc = 'index' # General information about the project. 
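# A note on the u() helper imported just above: u"" literals are a syntax
# error on Python 3.0-3.2, so unicode text is spelled u('...'); the helper
# decodes via "unicode_escape" on Python 2 and is a no-op on Python 3.
# A tiny illustrative check (assumed usage, not part of this patch):
from pandas.util.compat import u, text_type

s = u('pandas')                  # unicode on Python 2, str on Python 3
assert isinstance(s, text_type)  # text_type is unicode (py2) / str (py3)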
-project = six.u('pandas') -copyright = six.u('2008-2011, the pandas development team') +project = u('pandas') +copyright = u('2008-2011, the pandas development team') # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the @@ -198,8 +199,8 @@ # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'performance.tex', - six.u('pandas vbench Performance Benchmarks'), - six.u('Wes McKinney'), 'manual'), + u('pandas vbench Performance Benchmarks'), + u('Wes McKinney'), 'manual'), ] # The name of an image file (relative to this directory) to place at the top of diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 5101cf7f911f0..095eb04ec294d 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -27,8 +27,7 @@ """ from __future__ import print_function -from pandas.util.py3compat import range -from pandas.util.py3compat import map +from pandas.util.compat import range, lmap import shutil import os import sys @@ -140,7 +139,7 @@ def get_results_df(db, rev): """Takes a git commit hash and returns a Dataframe of benchmark results """ bench = DataFrame(db.get_benchmarks()) - results = DataFrame(map(list,db.get_rev_results(rev).values())) + results = DataFrame(lmap(list,db.get_rev_results(rev).values())) # Sinch vbench.db._reg_rev_results returns an unlabeled dict, # we have to break encapsulation a bit. From e73c54d0f079bb5b8a264d3d0d972c2b4f453c71 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 28 Jul 2013 04:19:22 -0400 Subject: [PATCH 08/11] DOC/ENH: Deprecate iterkv + add docs --- doc/source/release.rst | 15 +++++++++++++++ doc/source/v0.13.0.txt | 16 +++++++++++++++- pandas/core/frame.py | 19 +++----------------- pandas/core/generic.py | 11 ++++++++--- pandas/core/panel.py | 4 ---- pandas/core/series.py | 7 ++++++- pandas/tests/test_frame.py | 4 ++++ 7 files changed, 51 insertions(+), 25 deletions(-) diff --git a/doc/source/release.rst b/doc/source/release.rst index fdcd0863d9f59..abf7cb1c5c9e9 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -47,6 +47,21 @@ pandas 0.13 **API Changes** + - ``pandas`` is now Python 2/3 compatible without the need for 2to3 thanks to + @jtratner. As a result, pandas now uses iterators more extensively. This + also led to the introduction of substantive parts of Benjamin + Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, + :issue:`4372`) + - ``pandas.util.py3compat`` has been merged into ``pandas.util.compat`` and + removed from pandas. It contains both list and iterator versions of range, + filter, map and zip, plus other necessary elements for Python 3 + compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce + lists instead of iterators, for compatibility with ``numpy``, subscripting + and ``pandas`` constructors. (:issue:`4384`, :issue:`4375`, :issue:`4372`) + - deprecated ``iterkv``, which will be removed in a future release (it was + just an alias of ``iteritems`` used to get around ``2to3``'s changes). + (:issue:`4384`, :issue:`4375`, :issue:`4372`) + **Experimental Features** **Bug Fixes** diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 1264f649ace21..5aee66e34e46d 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -12,8 +12,22 @@ API changes - ``read_excel`` now supports an integer in its ``sheetname`` argument giving the index of the sheet to read in (:issue:`4301`). 
- Text parser now treats anything that reads like inf ("inf", "Inf", "-Inf", - "iNf", etc.) to infinity. (:issue:`4220`, :issue:`4219`), affecting + "iNf", etc.) as infinity. (:issue:`4220`, :issue:`4219`), affecting ``read_table``, ``read_csv``, etc. + - ``pandas`` is now Python 2/3 compatible without the need for 2to3 thanks to + @jtratner. As a result, pandas now uses iterators more extensively. This + also led to the introduction of substantive parts of Benjamin + Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, + :issue:`4372`) + - ``pandas.util.py3compat`` has been merged into ``pandas.util.compat`` and + removed from pandas. It contains both list and iterator versions of range, + filter, map and zip, plus other necessary elements for Python 3 + compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce + lists instead of iterators, for compatibility with ``numpy``, subscripting + and ``pandas`` constructors. (:issue:`4384`, :issue:`4375`, :issue:`4372`) + - deprecated ``iterkv``, which will be removed in a future release (was just + an alias of ``iteritems`` used to get around ``2to3``'s changes). + (:issue:`4384`, :issue:`4375`, :issue:`4372`) Enhancements ~~~~~~~~~~~~ diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 080abe8b00bc4..b947d61abbd93 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -17,6 +17,7 @@ import operator import sys import collections +import warnings from numpy import nan as NA import numpy as np @@ -564,14 +565,12 @@ def _wrap_array(self, arr, axes, copy=False): @property def _verbose_info(self): - import warnings warnings.warn('The _verbose_info property will be removed in version ' '0.13. please use "max_info_rows"', FutureWarning) return get_option('display.max_info_rows') is None @_verbose_info.setter def _verbose_info(self, value): - import warnings warnings.warn('The _verbose_info property will be removed in version ' '0.13. please use "max_info_rows"', FutureWarning) @@ -786,7 +785,6 @@ def itertuples(self, index=True): arrays.extend(self.iloc[:, k] for k in range(len(self.columns))) return zip(*arrays) - iterkv = iteritems if compat.PY3: # pragma: no cover items = iteritems @@ -976,7 +974,6 @@ def to_dict(self, outtype='dict'): ------- result : dict like {column -> {index -> value}} """ - import warnings if not self.columns.is_unique: warnings.warn("DataFrame columns are not unique, some " "columns will be omitted.", UserWarning) @@ -1391,7 +1388,6 @@ def to_csv(self, path_or_buf, sep=",", na_rep='', float_format=None, or new (expanded format) if False) """ if nanRep is not None: # pragma: no cover - import warnings warnings.warn("nanRep is deprecated, use na_rep", FutureWarning) na_rep = nanRep @@ -1527,7 +1523,6 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None, """ Render a DataFrame to a console-friendly tabular output. """ - import warnings if force_unicode is not None: # pragma: no cover warnings.warn("force_unicode is deprecated, it will have no " "effect", FutureWarning) @@ -1576,7 +1571,6 @@ def to_html(self, buf=None, columns=None, col_space=None, colSpace=None, Render a DataFrame as an HTML table. """ - import warnings if force_unicode is not None: # pragma: no cover warnings.warn("force_unicode is deprecated, it will have no " "effect", FutureWarning) @@ -1615,7 +1609,6 @@ def to_latex(self, buf=None, columns=None, col_space=None, colSpace=None, You can splice this into a LaTeX document.
""" - import warnings if force_unicode is not None: # pragma: no cover warnings.warn("force_unicode is deprecated, it will have no " "effect", FutureWarning) @@ -2014,7 +2007,6 @@ def _getitem_array(self, key): # go with the __setitem__ behavior since that is more consistent # with all other indexing behavior if isinstance(key, Series) and not key.index.equals(self.index): - import warnings warnings.warn("Boolean Series key will be reindexed to match " "DataFrame index.", UserWarning) elif len(key) != len(self.index): @@ -3194,7 +3186,6 @@ def sort(self, columns=None, column=None, axis=0, ascending=True, sorted : DataFrame """ if column is not None: # pragma: no cover - import warnings warnings.warn("column is deprecated, use columns", FutureWarning) columns = column return self.sort_index(by=columns, axis=axis, ascending=ascending, @@ -3576,13 +3567,11 @@ def replace(self, to_replace=None, value=None, inplace=False, limit=None, raise AssertionError("'to_replace' must be 'None' if 'regex' is " "not a bool") if method is not None: - from warnings import warn - warn('the "method" argument is deprecated and will be removed in' + warnings.warn('the "method" argument is deprecated and will be removed in' 'v0.13; this argument has no effect') if axis is not None: - from warnings import warn - warn('the "axis" argument is deprecated and will be removed in' + warnings.warn('the "axis" argument is deprecated and will be removed in' 'v0.13; this argument has no effect') self._consolidate_inplace() @@ -3717,7 +3706,6 @@ def interpolate(self, to_replace, method='pad', axis=0, inplace=False, -------- reindex, replace, fillna """ - from warnings import warn warn('DataFrame.interpolate will be removed in v0.13, please use ' 'either DataFrame.fillna or DataFrame.replace instead', FutureWarning) @@ -3867,7 +3855,6 @@ def _combine_series_infer(self, other, func, fill_value=None): # teeny hack because one does DataFrame + TimeSeries all the time if self.index.is_all_dates and other.index.is_all_dates: - import warnings warnings.warn(("TimeSeries broadcasting along DataFrame index " "by default is deprecated. Please use " "DataFrame. to explicitly broadcast arithmetic " diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c4be0fc3527bc..8bea809d957f8 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,5 @@ # pylint: disable=W0231,E1101 - +import warnings from pandas.util import compat import numpy as np import pandas.lib as lib @@ -40,13 +40,11 @@ def to_pickle(self, path): return to_pickle(self, path) def save(self, path): # TODO remove in 0.13 - import warnings from pandas.io.pickle import to_pickle warnings.warn("save is deprecated, use to_pickle", FutureWarning) return to_pickle(self, path) def load(self, path): # TODO remove in 0.13 - import warnings from pandas.io.pickle import read_pickle warnings.warn("load is deprecated, use pd.read_pickle", FutureWarning) return read_pickle(path) @@ -717,6 +715,13 @@ def __delitem__(self, key): except KeyError: pass + # originally used to get around 2to3's changes to iteritems. + # Now unnecessary. 
+ def iterkv(self, *args, **kwargs): + warnings.warn("iterkv is deprecated and will be removed in a future " + "release, use ``iteritems`` instead.", DeprecationWarning) + return self.iteritems(*args, **kwargs) + def get_dtype_counts(self): """ return the counts of dtypes in this frame """ from pandas import Series diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 29a8ecf940cea..05a4a4b4fcedf 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -496,10 +496,6 @@ def iteritems(self): for h in getattr(self, self._info_axis): yield h, self[h] - # Name that won't get automatically converted to items by 2to3. items is - # already in use for the first axis. - iterkv = iteritems - def _get_plane_axes(self, axis): """ Get my plane axes: these are already diff --git a/pandas/core/series.py b/pandas/core/series.py index 294898bbd4554..21853f28375b0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -9,6 +9,7 @@ import operator from distutils.version import LooseVersion import types +import warnings from numpy import nan, ndarray import numpy as np @@ -1219,7 +1220,11 @@ def iteritems(self): """ return lzip(iter(self.index), iter(self)) - iterkv = iteritems + def iterkv(self): + warnings.warn("iterkv is deprecated and will be removed in a future " + "release. Use ``iteritems`` instead", DeprecationWarning) + return self.iteritems() + if compat.PY3: # pragma: no cover items = iteritems diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 25397c09cb135..39a62ddc3922b 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -10305,6 +10305,10 @@ def test_take(self): expected = df.ix[:, ['B', 'C', 'A', 'D']] assert_frame_equal(result, expected) + def test_iterkv_deprecation(self): + with tm.assert_produces_warning(DeprecationWarning): + self.mixed_float.iterkv() + def test_iterkv_names(self): for k, v in compat.iteritems(self.mixed_frame): self.assertEqual(v.name, k) From 27b601e81e45845555f7f565cf5670fce499af3a Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 28 Jul 2013 17:03:57 -0400 Subject: [PATCH 09/11] CLN: pandas.util.compat --> pandas.compat --- bench/alignment.py | 2 +- bench/bench_get_put_value.py | 2 +- bench/bench_groupby.py | 2 +- bench/bench_khash_dict.py | 2 +- bench/bench_merge.py | 2 +- bench/bench_merge_sqlite.py | 2 +- bench/bench_sparse.py | 2 +- bench/bench_take_indexing.py | 2 +- bench/bench_unique.py | 2 +- bench/better_unique.py | 4 +- bench/io_roundtrip.py | 2 +- bench/serialize.py | 2 +- bench/test.py | 2 +- doc/plots/stats/moment_plots.py | 2 +- doc/source/conf.py | 2 +- doc/source/release.rst | 5 +- doc/source/v0.13.0.txt | 5 +- doc/sphinxext/comment_eater.py | 2 +- doc/sphinxext/compiler_unparse.py | 2 +- doc/sphinxext/docscrape.py | 2 +- doc/sphinxext/docscrape_sphinx.py | 2 +- doc/sphinxext/ipython_directive.py | 2 +- doc/sphinxext/numpydoc.py | 2 +- doc/sphinxext/plot_directive.py | 2 +- doc/sphinxext/tests/test_docscrape.py | 2 +- doc/sphinxext/traitsdoc.py | 2 +- examples/finance.py | 2 +- pandas/compat/__init__.py | 688 +++++++++++++++++++++ pandas/compat/scipy.py | 2 +- pandas/core/algorithms.py | 2 +- pandas/core/base.py | 2 +- pandas/core/common.py | 4 +- pandas/core/config.py | 4 +- pandas/core/format.py | 6 +- pandas/core/frame.py | 6 +- pandas/core/generic.py | 4 +- pandas/core/groupby.py | 6 +- pandas/core/index.py | 4 +- pandas/core/indexing.py | 4 +- pandas/core/internals.py | 4 +- pandas/core/nanops.py | 4 +- pandas/core/panel.py | 6 +- pandas/core/panelnd.py | 4 +- 
pandas/core/reshape.py | 4 +- pandas/core/series.py | 6 +- pandas/core/strings.py | 4 +- pandas/io/clipboard.py | 2 +- pandas/io/common.py | 4 +- pandas/io/data.py | 4 +- pandas/io/date_converters.py | 2 +- pandas/io/excel.py | 4 +- pandas/io/ga.py | 4 +- pandas/io/html.py | 4 +- pandas/io/json.py | 4 +- pandas/io/parsers.py | 6 +- pandas/io/pickle.py | 2 +- pandas/io/pytables.py | 6 +- pandas/io/sql.py | 4 +- pandas/io/stata.py | 6 +- pandas/io/tests/generate_legacy_pickles.py | 2 +- pandas/io/tests/test_cparser.py | 6 +- pandas/io/tests/test_data.py | 2 +- pandas/io/tests/test_date_converters.py | 4 +- pandas/io/tests/test_excel.py | 4 +- pandas/io/tests/test_html.py | 4 +- pandas/io/tests/test_json/test_pandas.py | 4 +- pandas/io/tests/test_json/test_ujson.py | 6 +- pandas/io/tests/test_parsers.py | 4 +- pandas/io/tests/test_pickle.py | 2 +- pandas/io/tests/test_pytables.py | 4 +- pandas/io/tests/test_sql.py | 4 +- pandas/io/wb.py | 2 +- pandas/rpy/common.py | 2 +- pandas/sparse/array.py | 2 +- pandas/sparse/frame.py | 4 +- pandas/sparse/panel.py | 4 +- pandas/sparse/series.py | 2 +- pandas/sparse/tests/test_array.py | 2 +- pandas/sparse/tests/test_list.py | 2 +- pandas/sparse/tests/test_sparse.py | 4 +- pandas/src/generate_code.py | 2 +- pandas/stats/fama_macbeth.py | 2 +- pandas/stats/math.py | 2 +- pandas/stats/misc.py | 4 +- pandas/stats/ols.py | 4 +- pandas/stats/plm.py | 4 +- pandas/stats/tests/test_fama_macbeth.py | 4 +- pandas/stats/tests/test_moments.py | 2 +- pandas/stats/tests/test_ols.py | 4 +- pandas/stats/tests/test_var.py | 2 +- pandas/stats/var.py | 4 +- pandas/tests/test_algos.py | 2 +- pandas/tests/test_categorical.py | 2 +- pandas/tests/test_common.py | 4 +- pandas/tests/test_compat.py | 2 +- pandas/tests/test_expressions.py | 2 +- pandas/tests/test_format.py | 4 +- pandas/tests/test_frame.py | 4 +- pandas/tests/test_graphics.py | 2 +- pandas/tests/test_groupby.py | 4 +- pandas/tests/test_index.py | 4 +- pandas/tests/test_indexing.py | 4 +- pandas/tests/test_internals.py | 2 +- pandas/tests/test_multilevel.py | 2 +- pandas/tests/test_panel.py | 6 +- pandas/tests/test_panel4d.py | 6 +- pandas/tests/test_panelnd.py | 2 +- pandas/tests/test_reshape.py | 2 +- pandas/tests/test_rplot.py | 2 +- pandas/tests/test_series.py | 4 +- pandas/tests/test_stats.py | 4 +- pandas/tests/test_strings.py | 4 +- pandas/tests/test_tseries.py | 2 +- pandas/tools/merge.py | 4 +- pandas/tools/pivot.py | 4 +- pandas/tools/plotting.py | 4 +- pandas/tools/rplot.py | 2 +- pandas/tools/tests/test_merge.py | 4 +- pandas/tools/tests/test_pivot.py | 2 +- pandas/tools/tests/test_tile.py | 2 +- pandas/tools/tile.py | 2 +- pandas/tools/util.py | 2 +- pandas/tseries/converter.py | 4 +- pandas/tseries/frequencies.py | 4 +- pandas/tseries/index.py | 4 +- pandas/tseries/offsets.py | 4 +- pandas/tseries/period.py | 4 +- pandas/tseries/resample.py | 2 +- pandas/tseries/tests/test_converter.py | 2 +- pandas/tseries/tests/test_daterange.py | 2 +- pandas/tseries/tests/test_frequencies.py | 2 +- pandas/tseries/tests/test_offsets.py | 4 +- pandas/tseries/tests/test_period.py | 4 +- pandas/tseries/tests/test_plotting.py | 2 +- pandas/tseries/tests/test_resample.py | 2 +- pandas/tseries/tests/test_timeseries.py | 4 +- pandas/tseries/tests/test_timezones.py | 4 +- pandas/tseries/tests/test_util.py | 2 +- pandas/tseries/tools.py | 4 +- pandas/tseries/util.py | 2 +- pandas/util/compat.py | 688 --------------------- pandas/util/counter.py | 4 +- pandas/util/decorators.py | 2 +- pandas/util/testing.py | 4 +- 
scripts/bench_join.py | 2 +- scripts/bench_join_multi.py | 2 +- scripts/bench_refactor.py | 2 +- scripts/file_sizes.py | 2 +- scripts/find_commits_touching_func.py | 2 +- scripts/groupby_sample.py | 2 +- scripts/groupby_test.py | 2 +- scripts/hdfstore_panel_perf.py | 2 +- scripts/json_manip.py | 4 +- scripts/leak.py | 2 +- scripts/parser_magic.py | 2 +- scripts/roll_median_leak.py | 2 +- scripts/testmed.py | 2 +- vb_suite/groupby.py | 2 +- vb_suite/indexing.py | 2 +- vb_suite/parser.py | 4 +- vb_suite/perf_HEAD.py | 2 +- vb_suite/source/conf.py | 2 +- vb_suite/test_perf.py | 2 +- 163 files changed, 938 insertions(+), 936 deletions(-) delete mode 100644 pandas/util/compat.py diff --git a/bench/alignment.py b/bench/alignment.py index 0cc0de797436a..bc3134f597ee0 100644 --- a/bench/alignment.py +++ b/bench/alignment.py @@ -1,5 +1,5 @@ # Setup -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import numpy as np import pandas import la diff --git a/bench/bench_get_put_value.py b/bench/bench_get_put_value.py index 405f22450c2f6..427e0b1b10a22 100644 --- a/bench/bench_get_put_value.py +++ b/bench/bench_get_put_value.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.compat import range +from pandas.compat import range N = 1000 K = 50 diff --git a/bench/bench_groupby.py b/bench/bench_groupby.py index 76c92407d69ce..a86e8ed623ef7 100644 --- a/bench/bench_groupby.py +++ b/bench/bench_groupby.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.compat import range +from pandas.compat import range import string import random diff --git a/bench/bench_khash_dict.py b/bench/bench_khash_dict.py index 7e9f3c10387bb..054fc36131b65 100644 --- a/bench/bench_khash_dict.py +++ b/bench/bench_khash_dict.py @@ -8,7 +8,7 @@ from vbench.api import Benchmark from pandas.util.testing import rands -from pandas.util.compat import range +from pandas.compat import range import pandas._tseries as lib import pandas._sandbox as sbx import time diff --git a/bench/bench_merge.py b/bench/bench_merge.py index da2706dcff5e0..330dba7b9af69 100644 --- a/bench/bench_merge.py +++ b/bench/bench_merge.py @@ -2,7 +2,7 @@ import gc import time from pandas import * -from pandas.util.compat import range, lrange, StringIO +from pandas.compat import range, lrange, StringIO from pandas.util.testing import rands N = 10000 diff --git a/bench/bench_merge_sqlite.py b/bench/bench_merge_sqlite.py index 6a8829e311408..3ad4b810119c3 100644 --- a/bench/bench_merge_sqlite.py +++ b/bench/bench_merge_sqlite.py @@ -4,7 +4,7 @@ import time from pandas import DataFrame from pandas.util.testing import rands -from pandas.util.compat import range, zip +from pandas.compat import range, zip import random N = 10000 diff --git a/bench/bench_sparse.py b/bench/bench_sparse.py index beb3e84c3e42b..7dc2db05cfe20 100644 --- a/bench/bench_sparse.py +++ b/bench/bench_sparse.py @@ -3,7 +3,7 @@ from pandas import * import pandas.core.sparse as spm -import pandas.util.compat as compat +import pandas.compat as compat reload(spm) from pandas.core.sparse import * diff --git a/bench/bench_take_indexing.py b/bench/bench_take_indexing.py index cce9035a4735f..5fb584bcfe45f 100644 --- a/bench/bench_take_indexing.py +++ b/bench/bench_take_indexing.py @@ -6,7 +6,7 @@ from pandas import DataFrame import timeit -from pandas.util.compat import zip +from pandas.compat import zip setup = """ from pandas import Series diff --git a/bench/bench_unique.py 
b/bench/bench_unique.py index 0c89f636f942c..87bd2f2df586c 100644 --- a/bench/bench_unique.py +++ b/bench/bench_unique.py @@ -1,7 +1,7 @@ from __future__ import print_function from pandas import * from pandas.util.testing import rands -from pandas.util.compat import range, zip +from pandas.compat import range, zip import pandas._tseries as lib import numpy as np import matplotlib.pyplot as plt diff --git a/bench/better_unique.py b/bench/better_unique.py index 97c667fbfe55f..e03a4f433ce66 100644 --- a/bench/better_unique.py +++ b/bench/better_unique.py @@ -1,12 +1,12 @@ from __future__ import print_function from pandas import DataFrame -from pandas.util.compat import range, zip +from pandas.compat import range, zip import timeit setup = """ from pandas import Series import pandas._tseries as _tseries -from pandas.util.compat import range +from pandas.compat import range import random import numpy as np diff --git a/bench/io_roundtrip.py b/bench/io_roundtrip.py index bd2293d8fdb50..e389481d1aabc 100644 --- a/bench/io_roundtrip.py +++ b/bench/io_roundtrip.py @@ -5,7 +5,7 @@ import la import pandas -from pandas.util.compat import range +from pandas.compat import range from pandas import datetools, DateRange diff --git a/bench/serialize.py b/bench/serialize.py index 7a6d5838f8257..b0edd6a5752d2 100644 --- a/bench/serialize.py +++ b/bench/serialize.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import time import os import numpy as np diff --git a/bench/test.py b/bench/test.py index 49396f608b5c9..2339deab313a1 100644 --- a/bench/test.py +++ b/bench/test.py @@ -2,7 +2,7 @@ import itertools import collections import scipy.ndimage as ndi -from pandas.util.compat import zip, range +from pandas.compat import zip, range N = 10000 diff --git a/doc/plots/stats/moment_plots.py b/doc/plots/stats/moment_plots.py index 0e7ee89bd07ec..86ec1d10de520 100644 --- a/doc/plots/stats/moment_plots.py +++ b/doc/plots/stats/moment_plots.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range +from pandas.compat import range import numpy as np import matplotlib.pyplot as plt diff --git a/doc/source/conf.py b/doc/source/conf.py index 736d190115a73..99da77dd5d570 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -12,7 +12,7 @@ import sys import os -from pandas.util.compat import u +from pandas.compat import u # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the diff --git a/doc/source/release.rst b/doc/source/release.rst index abf7cb1c5c9e9..90d5b1600b4eb 100644 --- a/doc/source/release.rst +++ b/doc/source/release.rst @@ -52,8 +52,9 @@ pandas 0.13 also led to the introduction of substantive parts of Benjamin Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, :issue:`4372`) - ``pandas.util.py3compat`` has been merged into ``pandas.util.compat`` and - removed from pandas. It contains both list and iterator versions of range, + ``pandas.util.compat`` and ``pandas.util.py3compat`` have been merged into + ``pandas.compat``. ``pandas.compat`` now includes many functions allowing + 2/3 compatibility. It contains both list and iterator versions of range, filter, map and zip, plus other necessary elements for Python 3 compatibility.
``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce lists instead of iterators, for compatibility with ``numpy``, subscripting diff --git a/doc/source/v0.13.0.txt b/doc/source/v0.13.0.txt index 5aee66e34e46d..9f2f7c870f849 100644 --- a/doc/source/v0.13.0.txt +++ b/doc/source/v0.13.0.txt @@ -19,8 +19,9 @@ API changes also led to the introduction of substantive parts of Benjamin Peterson's ``six`` library into compat. (:issue:`4384`, :issue:`4375`, :issue:`4372`) - ``pandas.util.py3compat`` has been merged into ``pandas.util.compat`` and - removed from pandas. It contains both list and iterator versions of range, + ``pandas.util.compat`` and ``pandas.util.py3compat`` have been merged into + ``pandas.compat``. ``pandas.compat`` now includes many functions allowing + 2/3 compatibility. It contains both list and iterator versions of range, filter, map and zip, plus other necessary elements for Python 3 compatibility. ``lmap``, ``lzip``, ``lrange`` and ``lfilter`` all produce lists instead of iterators, for compatibility with ``numpy``, subscripting diff --git a/doc/sphinxext/comment_eater.py b/doc/sphinxext/comment_eater.py index f1c290b30db8e..1c6d46c5aed6c 100755 --- a/doc/sphinxext/comment_eater.py +++ b/doc/sphinxext/comment_eater.py @@ -1,4 +1,4 @@ -from pandas.util.compat import cStringIO +from pandas.compat import cStringIO import compiler import inspect import textwrap diff --git a/doc/sphinxext/compiler_unparse.py b/doc/sphinxext/compiler_unparse.py index 69a4f8e9b02f7..46b7257c455f7 100755 --- a/doc/sphinxext/compiler_unparse.py +++ b/doc/sphinxext/compiler_unparse.py @@ -12,7 +12,7 @@ """ import sys -from pandas.util.compat import cStringIO as StringIO +from pandas.compat import cStringIO as StringIO from compiler.ast import Const, Name, Tuple, Div, Mul, Sub, Add def unparse(ast, single_line_functions=False): diff --git a/doc/sphinxext/docscrape.py b/doc/sphinxext/docscrape.py index 1cc57b415ebce..3c2c303e85ccd 100755 --- a/doc/sphinxext/docscrape.py +++ b/doc/sphinxext/docscrape.py @@ -8,7 +8,7 @@ import re import pydoc from warnings import warn -from pandas.util.compat import StringIO, callable +from pandas.compat import StringIO, callable class Reader(object): """A line-based string reader.
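The sphinxext hunks above are typical consumers of the merged module; a minimal sketch of the consumer side, assuming only names the hunks themselves import (``StringIO``, ``callable``, ``u`` from ``pandas.compat``):

    from pandas.compat import StringIO, callable, u

    buf = StringIO()            # io.StringIO on Python 3, StringIO.StringIO on 2
    buf.write(u('pandas'))      # u() avoids u'' literals, a SyntaxError on 3.0-3.2
    assert callable(buf.write)  # compat's callable also covers 3.0/3.1, which
                                # dropped the builtin
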
diff --git a/doc/sphinxext/docscrape_sphinx.py b/doc/sphinxext/docscrape_sphinx.py index 0f32807761c9f..650a2d8f33dd0 100755 --- a/doc/sphinxext/docscrape_sphinx.py +++ b/doc/sphinxext/docscrape_sphinx.py @@ -1,7 +1,7 @@ import re, inspect, textwrap, pydoc import sphinx from .docscrape import NumpyDocString, FunctionDoc, ClassDoc -from pandas.util.compat import callable +from pandas.compat import callable class SphinxDocString(NumpyDocString): def __init__(self, docstring, config={}): diff --git a/doc/sphinxext/ipython_directive.py b/doc/sphinxext/ipython_directive.py index 49e6ac913c9a5..948d60c3760e9 100644 --- a/doc/sphinxext/ipython_directive.py +++ b/doc/sphinxext/ipython_directive.py @@ -58,7 +58,7 @@ #----------------------------------------------------------------------------- # Stdlib -from pandas.util.compat import zip, range, map, lmap, u, cStringIO as StringIO +from pandas.compat import zip, range, map, lmap, u, cStringIO as StringIO import ast import os import re diff --git a/doc/sphinxext/numpydoc.py b/doc/sphinxext/numpydoc.py index 8857c1f80cce6..6f79703380a3d 100755 --- a/doc/sphinxext/numpydoc.py +++ b/doc/sphinxext/numpydoc.py @@ -23,7 +23,7 @@ import os, re, pydoc from .docscrape_sphinx import get_doc_object, SphinxDocString -from pandas.util.compat import u, callable +from pandas.compat import u, callable from sphinx.util.compat import Directive import inspect diff --git a/doc/sphinxext/plot_directive.py b/doc/sphinxext/plot_directive.py index e48899a06bb8b..b86c43249dbe8 100755 --- a/doc/sphinxext/plot_directive.py +++ b/doc/sphinxext/plot_directive.py @@ -75,7 +75,7 @@ """ -from pandas.util.compat import range, cStringIO as StringIO, map +from pandas.compat import range, cStringIO as StringIO, map import sys, os, glob, shutil, imp, warnings, re, textwrap, traceback import sphinx diff --git a/doc/sphinxext/tests/test_docscrape.py b/doc/sphinxext/tests/test_docscrape.py index e9de8cf63314b..ef2dfacc5b560 100755 --- a/doc/sphinxext/tests/test_docscrape.py +++ b/doc/sphinxext/tests/test_docscrape.py @@ -7,7 +7,7 @@ from docscrape import NumpyDocString, FunctionDoc, ClassDoc from docscrape_sphinx import SphinxDocString, SphinxClassDoc from nose.tools import * -from pandas.util.compat import u +from pandas.compat import u doc_txt = '''\ numpy.multivariate_normal(mean, cov, shape=None) diff --git a/doc/sphinxext/traitsdoc.py b/doc/sphinxext/traitsdoc.py index 2c74e70bb8e50..8ec57a607ffb9 100755 --- a/doc/sphinxext/traitsdoc.py +++ b/doc/sphinxext/traitsdoc.py @@ -18,7 +18,7 @@ import os import pydoc -from pandas.util.compat import callable +from pandas.compat import callable from . import docscrape from . import docscrape_sphinx from .docscrape_sphinx import SphinxClassDoc, SphinxFunctionDoc, SphinxDocString diff --git a/examples/finance.py b/examples/finance.py index f795b0c722cf7..91ac57f67d91d 100644 --- a/examples/finance.py +++ b/examples/finance.py @@ -3,7 +3,7 @@ """ from datetime import datetime -from pandas.util.compat import zip +from pandas.compat import zip import matplotlib.finance as fin import numpy as np diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index e69de29bb2d1d..27f5671ca02b9 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -0,0 +1,688 @@ +""" +compat +====== + +Cross-compatible functions for Python 2 and 3. 
+ +Key items to import for 2/3 compatible code: +* iterators: range(), map(), zip(), filter(), reduce() +* lists: lrange(), lmap(), lzip(), lfilter() +* unicode: u() [u"" is a syntax error in Python 3.0-3.2] +* longs: long (int in Python 3) +* callable +* iterable method compatibility: iteritems, iterkeys, itervalues + * Uses the original method if available, otherwise uses items, keys, values. +* types: + * text_type: unicode in Python 2, str in Python 3 + * binary_type: str in Python 2, bytes in Python 3 + * string_types: basestring in Python 2, str in Python 3 +* bind_method: binds functions to classes + +Python 2.6 compatibility: +* OrderedDict +* Counter + +Other items: +* OrderedDefaultdict +""" +# pylint: disable=W0611 +import functools +import itertools +from itertools import product +import sys +import types + +PY3 = (sys.version_info[0] >= 3) +# import iterator versions of these functions + +try: + import __builtin__ as builtins + # not writeable when instantiated with string, doesn't handle unicode well + from cStringIO import StringIO as cStringIO + # always writeable + from StringIO import StringIO + BytesIO = StringIO + import cPickle +except ImportError: + import builtins + from io import StringIO, BytesIO + cStringIO = StringIO + import pickle as cPickle + + +if PY3: + def isidentifier(s): + return s.isidentifier() + + def str_to_bytes(s, encoding='ascii'): + return s.encode(encoding) + + def bytes_to_str(b, encoding='utf-8'): + return b.decode(encoding) + + # have to explicitly put builtins into the namespace + range = range + map = map + zip = zip + filter = filter + reduce = functools.reduce + long = int + unichr = chr + + # list-producing versions of the major Python iterating functions + def lrange(*args, **kwargs): + return list(range(*args, **kwargs)) + + def lzip(*args, **kwargs): + return list(zip(*args, **kwargs)) + + def lmap(*args, **kwargs): + return list(map(*args, **kwargs)) + + def lfilter(*args, **kwargs): + return list(filter(*args, **kwargs)) +else: + # Python 2 + import re + _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") + + def isidentifier(s, dotted=False): + return bool(_name_re.match(s)) + + def str_to_bytes(s, encoding='ascii'): + return s + + def bytes_to_str(b, encoding='ascii'): + return b + + range = xrange + zip = itertools.izip + filter = itertools.ifilter + map = itertools.imap + reduce = reduce + long = long + unichr = unichr + + # Python 2-builtin ranges produce lists + lrange = builtins.range + lzip = builtins.zip + lmap = builtins.map + lfilter = builtins.filter + + +def iteritems(obj, **kwargs): + """replacement for six's iteritems for Python2/3 compat + uses 'iteritems' if available and otherwise uses 'items'. + + Passes kwargs to method.""" + func = getattr(obj, "iteritems", None) + if not func: + func = obj.items + return func(**kwargs) + + +def iterkeys(obj, **kwargs): + func = getattr(obj, "iterkeys", None) + if not func: + func = obj.keys + return func(**kwargs) + + +def itervalues(obj, **kwargs): + func = getattr(obj, "itervalues", None) + if not func: + func = obj.values + return func(**kwargs) + + +def bind_method(cls, name, func): + """Bind a method to class, python 2 and python 3 compatible.
+ + Parameters + ---------- + + cls : type + class to receive bound method + name : basestring + name of method on class instance + func : function + function to be bound as method + + + Returns + ------- + None + """ + # only python 2 has bound/unbound method issue + if not PY3: + setattr(cls, name, types.MethodType(func, None, cls)) + else: + setattr(cls, name, func) +# ---------------------------------------------------------------------------- +# functions largely based / taken from the six module + +# Much of the code in this module comes from Benjamin Peterson's six library. +# The license for this library can be found in LICENSES/SIX and the code can be +# found at https://bitbucket.org/gutworth/six + +if PY3: + string_types = str, + integer_types = int, + class_types = type, + text_type = str + binary_type = bytes + + def u(s): + return s +else: + string_types = basestring, + integer_types = (int, long) + class_types = (type, types.ClassType) + text_type = unicode + binary_type = str + + def u(s): + return unicode(s, "unicode_escape") + +try: + # callable reintroduced in later versions of Python + callable = callable +except NameError: + def callable(obj): + return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) + +# ---------------------------------------------------------------------------- +# Python 2.6 compatibility shims +# + +# OrderedDict Shim from Raymond Hettinger, python core dev +# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ +# here to support versions before 2.6 +if not PY3: + # don't need this except in 2.6 + try: + from thread import get_ident as _get_ident + except ImportError: + from dummy_thread import get_ident as _get_ident + +try: + from _abcoll import KeysView, ValuesView, ItemsView +except ImportError: + pass + + +class _OrderedDict(dict): + + 'Dictionary that remembers insertion order' + # An inherited dict maps keys to values. + # The inherited dict provides __getitem__, __len__, __contains__, and get. + # The remaining methods are order-aware. + # Big-O running times for all methods are the same as for regular + # dictionaries. + + # The internal self.__map dictionary maps keys to links in a doubly linked + # list. The circular doubly linked list starts and ends with a sentinel + # element. The sentinel element never gets deleted (this simplifies the + # algorithm). Each link is stored as a list of length three: [PREV, NEXT, + # KEY]. + + def __init__(self, *args, **kwds): + '''Initialize an ordered dictionary. Signature is the same as for + regular dictionaries, but keyword arguments are not recommended + because their insertion order is arbitrary. + + ''' + if len(args) > 1: + raise TypeError('expected at most 1 arguments, got %d' % len(args)) + try: + self.__root + except AttributeError: + self.__root = root = [] # sentinel node + root[:] = [root, root, None] + self.__map = {} + self.__update(*args, **kwds) + + def __setitem__(self, key, value, dict_setitem=dict.__setitem__): + 'od.__setitem__(i, y) <==> od[i]=y' + # Setting a new item creates a new link which goes at the end of the + # linked list, and the inherited dictionary is updated with the new + # key/value pair. 
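+ # (The sentinel root node never leaves the list, so appending before it + # needs no empty-dict special case; each link is the [PREV, NEXT, KEY] + # triple described in the class comment above.)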
+ if key not in self: + root = self.__root + last = root[0] + last[1] = root[0] = self.__map[key] = [last, root, key] + dict_setitem(self, key, value) + + def __delitem__(self, key, dict_delitem=dict.__delitem__): + 'od.__delitem__(y) <==> del od[y]' + # Deleting an existing item uses self.__map to find the link which is + # then removed by updating the links in the predecessor and successor + # nodes. + dict_delitem(self, key) + link_prev, link_next, key = self.__map.pop(key) + link_prev[1] = link_next + link_next[0] = link_prev + + def __iter__(self): + 'od.__iter__() <==> iter(od)' + root = self.__root + curr = root[1] + while curr is not root: + yield curr[2] + curr = curr[1] + + def __reversed__(self): + 'od.__reversed__() <==> reversed(od)' + root = self.__root + curr = root[0] + while curr is not root: + yield curr[2] + curr = curr[0] + + def clear(self): + 'od.clear() -> None. Remove all items from od.' + try: + for node in itervalues(self.__map): + del node[:] + root = self.__root + root[:] = [root, root, None] + self.__map.clear() + except AttributeError: + pass + dict.clear(self) + + def popitem(self, last=True): + '''od.popitem() -> (k, v), return and remove a (key, value) pair. + Pairs are returned in LIFO order if last is true or FIFO order if + false. + ''' + if not self: + raise KeyError('dictionary is empty') + root = self.__root + if last: + link = root[0] + link_prev = link[0] + link_prev[1] = root + root[0] = link_prev + else: + link = root[1] + link_next = link[1] + root[1] = link_next + link_next[0] = root + key = link[2] + del self.__map[key] + value = dict.pop(self, key) + return key, value + + # -- the following methods do not depend on the internal structure -- + + def keys(self): + 'od.keys() -> list of keys in od' + return list(self) + + def values(self): + 'od.values() -> list of values in od' + return [self[key] for key in self] + + def items(self): + 'od.items() -> list of (key, value) pairs in od' + return [(key, self[key]) for key in self] + + def iterkeys(self): + 'od.iterkeys() -> an iterator over the keys in od' + return iter(self) + + def itervalues(self): + 'od.itervalues -> an iterator over the values in od' + for k in self: + yield self[k] + + def iteritems(self): + 'od.iteritems -> an iterator over the (key, value) items in od' + for k in self: + yield (k, self[k]) + + def update(*args, **kwds): + '''od.update(E, **F) -> None. Update od from dict/iterable E and F. + + If E is a dict instance, does: for k in E: od[k] = E[k] + If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] + Or if E is an iterable of items, does:for k, v in E: od[k] = v + In either case, this is followed by: for k, v in F.items(): od[k] = v + ''' + if len(args) > 2: + raise TypeError('update() takes at most 2 positional ' + 'arguments (%d given)' % (len(args),)) + elif not args: + raise TypeError('update() takes at least 1 argument (0 given)') + self = args[0] + # Make progressively weaker assumptions about "other" + other = () + if len(args) == 2: + other = args[1] + if isinstance(other, dict): + for key in other: + self[key] = other[key] + elif hasattr(other, 'keys'): + for key in other.keys(): + self[key] = other[key] + else: + for key, value in other: + self[key] = value + for key, value in kwds.items(): + self[key] = value + # let subclasses override update without breaking __init__ + __update = update + + __marker = object() + + def pop(self, key, default=__marker): + '''od.pop(k[,d]) -> v, remove specified key and return the\ + corresponding value. 
If key is not found, d is returned if given, + otherwise KeyError is raised. + ''' + if key in self: + result = self[key] + del self[key] + return result + if default is self.__marker: + raise KeyError(key) + return default + + def setdefault(self, key, default=None): + 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' + if key in self: + return self[key] + self[key] = default + return default + + def __repr__(self, _repr_running={}): + 'od.__repr__() <==> repr(od)' + call_key = id(self), _get_ident() + if call_key in _repr_running: + return '...' + _repr_running[call_key] = 1 + try: + if not self: + return '%s()' % (self.__class__.__name__,) + return '%s(%r)' % (self.__class__.__name__, list(self.items())) + finally: + del _repr_running[call_key] + + def __reduce__(self): + 'Return state information for pickling' + items = [[k, self[k]] for k in self] + inst_dict = vars(self).copy() + for k in vars(OrderedDict()): + inst_dict.pop(k, None) + if inst_dict: + return (self.__class__, (items,), inst_dict) + return self.__class__, (items,) + + def copy(self): + 'od.copy() -> a shallow copy of od' + return self.__class__(self) + + @classmethod + def fromkeys(cls, iterable, value=None): + '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and + values equal to v (which defaults to None). + ''' + d = cls() + for key in iterable: + d[key] = value + return d + + def __eq__(self, other): + '''od.__eq__(y) <==> od==y. Comparison to another OD is + order-sensitive while comparison to a regular mapping is + order-insensitive. + ''' + if isinstance(other, OrderedDict): + return (len(self) == len(other) and + list(self.items()) == list(other.items())) + return dict.__eq__(self, other) + + def __ne__(self, other): + return not self == other + + # -- the following methods are only used in Python 2.7 -- + + def viewkeys(self): + "od.viewkeys() -> a set-like object providing a view on od's keys" + return KeysView(self) + + def viewvalues(self): + "od.viewvalues() -> an object providing a view on od's values" + return ValuesView(self) + + def viewitems(self): + "od.viewitems() -> a set-like object providing a view on od's items" + return ItemsView(self) + + +# {{{ http://code.activestate.com/recipes/576611/ (r11) + +try: + from operator import itemgetter + from heapq import nlargest +except ImportError: + pass + + +class _Counter(dict): + + '''Dict subclass for counting hashable objects. Sometimes called a bag + or multiset. Elements are stored as dictionary keys and their counts + are stored as dictionary values. + + >>> Counter('zyzygy') + Counter({'y': 3, 'z': 2, 'g': 1}) + + ''' + + def __init__(self, iterable=None, **kwds): + '''Create a new, empty Counter object. And if given, count elements + from an input iterable. Or, initialize the count from another mapping + of elements to their counts. + + >>> c = Counter() # a new, empty counter + >>> c = Counter('gallahad') # a new counter from an iterable + >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping + >>> c = Counter(a=4, b=2) # a new counter from keyword args + + ''' + self.update(iterable, **kwds) + + def __missing__(self, key): + return 0 + + def most_common(self, n=None): + '''List the n most common elements and their counts from the most + common to the least. If n is None, then list all element counts. 
+ + >>> Counter('abracadabra').most_common(3) + [('a', 5), ('r', 2), ('b', 2)] + + ''' + if n is None: + return sorted(iteritems(self), key=itemgetter(1), reverse=True) + return nlargest(n, iteritems(self), key=itemgetter(1)) + + def elements(self): + '''Iterator over elements repeating each as many times as its count. + + >>> c = Counter('ABCABC') + >>> sorted(c.elements()) + ['A', 'A', 'B', 'B', 'C', 'C'] + + If an element's count has been set to zero or is a negative number, + elements() will ignore it. + + ''' + for elem, count in iteritems(self): + for _ in range(count): + yield elem + + # Override dict methods where the meaning changes for Counter objects. + + @classmethod + def fromkeys(cls, iterable, v=None): + raise NotImplementedError( + 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') + + def update(self, iterable=None, **kwds): + '''Like dict.update() but add counts instead of replacing them. + + Source can be an iterable, a dictionary, or another Counter instance. + + >>> c = Counter('which') + >>> c.update('witch') # add elements from another iterable + >>> d = Counter('watch') + >>> c.update(d) # add elements from another counter + >>> c['h'] # four 'h' in which, witch, and watch + 4 + + ''' + if iterable is not None: + if hasattr(iterable, 'iteritems'): + if self: + self_get = self.get + for elem, count in iteritems(iterable): + self[elem] = self_get(elem, 0) + count + else: + dict.update( + self, iterable) # fast path when counter is empty + else: + self_get = self.get + for elem in iterable: + self[elem] = self_get(elem, 0) + 1 + if kwds: + self.update(kwds) + + def copy(self): + 'Like dict.copy() but returns a Counter instance instead of a dict.' + return Counter(self) + + def __delitem__(self, elem): + '''Like dict.__delitem__() but does not raise KeyError for missing + values.''' + if elem in self: + dict.__delitem__(self, elem) + + def __repr__(self): + if not self: + return '%s()' % self.__class__.__name__ + items = ', '.join(map('%r: %r'.__mod__, self.most_common())) + return '%s({%s})' % (self.__class__.__name__, items) + + # Multiset-style mathematical operations discussed in: + # Knuth TAOCP Volume II section 4.6.3 exercise 19 + # and at http://en.wikipedia.org/wiki/Multiset + # + # Outputs guaranteed to only include positive counts. + # + # To strip negative and zero counts, add-in an empty counter: + # c += Counter() + + def __add__(self, other): + '''Add counts from two counters. + + >>> Counter('abbb') + Counter('bcc') + Counter({'b': 4, 'c': 2, 'a': 1}) + + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] + other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __sub__(self, other): + ''' Subtract count, but keep only results with positive counts. + + >>> Counter('abbbc') - Counter('bccd') + Counter({'b': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + result = Counter() + for elem in set(self) | set(other): + newcount = self[elem] - other[elem] + if newcount > 0: + result[elem] = newcount + return result + + def __or__(self, other): + '''Union is the maximum of value in either of the input counters. 
+ + >>> Counter('abbb') | Counter('bcc') + Counter({'b': 3, 'c': 2, 'a': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _max = max + result = Counter() + for elem in set(self) | set(other): + newcount = _max(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + + def __and__(self, other): + ''' Intersection is the minimum of corresponding counts. + + >>> Counter('abbb') & Counter('bcc') + Counter({'b': 1}) + + ''' + if not isinstance(other, Counter): + return NotImplemented + _min = min + result = Counter() + if len(self) < len(other): + self, other = other, self + for elem in filter(self.__contains__, other): + newcount = _min(self[elem], other[elem]) + if newcount > 0: + result[elem] = newcount + return result + +if sys.version_info[:2] < (2, 7): + OrderedDict = _OrderedDict + Counter = _Counter +else: + from collections import OrderedDict, Counter + +# http://stackoverflow.com/questions/4126348 +# Thanks to @martineau at SO + + +class OrderedDefaultdict(OrderedDict): + + def __init__(self, *args, **kwargs): + newdefault = None + newargs = () + if args: + newdefault = args[0] + if not (newdefault is None or callable(newdefault)): + raise TypeError('first argument must be callable or None') + newargs = args[1:] + self.default_factory = newdefault + super(self.__class__, self).__init__(*newargs, **kwargs) + + def __missing__(self, key): + if self.default_factory is None: + raise KeyError(key) + self[key] = value = self.default_factory() + return value + + def __reduce__(self): # optional, for pickle support + args = (self.default_factory,) if self.default_factory else tuple() + return type(self), args, None, None, list(self.items()) diff --git a/pandas/compat/scipy.py b/pandas/compat/scipy.py index 7b357e2ffdf14..3dab5b1f0451e 100644 --- a/pandas/compat/scipy.py +++ b/pandas/compat/scipy.py @@ -2,7 +2,7 @@ Shipping functions from SciPy to reduce dependency on having SciPy installed """ -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import numpy as np diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 95bc7351adeb1..a649edfada739 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -8,7 +8,7 @@ import pandas.core.common as com import pandas.algos as algos import pandas.hashtable as htable -import pandas.util.compat as compat +import pandas.compat as compat def match(to_match, values, na_sentinel=-1): diff --git a/pandas/core/base.py b/pandas/core/base.py index 1f3cb7f9e6f6d..16fe28a804b6b 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1,7 +1,7 @@ """ Base class(es) for all pandas objects. """ -from pandas.util import compat +from pandas import compat class StringMixin(object): """implements string methods so long as object defines a `__unicode__` method.
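The ``StringMixin`` excerpt above anchors the string-method strategy for the whole codebase: subclasses implement only ``__unicode__`` and the mixin derives the rest for whichever interpreter is running. A minimal sketch of that pattern (the class name and the hard-coded utf-8 are stand-ins; the real mixin reads pandas' display options):

    from pandas import compat

    class UnicodeBacked(object):
        def __unicode__(self):
            return compat.u('demo')        # the single hook subclasses fill in

        def __bytes__(self):
            # assumption: utf-8 stands in for the configured display encoding
            return self.__unicode__().encode('utf-8', 'replace')

        def __str__(self):
            if compat.PY3:
                return self.__unicode__()  # str is unicode on Python 3
            return self.__bytes__()        # str is bytes on Python 2

        def __repr__(self):
            return str(self)
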
diff --git a/pandas/core/common.py b/pandas/core/common.py index 2fdfe90f79771..7e835a5b8a7ac 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -14,8 +14,8 @@ import pandas.lib as lib import pandas.tslib as tslib -from pandas.util import compat -from pandas.util.compat import StringIO, BytesIO, range, long, u, zip, map +from pandas import compat +from pandas.compat import StringIO, BytesIO, range, long, u, zip, map from pandas.core.config import get_option diff --git a/pandas/core/config.py b/pandas/core/config.py index d55888bc18f5d..a14e8afa21322 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -52,8 +52,8 @@ from collections import namedtuple import warnings -from pandas.util.compat import map, lmap, u -import pandas.util.compat as compat +from pandas.compat import map, lmap, u +import pandas.compat as compat DeprecatedOption = namedtuple('DeprecatedOption', 'key msg rkey removal_ver') RegisteredOption = namedtuple( diff --git a/pandas/core/format.py b/pandas/core/format.py index 1381d4e2ecba9..30856d371c084 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1,13 +1,13 @@ from __future__ import print_function # pylint: disable=W0141 -from pandas.util import compat +from pandas import compat import sys -from pandas.util.compat import StringIO, lzip, range, map, zip, reduce, u, OrderedDict +from pandas.compat import StringIO, lzip, range, map, zip, reduce, u, OrderedDict from pandas.core.common import adjoin, isnull, notnull from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.util import compat +from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.core.config import get_option, set_option, reset_option import pandas.core.common as com diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b947d61abbd93..902a6c736b569 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -12,8 +12,8 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0212,W0231,W0703,W0622 -from pandas.util.compat import range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict -from pandas.util import compat +from pandas.compat import range, zip, lrange, lmap, lzip, StringIO, u, OrderedDict +from pandas import compat import operator import sys import collections @@ -37,7 +37,7 @@ from pandas.core.series import Series, _radd_compat import pandas.core.expressions as expressions from pandas.compat.scipy import scoreatpercentile as _quantile -from pandas.util import compat +from pandas import compat from pandas.util.terminal import get_terminal_size from pandas.util.decorators import deprecate, Appender, Substitution diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8bea809d957f8..0eaae228da627 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,6 +1,6 @@ # pylint: disable=W0231,E1101 import warnings -from pandas.util import compat +from pandas import compat import numpy as np import pandas.lib as lib from pandas.core.base import PandasObject @@ -10,7 +10,7 @@ from pandas.core.indexing import _maybe_convert_indices from pandas.tseries.index import DatetimeIndex import pandas.core.common as com -from pandas.util.compat import map, zip +from pandas.compat import map, zip class PandasError(Exception): diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py index ed06362599be1..e12795682460c 100644 --- a/pandas/core/groupby.py +++ b/pandas/core/groupby.py @@ -1,10 +1,10 @@ import types import numpy as np -from pandas.util.compat import( +from pandas.compat import( 
zip, builtins, range, long, lrange, lzip, OrderedDict, callable ) -from pandas.util import compat +from pandas import compat from pandas.core.base import PandasObject from pandas.core.categorical import Categorical @@ -2655,7 +2655,7 @@ def numpy_groupby(data, labels, axis=0): # Helper functions -from pandas.util import compat +from pandas import compat import sys diff --git a/pandas/core/index.py b/pandas/core/index.py index 713400619de77..5175e01d116c0 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 -from pandas.util.compat import range, zip, lrange, lzip -from pandas.util import compat +from pandas.compat import range, zip, lrange, lzip +from pandas import compat import numpy as np import pandas.tslib as tslib diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 2ad00612347d9..4d64b058a15d7 100644 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -3,8 +3,8 @@ from datetime import datetime from pandas.core.common import _asarray_tuplesafe from pandas.core.index import Index, MultiIndex, _ensure_index -from pandas.util.compat import range, zip -import pandas.util.compat as compat +from pandas.compat import range, zip +import pandas.compat as compat import pandas.core.common as com import pandas.lib as lib diff --git a/pandas/core/internals.py b/pandas/core/internals.py index ca1200b87124d..2d09bbec85ffa 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -17,8 +17,8 @@ import pandas.core.expressions as expressions from pandas.tslib import Timestamp -from pandas.util import compat -from pandas.util.compat import range, lrange, lmap, callable, map, zip +from pandas import compat +from pandas.compat import range, lrange, lmap, callable, map, zip class Block(PandasObject): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 20fcc1430524e..23cc4fe31eba1 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -1,4 +1,4 @@ -from pandas.util import compat +from pandas import compat import sys import itertools import functools @@ -12,7 +12,7 @@ import pandas.hashtable as _hash import pandas.tslib as tslib -from pandas.util.compat import builtins +from pandas.compat import builtins try: diff --git a/pandas/core/panel.py b/pandas/core/panel.py index 05a4a4b4fcedf..9f7785ae27465 100644 --- a/pandas/core/panel.py +++ b/pandas/core/panel.py @@ -3,8 +3,8 @@ """ # pylint: disable=E1103,W0231,W0212,W0621 -from pandas.util.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict -from pandas.util import compat +from pandas.compat import map, zip, range, lrange, lmap, u, OrderedDict, OrderedDefaultdict +from pandas import compat import operator import sys import numpy as np @@ -22,7 +22,7 @@ from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.generic import NDFrame -from pandas.util import compat +from pandas import compat from pandas.util.decorators import deprecate, Appender, Substitution import pandas.core.common as com import pandas.core.nanops as nanops diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py index e1706a44fa834..f43ec2c31ba96 100644 --- a/pandas/core/panelnd.py +++ b/pandas/core/panelnd.py @@ -1,8 +1,8 @@ """ Factory methods to create N-D panels """ import pandas.lib as lib -from pandas.util.compat import zip -import pandas.util.compat as compat +from pandas.compat import zip +import pandas.compat as compat def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer, 
axis_aliases=None, stat_axis=2,ns=None): diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py index c73d0803f934f..b69e4a6a96acc 100644 --- a/pandas/core/reshape.py +++ b/pandas/core/reshape.py @@ -1,8 +1,8 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from pandas.util.compat import range, zip -from pandas.util import compat +from pandas.compat import range, zip +from pandas import compat import itertools import numpy as np diff --git a/pandas/core/series.py b/pandas/core/series.py index 21853f28375b0..0e995f47935a0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -5,7 +5,7 @@ # pylint: disable=E1101,E1103 # pylint: disable=W0703,W0622,W0613,W0201 -from pandas.util import compat +from pandas import compat import operator from distutils.version import LooseVersion import types @@ -26,9 +26,9 @@ _check_slice_bounds, _maybe_convert_indices) from pandas.tseries.index import DatetimeIndex from pandas.tseries.period import PeriodIndex, Period -from pandas.util import compat +from pandas import compat from pandas.util.terminal import get_terminal_size -from pandas.util.compat import zip, lzip, u, OrderedDict +from pandas.compat import zip, lzip, u, OrderedDict import pandas.core.array as pa diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 4ab6b379f1812..462ed81aaf875 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1,9 +1,9 @@ import numpy as np -from pandas.util.compat import zip +from pandas.compat import zip from pandas.core.common import isnull from pandas.core.series import Series -import pandas.util.compat as compat +import pandas.compat as compat import re import pandas.lib as lib diff --git a/pandas/io/clipboard.py b/pandas/io/clipboard.py index ba0b80f2ee3e3..798f30e85544f 100644 --- a/pandas/io/clipboard.py +++ b/pandas/io/clipboard.py @@ -1,5 +1,5 @@ """ io on the clipboard """ -from pandas.util.compat import StringIO +from pandas.compat import StringIO def read_clipboard(**kwargs): # pragma: no cover """ diff --git a/pandas/io/common.py b/pandas/io/common.py index dc30010532e08..e96c54a9a74fb 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -4,8 +4,8 @@ import zipfile from contextlib import contextmanager, closing -from pandas.util.compat import StringIO -from pandas.util import compat +from pandas.compat import StringIO +from pandas import compat if compat.PY3: diff --git a/pandas/io/data.py b/pandas/io/data.py index d1962648af033..e6d19aee4a9d6 100644 --- a/pandas/io/data.py +++ b/pandas/io/data.py @@ -12,10 +12,10 @@ import numpy as np -from pandas.util.compat import( +from pandas.compat import( StringIO, bytes_to_str, range, lrange, lmap, zip ) -import pandas.util.compat as compat +import pandas.compat as compat from pandas import Panel, DataFrame, Series, read_csv, concat from pandas.core.common import PandasError from pandas.io.parsers import TextParser diff --git a/pandas/io/date_converters.py b/pandas/io/date_converters.py index 26c3162ecde29..2be477f49e28b 100644 --- a/pandas/io/date_converters.py +++ b/pandas/io/date_converters.py @@ -1,5 +1,5 @@ """This module is designed for community supported date conversion functions""" -from pandas.util.compat import range +from pandas.compat import range import numpy as np import pandas.lib as lib diff --git a/pandas/io/excel.py b/pandas/io/excel.py index bf59d3620df4a..534a88e303dbf 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -11,8 +11,8 @@ from pandas.io.parsers import TextParser from pandas.tseries.period import 
Period from pandas import json -from pandas.util.compat import map, zip, reduce, range, lrange -import pandas.util.compat as compat +from pandas.compat import map, zip, reduce, range, lrange +import pandas.compat as compat def read_excel(path_or_buf, sheetname, kind=None, **kwds): diff --git a/pandas/io/ga.py b/pandas/io/ga.py index 19b478732d6b7..dcbecd74886ac 100644 --- a/pandas/io/ga.py +++ b/pandas/io/ga.py @@ -5,7 +5,7 @@ 4. Download JSON secret file and move into same directory as this file """ from datetime import datetime -from pandas.util import compat +from pandas import compat import numpy as np from pandas import DataFrame import pandas as pd @@ -17,7 +17,7 @@ from apiclient.errors import HttpError from oauth2client.client import AccessTokenRefreshError -from pandas.util.compat import zip, u +from pandas.compat import zip, u TYPE_MAP = {u('INTEGER'): int, u('FLOAT'): float, u('TIME'): int} diff --git a/pandas/io/html.py b/pandas/io/html.py index 841fd1bf9942d..df94e0ffa2e79 100644 --- a/pandas/io/html.py +++ b/pandas/io/html.py @@ -14,8 +14,8 @@ from pandas import DataFrame, MultiIndex, isnull from pandas.io.common import _is_url, urlopen, parse_url -from pandas.util.compat import range, lrange, lmap, u, map -from pandas.util import compat +from pandas.compat import range, lrange, lmap, u, map +from pandas import compat try: diff --git a/pandas/io/json.py b/pandas/io/json.py index 35709b4dd992c..7b6c97be21393 100644 --- a/pandas/io/json.py +++ b/pandas/io/json.py @@ -1,7 +1,7 @@ # pylint: disable-msg=E1101,W0613,W0603 -from pandas.util.compat import StringIO, long -from pandas.util import compat +from pandas.compat import StringIO, long +from pandas import compat import os from pandas import Series, DataFrame, to_datetime diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 433e6d0f8d38e..f76b1c563a7a5 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -2,8 +2,8 @@ Module contains tools for processing files into DataFrames or other objects """ from __future__ import print_function -from pandas.util.compat import range, lrange, StringIO, lzip, zip -from pandas.util import compat +from pandas.compat import range, lrange, StringIO, lzip, zip +from pandas import compat import re import csv from warnings import warn @@ -14,7 +14,7 @@ from pandas.core.frame import DataFrame import datetime import pandas.core.common as com -from pandas.util import compat +from pandas import compat from pandas.io.date_converters import generic_parser from pandas.io.common import get_filepath_or_buffer diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 314a566d2dc3c..efa8bdb0b123b 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -1,4 +1,4 @@ -from pandas.util.compat import cPickle as pkl, PY3 +from pandas.compat import cPickle as pkl, PY3 def to_pickle(obj, path): """ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 14de4d17e76e4..a7daa7e7c8691 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -6,8 +6,8 @@ # pylint: disable-msg=E1101,W0613,W0603 from datetime import datetime, date -from pandas.util.compat import map, range, zip, lrange, lmap, u -from pandas.util import compat +from pandas.compat import map, range, zip, lrange, lmap, u +from pandas import compat import time import re import copy @@ -30,7 +30,7 @@ from pandas.core.index import _ensure_index import pandas.core.common as com from pandas.tools.merge import concat -from pandas.util import compat +from pandas import compat from pandas.io.common import 
PerformanceWarning import pandas.lib as lib diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 3a88f4e3b2fff..b65c35e6b352a 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -5,8 +5,8 @@ from __future__ import print_function from datetime import datetime, date -from pandas.util.compat import range, lzip, map, zip -import pandas.util.compat as compat +from pandas.compat import range, lzip, map, zip +import pandas.compat as compat import numpy as np import traceback diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 1ffd99b1c8c2f..21cf6d40ddec9 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -19,9 +19,9 @@ from pandas.core.series import Series from pandas.core.categorical import Categorical import datetime -from pandas.util import compat -from pandas.util import compat -from pandas.util.compat import StringIO, long, lrange, lmap, lzip +from pandas import compat +from pandas import compat +from pandas.compat import StringIO, long, lrange, lmap, lzip from pandas import isnull from pandas.io.parsers import _parser_params, Appender from pandas.io.common import get_filepath_or_buffer diff --git a/pandas/io/tests/generate_legacy_pickles.py b/pandas/io/tests/generate_legacy_pickles.py index ab08ff505739f..f5d949e2cfc45 100644 --- a/pandas/io/tests/generate_legacy_pickles.py +++ b/pandas/io/tests/generate_legacy_pickles.py @@ -1,7 +1,7 @@ """ self-contained to write legacy pickle files """ from __future__ import print_function -from pandas.util.compat import zip, cPickle as pickle +from pandas.compat import zip, cPickle as pickle def _create_sp_series(): diff --git a/pandas/io/tests/test_cparser.py b/pandas/io/tests/test_cparser.py index d15262bb65dc5..d5f62cf909513 100644 --- a/pandas/io/tests/test_cparser.py +++ b/pandas/io/tests/test_cparser.py @@ -2,9 +2,9 @@ C/Cython ascii file parser tests """ -from pandas.util.compat import StringIO, BytesIO, map +from pandas.compat import StringIO, BytesIO, map from datetime import datetime -from pandas.util import compat +from pandas import compat import csv import os import sys @@ -23,7 +23,7 @@ from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib -from pandas.util import compat +from pandas import compat from pandas.lib import Timestamp import pandas.util.testing as tm diff --git a/pandas/io/tests/test_data.py b/pandas/io/tests/test_data.py index 1e1267558932e..c85fd61e975e9 100644 --- a/pandas/io/tests/test_data.py +++ b/pandas/io/tests/test_data.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util import compat +from pandas import compat import unittest import warnings import nose diff --git a/pandas/io/tests/test_date_converters.py b/pandas/io/tests/test_date_converters.py index 13f03683d9e82..8c1009b904857 100644 --- a/pandas/io/tests/test_date_converters.py +++ b/pandas/io/tests/test_date_converters.py @@ -1,4 +1,4 @@ -from pandas.util.compat import StringIO, BytesIO +from pandas.compat import StringIO, BytesIO from datetime import date, datetime import csv import os @@ -19,7 +19,7 @@ from pandas.util.testing import (assert_almost_equal, assert_frame_equal, assert_series_equal, network) import pandas.lib as lib -from pandas.util import compat +from pandas import compat from pandas.lib import Timestamp import pandas.io.date_converters as conv diff --git a/pandas/io/tests/test_excel.py b/pandas/io/tests/test_excel.py index 764c5959eee55..1ac4d4e31ed10 100644 --- a/pandas/io/tests/test_excel.py +++ 
b/pandas/io/tests/test_excel.py @@ -1,6 +1,6 @@ # pylint: disable=E1101 -from pandas.util.compat import StringIO, BytesIO, PY3, u, range, map +from pandas.compat import StringIO, BytesIO, PY3, u, range, map from datetime import datetime from os.path import split as psplit import csv @@ -27,7 +27,7 @@ import pandas as pd import pandas.lib as lib -from pandas.util import compat +from pandas import compat from pandas.lib import Timestamp from pandas.tseries.index import date_range import pandas.tseries.tools as tools diff --git a/pandas/io/tests/test_html.py b/pandas/io/tests/test_html.py index 09e2c86dd265f..44e4b5cfda7b6 100644 --- a/pandas/io/tests/test_html.py +++ b/pandas/io/tests/test_html.py @@ -12,8 +12,8 @@ import numpy as np from numpy.random import rand from numpy.testing.decorators import slow -from pandas.util.compat import map, zip, StringIO -import pandas.util.compat as compat +from pandas.compat import map, zip, StringIO +import pandas.compat as compat try: from importlib import import_module diff --git a/pandas/io/tests/test_json/test_pandas.py b/pandas/io/tests/test_json/test_pandas.py index 94138ccbc2c4e..893243d148618 100644 --- a/pandas/io/tests/test_json/test_pandas.py +++ b/pandas/io/tests/test_json/test_pandas.py @@ -2,8 +2,8 @@ # pylint: disable-msg=W0612,E1101 from copy import deepcopy from datetime import datetime, timedelta -from pandas.util.compat import range, lrange, StringIO, cPickle as pickle -from pandas.util import compat +from pandas.compat import range, lrange, StringIO, cPickle as pickle +from pandas import compat from pandas.io.common import URLError import operator import os diff --git a/pandas/io/tests/test_json/test_ujson.py b/pandas/io/tests/test_json/test_ujson.py index 6d89daa7eaff1..ff684e30b206d 100644 --- a/pandas/io/tests/test_json/test_ujson.py +++ b/pandas/io/tests/test_json/test_ujson.py @@ -16,10 +16,10 @@ import random import decimal from functools import partial -from pandas.util.compat import range, zip, StringIO, u -from pandas.util import compat +from pandas.compat import range, zip, StringIO, u +from pandas import compat import pandas.json as ujson -import pandas.util.compat as compat +import pandas.compat as compat import numpy as np from pandas.util.testing import assert_almost_equal diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 0f46ffa9092a9..163171c56bbc4 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -12,7 +12,7 @@ import numpy as np from pandas import DataFrame, Series, Index, MultiIndex, DatetimeIndex -from pandas.util.compat import( +from pandas.compat import( StringIO, BytesIO, PY3, range, long, lrange, lmap, u, map, StringIO ) from pandas.io.common import urlopen, URLError @@ -28,7 +28,7 @@ import pandas as pd import pandas.lib as lib -from pandas.util import compat +from pandas import compat from pandas.lib import Timestamp from pandas.tseries.index import date_range import pandas.tseries.tools as tools diff --git a/pandas/io/tests/test_pickle.py b/pandas/io/tests/test_pickle.py index 55e4756dc9ba9..3c805e9fa260d 100644 --- a/pandas/io/tests/test_pickle.py +++ b/pandas/io/tests/test_pickle.py @@ -14,7 +14,7 @@ import pandas as pd from pandas import Index from pandas.sparse.tests import test_sparse -from pandas.util import compat +from pandas import compat from pandas.util.misc import is_little_endian class TestPickle(unittest.TestCase): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index 
ee5b70ccb3646..3c532ea287755 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.compat import range, lrange, u +from pandas.compat import range, lrange, u import nose import unittest import os @@ -19,7 +19,7 @@ from pandas.tests.test_series import assert_series_equal from pandas.tests.test_frame import assert_frame_equal from pandas import concat, Timestamp -from pandas.util import compat +from pandas import compat from numpy.testing.decorators import slow diff --git a/pandas/io/tests/test_sql.py b/pandas/io/tests/test_sql.py index 8990515cee8c9..624f16b3207cd 100644 --- a/pandas/io/tests/test_sql.py +++ b/pandas/io/tests/test_sql.py @@ -11,8 +11,8 @@ from pandas.core.datetools import format as date_format from pandas.core.api import DataFrame, isnull -from pandas.util.compat import StringIO, range, lrange -import pandas.util.compat as compat +from pandas.compat import StringIO, range, lrange +import pandas.compat as compat import pandas.io.sql as sql import pandas.util.testing as tm diff --git a/pandas/io/wb.py b/pandas/io/wb.py index 867032cc9c2fd..7c50c0b41e897 100644 --- a/pandas/io/wb.py +++ b/pandas/io/wb.py @@ -1,6 +1,6 @@ from __future__ import print_function -from pandas.util.compat import map, reduce, range, lrange +from pandas.compat import map, reduce, range, lrange from pandas.io.common import urlopen from pandas.io import json import pandas diff --git a/pandas/rpy/common.py b/pandas/rpy/common.py index 54fe50b44bd48..a640b43ab97e6 100644 --- a/pandas/rpy/common.py +++ b/pandas/rpy/common.py @@ -4,7 +4,7 @@ """ from __future__ import print_function -from pandas.util.compat import zip, range +from pandas.compat import zip, range import numpy as np import pandas as pd diff --git a/pandas/sparse/array.py b/pandas/sparse/array.py index 7dee8230b3dfe..7710749a869f0 100644 --- a/pandas/sparse/array.py +++ b/pandas/sparse/array.py @@ -11,7 +11,7 @@ from pandas.core.base import PandasObject import pandas.core.common as com -from pandas.util import compat +from pandas import compat from pandas._sparse import BlockIndex, IntIndex import pandas._sparse as splib diff --git a/pandas/sparse/frame.py b/pandas/sparse/frame.py index 4505aac4ecd66..d108094036f64 100644 --- a/pandas/sparse/frame.py +++ b/pandas/sparse/frame.py @@ -6,8 +6,8 @@ # pylint: disable=E1101,E1103,W0231,E0202 from numpy import nan -from pandas.util.compat import range, lmap, map -from pandas.util import compat +from pandas.compat import range, lmap, map +from pandas import compat import numpy as np from pandas.core.common import _pickle_array, _unpickle_array, _try_sort diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py index 3f6b5e0d795bb..260d648243633 100644 --- a/pandas/sparse/panel.py +++ b/pandas/sparse/panel.py @@ -5,8 +5,8 @@ # pylint: disable=E1101,E1103,W0231 -from pandas.util.compat import range, lrange, zip -from pandas.util import compat +from pandas.compat import range, lrange, zip +from pandas import compat import numpy as np from pandas.core.index import Index, MultiIndex, _ensure_index diff --git a/pandas/sparse/series.py b/pandas/sparse/series.py index 866ee5cb150bc..83adf135d47d3 100644 --- a/pandas/sparse/series.py +++ b/pandas/sparse/series.py @@ -17,7 +17,7 @@ import pandas.core.common as com import pandas.core.datetools as datetools -from pandas.util import compat +from pandas import compat from pandas.sparse.array import (make_sparse, _sparse_array_op, SparseArray) from 
pandas._sparse import BlockIndex, IntIndex diff --git a/pandas/sparse/tests/test_array.py b/pandas/sparse/tests/test_array.py index f11632e28c111..bd5f99ef73fe8 100644 --- a/pandas/sparse/tests/test_array.py +++ b/pandas/sparse/tests/test_array.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range +from pandas.compat import range import re from numpy import nan, ndarray import numpy as np diff --git a/pandas/sparse/tests/test_list.py b/pandas/sparse/tests/test_list.py index 8be3026dd403d..21241050e39dc 100644 --- a/pandas/sparse/tests/test_list.py +++ b/pandas/sparse/tests/test_list.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range +from pandas.compat import range import unittest from numpy import nan diff --git a/pandas/sparse/tests/test_sparse.py b/pandas/sparse/tests/test_sparse.py index 6a4280e057538..248c920b03838 100644 --- a/pandas/sparse/tests/test_sparse.py +++ b/pandas/sparse/tests/test_sparse.py @@ -22,8 +22,8 @@ import pandas.core.datetools as datetools from pandas.core.common import isnull import pandas.util.testing as tm -from pandas.util.compat import range, lrange, cPickle as pickle, StringIO, lrange -from pandas.util import compat +from pandas.compat import range, lrange, cPickle as pickle, StringIO, lrange +from pandas import compat import pandas.sparse.frame as spf diff --git a/pandas/src/generate_code.py b/pandas/src/generate_code.py index b94ec6df7c738..70b68eae7564a 100644 --- a/pandas/src/generate_code.py +++ b/pandas/src/generate_code.py @@ -1,5 +1,5 @@ from __future__ import print_function -from pandas.util.compat import range, cStringIO as StringIO +from pandas.compat import range, cStringIO as StringIO import os header = """ diff --git a/pandas/stats/fama_macbeth.py b/pandas/stats/fama_macbeth.py index 04dd7e045f4c8..38fb5894c94bb 100644 --- a/pandas/stats/fama_macbeth.py +++ b/pandas/stats/fama_macbeth.py @@ -1,5 +1,5 @@ from pandas.core.base import StringMixin -from pandas.util.compat import StringIO, range +from pandas.compat import StringIO, range import numpy as np diff --git a/pandas/stats/math.py b/pandas/stats/math.py index 583c588c9c037..64548b90dade8 100644 --- a/pandas/stats/math.py +++ b/pandas/stats/math.py @@ -3,7 +3,7 @@ from __future__ import division -from pandas.util.compat import range +from pandas.compat import range import numpy as np import numpy.linalg as linalg diff --git a/pandas/stats/misc.py b/pandas/stats/misc.py index aeeec7068d5e4..c79bae34f20c4 100644 --- a/pandas/stats/misc.py +++ b/pandas/stats/misc.py @@ -1,10 +1,10 @@ from numpy import NaN -from pandas.util import compat +from pandas import compat import numpy as np from pandas.core.api import Series, DataFrame, isnull, notnull from pandas.core.series import remove_na -from pandas.util.compat import zip +from pandas.compat import zip def zscore(series): diff --git a/pandas/stats/ols.py b/pandas/stats/ols.py index 9ea85739dca55..2b8f6fc1601c8 100644 --- a/pandas/stats/ols.py +++ b/pandas/stats/ols.py @@ -4,9 +4,9 @@ # pylint: disable-msg=W0201 -from pandas.util.compat import zip, range, StringIO +from pandas.compat import zip, range, StringIO from itertools import starmap -from pandas.util import compat +from pandas import compat import numpy as np from pandas.core.api import DataFrame, Series, isnull diff --git a/pandas/stats/plm.py b/pandas/stats/plm.py index 923f1b4272681..2c4e4c47c684a 100644 --- a/pandas/stats/plm.py +++ b/pandas/stats/plm.py @@ -6,8 +6,8 @@ # pylint: disable-msg=E1101,E1103 from __future__ import division -from pandas.util.compat import range 
-from pandas.util import compat +from pandas.compat import range +from pandas import compat import warnings import numpy as np diff --git a/pandas/stats/tests/test_fama_macbeth.py b/pandas/stats/tests/test_fama_macbeth.py index 6d315ceec511a..dd2f196361226 100644 --- a/pandas/stats/tests/test_fama_macbeth.py +++ b/pandas/stats/tests/test_fama_macbeth.py @@ -2,8 +2,8 @@ from pandas.stats.api import fama_macbeth from .common import assert_almost_equal, BaseTest -from pandas.util.compat import range -from pandas.util import compat +from pandas.compat import range +from pandas import compat import numpy as np diff --git a/pandas/stats/tests/test_moments.py b/pandas/stats/tests/test_moments.py index 233ca78ce6db0..24fc04d849c7f 100644 --- a/pandas/stats/tests/test_moments.py +++ b/pandas/stats/tests/test_moments.py @@ -14,7 +14,7 @@ import pandas.core.datetools as datetools import pandas.stats.moments as mom import pandas.util.testing as tm -from pandas.util.compat import range, zip, PY3, StringIO +from pandas.compat import range, zip, PY3, StringIO N, K = 100, 10 diff --git a/pandas/stats/tests/test_ols.py b/pandas/stats/tests/test_ols.py index f9bcb6fabbe6f..697425c8e0fcf 100644 --- a/pandas/stats/tests/test_ols.py +++ b/pandas/stats/tests/test_ols.py @@ -7,7 +7,7 @@ from __future__ import division from datetime import datetime -from pandas.util import compat +from pandas import compat import unittest import nose import numpy as np @@ -22,7 +22,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assertRaisesRegexp) import pandas.util.testing as tm -import pandas.util.compat as compat +import pandas.compat as compat from .common import BaseTest _have_statsmodels = True diff --git a/pandas/stats/tests/test_var.py b/pandas/stats/tests/test_var.py index 1c7eec1264afd..ab5709d013fa9 100644 --- a/pandas/stats/tests/test_var.py +++ b/pandas/stats/tests/test_var.py @@ -3,7 +3,7 @@ from pandas.util.testing import assert_almost_equal -from pandas.util.compat import range +from pandas.compat import range import nose import unittest diff --git a/pandas/stats/var.py b/pandas/stats/var.py index 2337dcf9c9e36..be55507f976cb 100644 --- a/pandas/stats/var.py +++ b/pandas/stats/var.py @@ -1,7 +1,7 @@ from __future__ import division -from pandas.util.compat import range, lrange, zip, reduce -from pandas.util import compat +from pandas.compat import range, lrange, zip, reduce +from pandas import compat import numpy as np from pandas.core.base import StringMixin from pandas.util.decorators import cache_readonly diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index af3b56e047765..d0a050984a07f 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range +from pandas.compat import range import unittest import numpy as np diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index dc60cda24bd60..29d104e9c465c 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import unittest import nose diff --git a/pandas/tests/test_common.py b/pandas/tests/test_common.py index 7001f582efffe..ca119a8e263bf 100644 --- a/pandas/tests/test_common.py +++ b/pandas/tests/test_common.py @@ -6,7 +6,7 @@ import unittest from pandas import Series, DataFrame, date_range, 
DatetimeIndex, Timestamp -from pandas.util.compat import range, long, lrange, lmap, u, map +from pandas.compat import range, long, lrange, lmap, u, map from pandas.core.common import notnull, isnull import pandas.core.common as com import pandas.util.testing as tm @@ -15,7 +15,7 @@ import numpy as np from pandas.tslib import iNaT -from pandas.util import compat +from pandas import compat _multiprocess_can_split_ = True diff --git a/pandas/tests/test_compat.py b/pandas/tests/test_compat.py index fe5c7590dec0f..a8b9a88126861 100644 --- a/pandas/tests/test_compat.py +++ b/pandas/tests/test_compat.py @@ -2,7 +2,7 @@ Testing that functions from compat work as expected """ -from pandas.util.compat import ( +from pandas.compat import ( range, zip, map, filter, lrange, lzip, lmap, lfilter, builtins diff --git a/pandas/tests/test_expressions.py b/pandas/tests/test_expressions.py index 8cfffaacc1058..ff76c7c070946 100644 --- a/pandas/tests/test_expressions.py +++ b/pandas/tests/test_expressions.py @@ -17,7 +17,7 @@ from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal) -from pandas.util import compat +from pandas import compat import pandas.util.testing as tm import pandas.lib as lib diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index 7cd484f50d4c3..e7a52756089cc 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -1,8 +1,8 @@ from __future__ import print_function # -*- coding: utf-8 -*- -from pandas.util.compat import range, zip, lrange, StringIO, PY3, lzip, u -import pandas.util.compat as compat +from pandas.compat import range, zip, lrange, StringIO, PY3, lzip, u +import pandas.compat as compat import os import sys import unittest diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 39a62ddc3922b..e08f3552382c2 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -7,11 +7,11 @@ import unittest import nose -from pandas.util.compat import( +from pandas.compat import( map, zip, range, long, lrange, lmap, lzip, OrderedDict, cPickle as pickle, u, StringIO ) -from pandas.util import compat +from pandas import compat from numpy import random, nan from numpy.random import randn diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 4364e741c3b65..f017acce0419b 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -6,7 +6,7 @@ from datetime import datetime, date from pandas import Series, DataFrame, MultiIndex, PeriodIndex, date_range -from pandas.util.compat import range, lrange, StringIO, lmap, lzip, u, map, zip +from pandas.compat import range, lrange, StringIO, lmap, lzip, u, map, zip import pandas.util.testing as tm from pandas.util.testing import ensure_clean from pandas.core.config import set_option diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py index 58b7d808eedc3..19f15e44dc096 100644 --- a/pandas/tests/test_groupby.py +++ b/pandas/tests/test_groupby.py @@ -13,10 +13,10 @@ from pandas.core.series import Series from pandas.util.testing import (assert_panel_equal, assert_frame_equal, assert_series_equal, assert_almost_equal) -from pandas.util.compat import( +from pandas.compat import( range, long, lrange, StringIO, lmap, lzip, map, zip, builtins, OrderedDict ) -from pandas.util import compat +from pandas import compat from pandas.core.panel import Panel from pandas.tools.merge import concat from collections import defaultdict diff --git a/pandas/tests/test_index.py 
b/pandas/tests/test_index.py index 46fd7f2186c33..200bc5d6611f9 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1,7 +1,7 @@ # pylint: disable=E1101,E1103,W0232 from datetime import datetime, timedelta -from pandas.util.compat import range, lrange, lzip, u, zip +from pandas.compat import range, lrange, lzip, u, zip import operator import pickle import unittest @@ -13,7 +13,7 @@ from pandas.core.index import Index, Int64Index, MultiIndex from pandas.util.testing import assert_almost_equal -from pandas.util import compat +from pandas import compat import pandas.util.testing as tm import pandas.core.config as cf diff --git a/pandas/tests/test_indexing.py b/pandas/tests/test_indexing.py index f2b22a4d9b3d1..f6a6bd1587a04 100644 --- a/pandas/tests/test_indexing.py +++ b/pandas/tests/test_indexing.py @@ -3,7 +3,7 @@ import nose import itertools -from pandas.util.compat import range, lrange, StringIO, lmap, map +from pandas.compat import range, lrange, StringIO, lmap, map from numpy import random, nan from numpy.random import randn import numpy as np @@ -15,7 +15,7 @@ MultiIndex, DatetimeIndex, Timestamp) from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_frame_equal, assert_panel_equal) -from pandas.util import compat +from pandas import compat import pandas.util.testing as tm import pandas.lib as lib diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 2490fa211f6bf..6f13678339425 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -11,7 +11,7 @@ from pandas.util.testing import ( assert_almost_equal, assert_frame_equal, randn) -from pandas.util.compat import zip, u +from pandas.compat import zip, u def assert_block_equal(left, right): diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a98b613aed746..d54fc32b6efa6 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -13,7 +13,7 @@ assert_frame_equal) import pandas.core.common as com import pandas.util.testing as tm -from pandas.util.compat import (range, lrange, StringIO, lzip, u, cPickle, +from pandas.compat import (range, lrange, StringIO, lzip, u, cPickle, product as cart_product, zip) import pandas as pd diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 38117a591d849..d04192772ce7d 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1,8 +1,8 @@ # pylint: disable=W0612,E1101 from datetime import datetime -from pandas.util.compat import range, lrange, StringIO, cPickle, OrderedDict -from pandas.util import compat +from pandas.compat import range, lrange, StringIO, cPickle, OrderedDict +from pandas import compat import operator import unittest import nose @@ -15,7 +15,7 @@ from pandas.core.panel import Panel from pandas.core.series import remove_na import pandas.core.common as com -from pandas.util import compat +from pandas import compat from pandas.util.testing import (assert_panel_equal, assert_frame_equal, diff --git a/pandas/tests/test_panel4d.py b/pandas/tests/test_panel4d.py index f1b9bc645d2ba..3c6ab18126e8f 100644 --- a/pandas/tests/test_panel4d.py +++ b/pandas/tests/test_panel4d.py @@ -1,5 +1,5 @@ from datetime import datetime -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import os import operator import unittest @@ -15,7 +15,7 @@ from pandas.core.series import remove_na import pandas.core.common as com import pandas.core.panel as panelmod -from pandas.util 
import compat +from pandas import compat from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, @@ -23,7 +23,7 @@ assert_series_equal, assert_almost_equal) import pandas.util.testing as tm -import pandas.util.compat as compat +import pandas.compat as compat def add_nans(panel4d): diff --git a/pandas/tests/test_panelnd.py b/pandas/tests/test_panelnd.py index 452fd2470204f..e195839242f55 100644 --- a/pandas/tests/test_panelnd.py +++ b/pandas/tests/test_panelnd.py @@ -9,7 +9,7 @@ from pandas.core import panelnd from pandas.core.panel import Panel import pandas.core.common as com -from pandas.util import compat +from pandas import compat from pandas.util.testing import (assert_panel_equal, assert_panel4d_equal, diff --git a/pandas/tests/test_reshape.py b/pandas/tests/test_reshape.py index e285d97642b93..0c6c34ff4dc29 100644 --- a/pandas/tests/test_reshape.py +++ b/pandas/tests/test_reshape.py @@ -15,7 +15,7 @@ from pandas.core.reshape import melt, convert_dummies, lreshape import pandas.util.testing as tm -from pandas.util.compat import StringIO, cPickle, range +from pandas.compat import StringIO, cPickle, range _multiprocess_can_split_ = True diff --git a/pandas/tests/test_rplot.py b/pandas/tests/test_rplot.py index 176ffa23108b3..e7faa8f25deb3 100644 --- a/pandas/tests/test_rplot.py +++ b/pandas/tests/test_rplot.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range +from pandas.compat import range import unittest import pandas.tools.rplot as rplot import pandas.util.testing as tm diff --git a/pandas/tests/test_series.py b/pandas/tests/test_series.py index 3ab924312ac28..151a97a281ad3 100644 --- a/pandas/tests/test_series.py +++ b/pandas/tests/test_series.py @@ -23,8 +23,8 @@ import pandas.core.datetools as datetools import pandas.core.nanops as nanops -from pandas.util.compat import StringIO, lrange, range, zip, u, OrderedDict -from pandas.util import compat +from pandas.compat import StringIO, lrange, range, zip, u, OrderedDict +from pandas import compat from pandas.util.testing import (assert_series_equal, assert_almost_equal, ensure_clean) diff --git a/pandas/tests/test_stats.py b/pandas/tests/test_stats.py index c8fb09cb30641..e3533afc71e95 100644 --- a/pandas/tests/test_stats.py +++ b/pandas/tests/test_stats.py @@ -1,4 +1,4 @@ -from pandas.util import compat +from pandas import compat import nose import unittest @@ -7,7 +7,7 @@ from pandas import Series, DataFrame -from pandas.util.compat import product +from pandas.compat import product from pandas.util.testing import (assert_frame_equal, assert_series_equal, assert_almost_equal) diff --git a/pandas/tests/test_strings.py b/pandas/tests/test_strings.py index d3bdb437249fa..4170f34c13095 100644 --- a/pandas/tests/test_strings.py +++ b/pandas/tests/test_strings.py @@ -13,8 +13,8 @@ from numpy.testing import assert_array_equal from numpy.random import randint -from pandas.util.compat import range, lrange, u -import pandas.util.compat as compat +from pandas.compat import range, lrange, u +import pandas.compat as compat from pandas import (Index, Series, TimeSeries, DataFrame, isnull, notnull, bdate_range, date_range) import pandas.core.common as com diff --git a/pandas/tests/test_tseries.py b/pandas/tests/test_tseries.py index 651c888a0b659..1ed6dd4469f4d 100644 --- a/pandas/tests/test_tseries.py +++ b/pandas/tests/test_tseries.py @@ -5,7 +5,7 @@ from pandas import Index, isnull, Timestamp from pandas.util.testing import assert_almost_equal import pandas.util.testing as common -from pandas.util.compat import range, 
lrange, zip +from pandas.compat import range, lrange, zip import pandas.lib as lib import pandas.algos as algos from datetime import datetime diff --git a/pandas/tools/merge.py b/pandas/tools/merge.py index 04c7dfa6ed036..7133782fa66d3 100644 --- a/pandas/tools/merge.py +++ b/pandas/tools/merge.py @@ -4,8 +4,8 @@ import types import numpy as np -from pandas.util.compat import range, long, lrange, lzip, zip -import pandas.util.compat as compat +from pandas.compat import range, long, lrange, lzip, zip +import pandas.compat as compat from pandas.core.categorical import Categorical from pandas.core.frame import DataFrame, _merge_doc from pandas.core.generic import NDFrame diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py index ed463fbe61d68..effcc3ff7695f 100644 --- a/pandas/tools/pivot.py +++ b/pandas/tools/pivot.py @@ -5,8 +5,8 @@ from pandas.core.reshape import _unstack_multiple from pandas.tools.merge import concat from pandas.tools.util import cartesian_product -from pandas.util.compat import range, lrange, zip -from pandas.util import compat +from pandas.compat import range, lrange, zip +from pandas import compat import pandas.core.common as com import numpy as np diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index aef035ec41e67..3e3fff32a654a 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -15,8 +15,8 @@ from pandas.tseries.period import PeriodIndex, Period from pandas.tseries.frequencies import get_period_alias, get_base_alias from pandas.tseries.offsets import DateOffset -from pandas.util.compat import range, lrange, lmap, map, zip -import pandas.util.compat as compat +from pandas.compat import range, lrange, lmap, map, zip +import pandas.compat as compat try: # mpl optional import pandas.tseries.converter as conv diff --git a/pandas/tools/rplot.py b/pandas/tools/rplot.py index 0bd1e79a10470..5928472df1c22 100644 --- a/pandas/tools/rplot.py +++ b/pandas/tools/rplot.py @@ -2,7 +2,7 @@ from copy import deepcopy import numpy as np -from pandas.util.compat import range, zip +from pandas.compat import range, zip # # TODO: # * Make sure legends work properly diff --git a/pandas/tools/tests/test_merge.py b/pandas/tools/tests/test_merge.py index c3b91ed27d8f2..1008e23c3ebcd 100644 --- a/pandas/tools/tests/test_merge.py +++ b/pandas/tools/tests/test_merge.py @@ -9,8 +9,8 @@ import numpy as np import random -from pandas.util.compat import range, lrange, lzip, zip -from pandas.util import compat +from pandas.compat import range, lrange, lzip, zip +from pandas import compat from pandas.tseries.index import DatetimeIndex from pandas.tools.merge import merge, concat, ordered_merge, MergeError from pandas.util.testing import (assert_frame_equal, assert_series_equal, diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py index 8d3f25a7d60d5..57e7d2f7f6ae9 100644 --- a/pandas/tools/tests/test_pivot.py +++ b/pandas/tools/tests/test_pivot.py @@ -8,7 +8,7 @@ from pandas import DataFrame, Series, Index, MultiIndex from pandas.tools.merge import concat from pandas.tools.pivot import pivot_table, crosstab -from pandas.util.compat import range, u, product +from pandas.compat import range, u, product import pandas.util.testing as tm diff --git a/pandas/tools/tests/test_tile.py b/pandas/tools/tests/test_tile.py index d939bebdefaeb..53258864b1ab8 100644 --- a/pandas/tools/tests/test_tile.py +++ b/pandas/tools/tests/test_tile.py @@ -3,7 +3,7 @@ import unittest import numpy as np -from pandas.util.compat import zip +from pandas.compat 
import zip from pandas import DataFrame, Series, unique import pandas.util.testing as tm diff --git a/pandas/tools/tile.py b/pandas/tools/tile.py index f987042bb91f2..aa64b046c6891 100644 --- a/pandas/tools/tile.py +++ b/pandas/tools/tile.py @@ -8,7 +8,7 @@ import pandas.core.algorithms as algos import pandas.core.common as com import pandas.core.nanops as nanops -from pandas.util.compat import zip +from pandas.compat import zip import numpy as np diff --git a/pandas/tools/util.py b/pandas/tools/util.py index 1f2905b86f7d0..7de8c25379258 100644 --- a/pandas/tools/util.py +++ b/pandas/tools/util.py @@ -8,7 +8,7 @@ def match(needles, haystack): def cartesian_product(X): ''' - Numpy version of itertools.product or pandas.util.compat.product. + Numpy version of itertools.product or pandas.compat.product. Sometimes faster (for large inputs)... Examples diff --git a/pandas/tseries/converter.py b/pandas/tseries/converter.py index 3226a1cb426bf..54c2a4a2a3056 100644 --- a/pandas/tseries/converter.py +++ b/pandas/tseries/converter.py @@ -10,8 +10,8 @@ from matplotlib.ticker import Formatter, AutoLocator, Locator from matplotlib.transforms import nonsingular -from pandas.util.compat import range, lrange -import pandas.util.compat as compat +from pandas.compat import range, lrange +import pandas.compat as compat import pandas.lib as lib import pandas.core.common as com from pandas.core.index import Index diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index f6e792d4bf193..2c4fc0d1b9c78 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1,6 +1,6 @@ from datetime import datetime -from pandas.util.compat import range, long, zip -from pandas.util import compat +from pandas.compat import range, long, zip +from pandas import compat import re import numpy as np diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 63e96efc2048d..17d357370c078 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -8,8 +8,8 @@ from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE from pandas.core.index import Index, Int64Index -import pandas.util.compat as compat -from pandas.util.compat import u +import pandas.compat as compat +from pandas.compat import u from pandas.tseries.frequencies import ( infer_freq, to_offset, get_period_alias, Resolution, get_reso_string) diff --git a/pandas/tseries/offsets.py b/pandas/tseries/offsets.py index 565abc195145c..b78fa52f0be03 100644 --- a/pandas/tseries/offsets.py +++ b/pandas/tseries/offsets.py @@ -1,6 +1,6 @@ from datetime import date, datetime, timedelta -from pandas.util.compat import range -from pandas.util import compat +from pandas.compat import range +from pandas import compat import numpy as np from pandas.tseries.tools import to_datetime diff --git a/pandas/tseries/period.py b/pandas/tseries/period.py index c512331ae66f9..bf1199dc2690f 100644 --- a/pandas/tseries/period.py +++ b/pandas/tseries/period.py @@ -14,13 +14,13 @@ import pandas.core.common as com from pandas.core.common import isnull, _NS_DTYPE, _INT64_DTYPE -from pandas.util import compat +from pandas import compat from pandas.lib import Timestamp import pandas.lib as lib import pandas.tslib as tslib import pandas.algos as _algos -from pandas.util.compat import map, zip, u +from pandas.compat import map, zip, u #--------------- diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 687d505dbb611..be0c5dfad9071 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -9,7 +9,7 @@ 
from pandas.tseries.period import PeriodIndex, period_range import pandas.tseries.tools as tools import pandas.core.common as com -import pandas.util.compat as compat +import pandas.compat as compat from pandas.lib import Timestamp import pandas.lib as lib diff --git a/pandas/tseries/tests/test_converter.py b/pandas/tseries/tests/test_converter.py index 0d6449ec79339..c3bb7d82dfb6d 100644 --- a/pandas/tseries/tests/test_converter.py +++ b/pandas/tseries/tests/test_converter.py @@ -6,7 +6,7 @@ import nose import numpy as np -from pandas.util.compat import u +from pandas.compat import u try: import pandas.tseries.converter as converter diff --git a/pandas/tseries/tests/test_daterange.py b/pandas/tseries/tests/test_daterange.py index ad1c04739a192..536d718d72eba 100644 --- a/pandas/tseries/tests/test_daterange.py +++ b/pandas/tseries/tests/test_daterange.py @@ -1,5 +1,5 @@ from datetime import datetime -from pandas.util.compat import range +from pandas.compat import range import pickle import unittest import nose diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 24d268972b6a8..6386f61a24a85 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -1,5 +1,5 @@ from datetime import datetime, time, timedelta -from pandas.util.compat import range +from pandas.compat import range import sys import os import unittest diff --git a/pandas/tseries/tests/test_offsets.py b/pandas/tseries/tests/test_offsets.py index 3e64e4c03025c..7d026a46dde15 100644 --- a/pandas/tseries/tests/test_offsets.py +++ b/pandas/tseries/tests/test_offsets.py @@ -1,6 +1,6 @@ from datetime import date, datetime, timedelta -from pandas.util.compat import range -from pandas.util import compat +from pandas.compat import range +from pandas import compat import unittest import nose from nose.tools import assert_raises diff --git a/pandas/tseries/tests/test_period.py b/pandas/tseries/tests/test_period.py index c2faf4511f200..03b1d89714f68 100644 --- a/pandas/tseries/tests/test_period.py +++ b/pandas/tseries/tests/test_period.py @@ -22,13 +22,13 @@ import pandas.core.datetools as datetools import pandas as pd import numpy as np -from pandas.util.compat import range, lrange, lmap, map, zip +from pandas.compat import range, lrange, lmap, map, zip randn = np.random.randn from pandas import Series, TimeSeries, DataFrame from pandas.util.testing import assert_series_equal, assert_almost_equal import pandas.util.testing as tm -from pandas.util import compat +from pandas import compat from numpy.testing import assert_array_equal diff --git a/pandas/tseries/tests/test_plotting.py b/pandas/tseries/tests/test_plotting.py index e4a707f28f56b..717e7bfe5da96 100644 --- a/pandas/tseries/tests/test_plotting.py +++ b/pandas/tseries/tests/test_plotting.py @@ -3,7 +3,7 @@ import unittest import nose -from pandas.util.compat import range, lrange, zip +from pandas.compat import range, lrange, zip import numpy as np from numpy.testing.decorators import slow diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 52055d13f42a6..1b75961cb2721 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -2,7 +2,7 @@ from datetime import datetime, timedelta -from pandas.util.compat import range, lrange, zip, product +from pandas.compat import range, lrange, zip, product import numpy as np from pandas import Series, TimeSeries, DataFrame, Panel, isnull, notnull, Timestamp diff --git 
a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index e2213a85a9daa..efee7379240d5 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -28,7 +28,7 @@ import pandas.index as _index -from pandas.util.compat import( +from pandas.compat import( range, long, StringIO, lrange, lmap, map, zip, cPickle as pickle, product ) from pandas import read_pickle @@ -36,7 +36,7 @@ from numpy.random import rand from numpy.testing import assert_array_equal from pandas.util.testing import assert_frame_equal -import pandas.util.compat as compat +import pandas.compat as compat from pandas.core.datetools import BDay import pandas.core.common as com from pandas import concat diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index 47e006af3326d..883025bee1ba1 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -27,8 +27,8 @@ import pandas.core.datetools as dt from numpy.random import rand from pandas.util.testing import assert_frame_equal -import pandas.util.compat as compat -from pandas.util.compat import range, lrange, zip, cPickle as pickle +import pandas.compat as compat +from pandas.compat import range, lrange, zip, cPickle as pickle from pandas.core.datetools import BDay import pandas.core.common as com diff --git a/pandas/tseries/tests/test_util.py b/pandas/tseries/tests/test_util.py index 84666b0192cf1..8bf448118561d 100644 --- a/pandas/tseries/tests/test_util.py +++ b/pandas/tseries/tests/test_util.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range +from pandas.compat import range import nose import unittest diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index f7eafdac16816..9373d307640f6 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -7,8 +7,8 @@ import pandas.lib as lib import pandas.tslib as tslib import pandas.core.common as com -from pandas.util.compat import StringIO, callable -import pandas.util.compat as compat +from pandas.compat import StringIO, callable +import pandas.compat as compat try: import dateutil diff --git a/pandas/tseries/util.py b/pandas/tseries/util.py index 33d33045c8743..664a42543822d 100644 --- a/pandas/tseries/util.py +++ b/pandas/tseries/util.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import numpy as np import pandas as pd diff --git a/pandas/util/compat.py b/pandas/util/compat.py deleted file mode 100644 index 27f5671ca02b9..0000000000000 --- a/pandas/util/compat.py +++ /dev/null @@ -1,688 +0,0 @@ -""" -compat -====== - -Cross-compatible functions for Python 2 and 3. - -Key items to import for 2/3 compatible code: -* iterators: range(), map(), zip(), filter(), reduce() -* lists: lrange(), lmap(), lzip(), lfilter() -* unicode: u() [u"" is a syntax error in Python 3.0-3.2] -* longs: long (int in Python 3) -* callable -* iterable method compatibility: iteritems, iterkeys, itervalues - * Uses the original method if available, otherwise uses items, keys, values. 
-* types: - * text_type: unicode in Python 2, str in Python 3 - * binary_type: str in Python 2, bythes in Python 3 - * string_types: basestring in Python 2, str in Python 3 -* bind_method: binds functions to classes - -Python 2.6 compatibility: -* OrderedDict -* Counter - -Other items: -* OrderedDefaultDict -""" -# pylint disable=W0611 -import functools -import itertools -from itertools import product -import sys -import types - -PY3 = (sys.version_info[0] >= 3) -# import iterator versions of these functions - -try: - import __builtin__ as builtins - # not writeable when instantiated with string, doesn't handle unicode well - from cStringIO import StringIO as cStringIO - # always writeable - from StringIO import StringIO - BytesIO = StringIO - import cPickle -except ImportError: - import builtins - from io import StringIO, BytesIO - cStringIO = StringIO - import pickle as cPickle - - -if PY3: - def isidentifier(s): - return s.isidentifier() - - def str_to_bytes(s, encoding='ascii'): - return s.encode(encoding) - - def bytes_to_str(b, encoding='utf-8'): - return b.decode(encoding) - - # have to explicitly put builtins into the namespace - range = range - map = map - zip = zip - filter = filter - reduce = functools.reduce - long = int - unichr = chr - - # list-producing versions of the major Python iterating functions - def lrange(*args, **kwargs): - return list(range(*args, **kwargs)) - - def lzip(*args, **kwargs): - return list(zip(*args, **kwargs)) - - def lmap(*args, **kwargs): - return list(map(*args, **kwargs)) - - def lfilter(*args, **kwargs): - return list(filter(*args, **kwargs)) -else: - # Python 2 - import re - _name_re = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*$") - - def isidentifier(s, dotted=False): - return bool(_name_re.match(s)) - - def str_to_bytes(s, encoding='ascii'): - return s - - def bytes_to_str(b, encoding='ascii'): - return b - - range = xrange - zip = itertools.izip - filter = itertools.ifilter - map = itertools.imap - reduce = reduce - long = long - unichr = unichr - - # Python 2-builtin ranges produce lists - lrange = builtins.range - lzip = builtins.zip - lmap = builtins.map - lfilter = builtins.filter - - -def iteritems(obj, **kwargs): - """replacement for six's iteritems for Python2/3 compat - uses 'iteritems' if available and otherwise uses 'items'. - - Passes kwargs to method.""" - func = getattr(obj, "iteritems", None) - if not func: - func = obj.items - return func(**kwargs) - - -def iterkeys(obj, **kwargs): - func = getattr(obj, "iterkeys", None) - if not func: - func = obj.keys - return func(**kwargs) - - -def itervalues(obj, **kwargs): - func = getattr(obj, "itervalues", None) - if not func: - func = obj.values - return func(**kwargs) - - -def bind_method(cls, name, func): - """Bind a method to class, python 2 and python 3 compatible. - - Parameters - ---------- - - cls : type - class to receive bound method - name : basestring - name of method on class instance - func : function - function to be bound as method - - - Returns - ------- - None - """ - # only python 2 has bound/unbound method issue - if not PY3: - setattr(cls, name, types.MethodType(func, None, cls)) - else: - setattr(cls, name, func) -# ---------------------------------------------------------------------------- -# functions largely based / taken from the six module - -# Much of the code in this module comes from Benjamin Peterson's six library. 
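# --- A minimal, hypothetical sketch of how the helpers defined above are
# meant to be used (assumes only names from this module: range, lrange,
# lzip, iteritems, bind_method); illustrative only, not part of the patch.
def _sketch_compat_helpers():
    # range is iterator-based on both majors (xrange on 2.x); the l*-prefixed
    # variants eagerly produce lists for the cases where a list is needed.
    it = range(3)
    assert lrange(3) == [0, 1, 2]
    assert lzip('ab', it) == [('a', 0), ('b', 1)]

    # iteritems uses d.iteritems() when available (Python 2) and falls back
    # to d.items() (Python 3), so call sites never branch on the version.
    d = {'a': 1}
    assert list(iteritems(d)) == [('a', 1)]

    # bind_method attaches a function to a class correctly on both majors
    # (unbound method on Python 2, plain function attribute on Python 3).
    class _C(object):
        pass
    bind_method(_C, 'one', lambda self: 1)
    assert _C().one() == 1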
-# The license for this library can be found in LICENSES/SIX and the code can be -# found at https://bitbucket.org/gutworth/six - -if PY3: - string_types = str, - integer_types = int, - class_types = type, - text_type = str - binary_type = bytes - - def u(s): - return s -else: - string_types = basestring, - integer_types = (int, long) - class_types = (type, types.ClassType) - text_type = unicode - binary_type = str - - def u(s): - return unicode(s, "unicode_escape") - -try: - # callable reintroduced in later versions of Python - callable = callable -except NameError: - def callable(obj): - return any("__call__" in klass.__dict__ for klass in type(obj).__mro__) - -# ---------------------------------------------------------------------------- -# Python 2.6 compatibility shims -# - -# OrderedDict Shim from Raymond Hettinger, python core dev -# http://code.activestate.com/recipes/576693-ordered-dictionary-for-py24/ -# here to support versions before 2.6 -if not PY3: - # don't need this except in 2.6 - try: - from thread import get_ident as _get_ident - except ImportError: - from dummy_thread import get_ident as _get_ident - -try: - from _abcoll import KeysView, ValuesView, ItemsView -except ImportError: - pass - - -class _OrderedDict(dict): - - 'Dictionary that remembers insertion order' - # An inherited dict maps keys to values. - # The inherited dict provides __getitem__, __len__, __contains__, and get. - # The remaining methods are order-aware. - # Big-O running times for all methods are the same as for regular - # dictionaries. - - # The internal self.__map dictionary maps keys to links in a doubly linked - # list. The circular doubly linked list starts and ends with a sentinel - # element. The sentinel element never gets deleted (this simplifies the - # algorithm). Each link is stored as a list of length three: [PREV, NEXT, - # KEY]. - - def __init__(self, *args, **kwds): - '''Initialize an ordered dictionary. Signature is the same as for - regular dictionaries, but keyword arguments are not recommended - because their insertion order is arbitrary. - - ''' - if len(args) > 1: - raise TypeError('expected at most 1 arguments, got %d' % len(args)) - try: - self.__root - except AttributeError: - self.__root = root = [] # sentinel node - root[:] = [root, root, None] - self.__map = {} - self.__update(*args, **kwds) - - def __setitem__(self, key, value, dict_setitem=dict.__setitem__): - 'od.__setitem__(i, y) <==> od[i]=y' - # Setting a new item creates a new link which goes at the end of the - # linked list, and the inherited dictionary is updated with the new - # key/value pair. - if key not in self: - root = self.__root - last = root[0] - last[1] = root[0] = self.__map[key] = [last, root, key] - dict_setitem(self, key, value) - - def __delitem__(self, key, dict_delitem=dict.__delitem__): - 'od.__delitem__(y) <==> del od[y]' - # Deleting an existing item uses self.__map to find the link which is - # then removed by updating the links in the predecessor and successor - # nodes. - dict_delitem(self, key) - link_prev, link_next, key = self.__map.pop(key) - link_prev[1] = link_next - link_next[0] = link_prev - - def __iter__(self): - 'od.__iter__() <==> iter(od)' - root = self.__root - curr = root[1] - while curr is not root: - yield curr[2] - curr = curr[1] - - def __reversed__(self): - 'od.__reversed__() <==> reversed(od)' - root = self.__root - curr = root[0] - while curr is not root: - yield curr[2] - curr = curr[0] - - def clear(self): - 'od.clear() -> None. Remove all items from od.' 
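# --- Hypothetical sketch of the text-type helpers defined earlier in this
# module (u, text_type, string_types); this is the same pattern used for
# the TYPE_MAP keys in pandas/io/ga.py earlier in this patch.
def _sketch_text_helpers():
    s = u('INTEGER')                    # unicode on 2.x, str unchanged on 3.x
    assert isinstance(s, text_type)
    assert isinstance(s, string_types)  # 2/3-safe stand-in for basestring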
- try: - for node in itervalues(self.__map): - del node[:] - root = self.__root - root[:] = [root, root, None] - self.__map.clear() - except AttributeError: - pass - dict.clear(self) - - def popitem(self, last=True): - '''od.popitem() -> (k, v), return and remove a (key, value) pair. - Pairs are returned in LIFO order if last is true or FIFO order if - false. - ''' - if not self: - raise KeyError('dictionary is empty') - root = self.__root - if last: - link = root[0] - link_prev = link[0] - link_prev[1] = root - root[0] = link_prev - else: - link = root[1] - link_next = link[1] - root[1] = link_next - link_next[0] = root - key = link[2] - del self.__map[key] - value = dict.pop(self, key) - return key, value - - # -- the following methods do not depend on the internal structure -- - - def keys(self): - 'od.keys() -> list of keys in od' - return list(self) - - def values(self): - 'od.values() -> list of values in od' - return [self[key] for key in self] - - def items(self): - 'od.items() -> list of (key, value) pairs in od' - return [(key, self[key]) for key in self] - - def iterkeys(self): - 'od.iterkeys() -> an iterator over the keys in od' - return iter(self) - - def itervalues(self): - 'od.itervalues -> an iterator over the values in od' - for k in self: - yield self[k] - - def iteritems(self): - 'od.iteritems -> an iterator over the (key, value) items in od' - for k in self: - yield (k, self[k]) - - def update(*args, **kwds): - '''od.update(E, **F) -> None. Update od from dict/iterable E and F. - - If E is a dict instance, does: for k in E: od[k] = E[k] - If E has a .keys() method, does: for k in E.keys(): od[k] = E[k] - Or if E is an iterable of items, does:for k, v in E: od[k] = v - In either case, this is followed by: for k, v in F.items(): od[k] = v - ''' - if len(args) > 2: - raise TypeError('update() takes at most 2 positional ' - 'arguments (%d given)' % (len(args),)) - elif not args: - raise TypeError('update() takes at least 1 argument (0 given)') - self = args[0] - # Make progressively weaker assumptions about "other" - other = () - if len(args) == 2: - other = args[1] - if isinstance(other, dict): - for key in other: - self[key] = other[key] - elif hasattr(other, 'keys'): - for key in other.keys(): - self[key] = other[key] - else: - for key, value in other: - self[key] = value - for key, value in kwds.items(): - self[key] = value - # let subclasses override update without breaking __init__ - __update = update - - __marker = object() - - def pop(self, key, default=__marker): - '''od.pop(k[,d]) -> v, remove specified key and return the\ - corresponding value. If key is not found, d is returned if given, - otherwise KeyError is raised. - ''' - if key in self: - result = self[key] - del self[key] - return result - if default is self.__marker: - raise KeyError(key) - return default - - def setdefault(self, key, default=None): - 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od' - if key in self: - return self[key] - self[key] = default - return default - - def __repr__(self, _repr_running={}): - 'od.__repr__() <==> repr(od)' - call_key = id(self), _get_ident() - if call_key in _repr_running: - return '...' 
- _repr_running[call_key] = 1 - try: - if not self: - return '%s()' % (self.__class__.__name__,) - return '%s(%r)' % (self.__class__.__name__, list(self.items())) - finally: - del _repr_running[call_key] - - def __reduce__(self): - 'Return state information for pickling' - items = [[k, self[k]] for k in self] - inst_dict = vars(self).copy() - for k in vars(OrderedDict()): - inst_dict.pop(k, None) - if inst_dict: - return (self.__class__, (items,), inst_dict) - return self.__class__, (items,) - - def copy(self): - 'od.copy() -> a shallow copy of od' - return self.__class__(self) - - @classmethod - def fromkeys(cls, iterable, value=None): - '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S and - values equal to v (which defaults to None). - ''' - d = cls() - for key in iterable: - d[key] = value - return d - - def __eq__(self, other): - '''od.__eq__(y) <==> od==y. Comparison to another OD is - order-sensitive while comparison to a regular mapping is - order-insensitive. - ''' - if isinstance(other, OrderedDict): - return (len(self) == len(other) and - list(self.items()) == list(other.items())) - return dict.__eq__(self, other) - - def __ne__(self, other): - return not self == other - - # -- the following methods are only used in Python 2.7 -- - - def viewkeys(self): - "od.viewkeys() -> a set-like object providing a view on od's keys" - return KeysView(self) - - def viewvalues(self): - "od.viewvalues() -> an object providing a view on od's values" - return ValuesView(self) - - def viewitems(self): - "od.viewitems() -> a set-like object providing a view on od's items" - return ItemsView(self) - - -# {{{ http://code.activestate.com/recipes/576611/ (r11) - -try: - from operator import itemgetter - from heapq import nlargest -except ImportError: - pass - - -class _Counter(dict): - - '''Dict subclass for counting hashable objects. Sometimes called a bag - or multiset. Elements are stored as dictionary keys and their counts - are stored as dictionary values. - - >>> Counter('zyzygy') - Counter({'y': 3, 'z': 2, 'g': 1}) - - ''' - - def __init__(self, iterable=None, **kwds): - '''Create a new, empty Counter object. And if given, count elements - from an input iterable. Or, initialize the count from another mapping - of elements to their counts. - - >>> c = Counter() # a new, empty counter - >>> c = Counter('gallahad') # a new counter from an iterable - >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping - >>> c = Counter(a=4, b=2) # a new counter from keyword args - - ''' - self.update(iterable, **kwds) - - def __missing__(self, key): - return 0 - - def most_common(self, n=None): - '''List the n most common elements and their counts from the most - common to the least. If n is None, then list all element counts. - - >>> Counter('abracadabra').most_common(3) - [('a', 5), ('r', 2), ('b', 2)] - - ''' - if n is None: - return sorted(iteritems(self), key=itemgetter(1), reverse=True) - return nlargest(n, iteritems(self), key=itemgetter(1)) - - def elements(self): - '''Iterator over elements repeating each as many times as its count. - - >>> c = Counter('ABCABC') - >>> sorted(c.elements()) - ['A', 'A', 'B', 'B', 'C', 'C'] - - If an element's count has been set to zero or is a negative number, - elements() will ignore it. - - ''' - for elem, count in iteritems(self): - for _ in range(count): - yield elem - - # Override dict methods where the meaning changes for Counter objects. 
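# --- Hypothetical sketch of the two Python 2.6 shims: _OrderedDict
# (completed above) remembers insertion order, and _Counter counts hashable
# objects; on Python >= 2.7 the stdlib classes are used instead via the
# version gate later in this module. Illustrative usage only.
def _sketch_26_shims():
    od = _OrderedDict()
    od['b'] = 1
    od['a'] = 2
    assert list(od) == ['b', 'a']      # insertion order, not key order
    assert od.popitem() == ('a', 2)    # LIFO by default

    c = _Counter('abracadabra')
    assert c.most_common(1) == [('a', 5)]
    assert sorted(c.elements())[:5] == ['a'] * 5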
- - @classmethod - def fromkeys(cls, iterable, v=None): - raise NotImplementedError( - 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.') - - def update(self, iterable=None, **kwds): - '''Like dict.update() but add counts instead of replacing them. - - Source can be an iterable, a dictionary, or another Counter instance. - - >>> c = Counter('which') - >>> c.update('witch') # add elements from another iterable - >>> d = Counter('watch') - >>> c.update(d) # add elements from another counter - >>> c['h'] # four 'h' in which, witch, and watch - 4 - - ''' - if iterable is not None: - if hasattr(iterable, 'iteritems'): - if self: - self_get = self.get - for elem, count in iteritems(iterable): - self[elem] = self_get(elem, 0) + count - else: - dict.update( - self, iterable) # fast path when counter is empty - else: - self_get = self.get - for elem in iterable: - self[elem] = self_get(elem, 0) + 1 - if kwds: - self.update(kwds) - - def copy(self): - 'Like dict.copy() but returns a Counter instance instead of a dict.' - return Counter(self) - - def __delitem__(self, elem): - '''Like dict.__delitem__() but does not raise KeyError for missing - values.''' - if elem in self: - dict.__delitem__(self, elem) - - def __repr__(self): - if not self: - return '%s()' % self.__class__.__name__ - items = ', '.join(map('%r: %r'.__mod__, self.most_common())) - return '%s({%s})' % (self.__class__.__name__, items) - - # Multiset-style mathematical operations discussed in: - # Knuth TAOCP Volume II section 4.6.3 exercise 19 - # and at http://en.wikipedia.org/wiki/Multiset - # - # Outputs guaranteed to only include positive counts. - # - # To strip negative and zero counts, add-in an empty counter: - # c += Counter() - - def __add__(self, other): - '''Add counts from two counters. - - >>> Counter('abbb') + Counter('bcc') - Counter({'b': 4, 'c': 2, 'a': 1}) - - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem in set(self) | set(other): - newcount = self[elem] + other[elem] - if newcount > 0: - result[elem] = newcount - return result - - def __sub__(self, other): - ''' Subtract count, but keep only results with positive counts. - - >>> Counter('abbbc') - Counter('bccd') - Counter({'b': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - result = Counter() - for elem in set(self) | set(other): - newcount = self[elem] - other[elem] - if newcount > 0: - result[elem] = newcount - return result - - def __or__(self, other): - '''Union is the maximum of value in either of the input counters. - - >>> Counter('abbb') | Counter('bcc') - Counter({'b': 3, 'c': 2, 'a': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - _max = max - result = Counter() - for elem in set(self) | set(other): - newcount = _max(self[elem], other[elem]) - if newcount > 0: - result[elem] = newcount - return result - - def __and__(self, other): - ''' Intersection is the minimum of corresponding counts. 
- - >>> Counter('abbb') & Counter('bcc') - Counter({'b': 1}) - - ''' - if not isinstance(other, Counter): - return NotImplemented - _min = min - result = Counter() - if len(self) < len(other): - self, other = other, self - for elem in filter(self.__contains__, other): - newcount = _min(self[elem], other[elem]) - if newcount > 0: - result[elem] = newcount - return result - -if sys.version_info[:2] < (2, 7): - OrderedDict = _OrderedDict - Counter = _Counter -else: - from collections import OrderedDict, Counter - -# http://stackoverflow.com/questions/4126348 -# Thanks to @martineau at SO - - -class OrderedDefaultdict(OrderedDict): - - def __init__(self, *args, **kwargs): - newdefault = None - newargs = () - if args: - newdefault = args[0] - if not (newdefault is None or callable(newdefault)): - raise TypeError('first argument must be callable or None') - newargs = args[1:] - self.default_factory = newdefault - super(self.__class__, self).__init__(*newargs, **kwargs) - - def __missing__(self, key): - if self.default_factory is None: - raise KeyError(key) - self[key] = value = self.default_factory() - return value - - def __reduce__(self): # optional, for pickle support - args = self.default_factory if self.default_factory else tuple() - return type(self), args, None, None, list(self.items()) diff --git a/pandas/util/counter.py b/pandas/util/counter.py index f074782466d9d..75f7b214ce6a5 100644 --- a/pandas/util/counter.py +++ b/pandas/util/counter.py @@ -1,11 +1,11 @@ # This is copied from collections in Python 2.7, for compatibility with older # versions of Python. It can be dropped when we depend on Python 2.7/3.1 -from pandas.util import compat +from pandas import compat import heapq as _heapq from itertools import repeat as _repeat, chain as _chain, starmap as _starmap from operator import itemgetter as _itemgetter -from pandas.util.compat import map +from pandas.compat import map try: from collections import Mapping diff --git a/pandas/util/decorators.py b/pandas/util/decorators.py index 9711f360042dd..8c6744cbf2963 100644 --- a/pandas/util/decorators.py +++ b/pandas/util/decorators.py @@ -1,4 +1,4 @@ -from pandas.util.compat import StringIO, callable +from pandas.compat import StringIO, callable from pandas.lib import cache_readonly import sys import warnings diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 469612698f2ae..0628d6705c769 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -24,8 +24,8 @@ import pandas.core.frame as frame import pandas.core.panel as panel import pandas.core.panel4d as panel4d -import pandas.util.compat as compat -from pandas.util.compat import( +import pandas.compat as compat +from pandas.compat import( map, zip, range, unichr, lrange, lmap, lzip, u, callable, Counter ) diff --git a/scripts/bench_join.py b/scripts/bench_join.py index e82d9cee60f12..5e50e8da61fdb 100644 --- a/scripts/bench_join.py +++ b/scripts/bench_join.py @@ -1,4 +1,4 @@ -from pandas.util.compat import range, lrange +from pandas.compat import range, lrange import numpy as np import pandas.lib as lib from pandas import * diff --git a/scripts/bench_join_multi.py b/scripts/bench_join_multi.py index 7e67eeb429893..7b93112b7f869 100644 --- a/scripts/bench_join_multi.py +++ b/scripts/bench_join_multi.py @@ -1,7 +1,7 @@ from pandas import * import numpy as np -from pandas.util.compat import zip, range, lzip +from pandas.compat import zip, range, lzip from pandas.util.testing import rands import pandas.lib as lib diff --git a/scripts/bench_refactor.py 
b/scripts/bench_refactor.py index 9ec57633fa865..dafba371e995a 100644 --- a/scripts/bench_refactor.py +++ b/scripts/bench_refactor.py @@ -1,5 +1,5 @@ from pandas import * -from pandas.util.compat import range +from pandas.compat import range try: import pandas.core.internals as internals reload(internals) diff --git a/scripts/file_sizes.py b/scripts/file_sizes.py index 12cd12c255e7c..de03c72ffbd09 100644 --- a/scripts/file_sizes.py +++ b/scripts/file_sizes.py @@ -7,7 +7,7 @@ from pandas import DataFrame from pandas.util.testing import set_trace -from pandas.util import compat +from pandas import compat dirs = [] names = [] diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 29a9c780c2e77..0d4ecced957f6 100755 --- a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -4,7 +4,7 @@ # copryright 2013, y-p @ github from __future__ import print_function -from pandas.util.compat import range, lrange, map +from pandas.compat import range, lrange, map """Search the git history for all commits touching a named method diff --git a/scripts/groupby_sample.py b/scripts/groupby_sample.py index a5e7dc60d607e..42008858d3cad 100644 --- a/scripts/groupby_sample.py +++ b/scripts/groupby_sample.py @@ -1,7 +1,7 @@ from pandas import * import numpy as np import string -import pandas.util.compat as compat +import pandas.compat as compat g1 = np.array(list(string.letters))[:-1] g2 = np.arange(510) diff --git a/scripts/groupby_test.py b/scripts/groupby_test.py index b6f9152afccaa..3425f0cd98723 100644 --- a/scripts/groupby_test.py +++ b/scripts/groupby_test.py @@ -8,7 +8,7 @@ import pandas.lib as tseries import pandas.core.groupby as gp import pandas.util.testing as tm -from pandas.util.compat import range +from pandas.compat import range reload(gp) """ diff --git a/scripts/hdfstore_panel_perf.py b/scripts/hdfstore_panel_perf.py index d530a02debcbb..06c2a15bdc7c2 100644 --- a/scripts/hdfstore_panel_perf.py +++ b/scripts/hdfstore_panel_perf.py @@ -1,6 +1,6 @@ from pandas import * from pandas.util.testing import rands -from pandas.util.compat import range +from pandas.compat import range i, j, k = 7, 771, 5532 diff --git a/scripts/json_manip.py b/scripts/json_manip.py index 3ad0edac2bbd6..72d0bbb34d6b6 100644 --- a/scripts/json_manip.py +++ b/scripts/json_manip.py @@ -74,8 +74,8 @@ from operator import attrgetter as aget, itemgetter as iget import operator import sys -from pandas.util.compat import map, u, callable, Counter -import pandas.util.compat as compat +from pandas.compat import map, u, callable, Counter +import pandas.compat as compat ## note 'url' appears multiple places and not all extensions have same struct diff --git a/scripts/leak.py b/scripts/leak.py index 3416213cd9668..47f74bf020597 100644 --- a/scripts/leak.py +++ b/scripts/leak.py @@ -1,5 +1,5 @@ from pandas import * -from pandas.util.compat import range +from pandas.compat import range import numpy as np import pandas.util.testing as tm import os diff --git a/scripts/parser_magic.py b/scripts/parser_magic.py index 17bdba16565cd..72fef39d8db65 100644 --- a/scripts/parser_magic.py +++ b/scripts/parser_magic.py @@ -1,6 +1,6 @@ from pandas.util.testing import set_trace import pandas.util.testing as tm -import pandas.util.compat as compat +import pandas.compat as compat from pandas import * import ast diff --git a/scripts/roll_median_leak.py b/scripts/roll_median_leak.py index cd3feb60e2a3c..07161cc6499bf 100644 --- a/scripts/roll_median_leak.py +++ 
b/scripts/roll_median_leak.py @@ -6,7 +6,7 @@ from vbench.api import Benchmark from pandas.util.testing import rands -from pandas.util.compat import range +from pandas.compat import range import pandas.lib as lib import pandas._sandbox as sbx import time diff --git a/scripts/testmed.py b/scripts/testmed.py index c3724af270ef9..dd3b952d58c60 100644 --- a/scripts/testmed.py +++ b/scripts/testmed.py @@ -2,7 +2,7 @@ from random import random from math import log, ceil -from pandas.util.compat import range +from pandas.compat import range from numpy.random import randn from pandas.lib.skiplist import rolling_median diff --git a/vb_suite/groupby.py b/vb_suite/groupby.py index 88aac24986805..ded6a064eebd3 100644 --- a/vb_suite/groupby.py +++ b/vb_suite/groupby.py @@ -1,6 +1,6 @@ from vbench.api import Benchmark from datetime import datetime -from pandas.util.compat import map +from pandas.compat import map common_setup = """from pandas_vb_common import * """ diff --git a/vb_suite/indexing.py b/vb_suite/indexing.py index 03961821bd14f..a87c95f54c9d5 100644 --- a/vb_suite/indexing.py +++ b/vb_suite/indexing.py @@ -106,7 +106,7 @@ start_date=datetime(2012, 1, 1)) setup = common_setup + """ -from pandas.util.compat import range +from pandas.compat import range import pandas.core.expressions as expr df = DataFrame(np.random.randn(50000, 100)) df2 = DataFrame(np.random.randn(50000, 100)) diff --git a/vb_suite/parser.py b/vb_suite/parser.py index 1d5f809f1561f..fb9fbc436eaa4 100644 --- a/vb_suite/parser.py +++ b/vb_suite/parser.py @@ -44,7 +44,7 @@ start_date=datetime(2011, 11, 1)) setup = common_setup + """ -from pandas.util.compat import cStringIO as StringIO +from pandas.compat import cStringIO as StringIO import os N = 10000 K = 8 @@ -63,7 +63,7 @@ read_table_multiple_date = Benchmark(cmd, setup, start_date=sdate) setup = common_setup + """ -from pandas.util.compat import cStringIO as StringIO +from pandas.compat import cStringIO as StringIO import os N = 10000 K = 8 diff --git a/vb_suite/perf_HEAD.py b/vb_suite/perf_HEAD.py index b9f859942d63f..95aa8893918e8 100755 --- a/vb_suite/perf_HEAD.py +++ b/vb_suite/perf_HEAD.py @@ -11,7 +11,7 @@ import json import pandas as pd -import pandas.util.compat as compat +import pandas.compat as compat WEB_TIMEOUT = 10 diff --git a/vb_suite/source/conf.py b/vb_suite/source/conf.py index ac24d1d039ec5..735a800fb9c02 100644 --- a/vb_suite/source/conf.py +++ b/vb_suite/source/conf.py @@ -13,7 +13,7 @@ import sys import os -from pandas.util.compat import u +from pandas.compat import u # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 095eb04ec294d..92e27ba0f3b89 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -27,7 +27,7 @@ """ from __future__ import print_function -from pandas.util.compat import range, lmap +from pandas.compat import range, lmap import shutil import os import sys From 98e25fcf9051ab52ba6f045bef9bc4c3e0020dcb Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 28 Jul 2013 19:46:21 -0400 Subject: [PATCH 10/11] CLN: Add unicode compatibility wrapper for dateutil. Dateutil < 2.0 doesn't always handle unicode well. This wraps `dateutil.parser.parse` and converts unicode to bytes.
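The wrapper is selected once at import time, gated on the installed dateutil version. A minimal sketch of that pattern, using the same names the pandas/compat/__init__.py hunk below introduces:

# Sketch of the version gate this patch adds (mirrors the diff below):
# on dateutil < 2.0, coerce unicode input to bytes before parsing.
import functools
import dateutil
from dateutil import parser as _date_parser
from distutils.version import LooseVersion

if LooseVersion(dateutil.__version__) < '2.0':
    @functools.wraps(_date_parser.parse)
    def parse_date(timestr, *args, **kwargs):
        timestr = bytes(timestr)  # dateutil 1.x mishandles unicode input
        return _date_parser.parse(timestr, *args, **kwargs)
else:
    parse_date = _date_parser.parse  # 2.x accepts unicode directly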
+wrap get_filepath_or_buffer in `str` --- pandas/compat/__init__.py | 10 ++++++++++ pandas/core/datetools.py | 1 - pandas/io/common.py | 2 +- pandas/io/tests/test_parsers.py | 14 +++++++------- pandas/tseries/tests/test_timeseries.py | 4 ++-- pandas/tseries/tools.py | 9 ++++++++- pandas/tslib.pyx | 8 +++----- scripts/find_commits_touching_func.py | 4 ++-- scripts/git_code_churn.py | 1 - vb_suite/test_perf.py | 4 ++-- 10 files changed, 35 insertions(+), 22 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 27f5671ca02b9..eaf2928e4482c 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -28,6 +28,7 @@ # pylint disable=W0611 import functools import itertools +from distutils.version import LooseVersion from itertools import product import sys import types @@ -663,6 +664,15 @@ def __and__(self, other): # http://stackoverflow.com/questions/4126348 # Thanks to @martineau at SO +from dateutil import parser as _date_parser +import dateutil +if LooseVersion(dateutil.__version__) < '2.0': + @functools.wraps(_date_parser.parse) + def parse_date(timestr, *args, **kwargs): + timestr = bytes(timestr) + return _date_parser.parse(timestr, *args, **kwargs) +else: + parse_date = _date_parser.parse class OrderedDefaultdict(OrderedDict): diff --git a/pandas/core/datetools.py b/pandas/core/datetools.py index d6da94856b140..228dc7574f8f3 100644 --- a/pandas/core/datetools.py +++ b/pandas/core/datetools.py @@ -3,7 +3,6 @@ from pandas.tseries.tools import * from pandas.tseries.offsets import * from pandas.tseries.frequencies import * -from dateutil import parser day = DateOffset() bday = BDay() diff --git a/pandas/io/common.py b/pandas/io/common.py index e96c54a9a74fb..a2cf057c8f531 100644 --- a/pandas/io/common.py +++ b/pandas/io/common.py @@ -82,7 +82,7 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None): """ if _is_url(filepath_or_buffer): - req = _urlopen(filepath_or_buffer) + req = _urlopen(str(filepath_or_buffer)) if compat.PY3: # pragma: no cover if encoding: errors = 'strict' diff --git a/pandas/io/tests/test_parsers.py b/pandas/io/tests/test_parsers.py index 163171c56bbc4..a46a3de60fe04 100644 --- a/pandas/io/tests/test_parsers.py +++ b/pandas/io/tests/test_parsers.py @@ -27,6 +27,7 @@ import pandas.util.testing as tm import pandas as pd +from pandas.compat import parse_date import pandas.lib as lib from pandas import compat from pandas.lib import Timestamp @@ -1254,13 +1255,13 @@ def test_converters(self): b,3,4,01/02/2009 c,4,5,01/03/2009 """ - from dateutil import parser + from pandas.compat import parse_date - result = self.read_csv(StringIO(data), converters={'D': parser.parse}) - result2 = self.read_csv(StringIO(data), converters={3: parser.parse}) + result = self.read_csv(StringIO(data), converters={'D': parse_date}) + result2 = self.read_csv(StringIO(data), converters={3: parse_date}) expected = self.read_csv(StringIO(data)) - expected['D'] = expected['D'].map(parser.parse) + expected['D'] = expected['D'].map(parse_date) tm.assert_isinstance(result['D'][0], (datetime, Timestamp)) tm.assert_frame_equal(result, expected) @@ -1327,13 +1328,12 @@ def test_read_csv_parse_simple_list(self): tm.assert_frame_equal(df, expected) def test_parse_dates_custom_euroformat(self): - from dateutil.parser import parse text = """foo,bar,baz 31/01/2010,1,2 01/02/2010,1,NA 02/02/2010,1,2 """ - parser = lambda d: parse(d, dayfirst=True) + parser = lambda d: parse_date(d, dayfirst=True) df = self.read_csv(StringIO(text), names=['time', 'Q', 'NTU'], 
header=0, index_col=0, parse_dates=True, @@ -1345,7 +1345,7 @@ def test_parse_dates_custom_euroformat(self): index=exp_index, columns=['Q', 'NTU']) tm.assert_frame_equal(df, expected) - parser = lambda d: parse(d, day_first=True) + parser = lambda d: parse_date(d, day_first=True) self.assertRaises(Exception, self.read_csv, StringIO(text), skiprows=[0], names=['time', 'Q', 'NTU'], index_col=0, diff --git a/pandas/tseries/tests/test_timeseries.py b/pandas/tseries/tests/test_timeseries.py index efee7379240d5..0fcdcf344ca38 100644 --- a/pandas/tseries/tests/test_timeseries.py +++ b/pandas/tseries/tests/test_timeseries.py @@ -732,7 +732,7 @@ def test_fillna_nat(self): def test_string_na_nat_conversion(self): # GH #999, #858 - from dateutil.parser import parse + from pandas.compat import parse_date strings = np.array(['1/1/2000', '1/2/2000', np.nan, '1/4/2000, 12:34:56'], dtype=object) @@ -742,7 +742,7 @@ def test_string_na_nat_conversion(self): if com.isnull(val): expected[i] = iNaT else: - expected[i] = parse(val) + expected[i] = parse_date(val) result = tslib.array_to_datetime(strings) assert_almost_equal(result, expected) diff --git a/pandas/tseries/tools.py b/pandas/tseries/tools.py index 9373d307640f6..3087d54396691 100644 --- a/pandas/tseries/tools.py +++ b/pandas/tseries/tools.py @@ -238,6 +238,7 @@ def parse_time_string(arg, freq=None, dayfirst=None, yearfirst=None): parsed, reso = dateutil_parse(arg, default, dayfirst=dayfirst, yearfirst=yearfirst) except Exception as e: + # TODO: allow raise of errors within instead raise DateParseError(e) if parsed is None: @@ -252,19 +253,25 @@ def dateutil_parse(timestr, default, """ lifted from dateutil to get resolution""" from dateutil import tz import time + fobj = StringIO(str(timestr)) - res = DEFAULTPARSER._parse(StringIO(timestr), **kwargs) + res = DEFAULTPARSER._parse(fobj, **kwargs) if res is None: raise ValueError("unknown string format") repl = {} + reso = None for attr in ["year", "month", "day", "hour", "minute", "second", "microsecond"]: value = getattr(res, attr) if value is not None: repl[attr] = value reso = attr + + if reso is None: + raise ValueError("Cannot parse date.") + if reso == 'microsecond' and repl['microsecond'] == 0: reso = 'second' diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index 3439e6bb37eb7..1c12b627f0690 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -28,7 +28,7 @@ cimport cython from datetime import timedelta, datetime from datetime import time as datetime_time -from dateutil.parser import parse as parse_date +from pandas.compat import parse_date cdef extern from "Python.h": int PySlice_Check(object) @@ -852,8 +852,6 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, _TSObject _ts int64_t m = cast_from_unit(unit,None) - from dateutil.parser import parse - try: result = np.empty(n, dtype='M8[ns]') iresult = result.view('i8') @@ -917,7 +915,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, elif raise_: raise try: - result[i] = parse(val, dayfirst=dayfirst) + result[i] = parse_date(val, dayfirst=dayfirst) except Exception: if coerce: iresult[i] = iNaT @@ -946,7 +944,7 @@ def array_to_datetime(ndarray[object] values, raise_=False, dayfirst=False, oresult[i] = 'NaT' continue try: - oresult[i] = parse(val, dayfirst=dayfirst) + oresult[i] = parse_date(val, dayfirst=dayfirst) except Exception: if raise_: raise diff --git a/scripts/find_commits_touching_func.py b/scripts/find_commits_touching_func.py index 0d4ecced957f6..e4c24b8c3bcbb 100755 --- 
a/scripts/find_commits_touching_func.py +++ b/scripts/find_commits_touching_func.py @@ -16,7 +16,7 @@ import re import os from collections import namedtuple -from dateutil import parser +from pandas.compat import parse_date try: import sh @@ -98,7 +98,7 @@ def get_commit_info(c,fmt,sep='\t'): def get_commit_vitals(c,hlen=HASH_LEN): h,s,d= get_commit_info(c,'%H\t%s\t%ci',"\t") - return h[:hlen],s,parser.parse(d) + return h[:hlen],s,parse_date(d) def file_filter(state,dirname,fnames): if args.dir_masks and not any([re.search(x,dirname) for x in args.dir_masks]): diff --git a/scripts/git_code_churn.py b/scripts/git_code_churn.py index 3e999aec1ad33..18c9b244a6ba0 100644 --- a/scripts/git_code_churn.py +++ b/scripts/git_code_churn.py @@ -1,4 +1,3 @@ -from dateutil import parser import subprocess import os import re diff --git a/vb_suite/test_perf.py b/vb_suite/test_perf.py index 92e27ba0f3b89..9eca76a5f3226 100755 --- a/vb_suite/test_perf.py +++ b/vb_suite/test_perf.py @@ -466,7 +466,7 @@ def main(): def _parse_commit_log(this,repo_path,base_commit=None): from vbench.git import _convert_timezones from pandas import Series - from dateutil import parser as dparser + from pandas.compat import parse_date git_cmd = 'git --git-dir=%s/.git --work-tree=%s ' % (repo_path, repo_path) githist = git_cmd + ('log --graph --pretty=format:'+ @@ -488,7 +488,7 @@ def _parse_commit_log(this,repo_path,base_commit=None): _, sha, stamp, message, author = line.split('::', 4) # parse timestamp into datetime object - stamp = dparser.parse(stamp) + stamp = parse_date(stamp) shas.append(sha) timestamps.append(stamp) From fa16b95143411f3a018228676be98ff2dc02dcf1 Mon Sep 17 00:00:00 2001 From: Jeffrey Tratner Date: Sun, 28 Jul 2013 20:26:41 -0400 Subject: [PATCH 11/11] TST: Make 2.6 require dateutil==1.5 in Travis --- ci/requirements-2.6.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/requirements-2.6.txt b/ci/requirements-2.6.txt index ac77449b2df02..5038b9e2b6552 100644 --- a/ci/requirements-2.6.txt +++ b/ci/requirements-2.6.txt @@ -1,6 +1,6 @@ numpy==1.6.1 cython==0.19.1 -python-dateutil==2.1 +python-dateutil==1.5 pytz==2013b http://www.crummy.com/software/BeautifulSoup/bs4/download/4.2/beautifulsoup4-4.2.0.tar.gz html5lib==1.0b2
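With dateutil pinned to 1.5, the 2.6 builder exercises the `< 2.0` branch of the compat wrapper from the previous patch. A quick smoke test one could run in that environment — a hypothetical check, not part of the patch, assuming the patched pandas.compat is importable:

# Hypothetical sanity check for the pinned 2.6 environment.
import dateutil
from pandas.compat import parse_date

print(dateutil.__version__)             # expect '1.5' on this builder
d = parse_date(u'2013-07-28 20:26:41')  # unicode is coerced to bytes first
assert (d.year, d.month, d.day) == (2013, 7, 28)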