Skip to content

Commit f0b0f15

Browse files
committed
Merge PR #200 bringing Python 3 support
* py3-compat-merge: All but one test now passing on Python 3. Sort out division in sparse code. Use .iterkv() instead of .iteritems(), which gets incorrectly converted to .items() by 2to3. Hack so test works on Python 3. More changes to support Python 3 better. Fix up more tests for Python 3. Fixes so more tests pass on Python 3. Pandas can now be imported successfully in Python 3. Pandas installs on Python 3
2 parents cbf7616 + ccc9cfc commit f0b0f15

19 files changed

+204
-89
lines changed

pandas/core/common.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
"""
22
Misc tools for implementing data structures
33
"""
4-
5-
from cStringIO import StringIO
4+
try:
5+
from io import BytesIO
6+
except ImportError: # Python < 2.6
7+
from cStringIO import StringIO as BytesIO
68
import itertools
79

810
from numpy.lib.format import read_array, write_array
@@ -77,13 +79,13 @@ def notnull(obj):
7779
def _pickle_array(arr):
7880
arr = arr.view(np.ndarray)
7981

80-
buf = StringIO()
82+
buf = BytesIO()
8183
write_array(buf, arr)
8284

8385
return buf.getvalue()
8486

8587
def _unpickle_array(bytes):
86-
arr = read_array(StringIO(bytes))
88+
arr = read_array(BytesIO(bytes))
8789
return arr
8890

8991
def _take_1d_bool(arr, indexer, out):
@@ -412,7 +414,7 @@ def rands(n):
412414
"""Generates a random alphanumeric string of length *n*"""
413415
from random import Random
414416
import string
415-
return ''.join(Random().sample(string.letters+string.digits, n))
417+
return ''.join(Random().sample(string.ascii_letters+string.digits, n))
416418

417419
def adjoin(space, *lists):
418420
"""
@@ -480,7 +482,11 @@ def __init__(self, seq, key=lambda x:x):
480482
for value in seq:
481483
k = key(value)
482484
self.setdefault(k, []).append(value)
483-
__iter__ = dict.iteritems
485+
try:
486+
__iter__ = dict.iteritems
487+
except AttributeError: # Python 3
488+
def __iter__(self):
489+
return iter(dict.items(self))
484490

485491
def map_indices_py(arr):
486492
"""

pandas/core/frame.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
from pandas.core.internals import BlockManager, make_block, form_blocks
3131
from pandas.core.series import Series, _is_bool_indexer
3232
from pandas.util.decorators import deprecate
33+
from pandas.util import py3compat
3334
import pandas.core.common as common
3435
import pandas.core.datetools as datetools
3536
import pandas._tseries as _tseries
@@ -277,6 +278,10 @@ def iteritems(self):
277278
"""Iterator over (column, series) pairs"""
278279
series = self._series
279280
return ((k, series[k]) for k in self.columns)
281+
282+
iterkv = iteritems
283+
if py3compat.PY3:
284+
items = iteritems
280285

281286
def __len__(self):
282287
"""Returns length of index"""
@@ -292,7 +297,7 @@ def __contains__(self, key):
292297
add = _arith_method(operator.add, 'add')
293298
mul = _arith_method(operator.mul, 'multiply')
294299
sub = _arith_method(operator.sub, 'subtract')
295-
div = _arith_method(operator.div, 'divide')
300+
div = _arith_method(lambda x, y: x / y, 'divide')
296301

297302
radd = _arith_method(operator.add, 'add')
298303
rmul = _arith_method(operator.mul, 'multiply')
@@ -302,19 +307,26 @@ def __contains__(self, key):
302307
__add__ = _arith_method(operator.add, '__add__', default_axis=None)
303308
__sub__ = _arith_method(operator.sub, '__sub__', default_axis=None)
304309
__mul__ = _arith_method(operator.mul, '__mul__', default_axis=None)
305-
__div__ = _arith_method(operator.div, '__div__', default_axis=None)
306310
__truediv__ = _arith_method(operator.truediv, '__truediv__',
307311
default_axis=None)
312+
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__',
313+
default_axis=None)
308314
__pow__ = _arith_method(operator.pow, '__pow__', default_axis=None)
309315

310316
__radd__ = _arith_method(operator.add, '__radd__', default_axis=None)
311317
__rmul__ = _arith_method(operator.mul, '__rmul__', default_axis=None)
312318
__rsub__ = _arith_method(lambda x, y: y - x, '__rsub__', default_axis=None)
313-
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', default_axis=None)
314319
__rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__',
315320
default_axis=None)
321+
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__',
322+
default_axis=None)
316323
__rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__',
317324
default_axis=None)
325+
326+
# Python 2 division methods
327+
if not py3compat.PY3:
328+
__div__ = _arith_method(operator.div, '__div__', default_axis=None)
329+
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__', default_axis=None)
318330

319331
def __neg__(self):
320332
return self * -1
@@ -464,7 +476,7 @@ def to_sparse(self, fill_value=None, kind='block'):
464476
default_fill_value=fill_value)
465477

466478
def to_csv(self, path, nanRep='', cols=None, header=True,
467-
index=True, index_label=None, mode='wb'):
479+
index=True, index_label=None, mode='w'):
468480
"""
469481
Write DataFrame to a comma-separated values (csv) file
470482
@@ -483,7 +495,7 @@ def to_csv(self, path, nanRep='', cols=None, header=True,
483495
Column label for index column(s) if desired. If None is given, and
484496
`header` and `index` are True, then the index names are used. A
485497
sequence should be given if the DataFrame uses MultiIndex.
486-
mode : Python write mode, default 'wb'
498+
mode : Python write mode, default 'w'
487499
"""
488500
f = open(path, mode)
489501
csvout = csv.writer(f)
@@ -658,7 +670,7 @@ def dtypes(self):
658670

659671
def get_dtype_counts(self):
660672
counts = {}
661-
for _, series in self.iteritems():
673+
for _, series in self.iterkv():
662674
if series.dtype in counts:
663675
counts[series.dtype] += 1
664676
else:
@@ -915,7 +927,7 @@ def _set_item(self, key, value):
915927
def _sanitize_column(self, value):
916928
# Need to make sure new columns (which go into the BlockManager as new
917929
# blocks) are always copied
918-
if hasattr(value, '__iter__'):
930+
if hasattr(value, '__iter__') and not isinstance(value, basestring):
919931
if isinstance(value, Series):
920932
if value.index.equals(self.index):
921933
# copy the values

pandas/core/panel.py

+25-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from pandas.core.generic import AxisProperty, NDFrame
1616
from pandas.core.series import Series
1717
from pandas.util.decorators import deprecate
18+
from pandas.util import py3compat
1819
import pandas.core.common as common
1920
import pandas._tseries as _tseries
2021

@@ -170,16 +171,22 @@ class Panel(NDFrame):
170171

171172
__add__ = _arith_method(operator.add, '__add__')
172173
__sub__ = _arith_method(operator.sub, '__sub__')
174+
__truediv__ = _arith_method(operator.truediv, '__truediv__')
175+
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
173176
__mul__ = _arith_method(operator.mul, '__mul__')
174-
__div__ = _arith_method(operator.div, '__div__')
175177
__pow__ = _arith_method(operator.pow, '__pow__')
176178

177179
__radd__ = _arith_method(operator.add, '__radd__')
178180
__rmul__ = _arith_method(operator.mul, '__rmul__')
179181
__rsub__ = _arith_method(lambda x, y: y - x, '__rsub__')
180-
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')
182+
__rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__')
183+
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__')
181184
__rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__')
182185

186+
if not py3compat.PY3:
187+
__div__ = _arith_method(operator.div, '__div__')
188+
__rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')
189+
183190
def __init__(self, data, items=None, major_axis=None, minor_axis=None,
184191
copy=False, dtype=None):
185192
"""
@@ -329,6 +336,10 @@ def __iter__(self):
329336
def iteritems(self):
330337
for item in self.items:
331338
yield item, self[item]
339+
340+
# Name that won't get automatically converted to items by 2to3. items is
341+
# already in use for the first axis.
342+
iterkv = iteritems
332343

333344
def _get_plane_axes(self, axis):
334345
"""
@@ -383,7 +394,7 @@ def to_sparse(self, fill_value=None, kind='block'):
383394
y : SparseDataFrame
384395
"""
385396
from pandas.core.sparse import SparsePanel
386-
frames = dict(self.iteritems())
397+
frames = dict(self.iterkv())
387398
return SparsePanel(frames, items=self.items,
388399
major_axis=self.major_axis,
389400
minor_axis=self.minor_axis,
@@ -632,7 +643,7 @@ def fillna(self, value=None, method='pad'):
632643
"""
633644
if value is None:
634645
result = {}
635-
for col, s in self.iteritems():
646+
for col, s in self.iterkv():
636647
result[col] = s.fillna(method=method, value=value)
637648

638649
return Panel.from_dict(result)
@@ -642,8 +653,12 @@ def fillna(self, value=None, method='pad'):
642653

643654
add = _panel_arith_method(operator.add, 'add')
644655
subtract = sub = _panel_arith_method(operator.sub, 'subtract')
645-
divide = div = _panel_arith_method(operator.div, 'divide')
646656
multiply = mul = _panel_arith_method(operator.mul, 'multiply')
657+
658+
try:
659+
divide = div = _panel_arith_method(operator.div, 'divide')
660+
except AttributeError: # Python 3
661+
divide = div = _panel_arith_method(operator.truediv, 'divide')
647662

648663
def major_xs(self, key, copy=True):
649664
"""
@@ -1214,8 +1229,12 @@ def _combine_panel_frame(self, other, func, axis='items'):
12141229

12151230
add = _panel_arith_method(operator.add, 'add')
12161231
subtract = sub = _panel_arith_method(operator.sub, 'subtract')
1217-
divide = div = _panel_arith_method(operator.div, 'divide')
12181232
multiply = mul = _panel_arith_method(operator.mul, 'multiply')
1233+
1234+
try:
1235+
divide = div = _panel_arith_method(operator.div, 'divide')
1236+
except AttributeError: # Python 3
1237+
divide = div = _panel_arith_method(operator.truediv, 'divide')
12191238

12201239
def to_wide(self):
12211240
"""

pandas/core/series.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pandas.core.index import Index, MultiIndex, _ensure_index
2222
from pandas.core.indexing import _SeriesIndexer, _maybe_droplevels
2323
from pandas.util.decorators import deprecate
24+
from pandas.util import py3compat
2425
import pandas.core.common as common
2526
import pandas.core.datetools as datetools
2627
import pandas._tseries as lib
@@ -418,31 +419,41 @@ def iteritems(self):
418419
Lazily iterate over (index, value) tuples
419420
"""
420421
return itertools.izip(iter(self.index), iter(self))
422+
423+
iterkv = iteritems
424+
if py3compat.PY3:
425+
items = iteritems
421426

422427
#----------------------------------------------------------------------
423428
# Arithmetic operators
424429

425430
__add__ = _arith_method(operator.add, '__add__')
426431
__sub__ = _arith_method(operator.sub, '__sub__')
427432
__mul__ = _arith_method(operator.mul, '__mul__')
428-
__div__ = _arith_method(operator.div, '__div__')
429433
__truediv__ = _arith_method(operator.truediv, '__truediv__')
434+
__floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
430435
__pow__ = _arith_method(operator.pow, '__pow__')
431-
__truediv__ = _arith_method(operator.truediv, '__truediv__')
432436

433437
__radd__ = _arith_method(operator.add, '__add__')
434438
__rmul__ = _arith_method(operator.mul, '__mul__')
435439
__rsub__ = _arith_method(lambda x, y: y - x, '__sub__')
436-
__rdiv__ = _arith_method(lambda x, y: y / x, '__div__')
437440
__rtruediv__ = _arith_method(lambda x, y: y / x, '__truediv__')
441+
__rfloordiv__ = _arith_method(lambda x, y: y // x, '__floordiv__')
438442
__rpow__ = _arith_method(lambda x, y: y ** x, '__pow__')
439443

440444
# Inplace operators
441445
__iadd__ = __add__
442446
__isub__ = __sub__
443447
__imul__ = __mul__
444-
__idiv__ = __div__
448+
__itruediv__ = __truediv__
449+
__ifloordiv__ = __floordiv__
445450
__ipow__ = __pow__
451+
452+
# Python 2 division operators
453+
if not py3compat.PY3:
454+
__div__ = _arith_method(operator.div, '__div__')
455+
__rdiv__ = _arith_method(lambda x, y: y / x, '__div__')
456+
__idiv__ = __div__
446457

447458
#----------------------------------------------------------------------
448459
# Misc public methods
@@ -1031,7 +1042,10 @@ def _binop(self, other, func, fill_value=None):
10311042
add = _flex_method(operator.add, 'add')
10321043
sub = _flex_method(operator.sub, 'subtract')
10331044
mul = _flex_method(operator.mul, 'multiply')
1034-
div = _flex_method(operator.div, 'divide')
1045+
try:
1046+
div = _flex_method(operator.div, 'divide')
1047+
except AttributeError: # Python 3
1048+
div = _flex_method(operator.truediv, 'divide')
10351049

10361050
def combine(self, other, func, fill_value=nan):
10371051
"""

pandas/core/sparse.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
import pandas.core.common as common
2121
import pandas.core.datetools as datetools
2222

23+
from pandas.util import py3compat
24+
2325
from pandas._sparse import BlockIndex, IntIndex
2426
import pandas._sparse as splib
2527

@@ -296,24 +298,31 @@ def __repr__(self):
296298
__add__ = _sparse_op_wrap(operator.add, 'add')
297299
__sub__ = _sparse_op_wrap(operator.sub, 'sub')
298300
__mul__ = _sparse_op_wrap(operator.mul, 'mul')
299-
__div__ = _sparse_op_wrap(operator.div, 'div')
300301
__truediv__ = _sparse_op_wrap(operator.truediv, 'truediv')
302+
__floordiv__ = _sparse_op_wrap(operator.floordiv, 'floordiv')
301303
__pow__ = _sparse_op_wrap(operator.pow, 'pow')
302304

303305
# reverse operators
304306
__radd__ = _sparse_op_wrap(operator.add, '__radd__')
305-
__rmul__ = _sparse_op_wrap(operator.mul, '__rmul__')
306307
__rsub__ = _sparse_op_wrap(lambda x, y: y - x, '__rsub__')
307-
__rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__')
308+
__rmul__ = _sparse_op_wrap(operator.mul, '__rmul__')
308309
__rtruediv__ = _sparse_op_wrap(lambda x, y: y / x, '__rtruediv__')
310+
__rfloordiv__ = _sparse_op_wrap(lambda x, y: y // x, 'floordiv')
309311
__rpow__ = _sparse_op_wrap(lambda x, y: y ** x, '__rpow__')
310312

311313
# Inplace operators
312314
__iadd__ = __add__
313315
__isub__ = __sub__
314316
__imul__ = __mul__
315-
__idiv__ = __div__
317+
__itruediv__ = __truediv__
318+
__ifloordiv__ = __floordiv__
316319
__ipow__ = __pow__
320+
321+
# Python 2 division operators
322+
if not py3compat.PY3:
323+
__div__ = _sparse_op_wrap(operator.div, 'div')
324+
__rdiv__ = _sparse_op_wrap(lambda x, y: y / x, '__rdiv__')
325+
__idiv__ = __div__
317326

318327
@property
319328
def values(self):
@@ -1590,7 +1599,7 @@ def _combine(self, other, func, axis=0):
15901599
return self._combinePanel(other, func)
15911600
elif np.isscalar(other):
15921601
new_frames = dict((k, func(v, other))
1593-
for k, v in self.iteritems())
1602+
for k, v in self.iterkv())
15941603
return self._new_like(new_frames)
15951604

15961605
def _combineFrame(self, other, func, axis=0):
@@ -1666,7 +1675,7 @@ def major_xs(self, key):
16661675
y : DataFrame
16671676
index -> minor axis, columns -> items
16681677
"""
1669-
slices = dict((k, v.xs(key)) for k, v in self.iteritems())
1678+
slices = dict((k, v.xs(key)) for k, v in self.iterkv())
16701679
return DataFrame(slices, index=self.minor_axis, columns=self.items)
16711680

16721681
def minor_xs(self, key):
@@ -1683,7 +1692,7 @@ def minor_xs(self, key):
16831692
y : SparseDataFrame
16841693
index -> major axis, columns -> items
16851694
"""
1686-
slices = dict((k, v[key]) for k, v in self.iteritems())
1695+
slices = dict((k, v[key]) for k, v in self.iterkv())
16871696
return SparseDataFrame(slices, index=self.major_axis,
16881697
columns=self.items,
16891698
default_fill_value=self.default_fill_value,

0 commit comments

Comments
 (0)