TST/CLN: legacy pickle testing #3310

Merged: 5 commits, Apr 12, 2013
1 change: 1 addition & 0 deletions RELEASE.rst
@@ -293,6 +293,7 @@ pandas 0.11.0
- fixed pretty printing of sets (GH3294_)
- Panel() and Panel.from_dict() now respect ordering when given an OrderedDict (GH3303_)
- DataFrame where with a datetimelike incorrectly selecting (GH3311_)
- Ensure pickles created in py2 can be read in py3

.. _GH3294: https://github.com/pydata/pandas/issues/3294
.. _GH622: https://github.com/pydata/pandas/issues/622
10 changes: 5 additions & 5 deletions pandas/core/common.py
@@ -1575,12 +1575,12 @@ def load(path):
-------
unpickled : type of object stored in file
"""
-    f = open(path, 'rb')
     try:
-        return pickle.load(f)
-    finally:
-        f.close()
-
+        with open(path,'rb') as fh:
+            return pickle.load(fh)
+    except:
+        with open(path,'rb') as fh:
+            return pickle.load(fh, encoding='latin1')

class UTF8Recoder:
"""
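For reference, a minimal usage sketch of the new fallback; the fixture path below is illustrative, and `pd.load` is assumed to be the 0.11-era top-level alias for this helper:

```python
import pandas as pd

# A pickle written under Python 2 previously tended to fail with a
# UnicodeDecodeError when loaded under Python 3; the retry with
# encoding='latin1' now handles it.
obj = pd.load('pandas/io/tests/data/legacy_pickle/0.10.1/platform_py2.pickle')
```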
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 18 additions & 0 deletions pandas/io/tests/data/unicode_series.csv
@@ -0,0 +1,18 @@
1617,King of New York (1990)
1618,All Things Fair (1996)
1619,"Sixth Man, The (1997)"
1620,Butterfly Kiss (1995)
1621,"Paris, France (1993)"
1622,"C�r�monie, La (1995)"
1623,Hush (1998)
1624,Nightwatch (1997)
1625,Nobody Loves Me (Keiner liebt mich) (1994)
1626,"Wife, The (1995)"
1627,Lamerica (1994)
1628,Nico Icon (1995)
1629,"Silence of the Palace, The (Saimt el Qusur) (1994)"
1630,"Slingshot, The (1993)"
1631,Land and Freedom (Tierra y libertad) (1995)
1632,Á köldum klaka (Cold Fever) (1994)
1633,Etz Hadomim Tafus (Under the Domin Tree) (1994)
1634,Two Friends (1986)
File renamed without changes.
119 changes: 119 additions & 0 deletions pandas/io/tests/generate_legacy_pickles.py
@@ -0,0 +1,119 @@
""" self-contained to write legacy pickle files """

def _create_sp_series():

    import numpy as np
    from pandas import bdate_range, SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    date_index = bdate_range('1/1/2011', periods=len(index))
    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries

def _create_sp_frame():
    import numpy as np
    from pandas import bdate_range, SparseDataFrame

    nan = np.nan

    data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    dates = bdate_range('1/1/2011', periods=10)
    return SparseDataFrame(data, index=dates)

def create_data():
    """ create the pickle data """

    import numpy as np
    import pandas
    from pandas import (Series,DataFrame,Panel,
                        SparseSeries,SparseDataFrame,SparsePanel,
                        Index,MultiIndex,PeriodIndex,
                        date_range,bdate_range,Timestamp)
    nan = np.nan

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E' : [0., 1, Timestamp('20100101'),'foo',2.],
    }

    index = dict(int = Index(np.arange(10)),
                 date = date_range('20130101',periods=10))
    mi = dict(reg = MultiIndex.from_tuples(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                                                 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]),
                                           names=['first', 'second']))
    series = dict(float = Series(data['A']),
                  int = Series(data['B']),
                  mixed = Series(data['E']))
    frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
                 int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
                 mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
    panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))



    return dict( series = series,
                 frame = frame,
                 panel = panel,
                 index = index,
                 mi = mi,
                 sp_series = dict(float = _create_sp_series()),
                 sp_frame = dict(float = _create_sp_frame())
                 )

def write_legacy_pickles():

    # force our cwd to be the first searched
    import sys
    sys.path.insert(0,'.')

    import os
    import numpy as np
    import pandas
    import pandas.util.testing as tm
    import platform as pl
    import cPickle as pickle

    print("This script generates a pickle file for the current arch, system, and python version")

    base_dir, _ = os.path.split(os.path.abspath(__file__))
    base_dir = os.path.join(base_dir,'data/legacy_pickle')

    # could make this a parameter?
    version = None


    if version is None:
        version = pandas.__version__
    pth = os.path.join(base_dir, str(version))
    try:
        os.mkdir(pth)
    except:
        pass

    # construct a reasonable platform name
    f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
    pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f))

    fh = open(pth,'wb')
    pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)
    fh.close()

    print("created pickle file: %s" % pth)

if __name__ == '__main__':
    write_legacy_pickles()
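The script above is meant to be run once per Python version and platform of interest (under Python 2 it produces the py2-written fixtures). A small sketch of the output location it derives from the running interpreter; the example file name is illustrative and machine dependent:

```python
import os
import platform as pl

import pandas

# Mirrors the naming used by write_legacy_pickles() above.
version = pandas.__version__
fname = '_'.join([str(pl.machine()), str(pl.system().lower()), str(pl.python_version())])
print(os.path.join('data', 'legacy_pickle', str(version), '%s.pickle' % fname))
# e.g. data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle
```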
7 changes: 1 addition & 6 deletions pandas/io/tests/test_cparser.py
@@ -31,15 +31,10 @@
import pandas._parser as parser


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


class TestCParser(unittest.TestCase):

    def setUp(self):
        self.dirpath = curpath()
        self.dirpath = tm.get_data_path('/')
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
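The `curpath()` helper removed here (and in the test modules below) is replaced by `tm.get_data_path()`. Roughly, such a helper resolves a name against the `data/` directory next to the calling test module; a minimal sketch under that assumption (the actual `pandas.util.testing` implementation may differ):

```python
import inspect
import os

def get_data_path(f=''):
    """Return the path of a data file relative to the calling test module."""
    caller_file = inspect.stack()[1][1]  # file of the calling test module
    base_dir = os.path.abspath(os.path.dirname(caller_file))
    return os.path.join(base_dir, 'data/%s' % f)
```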
6 changes: 1 addition & 5 deletions pandas/io/tests/test_excel.py
@@ -65,10 +65,6 @@ def _skip_if_no_excelsuite():
    _skip_if_no_openpyxl()


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth

_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
_frame = DataFrame(_seriesd)[:10]
@@ -81,7 +77,7 @@ def curpath():
class ExcelTests(unittest.TestCase):

    def setUp(self):
        self.dirpath = curpath()
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
16 changes: 5 additions & 11 deletions pandas/io/tests/test_parsers.py
@@ -59,7 +59,7 @@ def setUp(self):
        import warnings
        warnings.filterwarnings(action='ignore', category=FutureWarning)

        self.dirpath = curpath()
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
@@ -1208,7 +1208,7 @@ def test_url(self):
        url = ('https://raw.github.com/pydata/pandas/master/'
               'pandas/io/tests/salary.table')
        url_table = self.read_table(url)
        dirpath = curpath()
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'salary.table')
        local_table = self.read_table(localtable)
        tm.assert_frame_equal(url_table, local_table)
@@ -1229,7 +1229,7 @@ def test_file(self):
        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")
        dirpath = curpath()
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'salary.table')
        local_table = self.read_table(localtable)

@@ -1404,7 +1404,7 @@ def test_utf16_bom_skiprows(self):
        tm.assert_frame_equal(result, expected)

    def test_utf16_example(self):
        path = os.path.join(self.dirpath, 'utf16_ex.txt')
        path = tm.get_data_path('utf16_ex.txt')

        # it works! and is the right length
        result = self.read_table(path, encoding='utf-16')
@@ -1476,8 +1476,7 @@ def convert_score(x):
        tm.assert_frame_equal(result, result2)

    def test_unicode_encoding(self):
        pth = psplit(psplit(curpath())[0])[0]
        pth = os.path.join(pth, 'tests/data/unicode_series.csv')
        pth = tm.get_data_path('unicode_series.csv')

        result = self.read_csv(pth, header=None, encoding='latin-1')
        result = result.set_index(0)
@@ -2185,11 +2184,6 @@ def assert_same_values_and_dtype(res, exp):
    assert_almost_equal(res, exp)


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


if __name__ == '__main__':
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)
74 changes: 74 additions & 0 deletions pandas/io/tests/test_pickle.py
@@ -0,0 +1,74 @@
# pylint: disable=E1101,E1103,W0232

""" manage legacy pickle tests """

from datetime import datetime, timedelta
import operator
import pickle
import unittest
import nose
import os

import numpy as np
import pandas.util.testing as tm
import pandas as pd
from pandas import Index
from pandas.sparse.tests import test_sparse

class TestPickle(unittest.TestCase):
    _multiprocess_can_split_ = True

    def setUp(self):
        from pandas.io.tests.generate_legacy_pickles import create_data
        self.data = create_data()

    def compare(self, vf):

        # py3 compat when reading py2 pickle

        try:
            with open(vf,'rb') as fh:
                data = pickle.load(fh)
        except (ValueError):

            # we are trying to read a py3 pickle in py2.....
            return
        except:
            with open(vf,'rb') as fh:
                data = pickle.load(fh, encoding='latin1')

        for typ, dv in data.items():
            for dt, result in dv.items():

                expected = self.data[typ][dt]

                if isinstance(expected,Index):
                    self.assert_(expected.equals(result))
                    continue

                if typ.startswith('sp_'):
                    comparator = getattr(test_sparse,"assert_%s_equal" % typ)
                    comparator(result,expected,exact_indices=False)
                else:
                    comparator = getattr(tm,"assert_%s_equal" % typ)
                    comparator(result,expected)

    def test_read_pickles_0_10_1(self):

        pth = tm.get_data_path('legacy_pickle/0.10.1')
        for f in os.listdir(pth):
            vf = os.path.join(pth,f)
            self.compare(vf)

    def test_read_pickles_0_11_0(self):

        pth = tm.get_data_path('legacy_pickle/0.11.0')
        for f in os.listdir(pth):
            vf = os.path.join(pth,f)
            self.compare(vf)

if __name__ == '__main__':
    import nose
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   # '--with-coverage', '--cover-package=pandas.core'],
                   exit=False)
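Fixtures for a later release would follow the same pattern: run the generator script under the interpreters of interest, commit the output under `data/legacy_pickle/<version>/`, and add a matching test method to `TestPickle`. A hypothetical sketch, with an illustrative version number:

```python
    def test_read_pickles_0_12_0(self):

        # hypothetical: assumes data/legacy_pickle/0.12.0/ fixtures exist
        pth = tm.get_data_path('legacy_pickle/0.12.0')
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            self.compare(vf)
```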