TST/CLN: legacy pickle testing #3310

Merged: 5 commits, Apr 12, 2013
1 change: 1 addition & 0 deletions RELEASE.rst
@@ -293,6 +293,7 @@ pandas 0.11.0
- fixed pretty printing of sets (GH3294_)
- Panel() and Panel.from_dict() now respect ordering when given an OrderedDict (GH3303_)
- DataFrame where with a datetimelike incorrectly selecting (GH3311_)
- Ensure pickles created in py2 can be read in py3

.. _GH3294: https://github.com/pydata/pandas/issues/3294
.. _GH622: https://github.com/pydata/pandas/issues/622
10 changes: 5 additions & 5 deletions pandas/core/common.py
@@ -1575,12 +1575,12 @@ def load(path):
-------
unpickled : type of object stored in file
"""
-    f = open(path, 'rb')
     try:
-        return pickle.load(f)
-    finally:
-        f.close()
-
+        with open(path,'rb') as fh:
+            return pickle.load(fh)
+    except:
+        with open(path,'rb') as fh:
+            return pickle.load(fh, encoding='latin1')

class UTF8Recoder:
"""
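For reference, a minimal usage sketch of the new fallback; the fixture path below is illustrative, and `pd.load` is assumed to be the 0.11-era top-level alias for this helper:

```python
import pandas as pd

# A pickle written under Python 2 previously tended to fail with a
# UnicodeDecodeError when loaded under Python 3; the retry with
# encoding='latin1' now handles it.
obj = pd.load('pandas/io/tests/data/legacy_pickle/0.10.1/platform_py2.pickle')
```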
File renamed without changes.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
18 changes: 18 additions & 0 deletions pandas/io/tests/data/unicode_series.csv
@@ -0,0 +1,18 @@
1617,King of New York (1990)
1618,All Things Fair (1996)
1619,"Sixth Man, The (1997)"
1620,Butterfly Kiss (1995)
1621,"Paris, France (1993)"
1622,"C�r�monie, La (1995)"
1623,Hush (1998)
1624,Nightwatch (1997)
1625,Nobody Loves Me (Keiner liebt mich) (1994)
1626,"Wife, The (1995)"
1627,Lamerica (1994)
1628,Nico Icon (1995)
1629,"Silence of the Palace, The (Saimt el Qusur) (1994)"
1630,"Slingshot, The (1993)"
1631,Land and Freedom (Tierra y libertad) (1995)
1632,Á köldum klaka (Cold Fever) (1994)
1633,Etz Hadomim Tafus (Under the Domin Tree) (1994)
1634,Two Friends (1986)
File renamed without changes.
119 changes: 119 additions & 0 deletions pandas/io/tests/generate_legacy_pickles.py
@@ -0,0 +1,119 @@
""" self-contained to write legacy pickle files """

def _create_sp_series():

    import numpy as np
    from pandas import bdate_range, SparseSeries

    nan = np.nan

    # nan-based
    arr = np.arange(15, dtype=float)
    index = np.arange(15)
    arr[7:12] = nan
    arr[-1:] = nan

    date_index = bdate_range('1/1/2011', periods=len(index))
    bseries = SparseSeries(arr, index=index, kind='block')
    bseries.name = 'bseries'
    return bseries

def _create_sp_frame():
    import numpy as np
    from pandas import bdate_range, SparseDataFrame

    nan = np.nan

    data = {'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
            'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
            'C': np.arange(10),
            'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan]}

    dates = bdate_range('1/1/2011', periods=10)
    return SparseDataFrame(data, index=dates)

def create_data():
    """ create the pickle data """

    import numpy as np
    import pandas
    from pandas import (Series,DataFrame,Panel,
                        SparseSeries,SparseDataFrame,SparsePanel,
                        Index,MultiIndex,PeriodIndex,
                        date_range,bdate_range,Timestamp)
    nan = np.nan

    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E' : [0., 1, Timestamp('20100101'),'foo',2.],
    }

    index = dict(int = Index(np.arange(10)),
                 date = date_range('20130101',periods=10))
    mi = dict(reg = MultiIndex.from_tuples(zip(*[['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
                                                 ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]),
                                           names=['first', 'second']))
    series = dict(float = Series(data['A']),
                  int = Series(data['B']),
                  mixed = Series(data['E']))
    frame = dict(float = DataFrame(dict(A = series['float'], B = series['float'] + 1)),
                 int = DataFrame(dict(A = series['int'] , B = series['int'] + 1)),
                 mixed = DataFrame(dict([ (k,data[k]) for k in ['A','B','C','D']])))
    panel = dict(float = Panel(dict(ItemA = frame['float'], ItemB = frame['float']+1)))



    return dict( series = series,
                 frame = frame,
                 panel = panel,
                 index = index,
                 mi = mi,
                 sp_series = dict(float = _create_sp_series()),
                 sp_frame = dict(float = _create_sp_frame())
                 )

def write_legacy_pickles():

    # force our cwd to be the first searched
    import sys
    sys.path.insert(0,'.')

    import os
    import numpy as np
    import pandas
    import pandas.util.testing as tm
    import platform as pl
    import cPickle as pickle

    print("This script generates a pickle file for the current arch, system, and python version")

    base_dir, _ = os.path.split(os.path.abspath(__file__))
    base_dir = os.path.join(base_dir,'data/legacy_pickle')

    # could make this a parameter?
    version = None


    if version is None:
        version = pandas.__version__
    pth = os.path.join(base_dir, str(version))
    try:
        os.mkdir(pth)
    except:
        pass

    # construct a reasonable platform name
    f = '_'.join([ str(pl.machine()), str(pl.system().lower()), str(pl.python_version()) ])
    pth = os.path.abspath(os.path.join(pth,'%s.pickle' % f))

    fh = open(pth,'wb')
    pickle.dump(create_data(),fh,pickle.HIGHEST_PROTOCOL)
    fh.close()

    print("created pickle file: %s" % pth)

if __name__ == '__main__':
    write_legacy_pickles()
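The script above is meant to be run once per Python version and platform of interest (under Python 2 it produces the py2-written fixtures). A small sketch of the output location it derives from the running interpreter; the example file name is illustrative and machine dependent:

```python
import os
import platform as pl

import pandas

# Mirrors the naming used by write_legacy_pickles() above.
version = pandas.__version__
fname = '_'.join([str(pl.machine()), str(pl.system().lower()), str(pl.python_version())])
print(os.path.join('data', 'legacy_pickle', str(version), '%s.pickle' % fname))
# e.g. data/legacy_pickle/0.11.0/x86_64_linux_2.7.3.pickle
```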
7 changes: 1 addition & 6 deletions pandas/io/tests/test_cparser.py
@@ -31,15 +31,10 @@
import pandas._parser as parser


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


class TestCParser(unittest.TestCase):

    def setUp(self):
        self.dirpath = curpath()
        self.dirpath = tm.get_data_path('/')
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
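The `curpath()` helper removed here (and in the test modules below) is replaced by `tm.get_data_path()`. Roughly, such a helper resolves a name against the `data/` directory next to the calling test module; a minimal sketch under that assumption (the actual `pandas.util.testing` implementation may differ):

```python
import inspect
import os

def get_data_path(f=''):
    """Return the path of a data file relative to the calling test module."""
    caller_file = inspect.stack()[1][1]  # file of the calling test module
    base_dir = os.path.abspath(os.path.dirname(caller_file))
    return os.path.join(base_dir, 'data/%s' % f)
```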
6 changes: 1 addition & 5 deletions pandas/io/tests/test_excel.py
@@ -65,10 +65,6 @@ def _skip_if_no_excelsuite():
    _skip_if_no_openpyxl()


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth

_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()
_frame = DataFrame(_seriesd)[:10]
@@ -81,7 +77,7 @@ def curpath():
class ExcelTests(unittest.TestCase):

    def setUp(self):
        self.dirpath = curpath()
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
16 changes: 5 additions & 11 deletions pandas/io/tests/test_parsers.py
@@ -59,7 +59,7 @@ def setUp(self):
        import warnings
        warnings.filterwarnings(action='ignore', category=FutureWarning)

        self.dirpath = curpath()
        self.dirpath = tm.get_data_path()
        self.csv1 = os.path.join(self.dirpath, 'test1.csv')
        self.csv2 = os.path.join(self.dirpath, 'test2.csv')
        self.xls1 = os.path.join(self.dirpath, 'test.xls')
@@ -1208,7 +1208,7 @@ def test_url(self):
        url = ('https://raw.github.com/pydata/pandas/master/'
               'pandas/io/tests/salary.table')
        url_table = self.read_table(url)
        dirpath = curpath()
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'salary.table')
        local_table = self.read_table(localtable)
        tm.assert_frame_equal(url_table, local_table)
@@ -1229,7 +1229,7 @@ def test_file(self):
        # FILE
        if sys.version_info[:2] < (2, 6):
            raise nose.SkipTest("file:// not supported with Python < 2.6")
        dirpath = curpath()
        dirpath = tm.get_data_path()
        localtable = os.path.join(dirpath, 'salary.table')
        local_table = self.read_table(localtable)

@@ -1404,7 +1404,7 @@ def test_utf16_bom_skiprows(self):
        tm.assert_frame_equal(result, expected)

    def test_utf16_example(self):
        path = os.path.join(self.dirpath, 'utf16_ex.txt')
        path = tm.get_data_path('utf16_ex.txt')

        # it works! and is the right length
        result = self.read_table(path, encoding='utf-16')
@@ -1476,8 +1476,7 @@ def convert_score(x):
        tm.assert_frame_equal(result, result2)

    def test_unicode_encoding(self):
        pth = psplit(psplit(curpath())[0])[0]
        pth = os.path.join(pth, 'tests/data/unicode_series.csv')
        pth = tm.get_data_path('unicode_series.csv')

        result = self.read_csv(pth, header=None, encoding='latin-1')
        result = result.set_index(0)
@@ -2185,11 +2184,6 @@ def assert_same_values_and_dtype(res, exp):
    assert_almost_equal(res, exp)


def curpath():
    pth, _ = os.path.split(os.path.abspath(__file__))
    return pth


if __name__ == '__main__':
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   exit=False)
74 changes: 74 additions & 0 deletions pandas/io/tests/test_pickle.py
@@ -0,0 +1,74 @@
# pylint: disable=E1101,E1103,W0232

""" manage legacy pickle tests """

from datetime import datetime, timedelta
import operator
import pickle
import unittest
import nose
import os

import numpy as np
import pandas.util.testing as tm
import pandas as pd
from pandas import Index
from pandas.sparse.tests import test_sparse

class TestPickle(unittest.TestCase):
    _multiprocess_can_split_ = True

    def setUp(self):
        from pandas.io.tests.generate_legacy_pickles import create_data
        self.data = create_data()

    def compare(self, vf):

        # py3 compat when reading py2 pickle

        try:
            with open(vf,'rb') as fh:
                data = pickle.load(fh)
        except (ValueError):

            # we are trying to read a py3 pickle in py2.....
            return
        except:
            with open(vf,'rb') as fh:
                data = pickle.load(fh, encoding='latin1')

        for typ, dv in data.items():
            for dt, result in dv.items():

                expected = self.data[typ][dt]

                if isinstance(expected,Index):
                    self.assert_(expected.equals(result))
                    continue

                if typ.startswith('sp_'):
                    comparator = getattr(test_sparse,"assert_%s_equal" % typ)
                    comparator(result,expected,exact_indices=False)
                else:
                    comparator = getattr(tm,"assert_%s_equal" % typ)
                    comparator(result,expected)

    def test_read_pickles_0_10_1(self):

        pth = tm.get_data_path('legacy_pickle/0.10.1')
        for f in os.listdir(pth):
            vf = os.path.join(pth,f)
            self.compare(vf)

    def test_read_pickles_0_11_0(self):

        pth = tm.get_data_path('legacy_pickle/0.11.0')
        for f in os.listdir(pth):
            vf = os.path.join(pth,f)
            self.compare(vf)

if __name__ == '__main__':
    import nose
    nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                   # '--with-coverage', '--cover-package=pandas.core'],
                   exit=False)
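Fixtures for a later release would follow the same pattern: run the generator script under the interpreters of interest, commit the output under `data/legacy_pickle/<version>/`, and add a matching test method to `TestPickle`. A hypothetical sketch, with an illustrative version number:

```python
    def test_read_pickles_0_12_0(self):

        # hypothetical: assumes data/legacy_pickle/0.12.0/ fixtures exist
        pth = tm.get_data_path('legacy_pickle/0.12.0')
        for f in os.listdir(pth):
            vf = os.path.join(pth, f)
            self.compare(vf)
```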