Skip to content

TST/BUG/CLN: make stata IO tests use temporary files for writing #4356

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 25, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ pandas 0.13
(:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods
- Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str
representation of the index (:issue:`4136`)
- Fixed running of the Stata IO tests, which now write to temporary files
(:issue:`4353`)

pandas 0.12
===========
Expand Down
3 changes: 3 additions & 0 deletions doc/source/v0.13.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ Bug Fixes
- Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str
representation of the index (:issue:`4136`)

- Fixed running of the Stata IO tests, which now write to temporary files
(:issue:`4353`)

See the :ref:`full release notes
<release>` or issue tracker
on GitHub for a complete list.
63 changes: 35 additions & 28 deletions pandas/io/tests/test_stata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@

from pandas.core.frame import DataFrame, Series
from pandas.io.parsers import read_csv
from pandas.io.stata import read_stata, StataReader, StataWriter
from pandas.io.stata import read_stata, StataReader
import pandas.util.testing as tm
from pandas.util.testing import ensure_clean
from pandas.util.misc import is_little_endian


Expand All @@ -27,15 +26,12 @@ def setUp(self):
self.dta3 = os.path.join(self.dirpath, 'stata3.dta')
self.csv3 = os.path.join(self.dirpath, 'stata3.csv')
self.dta4 = os.path.join(self.dirpath, 'stata4.dta')
self.dta5 = os.path.join(self.dirpath, 'stata5.dta')
self.dta6 = os.path.join(self.dirpath, 'stata6.dta')
self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
self.csv7 = os.path.join(self.dirpath, 'cancer.csv')
self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')
self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
self.dta10 = os.path.join(self.dirpath, 'stata10.dta')

# Test helper: parse the Stata file at `file` with read_stata, always passing
# convert_dates=True so every test reads .dta files the same way.
def read_dta(self, file):
return read_stata(file, convert_dates=True)
Expand All @@ -46,9 +42,11 @@ def read_csv(self, file):
def test_read_dta1(self):
reader = StataReader(self.dta1)
parsed = reader.data()
# Pandas uses np.nan as missing value. Thus, all columns will be of type float, regardless of their name.
# Pandas uses np.nan as missing value.
# Thus, all columns will be of type float, regardless of their name.
expected = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
columns=['float_miss', 'double_miss', 'byte_miss',
'int_miss', 'long_miss'])

for i, col in enumerate(parsed.columns):
np.testing.assert_almost_equal(
Expand Down Expand Up @@ -90,7 +88,9 @@ def test_read_dta2(self):
np.datetime64('NaT')
)
],
columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date', 'monthly_date', 'quarterly_date', 'half_yearly_date', 'yearly_date']
columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
'monthly_date', 'quarterly_date', 'half_yearly_date',
'yearly_date']
)

with warnings.catch_warnings(record=True) as w:
Expand Down Expand Up @@ -125,34 +125,40 @@ def test_read_dta4(self):
["nine", "two", 9, np.nan, "nine"],
["ten", "one", "ten", np.nan, "ten"]
],
columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled', 'labeled_with_missings', 'float_labelled'])
columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled',
'labeled_with_missings', 'float_labelled'])

tm.assert_frame_equal(parsed, expected)

# Round-trip test: builds a one-row frame whose five columns are all np.nan,
# writes it with to_stata to a path supplied by ensure_clean, reads it back via
# self.read_dta and compares the result (re-indexed on 'index') with the original.
# Skipped on platforms that are not little endian.
# NOTE(review): the SkipTest message still names test_write_dta5 although the
# test is renamed to test_read_write_dta5 -- the message is stale.
def test_write_dta5(self):
def test_read_write_dta5(self):
if not is_little_endian():
raise nose.SkipTest("known failure of test_write_dta5 on non-little endian")
raise nose.SkipTest("known failure of test_write_dta5 on "
"non-little endian")

original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
columns=['float_miss', 'double_miss', 'byte_miss',
'int_miss', 'long_miss'])
original.index.name = 'index'

with ensure_clean(self.dta5) as path:
with tm.ensure_clean() as path:
original.to_stata(path, None, False)
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
tm.assert_frame_equal(written_and_read_again.set_index('index'),
original)

# Round-trip test: loads the frame from self.csv3 via self.read_csv, writes it
# with to_stata to a path supplied by ensure_clean, reads it back via
# self.read_dta and compares the result (re-indexed on 'index') with the original.
# Skipped on platforms that are not little endian.
def test_write_dta6(self):
if not is_little_endian():
raise nose.SkipTest("known failure of test_write_dta6 on non-little endian")
raise nose.SkipTest("known failure of test_write_dta6 on "
"non-little endian")

original = self.read_csv(self.csv3)
original.index.name = 'index'

with ensure_clean(self.dta6) as path:
with tm.ensure_clean() as path:
original.to_stata(path, None, False)
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
tm.assert_frame_equal(written_and_read_again.set_index('index'),
original)

@nose.tools.nottest
def test_read_dta7(self):
Expand Down Expand Up @@ -190,29 +196,30 @@ def test_read_dta9(self):
decimal=3
)

# Round-trip test for a one-row frame with string, object, integer, float and
# datetime columns: writes it with to_stata to a path supplied by ensure_clean,
# reads it back via self.read_dta and compares the result (re-indexed on
# 'index') with the original. Skipped on platforms that are not little endian.
# The second positional argument {'datetime': 'tc'} is presumably the
# convert_dates mapping for the datetime column -- confirm against to_stata.
# NOTE(review): the SkipTest message names test_write_dta10, which matches
# neither test_read_dta10 nor the renamed test_read_write_dta10.
def test_read_dta10(self):
def test_read_write_dta10(self):
if not is_little_endian():
raise nose.SkipTest("known failure of test_write_dta10 on non-little endian")
raise nose.SkipTest("known failure of test_write_dta10 on "
"non-little endian")

original = DataFrame(
data=
[
["string", "object", 1, 1.1, np.datetime64('2003-12-25')]
],
columns=['string', 'object', 'integer', 'float', 'datetime'])
original = DataFrame(data=[["string", "object", 1, 1.1,
np.datetime64('2003-12-25')]],
columns=['string', 'object', 'integer', 'float',
'datetime'])
original["object"] = Series(original["object"], dtype=object)
original.index.name = 'index'

with ensure_clean(self.dta10) as path:
with tm.ensure_clean() as path:
original.to_stata(path, {'datetime': 'tc'}, False)
written_and_read_again = self.read_dta(path)
tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
tm.assert_frame_equal(written_and_read_again.set_index('index'),
original)

# Smoke test: writes a 10x2 frame of random normals (columns 'A' and 'B') with
# to_stata to a path supplied by ensure_clean. Nothing is read back or
# asserted; the test only checks that the write does not raise.
def test_stata_doc_examples(self):
with ensure_clean(self.dta5) as path:
with tm.ensure_clean() as path:
df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
df.to_stata(path)


# Allow running this test module directly through nose. The argv flags request
# verbose, uncaptured output ('-vvs'), stopping at the first failure ('-x') and
# dropping into pdb on errors and failures ('--pdb', '--pdb-failure').
# exit=False keeps nose from calling sys.exit() when the run finishes.
if __name__ == '__main__':
nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
exit=False)
2 changes: 1 addition & 1 deletion pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def set_trace():
#------------------------------------------------------------------------------
# contextmanager to ensure the file cleanup
@contextmanager
def ensure_clean(filename = None):
def ensure_clean(filename=None):
# if we are not passed a filename, generate a temporary
if filename is None:
filename = tempfile.mkstemp()[1]
Expand Down