
Commit 29a709c

Merge pull request #4356 from cpcloud/fix-stata-testing
TST/BUG/CLN: make stata IO tests use temporary files for writing
2 parents: 5d2b85f + c98e099 · commit 29a709c

File tree

4 files changed: +41 -29 lines

doc/source/release.rst (+2)

@@ -52,6 +52,8 @@ pandas 0.13
     (:issue:`4102`, :issue:`4014`) in ``*.hist`` plotting methods
   - Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str
     representation of the index (:issue:`4136`)
+  - Fix running of stata IO tests. Now uses temporary files to write
+    (:issue:`4353`)

 pandas 0.12
 ===========

doc/source/v0.13.0.txt (+3)

@@ -30,6 +30,9 @@ Bug Fixes
 - Fixed bug in ``PeriodIndex.map`` where using ``str`` would return the str
   representation of the index (:issue:`4136`)

+- Fix running of stata IO tests. Now uses temporary files to write
+  (:issue:`4353`)
+
 See the :ref:`full release notes
 <release>` or issue tracker
 on GitHub for a complete list.

pandas/io/tests/test_stata.py (+35 -28)

@@ -10,9 +10,8 @@

 from pandas.core.frame import DataFrame, Series
 from pandas.io.parsers import read_csv
-from pandas.io.stata import read_stata, StataReader, StataWriter
+from pandas.io.stata import read_stata, StataReader
 import pandas.util.testing as tm
-from pandas.util.testing import ensure_clean
 from pandas.util.misc import is_little_endian


@@ -27,15 +26,12 @@ def setUp(self):
         self.dta3 = os.path.join(self.dirpath, 'stata3.dta')
         self.csv3 = os.path.join(self.dirpath, 'stata3.csv')
         self.dta4 = os.path.join(self.dirpath, 'stata4.dta')
-        self.dta5 = os.path.join(self.dirpath, 'stata5.dta')
-        self.dta6 = os.path.join(self.dirpath, 'stata6.dta')
         self.dta7 = os.path.join(self.dirpath, 'cancer.dta')
         self.csv7 = os.path.join(self.dirpath, 'cancer.csv')
         self.dta8 = os.path.join(self.dirpath, 'tbl19-3.dta')
         self.csv8 = os.path.join(self.dirpath, 'tbl19-3.csv')
         self.dta9 = os.path.join(self.dirpath, 'lbw.dta')
         self.csv9 = os.path.join(self.dirpath, 'lbw.csv')
-        self.dta10 = os.path.join(self.dirpath, 'stata10.dta')

     def read_dta(self, file):
         return read_stata(file, convert_dates=True)
@@ -46,9 +42,11 @@ def read_csv(self, file):
     def test_read_dta1(self):
         reader = StataReader(self.dta1)
         parsed = reader.data()
-        # Pandas uses np.nan as missing value. Thus, all columns will be of type float, regardless of their name.
+        # Pandas uses np.nan as missing value.
+        # Thus, all columns will be of type float, regardless of their name.
         expected = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
-                             columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
+                             columns=['float_miss', 'double_miss', 'byte_miss',
+                                      'int_miss', 'long_miss'])

         for i, col in enumerate(parsed.columns):
             np.testing.assert_almost_equal(
@@ -90,7 +88,9 @@ def test_read_dta2(self):
                 np.datetime64('NaT')
             )
             ],
-            columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date', 'monthly_date', 'quarterly_date', 'half_yearly_date', 'yearly_date']
+            columns=['datetime_c', 'datetime_big_c', 'date', 'weekly_date',
+                     'monthly_date', 'quarterly_date', 'half_yearly_date',
+                     'yearly_date']
         )

         with warnings.catch_warnings(record=True) as w:
@@ -125,34 +125,40 @@ def test_read_dta4(self):
             ["nine", "two", 9, np.nan, "nine"],
             ["ten", "one", "ten", np.nan, "ten"]
             ],
-            columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled', 'labeled_with_missings', 'float_labelled'])
+            columns=['fully_labeled', 'fully_labeled2', 'incompletely_labeled',
+                     'labeled_with_missings', 'float_labelled'])

         tm.assert_frame_equal(parsed, expected)

-    def test_write_dta5(self):
+    def test_read_write_dta5(self):
         if not is_little_endian():
-            raise nose.SkipTest("known failure of test_write_dta5 on non-little endian")
+            raise nose.SkipTest("known failure of test_write_dta5 on "
+                                "non-little endian")

         original = DataFrame([(np.nan, np.nan, np.nan, np.nan, np.nan)],
-                             columns=['float_miss', 'double_miss', 'byte_miss', 'int_miss', 'long_miss'])
+                             columns=['float_miss', 'double_miss', 'byte_miss',
+                                      'int_miss', 'long_miss'])
         original.index.name = 'index'

-        with ensure_clean(self.dta5) as path:
+        with tm.ensure_clean() as path:
            original.to_stata(path, None, False)
            written_and_read_again = self.read_dta(path)
-           tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
+           tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                 original)

     def test_write_dta6(self):
         if not is_little_endian():
-            raise nose.SkipTest("known failure of test_write_dta6 on non-little endian")
+            raise nose.SkipTest("known failure of test_write_dta6 on "
+                                "non-little endian")

         original = self.read_csv(self.csv3)
         original.index.name = 'index'

-        with ensure_clean(self.dta6) as path:
+        with tm.ensure_clean() as path:
            original.to_stata(path, None, False)
            written_and_read_again = self.read_dta(path)
-           tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
+           tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                 original)

     @nose.tools.nottest
     def test_read_dta7(self):
@@ -190,29 +196,30 @@ def test_read_dta9(self):
             decimal=3
         )

-    def test_read_dta10(self):
+    def test_read_write_dta10(self):
         if not is_little_endian():
-            raise nose.SkipTest("known failure of test_write_dta10 on non-little endian")
+            raise nose.SkipTest("known failure of test_write_dta10 on "
+                                "non-little endian")

-        original = DataFrame(
-            data=
-            [
-                ["string", "object", 1, 1.1, np.datetime64('2003-12-25')]
-            ],
-            columns=['string', 'object', 'integer', 'float', 'datetime'])
+        original = DataFrame(data=[["string", "object", 1, 1.1,
+                                    np.datetime64('2003-12-25')]],
+                             columns=['string', 'object', 'integer', 'float',
+                                      'datetime'])
         original["object"] = Series(original["object"], dtype=object)
         original.index.name = 'index'

-        with ensure_clean(self.dta10) as path:
+        with tm.ensure_clean() as path:
            original.to_stata(path, {'datetime': 'tc'}, False)
            written_and_read_again = self.read_dta(path)
-           tm.assert_frame_equal(written_and_read_again.set_index('index'), original)
+           tm.assert_frame_equal(written_and_read_again.set_index('index'),
+                                 original)

     def test_stata_doc_examples(self):
-        with ensure_clean(self.dta5) as path:
+        with tm.ensure_clean() as path:
             df = DataFrame(np.random.randn(10, 2), columns=list('AB'))
             df.to_stata(path)

+
 if __name__ == '__main__':
     nose.runmodule(argv=[__file__, '-vvs', '-x', '--pdb', '--pdb-failure'],
                    exit=False)
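
For context, the rewritten tests all follow the same round trip: write a frame to a path handed out by tm.ensure_clean(), read the file back with read_stata, and let the context manager delete the file on exit. A minimal standalone sketch of that usage (illustrative only, not part of the commit):

import numpy as np
from pandas import DataFrame
from pandas.io.stata import read_stata
import pandas.util.testing as tm

# Write a frame to a temporary path, read it back, and rely on
# ensure_clean() to remove the file once the block exits.
df = DataFrame(np.random.randn(10, 2), columns=list('AB'))

with tm.ensure_clean() as path:
    df.to_stata(path)
    roundtrip = read_stata(path)

print(roundtrip.head())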

pandas/util/testing.py (+1 -1)

@@ -86,7 +86,7 @@ def set_trace():
 #------------------------------------------------------------------------------
 # contextmanager to ensure the file cleanup
 @contextmanager
-def ensure_clean(filename = None):
+def ensure_clean(filename=None):
     # if we are not passed a filename, generate a temporary
     if filename is None:
         filename = tempfile.mkstemp()[1]
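
Only the first lines of ensure_clean appear in the hunk above. A minimal sketch of how such a cleanup context manager is typically written, assuming the usual try/finally removal (the real pandas helper may differ in its details):

import os
import tempfile
from contextlib import contextmanager

@contextmanager
def ensure_clean(filename=None):
    # if we are not passed a filename, generate a temporary one
    if filename is None:
        filename = tempfile.mkstemp()[1]
    try:
        yield filename
    finally:
        # best-effort cleanup: remove the file if the test left one behind
        if os.path.exists(filename):
            os.remove(filename)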
