|
| 1 | +import nose |
| 2 | +import sys |
| 3 | +import os |
| 4 | +import warnings |
| 5 | +import tempfile |
| 6 | +from contextlib import contextmanager |
| 7 | + |
| 8 | +import datetime |
| 9 | +import numpy as np |
| 10 | + |
| 11 | +import pandas |
| 12 | +from pandas import (Series, DataFrame, Panel, MultiIndex, Categorical, bdate_range, |
| 13 | + date_range, Index, DatetimeIndex, isnull) |
| 14 | +from pandas.io.pytables import (HDFStore, get_store, Term, read_hdf, |
| 15 | + IncompatibilityWarning, PerformanceWarning, |
| 16 | + AttributeConflictWarning, DuplicateWarning, |
| 17 | + PossibleDataLossError, ClosedFileError) |
| 18 | +from pandas.io import pytables as pytables |
| 19 | +import pandas.util.testing as tm |
| 20 | +from pandas.util.testing import (assert_panel4d_equal, |
| 21 | + assert_panel_equal, |
| 22 | + assert_frame_equal, |
| 23 | + assert_series_equal) |
| 24 | +from pandas import concat, Timestamp |
| 25 | +from pandas import compat |
| 26 | +from pandas.compat import range, lrange, u |
| 27 | +from pandas.util.testing import assert_produces_warning |
| 28 | + |
| 29 | +try: |
| 30 | + import tables |
| 31 | +except ImportError: |
| 32 | + raise nose.SkipTest('no pytables') |
| 33 | + |
| 34 | +from distutils.version import LooseVersion |
| 35 | + |
# Compression library used by default: blosc when the installed PyTables
# reports version 2.2 or newer, zlib otherwise.
_default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2'
                       else 'zlib')
| 38 | + |
| 39 | +_multiprocess_can_split_ = False |
| 40 | + |
# helpers used by the context managers below to ensure file cleanup
def safe_remove(path):
    """Delete the file at ``path`` on a best-effort basis.

    Parameters
    ----------
    path : str or None
        File to delete. ``None`` means there is nothing to remove.

    Returns
    -------
    None

    Notes
    -----
    File-system errors (missing file, file still in use, ...) are ignored
    because this is only used for test cleanup. Anything that is not an
    ``OSError`` (e.g. ``KeyboardInterrupt``) is allowed to propagate.
    """
    if path is None:
        return
    try:
        os.remove(path)
    except OSError:
        # cleanup only: a file that is already gone or cannot be deleted
        # must not fail the test that used it
        pass
| 48 | + |
| 49 | + |
def safe_close(store):
    """Close ``store`` on a best-effort basis.

    Parameters
    ----------
    store : object with a ``close()`` method, or None
        ``None`` is accepted and ignored.

    Returns
    -------
    None

    Notes
    -----
    Any ordinary exception raised by ``store.close()`` is swallowed, since
    this is only used for test cleanup. ``KeyboardInterrupt`` and
    ``SystemExit`` are not ``Exception`` subclasses and still propagate.
    """
    if store is None:
        return
    try:
        store.close()
    except Exception:
        # cleanup only: a store that fails to close must not hide the
        # outcome of the test that used it
        pass
| 56 | + |
| 57 | + |
def create_tempfile(path):
    """Return ``path`` joined onto the system temporary directory.

    Only the name is built; no file is created or opened here.
    """
    tmp_dir = tempfile.gettempdir()
    return os.path.join(tmp_dir, path)
| 61 | + |
| 62 | + |
@contextmanager
def ensure_clean_store(path, mode='a', complevel=None, complib=None,
                       fletcher32=False):
    """Context manager yielding an open ``HDFStore`` that is cleaned up on exit.

    Parameters
    ----------
    path : str
        File to open. A bare file name (no directory part) is placed in
        the temporary directory via ``create_tempfile``.
    mode, complevel, complib, fletcher32
        Forwarded unchanged to ``HDFStore``.

    Yields
    ------
    HDFStore
        The opened store.

    Notes
    -----
    On exit the store is closed and, when ``mode`` is ``'w'`` or ``'a'``,
    the file is removed. Both steps are best-effort (``safe_close`` /
    ``safe_remove``).
    """
    # put in the temporary path if we don't have one already
    if not os.path.dirname(path):
        path = create_tempfile(path)

    # bind before the ``try`` so that a failing HDFStore() call surfaces its
    # own error instead of a NameError raised from the cleanup code
    store = None
    try:
        store = HDFStore(path, mode=mode, complevel=complevel,
                         complib=complib, fletcher32=fletcher32)
        yield store
    finally:
        safe_close(store)
        if mode in ('w', 'a'):
            safe_remove(path)
| 80 | + |
| 81 | + |
@contextmanager
def ensure_clean_path(path):
    """Context manager yielding temporary file name(s) that are deleted on exit.

    Parameters
    ----------
    path : str or list of str
        File name, or list of file names, to place in the temporary
        directory via ``create_tempfile``.

    Yields
    ------
    str or list of str
        A single file name when ``path`` is not a list, otherwise the list
        of file names. The files themselves are not created or opened.

    Notes
    -----
    On exit every yielded name is passed to ``safe_remove``.
    """
    is_list = isinstance(path, list)
    # built before the ``try`` so the cleanup loop always has a bound name
    filenames = [create_tempfile(p) for p in (path if is_list else [path])]
    try:
        yield filenames if is_list else filenames[0]
    finally:
        for f in filenames:
            safe_remove(f)
| 99 | + |
| 100 | + |
# set every PyTables thread-count parameter to 1 so we don't have file
# sharing
for _thread_param in ('MAX_NUMEXPR_THREADS', 'MAX_BLOSC_THREADS',
                      'MAX_THREADS'):
    setattr(tables.parameters, _thread_param, 1)
del _thread_param
| 105 | + |
| 106 | +def _maybe_remove(store, key): |
| 107 | + """For tests using tables, try removing the table to be sure there is |
| 108 | + no content from previous tests using the same table name.""" |
| 109 | + try: |
| 110 | + store.remove(key) |
| 111 | + except: |
| 112 | + pass |
| 113 | + |
| 114 | + |
def compat_assert_produces_warning(w, f):
    """Call ``f``, checking for warning ``w`` except under PY3.

    When ``compat.PY3`` is false, ``f`` runs inside
    ``tm.assert_produces_warning(expected_warning=w)``; under PY3 it is
    simply called with no warning check.
    """
    if not compat.PY3:
        with tm.assert_produces_warning(expected_warning=w):
            f()
        return

    f()
| 122 | + |
| 123 | + |
class TestHDFStore(tm.TestCase):
    """Iterator / chunked selection tests for ``HDFStore`` and ``read_hdf``."""

    @classmethod
    def setUpClass(cls):
        super(TestHDFStore, cls).setUpClass()

        # Pytables 3.0.0 deprecates lots of things
        tm.reset_testing_mode()

    @classmethod
    def tearDownClass(cls):
        super(TestHDFStore, cls).tearDownClass()

        # Pytables 3.0.0 deprecates lots of things
        tm.set_testing_mode()

    def setUp(self):
        # silence FutureWarning and pick a file name for this test; the
        # name embeds the string returned by tm.rands(10)
        warnings.filterwarnings(action='ignore', category=FutureWarning)

        self.path = 'tmp.__%s__.h5' % tm.rands(10)

    def tearDown(self):
        # nothing to undo here; files are removed by the context managers
        pass

    def test_select_iterator_8014(self):
        # Compares plain and iterator-based selects, with and without a
        # ``where`` clause on the index, against the frames that were
        # appended.  The "works" / "fails" remarks are kept from the
        # original notes in this test.

        # single table
        with ensure_clean_store(self.path) as store:

            # append three batches of rows to the same table
            _maybe_remove(store, 'df')
            frames = []
            for nrows in (200000, 58689, 41375):
                batch = tm.makeTimeDataFrame(nrows, 'S')
                store.append('df', batch)
                frames.append(batch)
            expected = concat(frames)

            beg_dt = expected.index[0]
            end_dt = expected.index[-1]
            #expected = store.select('df')

            # the three where clauses exercised below
            from_begin = "index >= '%s'" % beg_dt
            up_to_end = "index <= '%s'" % end_dt
            whole_range = "index >= '%s' & index <= '%s'" % (beg_dt, end_dt)

            # select w/o iteration and no where clause works
            tm.assert_frame_equal(expected, store.select('df'))

            # select w/iterator and no where clause works
            chunks = [c for c in store.select('df', iterator=True)]
            tm.assert_frame_equal(expected, concat(chunks))

            # select w/o iterator and where clause, single term, begin
            # of range, works
            tm.assert_frame_equal(expected,
                                  store.select('df', where=from_begin))

            # select w/o iterator and where clause, single term, end
            # of range, fails
            # (the select still runs; the comparison below is disabled)
            store.select('df', where=up_to_end)
            #tm.assert_frame_equal(expected, result)

            # select w/o iterator and where clause, inclusive range,
            # fails
            tm.assert_frame_equal(expected,
                                  store.select('df', where=whole_range))

            # select w/iterator and where clause, in this order:
            #   single term, begin of range, fails
            #   single term, end of range, fails
            #   inclusive range, fails
            for where in (from_begin, up_to_end, whole_range):
                chunks = [c for c in
                          store.select('df', where=where, iterator=True)]
                tm.assert_frame_equal(expected, concat(chunks))

    def test_select_iterator(self):
        # Chunked / iterator reads must reassemble to the same frame as a
        # plain read, for a single table, for read_hdf, and for
        # select_as_multiple.

        # single table
        with ensure_clean_store(self.path) as store:

            df = tm.makeTimeDataFrame(500)
            _maybe_remove(store, 'df')
            store.append('df', df)

            expected = store.select('df')

            chunks = [c for c in store.select('df', iterator=True)]
            tm.assert_frame_equal(expected, concat(chunks))

            chunks = [c for c in store.select('df', chunksize=100)]
            self.assertEqual(len(chunks), 5)
            tm.assert_frame_equal(expected, concat(chunks))

            chunks = [c for c in store.select('df', chunksize=150)]
            tm.assert_frame_equal(concat(chunks), expected)

        # a frame written without format='table' cannot be read in chunks
        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df_non_table')
            self.assertRaises(TypeError, read_hdf, path, 'df_non_table',
                              chunksize=100)
            self.assertRaises(TypeError, read_hdf, path, 'df_non_table',
                              iterator=True)

        with ensure_clean_path(self.path) as path:

            df = tm.makeTimeDataFrame(500)
            df.to_hdf(path, 'df', format='table')

            chunks = [c for c in read_hdf(path, 'df', chunksize=100)]

            self.assertEqual(len(chunks), 5)
            result = concat(chunks)
            tm.assert_frame_equal(result, df)
            tm.assert_frame_equal(result, read_hdf(path, 'df'))

        # multiple

        with ensure_clean_store(self.path) as store:

            df1 = tm.makeTimeDataFrame(500)
            store.append('df1', df1, data_columns=True)
            df2 = tm.makeTimeDataFrame(500).rename(
                columns=lambda x: "%s_2" % x)
            df2['foo'] = 'bar'
            store.append('df2', df2)

            df = concat([df1, df2], axis=1)

            # full selection
            expected = store.select_as_multiple(
                ['df1', 'df2'], selector='df1')
            chunks = [c for c in store.select_as_multiple(
                ['df1', 'df2'], selector='df1', chunksize=150)]
            tm.assert_frame_equal(expected, concat(chunks))

            # where selection (disabled)
            #expected = store.select_as_multiple(
            #    ['df1', 'df2'], where= Term('A>0'), selector='df1')
            #results = []
            #for s in store.select_as_multiple(
            #    ['df1', 'df2'], where= Term('A>0'), selector='df1', chunksize=25):
            #    results.append(s)
            #result = concat(results)
            #tm.assert_frame_equal(expected, result)
| 312 | + |
| 313 | + |
| 314 | +def _test_sort(obj): |
| 315 | + if isinstance(obj, DataFrame): |
| 316 | + return obj.reindex(sorted(obj.index)) |
| 317 | + elif isinstance(obj, Panel): |
| 318 | + return obj.reindex(major=sorted(obj.major_axis)) |
| 319 | + else: |
| 320 | + raise ValueError('type not supported here') |
| 321 | + |
| 322 | + |
if __name__ == '__main__':
    import nose

    # run this module's tests through nose when executed as a script
    nose_argv = [__file__, '-vvs', '-x', '--pdb', '--pdb-failure']
    nose.runmodule(argv=nose_argv, exit=False)
0 commit comments