|
1 | 1 | import nose
|
2 | 2 |
|
| 3 | +from contextlib import contextmanager |
3 | 4 | import os
|
4 | 5 | import datetime
|
5 | 6 | import numpy as np
|
|
10 | 11 | from pandas.compat import u
|
11 | 12 | from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
|
12 | 13 | date_range, period_range, Index)
|
| 14 | +from pandas.core.common import PerformanceWarning |
13 | 15 | from pandas.io.packers import to_msgpack, read_msgpack
|
14 | 16 | import pandas.util.testing as tm
|
15 | 17 | from pandas.util.testing import (ensure_clean, assert_index_equal,
|
@@ -57,6 +59,20 @@ def check_arbitrary(a, b):
|
57 | 59 | assert(a == b)
|
58 | 60 |
|
59 | 61 |
|
@contextmanager
def patch(ob, attr, value):
    """Temporarily set ``ob.attr`` to ``value`` for the duration of a block.

    Parameters
    ----------
    ob : object
        The object (module, class or instance) whose attribute is replaced.
    attr : str
        Name of the attribute to replace.
    value : object
        The temporary value.

    On exit, including when the block raises, ``ob`` is put back the way it
    was: an attribute that did not exist is deleted, an attribute stored on
    ``ob`` itself gets its old value back, and an attribute that was only
    inherited (for example a class attribute seen through an instance) has
    the temporary shadow removed so the inherited value shows through again.
    """
    missing = object()  # mark that the attribute never existed
    old = getattr(ob, attr, missing)
    try:
        was_local = attr in vars(ob)
    except TypeError:
        # ``ob`` has no ``__dict__`` (e.g. ``__slots__`` only); there is no
        # way to shadow an inherited value, so restore by plain assignment.
        was_local = True
    setattr(ob, attr, value)
    try:
        yield
    finally:
        if old is missing:
            delattr(ob, attr)
        elif not was_local and attr in vars(ob):
            # setattr created a new entry on ``ob`` that hides the inherited
            # value; dropping it restores the original lookup exactly.
            delattr(ob, attr)
        else:
            setattr(ob, attr, old)
| 74 | + |
| 75 | + |
60 | 76 | class TestPackers(tm.TestCase):
|
61 | 77 |
|
62 | 78 | def setUp(self):
|
@@ -539,17 +555,89 @@ def test_plain(self):
|
539 | 555 | for k in self.frame.keys():
|
540 | 556 | assert_frame_equal(self.frame[k], i_rec[k])
|
541 | 557 |
|
def _test_compression(self, compress):
    """Shared round-trip check for one compression setting.

    Passes ``self.frame`` through ``self.encode_decode`` with the given
    ``compress`` value, then, for every key, asserts that the decoded frame
    equals the source frame and that each of its internal block arrays is
    writeable.
    """
    decoded = self.encode_decode(self.frame, compress=compress)
    for key in self.frame.keys():
        actual, wanted = decoded[key], self.frame[key]
        assert_frame_equal(actual, wanted)
        # the decoded frames must be backed by writeable memory
        for blk in actual._data.blocks:
            self.assertTrue(blk.values.flags.writeable)
| 567 | + |
def test_compression_zlib(self):
    """Run the shared compression round-trip check with zlib.

    Skipped when ``_ZLIB_INSTALLED`` is false.
    """
    if _ZLIB_INSTALLED:
        self._test_compression('zlib')
    else:
        raise nose.SkipTest('no zlib')
546 | 572 |
|
def test_compression_blosc(self):
    """Run the shared compression round-trip check with blosc.

    Skipped when ``_BLOSC_INSTALLED`` is false.
    """
    if _BLOSC_INSTALLED:
        self._test_compression('blosc')
    else:
        raise nose.SkipTest('no blosc')
| 577 | + |
def _test_compression_warns_when_decompress_caches(self, compress):
    """Check behaviour when the decompressor keeps its result alive.

    The ``decompress`` function of the module named by ``compress`` is
    swapped for a wrapper that retains every result it returns, together
    with an independent copy of its bytes. Decoding ``self.frame`` under
    that wrapper must emit a ``PerformanceWarning`` with the expected
    message, must still produce equal and writeable frames, and mutating
    those frames must leave the retained buffers unchanged.
    """
    # the compression module is looked up by name in this module's globals
    codec = globals()[compress]
    original_decompress = codec.decompress

    retained = []   # the objects handed back by the real decompress
    snapshots = []  # byte-for-byte copies taken at decompress time

    def caching_decompress(payload):
        """mock decompress function that delegates to the real
        decompress but caches the result and a copy of the result.
        """
        out = original_decompress(payload)
        retained.append(out)  # keep the returned object referenced
        snapshots.append(bytearray(out))  # separate copy to compare later
        return out

    # per-dtype value that is added in place to the first row of a block
    increments = {
        np.dtype('float64'): 1.0,
        np.dtype('int32'): 1,
        np.dtype('object'): 'a',
        np.dtype('datetime64[ns]'): np.timedelta64(1, 'ns'),
        np.dtype('timedelta64[ns]'): np.timedelta64(1, 'ns'),
    }

    with patch(codec, 'decompress', caching_decompress), \
            tm.assert_produces_warning(PerformanceWarning) as caught:

        decoded = self.encode_decode(self.frame, compress=compress)
        for key in self.frame.keys():
            actual = decoded[key]
            wanted = self.frame[key]
            assert_frame_equal(actual, wanted)
            # the frames must be writeable even though the data had to
            # be copied
            for blk in actual._data.blocks:
                self.assertTrue(blk.values.flags.writeable)
                # write into the decoded data
                blk.values[0] += increments[blk.dtype]

    wanted_message = ('copying data after decompressing; this may mean that'
                      ' decompress is caching its result')
    for warning in caught:
        # every recorded warning must carry the expected text
        self.assertEqual(str(warning.message), wanted_message)

    for buf, control_buf in zip(retained, snapshots):
        # the writes above must not have reached the retained buffers
        self.assertEqual(buf, control_buf)
| 631 | + |
def test_compression_warns_when_decompress_caches_zlib(self):
    """Run the caching-decompressor warning check against zlib.

    Skipped when ``_ZLIB_INSTALLED`` is false.
    """
    if _ZLIB_INSTALLED:
        self._test_compression_warns_when_decompress_caches('zlib')
    else:
        raise nose.SkipTest('no zlib')
| 636 | + |
def test_compression_warns_when_decompress_caches_blosc(self):
    """Run the caching-decompressor warning check against blosc.

    Skipped when ``_BLOSC_INSTALLED`` is false.
    """
    # Guard on blosc, not zlib: the helper looks up the 'blosc' module, so
    # a zlib-based guard would let this run (and fail) without blosc.
    if not _BLOSC_INSTALLED:
        raise nose.SkipTest('no blosc')
    self._test_compression_warns_when_decompress_caches('blosc')
553 | 641 |
|
554 | 642 | def test_readonly_axis_blosc(self):
|
555 | 643 | # GH11880
|
|
0 commit comments