|
10 | 10 | from pandas.compat import u
|
11 | 11 | from pandas import (Series, DataFrame, Panel, MultiIndex, bdate_range,
|
12 | 12 | date_range, period_range, Index)
|
| 13 | +from pandas.core.common import PerformanceWarning |
13 | 14 | from pandas.io.packers import to_msgpack, read_msgpack
|
14 | 15 | import pandas.util.testing as tm
|
15 | 16 | from pandas.util.testing import (ensure_clean, assert_index_equal,
|
16 | 17 | assert_series_equal,
|
17 |
| - assert_frame_equal) |
| 18 | + assert_frame_equal, |
| 19 | + patch) |
18 | 20 | from pandas.tests.test_panel import assert_panel_equal
|
19 | 21 |
|
20 | 22 | import pandas
|
@@ -539,17 +541,126 @@ def test_plain(self):
|
539 | 541 | for k in self.frame.keys():
|
540 | 542 | assert_frame_equal(self.frame[k], i_rec[k])
|
541 | 543 |
|
542 |
| - def test_compression_zlib(self): |
543 |
| - i_rec = self.encode_decode(self.frame, compress='zlib') |
| 544 | + def _test_compression(self, compress): |
| 545 | + i_rec = self.encode_decode(self.frame, compress=compress) |
544 | 546 | for k in self.frame.keys():
|
545 |
| - assert_frame_equal(self.frame[k], i_rec[k]) |
| 547 | + value = i_rec[k] |
| 548 | + expected = self.frame[k] |
| 549 | + assert_frame_equal(value, expected) |
| 550 | + # make sure that we can write to the new frames |
| 551 | + for block in value._data.blocks: |
| 552 | + self.assertTrue(block.values.flags.writeable) |
| 553 | + |
| 554 | + def test_compression_zlib(self): |
| 555 | + if not _ZLIB_INSTALLED: |
| 556 | + raise nose.SkipTest('no zlib') |
| 557 | + self._test_compression('zlib') |
546 | 558 |
|
547 | 559 | def test_compression_blosc(self):
|
548 | 560 | if not _BLOSC_INSTALLED:
|
549 | 561 | raise nose.SkipTest('no blosc')
|
550 |
| - i_rec = self.encode_decode(self.frame, compress='blosc') |
551 |
| - for k in self.frame.keys(): |
552 |
| - assert_frame_equal(self.frame[k], i_rec[k]) |
| 562 | + self._test_compression('blosc') |
| 563 | + |
| 564 | + def _test_compression_warns_when_decompress_caches(self, compress): |
| 565 | + not_garbage = [] |
| 566 | + control = [] # copied data |
| 567 | + |
| 568 | + compress_module = globals()[compress] |
| 569 | + real_decompress = compress_module.decompress |
| 570 | + |
| 571 | + def decompress(ob): |
| 572 | + """mock decompress function that delegates to the real |
| 573 | + decompress but caches the result and a copy of the result. |
| 574 | + """ |
| 575 | + res = real_decompress(ob) |
| 576 | + not_garbage.append(res) # hold a reference to this bytes object |
| 577 | + control.append(bytearray(res)) # copy the data here to check later |
| 578 | + return res |
| 579 | + |
| 580 | + # types mapped to values to add in place. |
| 581 | + rhs = { |
| 582 | + np.dtype('float64'): 1.0, |
| 583 | + np.dtype('int32'): 1, |
| 584 | + np.dtype('object'): 'a', |
| 585 | + np.dtype('datetime64[ns]'): np.timedelta64(1, 'ns'), |
| 586 | + np.dtype('timedelta64[ns]'): np.timedelta64(1, 'ns'), |
| 587 | + } |
| 588 | + |
| 589 | + with patch(compress_module, 'decompress', decompress), \ |
| 590 | + tm.assert_produces_warning(PerformanceWarning) as ws: |
| 591 | + |
| 592 | + i_rec = self.encode_decode(self.frame, compress=compress) |
| 593 | + for k in self.frame.keys(): |
| 594 | + |
| 595 | + value = i_rec[k] |
| 596 | + expected = self.frame[k] |
| 597 | + assert_frame_equal(value, expected) |
| 598 | + # make sure that we can write to the new frames even though |
| 599 | + # we needed to copy the data |
| 600 | + for block in value._data.blocks: |
| 601 | + self.assertTrue(block.values.flags.writeable) |
| 602 | + # mutate the data in some way |
| 603 | + block.values[0] += rhs[block.dtype] |
| 604 | + |
| 605 | + for w in ws: |
| 606 | + # check the messages from our warnings |
| 607 | + self.assertEqual( |
| 608 | + str(w.message), |
| 609 | + 'copying data after decompressing; this may mean that' |
| 610 | + ' decompress is caching its result', |
| 611 | + ) |
| 612 | + |
| 613 | + for buf, control_buf in zip(not_garbage, control): |
| 614 | + # make sure none of our mutations above affected the |
| 615 | + # original buffers |
| 616 | + self.assertEqual(buf, control_buf) |
| 617 | + |
| 618 | + def test_compression_warns_when_decompress_caches_zlib(self): |
| 619 | + if not _ZLIB_INSTALLED: |
| 620 | + raise nose.SkipTest('no zlib') |
| 621 | + self._test_compression_warns_when_decompress_caches('zlib') |
| 622 | + |
| 623 | + def test_compression_warns_when_decompress_caches_blosc(self): |
| 624 | + if not _BLOSC_INSTALLED: |
| 625 | + raise nose.SkipTest('no blosc') |
| 626 | + self._test_compression_warns_when_decompress_caches('blosc') |
| 627 | + |
| 628 | + def _test_small_strings_no_warn(self, compress): |
| 629 | + empty = np.array([], dtype='uint8') |
| 630 | + with tm.assert_produces_warning(None): |
| 631 | + empty_unpacked = self.encode_decode(empty, compress=compress) |
| 632 | + |
| 633 | + np.testing.assert_array_equal(empty_unpacked, empty) |
| 634 | + self.assertTrue(empty_unpacked.flags.writeable) |
| 635 | + |
| 636 | + char = np.array([ord(b'a')], dtype='uint8') |
| 637 | + with tm.assert_produces_warning(None): |
| 638 | + char_unpacked = self.encode_decode(char, compress=compress) |
| 639 | + |
| 640 | + np.testing.assert_array_equal(char_unpacked, char) |
| 641 | + self.assertTrue(char_unpacked.flags.writeable) |
| 642 | + # if this test fails I am sorry: the interpreter is now in a bad |
| 643 | + # state where the shared b'a' singleton holds 98 == ord(b'b'). |
| 644 | + char_unpacked[0] = ord(b'b') |
| 645 | + |
| 646 | + # we compare the ord of bytes b'a' with unicode u'a' because they should |
| 647 | + # always be the same (unless we were able to mutate the shared |
| 648 | + # character singleton, in which case ord(b'a') == ord(b'b')). |
| 649 | + self.assertEqual(ord(b'a'), ord(u'a')) |
| 650 | + np.testing.assert_array_equal( |
| 651 | + char_unpacked, |
| 652 | + np.array([ord(b'b')], dtype='uint8'), |
| 653 | + ) |
| 654 | + |
| 655 | + def test_small_strings_no_warn_zlib(self): |
| 656 | + if not _ZLIB_INSTALLED: |
| 657 | + raise nose.SkipTest('no zlib') |
| 658 | + self._test_small_strings_no_warn('zlib') |
| 659 | + |
| 660 | + def test_small_strings_no_warn_blosc(self): |
| 661 | + if not _BLOSC_INSTALLED: |
| 662 | + raise nose.SkipTest('no blosc') |
| 663 | + self._test_small_strings_no_warn('blosc') |
553 | 664 |
|
554 | 665 | def test_readonly_axis_blosc(self):
|
555 | 666 | # GH11880
|
|
0 commit comments