diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt index 05c762b91b925..806f77ea56122 100644 --- a/doc/source/whatsnew/v0.16.1.txt +++ b/doc/source/whatsnew/v0.16.1.txt @@ -71,6 +71,7 @@ Bug Fixes - Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`) +- Bug in ``equals`` causing false negatives when block order differed (:issue:`9330`) - Bug in ``DataFrame`` slicing may not retain metadata (:issue:`9776`) - Bug where ``TimdeltaIndex`` were not properly serialized in fixed ``HDFStore`` (:issue:`9635`) @@ -80,3 +81,6 @@ Bug Fixes - Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`) - Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`) +- Bug in ``FloatArrayFormatter`` where decision boundary for displaying "small" floats in decimal format is off by one order of magnitude for a given display.precision (:issue:`9764`) + +- Fixed bug (:issue:`9671`) where ``DataFrame.plot()`` raised an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings (this should be allowed). diff --git a/pandas/core/format.py b/pandas/core/format.py index b21ca9050ffd0..7b8a3161b5e05 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -1996,7 +1996,7 @@ def _format_strings(self): # this is pretty arbitrary for now has_large_values = (abs_vals > 1e8).any() - has_small_values = ((abs_vals < 10 ** (-self.digits)) & + has_small_values = ((abs_vals < 10 ** (-self.digits+1)) & (abs_vals > 0)).any() if too_long and has_large_values: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 7a16fb2b6b0d7..9b2d366bfb2be 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -3310,8 +3310,20 @@ def equals(self, other): return False self._consolidate_inplace() other._consolidate_inplace() + if len(self.blocks) != len(other.blocks): + return False + + # canonicalize block order, using a tuple combining the type + # name and then mgr_locs because there might be unconsolidated + # blocks (say, Categorical) which can only be distinguished by + # the iteration order + def canonicalize(block): + return (block.dtype.name, block.mgr_locs.as_array.tolist()) + + self_blocks = sorted(self.blocks, key=canonicalize) + other_blocks = sorted(other.blocks, key=canonicalize) return all(block.equals(oblock) for block, oblock in - zip(self.blocks, other.blocks)) + zip(self_blocks, other_blocks)) class SingleBlockManager(BlockManager): diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py index acdc991c92efe..03e7a8eae549d 100644 --- a/pandas/io/tests/test_pytables.py +++ b/pandas/io/tests/test_pytables.py @@ -4584,19 +4584,33 @@ def test_duplicate_column_name(self): with ensure_clean_path(self.path) as path: self.assertRaises(ValueError, df.to_hdf, path, 'df', format='fixed') + df.to_hdf(path, 'df', format='table') + other = read_hdf(path, 'df') + + tm.assert_frame_equal(df, other) + self.assertTrue(df.equals(other)) + self.assertTrue(other.equals(df)) + + def test_round_trip_equals(self): + # GH 9330 + df = DataFrame({"B": [1,2], "A": ["x","y"]}) + + with ensure_clean_path(self.path) as path: df.to_hdf(path, 'df', format='table') other = read_hdf(path, 'df') tm.assert_frame_equal(df, other) + self.assertTrue(df.equals(other)) + self.assertTrue(other.equals(df)) def test_preserve_timedeltaindex_type(self): - # GH9635 + # GH9635 # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve # the type of the index. df = DataFrame(np.random.normal(size=(10,5))) df.index = timedelta_range(start='0s',periods=10,freq='1s',name='example') with ensure_clean_store(self.path) as store: - + store['df'] = df assert_frame_equal(store['df'], df) diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py index ce32c8af99a73..1dcdbf12a6b59 100644 --- a/pandas/tests/test_format.py +++ b/pandas/tests/test_format.py @@ -2986,6 +2986,25 @@ def test_format(self): self.assertEqual(result[0], " 12") self.assertEqual(result[1], " 0") + def test_output_significant_digits(self): + # Issue #9764 + + # In case default display precision changes: + with pd.option_context('display.precision', 7): + # DataFrame example from issue #9764 + d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]}) + + expected_output={ + (0,6):' col1\n0 9.999000e-08\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + (1,6):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07', + (1,8):' col1\n1 1.000000e-07\n2 1.000100e-07\n3 2.000000e-07\n4 4.999000e-07\n5 5.000000e-07\n6 5.000100e-07\n7 6.000000e-07', + (8,16):' col1\n8 9.999000e-07\n9 1.000000e-06\n10 1.000100e-06\n11 2.000000e-06\n12 4.999000e-06\n13 5.000000e-06\n14 5.000100e-06\n15 6.000000e-06', + (9,16):' col1\n9 0.000001\n10 0.000001\n11 0.000002\n12 0.000005\n13 0.000005\n14 0.000005\n15 0.000006' + } + + for (start, stop), v in expected_output.items(): + self.assertEqual(str(d[start:stop]), v) + class TestRepr_timedelta64(tm.TestCase): diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index cdda087b27613..3e4c16f63035f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5944,6 +5944,20 @@ def test_boolean_comparison(self): self.assertRaises(ValueError, lambda : df == (2,2)) self.assertRaises(ValueError, lambda : df == [2,2]) + def test_equals_different_blocks(self): + # GH 9330 + df0 = pd.DataFrame({"A": ["x","y"], "B": [1,2], + "C": ["w","z"]}) + df1 = df0.reset_index()[["A","B","C"]] + # this assert verifies that the above operations have + # induced a block rearrangement + self.assertTrue(df0._data.blocks[0].dtype != + df1._data.blocks[0].dtype) + # do the real tests + self.assert_frame_equal(df0, df1) + self.assertTrue(df0.equals(df1)) + self.assertTrue(df1.equals(df0)) + def test_to_csv_from_csv(self): pname = '__tmp_to_csv_from_csv__' diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py index 3ce4e150326a2..36c19cd39f76c 100644 --- a/pandas/tests/test_graphics.py +++ b/pandas/tests/test_graphics.py @@ -1154,6 +1154,22 @@ def test_plot(self): self.assertEqual(len(axes), 1) self.assertIs(ax.get_axes(), axes[0]) + def test_color_and_style_arguments(self): + df = DataFrame({'x': [1, 2], 'y': [3, 4]}) + # passing both 'color' and 'style' arguments should be allowed + # if there is no color symbol in the style strings: + ax = df.plot(color = ['red', 'black'], style = ['-', '--']) + # check that the linestyles are correctly set: + linestyle = [line.get_linestyle() for line in ax.lines] + self.assertEqual(linestyle, ['-', '--']) + # check that the colors are correctly set: + color = [line.get_color() for line in ax.lines] + self.assertEqual(color, ['red', 'black']) + # passing both 'color' and 'style' arguments should not be allowed + # if there is a color symbol in the style strings: + with tm.assertRaises(ValueError): + df.plot(color = ['red', 'black'], style = ['k-', 'r--']) + def test_nonnumeric_exclude(self): df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]}) ax = df.plot() diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 45f089f5e0a53..36585abd1b98f 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -68,15 +68,15 @@ def create_block(typestr, placement, item_shape=None, num_offset=0): elif typestr in ('object', 'string', 'O'): values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset], shape) - elif typestr in ('bool'): + elif typestr in ('b','bool',): values = np.ones(shape, dtype=np.bool_) elif typestr in ('datetime', 'dt', 'M8[ns]'): values = (mat * 1e9).astype('M8[ns]') elif typestr in ('timedelta', 'td', 'm8[ns]'): values = (mat * 1).astype('m8[ns]') - elif typestr in ('category'): + elif typestr in ('category',): values = Categorical([1,1,2,2,3,3,3,3,4,4]) - elif typestr in ('category2'): + elif typestr in ('category2',): values = Categorical(['a','a','a','a','b','b','c','c','c','d']) elif typestr in ('sparse', 'sparse_na'): # FIXME: doesn't support num_rows != 10 @@ -751,6 +751,25 @@ def test_equals(self): bm2 = BlockManager(bm1.blocks[::-1], bm1.axes) self.assertTrue(bm1.equals(bm2)) + def test_equals_block_order_different_dtypes(self): + # GH 9330 + + mgr_strings = [ + "a:i8;b:f8", # basic case + "a:i8;b:f8;c:c8;d:b", # many types + "a:i8;e:dt;f:td;g:string", # more types + "a:i8;b:category;c:category2;d:category2", # categories + "c:sparse;d:sparse_na;b:f8", # sparse + ] + + for mgr_string in mgr_strings: + bm = create_mgr(mgr_string) + block_perms = itertools.permutations(bm.blocks) + for bm_perm in block_perms: + bm_this = BlockManager(bm_perm, bm.axes) + self.assertTrue(bm.equals(bm_this)) + self.assertTrue(bm_this.equals(bm)) + def test_single_mgr_ctor(self): mgr = create_single_mgr('f8', num_rows=5) self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.]) diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py index 0be030d7c2c8e..358c5b0dd5940 100644 --- a/pandas/tools/plotting.py +++ b/pandas/tools/plotting.py @@ -867,12 +867,17 @@ def _validate_color_args(self): "simultaneously. Using 'color'") if 'color' in self.kwds and self.style is not None: + if com.is_list_like(self.style): + styles = self.style + else: + styles = [self.style] # need only a single match - if re.match('^[a-z]+?', self.style) is not None: - raise ValueError("Cannot pass 'style' string with a color " - "symbol and 'color' keyword argument. Please" - " use one or the other or pass 'style' " - "without a color symbol") + for s in styles: + if re.match('^[a-z]+?', s) is not None: + raise ValueError("Cannot pass 'style' string with a color " + "symbol and 'color' keyword argument. Please" + " use one or the other or pass 'style' " + "without a color symbol") def _iter_data(self, data=None, keep_index=False, fillna=None): if data is None: