pandas-dev · davidbrochart · Mar 17, 2015 · Mar 18, 2015 · Mar 18, 2015 · Mar 18, 2015
diff --git a/doc/source/whatsnew/v0.16.1.txt b/doc/source/whatsnew/v0.16.1.txt
@@ -71,6 +71,7 @@ Bug Fixes
 
 - Bug in ``transform`` causing length mismatch when null entries were present and a fast aggregator was being used (:issue:`9697`)
 
+- Bug in ``equals`` causing false negatives when the internal block order of otherwise equal objects differed (:issue:`9330`)
 
 - Bug in ``DataFrame`` slicing may not retain metadata (:issue:`9776`)
 - Bug where ``TimdeltaIndex`` were not properly serialized in fixed ``HDFStore`` (:issue:`9635`)
@@ -80,3 +81,6 @@ Bug Fixes
 
 - Bug in ``Series.quantile`` on empty Series of type ``Datetime`` or ``Timedelta`` (:issue:`9675`)
 - Bug in ``where`` causing incorrect results when upcasting was required (:issue:`9731`)
+- Bug in ``FloatArrayFormatter`` where the decision boundary for displaying "small" floats in decimal format was off by one order of magnitude for a given ``display.precision`` (:issue:`9764`)
+
+- Bug in ``DataFrame.plot()`` raising an error when both ``color`` and ``style`` keywords were passed and there was no color symbol in the style strings, a combination that should be allowed (:issue:`9671`)
diff --git a/pandas/core/format.py b/pandas/core/format.py
@@ -1996,7 +1996,7 @@ def _format_strings(self):
 
             # this is pretty arbitrary for now
             has_large_values = (abs_vals > 1e8).any()
-            has_small_values = ((abs_vals < 10 ** (-self.digits)) &
+            has_small_values = ((abs_vals < 10 ** (-self.digits+1)) &
                                 (abs_vals > 0)).any()
 
             if too_long and has_large_values:

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -3310,8 +3310,20 @@ def equals(self, other):
             return False
         self._consolidate_inplace()
         other._consolidate_inplace()
+        if len(self.blocks) != len(other.blocks):
+            return False
+
+        # canonicalize block order by sorting on (dtype name, mgr_locs);
+        # the dtype name alone is not enough because there might be
+        # unconsolidated blocks (say, Categorical) that share a dtype and
+        # can only be distinguished by their mgr_locs
+        def canonicalize(block):
+            return (block.dtype.name, block.mgr_locs.as_array.tolist())
+
+        self_blocks = sorted(self.blocks, key=canonicalize)
+        other_blocks = sorted(other.blocks, key=canonicalize)
         return all(block.equals(oblock) for block, oblock in
-                   zip(self.blocks, other.blocks))
+                   zip(self_blocks, other_blocks))
 
 
 class SingleBlockManager(BlockManager):

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -4584,19 +4584,33 @@ def test_duplicate_column_name(self):
         with ensure_clean_path(self.path) as path:
             self.assertRaises(ValueError, df.to_hdf, path, 'df', format='fixed')
 
+            df.to_hdf(path, 'df', format='table')
+            other = read_hdf(path, 'df')
+
+            tm.assert_frame_equal(df, other)
+            self.assertTrue(df.equals(other))
+            self.assertTrue(other.equals(df))
+
+    def test_round_trip_equals(self):
+        # GH 9330
+        df = DataFrame({"B": [1,2], "A": ["x","y"]})
+
+        with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', format='table')
             other = read_hdf(path, 'df')
             tm.assert_frame_equal(df, other)
+            self.assertTrue(df.equals(other))
+            self.assertTrue(other.equals(df))
 
     def test_preserve_timedeltaindex_type(self):
-        # GH9635 
+        # GH9635
         # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve
         # the type of the index.
         df = DataFrame(np.random.normal(size=(10,5)))
         df.index = timedelta_range(start='0s',periods=10,freq='1s',name='example')
 
         with ensure_clean_store(self.path) as store:
-            
+
             store['df'] = df
             assert_frame_equal(store['df'], df)
 

diff --git a/pandas/tests/test_format.py b/pandas/tests/test_format.py
@@ -2986,6 +2986,25 @@ def test_format(self):
         self.assertEqual(result[0], " 12")
         self.assertEqual(result[1], "  0")
 
+    def test_output_significant_digits(self):
+        # Issue #9764
+
+        # In case default display precision changes:
+        with pd.option_context('display.precision', 7):
+            # DataFrame example from issue #9764
+            d=pd.DataFrame({'col1':[9.999e-8, 1e-7, 1.0001e-7, 2e-7, 4.999e-7, 5e-7, 5.0001e-7, 6e-7, 9.999e-7, 1e-6, 1.0001e-6, 2e-6, 4.999e-6, 5e-6, 5.0001e-6, 6e-6]})
+
+            expected_output={
+                (0,6):'           col1\n0  9.999000e-08\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
+                (1,6):'           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07',
+                (1,8):'           col1\n1  1.000000e-07\n2  1.000100e-07\n3  2.000000e-07\n4  4.999000e-07\n5  5.000000e-07\n6  5.000100e-07\n7  6.000000e-07',
+                (8,16):'            col1\n8   9.999000e-07\n9   1.000000e-06\n10  1.000100e-06\n11  2.000000e-06\n12  4.999000e-06\n13  5.000000e-06\n14  5.000100e-06\n15  6.000000e-06',
+                (9,16):'        col1\n9   0.000001\n10  0.000001\n11  0.000002\n12  0.000005\n13  0.000005\n14  0.000005\n15  0.000006'
+            }
+
+            for (start, stop), v in expected_output.items():
+                self.assertEqual(str(d[start:stop]), v)
+
 
 class TestRepr_timedelta64(tm.TestCase):
 

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -5944,6 +5944,20 @@ def test_boolean_comparison(self):
         self.assertRaises(ValueError, lambda : df == (2,2))
         self.assertRaises(ValueError, lambda : df == [2,2])
 
+    def test_equals_different_blocks(self):
+        # GH 9330
+        df0 = pd.DataFrame({"A": ["x","y"], "B": [1,2], 
+                            "C": ["w","z"]})
+        df1 = df0.reset_index()[["A","B","C"]]
+        # sanity check: verify that the operations above have actually
+        # induced a block rearrangement between df0 and df1
+        self.assertTrue(df0._data.blocks[0].dtype != 
+                        df1._data.blocks[0].dtype)
+        # do the real tests
+        self.assert_frame_equal(df0, df1)
+        self.assertTrue(df0.equals(df1))
+        self.assertTrue(df1.equals(df0))
+
     def test_to_csv_from_csv(self):
 
         pname = '__tmp_to_csv_from_csv__'

diff --git a/pandas/tests/test_graphics.py b/pandas/tests/test_graphics.py
@@ -1154,6 +1154,22 @@ def test_plot(self):
         self.assertEqual(len(axes), 1)
         self.assertIs(ax.get_axes(), axes[0])
 
+    def test_color_and_style_arguments(self):
+        df = DataFrame({'x': [1, 2], 'y': [3, 4]})
+        # passing both 'color' and 'style' arguments should be allowed
+        # if there is no color symbol in the style strings:
+        ax = df.plot(color = ['red', 'black'], style = ['-', '--'])
+        # check that the linestyles are correctly set:
+        linestyle = [line.get_linestyle() for line in ax.lines]
+        self.assertEqual(linestyle, ['-', '--'])
+        # check that the colors are correctly set:
+        color = [line.get_color() for line in ax.lines]
+        self.assertEqual(color, ['red', 'black'])
+        # passing both 'color' and 'style' arguments should not be allowed
+        # if there is a color symbol in the style strings:
+        with tm.assertRaises(ValueError):
+            df.plot(color = ['red', 'black'], style = ['k-', 'r--'])
+
     def test_nonnumeric_exclude(self):
         df = DataFrame({'A': ["x", "y", "z"], 'B': [1, 2, 3]})
         ax = df.plot()

diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py
@@ -68,15 +68,15 @@ def create_block(typestr, placement, item_shape=None, num_offset=0):
     elif typestr in ('object', 'string', 'O'):
         values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset],
                             shape)
-    elif typestr in ('bool'):
+    elif typestr in ('b','bool',):
         values = np.ones(shape, dtype=np.bool_)
     elif typestr in ('datetime', 'dt', 'M8[ns]'):
         values = (mat * 1e9).astype('M8[ns]')
     elif typestr in ('timedelta', 'td', 'm8[ns]'):
         values = (mat * 1).astype('m8[ns]')
-    elif typestr in ('category'):
+    elif typestr in ('category',):
         values = Categorical([1,1,2,2,3,3,3,3,4,4])
-    elif typestr in ('category2'):
+    elif typestr in ('category2',):
         values = Categorical(['a','a','a','a','b','b','c','c','c','d'])
     elif typestr in ('sparse', 'sparse_na'):
         # FIXME: doesn't support num_rows != 10
@@ -751,6 +751,25 @@ def test_equals(self):
         bm2 = BlockManager(bm1.blocks[::-1], bm1.axes)
         self.assertTrue(bm1.equals(bm2))
 
+    def test_equals_block_order_different_dtypes(self):
+        # GH 9330
+
+        mgr_strings = [ 
+            "a:i8;b:f8", # basic case
+            "a:i8;b:f8;c:c8;d:b", # many types
+            "a:i8;e:dt;f:td;g:string", # more types
+            "a:i8;b:category;c:category2;d:category2", # categories
+            "c:sparse;d:sparse_na;b:f8", # sparse
+            ]
+
+        for mgr_string in mgr_strings:
+            bm = create_mgr(mgr_string)
+            block_perms = itertools.permutations(bm.blocks)
+            for bm_perm in block_perms:
+                bm_this = BlockManager(bm_perm, bm.axes)
+                self.assertTrue(bm.equals(bm_this))
+                self.assertTrue(bm_this.equals(bm))
+
     def test_single_mgr_ctor(self):
         mgr = create_single_mgr('f8', num_rows=5)
         self.assertEqual(mgr.as_matrix().tolist(), [0., 1., 2., 3., 4.])

diff --git a/pandas/tools/plotting.py b/pandas/tools/plotting.py
@@ -867,12 +867,17 @@ def _validate_color_args(self):
                           "simultaneously. Using 'color'")
 
         if 'color' in self.kwds and self.style is not None:
+            if com.is_list_like(self.style):
+                styles = self.style
+            else:
+                styles = [self.style]
             # need only a single match
-            if re.match('^[a-z]+?', self.style) is not None:
-                raise ValueError("Cannot pass 'style' string with a color "
-                                 "symbol and 'color' keyword argument. Please"
-                                 " use one or the other or pass 'style' "
-                                 "without a color symbol")
+            for s in styles:
+                if re.match('^[a-z]+?', s) is not None:
+                    raise ValueError("Cannot pass 'style' string with a color "
+                                     "symbol and 'color' keyword argument. Please"
+                                     " use one or the other or pass 'style' "
+                                     "without a color symbol")
 
     def _iter_data(self, data=None, keep_index=False, fillna=None):
         if data is None: