From 02f90d5b3a4afef95b8fcb8e0e7529acaf5ad9a3 Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Thu, 2 Jun 2016 16:46:16 -0700
Subject: [PATCH 1/7] Use if-expression.

---
 pandas/io/tests/test_pytables.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 96b66265ea586..28d687b00d73f 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -46,8 +46,8 @@
 
 from distutils.version import LooseVersion
 
-_default_compressor = LooseVersion(tables.__version__) >= '2.2' \
-    and 'blosc' or 'zlib'
+_default_compressor = ('blosc' if LooseVersion(tables.__version__) >= '2.2'
+                       else 'zlib')
 
 _multiprocess_can_split_ = False
 

From b3a577319a0a4c00cd8a3c6211e910b5a160424d Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Thu, 2 Jun 2016 16:47:01 -0700
Subject: [PATCH 2/7] Add test that fails for GitHub bug #13231

---
 pandas/io/tests/test_pytables.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 28d687b00d73f..14f5b867280a3 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4877,6 +4877,8 @@ def test_read_nokey(self):
         df = DataFrame(np.random.rand(4, 5),
                        index=list('abcd'),
                        columns=list('ABCDE'))
+        # Categorical dtype not supported for "fixed" format. So no need
+        # to test for that.
         with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', mode='a')
             reread = read_hdf(path)
@@ -4884,6 +4886,17 @@ def test_read_nokey(self):
             df.to_hdf(path, 'df2', mode='a')
             self.assertRaises(ValueError, read_hdf, path)
 
+    def test_read_nokey_table(self):
+        # GH13231
+        df = DataFrame({'i': range(5),
+                        'c': Series(list('abacd'), dtype='category')})
+        with ensure_clean_path(self.path) as path:
+            df.to_hdf(path, 'df', mode='a', format='table')
+            reread = read_hdf(path)
+            assert_frame_equal(df, reread)
+            df.to_hdf(path, 'df2', mode='a', format='table')
+            self.assertRaises(ValueError, read_hdf, path)
+
     def test_read_from_pathlib_path(self):
 
         # GH11773

From 2f41aef53d97bfbba2cb5f4a01e5aa2928b9f4e5 Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Fri, 3 Jun 2016 15:28:06 -0700
Subject: [PATCH 3/7] Tweak comment to be clearer.

---
 pandas/io/tests/test_pytables.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 14f5b867280a3..6dbc2746ebf38 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4878,7 +4878,7 @@ def test_read_nokey(self):
                        index=list('abcd'),
                        columns=list('ABCDE'))
         # Categorical dtype not supported for "fixed" format. So no need
-        # to test for that.
+        # to test with that dtype in the dataframe here.
         with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', mode='a')
             reread = read_hdf(path)

From df100160ded4d36e957e701de9289e54275e00c1 Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Fri, 3 Jun 2016 15:29:45 -0700
Subject: [PATCH 4/7] Make logic that detects if there is only one dataset in a
 HDF5 file work when storing a dataframe that contains categorical data.

---
 pandas/io/pytables.py | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index fcf5125d956c6..072f634618c58 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -331,11 +331,17 @@ def read_hdf(path_or_buf, key=None, **kwargs):
 
     try:
         if key is None:
-            keys = store.keys()
-            if len(keys) != 1:
-                raise ValueError('key must be provided when HDF file contains '
-                                 'multiple datasets.')
-            key = keys[0]
+            groups = store.groups()
+            candidate_only_group = groups[0]
+            # For the HDF file to have only one dataset, all other groups
+            # should then be metadata groups for that candidate group. (This
+            # assumes that the groups() method enumerates parent groups
+            # before their children.)
+            for group_to_check in groups[1:]:
+                if not _is_metadata_of(group_to_check, candidate_only_group):
+                    raise ValueError('key must be provided when HDF file '
+                                     'contains multiple datasets.')
+            key = candidate_only_group._v_pathname
         return store.select(key, auto_close=auto_close, **kwargs)
     except:
         # if there is an error, close the store
@@ -347,6 +353,20 @@ def read_hdf(path_or_buf, key=None, **kwargs):
         raise
 
 
+def _is_metadata_of(group, parent_group):
+    """Check if a given group is a metadata group for a given parent_group."""
+    if group._v_depth <= parent_group._v_depth:
+        return False
+
+    current = group
+    while current._v_depth > 1:
+        parent = current._v_parent
+        if parent == parent_group and current._v_name == 'meta':
+            return True
+        current = current._v_parent
+    return False
+
+
 class HDFStore(StringMixin):
 
     """

From e7c8313e611ca13024f31ce179a4758155047835 Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Fri, 3 Jun 2016 16:03:17 -0700
Subject: [PATCH 5/7] Add changelog entry.

---
 doc/source/whatsnew/v0.18.2.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt
index 950bf397f43b5..7cf27d13a44ac 100644
--- a/doc/source/whatsnew/v0.18.2.txt
+++ b/doc/source/whatsnew/v0.18.2.txt
@@ -374,3 +374,6 @@ Bug Fixes
 
 
 - Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`)
+
+- Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset (that had one or more categorical columns) failed unless the key argument was set to the name of the dataset. (:issue:`13231`)
+

From 611aa284695af9c6a5b22f155961795729664432 Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Sat, 4 Jun 2016 12:11:26 -0700
Subject: [PATCH 6/7] Formatting fixes.

---
 pandas/io/pytables.py            | 1 +
 pandas/io/tests/test_pytables.py | 2 ++
 2 files changed, 3 insertions(+)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 072f634618c58..2d26ab06207a8 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -333,6 +333,7 @@ def read_hdf(path_or_buf, key=None, **kwargs):
         if key is None:
             groups = store.groups()
             candidate_only_group = groups[0]
+
             # For the HDF file to have only one dataset, all other groups
             # should then be metadata groups for that candidate group. (This
             # assumes that the groups() method enumerates parent groups
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index 6dbc2746ebf38..d1d665a7f1a43 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4877,6 +4877,7 @@ def test_read_nokey(self):
         df = DataFrame(np.random.rand(4, 5),
                        index=list('abcd'),
                        columns=list('ABCDE'))
+
         # Categorical dtype not supported for "fixed" format. So no need
         # to test with that dtype in the dataframe here.
         with ensure_clean_path(self.path) as path:
@@ -4890,6 +4891,7 @@ def test_read_nokey_table(self):
         # GH13231
         df = DataFrame({'i': range(5),
                         'c': Series(list('abacd'), dtype='category')})
+
         with ensure_clean_path(self.path) as path:
             df.to_hdf(path, 'df', mode='a', format='table')
             reread = read_hdf(path)

From e8396382fa164573614950e3d0f16e0bcb22c47d Mon Sep 17 00:00:00 2001
From: Christian Hudon <chrish@pianocktail.org>
Date: Sat, 4 Jun 2016 12:22:42 -0700
Subject: [PATCH 7/7] Raise a better exception when the HDF file is empty and
 key=None.

---
 pandas/io/pytables.py            | 2 ++
 pandas/io/tests/test_pytables.py | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
index 2d26ab06207a8..6c7623ec7ed4a 100644
--- a/pandas/io/pytables.py
+++ b/pandas/io/pytables.py
@@ -332,6 +332,8 @@ def read_hdf(path_or_buf, key=None, **kwargs):
     try:
         if key is None:
             groups = store.groups()
+            if len(groups) == 0:
+                raise ValueError('No dataset in HDF file.')
             candidate_only_group = groups[0]
 
             # For the HDF file to have only one dataset, all other groups
diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
index d1d665a7f1a43..9c13162bd774c 100644
--- a/pandas/io/tests/test_pytables.py
+++ b/pandas/io/tests/test_pytables.py
@@ -4899,6 +4899,12 @@ def test_read_nokey_table(self):
             df.to_hdf(path, 'df2', mode='a', format='table')
             self.assertRaises(ValueError, read_hdf, path)
 
+    def test_read_nokey_empty(self):
+        with ensure_clean_path(self.path) as path:
+            store = HDFStore(path)
+            store.close()
+            self.assertRaises(ValueError, read_hdf, path)
+
     def test_read_from_pathlib_path(self):
 
         # GH11773