diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 06f19782682b0..da615c1176cd1 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -580,6 +580,7 @@ I/O - Bug in :func:`read_html` where import check fails when run in multiple threads (:issue:`16928`) - Bug in :func:`read_csv` where automatic delimiter detection caused a ``TypeError`` to be thrown when a bad line was encountered rather than the correct error message (:issue:`13374`) - Bug in ``DataFrame.to_html()`` with ``notebook=True`` where DataFrames with named indices or non-MultiIndex indices had undesired horizontal or vertical alignment for column or row labels, respectively (:issue:`16792`) +- Bug in :func:`HDFStore.select` when reading a contiguous mixed-data table featuring VLArray (:issue:`17021`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 4d300b200971a..ea69116ec363d 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -2441,13 +2441,12 @@ def read_array(self, key, start=None, stop=None): """ read an array for the specified node (off of group """ import tables node = getattr(self.group, key) - data = node[start:stop] attrs = node._v_attrs transposed = getattr(attrs, 'transposed', False) if isinstance(node, tables.VLArray): - ret = data[0] + ret = node[0][start:stop] else: dtype = getattr(attrs, 'value_type', None) shape = getattr(attrs, 'shape', None) @@ -2456,7 +2455,7 @@ def read_array(self, key, start=None, stop=None): # length 0 axis ret = np.empty(shape, dtype=dtype) else: - ret = data + ret = node[start:stop] if dtype == u('datetime64'): diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index ff21afc11d220..ae8f7221d48ac 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -4387,6 +4387,19 @@ def test_path_pathlib(self): lambda p: pd.read_hdf(p, 'df')) tm.assert_frame_equal(df, result) + 
@pytest.mark.parametrize('start, stop', [(0, 2), (1, 2), (None, None)]) + def test_contiguous_mixed_data_table(self, start, stop): + # GH 17021 + # ValueError when reading a contiguous mixed-data table with a VLArray + df = DataFrame({'a': Series([20111010, 20111011, 20111012]), + 'b': Series(['ab', 'cd', 'ab'])}) + + with ensure_clean_store(self.path) as store: + store.append('test_dataset', df) + + result = store.select('test_dataset', start=start, stop=stop) + assert_frame_equal(df[start:stop], result) + def test_path_pathlib_hdfstore(self): df = tm.makeDataFrame()