Skip to content

Commit 935d249

Browse files
committed
BUG: closes issue #7212 - side effect on passed columns list
1 parent 9b04bd0 commit 935d249

File tree

3 files changed

+29
-0
lines changed

3 files changed

+29
-0
lines changed

doc/source/whatsnew/v0.17.0.txt

+2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ Performance Improvements
6060
Bug Fixes
6161
~~~~~~~~~
6262

63+
- Bug where ``read_hdf`` / ``store.select`` modifies the passed ``columns`` list when
64+
the stored object is multi-indexed (:issue:`7212`)
6365
- Bug in ``Categorical`` repr with ``display.width`` of ``None`` in Python 3 (:issue:`10087`)
6466

6567

pandas/io/pytables.py

+4
Original file line numberDiff line numberDiff line change
@@ -3453,6 +3453,10 @@ def get_blk_items(mgr, blocks):
34533453
def process_axes(self, obj, columns=None):
34543454
""" process axes filters """
34553455

3456+
# make a copy to avoid side effects
3457+
if columns is not None:
3458+
columns = list(columns)
3459+
34563460
# make sure to include levels if we have them
34573461
if columns is not None and self.is_multi_index:
34583462
for n in self.levels:

pandas/io/tests/test_pytables.py

+23
Original file line numberDiff line numberDiff line change
@@ -4617,6 +4617,29 @@ def test_preserve_timedeltaindex_type(self):
46174617
store['df'] = df
46184618
assert_frame_equal(store['df'], df)
46194619

4620+
def test_colums_multiindex_modified(self):
4621+
# BUG: 7212
4622+
# read_hdf / store.select modified the passed columns parameter
4623+
# when multi-indexed.
4624+
4625+
df = DataFrame(np.random.rand(4, 5),
4626+
index=list('abcd'),
4627+
columns=list('ABCDE'))
4628+
df.index.name = 'letters'
4629+
df = df.set_index(keys='E', append=True)
4630+
4631+
data_columns = df.index.names+df.columns.tolist()
4632+
with ensure_clean_path(self.path) as path:
4633+
df.to_hdf(path, 'df',
4634+
mode='a',
4635+
append=True,
4636+
data_columns=data_columns,
4637+
index=False)
4638+
cols2load = list('BCD')
4639+
cols2load_original = list(cols2load)
4640+
df_loaded = read_hdf(path, 'df', columns=cols2load)
4641+
self.assertTrue(cols2load_original == cols2load)
4642+
46204643

46214644
def _test_sort(obj):
46224645
if isinstance(obj, DataFrame):

0 commit comments

Comments
 (0)