Skip to content

BUG: Ensure data_columns is always a list (i.e. min_itemsize can exte… #12252

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3247,7 +3247,7 @@ def validate_data_columns(self, data_columns, min_itemsize):
# evaluate the passed data_columns, True == use all columns
# take only valid axis labels
if data_columns is True:
data_columns = axis_labels
data_columns = list(axis_labels)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This type coercion will fail if you pass a string.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In fact, this should not be done here at all. The data_columns should be cleaned in a central location (IIRC they already are, and that code just needs an adjustment).

elif data_columns is None:
data_columns = []

Expand Down Expand Up @@ -4084,7 +4084,7 @@ def write(self, obj, data_columns=None, **kwargs):
obj = DataFrame({name: obj}, index=obj.index)
obj.columns = [name]
return super(AppendableSeriesTable, self).write(
obj=obj, data_columns=obj.columns, **kwargs)
obj=obj, data_columns=list(obj.columns), **kwargs)

def read(self, columns=None, **kwargs):

Expand Down Expand Up @@ -4185,7 +4185,7 @@ def write(self, obj, data_columns=None, **kwargs):
if data_columns is None:
data_columns = []
elif data_columns is True:
data_columns = obj.columns[:]
data_columns = list(obj.columns[:])
obj, self.levels = self.validate_multiindex(obj)
for n in self.levels:
if n not in data_columns:
Expand Down
10 changes: 10 additions & 0 deletions pandas/io/tests/test_pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1338,6 +1338,16 @@ def check_col(key, name, size):
[[124, 'abcdefqhij'], [346, 'abcdefghijklmnopqrtsuvwxyz']])
self.assertRaises(ValueError, store.append, 'df_new', df_new)

# min_itemsize on Series with MultiIndex (GH 10381)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Put this with the tests that are actually testing min_itemsize.

df = tm.makeMixedDataFrame().set_index(['A', 'C'])
store.append('ss', df['B'], min_itemsize={'index': 4})
tm.assert_series_equal(store.select('ss'), df['B'])

# min_itemsize with MultiIndex and data_columns=True
store.append('midf', df, data_columns=True,
min_itemsize={'index': 4})
tm.assert_frame_equal(store.select('midf'), df)

# with nans
_maybe_remove(store, 'df')
df = tm.makeTimeDataFrame()
Expand Down