Skip to content

Commit 3aac68e

Browse files
1MLightyears authored and luckyvs1 committed
BUG: fix the bad error raised by HDFStore.put() (pandas-dev#38919)
1 parent 8a61d92 commit 3aac68e

File tree

3 files changed

+28
-7
lines changed

3 files changed

+28
-7
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ I/O
275275
- Allow custom error values for parse_dates argument of :func:`read_sql`, :func:`read_sql_query` and :func:`read_sql_table` (:issue:`35185`)
276276
- Bug in :func:`to_hdf` raising ``KeyError`` when trying to apply
277277
for subclasses of ``DataFrame`` or ``Series`` (:issue:`33748`).
278+
- Bug in :meth:`~HDFStore.put` raising an uninformative ``TypeError`` when saving a DataFrame with an object-dtype column whose contents are not strings (:issue:`34274`)
278279
- Bug in :func:`json_normalize` resulting in the first element of a generator object not being included in the returned ``DataFrame`` (:issue:`35923`)
279280
- Bug in :func:`read_excel` forward filling :class:`MultiIndex` names with multiple header and index columns specified (:issue:`34673`)
280281
- :func:`pandas.read_excel` now respects :func:`pandas.set_option` (:issue:`34252`)

pandas/io/pytables.py

+19-5
Original file line numberDiff line numberDiff line change
@@ -3915,6 +3915,7 @@ def _create_axes(
39153915
nan_rep=nan_rep,
39163916
encoding=self.encoding,
39173917
errors=self.errors,
3918+
block_columns=b_items,
39183919
)
39193920
adj_name = _maybe_adjust_name(new_name, self.version)
39203921

@@ -4878,7 +4879,14 @@ def _unconvert_index(
48784879

48794880

48804881
def _maybe_convert_for_string_atom(
4881-
name: str, block: "Block", existing_col, min_itemsize, nan_rep, encoding, errors
4882+
name: str,
4883+
block: "Block",
4884+
existing_col,
4885+
min_itemsize,
4886+
nan_rep,
4887+
encoding,
4888+
errors,
4889+
block_columns: List[str],
48824890
):
48834891
if not block.is_object:
48844892
return block.values
@@ -4912,14 +4920,20 @@ def _maybe_convert_for_string_atom(
49124920

49134921
# we cannot serialize this data, so report an exception on a column
49144922
# by column basis
4915-
for i in range(len(block.shape[0])):
4923+
4924+
# expected behaviour:
4925+
# search block for a non-string object column by column
4926+
for i in range(block.shape[0]):
49164927
col = block.iget(i)
49174928
inferred_type = lib.infer_dtype(col, skipna=False)
49184929
if inferred_type != "string":
4919-
iloc = block.mgr_locs.indexer[i]
4930+
error_column_label = (
4931+
block_columns[i] if len(block_columns) > i else f"No.{i}"
4932+
)
49204933
raise TypeError(
4921-
f"Cannot serialize the column [{iloc}] because\n"
4922-
f"its data contents are [{inferred_type}] object dtype"
4934+
f"Cannot serialize the column [{error_column_label}]\n"
4935+
f"because its data contents are not [string] but "
4936+
f"[{inferred_type}] object dtype"
49234937
)
49244938

49254939
# itemsize is the maximum length of a string (along any dimension)

pandas/tests/io/pytables/test_store.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -2055,7 +2055,10 @@ def test_append_raise(self, setup_path):
20552055
df = tm.makeDataFrame()
20562056
df["invalid"] = [["a"]] * len(df)
20572057
assert df.dtypes["invalid"] == np.object_
2058-
msg = re.escape("object of type 'int' has no len()")
2058+
msg = re.escape(
2059+
"""Cannot serialize the column [invalid]
2060+
because its data contents are not [string] but [mixed] object dtype"""
2061+
)
20592062
with pytest.raises(TypeError, match=msg):
20602063
store.append("df", df)
20612064

@@ -2221,7 +2224,10 @@ def test_unimplemented_dtypes_table_columns(self, setup_path):
22212224

22222225
with ensure_clean_store(setup_path) as store:
22232226
# this fails because we have a date in the object block......
2224-
msg = "object of type 'int' has no len()"
2227+
msg = re.escape(
2228+
"""Cannot serialize the column [datetime1]
2229+
because its data contents are not [string] but [date] object dtype"""
2230+
)
22252231
with pytest.raises(TypeError, match=msg):
22262232
store.append("df_unimplemented", df)
22272233

0 commit comments

Comments
 (0)