Skip to content

Commit 946a48e

Browse files
committed
ERR: more strict HDFStore string comparison
1 parent 213585f commit 946a48e

File tree

3 files changed

+34
-11
lines changed

3 files changed

+34
-11
lines changed

doc/source/whatsnew/v0.20.0.txt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,26 @@ New Behavior:
450450

451451
df.groupby('A').agg([np.mean, np.std, np.min, np.max])
452452

453+
HDFStore where string comparison
454+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
455+
456+
In previous versions most types could be compared to a string column in a ``HDFStore``
457+
usually resulting in an invalid comparison. These comparisons will now raise a
458+
``TypeError`` (:issue:`15492`)
459+
460+
New Behavior:
461+
462+
.. code-block:: ipython
463+
464+
df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']})
465+
df.to_hdf('store.h5', 'key', format='table', data_columns=True)
466+
ts = pd.Timestamp('2014-01-01')
467+
try:
468+
pd.read_hdf('store.h5', 'key', where='unparsed_date > ts')
469+
except TypeError:
470+
print("TypeError raised")
471+
472+
453473
.. _whatsnew_0200.api:
454474

455475
Other API Changes

pandas/computation/pytables.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -214,12 +214,13 @@ def stringify(value):
214214
else:
215215
v = bool(v)
216216
return TermValue(v, v, kind)
217-
elif not isinstance(v, string_types):
218-
v = stringify(v)
217+
elif isinstance(v, string_types):
218+
# string quoting
219219
return TermValue(v, stringify(v), u('string'))
220-
221-
# string quoting
222-
return TermValue(v, stringify(v), u('string'))
220+
else:
221+
raise TypeError(("Cannot compare {v} of type {typ}"
222+
" to {kind} column").format(v=v, typ=type(v),
223+
kind=kind))
223224

224225
def convert_values(self):
225226
pass

pandas/tests/io/test_pytables.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5080,25 +5080,27 @@ def test_query_long_float_literal(self):
50805080
expected = df.loc[[1], :]
50815081
tm.assert_frame_equal(expected, result)
50825082

5083-
def test_query_ts_string_column(self):
5083+
def test_query_compare_string_column(self):
50845084
# GH 15492
50855085
df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'],
50865086
'real_date': date_range('2014-01-01', periods=2),
50875087
'values': [1, 2]},
50885088
columns=['date', 'real_date', 'values'])
50895089

5090-
ts = pd.Timestamp('2014-01-01') # noqa
5091-
50925090
with ensure_clean_store(self.path) as store:
50935091
store.append('test', df, format='table', data_columns=True)
50945092

5095-
result = store.select('test', where='date > ts')
5096-
self.assertTrue(result.empty)
5097-
5093+
ts = pd.Timestamp('2014-01-01') # noqa
50985094
result = store.select('test', where='real_date > ts')
50995095
expected = df.loc[[1], :]
51005096
tm.assert_frame_equal(expected, result)
51015097

5098+
for v in [2.1, True, pd.Timestamp('2014-01-01')]:
5099+
for op in ['<', '>', '==']:
5100+
query = 'date {op} v'.format(op=op)
5101+
with tm.assertRaises(TypeError):
5102+
result = store.select('test', where=query)
5103+
51025104

51035105
class TestHDFComplexValues(Base):
51045106
# GH10447

0 commit comments

Comments
 (0)