Skip to content

Commit a94ed1e

Browse files
chris-b1 authored and AnkurDedania committed
BUG: syntax error in hdf query with ts
closes pandas-dev#15492 Author: Chris <[email protected]> Closes pandas-dev#15544 from chris-b1/hdf-dt-error and squashes the following commits: 8288dca [Chris] lint 7c7100d [Chris] expand test cases 946a48e [Chris] ERR: more strict HDFStore string comparison 213585f [Chris] CLN: remove timetuple type check cc977f0 [Chris] BUG: syntax error in hdf query with ts
1 parent c4d2061 commit a94ed1e

File tree

3 files changed

+82
-14
lines changed

3 files changed

+82
-14
lines changed

doc/source/whatsnew/v0.20.0.txt

+30-1
Original file line numberDiff line numberDiff line change
@@ -501,6 +501,35 @@ New Behavior:
501501

502502
df.groupby('A').agg([np.mean, np.std, np.min, np.max])
503503

504+
.. _whatsnew_0200.api_breaking.hdfstore_where:
505+
506+
HDFStore where string comparison
507+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
508+
509+
In previous versions most types could be compared to a string column in a ``HDFStore``,
510+
usually resulting in an invalid comparison. These comparisons will now raise a
511+
``TypeError`` (:issue:`15492`)
512+
513+
New Behavior:
514+
515+
.. code-block:: ipython
516+
517+
In [15]: df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']})
518+
519+
In [16]: df.dtypes
520+
Out[16]:
521+
unparsed_date object
522+
dtype: object
523+
524+
In [17]: df.to_hdf('store.h5', 'key', format='table', data_columns=True)
525+
526+
In [18]: ts = pd.Timestamp('2014-01-01')
527+
528+
In [19]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts')
529+
TypeError: Cannot compare 2014-01-01 00:00:00 of
530+
type <class 'pandas.tslib.Timestamp'> to string column
531+
532+
504533
.. _whatsnew_0200.api:
505534

506535
Other API Changes
@@ -671,7 +700,7 @@ Bug Fixes
671700
- Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`)
672701

673702

674-
703+
- Bug in ``pd.read_hdf()`` when passing a ``Timestamp`` to the ``where`` parameter with a non-date column (:issue:`15492`)
675704

676705

677706
- Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`)

pandas/computation/pytables.py

+8-13
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
""" manage PyTables query interface via Expressions """
22

33
import ast
4-
import time
54
import warnings
65
from functools import partial
76
from datetime import datetime, timedelta
@@ -188,10 +187,6 @@ def stringify(value):
188187
if v.tz is not None:
189188
v = v.tz_convert('UTC')
190189
return TermValue(v, v.value, kind)
191-
elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or
192-
kind == u('date')):
193-
v = time.mktime(v.timetuple())
194-
return TermValue(v, pd.Timestamp(v), kind)
195190
elif kind == u('timedelta64') or kind == u('timedelta'):
196191
v = _coerce_scalar_to_timedelta_type(v, unit='s').value
197192
return TermValue(int(v), v, kind)
@@ -218,12 +213,13 @@ def stringify(value):
218213
else:
219214
v = bool(v)
220215
return TermValue(v, v, kind)
221-
elif not isinstance(v, string_types):
222-
v = stringify(v)
216+
elif isinstance(v, string_types):
217+
# string quoting
223218
return TermValue(v, stringify(v), u('string'))
224-
225-
# string quoting
226-
return TermValue(v, stringify(v), u('string'))
219+
else:
220+
raise TypeError(("Cannot compare {v} of type {typ}"
221+
" to {kind} column").format(v=v, typ=type(v),
222+
kind=kind))
227223

228224
def convert_values(self):
229225
pass
@@ -558,9 +554,8 @@ def parse_back_compat(self, w, op=None, value=None):
558554

559555
# stringify with quotes these values
560556
def convert(v):
561-
if (isinstance(v, (datetime, np.datetime64,
562-
timedelta, np.timedelta64)) or
563-
hasattr(v, 'timetuple')):
557+
if isinstance(v, (datetime, np.datetime64,
558+
timedelta, np.timedelta64)):
564559
return "'{0}'".format(v)
565560
return v
566561

pandas/tests/io/test_pytables.py

+44
Original file line numberDiff line numberDiff line change
@@ -5071,6 +5071,50 @@ def test_query_long_float_literal(self):
50715071
expected = df.loc[[1], :]
50725072
tm.assert_frame_equal(expected, result)
50735073

5074+
def test_query_compare_column_type(self):
5075+
# GH 15492
5076+
df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'],
5077+
'real_date': date_range('2014-01-01', periods=2),
5078+
'float': [1.1, 1.2],
5079+
'int': [1, 2]},
5080+
columns=['date', 'real_date', 'float', 'int'])
5081+
5082+
with ensure_clean_store(self.path) as store:
5083+
store.append('test', df, format='table', data_columns=True)
5084+
5085+
ts = pd.Timestamp('2014-01-01') # noqa
5086+
result = store.select('test', where='real_date > ts')
5087+
expected = df.loc[[1], :]
5088+
tm.assert_frame_equal(expected, result)
5089+
5090+
for op in ['<', '>', '==']:
5091+
# non strings to string column always fail
5092+
for v in [2.1, True, pd.Timestamp('2014-01-01'),
5093+
pd.Timedelta(1, 's')]:
5094+
query = 'date {op} v'.format(op=op)
5095+
with tm.assertRaises(TypeError):
5096+
result = store.select('test', where=query)
5097+
5098+
# strings to other columns must be convertible to type
5099+
v = 'a'
5100+
for col in ['int', 'float', 'real_date']:
5101+
query = '{col} {op} v'.format(op=op, col=col)
5102+
with tm.assertRaises(ValueError):
5103+
result = store.select('test', where=query)
5104+
5105+
for v, col in zip(['1', '1.1', '2014-01-01'],
5106+
['int', 'float', 'real_date']):
5107+
query = '{col} {op} v'.format(op=op, col=col)
5108+
result = store.select('test', where=query)
5109+
5110+
if op == '==':
5111+
expected = df.loc[[0], :]
5112+
elif op == '>':
5113+
expected = df.loc[[1], :]
5114+
else:
5115+
expected = df.loc[[], :]
5116+
tm.assert_frame_equal(expected, result)
5117+
50745118

50755119
class TestHDFComplexValues(Base):
50765120
# GH10447

0 commit comments

Comments
 (0)