From cc977f0bf03db7f3ec0b1ef90713bbe0160c3ba0 Mon Sep 17 00:00:00 2001 From: Chris Date: Wed, 1 Mar 2017 18:43:15 -0600 Subject: [PATCH 1/5] BUG: syntax error in hdf query with ts --- pandas/computation/pytables.py | 4 ---- pandas/tests/io/test_pytables.py | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 9dc18284ec22c..20313784b0fc8 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -188,10 +188,6 @@ def stringify(value): if v.tz is not None: v = v.tz_convert('UTC') return TermValue(v, v.value, kind) - elif (isinstance(v, datetime) or hasattr(v, 'timetuple') or - kind == u('date')): - v = time.mktime(v.timetuple()) - return TermValue(v, pd.Timestamp(v), kind) elif kind == u('timedelta64') or kind == u('timedelta'): v = _coerce_scalar_to_timedelta_type(v, unit='s').value return TermValue(int(v), v, kind) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index a840ff46aa845..7d98c6879fec2 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5080,6 +5080,25 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + def test_query_ts_string_column(self): + # GH 15492 + df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'], + 'real_date': date_range('2014-01-01', periods=2), + 'values': [1, 2]}, + columns=['date', 'real_date', 'values']) + + ts = pd.Timestamp('2014-01-01') # noqa + + with ensure_clean_store(self.path) as store: + store.append('test', df, format='table', data_columns=True) + + result = store.select('test', where='date > ts') + self.assertTrue(result.empty) + + result = store.select('test', where='real_date > ts') + expected = df.loc[[1], :] + tm.assert_frame_equal(expected, result) + class TestHDFComplexValues(Base): # GH10447 From 213585f49b2ea8523397ed782c81b24299d7f4db Mon Sep 17 00:00:00 
2001 From: Chris Date: Wed, 1 Mar 2017 19:14:17 -0600 Subject: [PATCH 2/5] CLN: remove timetuple type check --- doc/source/whatsnew/v0.20.0.txt | 2 +- pandas/computation/pytables.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index fa24c973a7549..d36eadf0e134d 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -602,7 +602,7 @@ Bug Fixes - Bug in ``pd.merge_asof()`` where ``left_index``/``right_index`` together caused a failure when ``tolerance`` was specified (:issue:`15135`) - +- Bug in ``pd.read_hdf()`` passing a ``Timestamp`` to the ``where`` parameter with a non date column (:issue:`15492`) - Bug in ``Series`` constructor when both ``copy=True`` and ``dtype`` arguments are provided (:issue:`15125`) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 20313784b0fc8..6a8fde54d4477 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -554,9 +554,8 @@ def parse_back_compat(self, w, op=None, value=None): # stringify with quotes these values def convert(v): - if (isinstance(v, (datetime, np.datetime64, - timedelta, np.timedelta64)) or - hasattr(v, 'timetuple')): + if isinstance(v, (datetime, np.datetime64, + timedelta, np.timedelta64)): return "'{0}'".format(v) return v From 946a48ecc12761242e4fa5319ddb26761a8791b4 Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 2 Mar 2017 18:08:31 -0600 Subject: [PATCH 3/5] ERR: more strict HDFStore string comparison --- doc/source/whatsnew/v0.20.0.txt | 20 ++++++++++++++++++++ pandas/computation/pytables.py | 11 ++++++----- pandas/tests/io/test_pytables.py | 14 ++++++++------ 3 files changed, 34 insertions(+), 11 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index d36eadf0e134d..3e300920a0152 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -450,6 +450,26 @@ New 
Behavior: df.groupby('A').agg([np.mean, np.std, np.min, np.max]) +HDFStore where string comparison +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In previous versions most types could be compared to a string column in a ``HDFStore`` +usually resulting in an invalid comparison. These comparisons will now raise a +``TypeError`` (:issue:`15492`) + +New Behavior: + +.. code-block:: ipython + + df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + df.to_hdf('store.h5', 'key', format='table', data_columns=True) + ts = pd.Timestamp('2014-01-01') + try: + pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + except TypeError: + print("TypeError raised") + + .. _whatsnew_0200.api: Other API Changes diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 6a8fde54d4477..3c2fdd7089da7 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -214,12 +214,13 @@ def stringify(value): else: v = bool(v) return TermValue(v, v, kind) - elif not isinstance(v, string_types): - v = stringify(v) + elif isinstance(v, string_types): + # string quoting return TermValue(v, stringify(v), u('string')) - - # string quoting - return TermValue(v, stringify(v), u('string')) + else: + raise TypeError(("Cannot compare {v} of type {typ}" + " to {kind} column").format(v=v, typ=type(v), + kind=kind)) def convert_values(self): pass diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 7d98c6879fec2..fcfec68f2379b 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5080,25 +5080,27 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) - def test_query_ts_string_column(self): + def test_query_compare_string_column(self): # GH 15492 df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'], 'real_date': date_range('2014-01-01', periods=2), 'values': [1, 2]}, columns=['date', 'real_date', 'values']) - ts = 
pd.Timestamp('2014-01-01') # noqa - with ensure_clean_store(self.path) as store: store.append('test', df, format='table', data_columns=True) - result = store.select('test', where='date > ts') - self.assertTrue(result.empty) - + ts = pd.Timestamp('2014-01-01') # noqa result = store.select('test', where='real_date > ts') expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) + for v in [2.1, True, pd.Timestamp('2014-01-01')]: + for op in ['<', '>', '==']: + query = 'date {op} v'.format(op=op) + with tm.assertRaises(TypeError): + result = store.select('test', where=query) + class TestHDFComplexValues(Base): # GH10447 From 7c7100d03310c181e81c09baf00ba819b6d7939d Mon Sep 17 00:00:00 2001 From: Chris Date: Thu, 2 Mar 2017 18:35:31 -0600 Subject: [PATCH 4/5] expand test cases --- doc/source/whatsnew/v0.20.0.txt | 21 +++++++++++++------- pandas/tests/io/test_pytables.py | 33 +++++++++++++++++++++++++++----- 2 files changed, 42 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt index 3e300920a0152..8e44bec377e35 100644 --- a/doc/source/whatsnew/v0.20.0.txt +++ b/doc/source/whatsnew/v0.20.0.txt @@ -461,13 +461,20 @@ New Behavior: .. code-block:: ipython - df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) - df.to_hdf('store.h5', 'key', format='table', data_columns=True) - ts = pd.Timestamp('2014-01-01') - try: - pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') - except TypeError: - print("TypeError raised") + In [15]: df = pd.DataFrame({'unparsed_date': ['2014-01-01', '2014-01-01']}) + + In [16]: df.dtypes + Out[16]: + unparsed_date object + dtype: object + + In [17]: df.to_hdf('store.h5', 'key', format='table', data_columns=True) + + In [18]: ts = pd.Timestamp('2014-01-01') + + In [19]: pd.read_hdf('store.h5', 'key', where='unparsed_date > ts') + TypeError: Cannot compare 2014-01-01 00:00:00 of + type <class 'pandas.tslib.Timestamp'> to string column .. 
_whatsnew_0200.api: diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index fcfec68f2379b..6b6a49ebe8f40 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -5080,12 +5080,13 @@ def test_query_long_float_literal(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) - def test_query_compare_string_column(self): + def test_query_compare_column_type(self): # GH 15492 df = pd.DataFrame({'date': ['2014-01-01', '2014-01-02'], 'real_date': date_range('2014-01-01', periods=2), - 'values': [1, 2]}, - columns=['date', 'real_date', 'values']) + 'float': [1.1, 1.2], + 'int': [1, 2]}, + columns=['date', 'real_date', 'float', 'int']) with ensure_clean_store(self.path) as store: store.append('test', df, format='table', data_columns=True) @@ -5095,12 +5096,34 @@ def test_query_compare_string_column(self): expected = df.loc[[1], :] tm.assert_frame_equal(expected, result) - for v in [2.1, True, pd.Timestamp('2014-01-01')]: - for op in ['<', '>', '==']: + for op in ['<', '>', '==']: + # non strings to string column always fail + for v in [2.1, True, pd.Timestamp('2014-01-01'), + pd.Timedelta(1, 's')]: query = 'date {op} v'.format(op=op) with tm.assertRaises(TypeError): result = store.select('test', where=query) + # strings to other columns must be convertible to type + v = 'a' + for col in ['int', 'float', 'real_date']: + query = '{col} {op} v'.format(op=op, col=col) + with tm.assertRaises(ValueError): + result = store.select('test', where=query) + + for v, col in zip(['1', '1.1', '2014-01-01'], + ['int', 'float', 'real_date']): + query = '{col} {op} v'.format(op=op, col=col) + result = store.select('test', where=query) + + if op == '==': + expected = df.loc[[0], :] + elif op == '>': + expected = df.loc[[1], :] + else: + expected = df.loc[[], :] + tm.assert_frame_equal(expected, result) + class TestHDFComplexValues(Base): # GH10447 From 8288dcaa62cfcb2bc827e1c7b49589bca77a1bdc Mon Sep 17 00:00:00 
2001 From: Chris Date: Fri, 3 Mar 2017 06:52:43 -0600 Subject: [PATCH 5/5] lint --- pandas/computation/pytables.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/computation/pytables.py b/pandas/computation/pytables.py index 3c2fdd7089da7..7c09ca8d38773 100644 --- a/pandas/computation/pytables.py +++ b/pandas/computation/pytables.py @@ -1,7 +1,6 @@ """ manage PyTables query interface via Expressions """ import ast -import time import warnings from functools import partial from datetime import datetime, timedelta