TST/API/BUG: resolve scoping issues in pytables query where rhs is a compound selection or scoped variable #5566

Merged: 1 commit, Nov 21, 2013
28 changes: 16 additions & 12 deletions pandas/computation/expr.py
@@ -125,6 +125,10 @@ def __init__(self, gbls=None, lcls=None, level=1, resolvers=None,
self.globals['True'] = True
self.globals['False'] = False

# function defs
self.globals['list'] = list
self.globals['tuple'] = tuple

res_keys = (list(o.keys()) for o in self.resolvers)
self.resolver_keys = frozenset(reduce(operator.add, res_keys, []))
self._global_resolvers = self.resolvers + (self.locals, self.globals)
@@ -505,21 +509,21 @@ def _possibly_evaluate_binop(self, op, op_class, lhs, rhs,
maybe_eval_in_python=('==', '!=')):
res = op(lhs, rhs)

-        if (res.op in _cmp_ops_syms and lhs.is_datetime or rhs.is_datetime and
-                self.engine != 'pytables'):
-            # all date ops must be done in python bc numexpr doesn't work well
-            # with NaT
-            return self._possibly_eval(res, self.binary_ops)
+        if self.engine != 'pytables':
+            if (res.op in _cmp_ops_syms and getattr(lhs,'is_datetime',False) or getattr(rhs,'is_datetime',False)):
+                # all date ops must be done in python bc numexpr doesn't work well
+                # with NaT
+                return self._possibly_eval(res, self.binary_ops)

if res.op in eval_in_python:
# "in"/"not in" ops are always evaluated in python
return self._possibly_eval(res, eval_in_python)
-        elif (lhs.return_type == object or rhs.return_type == object and
-              self.engine != 'pytables'):
-            # evaluate "==" and "!=" in python if either of our operands has an
-            # object return type
-            return self._possibly_eval(res, eval_in_python +
-                                       maybe_eval_in_python)
+        elif self.engine != 'pytables':
+            if (getattr(lhs,'return_type',None) == object or getattr(rhs,'return_type',None) == object):
+                # evaluate "==" and "!=" in python if either of our operands has an
+                # object return type
+                return self._possibly_eval(res, eval_in_python +
+                                           maybe_eval_in_python)
return res

def visit_BinOp(self, node, **kwargs):
@@ -635,7 +639,7 @@ def visit_Attribute(self, node, **kwargs):

raise ValueError("Invalid Attribute context {0}".format(ctx.__name__))

-    def visit_Call(self, node, **kwargs):
+    def visit_Call(self, node, side=None, **kwargs):

# this can happen with: datetime.datetime
if isinstance(node.func, ast.Attribute):
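
The expr.py changes do two things: `list` and `tuple` are registered in the evaluation scope's globals so a `where` expression can call them on a scoped variable, and the datetime/object fallbacks in `_possibly_evaluate_binop` are guarded behind an explicit `self.engine != 'pytables'` check (with `getattr`, since pytables terms do not expose `is_datetime`/`return_type` the same way). A minimal sketch of the kind of query this enables; the file name, column, and variable names here are illustrative, not from the patch:

```python
import numpy as np
import pandas as pd
from pandas import HDFStore

# illustrative fixture, not part of the patch
df = pd.DataFrame({'A': np.arange(5)})
df.to_hdf('demo.h5', 'df', mode='w', format='table', data_columns=['A'])

keys = pd.Index([2, 3])   # a variable living in the caller's scope

store = HDFStore('demo.h5')
# 'list' now resolves from the expression globals and 'keys' from the
# caller's frame, so a compound rhs like list(keys) parses and evaluates
result = store.select('df', where='A=list(keys)')
store.close()
```
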
18 changes: 16 additions & 2 deletions pandas/computation/pytables.py
@@ -401,8 +401,15 @@ def visit_Assign(self, node, **kwargs):
return self.visit(cmpr)

def visit_Subscript(self, node, **kwargs):
# only allow simple subscripts

value = self.visit(node.value)
slobj = self.visit(node.slice)
try:
value = value.value
except:
pass

try:
return self.const_type(value[slobj], self.env)
except TypeError:
@@ -416,9 +423,16 @@ def visit_Attribute(self, node, **kwargs):
ctx = node.ctx.__class__
if ctx == ast.Load:
# resolve the value
-            resolved = self.visit(value).value
+            resolved = self.visit(value)

# try to get the value to see if we are another expression
try:
resolved = resolved.value
except (AttributeError):
pass

try:
-                return getattr(resolved, attr)
+                return self.term_type(getattr(resolved, attr), self.env)
except AttributeError:

# something like datetime.datetime where scope is overridden
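
The pytables visitor changes make `visit_Subscript` and `visit_Attribute` unwrap an already-resolved term (its `.value`) and re-wrap attribute lookups in `self.term_type`, so dotted references and method calls on scoped objects resolve inside a `where` string. A rough sketch in the spirit of the new tests; the file and frame names are illustrative:

```python
import numpy as np
import pandas as pd
from pandas import HDFStore, MultiIndex

# illustrative fixtures modeled on the new test
hist = pd.DataFrame(np.random.randn(25, 1), columns=['data'],
                    index=MultiIndex.from_tuples(
                        [(i, j) for i in range(5) for j in range(5)],
                        names=['l1', 'l2']))
hist.to_hdf('hist.hdf', 'df', mode='w', format='table')

selection = pd.DataFrame({'A': [2, 3]}, index=[2, 3])

store = HDFStore('hist.hdf')
# attribute access on a scoped object, and a method call on that attribute,
# both resolve through the updated visitor
r1 = store.select('df', where='l1=selection.index')
r2 = store.select('df', where='l1=selection.index.tolist()')
store.close()
```
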
4 changes: 4 additions & 0 deletions pandas/io/pytables.py
@@ -294,6 +294,10 @@ def read_hdf(path_or_buf, key, **kwargs):

"""

# grab the scope
if 'where' in kwargs:
kwargs['where'] = _ensure_term(kwargs['where'])

f = lambda store, auto_close: store.select(
key, auto_close=auto_close, **kwargs)

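
In io/pytables.py, `read_hdf` now runs the `where` argument through `_ensure_term` before building the selector, so the caller's scope is captured and string expressions that reference local variables behave the same as they do through `HDFStore.select`. A minimal sketch, reusing the illustrative `demo.h5` fixture from the earlier example:

```python
import pandas as pd

l = [2, 3]   # a plain local variable referenced by name in the where clause
result = pd.read_hdf('demo.h5', 'df', where='A=l')

# equivalent literal form
expected = pd.read_hdf('demo.h5', 'df', where='A=[2,3]')
```
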
79 changes: 74 additions & 5 deletions pandas/io/tests/test_pytables.py
@@ -81,14 +81,19 @@ def ensure_clean_store(path, mode='a', complevel=None, complib=None,
def ensure_clean_path(path):
"""
return essentially a named temporary file that is not opened
-    and deleted on existing
+    and deleted on exiting; if path is a list, then create and
+    return list of filenames
"""

try:
-        filename = create_tempfile(path)
-        yield filename
+        if isinstance(path, list):
+            filenames = [ create_tempfile(p) for p in path ]
+            yield filenames
+        else:
+            filenames = [ create_tempfile(path) ]
+            yield filenames[0]
finally:
-        safe_remove(filename)
+        for f in filenames:
+            safe_remove(f)

# set these parameters so we don't have file sharing
tables.parameters.MAX_NUMEXPR_THREADS = 1
@@ -3124,6 +3129,70 @@ def test_frame_select_complex(self):
expected = df.loc[df.index>df.index[3]].reindex(columns=['A','B'])
tm.assert_frame_equal(result, expected)

def test_frame_select_complex2(self):

with ensure_clean_path(['parms.hdf','hist.hdf']) as paths:

pp, hh = paths

# use non-trivial selection criteria
parms = DataFrame({ 'A' : [1,1,2,2,3] })
parms.to_hdf(pp,'df',mode='w',format='table',data_columns=['A'])

selection = read_hdf(pp,'df',where='A=[2,3]')
hist = DataFrame(np.random.randn(25,1),columns=['data'],
index=MultiIndex.from_tuples([ (i,j) for i in range(5) for j in range(5) ],
names=['l1','l2']))

hist.to_hdf(hh,'df',mode='w',format='table')

expected = read_hdf(hh,'df',where=Term('l1','=',[2,3,4]))

# list like
result = read_hdf(hh,'df',where=Term('l1','=',selection.index.tolist()))
assert_frame_equal(result, expected)
l = selection.index.tolist()

# scope with list-like
store = HDFStore(hh)
result = store.select('df',where='l1=l')
assert_frame_equal(result, expected)
store.close()

result = read_hdf(hh,'df',where='l1=l')
assert_frame_equal(result, expected)

# index
index = selection.index
result = read_hdf(hh,'df',where='l1=index')
assert_frame_equal(result, expected)

result = read_hdf(hh,'df',where='l1=selection.index')
assert_frame_equal(result, expected)

result = read_hdf(hh,'df',where='l1=selection.index.tolist()')
assert_frame_equal(result, expected)

result = read_hdf(hh,'df',where='l1=list(selection.index)')
assert_frame_equal(result, expected)

# scope with index
store = HDFStore(hh)

result = store.select('df',where='l1=index')
assert_frame_equal(result, expected)

result = store.select('df',where='l1=selection.index')
assert_frame_equal(result, expected)

result = store.select('df',where='l1=selection.index.tolist()')
assert_frame_equal(result, expected)

result = store.select('df',where='l1=list(selection.index)')
assert_frame_equal(result, expected)

store.close()

def test_invalid_filtering(self):

# can't use more than one filter (atm)