@@ -261,17 +261,22 @@ def __str__(self):
261
261
return 'Immediate(%d)' % (self .node .value ,)
262
262
263
263
264
- _forbidden_re = re .compile ('[\;[\:]|__|\.[abcdefghjklmnopqstuvwxyzA-Z_]' )
265
- def stringToExpression (s , types , context ):
264
# Building blocks of the sanitization blacklist. The compiled pattern is
# searched for in the expression string *after* all whitespace has been
# removed, so no alternative may contain or rely on whitespace.

# The characters `;`, `[` and `:`.
_flow_pat = r'[\;\[\:]'
# Dunder names such as `__import__` or `__class__`.
_dunder_pat = r'__[\w]+__'
# Attribute access: a `.` immediately followed by a word character, unless
# what follows is `real`, `imag`, or the fractional part of a numeric literal
# (digits, an optional exponent, an optional imaginary `j` suffix), e.g.
# `0.5`, `1.5e-3`, `2.5j`. An attribute name can never start with a digit,
# so exempting these forms does not let any attribute access through.
# Literals with no digit after the point (`1.e5`, `1.j`) are still rejected;
# write them as `1.0e5` / `1.0j`.
_attr_pat = r'\.\b(?!(real|imag|\d+([eE][+-]?\d+)?j?)\b)'
# The alternatives are joined with a bare `|`: a space inside the pattern
# would be a literal character that whitespace-stripped input never contains,
# which would silently disable the whole check.
_blacklist_re = re.compile(f'{_flow_pat}|{_dunder_pat}|{_attr_pat}')
268
+
269
+ def stringToExpression (s , types , context , sanitize : bool ):
266
270
"""Given a string, convert it to a tree of ExpressionNode's.
267
271
"""
268
272
# sanitize the string for obvious attack vectors that NumExpr cannot
269
273
# parse into its homebrew AST. This is to protect the call to `eval` below.
270
274
# We forbid `;`, `:`, `[`, dunder names such as `__name__`, and attribute access via '.'.
271
275
# We cannot ban `.real` or `.imag` however...
272
- no_whitespace = re .sub (r'\s+' , '' , s )
273
- if _forbidden_re .search (no_whitespace ) is not None :
274
- raise ValueError (f'Expression { s } has forbidden control characters.' )
276
+ if sanitize :
277
+ no_whitespace = re .sub (r'\s+' , '' , s )
278
+ if _blacklist_re .search (no_whitespace ) is not None :
279
+ raise ValueError (f'Expression { s } has forbidden control characters.' )
275
280
276
281
old_ctx = expressions ._context .get_current_context ()
277
282
try :
@@ -558,15 +563,15 @@ def getContext(kwargs, _frame_depth=1):
558
563
return context
559
564
560
565
561
- def precompile (ex , signature = (), context = {}):
566
+ def precompile (ex , signature = (), context = {}, sanitize : bool = True ):
562
567
"""
563
568
Compile the expression to an intermediate form.
564
569
"""
565
570
types = dict (signature )
566
571
input_order = [name for (name , type_ ) in signature ]
567
572
568
573
if isinstance (ex , str ):
569
- ex = stringToExpression (ex , types , context )
574
+ ex = stringToExpression (ex , types , context , sanitize )
570
575
571
576
# the AST is like the expression, but the node objects don't have
572
577
# any odd interpretations
@@ -612,7 +617,7 @@ def precompile(ex, signature=(), context={}):
612
617
return threeAddrProgram , signature , tempsig , constants , input_names
613
618
614
619
615
- def NumExpr (ex , signature = (), ** kwargs ):
620
+ def NumExpr (ex , signature = (), sanitize : bool = True , ** kwargs ):
616
621
"""
617
622
Compile an expression built using E.<variable> variables to a function.
618
623
@@ -629,7 +634,7 @@ def NumExpr(ex, signature=(), **kwargs):
629
634
# translated to either True or False).
630
635
_frame_depth = 1
631
636
context = getContext (kwargs , _frame_depth = _frame_depth )
632
- threeAddrProgram , inputsig , tempsig , constants , input_names = precompile (ex , signature , context )
637
+ threeAddrProgram , inputsig , tempsig , constants , input_names = precompile (ex , signature , context , sanitize = sanitize )
633
638
program = compileThreeAddrForm (threeAddrProgram )
634
639
return interpreter .NumExpr (inputsig .encode ('ascii' ),
635
640
tempsig .encode ('ascii' ),
@@ -710,8 +715,8 @@ def getType(a):
710
715
raise ValueError ("unknown type %s" % a .dtype .name )
711
716
712
717
713
- def getExprNames (text , context ):
714
- ex = stringToExpression (text , {}, context )
718
+ def getExprNames (text , context , sanitize : bool = True ):
719
+ ex = stringToExpression (text , {}, context , sanitize )
715
720
ast = expressionToAST (ex )
716
721
input_order = getInputOrder (ast , None )
717
722
#try to figure out if vml operations are used by expression
@@ -779,6 +784,7 @@ def validate(ex: str,
779
784
order : str = 'K' ,
780
785
casting : str = 'safe' ,
781
786
_frame_depth : int = 2 ,
787
+ sanitize : bool = True ,
782
788
** kwargs ) -> Optional [Exception ]:
783
789
"""
784
790
Validate a NumExpr expression with the given `local_dict` or `locals()`.
@@ -826,16 +832,19 @@ def validate(ex: str,
826
832
like float64 to float32, are allowed.
827
833
* 'unsafe' means any data conversions may be done.
828
834
835
+ sanitize: bool
836
+ Both `validate` and by extension `evaluate` call `eval(ex)`, which is
837
+ potentially dangerous on unsanitized inputs. As such, NumExpr by default
838
+ performs simple sanitization, banning the characters ':', ';' and '[',
839
+ dunder names of the form '__name__', and any attribute access other than '.real' and '.imag'.
840
+
829
841
_frame_depth: int
830
842
The calling frame depth. Unless you are a NumExpr developer you should
831
843
not set this value.
832
844
833
845
Note
834
846
----
835
- Both `validate` and by extension `evaluate` call `eval(ex)`, which is
836
- potentially dangerous on unsanitized inputs. As such, NumExpr does some
837
- sanitization, banning the character ':;[', the dunder '__', and attribute
838
- access to all but '.r' for real and '.i' for imag access to complex numbers.
847
+
839
848
"""
840
849
global _numexpr_last
841
850
@@ -848,7 +857,7 @@ def validate(ex: str,
848
857
context = getContext (kwargs )
849
858
expr_key = (ex , tuple (sorted (context .items ())))
850
859
if expr_key not in _names_cache :
851
- _names_cache [expr_key ] = getExprNames (ex , context )
860
+ _names_cache [expr_key ] = getExprNames (ex , context , sanitize = sanitize )
852
861
names , ex_uses_vml = _names_cache [expr_key ]
853
862
arguments = getArguments (names , local_dict , global_dict , _frame_depth = _frame_depth )
854
863
@@ -861,7 +870,7 @@ def validate(ex: str,
861
870
try :
862
871
compiled_ex = _numexpr_cache [numexpr_key ]
863
872
except KeyError :
864
- compiled_ex = _numexpr_cache [numexpr_key ] = NumExpr (ex , signature , ** context )
873
+ compiled_ex = _numexpr_cache [numexpr_key ] = NumExpr (ex , signature , sanitize = sanitize , ** context )
865
874
kwargs = {'out' : out , 'order' : order , 'casting' : casting ,
866
875
'ex_uses_vml' : ex_uses_vml }
867
876
_numexpr_last = dict (ex = compiled_ex , argnames = names , kwargs = kwargs )
@@ -875,6 +884,7 @@ def evaluate(ex: str,
875
884
out : numpy .ndarray = None ,
876
885
order : str = 'K' ,
877
886
casting : str = 'safe' ,
887
+ sanitize : bool = True ,
878
888
_frame_depth : int = 3 ,
879
889
** kwargs ) -> numpy .ndarray :
880
890
"""
@@ -920,6 +930,12 @@ def evaluate(ex: str,
920
930
like float64 to float32, are allowed.
921
931
* 'unsafe' means any data conversions may be done.
922
932
933
+ sanitize: bool
934
+ Both `validate` and by extension `evaluate` call `eval(ex)`, which is
935
+ potentially dangerous on unsanitized inputs. As such, NumExpr by default
936
+ performs simple sanitization, banning the characters ':', ';' and '[',
937
+ dunder names of the form '__name__', and any attribute access other than '.real' and '.imag'.
938
+
923
939
_frame_depth: int
924
940
The calling frame depth. Unless you are a NumExpr developer you should
925
941
not set this value.
@@ -936,7 +952,7 @@ def evaluate(ex: str,
936
952
# `getArguments`
937
953
e = validate (ex , local_dict = local_dict , global_dict = global_dict ,
938
954
out = out , order = order , casting = casting ,
939
- _frame_depth = _frame_depth , ** kwargs )
955
+ _frame_depth = _frame_depth , sanitize = sanitize , ** kwargs )
940
956
if e is None :
941
957
return re_evaluate (local_dict = local_dict , _frame_depth = _frame_depth )
942
958
else :
0 commit comments