Skip to content

Commit 1253657

Browse files
authored
Merge pull request #10664 from hvitved/type-tracking-more-caching
Ruby/Python: Cache more type tracking predicates
2 parents 2593120 + e57c3be commit 1253657

File tree

2 files changed

+164
-152
lines changed

2 files changed

+164
-152
lines changed

python/ql/lib/semmle/python/dataflow/new/internal/TypeTracker.qll

Lines changed: 82 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,32 @@ private module Cached {
1616
LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
1717
JumpStep()
1818

19+
cached
20+
newtype TTypeTracker =
21+
MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
22+
content = noContent()
23+
or
24+
// Restrict `content` to those that might eventually match a load.
25+
// We can't rely on `basicStoreStep` since `startInContent` might be used with
26+
// a content that has no corresponding store.
27+
exists(TypeTrackerContent loadContents |
28+
basicLoadStep(_, _, loadContents) and
29+
compatibleContents(content, loadContents)
30+
)
31+
}
32+
33+
cached
34+
newtype TTypeBackTracker =
35+
MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
36+
content = noContent()
37+
or
38+
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
39+
exists(TypeTrackerContent storeContent |
40+
basicStoreStep(_, _, storeContent) and
41+
compatibleContents(storeContent, content)
42+
)
43+
}
44+
1945
pragma[nomagic]
2046
private TypeTracker noContentTypeTracker(boolean hasCall) {
2147
result = MkTypeTracker(hasCall, noContent())
@@ -105,10 +131,65 @@ private module Cached {
105131
predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
106132
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
107133
}
134+
135+
cached
136+
predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
137+
jumpStep(nodeFrom, nodeTo) and
138+
summary = JumpStep()
139+
or
140+
levelStep(nodeFrom, nodeTo) and
141+
summary = LevelStep()
142+
or
143+
exists(TypeTrackerContent content |
144+
flowsToStoreStep(nodeFrom, nodeTo, content) and
145+
summary = StoreStep(content)
146+
or
147+
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
148+
)
149+
}
150+
151+
cached
152+
predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
153+
callStep(nodeFrom, nodeTo) and summary = CallStep()
154+
or
155+
returnStep(nodeFrom, nodeTo) and
156+
summary = ReturnStep()
157+
}
108158
}
109159

110160
private import Cached
111161

162+
/**
163+
* Holds if `nodeFrom` is being written to the `content` of the object in `nodeTo`.
164+
*
165+
* Note that `nodeTo` will always be a local source node that flows to the place where the content
166+
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
167+
* from the point of view of the execution of the program.
168+
*
169+
* For instance, if we interpret attribute writes in Python as writing to content with the same
170+
* name as the attribute and consider the following snippet
171+
*
172+
* ```python
173+
* def foo(y):
174+
* x = Foo()
175+
* bar(x)
176+
* x.attr = y
177+
* baz(x)
178+
*
179+
* def bar(x):
180+
* z = x.attr
181+
* ```
182+
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
183+
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
184+
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
185+
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
186+
*/
187+
private predicate flowsToStoreStep(
188+
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content
189+
) {
190+
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
191+
}
192+
112193
/**
113194
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
114195
*
@@ -131,30 +212,6 @@ class StepSummary extends TStepSummary {
131212
}
132213
}
133214

134-
pragma[noinline]
135-
private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
136-
jumpStep(nodeFrom, nodeTo) and
137-
summary = JumpStep()
138-
or
139-
levelStep(nodeFrom, nodeTo) and
140-
summary = LevelStep()
141-
or
142-
exists(TypeTrackerContent content |
143-
StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and
144-
summary = StoreStep(content)
145-
or
146-
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
147-
)
148-
}
149-
150-
pragma[noinline]
151-
private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
152-
callStep(nodeFrom, nodeTo) and summary = CallStep()
153-
or
154-
returnStep(nodeFrom, nodeTo) and
155-
summary = ReturnStep()
156-
}
157-
158215
/** Provides predicates for updating step summaries (`StepSummary`s). */
159216
module StepSummary {
160217
/**
@@ -188,49 +245,9 @@ module StepSummary {
188245
smallstepCall(nodeFrom, nodeTo, summary)
189246
}
190247

191-
/**
192-
* Holds if `nodeFrom` is being written to the `content` of the object in `nodeTo`.
193-
*
194-
* Note that `nodeTo` will always be a local source node that flows to the place where the content
195-
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
196-
* from the point of view of the execution of the program.
197-
*
198-
* For instance, if we interpret attribute writes in Python as writing to content with the same
199-
* name as the attribute and consider the following snippet
200-
*
201-
* ```python
202-
* def foo(y):
203-
* x = Foo()
204-
* bar(x)
205-
* x.attr = y
206-
* baz(x)
207-
*
208-
* def bar(x):
209-
* z = x.attr
210-
* ```
211-
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
212-
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
213-
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
214-
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
215-
*/
216-
predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content) {
217-
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
218-
}
248+
deprecated predicate localSourceStoreStep = flowsToStoreStep/3;
219249
}
220250

221-
private newtype TTypeTracker =
222-
MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
223-
content = noContent()
224-
or
225-
// Restrict `content` to those that might eventually match a load.
226-
// We can't rely on `basicStoreStep` since `startInContent` might be used with
227-
// a content that has no corresponding store.
228-
exists(TypeTrackerContent loadContents |
229-
basicLoadStep(_, _, loadContents) and
230-
compatibleContents(content, loadContents)
231-
)
232-
}
233-
234251
/**
235252
* A summary of the steps needed to track a value to a given dataflow node.
236253
*
@@ -382,17 +399,6 @@ module TypeTracker {
382399
TypeTracker end() { result.end() }
383400
}
384401

385-
private newtype TTypeBackTracker =
386-
MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
387-
content = noContent()
388-
or
389-
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
390-
exists(TypeTrackerContent storeContent |
391-
basicStoreStep(_, _, storeContent) and
392-
compatibleContents(storeContent, content)
393-
)
394-
}
395-
396402
/**
397403
* A summary of the steps needed to back-track a use of a value to a given dataflow node.
398404
*

ruby/ql/lib/codeql/ruby/typetracking/TypeTracker.qll

Lines changed: 82 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,32 @@ private module Cached {
1616
LoadStep(TypeTrackerContent content) { basicLoadStep(_, _, content) } or
1717
JumpStep()
1818

19+
cached
20+
newtype TTypeTracker =
21+
MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
22+
content = noContent()
23+
or
24+
// Restrict `content` to those that might eventually match a load.
25+
// We can't rely on `basicStoreStep` since `startInContent` might be used with
26+
// a content that has no corresponding store.
27+
exists(TypeTrackerContent loadContents |
28+
basicLoadStep(_, _, loadContents) and
29+
compatibleContents(content, loadContents)
30+
)
31+
}
32+
33+
cached
34+
newtype TTypeBackTracker =
35+
MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
36+
content = noContent()
37+
or
38+
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
39+
exists(TypeTrackerContent storeContent |
40+
basicStoreStep(_, _, storeContent) and
41+
compatibleContents(storeContent, content)
42+
)
43+
}
44+
1945
pragma[nomagic]
2046
private TypeTracker noContentTypeTracker(boolean hasCall) {
2147
result = MkTypeTracker(hasCall, noContent())
@@ -105,10 +131,65 @@ private module Cached {
105131
predicate stepCall(TypeTrackingNode nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
106132
exists(Node mid | nodeFrom.flowsTo(mid) and smallstepCall(mid, nodeTo, summary))
107133
}
134+
135+
cached
136+
predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
137+
jumpStep(nodeFrom, nodeTo) and
138+
summary = JumpStep()
139+
or
140+
levelStep(nodeFrom, nodeTo) and
141+
summary = LevelStep()
142+
or
143+
exists(TypeTrackerContent content |
144+
flowsToStoreStep(nodeFrom, nodeTo, content) and
145+
summary = StoreStep(content)
146+
or
147+
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
148+
)
149+
}
150+
151+
cached
152+
predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
153+
callStep(nodeFrom, nodeTo) and summary = CallStep()
154+
or
155+
returnStep(nodeFrom, nodeTo) and
156+
summary = ReturnStep()
157+
}
108158
}
109159

110160
private import Cached
111161

162+
/**
163+
* Holds if `nodeFrom` is being written to the `content` of the object in `nodeTo`.
164+
*
165+
* Note that `nodeTo` will always be a local source node that flows to the place where the content
166+
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
167+
* from the point of view of the execution of the program.
168+
*
169+
* For instance, if we interpret attribute writes in Python as writing to content with the same
170+
* name as the attribute and consider the following snippet
171+
*
172+
* ```python
173+
* def foo(y):
174+
* x = Foo()
175+
* bar(x)
176+
* x.attr = y
177+
* baz(x)
178+
*
179+
* def bar(x):
180+
* z = x.attr
181+
* ```
182+
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
183+
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
184+
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
185+
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
186+
*/
187+
private predicate flowsToStoreStep(
188+
Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content
189+
) {
190+
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
191+
}
192+
112193
/**
113194
* INTERNAL: Use `TypeTracker` or `TypeBackTracker` instead.
114195
*
@@ -131,30 +212,6 @@ class StepSummary extends TStepSummary {
131212
}
132213
}
133214

134-
pragma[noinline]
135-
private predicate smallstepNoCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
136-
jumpStep(nodeFrom, nodeTo) and
137-
summary = JumpStep()
138-
or
139-
levelStep(nodeFrom, nodeTo) and
140-
summary = LevelStep()
141-
or
142-
exists(TypeTrackerContent content |
143-
StepSummary::localSourceStoreStep(nodeFrom, nodeTo, content) and
144-
summary = StoreStep(content)
145-
or
146-
basicLoadStep(nodeFrom, nodeTo, content) and summary = LoadStep(content)
147-
)
148-
}
149-
150-
pragma[noinline]
151-
private predicate smallstepCall(Node nodeFrom, TypeTrackingNode nodeTo, StepSummary summary) {
152-
callStep(nodeFrom, nodeTo) and summary = CallStep()
153-
or
154-
returnStep(nodeFrom, nodeTo) and
155-
summary = ReturnStep()
156-
}
157-
158215
/** Provides predicates for updating step summaries (`StepSummary`s). */
159216
module StepSummary {
160217
/**
@@ -188,49 +245,9 @@ module StepSummary {
188245
smallstepCall(nodeFrom, nodeTo, summary)
189246
}
190247

191-
/**
192-
* Holds if `nodeFrom` is being written to the `content` of the object in `nodeTo`.
193-
*
194-
* Note that `nodeTo` will always be a local source node that flows to the place where the content
195-
* is written in `basicStoreStep`. This may lead to the flow of information going "back in time"
196-
* from the point of view of the execution of the program.
197-
*
198-
* For instance, if we interpret attribute writes in Python as writing to content with the same
199-
* name as the attribute and consider the following snippet
200-
*
201-
* ```python
202-
* def foo(y):
203-
* x = Foo()
204-
* bar(x)
205-
* x.attr = y
206-
* baz(x)
207-
*
208-
* def bar(x):
209-
* z = x.attr
210-
* ```
211-
* for the attribute write `x.attr = y`, we will have `content` being the literal string `"attr"`,
212-
* `nodeFrom` will be `y`, and `nodeTo` will be the object `Foo()` created on the first line of the
213-
* function. This means we will track the fact that `x.attr` can have the type of `y` into the
214-
* assignment to `z` inside `bar`, even though this attribute write happens _after_ `bar` is called.
215-
*/
216-
predicate localSourceStoreStep(Node nodeFrom, TypeTrackingNode nodeTo, TypeTrackerContent content) {
217-
exists(Node obj | nodeTo.flowsTo(obj) and basicStoreStep(nodeFrom, obj, content))
218-
}
248+
deprecated predicate localSourceStoreStep = flowsToStoreStep/3;
219249
}
220250

221-
private newtype TTypeTracker =
222-
MkTypeTracker(Boolean hasCall, OptionalTypeTrackerContent content) {
223-
content = noContent()
224-
or
225-
// Restrict `content` to those that might eventually match a load.
226-
// We can't rely on `basicStoreStep` since `startInContent` might be used with
227-
// a content that has no corresponding store.
228-
exists(TypeTrackerContent loadContents |
229-
basicLoadStep(_, _, loadContents) and
230-
compatibleContents(content, loadContents)
231-
)
232-
}
233-
234251
/**
235252
* A summary of the steps needed to track a value to a given dataflow node.
236253
*
@@ -382,17 +399,6 @@ module TypeTracker {
382399
TypeTracker end() { result.end() }
383400
}
384401

385-
private newtype TTypeBackTracker =
386-
MkTypeBackTracker(Boolean hasReturn, OptionalTypeTrackerContent content) {
387-
content = noContent()
388-
or
389-
// As in MkTypeTracker, restrict `content` to those that might eventually match a store.
390-
exists(TypeTrackerContent storeContent |
391-
basicStoreStep(_, _, storeContent) and
392-
compatibleContents(storeContent, content)
393-
)
394-
}
395-
396402
/**
397403
* A summary of the steps needed to back-track a use of a value to a given dataflow node.
398404
*

0 commit comments

Comments
 (0)