@@ -19,48 +19,154 @@ private import codeql.ruby.dataflow.internal.DataFlowDispatch as DataFlowDispatc
19
19
*/
20
20
module API {
21
21
/**
22
- * An abstract representation of a definition or use of an API component such as a Ruby module,
23
- * or the result of a method call.
22
+ * A node in the API graph, representing a value that has crossed the boundary between this
23
+ * codebase and an external library (or in general, any external codebase).
24
+ *
25
+ * ### Basic usage
26
+ *
27
+ * API graphs are typically used to identify "API calls", that is, calls to an external function
28
+ * whose implementation is not necessarily part of the current codebase.
29
+ *
30
+ * The most basic use of API graphs is typically as follows:
31
+ * 1. Start with `API::getTopLevelMember` for the relevant library.
32
+ * 2. Follow up with a chain of accessors such as `getMethod` describing how to get to the relevant API function.
33
+ * 3. Map the resulting API graph nodes to data-flow nodes, using `asSource` or `asSink`.
34
+ *
35
+ * For example, a simplified way to get arguments to `Foo.bar` would be:
36
+ * ```codeql
37
+ * API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).asSink()
38
+ * ```
39
+ *
40
+ * The most commonly used accessors are `getMember`, `getMethod`, `getParameter`, and `getReturn`.
41
+ *
42
+ * ### API graph nodes
43
+ *
44
+ * There are two kinds of nodes in the API graphs, distinguished by who is "holding" the value:
45
+ * - **Use-nodes** represent values held by the current codebase, which came from an external library.
46
+ * (The current codebase is "using" a value that came from the library).
47
+ * - **Def-nodes** represent values held by the external library, which came from this codebase.
48
+ * (The current codebase "defines" the value seen by the library).
49
+ *
50
+ * API graph nodes are associated with data-flow nodes in the current codebase.
51
+ * (Since external libraries are not part of the database, there is no way to associate with concrete
52
+ * data-flow nodes from the external library).
53
+ * - **Use-nodes** are associated with data-flow nodes where a value enters the current codebase,
54
+ * such as the return value of a call to an external function.
55
+ * - **Def-nodes** are associated with data-flow nodes where a value leaves the current codebase,
56
+ * such as an argument passed in a call to an external function.
57
+ *
58
+ *
59
+ * ### Access paths and edge labels
60
+ *
61
+ * Nodes in the API graph are associated with a set of access paths, describing a series of operations
62
+ * that may be performed to obtain that value.
63
+ *
64
+ * For example, the access path `API::getTopLevelMember("Foo").getMethod("bar")` represents the action of
65
+ * reading the top-level constant `Foo` and then accessing the method `bar` on the resulting object.
66
+ * It would be associated with a call such as `Foo.bar()`.
67
+ *
68
+ * Each edge in the graph is labelled by such an "operation". For an edge `A->B`, the type of the `A` node
69
+ * determines who is performing the operation, and the type of the `B` node determines who ends up holding
70
+ * the result:
71
+ * - An edge starting from a use-node describes what the current codebase is doing to a value that
72
+ * came from a library.
73
+ * - An edge starting from a def-node describes what the external library might do to a value that
74
+ * came from the current codebase.
75
+ * - An edge ending in a use-node means the result ends up in the current codebase (at its associated data-flow node).
76
+ * - An edge ending in a def-node means the result ends up in external code (its associated data-flow node is
77
+ * the place where it was "last seen" in the current codebase before flowing out).
78
+ *
79
+ * Because the implementation of the external library is not visible, it is not known exactly what operations
80
+ * it will perform on values that flow there. Instead, the edges starting from a def-node are operations that would
81
+ * lead to an observable effect within the current codebase, without knowing for certain if the library will actually perform
82
+ * those operations. (When constructing these edges, we assume the library is somewhat well-behaved).
83
+ *
84
+ * For example, given this snippet:
85
+ * ```ruby
86
+ * Foo.bar(->(x) { doSomething(x) })
87
+ * ```
88
+ * A callback is passed to the external function `Foo.bar`. We can't know if `Foo.bar` will actually invoke this callback.
89
+ * But _if_ the library should decide to invoke the callback, then a value will flow into the current codebase via the `x` parameter.
90
+ * For that reason, an edge is generated representing the argument-passing operation that might be performed by `Foo.bar`.
91
+ * This edge is going from the def-node associated with the callback to the use-node associated with the parameter `x` of the lambda.
24
92
*/
25
93
class Node extends Impl:: TApiNode {
26
94
/**
27
- * Gets a data-flow node corresponding to a use of the API component represented by this node .
95
+ * Gets a data-flow node where this value may flow after entering the current codebase.
28
96
*
29
- * For example, `Kernel.format "%s world!", "Hello"` is a use of the return of the `format` function of
30
- * the `Kernel` module.
31
- *
32
- * This includes indirect uses found via data flow.
97
+ * This is similar to `asSource()` but additionally includes nodes that are transitively reachable by data flow.
98
+ * See `asSource()` for examples.
33
99
*/
34
- DataFlow:: Node getAUse ( ) {
100
+ DataFlow:: Node getAValueReachableFromSource ( ) {
35
101
exists ( DataFlow:: LocalSourceNode src | Impl:: use ( this , src ) |
36
102
Impl:: trackUseNode ( src ) .flowsTo ( result )
37
103
)
38
104
}
39
105
40
106
/**
41
- * Gets an immediate use of the API component represented by this node.
107
+ * Gets a data-flow node where this value enters the current codebase.
108
+ *
109
+ * For example:
110
+ * ```ruby
111
+ * # API::getTopLevelMember("Foo").asSource()
112
+ * Foo
42
113
*
43
- * Unlike `getAUse()`, this predicate only gets the immediate references, not the indirect uses
44
- * found via data flow.
114
+ * # API::getTopLevelMember("Foo").getMethod("bar").getReturn().asSource()
115
+ * Foo.bar
116
+ *
117
+ * # 'x' is found by:
118
+ * # API::getTopLevelMember("Foo").getMethod("bar").getBlock().getParameter(0).asSource()
119
+ * Foo.bar do |x|
120
+ * end
121
+ * ```
45
122
*/
46
- DataFlow:: LocalSourceNode getAnImmediateUse ( ) { Impl:: use ( this , result ) }
123
+ DataFlow:: LocalSourceNode asSource ( ) { Impl:: use ( this , result ) }
47
124
48
125
/**
49
- * Gets a data-flow node corresponding the value flowing into this API component.
126
+ * Gets a data-flow node where this value leaves the current codebase and flows into an
127
+ * external library (or in general, any external codebase).
128
+ *
129
+ * Concretely, this corresponds to an argument passed to a call to external code.
130
+ *
131
+ * For example:
132
+ * ```ruby
133
+ * # 'x' is found by:
134
+ * # API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).asSink()
135
+ * Foo.bar(x)
136
+ *
137
+ * Foo.bar(-> {
138
+ * # 'x' is found by:
139
+ * # API::getTopLevelMember("Foo").getMethod("bar").getParameter(0).getReturn().asSink()
140
+ * x
141
+ * })
142
+ * ```
50
143
*/
51
- DataFlow:: Node getARhs ( ) { Impl:: def ( this , result ) }
144
+ DataFlow:: Node asSink ( ) { Impl:: def ( this , result ) }
52
145
53
146
/**
54
- * Gets a data-flow node that may interprocedurally flow to the value escaping into this API component.
147
+ * Gets a data-flow node that transitively flows to an external library (or in general, any external codebase).
148
+ *
149
+ * This is similar to `asSink()` but additionally includes nodes that transitively reach a sink by data flow.
150
+ * See `asSink()` for examples.
55
151
*/
56
- DataFlow:: Node getAValueReachingRhs ( ) { result = Impl:: trackDefNode ( this .getARhs ( ) ) }
152
+ DataFlow:: Node getAValueReachingSink ( ) { result = Impl:: trackDefNode ( this .asSink ( ) ) }
153
+
154
+ /** DEPRECATED. This predicate has been renamed to `getAValueReachableFromSource()`. */
155
+ deprecated DataFlow:: Node getAUse ( ) { result = this .getAValueReachableFromSource ( ) }
156
+
157
+ /** DEPRECATED. This predicate has been renamed to `asSource()`. */
158
+ deprecated DataFlow:: LocalSourceNode getAnImmediateUse ( ) { result = this .asSource ( ) }
159
+
160
+ /** DEPRECATED. This predicate has been renamed to `asSink()`. */
161
+ deprecated DataFlow:: Node getARhs ( ) { result = this .asSink ( ) }
162
+
163
+ /** DEPRECATED. This predicate has been renamed to `getAValueReachingSink()`. */
164
+ deprecated DataFlow:: Node getAValueReachingRhs ( ) { result = this .getAValueReachingSink ( ) }
57
165
58
166
/**
59
167
* Gets a call to a method on the receiver represented by this API component.
60
168
*/
61
- DataFlow:: CallNode getAMethodCall ( string method ) {
62
- result = this .getReturn ( method ) .getAnImmediateUse ( )
63
- }
169
+ DataFlow:: CallNode getAMethodCall ( string method ) { result = this .getReturn ( method ) .asSource ( ) }
64
170
65
171
/**
66
172
* Gets a node representing member `m` of this API component.
@@ -135,7 +241,7 @@ module API {
135
241
/**
136
242
* Gets a `new` call to the function represented by this API component.
137
243
*/
138
- DataFlow:: ExprNode getAnInstantiation ( ) { result = this .getInstance ( ) .getAnImmediateUse ( ) }
244
+ DataFlow:: ExprNode getAnInstantiation ( ) { result = this .getInstance ( ) .asSource ( ) }
139
245
140
246
/**
141
247
* Gets a node representing a (direct or indirect) subclass of the class represented by this node.
0 commit comments