Skip to content

Commit 5575fc6

Browse files
authored
Merge pull request #13636 from github/tausbn/add-sink-alert-metrics-query
Java: Add metric queries for counting sinks coming from models
2 parents 434815b + 895e829 commit 5575fc6

6 files changed

+304
-0
lines changed
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
private import java
2+
private import semmle.code.java.dataflow.ExternalFlow as ExternalFlow
3+
private import semmle.code.java.dataflow.internal.DataFlow
4+
private import semmle.code.java.dataflow.TaintTracking
5+
private import semmle.code.java.security.RequestForgeryConfig
6+
private import semmle.code.java.security.CommandLineQuery
7+
private import semmle.code.java.security.SqlConcatenatedQuery
8+
private import semmle.code.java.security.SqlInjectionQuery
9+
private import semmle.code.java.security.UrlRedirectQuery
10+
private import semmle.code.java.security.TaintedPathQuery
11+
private import semmle.code.java.security.SqlInjectionQuery
12+
private import AutomodelJavaUtil
13+
14+
/**
 * The algebraic datatype underlying `SinkModel`.
 *
 * There is one `MkSinkModel` value for every tuple for which `ExternalFlow::sinkModel` holds.
 */
private newtype TSinkModel =
  MkSinkModel(
    string pkg, string typeName, boolean includeSubtypes, string memberName, string sig,
    string extension, string inputSpec, string sinkKind, string origin
  ) {
    ExternalFlow::sinkModel(pkg, typeName, includeSubtypes, memberName, sig, extension, inputSpec,
      sinkKind, origin)
  }
21+
22+
/**
 * A sink model, i.e. one `MkSinkModel` tuple, with accessors for each of its columns.
 */
class SinkModel extends TSinkModel {
  string package;
  string type;
  boolean subtypes;
  string name;
  string signature;
  string ext;
  string input;
  string kind;
  string provenance;

  SinkModel() {
    MkSinkModel(package, type, subtypes, name, signature, ext, input, kind, provenance) = this
  }

  /** Gets the `package` column of this sink model. */
  string getPackage() { package = result }

  /** Gets the `type` column of this sink model. */
  string getType() { type = result }

  /** Gets the `subtypes` column of this sink model. */
  boolean getSubtypes() { subtypes = result }

  /** Gets the `name` column of this sink model. */
  string getName() { name = result }

  /** Gets the `signature` column of this sink model. */
  string getSignature() { signature = result }

  /** Gets the `input` column of this sink model. */
  string getInput() { input = result }

  /** Gets the `ext` column of this sink model. */
  string getExt() { ext = result }

  /** Gets the `kind` column of this sink model. */
  string getKind() { kind = result }

  /** Gets the `provenance` column of this sink model. */
  string getProvenance() { provenance = result }

  /**
   * Gets the number of `PotentialSinkModelExpr`s whose `getSinkModel()` yields this sink model.
   * This is `0` (rather than having no result) when there is no such expression.
   */
  int getInstanceCount() {
    result = count(PotentialSinkModelExpr candidate | this = candidate.getSinkModel())
  }

  /** Gets a debug rendering of the form `SinkModel(<col>, <col>, ...)`. */
  string toString() {
    exists(string columns |
      columns =
        package + ", " + type + ", " + subtypes + ", " + name + ", " + signature + ", " + ext + ", " +
          input + ", " + kind + ", " + provenance
    |
      result = "SinkModel(" + columns + ")"
    )
  }

  /**
   * Gets the comma-separated row for this sink model as it would appear in a Models-as-Data
   * file: every string column is wrapped in double quotes and the `subtypes` column is rendered
   * via `pyBool`.
   */
  string getRepr() {
    exists(string leading, string trailing |
      leading = "\"" + package + "\", \"" + type + "\", " + pyBool(subtypes) and
      trailing =
        ", \"" + name + "\", \"" + signature + "\", \"" + ext + "\", \"" + input + "\", \"" + kind +
          "\", \"" + provenance + "\""
    |
      result = leading + trailing
    )
  }
}
82+
83+
/**
 * An expression that may correspond to a sink model.
 *
 * The class itself places no restriction on `Expr`; the matching against sink models is done by
 * `hasSignature` and `getSinkModel`.
 */
class PotentialSinkModelExpr extends Expr {
  /**
   * Holds if this expression is an argument of a call (or the call's qualifier, which is
   * treated as argument index `-1`) and the remaining columns describe the callee of that call:
   * its declaring type's package and erased nested type name, the `considerSubtypes` flag, its
   * name, and its parameter string. `input` is `getArgumentForIndex` applied to the argument
   * index.
   */
  pragma[nomagic]
  predicate hasSignature(
    string package, string type, boolean subtypes, string name, string signature, string input
  ) {
    exists(Callable target, int position |
      // Locate this expression within some call to `target`.
      exists(Call invocation | invocation.getCallee() = target |
        position = -1 and this = invocation.getQualifier()
        or
        this = invocation.getArgument(position)
      ) and
      // Describe the callee.
      exists(RefType declaring | declaring = target.getDeclaringType() |
        package = declaring.getPackage().getName() and
        type = declaring.getErasure().(RefType).nestedName()
      ) and
      name = target.getName() and
      signature = ExternalFlow::paramsString(target) and
      subtypes = considerSubtypes(target) and
      input = getArgumentForIndex(position)
    )
  }

  /**
   * Gets a sink model whose package, type, subtypes flag, name, signature and input all agree
   * with a signature of this expression. The model's `ext`, `kind` and `provenance` columns are
   * not constrained, so there may be several results.
   */
  SinkModel getSinkModel() {
    exists(
      string package, string type, boolean subtypes, string name, string signature, string input
    |
      this.hasSignature(package, type, subtypes, name, signature, input)
    |
      result.getPackage() = package and
      result.getType() = type and
      result.getSubtypes() = subtypes and
      result.getName() = name and
      result.getSignature() = signature and
      result.getInput() = input
    )
  }
}
115+
116+
/** Gets the capitalised spelling of `b`: `"True"` for `true` and `"False"` for `false`. */
private string pyBool(boolean b) {
  result = "False" and b = false
  or
  result = "True" and b = true
}
121+
122+
/**
 * Gets a string representation of an existing sink model at the expression `e`, in the format in
 * which it would appear in a Models-as-Data file. Only sink models whose provenance is
 * `ai-generated` are considered.
 *
 * An expression can correspond to several sink models (`getSinkModel()` does not constrain the
 * `ext`, `kind` or `provenance` columns), so the provenance check and the representation must
 * refer to the same model. Binding the model once guarantees that; calling `e.getSinkModel()`
 * twice would let the two calls pick different models.
 */
string getSinkModelRepr(PotentialSinkModelExpr e) {
  exists(SinkModel model |
    model = e.getSinkModel() and
    model.getProvenance() = "ai-generated"
  |
    result = model.getRepr()
  )
}
131+
132+
/**
 * Gets the text to append to an alert message for the expression `e`: a newline, the label
 * `sinkModel: `, and then a result of `getSinkModelRepr(e)`.
 *
 * Has no result when `getSinkModelRepr(e)` has none.
 */
string getSinkModelQueryRepr(PotentialSinkModelExpr e) {
  exists(string repr | repr = getSinkModelRepr(e) | result = "\nsinkModel: " + repr)
}
139+
140+
/**
 * A parameterised module that instantiates global taint tracking for the dataflow configuration
 * `Config`, and exposes a predicate relating sink models to the number of sink path nodes of
 * that configuration at which they occur.
 */
private module SinkTallier<DataFlow::ConfigSig Config> {
  module ConfigFlow = TaintTracking::Global<Config>;

  /**
   * Holds if `c` is the number of path nodes that are the end point of some flow path of
   * `ConfigFlow` and whose underlying expression corresponds to the sink model `s`.
   *
   * `strictcount` is used, so the predicate does not hold at all (rather than holding with
   * `c = 0`) for sink models that occur at no such path node.
   */
  predicate getSinkModelCount(int c, SinkModel s) {
    // Restrict `s` to sink models that occur at some path node of this configuration.
    exists(ConfigFlow::PathNode anyNode |
      s = anyNode.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel()
    ) and
    c =
      strictcount(ConfigFlow::PathNode sinkNode |
        s = sinkNode.getNode().asExpr().(PotentialSinkModelExpr).getSinkModel() and
        ConfigFlow::flowPath(_, sinkNode)
      )
  }
}
156+
157+
/**
 * Holds if `alertCount` is the number of flow-path sinks of the query with ID `queryName` whose
 * sink expression corresponds to `sinkModel`.
 *
 * There is no row for a (query, sink model) pair with zero such sinks, because the underlying
 * `SinkTallier::getSinkModelCount` is based on `strictcount`.
 */
predicate sinkModelTallyPerQuery(string queryName, int alertCount, SinkModel sinkModel) {
  // `RequestForgeryConfig` is listed exactly once, under the request-forgery query's ID
  // (`java/ssrf`). Listing the same configuration under a second name would make
  // `sinkModelTally` count its sinks twice.
  queryName = "java/ssrf" and
  SinkTallier<RequestForgeryConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  // The command-line-injection tally combines the remote and the local flow configuration.
  // Either configuration may have no sink for `sinkModel`, in which case `getSinkModelCount`
  // does not hold for it; summing over the configurations that do have results (tagged by
  // `flowKind`, so that equal counts are not merged) treats the missing one as zero instead of
  // dropping the row altogether.
  queryName = "java/command-line-injection" and
  alertCount =
    strictsum(string flowKind, int c |
      flowKind = "remote" and
      SinkTallier<RemoteUserInputToArgumentToExecFlowConfig>::getSinkModelCount(c, sinkModel)
      or
      flowKind = "local" and
      SinkTallier<LocalUserInputToArgumentToExecFlowConfig>::getSinkModelCount(c, sinkModel)
    |
      c
    )
  or
  queryName = "java/concatenated-sql-query" and
  SinkTallier<UncontrolledStringBuilderSourceFlowConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/path-injection" and
  SinkTallier<TaintedPathConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/unvalidated-url-redirection" and
  SinkTallier<UrlRedirectConfig>::getSinkModelCount(alertCount, sinkModel)
  or
  queryName = "java/sql-injection" and
  SinkTallier<QueryInjectionFlowConfig>::getSinkModelCount(alertCount, sinkModel)
}

/**
 * Holds if `alertCount` is the total, over all queries known to `sinkModelTallyPerQuery`, of the
 * per-query counts for `sinkModel`.
 *
 * The aggregation ranges over (query, count) pairs rather than over counts alone: an aggregate
 * over just `int c` would range over the distinct values of `c`, so two queries that happen to
 * have the same count would contribute only once. `strictsum` means there is no row for sink
 * models that have no per-query tally at all.
 */
predicate sinkModelTally(int alertCount, SinkModel sinkModel) {
  alertCount =
    strictsum(string queryName, int c | sinkModelTallyPerQuery(queryName, c, sinkModel) | c)
}
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/**
2+
* @name Number of alerts per sink model
3+
* @description Counts the number of alerts using `ai-generated` sink models.
4+
* @kind table
5+
* @id java/ml/metrics-count-alerts-per-sink-model
6+
* @tags internal automodel metrics
7+
*/
8+
9+
private import java
10+
private import AutomodelAlertSinkUtil
11+
12+
// One row per `ai-generated` sink model that has a tally, highest alert count first.
from SinkModel s, int alertCount
where
  s.getProvenance() = "ai-generated" and
  sinkModelTally(alertCount, s)
select alertCount, s.getPackage() as package, s.getType() as type, s.getSubtypes() as subtypes,
  s.getName() as name, s.getSignature() as signature, s.getInput() as input, s.getExt() as ext,
  s.getKind() as kind, s.getProvenance() as provenance order by alertCount desc
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* @name Number of alerts per sink model and query
3+
* @description Counts the number of alerts per query using `ai-generated` sink models.
4+
* @kind table
5+
* @id java/ml/metrics-count-alerts-per-sink-model-and-query
6+
* @tags internal automodel metrics
7+
*/
8+
9+
private import java
10+
private import AutomodelAlertSinkUtil
11+
12+
// One row per (query, `ai-generated` sink model) pair that has a tally, grouped by query ID and
// then ordered by descending alert count.
from SinkModel s, string queryId, int alertCount
where
  s.getProvenance() = "ai-generated" and
  sinkModelTallyPerQuery(queryId, alertCount, s)
select queryId, alertCount, s.getPackage() as package, s.getType() as type,
  s.getSubtypes() as subtypes, s.getName() as name, s.getSignature() as signature,
  s.getInput() as input, s.getExt() as ext, s.getKind() as kind, s.getProvenance() as provenance
  order by queryId, alertCount desc
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* @name Number of instances of each sink model
3+
* @description Counts the number of instances of `ai-generated` sink models.
4+
* @kind table
5+
* @id java/ml/metrics-count-instances-per-sink-model
6+
* @tags internal automodel metrics
7+
*/
8+
9+
private import java
10+
private import AutomodelAlertSinkUtil
11+
12+
// One row per `ai-generated` sink model that has at least one instance, most instances first.
from SinkModel s, int instanceCount
where
  s.getProvenance() = "ai-generated" and
  s.getInstanceCount() = instanceCount and
  instanceCount > 0
select instanceCount, s.getPackage() as package, s.getType() as type, s.getSubtypes() as subtypes,
  s.getName() as name, s.getSignature() as signature, s.getInput() as input, s.getExt() as ext,
  s.getKind() as kind, s.getProvenance() as provenance order by instanceCount desc

java/ql/src/Telemetry/AutomodelJavaUtil.qll

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ string getArgumentForIndex(int index) {
5656
* It would technically be ok to always use the value 'true', but this would
5757
* break convention.
5858
*/
59+
pragma[nomagic]
5960
boolean considerSubtypes(Callable callable) {
6061
if
6162
callable.isStatic() or
Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
/**
2+
* This file contains query predicates for use when gathering metrics at scale using Multi Repo
3+
* Variant Analysis.
4+
*/
5+
6+
private import java
7+
private import AutomodelAlertSinkUtil
8+
9+
/**
 * Holds if `alertCount` is the tally reported by `sinkModelTallyPerQuery` for the query with ID
 * `queryId` and the `ai-generated` sink model described by the remaining columns (`package`,
 * `type`, `subtypes`, `name`, `signature`, `input`, `ext`, `kind` and `provenance`).
 */
query predicate sinkModelCountPerQuery(
  string queryId, int alertCount, string package, string type, boolean subtypes, string name,
  string signature, string input, string ext, string kind, string provenance
) {
  // Only `ai-generated` models are reported, so the provenance column is fixed.
  provenance = "ai-generated" and
  exists(SinkModel model |
    model.getProvenance() = provenance and
    sinkModelTallyPerQuery(queryId, alertCount, model)
  |
    package = model.getPackage() and
    type = model.getType() and
    subtypes = model.getSubtypes() and
    name = model.getName() and
    signature = model.getSignature() and
    input = model.getInput() and
    ext = model.getExt() and
    kind = model.getKind()
  )
}
31+
32+
/**
 * Holds if `instanceCount` is the (positive) result of `getInstanceCount()` for the
 * `ai-generated` sink model described by the remaining columns (`package`, `type`, `subtypes`,
 * `name`, `signature`, `input`, `ext`, `kind` and `provenance`).
 */
query predicate instanceCount(
  int instanceCount, string package, string type, boolean subtypes, string name, string signature,
  string input, string ext, string kind, string provenance
) {
  // Only `ai-generated` models are reported, so the provenance column is fixed.
  provenance = "ai-generated" and
  exists(SinkModel model |
    model.getProvenance() = provenance and
    model.getInstanceCount() = instanceCount and
    instanceCount > 0
  |
    package = model.getPackage() and
    type = model.getType() and
    subtypes = model.getSubtypes() and
    name = model.getName() and
    signature = model.getSignature() and
    input = model.getInput() and
    ext = model.getExt() and
    kind = model.getKind()
  )
}
55+
56+
// MRVA requires a select clause. It is used here to report the name of each query predicate
// above that produced at least one row.
from string hadResults
where
  hadResults = "instanceCount" and
  instanceCount(_, _, _, _, _, _, _, _, _, _)
  or
  hadResults = "sinkModelCountPerQuery" and
  sinkModelCountPerQuery(_, _, _, _, _, _, _, _, _, _, _)
select hadResults

0 commit comments

Comments
 (0)