Skip to content

Commit 475a892

Browse files
authored
Merge pull request #13760 from MathiasVP/split-invalid-ptr-deref-into-more-files
C++: Split `cpp/invalid-pointer-deref` into more files
2 parents 8a46ff3 + 2f48cde commit 475a892

File tree

10 files changed

+710
-516
lines changed

10 files changed

+710
-516
lines changed

cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ProductFlow.qll

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,22 @@ module ProductFlow {
297297
reachable(source1, source2, sink1, sink2)
298298
}
299299

300+
/** Holds if data can flow from `(source1, source2)` to `(sink1, sink2)`. */
301+
predicate flow(
302+
DataFlow::Node source1, DataFlow::Node source2, DataFlow::Node sink1, DataFlow::Node sink2
303+
) {
304+
exists(
305+
Flow1::PathNode pSource1, Flow2::PathNode pSource2, Flow1::PathNode pSink1,
306+
Flow2::PathNode pSink2
307+
|
308+
pSource1.getNode() = source1 and
309+
pSource2.getNode() = source2 and
310+
pSink1.getNode() = sink1 and
311+
pSink2.getNode() = sink2 and
312+
flowPath(pSource1, pSource2, pSink1, pSink2)
313+
)
314+
}
315+
300316
private module Config1 implements DataFlow::StateConfigSig {
301317
class FlowState = FlowState1;
302318

Lines changed: 256 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,256 @@
1+
/**
2+
* This file provides the first phase of the `cpp/invalid-pointer-deref` query that identifies flow
3+
* from an allocation to a pointer-arithmetic instruction that constructs a pointer that is out of bounds.
4+
*/
5+
6+
private import cpp
7+
private import semmle.code.cpp.ir.dataflow.internal.ProductFlow
8+
private import semmle.code.cpp.ir.ValueNumbering
9+
private import semmle.code.cpp.controlflow.IRGuards
10+
private import codeql.util.Unit
11+
private import RangeAnalysisUtil
12+
13+
private VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result }
14+
15+
/**
16+
* Holds if the `(n, state)` pair represents the source of flow for the size
17+
* expression associated with `alloc`.
18+
*/
19+
predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) {
20+
exists(VariableAccess va, Expr size, int delta |
21+
size = alloc.getSizeExpr() and
22+
// Get the unique variable in a size expression like `x` in `malloc(x + 1)`.
23+
va = unique( | | getAVariableAccess(size)) and
24+
// Compute `delta` as the constant difference between `x` and `x + 1`.
25+
bounded1(any(Instruction instr | instr.getUnconvertedResultExpression() = size),
26+
any(LoadInstruction load | load.getUnconvertedResultExpression() = va), delta) and
27+
n.asConvertedExpr() = va.getFullyConverted() and
28+
state = delta
29+
)
30+
}
31+
32+
/**
33+
* A module that encapsulates a barrier guard to remove false positives from flow like:
34+
* ```cpp
35+
* char *p = new char[size];
36+
* // ...
37+
* unsigned n = size;
38+
* // ...
39+
* if(n < size) {
40+
* use(*p[n]);
41+
* }
42+
* ```
43+
* In this case, the sink pair identified by the product flow library (without any additional barriers)
44+
* would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic
45+
* instruction `pai` such that:
46+
* 1. The left-hand of `pai` flows from the allocation, and
47+
* 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`)
48+
* but because there's a strict comparison that compares `n` against the size of the allocation this
49+
* snippet is fine.
50+
*/
51+
module Barrier2 {
52+
private class FlowState2 = int;
53+
54+
private module BarrierConfig2 implements DataFlow::ConfigSig {
55+
predicate isSource(DataFlow::Node source) {
56+
// The sources is the same as in the sources for the second
57+
// projection in the `AllocToInvalidPointerConfig` module.
58+
hasSize(_, source, _)
59+
}
60+
61+
additional predicate isSink(
62+
DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state,
63+
boolean testIsTrue
64+
) {
65+
// The sink is any "large" side of a relational comparison.
66+
g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue)
67+
}
68+
69+
predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) }
70+
}
71+
72+
private import DataFlow::Global<BarrierConfig2>
73+
74+
private FlowState2 getAFlowStateForNode(DataFlow::Node node) {
75+
exists(DataFlow::Node source |
76+
flow(source, node) and
77+
hasSize(_, source, result)
78+
)
79+
}
80+
81+
private predicate operandGuardChecks(
82+
IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge
83+
) {
84+
exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 |
85+
nRight.asOperand() = right and
86+
nLeft.asOperand() = left and
87+
BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and
88+
state = getAFlowStateForNode(nRight) and
89+
state0 <= state
90+
)
91+
}
92+
93+
/**
94+
* Gets an instruction that is guarded by a guard condition which ensures that
95+
* the value of the instruction is upper-bounded by size of some allocation.
96+
*/
97+
Instruction getABarrierInstruction(FlowState2 state) {
98+
exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge |
99+
use = value.getAUse() and
100+
operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _,
101+
pragma[only_bind_into](state), pragma[only_bind_into](edge)) and
102+
result = value.getAnInstruction() and
103+
g.controls(result.getBlock(), edge)
104+
)
105+
}
106+
107+
/**
108+
* Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that
109+
* the value of the node is upper-bounded by size of some allocation.
110+
*/
111+
DataFlow::Node getABarrierNode(FlowState2 state) {
112+
result.asOperand() = getABarrierInstruction(state).getAUse()
113+
}
114+
115+
/**
116+
* Gets the block of a node that is guarded (see `getABarrierInstruction` or
117+
* `getABarrierNode` for the definition of what it means to be guarded).
118+
*/
119+
IRBlock getABarrierBlock(FlowState2 state) {
120+
result.getAnInstruction() = getABarrierInstruction(state)
121+
}
122+
}
123+
124+
private module InterestingPointerAddInstruction {
125+
private module PointerAddInstructionConfig implements DataFlow::ConfigSig {
126+
predicate isSource(DataFlow::Node source) {
127+
// The sources is the same as in the sources for the second
128+
// projection in the `AllocToInvalidPointerConfig` module.
129+
hasSize(source.asConvertedExpr(), _, _)
130+
}
131+
132+
predicate isSink(DataFlow::Node sink) {
133+
sink.asInstruction() = any(PointerAddInstruction pai).getLeft()
134+
}
135+
}
136+
137+
private import DataFlow::Global<PointerAddInstructionConfig>
138+
139+
/**
140+
* Holds if `pai` is a pointer-arithmetic instruction such that the
141+
* result of an allocation flows to the left-hand side of `pai`.
142+
*
143+
* This predicate is used to reduce the set of tuples in `isSinkPair`.
144+
*/
145+
predicate isInteresting(PointerAddInstruction pai) {
146+
exists(DataFlow::Node n |
147+
n.asInstruction() = pai.getLeft() and
148+
flowTo(n)
149+
)
150+
}
151+
}
152+
153+
/**
154+
* A product-flow configuration for flow from an (allocation, size) pair to a
155+
* pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`.
156+
*
157+
* The goal of this query is to find patterns such as:
158+
* ```cpp
159+
* 1. char* begin = (char*)malloc(size);
160+
* 2. char* end = begin + size;
161+
* 3. for(int *p = begin; p <= end; p++) {
162+
* 4. use(*p);
163+
* 5. }
164+
* ```
165+
*
166+
* We do this by splitting the task up into two configurations:
167+
* 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and
168+
* 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3).
169+
*
170+
* Finally, the range-analysis library will find a load from (or store to) an address that
171+
* is non-strictly upper-bounded by `end` (which in this case is `*p`).
172+
*/
173+
private module Config implements ProductFlow::StateConfigSig {
174+
class FlowState1 = Unit;
175+
176+
class FlowState2 = int;
177+
178+
predicate isSourcePair(
179+
DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2
180+
) {
181+
// In the case of an allocation like
182+
// ```cpp
183+
// malloc(size + 1);
184+
// ```
185+
// we use `state2` to remember that there was an offset (in this case an offset of `1`) added
186+
// to the size of the allocation. This state is then checked in `isSinkPair`.
187+
exists(state1) and
188+
hasSize(source1.asConvertedExpr(), source2, state2)
189+
}
190+
191+
predicate isSinkPair(
192+
DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2
193+
) {
194+
exists(state1) and
195+
// We check that the delta computed by the range analysis matches the
196+
// state value that we set in `isSourcePair`.
197+
pointerAddInstructionHasBounds0(_, sink1, sink2, state2)
198+
}
199+
200+
predicate isBarrier2(DataFlow::Node node, FlowState2 state) {
201+
node = Barrier2::getABarrierNode(state)
202+
}
203+
204+
predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) }
205+
206+
predicate isBarrierOut2(DataFlow::Node node) {
207+
node = any(DataFlow::SsaPhiNode phi).getAnInput(true)
208+
}
209+
}
210+
211+
private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>;
212+
213+
/**
214+
* Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the
215+
* left operand of the pointer-arithmetic operation.
216+
*
217+
* For example in,
218+
* ```cpp
219+
* char* end = p + (size + 1);
220+
* ```
221+
* We will have:
222+
* - `pai` is `p + (size + 1)`,
223+
* - `sink1` is `p`
224+
* - `sink2` is `size`
225+
* - `delta` is `1`.
226+
*/
227+
pragma[nomagic]
228+
private predicate pointerAddInstructionHasBounds0(
229+
PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta
230+
) {
231+
InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and
232+
exists(Instruction right, Instruction instr2 |
233+
pai.getRight() = right and
234+
pai.getLeft() = sink1.asInstruction() and
235+
instr2 = sink2.asInstruction() and
236+
// pai.getRight() <= sink2 + delta
237+
bounded1(right, instr2, delta) and
238+
not right = Barrier2::getABarrierInstruction(delta) and
239+
not instr2 = Barrier2::getABarrierInstruction(delta)
240+
)
241+
}
242+
243+
/**
244+
* Holds if `allocation` flows to `sink1` and `sink1` represents the left-hand
245+
* side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai`
246+
* is non-strictly upper bounded by the size of `alllocation` + `delta`.
247+
*/
248+
pragma[nomagic]
249+
predicate pointerAddInstructionHasBounds(
250+
DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta
251+
) {
252+
exists(DataFlow::Node sink2 |
253+
AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and
254+
pointerAddInstructionHasBounds0(pai, sink1, sink2, delta)
255+
)
256+
}

0 commit comments

Comments
 (0)