Skip to content

Commit d41d2bc

Browse files
authored
Merge pull request #13699 from MathiasVP/final-config-to-invalid-pointer-deref
C++: Handle call-contexts mismatches in `cpp/invalid-pointer-deref`
2 parents e803e98 + d63ead5 commit d41d2bc

File tree

3 files changed

+340
-1564
lines changed

3 files changed

+340
-1564
lines changed

cpp/ql/src/experimental/Security/CWE/CWE-193/InvalidPointerDeref.ql

Lines changed: 112 additions & 145 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import semmle.code.cpp.ir.ValueNumbering
2323
import semmle.code.cpp.controlflow.IRGuards
2424
import semmle.code.cpp.ir.IR
2525
import codeql.util.Unit
26+
import FinalFlow::PathGraph
2627

2728
pragma[nomagic]
2829
Instruction getABoundIn(SemBound b, IRFunction func) {
@@ -318,7 +319,7 @@ module InvalidPointerToDerefBarrier {
318319
private module BarrierConfig implements DataFlow::ConfigSig {
319320
predicate isSource(DataFlow::Node source) {
320321
// The sources is the same as in the sources for `InvalidPointerToDerefConfig`.
321-
invalidPointerToDerefSource(_, source, _)
322+
invalidPointerToDerefSource(_, _, source, _)
322323
}
323324

324325
additional predicate isSink(
@@ -336,7 +337,7 @@ module InvalidPointerToDerefBarrier {
336337
private int getInvalidPointerToDerefSourceDelta(DataFlow::Node node) {
337338
exists(DataFlow::Node source |
338339
flow(source, node) and
339-
invalidPointerToDerefSource(_, source, result)
340+
invalidPointerToDerefSource(_, _, source, result)
340341
)
341342
}
342343

@@ -373,7 +374,7 @@ module InvalidPointerToDerefBarrier {
373374
* by `AllocToInvalidPointerConfig` to a dereference of the pointer.
374375
*/
375376
module InvalidPointerToDerefConfig implements DataFlow::ConfigSig {
376-
predicate isSource(DataFlow::Node source) { invalidPointerToDerefSource(_, source, _) }
377+
predicate isSource(DataFlow::Node source) { invalidPointerToDerefSource(_, _, source, _) }
377378

378379
pragma[inline]
379380
predicate isSink(DataFlow::Node sink) { isInvalidPointerDerefSink(sink, _, _, _) }
@@ -388,189 +389,155 @@ module InvalidPointerToDerefConfig implements DataFlow::ConfigSig {
388389
module InvalidPointerToDerefFlow = DataFlow::Global<InvalidPointerToDerefConfig>;
389390

390391
/**
391-
* Holds if `pai` is a pointer-arithmetic operation and `source` is a dataflow node with a
392-
* pointer-value that is non-strictly upper bounded by `pai + delta`.
392+
* Holds if `source1` is dataflow node that represents an allocation that flows to the
393+
* left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with
394+
* a pointer-value that is non-strictly upper bounded by `pai + delta`.
393395
*
394396
* For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such
395-
* as `(p + size) + 1` and `source` is the node representing `(p + size) + 1`. In this
397+
* as `(p + size) + 1` and `derefSource` is the node representing `(p + size) + 1`. In this
396398
* case `delta` is 1.
397399
*/
398400
predicate invalidPointerToDerefSource(
399-
PointerArithmeticInstruction pai, DataFlow::Node source, int delta
401+
DataFlow::Node source1, PointerArithmeticInstruction pai, DataFlow::Node derefSource, int delta
400402
) {
401403
exists(
402-
AllocToInvalidPointerFlow::PathNode1 p1, AllocToInvalidPointerFlow::PathNode2 p2,
403-
DataFlow::Node sink1, DataFlow::Node sink2, int delta0
404+
AllocToInvalidPointerFlow::PathNode1 pSource1, AllocToInvalidPointerFlow::PathNode1 pSink1,
405+
AllocToInvalidPointerFlow::PathNode2 pSink2, DataFlow::Node sink1, DataFlow::Node sink2,
406+
int delta0
404407
|
405-
pragma[only_bind_out](p1.getNode()) = sink1 and
406-
pragma[only_bind_out](p2.getNode()) = sink2 and
407-
AllocToInvalidPointerFlow::flowPath(_, _, pragma[only_bind_into](p1), pragma[only_bind_into](p2)) and
408+
pragma[only_bind_out](pSource1.getNode()) = source1 and
409+
pragma[only_bind_out](pSink1.getNode()) = sink1 and
410+
pragma[only_bind_out](pSink2.getNode()) = sink2 and
411+
AllocToInvalidPointerFlow::flowPath(pSource1, _, pragma[only_bind_into](pSink1),
412+
pragma[only_bind_into](pSink2)) and
408413
// Note that `delta` is not necessarily equal to `delta0`:
409414
// `delta0` is the constant offset added to the size of the allocation, and
410415
// delta is the constant difference between the pointer-arithmetic instruction
411416
// and the instruction computing the address for which we will search for a dereference.
412417
isSinkImpl(pai, sink1, sink2, delta0) and
413-
bounded2(source.asInstruction(), pai, delta) and
418+
bounded2(derefSource.asInstruction(), pai, delta) and
414419
delta >= 0 and
415-
not source.getBasicBlock() = Barrier2::getABarrierBlock(delta0)
420+
not derefSource.getBasicBlock() = Barrier2::getABarrierBlock(delta0)
416421
)
417422
}
418423

419-
newtype TMergedPathNode =
420-
// The path nodes computed by the first projection of `AllocToInvalidPointerConfig`
421-
TPathNode1(AllocToInvalidPointerFlow::PathNode1 p) or
422-
// The path nodes computed by `InvalidPointerToDerefConfig`
423-
TPathNode3(InvalidPointerToDerefFlow::PathNode p) or
424-
// The read/write that uses the invalid pointer identified by `InvalidPointerToDerefConfig`.
425-
// This one is needed because the sink identified by `InvalidPointerToDerefConfig` is the
426-
// pointer, but we want to raise an alert at the dereference.
427-
TPathNodeSink(Instruction i) {
428-
exists(DataFlow::Node n |
429-
InvalidPointerToDerefFlow::flowTo(pragma[only_bind_into](n)) and
430-
isInvalidPointerDerefSink(n, i, _, _) and
431-
i = getASuccessor(n.asInstruction())
432-
)
433-
}
434-
435-
class MergedPathNode extends TMergedPathNode {
436-
string toString() { none() }
437-
438-
final AllocToInvalidPointerFlow::PathNode1 asPathNode1() { this = TPathNode1(result) }
439-
440-
final InvalidPointerToDerefFlow::PathNode asPathNode3() { this = TPathNode3(result) }
441-
442-
final Instruction asSinkNode() { this = TPathNodeSink(result) }
443-
444-
predicate hasLocationInfo(
445-
string filepath, int startline, int startcolumn, int endline, int endcolumn
446-
) {
447-
none()
448-
}
424+
/**
425+
* Holds if `derefSink` is a dataflow node that represents an out-of-bounds address that is about to
426+
* be dereferenced by `operation` (which is either a `StoreInstruction` or `LoadInstruction`), and
427+
* `pai` is the pointer-arithmetic operation that caused the `derefSink` to be out-of-bounds.
428+
*/
429+
predicate derefSinkToOperation(
430+
DataFlow::Node derefSink, PointerArithmeticInstruction pai, DataFlow::Node operation
431+
) {
432+
exists(DataFlow::Node source, Instruction i |
433+
InvalidPointerToDerefFlow::flow(pragma[only_bind_into](source),
434+
pragma[only_bind_into](derefSink)) and
435+
invalidPointerToDerefSource(_, pai, source, _) and
436+
isInvalidPointerDerefSink(derefSink, i, _, _) and
437+
i = getASuccessor(derefSink.asInstruction()) and
438+
operation.asInstruction() = i
439+
)
449440
}
450441

451-
class PathNode1 extends MergedPathNode, TPathNode1 {
452-
override string toString() {
453-
exists(AllocToInvalidPointerFlow::PathNode1 p |
454-
this = TPathNode1(p) and
455-
result = p.toString()
456-
)
457-
}
458-
459-
override predicate hasLocationInfo(
460-
string filepath, int startline, int startcolumn, int endline, int endcolumn
461-
) {
462-
this.asPathNode1().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
463-
}
464-
}
442+
/**
443+
* A configuration that represents the full dataflow path all the way from
444+
* the allocation to the dereference. We need this final dataflow traversal
445+
* to ensure that the transition from the sink in `AllocToInvalidPointerConfig`
446+
* to the source in `InvalidPointerToDerefFlow` did not make us construct an
447+
* infeasible path (which can happen since the transition from one configuration
448+
* to the next does not preserve information about call contexts).
449+
*/
450+
module FinalConfig implements DataFlow::StateConfigSig {
451+
newtype FlowState =
452+
additional TInitial() or
453+
additional TPointerArith(PointerArithmeticInstruction pai) {
454+
invalidPointerToDerefSource(_, pai, _, _)
455+
}
465456

466-
class PathNode3 extends MergedPathNode, TPathNode3 {
467-
override string toString() {
468-
exists(InvalidPointerToDerefFlow::PathNode p |
469-
this = TPathNode3(p) and
470-
result = p.toString()
457+
predicate isSource(DataFlow::Node source, FlowState state) {
458+
state = TInitial() and
459+
exists(DataFlow::Node derefSource |
460+
invalidPointerToDerefSource(source, _, derefSource, _) and
461+
InvalidPointerToDerefFlow::flow(derefSource, _)
471462
)
472463
}
473464

474-
override predicate hasLocationInfo(
475-
string filepath, int startline, int startcolumn, int endline, int endcolumn
476-
) {
477-
this.asPathNode3().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
478-
}
479-
}
480-
481-
class PathSinkNode extends MergedPathNode, TPathNodeSink {
482-
override string toString() {
483-
exists(Instruction i |
484-
this = TPathNodeSink(i) and
485-
result = i.toString()
465+
predicate isSink(DataFlow::Node sink, FlowState state) {
466+
exists(PointerArithmeticInstruction pai |
467+
derefSinkToOperation(_, pai, sink) and state = TPointerArith(pai)
486468
)
487469
}
488470

489-
override predicate hasLocationInfo(
490-
string filepath, int startline, int startcolumn, int endline, int endcolumn
471+
predicate isAdditionalFlowStep(
472+
DataFlow::Node node1, FlowState state1, DataFlow::Node node2, FlowState state2
491473
) {
492-
this.asSinkNode()
493-
.getLocation()
494-
.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn)
474+
// A step from the left-hand side of a pointer-arithmetic operation that has been
475+
// identified as creating an out-of-bounds pointer to the result of the pointer-arithmetic
476+
// operation.
477+
exists(
478+
PointerArithmeticInstruction pai, AllocToInvalidPointerFlow::PathNode1 p1,
479+
InvalidPointerToDerefFlow::PathNode p2
480+
|
481+
isSinkImpl(pai, node1, _, _) and
482+
invalidPointerToDerefSource(_, pai, node2, _) and
483+
node1 = p1.getNode() and
484+
node2 = p2.getNode() and
485+
state1 = TInitial() and
486+
state2 = TPointerArith(pai)
487+
)
488+
or
489+
// A step from an out-of-bounds address to the operation (which is either a `StoreInstruction`
490+
// or a `LoadInstruction`) that dereferences the address.
491+
// This step exists purely for aesthetic reasons: we want the alert to be placed at the operation
492+
// that causes the dereference, and not at the address that flows into the operation.
493+
state1 = state2 and
494+
exists(DataFlow::Node derefSource, PointerArithmeticInstruction pai |
495+
InvalidPointerToDerefFlow::flow(derefSource, node1) and
496+
invalidPointerToDerefSource(_, pai, derefSource, _) and
497+
state1 = TPointerArith(pai) and
498+
derefSinkToOperation(node1, pai, node2)
499+
)
495500
}
496501
}
497502

498-
query predicate edges(MergedPathNode node1, MergedPathNode node2) {
499-
node1.asPathNode1().getASuccessor() = node2.asPathNode1()
500-
or
501-
joinOn1(_, node1.asPathNode1(), node2.asPathNode3())
502-
or
503-
node1.asPathNode3().getASuccessor() = node2.asPathNode3()
504-
or
505-
joinOn2(node1.asPathNode3(), node2.asSinkNode(), _, _)
506-
}
507-
508-
query predicate nodes(MergedPathNode n, string key, string val) {
509-
AllocToInvalidPointerFlow::PathGraph1::nodes(n.asPathNode1(), key, val)
510-
or
511-
InvalidPointerToDerefFlow::PathGraph::nodes(n.asPathNode3(), key, val)
512-
or
513-
key = "semmle.label" and val = n.asSinkNode().toString()
514-
}
515-
516-
query predicate subpaths(
517-
MergedPathNode arg, MergedPathNode par, MergedPathNode ret, MergedPathNode out
518-
) {
519-
AllocToInvalidPointerFlow::PathGraph1::subpaths(arg.asPathNode1(), par.asPathNode1(),
520-
ret.asPathNode1(), out.asPathNode1())
521-
or
522-
InvalidPointerToDerefFlow::PathGraph::subpaths(arg.asPathNode3(), par.asPathNode3(),
523-
ret.asPathNode3(), out.asPathNode3())
524-
}
503+
module FinalFlow = DataFlow::GlobalWithState<FinalConfig>;
525504

526505
/**
527-
* Holds if `p1` is a sink of `AllocToInvalidPointerConfig` and `p2` is a source
528-
* of `InvalidPointerToDerefConfig`, and they are connected through `pai`.
529-
*/
530-
predicate joinOn1(
531-
PointerArithmeticInstruction pai, AllocToInvalidPointerFlow::PathNode1 p1,
532-
InvalidPointerToDerefFlow::PathNode p2
533-
) {
534-
isSinkImpl(pai, p1.getNode(), _, _) and
535-
invalidPointerToDerefSource(pai, p2.getNode(), _)
536-
}
537-
538-
/**
539-
* Holds if `p1` is a sink of `InvalidPointerToDerefConfig` and `i` is the instruction
540-
* that dereferences `p1`. The string `operation` describes whether the `i` is
541-
* a `StoreInstruction` or `LoadInstruction`.
506+
* Holds if `source` is an allocation that flows into the left-hand side of `pai`, which produces an out-of-bounds
507+
* pointer that flows into an address that is dereferenced by `sink` (which is either a `LoadInstruction` or a
508+
* `StoreInstruction`). The end result is that `sink` writes to an address that is off-by-`delta` from the end of
509+
* the allocation. The string `operation` describes whether the `sink` is a load or a store (which is then used
510+
* to produce the alert message).
511+
*
512+
* Note that multiple `delta`s can exist for a given `(source, pai, sink)` triplet.
542513
*/
543-
pragma[inline]
544-
predicate joinOn2(InvalidPointerToDerefFlow::PathNode p1, Instruction i, string operation, int delta) {
545-
isInvalidPointerDerefSink(p1.getNode(), i, operation, delta)
546-
}
547-
548514
predicate hasFlowPath(
549-
MergedPathNode source1, MergedPathNode sink, InvalidPointerToDerefFlow::PathNode source3,
550-
PointerArithmeticInstruction pai, string operation, int delta
515+
FinalFlow::PathNode source, FinalFlow::PathNode sink, PointerArithmeticInstruction pai,
516+
string operation, int delta
551517
) {
552-
exists(InvalidPointerToDerefFlow::PathNode sink3, AllocToInvalidPointerFlow::PathNode1 sink1 |
553-
AllocToInvalidPointerFlow::flowPath(source1.asPathNode1(), _, sink1, _) and
554-
joinOn1(pai, sink1, source3) and
555-
InvalidPointerToDerefFlow::flowPath(source3, sink3) and
556-
joinOn2(sink3, sink.asSinkNode(), operation, delta)
518+
exists(
519+
DataFlow::Node derefSink, DataFlow::Node derefSource, int deltaDerefSourceAndPai,
520+
int deltaDerefSinkAndDerefAddress
521+
|
522+
FinalFlow::flowPath(source, sink) and
523+
sink.getState() = FinalConfig::TPointerArith(pai) and
524+
invalidPointerToDerefSource(source.getNode(), pai, derefSource, deltaDerefSourceAndPai) and
525+
InvalidPointerToDerefFlow::flow(derefSource, derefSink) and
526+
derefSinkToOperation(derefSink, pai, sink.getNode()) and
527+
isInvalidPointerDerefSink(derefSink, sink.getNode().asInstruction(), operation,
528+
deltaDerefSinkAndDerefAddress) and
529+
delta = deltaDerefSourceAndPai + deltaDerefSinkAndDerefAddress
557530
)
558531
}
559532

560533
from
561-
MergedPathNode source, MergedPathNode sink, int k, string kstr, PointerArithmeticInstruction pai,
562-
string operation, Expr offset, DataFlow::Node n
534+
FinalFlow::PathNode source, FinalFlow::PathNode sink, int k, string kstr,
535+
PointerArithmeticInstruction pai, string operation, Expr offset, DataFlow::Node n
563536
where
564-
k =
565-
min(int k2, int k3, InvalidPointerToDerefFlow::PathNode source3 |
566-
hasFlowPath(source, sink, source3, pai, operation, k3) and
567-
invalidPointerToDerefSource(pai, source3.getNode(), k2)
568-
|
569-
k2 + k3
570-
) and
537+
k = min(int cand | hasFlowPath(source, sink, pai, operation, cand)) and
571538
offset = pai.getRight().getUnconvertedResultExpression() and
572-
n = source.asPathNode1().getNode() and
539+
n = source.getNode() and
573540
if k = 0 then kstr = "" else kstr = " + " + k
574-
select sink, source, sink,
541+
select sink.getNode(), source, sink,
575542
"This " + operation + " might be out of bounds, as the pointer might be equal to $@ + $@" + kstr +
576543
".", n, n.toString(), offset, offset.toString()

0 commit comments

Comments
 (0)