@@ -23,6 +23,7 @@ import semmle.code.cpp.ir.ValueNumbering
23
23
import semmle.code.cpp.controlflow.IRGuards
24
24
import semmle.code.cpp.ir.IR
25
25
import codeql.util.Unit
26
+ import FinalFlow:: PathGraph
26
27
27
28
pragma [ nomagic]
28
29
Instruction getABoundIn ( SemBound b , IRFunction func ) {
@@ -318,7 +319,7 @@ module InvalidPointerToDerefBarrier {
318
319
private module BarrierConfig implements DataFlow:: ConfigSig {
319
320
predicate isSource ( DataFlow:: Node source ) {
320
321
// The sources is the same as in the sources for `InvalidPointerToDerefConfig`.
321
- invalidPointerToDerefSource ( _, source , _)
322
+ invalidPointerToDerefSource ( _, _ , source , _)
322
323
}
323
324
324
325
additional predicate isSink (
@@ -336,7 +337,7 @@ module InvalidPointerToDerefBarrier {
336
337
private int getInvalidPointerToDerefSourceDelta ( DataFlow:: Node node ) {
337
338
exists ( DataFlow:: Node source |
338
339
flow ( source , node ) and
339
- invalidPointerToDerefSource ( _, source , result )
340
+ invalidPointerToDerefSource ( _, _ , source , result )
340
341
)
341
342
}
342
343
@@ -373,7 +374,7 @@ module InvalidPointerToDerefBarrier {
373
374
* by `AllocToInvalidPointerConfig` to a dereference of the pointer.
374
375
*/
375
376
module InvalidPointerToDerefConfig implements DataFlow:: ConfigSig {
376
- predicate isSource ( DataFlow:: Node source ) { invalidPointerToDerefSource ( _, source , _) }
377
+ predicate isSource ( DataFlow:: Node source ) { invalidPointerToDerefSource ( _, _ , source , _) }
377
378
378
379
pragma [ inline]
379
380
predicate isSink ( DataFlow:: Node sink ) { isInvalidPointerDerefSink ( sink , _, _, _) }
@@ -388,189 +389,155 @@ module InvalidPointerToDerefConfig implements DataFlow::ConfigSig {
388
389
module InvalidPointerToDerefFlow = DataFlow:: Global< InvalidPointerToDerefConfig > ;
389
390
390
391
/**
391
- * Holds if `pai` is a pointer-arithmetic operation and `source` is a dataflow node with a
392
- * pointer-value that is non-strictly upper bounded by `pai + delta`.
392
+ * Holds if `source1` is dataflow node that represents an allocation that flows to the
393
+ * left-hand side of the pointer-arithmetic `pai`, and `derefSource` is a dataflow node with
394
+ * a pointer-value that is non-strictly upper bounded by `pai + delta`.
393
395
*
394
396
* For example, if `pai` is a pointer-arithmetic operation `p + size` in an expression such
395
- * as `(p + size) + 1` and `source ` is the node representing `(p + size) + 1`. In this
397
+ * as `(p + size) + 1` and `derefSource ` is the node representing `(p + size) + 1`. In this
396
398
* case `delta` is 1.
397
399
*/
398
400
predicate invalidPointerToDerefSource (
399
- PointerArithmeticInstruction pai , DataFlow:: Node source , int delta
401
+ DataFlow :: Node source1 , PointerArithmeticInstruction pai , DataFlow:: Node derefSource , int delta
400
402
) {
401
403
exists (
402
- AllocToInvalidPointerFlow:: PathNode1 p1 , AllocToInvalidPointerFlow:: PathNode2 p2 ,
403
- DataFlow:: Node sink1 , DataFlow:: Node sink2 , int delta0
404
+ AllocToInvalidPointerFlow:: PathNode1 pSource1 , AllocToInvalidPointerFlow:: PathNode1 pSink1 ,
405
+ AllocToInvalidPointerFlow:: PathNode2 pSink2 , DataFlow:: Node sink1 , DataFlow:: Node sink2 ,
406
+ int delta0
404
407
|
405
- pragma [ only_bind_out ] ( p1 .getNode ( ) ) = sink1 and
406
- pragma [ only_bind_out ] ( p2 .getNode ( ) ) = sink2 and
407
- AllocToInvalidPointerFlow:: flowPath ( _, _, pragma [ only_bind_into ] ( p1 ) , pragma [ only_bind_into ] ( p2 ) ) and
408
+ pragma [ only_bind_out ] ( pSource1 .getNode ( ) ) = source1 and
409
+ pragma [ only_bind_out ] ( pSink1 .getNode ( ) ) = sink1 and
410
+ pragma [ only_bind_out ] ( pSink2 .getNode ( ) ) = sink2 and
411
+ AllocToInvalidPointerFlow:: flowPath ( pSource1 , _, pragma [ only_bind_into ] ( pSink1 ) ,
412
+ pragma [ only_bind_into ] ( pSink2 ) ) and
408
413
// Note that `delta` is not necessarily equal to `delta0`:
409
414
// `delta0` is the constant offset added to the size of the allocation, and
410
415
// delta is the constant difference between the pointer-arithmetic instruction
411
416
// and the instruction computing the address for which we will search for a dereference.
412
417
isSinkImpl ( pai , sink1 , sink2 , delta0 ) and
413
- bounded2 ( source .asInstruction ( ) , pai , delta ) and
418
+ bounded2 ( derefSource .asInstruction ( ) , pai , delta ) and
414
419
delta >= 0 and
415
- not source .getBasicBlock ( ) = Barrier2:: getABarrierBlock ( delta0 )
420
+ not derefSource .getBasicBlock ( ) = Barrier2:: getABarrierBlock ( delta0 )
416
421
)
417
422
}
418
423
419
- newtype TMergedPathNode =
420
- // The path nodes computed by the first projection of `AllocToInvalidPointerConfig`
421
- TPathNode1 ( AllocToInvalidPointerFlow:: PathNode1 p ) or
422
- // The path nodes computed by `InvalidPointerToDerefConfig`
423
- TPathNode3 ( InvalidPointerToDerefFlow:: PathNode p ) or
424
- // The read/write that uses the invalid pointer identified by `InvalidPointerToDerefConfig`.
425
- // This one is needed because the sink identified by `InvalidPointerToDerefConfig` is the
426
- // pointer, but we want to raise an alert at the dereference.
427
- TPathNodeSink ( Instruction i ) {
428
- exists ( DataFlow:: Node n |
429
- InvalidPointerToDerefFlow:: flowTo ( pragma [ only_bind_into ] ( n ) ) and
430
- isInvalidPointerDerefSink ( n , i , _, _) and
431
- i = getASuccessor ( n .asInstruction ( ) )
432
- )
433
- }
434
-
435
- class MergedPathNode extends TMergedPathNode {
436
- string toString ( ) { none ( ) }
437
-
438
- final AllocToInvalidPointerFlow:: PathNode1 asPathNode1 ( ) { this = TPathNode1 ( result ) }
439
-
440
- final InvalidPointerToDerefFlow:: PathNode asPathNode3 ( ) { this = TPathNode3 ( result ) }
441
-
442
- final Instruction asSinkNode ( ) { this = TPathNodeSink ( result ) }
443
-
444
- predicate hasLocationInfo (
445
- string filepath , int startline , int startcolumn , int endline , int endcolumn
446
- ) {
447
- none ( )
448
- }
424
+ /**
425
+ * Holds if `derefSink` is a dataflow node that represents an out-of-bounds address that is about to
426
+ * be dereferenced by `operation` (which is either a `StoreInstruction` or `LoadInstruction`), and
427
+ * `pai` is the pointer-arithmetic operation that caused the `derefSink` to be out-of-bounds.
428
+ */
429
+ predicate derefSinkToOperation (
430
+ DataFlow:: Node derefSink , PointerArithmeticInstruction pai , DataFlow:: Node operation
431
+ ) {
432
+ exists ( DataFlow:: Node source , Instruction i |
433
+ InvalidPointerToDerefFlow:: flow ( pragma [ only_bind_into ] ( source ) ,
434
+ pragma [ only_bind_into ] ( derefSink ) ) and
435
+ invalidPointerToDerefSource ( _, pai , source , _) and
436
+ isInvalidPointerDerefSink ( derefSink , i , _, _) and
437
+ i = getASuccessor ( derefSink .asInstruction ( ) ) and
438
+ operation .asInstruction ( ) = i
439
+ )
449
440
}
450
441
451
- class PathNode1 extends MergedPathNode , TPathNode1 {
452
- override string toString ( ) {
453
- exists ( AllocToInvalidPointerFlow :: PathNode1 p |
454
- this = TPathNode1 ( p ) and
455
- result = p . toString ( )
456
- )
457
- }
458
-
459
- override predicate hasLocationInfo (
460
- string filepath , int startline , int startcolumn , int endline , int endcolumn
461
- ) {
462
- this . asPathNode1 ( ) . hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
463
- }
464
- }
442
+ /**
443
+ * A configuration that represents the full dataflow path all the way from
444
+ * the allocation to the dereference. We need this final dataflow traversal
445
+ * to ensure that the transition from the sink in `AllocToInvalidPointerConfig`
446
+ * to the source in `InvalidPointerToDerefFlow` did not make us construct an
447
+ * infeasible path (which can happen since the transition from one configuration
448
+ * to the next does not preserve information about call contexts).
449
+ */
450
+ module FinalConfig implements DataFlow :: StateConfigSig {
451
+ newtype FlowState =
452
+ additional TInitial ( ) or
453
+ additional TPointerArith ( PointerArithmeticInstruction pai ) {
454
+ invalidPointerToDerefSource ( _ , pai , _ , _ )
455
+ }
465
456
466
- class PathNode3 extends MergedPathNode , TPathNode3 {
467
- override string toString ( ) {
468
- exists ( InvalidPointerToDerefFlow :: PathNode p |
469
- this = TPathNode3 ( p ) and
470
- result = p . toString ( )
457
+ predicate isSource ( DataFlow :: Node source , FlowState state ) {
458
+ state = TInitial ( ) and
459
+ exists ( DataFlow :: Node derefSource |
460
+ invalidPointerToDerefSource ( source , _ , derefSource , _ ) and
461
+ InvalidPointerToDerefFlow :: flow ( derefSource , _ )
471
462
)
472
463
}
473
464
474
- override predicate hasLocationInfo (
475
- string filepath , int startline , int startcolumn , int endline , int endcolumn
476
- ) {
477
- this .asPathNode3 ( ) .hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
478
- }
479
- }
480
-
481
- class PathSinkNode extends MergedPathNode , TPathNodeSink {
482
- override string toString ( ) {
483
- exists ( Instruction i |
484
- this = TPathNodeSink ( i ) and
485
- result = i .toString ( )
465
+ predicate isSink ( DataFlow:: Node sink , FlowState state ) {
466
+ exists ( PointerArithmeticInstruction pai |
467
+ derefSinkToOperation ( _, pai , sink ) and state = TPointerArith ( pai )
486
468
)
487
469
}
488
470
489
- override predicate hasLocationInfo (
490
- string filepath , int startline , int startcolumn , int endline , int endcolumn
471
+ predicate isAdditionalFlowStep (
472
+ DataFlow :: Node node1 , FlowState state1 , DataFlow :: Node node2 , FlowState state2
491
473
) {
492
- this .asSinkNode ( )
493
- .getLocation ( )
494
- .hasLocationInfo ( filepath , startline , startcolumn , endline , endcolumn )
474
+ // A step from the left-hand side of a pointer-arithmetic operation that has been
475
+ // identified as creating an out-of-bounds pointer to the result of the pointer-arithmetic
476
+ // operation.
477
+ exists (
478
+ PointerArithmeticInstruction pai , AllocToInvalidPointerFlow:: PathNode1 p1 ,
479
+ InvalidPointerToDerefFlow:: PathNode p2
480
+ |
481
+ isSinkImpl ( pai , node1 , _, _) and
482
+ invalidPointerToDerefSource ( _, pai , node2 , _) and
483
+ node1 = p1 .getNode ( ) and
484
+ node2 = p2 .getNode ( ) and
485
+ state1 = TInitial ( ) and
486
+ state2 = TPointerArith ( pai )
487
+ )
488
+ or
489
+ // A step from an out-of-bounds address to the operation (which is either a `StoreInstruction`
490
+ // or a `LoadInstruction`) that dereferences the address.
491
+ // This step exists purely for aesthetic reasons: we want the alert to be placed at the operation
492
+ // that causes the dereference, and not at the address that flows into the operation.
493
+ state1 = state2 and
494
+ exists ( DataFlow:: Node derefSource , PointerArithmeticInstruction pai |
495
+ InvalidPointerToDerefFlow:: flow ( derefSource , node1 ) and
496
+ invalidPointerToDerefSource ( _, pai , derefSource , _) and
497
+ state1 = TPointerArith ( pai ) and
498
+ derefSinkToOperation ( node1 , pai , node2 )
499
+ )
495
500
}
496
501
}
497
502
498
- query predicate edges ( MergedPathNode node1 , MergedPathNode node2 ) {
499
- node1 .asPathNode1 ( ) .getASuccessor ( ) = node2 .asPathNode1 ( )
500
- or
501
- joinOn1 ( _, node1 .asPathNode1 ( ) , node2 .asPathNode3 ( ) )
502
- or
503
- node1 .asPathNode3 ( ) .getASuccessor ( ) = node2 .asPathNode3 ( )
504
- or
505
- joinOn2 ( node1 .asPathNode3 ( ) , node2 .asSinkNode ( ) , _, _)
506
- }
507
-
508
- query predicate nodes ( MergedPathNode n , string key , string val ) {
509
- AllocToInvalidPointerFlow:: PathGraph1:: nodes ( n .asPathNode1 ( ) , key , val )
510
- or
511
- InvalidPointerToDerefFlow:: PathGraph:: nodes ( n .asPathNode3 ( ) , key , val )
512
- or
513
- key = "semmle.label" and val = n .asSinkNode ( ) .toString ( )
514
- }
515
-
516
- query predicate subpaths (
517
- MergedPathNode arg , MergedPathNode par , MergedPathNode ret , MergedPathNode out
518
- ) {
519
- AllocToInvalidPointerFlow:: PathGraph1:: subpaths ( arg .asPathNode1 ( ) , par .asPathNode1 ( ) ,
520
- ret .asPathNode1 ( ) , out .asPathNode1 ( ) )
521
- or
522
- InvalidPointerToDerefFlow:: PathGraph:: subpaths ( arg .asPathNode3 ( ) , par .asPathNode3 ( ) ,
523
- ret .asPathNode3 ( ) , out .asPathNode3 ( ) )
524
- }
503
+ module FinalFlow = DataFlow:: GlobalWithState< FinalConfig > ;
525
504
526
505
/**
527
- * Holds if `p1` is a sink of `AllocToInvalidPointerConfig` and `p2` is a source
528
- * of `InvalidPointerToDerefConfig`, and they are connected through `pai`.
529
- */
530
- predicate joinOn1 (
531
- PointerArithmeticInstruction pai , AllocToInvalidPointerFlow:: PathNode1 p1 ,
532
- InvalidPointerToDerefFlow:: PathNode p2
533
- ) {
534
- isSinkImpl ( pai , p1 .getNode ( ) , _, _) and
535
- invalidPointerToDerefSource ( pai , p2 .getNode ( ) , _)
536
- }
537
-
538
- /**
539
- * Holds if `p1` is a sink of `InvalidPointerToDerefConfig` and `i` is the instruction
540
- * that dereferences `p1`. The string `operation` describes whether the `i` is
541
- * a `StoreInstruction` or `LoadInstruction`.
506
+ * Holds if `source` is an allocation that flows into the left-hand side of `pai`, which produces an out-of-bounds
507
+ * pointer that flows into an address that is dereferenced by `sink` (which is either a `LoadInstruction` or a
508
+ * `StoreInstruction`). The end result is that `sink` writes to an address that is off-by-`delta` from the end of
509
+ * the allocation. The string `operation` describes whether the `sink` is a load or a store (which is then used
510
+ * to produce the alert message).
511
+ *
512
+ * Note that multiple `delta`s can exist for a given `(source, pai, sink)` triplet.
542
513
*/
543
- pragma [ inline]
544
- predicate joinOn2 ( InvalidPointerToDerefFlow:: PathNode p1 , Instruction i , string operation , int delta ) {
545
- isInvalidPointerDerefSink ( p1 .getNode ( ) , i , operation , delta )
546
- }
547
-
548
514
predicate hasFlowPath (
549
- MergedPathNode source1 , MergedPathNode sink , InvalidPointerToDerefFlow :: PathNode source3 ,
550
- PointerArithmeticInstruction pai , string operation , int delta
515
+ FinalFlow :: PathNode source , FinalFlow :: PathNode sink , PointerArithmeticInstruction pai ,
516
+ string operation , int delta
551
517
) {
552
- exists ( InvalidPointerToDerefFlow:: PathNode sink3 , AllocToInvalidPointerFlow:: PathNode1 sink1 |
553
- AllocToInvalidPointerFlow:: flowPath ( source1 .asPathNode1 ( ) , _, sink1 , _) and
554
- joinOn1 ( pai , sink1 , source3 ) and
555
- InvalidPointerToDerefFlow:: flowPath ( source3 , sink3 ) and
556
- joinOn2 ( sink3 , sink .asSinkNode ( ) , operation , delta )
518
+ exists (
519
+ DataFlow:: Node derefSink , DataFlow:: Node derefSource , int deltaDerefSourceAndPai ,
520
+ int deltaDerefSinkAndDerefAddress
521
+ |
522
+ FinalFlow:: flowPath ( source , sink ) and
523
+ sink .getState ( ) = FinalConfig:: TPointerArith ( pai ) and
524
+ invalidPointerToDerefSource ( source .getNode ( ) , pai , derefSource , deltaDerefSourceAndPai ) and
525
+ InvalidPointerToDerefFlow:: flow ( derefSource , derefSink ) and
526
+ derefSinkToOperation ( derefSink , pai , sink .getNode ( ) ) and
527
+ isInvalidPointerDerefSink ( derefSink , sink .getNode ( ) .asInstruction ( ) , operation ,
528
+ deltaDerefSinkAndDerefAddress ) and
529
+ delta = deltaDerefSourceAndPai + deltaDerefSinkAndDerefAddress
557
530
)
558
531
}
559
532
560
533
from
561
- MergedPathNode source , MergedPathNode sink , int k , string kstr , PointerArithmeticInstruction pai ,
562
- string operation , Expr offset , DataFlow:: Node n
534
+ FinalFlow :: PathNode source , FinalFlow :: PathNode sink , int k , string kstr ,
535
+ PointerArithmeticInstruction pai , string operation , Expr offset , DataFlow:: Node n
563
536
where
564
- k =
565
- min ( int k2 , int k3 , InvalidPointerToDerefFlow:: PathNode source3 |
566
- hasFlowPath ( source , sink , source3 , pai , operation , k3 ) and
567
- invalidPointerToDerefSource ( pai , source3 .getNode ( ) , k2 )
568
- |
569
- k2 + k3
570
- ) and
537
+ k = min ( int cand | hasFlowPath ( source , sink , pai , operation , cand ) ) and
571
538
offset = pai .getRight ( ) .getUnconvertedResultExpression ( ) and
572
- n = source .asPathNode1 ( ) . getNode ( ) and
539
+ n = source .getNode ( ) and
573
540
if k = 0 then kstr = "" else kstr = " + " + k
574
- select sink , source , sink ,
541
+ select sink . getNode ( ) , source , sink ,
575
542
"This " + operation + " might be out of bounds, as the pointer might be equal to $@ + $@" + kstr +
576
543
"." , n , n .toString ( ) , offset , offset .toString ( )
0 commit comments