|
| 1 | +/** |
| 2 | + * This file provides the first phase of the `cpp/invalid-pointer-deref` query that identifies flow |
| 3 | + * from an allocation to a pointer-arithmetic instruction that constructs a pointer that is out of bounds. |
| 4 | + */ |
| 5 | + |
| 6 | +private import cpp |
| 7 | +private import semmle.code.cpp.ir.dataflow.internal.ProductFlow |
| 8 | +private import semmle.code.cpp.ir.ValueNumbering |
| 9 | +private import semmle.code.cpp.controlflow.IRGuards |
| 10 | +private import codeql.util.Unit |
| 11 | +private import RangeAnalysisUtil |
| 12 | + |
| 13 | +private VariableAccess getAVariableAccess(Expr e) { e.getAChild*() = result } |
| 14 | + |
| 15 | +/** |
| 16 | + * Holds if the `(n, state)` pair represents the source of flow for the size |
| 17 | + * expression associated with `alloc`. |
| 18 | + */ |
| 19 | +predicate hasSize(HeuristicAllocationExpr alloc, DataFlow::Node n, int state) { |
| 20 | + exists(VariableAccess va, Expr size, int delta | |
| 21 | + size = alloc.getSizeExpr() and |
| 22 | + // Get the unique variable in a size expression like `x` in `malloc(x + 1)`. |
| 23 | + va = unique( | | getAVariableAccess(size)) and |
| 24 | + // Compute `delta` as the constant difference between `x` and `x + 1`. |
| 25 | + bounded1(any(Instruction instr | instr.getUnconvertedResultExpression() = size), |
| 26 | + any(LoadInstruction load | load.getUnconvertedResultExpression() = va), delta) and |
| 27 | + n.asConvertedExpr() = va.getFullyConverted() and |
| 28 | + state = delta |
| 29 | + ) |
| 30 | +} |
| 31 | + |
| 32 | +/** |
| 33 | + * A module that encapsulates a barrier guard to remove false positives from flow like: |
| 34 | + * ```cpp |
| 35 | + * char *p = new char[size]; |
| 36 | + * // ... |
| 37 | + * unsigned n = size; |
| 38 | + * // ... |
| 39 | + * if(n < size) { |
| 40 | + * use(*p[n]); |
| 41 | + * } |
| 42 | + * ``` |
| 43 | + * In this case, the sink pair identified by the product flow library (without any additional barriers) |
| 44 | + * would be `(p, n)` (where `n` is the `n` in `p[n]`), because there exists a pointer-arithmetic |
| 45 | + * instruction `pai` such that: |
| 46 | + * 1. The left-hand of `pai` flows from the allocation, and |
| 47 | + * 2. The right-hand of `pai` is non-strictly upper bounded by `n` (where `n` is the `n` in `p[n]`) |
| 48 | + * but because there's a strict comparison that compares `n` against the size of the allocation this |
| 49 | + * snippet is fine. |
| 50 | + */ |
| 51 | +module Barrier2 { |
| 52 | + private class FlowState2 = int; |
| 53 | + |
| 54 | + private module BarrierConfig2 implements DataFlow::ConfigSig { |
| 55 | + predicate isSource(DataFlow::Node source) { |
| 56 | + // The sources is the same as in the sources for the second |
| 57 | + // projection in the `AllocToInvalidPointerConfig` module. |
| 58 | + hasSize(_, source, _) |
| 59 | + } |
| 60 | + |
| 61 | + additional predicate isSink( |
| 62 | + DataFlow::Node left, DataFlow::Node right, IRGuardCondition g, FlowState2 state, |
| 63 | + boolean testIsTrue |
| 64 | + ) { |
| 65 | + // The sink is any "large" side of a relational comparison. |
| 66 | + g.comparesLt(left.asOperand(), right.asOperand(), state, true, testIsTrue) |
| 67 | + } |
| 68 | + |
| 69 | + predicate isSink(DataFlow::Node sink) { isSink(_, sink, _, _, _) } |
| 70 | + } |
| 71 | + |
| 72 | + private import DataFlow::Global<BarrierConfig2> |
| 73 | + |
| 74 | + private FlowState2 getAFlowStateForNode(DataFlow::Node node) { |
| 75 | + exists(DataFlow::Node source | |
| 76 | + flow(source, node) and |
| 77 | + hasSize(_, source, result) |
| 78 | + ) |
| 79 | + } |
| 80 | + |
| 81 | + private predicate operandGuardChecks( |
| 82 | + IRGuardCondition g, Operand left, Operand right, FlowState2 state, boolean edge |
| 83 | + ) { |
| 84 | + exists(DataFlow::Node nLeft, DataFlow::Node nRight, FlowState2 state0 | |
| 85 | + nRight.asOperand() = right and |
| 86 | + nLeft.asOperand() = left and |
| 87 | + BarrierConfig2::isSink(nLeft, nRight, g, state0, edge) and |
| 88 | + state = getAFlowStateForNode(nRight) and |
| 89 | + state0 <= state |
| 90 | + ) |
| 91 | + } |
| 92 | + |
| 93 | + /** |
| 94 | + * Gets an instruction that is guarded by a guard condition which ensures that |
| 95 | + * the value of the instruction is upper-bounded by size of some allocation. |
| 96 | + */ |
| 97 | + Instruction getABarrierInstruction(FlowState2 state) { |
| 98 | + exists(IRGuardCondition g, ValueNumber value, Operand use, boolean edge | |
| 99 | + use = value.getAUse() and |
| 100 | + operandGuardChecks(pragma[only_bind_into](g), pragma[only_bind_into](use), _, |
| 101 | + pragma[only_bind_into](state), pragma[only_bind_into](edge)) and |
| 102 | + result = value.getAnInstruction() and |
| 103 | + g.controls(result.getBlock(), edge) |
| 104 | + ) |
| 105 | + } |
| 106 | + |
| 107 | + /** |
| 108 | + * Gets a `DataFlow::Node` that is guarded by a guard condition which ensures that |
| 109 | + * the value of the node is upper-bounded by size of some allocation. |
| 110 | + */ |
| 111 | + DataFlow::Node getABarrierNode(FlowState2 state) { |
| 112 | + result.asOperand() = getABarrierInstruction(state).getAUse() |
| 113 | + } |
| 114 | + |
| 115 | + /** |
| 116 | + * Gets the block of a node that is guarded (see `getABarrierInstruction` or |
| 117 | + * `getABarrierNode` for the definition of what it means to be guarded). |
| 118 | + */ |
| 119 | + IRBlock getABarrierBlock(FlowState2 state) { |
| 120 | + result.getAnInstruction() = getABarrierInstruction(state) |
| 121 | + } |
| 122 | +} |
| 123 | + |
| 124 | +private module InterestingPointerAddInstruction { |
| 125 | + private module PointerAddInstructionConfig implements DataFlow::ConfigSig { |
| 126 | + predicate isSource(DataFlow::Node source) { |
| 127 | + // The sources is the same as in the sources for the second |
| 128 | + // projection in the `AllocToInvalidPointerConfig` module. |
| 129 | + hasSize(source.asConvertedExpr(), _, _) |
| 130 | + } |
| 131 | + |
| 132 | + predicate isSink(DataFlow::Node sink) { |
| 133 | + sink.asInstruction() = any(PointerAddInstruction pai).getLeft() |
| 134 | + } |
| 135 | + } |
| 136 | + |
| 137 | + private import DataFlow::Global<PointerAddInstructionConfig> |
| 138 | + |
| 139 | + /** |
| 140 | + * Holds if `pai` is a pointer-arithmetic instruction such that the |
| 141 | + * result of an allocation flows to the left-hand side of `pai`. |
| 142 | + * |
| 143 | + * This predicate is used to reduce the set of tuples in `isSinkPair`. |
| 144 | + */ |
| 145 | + predicate isInteresting(PointerAddInstruction pai) { |
| 146 | + exists(DataFlow::Node n | |
| 147 | + n.asInstruction() = pai.getLeft() and |
| 148 | + flowTo(n) |
| 149 | + ) |
| 150 | + } |
| 151 | +} |
| 152 | + |
| 153 | +/** |
| 154 | + * A product-flow configuration for flow from an (allocation, size) pair to a |
| 155 | + * pointer-arithmetic operation that is non-strictly upper-bounded by `allocation + size`. |
| 156 | + * |
| 157 | + * The goal of this query is to find patterns such as: |
| 158 | + * ```cpp |
| 159 | + * 1. char* begin = (char*)malloc(size); |
| 160 | + * 2. char* end = begin + size; |
| 161 | + * 3. for(int *p = begin; p <= end; p++) { |
| 162 | + * 4. use(*p); |
| 163 | + * 5. } |
| 164 | + * ``` |
| 165 | + * |
| 166 | + * We do this by splitting the task up into two configurations: |
| 167 | + * 1. `AllocToInvalidPointerConfig` find flow from `malloc(size)` to `begin + size`, and |
| 168 | + * 2. `InvalidPointerToDerefConfig` finds flow from `begin + size` to an `end` (on line 3). |
| 169 | + * |
| 170 | + * Finally, the range-analysis library will find a load from (or store to) an address that |
| 171 | + * is non-strictly upper-bounded by `end` (which in this case is `*p`). |
| 172 | + */ |
| 173 | +private module Config implements ProductFlow::StateConfigSig { |
| 174 | + class FlowState1 = Unit; |
| 175 | + |
| 176 | + class FlowState2 = int; |
| 177 | + |
| 178 | + predicate isSourcePair( |
| 179 | + DataFlow::Node source1, FlowState1 state1, DataFlow::Node source2, FlowState2 state2 |
| 180 | + ) { |
| 181 | + // In the case of an allocation like |
| 182 | + // ```cpp |
| 183 | + // malloc(size + 1); |
| 184 | + // ``` |
| 185 | + // we use `state2` to remember that there was an offset (in this case an offset of `1`) added |
| 186 | + // to the size of the allocation. This state is then checked in `isSinkPair`. |
| 187 | + exists(state1) and |
| 188 | + hasSize(source1.asConvertedExpr(), source2, state2) |
| 189 | + } |
| 190 | + |
| 191 | + predicate isSinkPair( |
| 192 | + DataFlow::Node sink1, FlowState1 state1, DataFlow::Node sink2, FlowState2 state2 |
| 193 | + ) { |
| 194 | + exists(state1) and |
| 195 | + // We check that the delta computed by the range analysis matches the |
| 196 | + // state value that we set in `isSourcePair`. |
| 197 | + pointerAddInstructionHasBounds0(_, sink1, sink2, state2) |
| 198 | + } |
| 199 | + |
| 200 | + predicate isBarrier2(DataFlow::Node node, FlowState2 state) { |
| 201 | + node = Barrier2::getABarrierNode(state) |
| 202 | + } |
| 203 | + |
| 204 | + predicate isBarrierIn1(DataFlow::Node node) { isSourcePair(node, _, _, _) } |
| 205 | + |
| 206 | + predicate isBarrierOut2(DataFlow::Node node) { |
| 207 | + node = any(DataFlow::SsaPhiNode phi).getAnInput(true) |
| 208 | + } |
| 209 | +} |
| 210 | + |
| 211 | +private module AllocToInvalidPointerFlow = ProductFlow::GlobalWithState<Config>; |
| 212 | + |
| 213 | +/** |
| 214 | + * Holds if `pai` is non-strictly upper bounded by `sink2 + delta` and `sink1` is the |
| 215 | + * left operand of the pointer-arithmetic operation. |
| 216 | + * |
| 217 | + * For example in, |
| 218 | + * ```cpp |
| 219 | + * char* end = p + (size + 1); |
| 220 | + * ``` |
| 221 | + * We will have: |
| 222 | + * - `pai` is `p + (size + 1)`, |
| 223 | + * - `sink1` is `p` |
| 224 | + * - `sink2` is `size` |
| 225 | + * - `delta` is `1`. |
| 226 | + */ |
| 227 | +pragma[nomagic] |
| 228 | +private predicate pointerAddInstructionHasBounds0( |
| 229 | + PointerAddInstruction pai, DataFlow::Node sink1, DataFlow::Node sink2, int delta |
| 230 | +) { |
| 231 | + InterestingPointerAddInstruction::isInteresting(pragma[only_bind_into](pai)) and |
| 232 | + exists(Instruction right, Instruction instr2 | |
| 233 | + pai.getRight() = right and |
| 234 | + pai.getLeft() = sink1.asInstruction() and |
| 235 | + instr2 = sink2.asInstruction() and |
| 236 | + // pai.getRight() <= sink2 + delta |
| 237 | + bounded1(right, instr2, delta) and |
| 238 | + not right = Barrier2::getABarrierInstruction(delta) and |
| 239 | + not instr2 = Barrier2::getABarrierInstruction(delta) |
| 240 | + ) |
| 241 | +} |
| 242 | + |
| 243 | +/** |
| 244 | + * Holds if `allocation` flows to `sink1` and `sink1` represents the left-hand |
| 245 | + * side of the pointer-arithmetic instruction `pai`, and the right-hand side of `pai` |
| 246 | + * is non-strictly upper bounded by the size of `alllocation` + `delta`. |
| 247 | + */ |
| 248 | +pragma[nomagic] |
| 249 | +predicate pointerAddInstructionHasBounds( |
| 250 | + DataFlow::Node allocation, PointerAddInstruction pai, DataFlow::Node sink1, int delta |
| 251 | +) { |
| 252 | + exists(DataFlow::Node sink2 | |
| 253 | + AllocToInvalidPointerFlow::flow(allocation, _, sink1, sink2) and |
| 254 | + pointerAddInstructionHasBounds0(pai, sink1, sink2, delta) |
| 255 | + ) |
| 256 | +} |
0 commit comments